Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file lock_file_blocking.ml
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405open!Coreopen!Importopen!Int.Replace_polymorphic_compare[%%import"config.h"](* We have reason to believe that lockf doesn't work properly on CIFS mounts. The idea
behind requiring both lockf and flock is to prevent programs taking locks on
network filesystems where they may not be sound.
However, this assumes that [lockf] and [flock] take independent locks, which
is true on local Linux filesystems, but is false on many OSes (for example, Mac OS X),
so we use just [flock] on non-linux OSes and give up the fail-on-CIFS-and-NFS property.
We prefer [flock] and not [lockf] because [lockf] has bad semantics if used multiple
times within the same process: for example [lockf a; lockf b; close a] succeeds (bad!)
and leaves the file unlocked (bad!) if [a] and [b] are unrelated file descriptors for
the same file. *)letflockfd=Unix.flockfdUnix.Flock_command.lock_exclusiveletlockf?(mode=Unix.F_TLOCK)fd=tryUnix.lockffd~mode~len:Int64.zero;truewith|_->false[%%ifdefJSC_LINUX_EXT]letlockfd=(* [lockf] doesn't throw any exceptions, so if an exception is raised from this
function, it must have come from [flock]. *)letflocked=flockfdinletlockfed=lockffdinflocked&&lockfed[%%else]letlock=flock[%%endif]letcreate?(message=Pid.to_string(Unix.getpid()))?(close_on_exec=true)?(unlink_on_exit=false)path=letmessage=sprintf"%s\n"messagein(* We use [~perm:0o664] rather than our usual default perms, [0o666], because
lock files shouldn't rely on the umask to disallow tampering by other. *)letfd=Unix.openfilepath~mode:[Unix.O_WRONLY;Unix.O_CREAT]~perm:0o664intryiflockfdthenbeginifclose_on_execthenUnix.set_close_on_execfd;letpid_when_lock_file_was_created=Unix.getpid()inifunlink_on_exitthenat_exit(fun()->(* Do not unlink if we are in a different process than the one
that created the lock file (e.g. a forked child)
*)if(Pid.(=)pid_when_lock_file_was_created(Unix.getpid()))thenbegintryUnix.unlinkpathwith_->()end);Unix.ftruncatefd~len:Int64.zero;ignore(Unix.write_substringfd~buf:message~pos:0~len:(String.lengthmessage));(* we truncated the file, so we need the region lock back. We don't really
understand why/if this call is needed, but experimental evidence indicates that
we need to do it. *)ignore(lockffd);trueendelsebeginUnix.closefd;(* releases any locks from [flock] and/or [lockf] *)falseendwith|e->Unix.closefd;(* releases any locks from [flock] and/or [lockf] *)raiseeletcreate_exn?message?close_on_exec?unlink_on_exitpath=ifnot(create?message?close_on_exec?unlink_on_exitpath)thenfailwithf"Lock_file.create_exn '%s' was unable to acquire the lock"path()letrandom=lazy(Random.State.make_self_init())(* no timeout specified = wait indefinitely *)letrepeat_with_timeout?timeoutlockfpath=letmax_delay=0.3inmatchtimeoutwith|None->letrecloop()=try(lockfpath)with|_->beginlet(_:float)=Unix.nanosleep(Random.State.float(Lazy.forcerandom)max_delay)inloop()endinloop()|Sometimeout->letstart_time=Time.now()inletrecloop()=trylockfpathwith|e->beginletsince_start=Time.abs_diffstart_time(Time.now())inifTime.Span.(since_start>timeout)thenfailwithf"Lock_file: '%s' timed out waiting for existing lock. \
Last error was %s"path(Exn.to_stringe)()elsebeginlet(_:float)=Unix.nanosleep(Random.State.float(Lazy.forcerandom)max_delay)inloop()endendinloop()(* default timeout is to wait indefinitely *)letblocking_create?timeout?message?close_on_exec?unlink_on_exitpath=repeat_with_timeout?timeout(funpath->create_exn?message?close_on_exec?unlink_on_exitpath)pathletis_lockedpath=tryletfd=Unix.openfilepath~mode:[Unix.O_RDONLY]~perm:0o664inletflocked=flockfdinletlockfed=lockffd~mode:Unix.F_TESTinUnix.closefd;(* releases any locks from [flock] and/or [lockf] *)ifflocked&&lockfedthenfalseelsetruewith|Unix.Unix_error(ENOENT,_,_)->false|e->raiseeletread_file_and_convert~of_stringpath=Option.try_with(fun()->In_channel.read_allpath|>String.strip|>of_string);;letget_pidpath=letof_stringstring=Int.of_stringstring|>Pid.of_intinread_file_and_convert~of_stringpath;;moduleNfs=structletprocess_start_timepid=(* Find the start time for a process, without requiring the [Procfs] library
-- start time is represented in USER_HZ units in /proc/<pid>/stat (confusingly
referred to as 'jiffies' in the man page); USER_HZ is almost certainly 100, for
mostly historical reasons, but just to be sure we'll ask sysconf.
*)matchLinux_ext.Sysinfo.sysinfowith|Error_->None|Oksysinfo->letof_stringstat=(* [read_file_and_convert] will catch any exceptions raised here *)letboot_time=Time.sub(Time.now())(sysinfo()).Linux_ext.Sysinfo.uptimeinletjiffies=letfields=String.rsplit2_exnstat~on:')'|>snd|>String.strip|>String.split~on:' 'inList.nth_exnfields19|>Float.of_stringinlethz=Unix.sysconfUnix.CLK_TCK|>Option.value_exn|>Int64.to_floatinjiffies/.hz|>Time.Span.of_sec|>Time.addboot_timeinread_file_and_convert(sprintf!"/proc/%{Pid}/stat"pid)~of_string;;moduleInfo=structtypet={host:string;pid:Pid.Stable.V1.t;message:string;start_time:Time.Stable.With_utc_sexp.V2.toption[@sexp.option]}[@@derivingsexp,fields]letcreate~message=letpid=Unix.getpid()in{host=Unix.gethostname();pid;message;start_time=process_start_timepid};;letof_stringstring=Sexp.of_stringstring|>t_of_sexp;;letof_file=read_file_and_convert~of_stringendletlock_pathpath=path^".nfs_lock"letget_hostname_and_pidpath=Option.map(Info.of_filepath)~f:(funinfo->(Info.hostinfo),(Info.pidinfo));;letget_messagepath=Option.map(Info.of_filepath)~f:Info.messageletunlock_safely_exn~unlock_myselfpath=(* Make sure error messages contain a reference to "lock.nfs_lock", which is the
actually important file. *)letlock_path=lock_pathpathinleterrors=failwithf"Lock_file.Nfs.unlock_safely_exn: unable to unlock %s: %s"lock_paths()inmatchSys.file_exists~follow_symlinks:falselock_pathwith|`Unknown->error(sprintf"unable to read %s"lock_path)|`No->()|`Yes->matchInfo.of_filelock_pathwith|None->error"unknown lock file format"|Someinfo->letmy_pid=Unix.getpid()inletmy_hostname=Unix.gethostname()inletlocking_hostname=Info.hostinfoinletlocking_pid=Info.pidinfoinifString.(<>)my_hostnamelocking_hostnamethenerror(sprintf"locked from %s, unlock attempted from %s"locking_hostnamemy_hostname)else(* Check if the process is running: sends signal 0 to pid, which should work if
the process is running and is owned by the user running this code. If the
process is not owned by the user running this code we should fail to unlock
either earlier (unable to read the file) or later (unable to remove the
file). *)letpid_start_matches_lockpid=matchOption.both(Info.start_timeinfo)(process_start_timepid)with|None->true(* don't have both start times: fall back to old behaviour *)|Some(lock_start,pid_start)->(* our method of calculating start time is open to some inaccuracy, so let's
be generous and allow for up to 1s of difference (this would only allow
for a collision if pids get reused within 1s, which seems unlikely) *)letepsilon=Time.Span.of_sec1.inTime.Span.(<)(Time.abs_difflock_startpid_start)epsiloninletis_locked_by_me()=(Pid.equallocking_pidmy_pid)&&(pid_start_matches_lockmy_pid)inletlocking_pid_exists()=(Signal.can_send_tolocking_pid)&&(pid_start_matches_locklocking_pid)inif(unlock_myself&&(is_locked_by_me()))||not(locking_pid_exists())thenbegin(* We need to be able to recover from situation where [path] does not exist
for whatever reason, but [lock_path] is present. Error during unlink of
[path] are ignored to be able to cope with this situation and properly
clean up stale locks. *)begintryUnix.unlinkpathwith|Unix.Unix_error(ENOENT,_,_)->()|e->error(Exn.to_stringe)end;tryUnix.unlinklock_pathwith|e->error(Exn.to_stringe)endelseerror(sprintf"locking process (pid %i) still running on %s"(Pid.to_intlocking_pid)locking_hostname);;(* See mli for more information on the algorithm we use for locking over NFS. Ensure
that you understand it before you make any changes here. *)letcreate_exn?(message="")path=tryunlock_safely_exn~unlock_myself:falsepath;letfd=Unix.openfilepath~mode:[Unix.O_WRONLY;Unix.O_CREAT]inletcleanup=ref(fun()->Unix.closefd)inprotect~finally:(fun()->!cleanup())~f:(fun()->Unix.link~target:path~link_name:(lock_pathpath)();Unix.ftruncatefd~len:0L;letinfo=Info.create~messagein(* if this fprintf fails, empty lock file would be left behind, and
subsequent calls to [Lock_file.Nfs.create_exn] would be unable to
figure out that it is stale/corrupt and remove it. So we need to
remove it ourselves *)tryletout_channel=Unix.out_channel_of_descrfdincleanup:=(fun()->Caml.close_out_noerrout_channel);fprintfout_channel"%s\n%!"(Sexp.to_string_hum(Info.sexp_of_tinfo))with|(Sys_error_)aserr->beginUnix.unlinkpath;Unix.unlink(lock_pathpath);raiseerrend);at_exit(fun()->tryunlock_safely_exn~unlock_myself:truepathwith_->());with|e->failwithf"Lock_file.Nfs.create_exn: unable to lock '%s' - %s"path(Exn.to_stringe)();;letcreate?messagepath=Or_error.try_with(fun()->create_exn?messagepath)(* default timeout is to wait indefinitely *)letblocking_create?timeout?messagepath=repeat_with_timeout?timeout(funpath->create_exn?messagepath)path;;letcritical_section?messagepath~timeout~f=blocking_create~timeout?messagepath;Exn.protect~f~finally:(fun()->unlock_safely_exn~unlock_myself:truepath);;letunlock_exnpath=unlock_safely_exn~unlock_myself:truepathletunlockpath=Or_error.try_with(fun()->unlock_exnpath)end(* The reason this function is used is to make sure the file the path is pointing to
remains stable across [chdir]. In fact we'd prefer for it to remain stable over
other things, such as [rename] of a parent directory.
That could be achieved if we [open] the [dir] and use the resulting file descriptor
with linkat, unlinkat, etc system calls, but that's less portable and most
programs that use locks will break anyway if their directory is renamed. *)letcanonicalize_dirnamepath=letdir,name=Filename.dirnamepath,Filename.basenamepathinletdir=Filename.realpathdirindir^/namemoduleMkdir=structtypet=Lockedof{lock_path:string}letlock_exn~lock_path=letlock_path=canonicalize_dirnamelock_pathinmatchUnix.mkdirlock_pathwith|exception(Core.Unix.Unix_error(EEXIST,_,_))->`Somebody_else_took_it|()->`We_took_it(Locked{lock_path})letunlock_exn(Locked{lock_path})=Unix.rmdirlock_pathendmoduleSymlink=structtypet=Lockedof{lock_path:string}letlock_exn~lock_path~metadata=letlock_path=canonicalize_dirnamelock_pathinmatchUnix.symlink~link_name:lock_path~target:metadatawith|exception(Core.Unix.Unix_error(EEXIST,_,_))->`Somebody_else_took_it(Or_error.try_with(fun()->Unix.readlinklock_path))|()->`We_took_it(Locked{lock_path})letunlock_exn(Locked{lock_path})=Unix.unlinklock_pathendmoduleFlock=structtypet={fd:Caml.Unix.file_descr;mutableunlocked:bool;}letlock_exn~lock_path=letfd=Core.Unix.openfile~perm:0o664~mode:[O_CREAT;O_WRONLY;O_CLOEXEC]lock_pathinmatchflockfdwith|false->Core.Unix.close~restart:truefd;`Somebody_else_took_it|true->`We_took_it{fd;unlocked=false}|exceptionexn->Core.Unix.close~restart:truefd;raiseexnletunlock_exnt=ift.unlockedthenraise_s[%sexp"Lock_file_blocking.Flock.unlock_exn called twice"];t.unlocked<-true;Core.Unix.close~restart:truet.fd;end