package metadb
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file metadb.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513(******************************************************************************) (* Metadb *) (* Copyright (C) 2022 Nathan Guermond *) (* *) (* This program is free software: you can redistribute it and/or modify it *) (* under the terms of the GNU General Public License as published by the Free *) (* Software Foundation, either version 3 of the License, or (at your option) *) (* any later version. *) (* *) (* This program is distributed in the hope that it will be useful, but *) (* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *) (* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License *) (* for more details. *) (* *) (* You should have received a copy of the GNU General Public License along *) (* with this program. If not, see <https://www.gnu.org/licenses/>. *) (* *) (******************************************************************************) exception FileExists of Path.root exception EntryExists of (string * Path.rel) exception EntryDoesNotExist of (string * Path.rel) exception DirNotEmpty of Path.root exception LibraryExists exception InternalError module Path = Path module Hash = Hash module Json = Json module System = System module type Metadata = sig type t val to_json : t -> Json.t val from_json : Json.t -> t val init : t val merge : t -> t -> t option val to_string : t -> string end module Entry (D : Metadata) = struct type t = { data : D.t; hash : Hash.t; mutable modified : bool; } let make (d : D.t) (h : Hash.t) : t = {data = d; hash = h; modified = true} let empty (e : t) : bool = e.data = D.init let set_modified (e : t) : unit = e.modified <- true let modified (e : t) : bool = e.modified let to_json (e : t) : Json.t = (`Assoc [("data", (D.to_json e.data)); ("hash", `String (Hash.to_string e.hash))]) let from_json (j : Json.t) : t = let jmd = (Json.get_err "data" j) in let h = (Json.to_string (Json.get_err "hash" j)) in let d = D.from_json jmd in {data = d; hash = (Hash.of_string h); modified = false; } let write_file (file : Path.root) (e : t) : unit = let j = to_json e in (if not (System.file_exists file) then System.make_dirp file); Json.to_file file j; e.modified <- false let read_file (path : Path.root) : t = let j = Json.from_file path in from_json j let get_data (e : t) : D.t = e.data let get_hash (e : t) : Hash.t = e.hash let to_string (path : Path.rel) (e : t) : string = Format.sprintf "%s => %s:@\n@[<2>%s@]\n" (Path.string_of_rel path) (Hash.to_string e.hash) (D.to_string e.data) end type file_table = (Hash.t * (string * Path.rel)) list module type LibData = sig type t val to_json : t -> Json.t val from_json : Json.t -> t end module Library (D : Metadata) (LD : LibData) = struct module E = Entry(D) type t = { mutable name : string; root : Path.root; libdata : LD.t; entries : (Path.rel, E.t) Hashtbl.t; } let rename (lib : t) new_name : t = lib.name <- new_name; lib let store (lib : t) : Path.root = Path.merge lib.root (Path.mk_rel ".metadata") let load_entries (lib : t) : unit = let root = (store lib) in (Seq.iter (fun path -> let entry = (E.read_file path) in let path = (Path.remove_file_ext "json" path) in let key = (Path.strip_root root path) in Hashtbl.add lib.entries key entry) (System.get_files ~hidden:true root)) let entry_empty (lib : t) (key : Path.rel) : bool = E.empty (Hashtbl.find lib.entries key) let entry_exists (lib : t) (key : Path.rel) : bool = match (Hashtbl.find_opt lib.entries key) with | None -> false | Some _ -> true let file_exists (lib : t) (key : Path.rel) : bool = let path = (Path.merge lib.root key) in System.file_exists path (* Add entries for new files *) let read_files (lib : t) : (Path.rel * E.t) Seq.t = let root = lib.root in Seq.filter_map (fun path -> let key = (Path.strip_root root path) in if (entry_exists lib key) then None else let d = D.init in let h = Hash.hash_file path in let entry = (E.make d h) in Hashtbl.add lib.entries key entry; Some (key,entry)) (System.get_files root) let init (lib : t) : unit = if not(System.file_exists (store lib)) then System.make_dirp_leaf (store lib); load_entries lib; ignore @@ (read_files lib ()) let refresh (lib : t) : (Path.rel * D.t) Seq.t = Seq.map (fun (path,e) -> (path,E.get_data e)) (read_files lib) let make (name : string) (root : Path.root) (libdata : LD.t) : t = {name = name; root = root; libdata = libdata; entries = Hashtbl.create 1 } let to_json (lib : t) : Json.t = (`Assoc [("name",`String lib.name); ("root", `String (Path.string_of_root lib.root)); ("libdata", (LD.to_json lib.libdata)) ]) let from_json (json : Json.t) : string * t = let jname = (Json.get_err "name" json) in let jroot = (Json.get_err "root" json) in let jdata = (Json.get_err "libdata" json) in let name = (Json.to_string jname) in let root = (Path.mk_root (Json.to_string jroot)) in let libdata = LD.from_json jdata in (name, make name root libdata) let add (lib : t) (key : Path.rel) (m : D.t) : unit = let path = (Path.merge lib.root key) in (if (file_exists lib key) then raise (FileExists path)); let h = Hash.hash_file path in let entry = E.make m h in (Hashtbl.add lib.entries key entry) let get (lib : t) (key : Path.rel) : D.t option = match (Hashtbl.find_opt lib.entries key) with | Some e -> Some (E.get_data e) | None -> None let set (lib : t) (key : Path.rel) (m : D.t) : unit = match (Hashtbl.find_opt lib.entries key) with | Some e -> let entry = E.make m (E.get_hash e) in Hashtbl.replace lib.entries key entry | None -> raise (EntryDoesNotExist(lib.name,key)) let set_entry (lib : t) (key : Path.rel) (e : E.t) : unit = E.set_modified e; Hashtbl.replace lib.entries key e let get_entry (lib : t) (key : Path.rel) : E.t option = Hashtbl.find_opt lib.entries key let remove_entry (lib : t) (key : Path.rel) : unit = Hashtbl.remove lib.entries key; let file = (Path.add_file_ext "json" (Path.merge (store lib) key)) in if System.file_exists file then System.remove file let remove_file (lib : t) (key : Path.rel) : unit = System.remove (Path.merge lib.root key) (* assumes new entry does not exists *) let remap (lib : t) (key : Path.rel) (key' : Path.rel) : unit = let e = Hashtbl.find lib.entries key in if (entry_exists lib key') then raise(EntryExists(lib.name,key')) else (remove_entry lib key; Hashtbl.add lib.entries key' e) let index_files (lib : t) (tbl : file_table) : file_table = Hashtbl.fold (fun key e tbl -> let library = lib.name in let hash = E.get_hash e in if (file_exists lib key) then ((hash,(library, key))::tbl) else tbl) lib.entries tbl (* Return the list of entries * that do not have associated files *) let get_unmatched_entries (lib : t) : (Path.rel * E.t) Seq.t = Seq.filter (fun (key,e) -> not (file_exists lib key)) (Hashtbl.to_seq lib.entries) (* write modified entries to disk *) let flush_modified_entries (lib : t) : unit = Hashtbl.iter (fun key entry -> if (E.modified entry) then let path = (Path.merge (store lib) key) in let file = (Path.add_file_ext "json" path) in E.write_file file entry else ()) lib.entries let get_root (lib : t) : Path.root = lib.root let get_libdata (lib : t) : LD.t = lib.libdata let to_string library (lib : t) : string = let str = Hashtbl.fold (fun key e str -> (E.to_string key e) ^ str) lib.entries "" in Format.sprintf "%s = {@\n@[<2>%s@]\n" library str let get_entries (lib : t) : (Path.rel * D.t) Seq.t = Seq.map (fun (path,e) -> (path,E.get_data e)) (Hashtbl.to_seq lib.entries) end module Make (D : Metadata) (LD : LibData) = struct module L = Library(D)(LD) module E = Entry(D) let libraries : ((string * L.t) list) ref = ref [] (* We keep a global index of files by their hash to deal * with moved/renamed files and duplicates. * file_index : file_hash -> (library, file_path) *) let file_index : file_table ref = ref [] let refresh_library ~library : (Path.rel * D.t) Seq.t = L.refresh (List.assoc library !libraries) let init_library ~library : unit = L.init (List.assoc library !libraries) let init_libraries () : unit = List.iter (fun (name,lib) -> L.init lib) !libraries let get_entry ~library (key : Path.rel) : D.t option = let lib = (List.assoc library !libraries) in L.get lib key let set_entry ~library (key : Path.rel) (m : D.t) : unit = let lib = (List.assoc library !libraries) in L.set lib key m let remove_entry ~library (key : Path.rel) : unit = let lib = (List.assoc library !libraries) in L.remove_entry lib key let remove_file ~library (key : Path.rel) : unit = let lib = (List.assoc library !libraries) in L.remove_file lib key let new_library ~library (root : Path.root) (libdata : LD.t) : unit = if (List.mem_assoc library !libraries) then raise (LibraryExists); let lib = L.make library root libdata in (* Note: Entries may already be present *) L.init lib; ignore @@ (L.read_files lib ()); libraries := (library, lib) :: !libraries let remove_library ~delete_metadata ~library : unit = let lib = List.assoc library !libraries in libraries := List.remove_assoc library !libraries; if delete_metadata && (System.file_exists (L.store lib)) then System.rmdir (L.store lib) let rename_library ~library new_name : unit = let lib = List.assoc library !libraries in let lib = L.rename lib new_name in remove_library ~delete_metadata:false ~library; libraries := (new_name,lib) :: !libraries let move_library ~library (root : Path.root) : unit = let lib = List.assoc library !libraries in if (System.file_exists root) then if not (System.empty_dir root) then raise(DirNotEmpty root) else System.move (L.get_root lib) (Path.drop_leaf root) else let _ = System.make_dirp root in System.move (L.get_root lib) root let index_files () : unit = file_index := List.fold_left (fun tbl (library,lib) -> L.index_files lib tbl) [] !libraries (* Move entry and file from one library to another *) let migrate_entry ~from_lib ~to_lib (key : Path.rel) : unit = let from_lib_ = (List.assoc from_lib !libraries) in let to_lib_ = (List.assoc to_lib !libraries) in if (L.file_exists to_lib_ key) then (* Output a warning message *) raise(FileExists (Path.merge (L.get_root to_lib_) key)) else if (L.entry_exists to_lib_ key) then raise(EntryExists (to_lib, key)) else (match L.get_entry from_lib_ key with | Some entry -> (L.set_entry to_lib_ key entry); (L.remove_entry from_lib_ key); (* Note: File may be missing, but this is okay, * we move the entry anyways *) if (L.file_exists from_lib_ key) then let from_path = Path.merge (L.get_root from_lib_) key in let to_path = Path.merge (L.get_root to_lib_) key in let _ = (System.make_dirp to_path) in (System.move from_path to_path) | None -> raise(EntryDoesNotExist(from_lib,key))) type resolution = Remap of (Path.rel * (string * Path.rel)) | Missing of Path.rel let remap_entry ~from_lib ~to_lib key key' : (Path.rel * (string * Path.rel)) = let lib = (List.assoc from_lib !libraries) in let lib' = (List.assoc to_lib !libraries) in match L.get_entry lib key, L.get_entry lib' key' with | Some entry, None -> L.set_entry lib' key' entry; L.remove_entry lib key; (key,(to_lib,key')) | Some entry, Some entry' -> (match D.merge (E.get_data entry) (E.get_data entry') with | Some d -> prerr_endline "Remap: Merging entries"; let hash = E.get_hash entry' in L.set_entry lib' key' (E.make d hash); L.remove_entry lib key; (key,(to_lib,key')) | None -> raise(EntryExists(to_lib,key'))) | None, _ -> raise(InternalError) (* This function assumes * 1. libraries are freshly initialized or have been refreshed * 2. files have been indexed *) let resolve_missing_files ~library : resolution Seq.t = let lib = List.assoc library !libraries in let entries = (L.get_unmatched_entries lib) in let resolutions = Seq.filter_map (fun (key,entry) -> let hash = (L.E.get_hash entry) in match (List.assoc_opt hash !file_index) with | Some (library',key') -> (try Some(Remap (remap_entry ~from_lib:library ~to_lib:library' key key')) with _ -> (Some (Missing key))) | None -> Some (Missing key)) entries in resolutions (* Return type is a partition of the duplicate files, of the form: [[(lib_11, file_11), (lib_12, file_12),...] [(lib_21, file_21), (lib_22, file_22),...] ... [(lib_n1, file_n1), (lib_n2, file_n2),...]] such that entries in each row are duplicates *) (* TODO: Attempt to merge duplicates if one is more precise than another * use D.merge *) let find_duplicates () : ((string * Path.rel) list) list = let rec find_dups dup_hashes dups bdgs = match bdgs, dup_hashes, dups with | [], _, dups -> dups | [(hash,v)], [], [] -> [] | [(hash,v)], h::_, dup::dups -> if ((hash = h)) then ((v::dup)::dups) else (dup::dups) | (hash,v)::bdgs, h::_, dup::dups -> if (hash = h) then (find_dups dup_hashes ((v::dup)::dups) bdgs) else if (hash = fst (List.hd bdgs)) then (find_dups (hash::dup_hashes) ([v]::(dup::dups)) bdgs) else (find_dups dup_hashes (dup::dups) bdgs) | (hash,v)::bdgs, [], [] -> if (hash = fst (List.hd bdgs)) then (find_dups ([hash]) ([[v]]) bdgs) else (find_dups [] [] bdgs) | _ -> raise (InternalError) in let bdgs = (List.sort (fun (h,u) (h',u') -> Hash.compare h h') !file_index) in (find_dups [] [] bdgs) let to_json () : Json.t = (`List (List.map (fun (library,lib) -> L.to_json lib) !libraries)) let from_json (json : Json.t) : (string * L.t) list = let libs = Json.to_list json in (List.map L.from_json libs) let load_config (libconfig : Path.root) : unit = if System.file_exists libconfig then let libs = from_json @@ Json.from_file libconfig in libraries := libs else (System.make_dirp libconfig; libraries := []) let write_config ?(ord = []) (libconfig : Path.root) : unit = libraries := (if ord = [] then !libraries else (List.init (List.length !libraries) (fun i -> let library = (List.nth ord i) in (library, List.assoc library !libraries)))); let jlibs = to_json () in Json.to_file libconfig jlibs let get_libdata () : (string * LD.t) list = List.map (fun (library,lib) -> (library, L.get_libdata lib)) !libraries let get_library_root ~library : Path.root = L.get_root (List.assoc library !libraries) let get_entries ~library : (Path.rel * D.t) Seq.t = let lib = List.assoc library !libraries in L.get_entries lib let flush_library_metadata ~library : unit = L.flush_modified_entries (List.assoc library !libraries) let flush_metadata () : unit = List.iter (fun (library,lib) -> L.flush_modified_entries lib) !libraries let library_to_string ~library : string = L.to_string library (List.assoc library !libraries) end