Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Page
Library
Module
Module type
Parameter
Class
Class type
Source
xmldiff.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089
(*********************************************************************************) (* Xmldiff *) (* *) (* Copyright (C) 2014-2021 Institut National de Recherche en Informatique *) (* et en Automatique. All rights reserved. *) (* *) (* This program is free software; you can redistribute it and/or modify *) (* it under the terms of the GNU Lesser General Public License version *) (* 3 as published by the Free Software Foundation. *) (* *) (* This program is distributed in the hope that it will be useful, *) (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) (* GNU Library General Public License for more details. *) (* *) (* You should have received a copy of the GNU Lesser General Public *) (* License along with this program; if not, write to the Free Software *) (* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA *) (* 02111-1307 USA *) (* *) (* Contact: Maxence.Guesdon@inria.fr *) (* *) (* *) (*********************************************************************************) (** *) let dbg_mode = match try Sys.getenv "XMLDIFF_DEBUG" with _ -> "" with "1" -> true | _ -> false let dbg = if dbg_mode then prerr_endline else fun _ -> () let on_dbg f x = if dbg_mode then f x else () module Smap = Map.Make(String) module Intmap = Map.Make (struct type t = int let compare (x:int) y = Stdlib.compare x y end) module Intset = Set.Make (struct type t = int let compare (x:int) y = Stdlib.compare x y end) module Nmap = Map.Make ( struct type t = string * string let compare (p1, s1) (p2, s2) = match String.compare p1 p2 with 0 -> String.compare s1 s2 | n -> n end) type name = Xmlm.name let string_of_name = function "", s -> s | s1, s2 -> s1 ^ ":" ^ s2 let atts_of_list l= List.fold_left (fun acc (name, v) -> Nmap.add name v acc) Nmap.empty l type 'a xmlt = [ `E of name * string Nmap.t * 'a list | `D of string ] type xmltree = xmltree xmlt type xmlnode = int option * xmlnode xmlt type label = Node of string | Text of string let compare_label l1 l2 = match l1, l2 with Node s1, Node s2 -> String.compare s1 s2 | Text s1, Text s2 -> String.compare s1 s2 | Node _, Text _ -> -1 | Text _, Node _ -> 1 module Lmap = Map.Make(struct type t = label let compare = compare_label end) type node = { number : int ; children : int array ; mutable parent : int option ; xml : xmltree ; weight : float ; hash : string ; label : label ; rank : int ; is_cut : bool ; mutable matched : int option ; } type doc = { height: int ; w0 : float ; nodes : node array ; } (*c==v=[File.file_of_string]=1.1====*) let file_of_string ~file s = let oc = open_out file in output_string oc s; close_out oc (*/c==v=[File.file_of_string]=1.1====*) let string_of_atts map = let l = Nmap.fold (fun name s acc -> (Printf.sprintf "%s=%S" (string_of_name name) s) :: acc) map [] in String.concat " " l let label_of_xml = function | `D s -> Text s | `E (tag, _, _) -> Node (string_of_name tag) let atts_of_map map = List.rev (Nmap.fold (fun name s acc -> (name, s) :: acc) map []) let string_of_xml ?(cut=false) tree = let tree = if cut then match tree with `D _ -> tree | `E (name,atts,_) -> `E (name,atts,[]) else tree in let b = Buffer.create 256 in let ns_prefix s = Some s in let output = Xmlm.make_output ~ns_prefix ~decl: false (`Buffer b) in let frag = function | `E (tag, atts, childs) -> let atts = atts_of_map atts in `El ((tag, atts), childs) | `D d -> `Data d in Xmlm.output_doc_tree frag output (None, tree); Buffer.contents b let hash xml = let s = match xml with `D s -> "!" ^ s | `E _ -> "<" ^ (Marshal.to_string xml []) in Digest.string s let short_label = function `E (("",s2), _, _) -> "<"^s2^">" | `E ((s1,s2), _, _) -> "<"^s1^":"^s2^">" | `D s -> let len = String.length s in let s = Printf.sprintf "%S" (String.sub s 0 (min 10 len)) in let len = String.length s in String.sub s 1 (len - 2) let xmlnode_of_t t = let rec unfold_cut = function (`D s) as xml -> (None, xml) | `E (tag, atts, subs) -> (None, `E (tag, atts, List.map unfold_cut subs)) in let len = Array.length t in let rec build n = let xml = t.(n).xml in match xml with `D s -> (Some n, `D s) | `E (tag,atts,children) -> let children = if t.(n).is_cut then List.map unfold_cut children else List.map build (Array.to_list t.(n).children) in (Some n, `E (tag, atts, children)) in build (len-1) let weight xml children = match xml with `D s -> 1. +. log (float (1 + String.length s)) | `E _ -> List.fold_left (fun acc c -> c.weight +. acc) 1. children let t_of_xml = let rec iter ?cut (n0, acc, acc_children, h) xml = let (label, subs, is_cut) = match xml with | `D _ -> (label_of_xml xml, [], false) | `E (tag, atts, l) -> match cut with | Some f when f tag atts l -> (Node (string_of_xml xml), [], true) | _ -> (label_of_xml xml, l, false) in let (n, acc, children, h_children) = List.fold_left (iter ?cut) (n0, acc, [], 0) subs in let children = List.rev children in List.iter (fun node -> node.parent <- Some n) children ; let hash = hash xml in let weight = weight xml children in let node = { number = n ; children = Array.of_list (List.map (fun node -> node.number) children) ; parent = None ; xml ; label ; hash ; weight ; rank = List.length acc_children ; is_cut ; matched = None ; } in (n+1, node :: acc, node :: acc_children, max h (h_children + 1)) in fun ?cut xml -> let (_, l, _, h) = iter ?cut (0, [], [], 0) xml in let t = Array.of_list l in Array.sort (fun n1 n2 -> n1.number - n2.number) t; Array.iteri (fun i node -> (* prerr_endline (Printf.sprintf "i=%d, node.number=%d, parent=%s, xml=%s" i node.number (match node.parent with None -> "" | Some n -> string_of_int n) (short_label node.xml) ); *) assert (i = node.number) ) t; { height = h; nodes = t; w0 = t.(Array.length t - 1).weight ; } type operation = | Replace of node * int | Move of int * int * int * int (* node * parent in t1 * new parent * rank *) | MoveRank of int * int * int (* node * parent * rank *) | Insert of node * int * int (* Insert(node,i,rank) insert tree from t2 as nth child of i *) | Delete of node (* delete tree from t1 *) | Edit of node * node (* change label of node from t1 to label of node from t2 *) type actions = operation list type patch_path = Path_cdata of int | Path_node of Xmlm.name * int * patch_path option type position = [ `FirstChild | `After] type patch_operation = | PInsert of xmltree * position | PDelete | PUpdateCData of string | PUpdateNode of Xmlm.name * string Nmap.t | PReplace of xmltree | PMove of patch_path * position type patch = (patch_path * patch_operation) list let rec xml_of_source s_source source = try let ns s = Some s in let input = Xmlm.make_input ~ns ~enc: (Some `UTF_8) source in let el (tag, atts) childs = let atts = List.fold_left (fun map (name, v) -> Nmap.add name v map) Nmap.empty atts in `E (tag, atts, childs) in let data d = `D d in let (_, tree) = Xmlm.input_doc_tree ~el ~data input in tree with Xmlm.Error ((line, col), error) -> let msg = Printf.sprintf "%sLine %d, column %d: %s" s_source line col (Xmlm.error_message error) in failwith msg | Invalid_argument e -> let msg = Printf.sprintf "%sInvalid_argumen(%s)" s_source e in failwith msg and xml_of_string s = xml_of_source s (`String (0, s)) let xml_of_file file = let ic = open_in file in try let xml = xml_of_source (Printf.sprintf "File %S, " file) (`Channel ic) in close_in ic; xml with e -> close_in ic; raise e let dot_of_t t = let b = Buffer.create 256 in let p b = Printf.bprintf b in p b "digraph g {\nrankdir=TB;\nordering=out;\n"; Array.iter (fun node -> p b "\"N%d\" [ label=\"%d: %s[%d]\", fontcolor=black ];\n" node.number node.number (short_label node.xml) node.rank; Array.iter (fun i -> p b "\"N%d\" -> \"N%d\";\n" node.number i) node.children ; ) t.nodes; p b "}\n"; Buffer.contents b let dot_of_xmlnode t = let b = Buffer.create 256 in let p b = Printf.bprintf b in p b "digraph g {\nrankdir=TB;\nordering=out;\n"; let string_of_id = function None -> "_" | Some n -> string_of_int n in let cpt = ref 0 in let rec iter parent rank (id, xml) = incr cpt; let n = !cpt in p b "\"N%d\" [ label=\"%s: %s[%d]\", fontcolor=black ];\n" n (string_of_id id) (short_label xml) rank; ( match xml with `D _ -> () | `E (_,_,subs) -> List.iteri (fun i child -> iter (Some n) i child ; ) subs ); match parent with None -> () | Some parent -> p b "\"N%d\" -> \"N%d\";\n" parent n in iter None 0 t ; p b "}\n"; Buffer.contents b let dot_of_matches t1 t2 = let b = Buffer.create 256 in let p b = Printf.bprintf b in p b "digraph g {\nrankdir=TB;\nordering=out;\n"; p b "subgraph cluster_2 {\n"; Array.iter (fun node -> p b "\"T%d\" [ label=\"%d: %s[%d]\", fontcolor=black ];\n" node.number node.number (short_label node.xml) node.rank; Array.iter (fun i -> p b "\"T%d\" -> \"T%d\";\n" node.number i) node.children ; ) t2.nodes; p b "}\n"; p b "subgraph cluster_1 {\n"; Array.iter (fun node -> p b "\"S%d\" [ label=\"%d: %s[%d]\", fontcolor=black ];\n" node.number node.number (short_label node.xml) node.rank; Array.iter (fun i -> p b "\"S%d\" -> \"S%d\";\n" node.number i) node.children ; match node.matched with None -> () | Some j -> p b "S%d -> T%d [style=\"dashed\"];\n" node.number j ) t1.nodes; p b "}\n"; p b "}\n"; Buffer.contents b let string_of_action = function | Replace (n2, i) -> Printf.sprintf "Replace (%d, %d): %s" n2.number i (string_of_xml ~cut:true n2.xml) | Move (i, parent, new_parent, rank) -> Printf.sprintf "Move(%d,%d,%d,%d)" i parent new_parent rank | MoveRank (i, new_parent, rank) -> Printf.sprintf "MoveRank(%d,%d,%d)" i new_parent rank | Insert (n2, i, rank) -> Printf.sprintf "Insert (%d, %d, %d): %s" n2.number i rank (string_of_xml ~cut:true n2.xml) | Delete n1 -> Printf.sprintf "Delete(%d): %s" n1.number (string_of_xml ~cut: true n1.xml) | Edit (n1, n2) -> Printf.sprintf "Edit(%d,%d): %s -> %s" n1.number n2.number (string_of_xml ~cut: true n1.xml) (string_of_xml ~cut: true n2.xml) let have_matching_parents nodes1 n1 n2 = match n1.parent, n2.parent with | None, None -> true | None, _ | _, None -> false | Some p1, Some p2 -> nodes1.(p1).matched = Some p2 let matching_parent nodes n = match n.parent with None -> None | Some p -> nodes.(p).matched let add_edit_action acc n1 n2 = match n1.xml, n2.xml with `E _, `D _ | `D _, `E _ -> assert false | `D s1, `D s2 -> Edit (n1, n2) :: acc | `E (tag1, atts1, _), `E (tag2, atts2, _) -> match n1.is_cut, n2.is_cut with true, _ | _, true -> Replace (n2, n1.number) :: acc | false, false -> begin if tag1 = tag2 && Nmap.equal (=) atts1 atts2 then acc else Edit (n1, n2) :: acc end (* FIXME: remove rank accumulator *) let make_actions t1 t2 = let nodes1 = t1.nodes in let nodes2 = t2.nodes in let rec f (acc, rank) i = let n1 = nodes1.(i) in match n1.matched with None -> let (acc, _) = Array.fold_left f (acc, 0) n1.children in ((Delete n1) :: acc, rank + 1) | Some j -> let n2 = nodes2.(j) in let matching_parents = have_matching_parents nodes1 n1 n2 in let (deleted, acc) = if matching_parents then if n1.rank = n2.rank then (false, acc) else ( let new_parent = match n1.parent with None -> assert false | Some i -> i in (false, MoveRank(n1.number, new_parent, n2.rank) :: acc) ) else ( match matching_parent nodes2 n2 with None -> dbg (Printf.sprintf "make_actions: missing matching parent, j=%d, parent=%d" j (match nodes2.(j).parent with None -> -1 | Some n -> n) ); (*assert false*) (true, (Delete n1 :: acc)) | Some new_parent -> let parent = match n2.parent with None -> assert false | Some i -> i in (false, (Move(n1.number, parent, new_parent, n2.rank)) :: acc) ) in let acc = if deleted then acc else if n1.hash = n2.hash then acc else ( let acc = add_edit_action acc n1 n2 in let (acc, _) = Array.fold_left f (acc, 0) n1.children in acc ) in (acc, rank + 1) in let (actions, _) = f ([], 0) (Array.length nodes1 - 1) in (* note: we should not have a node not matched in t2 with matched descendants *) let rec g acc j = let n2 = nodes2.(j) in match n2.matched with None -> dbg (Printf.sprintf "Insert %d which is not matched "n2.number); let new_parent = match matching_parent nodes2 n2 with None -> dbg (Printf.sprintf "no matching parent for t2.(%d)" n2.number); assert false | Some i -> i in (Insert (n2, new_parent, n2.rank)) :: acc | Some _ -> Array.fold_left g acc n2.children in let actions = g actions (Array.length nodes2 - 1) in actions let sort_actions = let pred a1 a2 = match a1, a2 with | Delete i1, Delete i2 -> i2.number - i1.number (* higher node first *) | Delete _, _ -> 1 | _, Delete _ -> -1 | Edit _, Edit _ -> 0 | Edit _, _ -> -1 | _, Edit _ -> 1 | Replace _, Replace _ -> 0 | Replace _, _ -> -1 | _, Replace _ -> 1 | MoveRank(_,_,rank1), Move(_,_,_,rank2) | Move(_,_,_,rank1), MoveRank(_,_,rank2) | Move (_,_,_,rank1), Move (_,_,_,rank2) | MoveRank(_,_,rank1), MoveRank(_,_,rank2) | MoveRank(_,_,rank1), Insert(_,_,rank2) | Insert(_,_,rank1), MoveRank(_,_,rank2) | Move (_,_,_,rank1), Insert (_,_,rank2) | Insert (_,_,rank1), Move (_,_,_,rank2) | Insert (_,_,rank1), Insert (_,_,rank2) -> rank1 - rank2 in List.sort pred let build_hash_map = let add node map = let l = try Smap.find node.hash map with Not_found -> [] in Smap.add node.hash (node.number :: l) map in fun t -> Array.fold_right add t.nodes Smap.empty let rec get_nth_parent t i level = match t.nodes.(i).parent with None -> None | Some p -> if level <= 1 then Some p else get_nth_parent t p (level-1) let d_of_node t i = 1. +. (float t.height) *. t.nodes.(i).weight /. t.w0 let rec match_nodes ?(fail=true) ?(with_subs=false) t1 t2 i j = dbg (Printf.sprintf "matching %d -> %d [with_subs=%B]" i j with_subs); let node1 = t1.nodes.(i) in match node1.matched with | Some j2 when j <> j2 -> dbg (Printf.sprintf "t1.(%d) already matched to t2.(%d)" i j2); if fail then assert false | _ -> let node2 = t2.nodes.(j) in match node2.matched with Some i2 when i <> i2 -> dbg (Printf.sprintf "t2.(%d) already matched to t1.(%d)" j i2); if fail then assert false | _ -> node1.matched <- Some j; node2.matched <- Some i; if with_subs then begin let ch_i = node1.children in let ch_j = node2.children in for x = 0 to Array.length ch_i - 1 do match_nodes ~with_subs: true t1 t2 ch_i.(x) ch_j.(x) done end let match_ancestors t1 t2 i j = let max_level = int_of_float (d_of_node t2 j) in dbg (Printf.sprintf "match_ancestors: i=%d, j=%d, max_level=%d" i j max_level); let rec iter i j level = if level > max_level then () else match t1.nodes.(i).parent, t2.nodes.(j).parent with Some p1, Some p2 when t1.nodes.(p1).label = t2.nodes.(p2).label -> match_nodes ~fail: false t1 t2 p1 p2; iter p1 p2 (level + 1) | _ -> () in iter i j 1 let min_list p v l = let rec iter acc = function [] -> acc | h :: q -> if p h < acc then iter h q else iter acc q in iter v l let best_candidate t1 t2 j cands = dbg ("best_candidates "^(String.concat ", " (List.map string_of_int cands))); let d = d_of_node t2 j in let map_parent acc = function (i, None) -> acc | (i, Some p) -> (i, t1.nodes.(p).parent) :: acc in let rec find level parent_j acc = function [] -> iter (level + 1) acc | (i, None) :: q -> find level parent_j acc q | (i, Some p) :: q -> match t1.nodes.(p).matched with Some j when j = parent_j -> Some i | _ -> find level parent_j ((i, Some p)::acc) q and iter level cands = if float level < d then begin let cands = List.rev (List.fold_left map_parent [] cands) in match get_nth_parent t2 j level with None -> None | Some parent_j -> find level parent_j [] cands end else None in iter 1 (List.map (fun i -> (i, Some i)) cands) let candidates hash_t1 t2 j = try Smap.find t2.nodes.(j).hash hash_t1 with Not_found -> [] let match_candidate hash_t1 t1 t2 j = let candidates = candidates hash_t1 t2 j in let pred i = t1.nodes.(i).matched = None in match List.filter pred candidates with [] -> None | [i] -> Some i | l -> best_candidate t1 t2 j l let (+=) map (k, v) = let x = try Intmap.find k map with Not_found -> 0. in Intmap.add k (v +. x) map let match_uniquely_labeled = let map_of_t nodes t f map = Array.fold_left (fun map i -> let node = nodes.(i) in match node.matched with Some _ -> map | None -> let label = node.label in let x = try Lmap.find label map with Not_found -> ([], []) in let x = f i x in Lmap.add label x map ) map t in fun t1 t2 li lj -> let map = map_of_t t1.nodes li (fun i (l1,l2) -> (i :: l1, l2)) Lmap.empty in let map = map_of_t t2.nodes lj (fun j (l1,l2) -> (l1, j :: l2)) map in Lmap.iter (fun _ -> function | [ i ], [ j ] -> match_nodes t1 t2 i j | _ -> ()) map let match_uniquely_labeled_children = let do_match t1 t2 j = let nj = t2.nodes.(j) in match nj.matched with None -> () | Some i -> let children_i = t1.nodes.(i).children in let children_j = nj.children in match_uniquely_labeled t1 t2 children_i children_j in fun t1 t2 -> for j = Array.length t2.nodes -1 downto 0 do do_match t1 t2 j done let run_phase4 t1 t2 = let f j node = match node.matched with Some _ -> () | None -> let parents = Array.fold_left (fun acc jc -> match t2.nodes.(jc).matched with None -> acc | Some i -> dbg (Printf.sprintf "%d has a child %d matched to %d" j jc i); match t1.nodes.(i).parent with | Some p when t1.nodes.(p).matched = None -> dbg (Printf.sprintf "%d has a non-matched parent %d" i p); acc += (p, t1.nodes.(i).weight) | Some p -> dbg (Printf.sprintf "[j=%d] i=%d has a parent %d already matched" j i p); acc | None -> acc ) Intmap.empty node.children in let (parent, _) = Intmap.fold (fun p w ((acc_parent, acc_w) as acc) -> if w > acc_w then (p, w) else acc) parents (-1, -1.0) in if parent >= 0 then match_nodes t1 t2 parent j in Array.iteri f t2.nodes; match_uniquely_labeled_children t1 t2 let order_by_weight n1 n2 = match Stdlib.compare n2.weight n1.weight with 0 -> Stdlib.compare n1.rank n2.rank | n -> n let compute t1 t2 = let weight_queue = Queue.create () in let root2 = Array.length t2.nodes - 1 in let hash_t1 = build_hash_map t1 in (* make roots match orelse we'll have problems *) match_nodes t1 t2 (Array.length t1.nodes - 1) root2 ; let queue_nodes children = let t = Array.map (Array.get t2.nodes) children in Array.sort order_by_weight t; Array.iteri (fun _ n -> (*prerr_endline (Printf.sprintf "queuing %d" n.number);*) Queue.add n.number weight_queue) t in queue_nodes t2.nodes.(root2).children ; while not (Queue.is_empty weight_queue) do let j = Queue.pop weight_queue in (*prerr_endline (Printf.sprintf "trying to match %d" j);*) match t2.nodes.(j).matched with Some _ -> () | None -> match match_candidate hash_t1 t1 t2 j with Some i -> match_nodes ~with_subs: true t1 t2 i j; match_ancestors t1 t2 i j | None -> queue_nodes t2.nodes.(j).children done; run_phase4 t1 t2 ; on_dbg (fun () -> file_of_string ~file:"/tmp/matches.dot" (dot_of_matches t1 t2)) (); sort_actions (make_actions t1 t2) type cur_path = N of Xmlm.name | CData module Cur_path = Map.Make (struct type t = cur_path let compare = Stdlib.compare end) let cur_path_get cp map = try Cur_path.find cp map with Not_found -> 0 let cur_path_inc cp map = let n = cur_path_get cp map in Cur_path.add cp (n+1) map let patch_path_of_cur_path_list = let iter (cp, n) acc = match acc, cp with (None, CData) -> Some (Path_cdata n) | (Some _, CData) -> assert false | (_, N name) -> Some (Path_node (name, n, acc)) in fun l -> match List.fold_right iter l None with None -> assert false | Some p -> p let rec string_of_path = function Path_cdata n -> "CData("^(string_of_int n)^")" | Path_node (name, n, next) -> let s = (string_of_name name)^"("^(string_of_int n)^")" in match next with None -> s | Some p -> s^"/"^(string_of_path p) let path_of_id = let cp_of_xml = function `D s -> CData | `E (name,_,_) -> N name in let rec forward to_move xmlnode path cur_path ~skip n = function | (Some i, _) :: q when i = skip -> forward to_move xmlnode path cur_path ~skip n q | [] -> let b = Buffer.create 256 in Buffer.add_string b (string_of_path (patch_path_of_cur_path_list (List.rev path))); Buffer.add_char b '{'; Cur_path.iter (fun k n -> Printf.bprintf b "%s->%d," (match k with N name -> string_of_name name | CData -> "cdata") n) cur_path ; Buffer.add_char b '}'; on_dbg (fun () -> file_of_string ~file: "/tmp/xmldiff_state.dot" (dot_of_xmlnode xmlnode)) (); failwith (Printf.sprintf "Invalid rank: %d element missing\npath: %s" (n+1) (Buffer.contents b)) | (Some id, xml) :: q when Intset.mem id to_move -> let cur_path = let cp = cp_of_xml xml in cur_path_inc cp cur_path in forward to_move xmlnode path cur_path ~skip n q | (_, xml) :: _ when n = 0 -> let cp = cp_of_xml xml in (cp, cur_path_get cp cur_path) :: path | (id, xml) :: q -> let cur_path = let cp = cp_of_xml xml in cur_path_inc cp cur_path in (*dbg ("forward: id="^(match id with None -> "None" | Some n -> string_of_int n));*) forward to_move xmlnode path cur_path ~skip (n-1) q in let rec iter to_move xmlnode ~rank ~skip i path cur_path = function | (Some j, xml) when i = j -> begin let cp = cp_of_xml xml in let path = (cp, cur_path_get cp cur_path) :: path in let (path, pos) = match rank with None -> (path, `After) | Some 0 -> (path, `FirstChild) | Some n -> match xml with `D _ -> assert false | `E (_,_,subs) -> (forward to_move xmlnode path Cur_path.empty ~skip (n-1) subs, `After) in (patch_path_of_cur_path_list (List.rev path), pos) end (* | (Some j, _) when j < i -> raise Not_found*) | (_, `D _) -> raise Not_found | (_, `E (name, atts, subs)) -> (* let's go down after adding cur_path to path *) let cpt = cur_path_get (N name) cur_path in let path = (N name, cpt) :: path in iter_list to_move xmlnode ~rank ~skip i path Cur_path.empty subs and iter_list to_move xmlnode ~rank ~skip i path cur_path = function [] -> raise Not_found | h :: q -> try iter to_move xmlnode ~rank ~skip i path cur_path h with Not_found -> let cur_path = let cp = cp_of_xml (snd h) in cur_path_inc cp cur_path in iter_list to_move xmlnode ~rank ~skip i path cur_path q in (* the skip parameter is used to not take into account the node we are moving under its parent. *) fun to_move xmlnode ?rank ?(skip=(-1)) i -> try iter to_move xmlnode ~rank ~skip i [] Cur_path.empty xmlnode with Not_found -> let msg = "Id "^(string_of_int i)^" not found" in failwith msg let rec xmlnode_of_xmltree = function `D s -> (None, `D s) | `E (name,atts,subs) -> (None, `E (name,atts, List.map xmlnode_of_xmltree subs)) let string_of_position = function `FirstChild -> "FirstChild" | `After -> "After" let string_of_patch_operation (path, op) = match op with | PReplace xmltree -> "REPLACE("^(string_of_path path)^", "^(string_of_xml ~cut:true xmltree)^")" | PInsert (xmltree, pos) -> "INSERT("^(string_of_path path)^", "^(string_of_xml ~cut: true xmltree)^", "^(string_of_position pos)^")" | PDelete -> "DELETE("^(string_of_path path)^")" | PUpdateCData s -> Printf.sprintf "UPDATE_CDATA(%s, %S)" (string_of_path path) s | PUpdateNode (name, atts) -> Printf.sprintf "UPDATE_NODE(%s, %S, _)" (string_of_path path) (string_of_name name) | PMove (newpath, pos) -> Printf.sprintf "MOVE(%s, %s, %s)" (string_of_path path) (string_of_path newpath) (string_of_position pos) let string_of_patch l = String.concat "\n" (List.map string_of_patch_operation l) let remove_xmlnode t path = let rec iter xmls path = match xmls, path with ((x, `D _) as xml):: q, Path_cdata 0 -> (xml, q) | (x, `D s) :: q, Path_cdata n -> let (removed, xmls) = iter q (Path_cdata (n-1)) in (removed, (x, `D s) :: xmls) | ((x, `E (name,atts,subs) as xml) :: q, Path_node (name2, n, next)) when name = name2 -> if n = 0 then (match next with None -> (xml, q) | Some p -> let (removed, xmls) = iter subs p in (removed, [x, `E (name, atts, xmls)] @ q) ) else ( let (removed, xmls) = iter q (Path_node (name2, n-1, next)) in (removed, xml :: xmls) ) | xml :: q, p -> let (removed, xmls) = iter q p in (removed, xml :: xmls) | [], _ -> assert false in match iter [t] path with removed, [t] -> (removed, t) | _ -> assert false let insert_xmlnode t node path pos = let rec iter xmls path = match xmls, path with ((x, `D _) as xml):: q, Path_cdata 0 -> begin match pos with `FirstChild -> assert false | `After -> xml :: node :: q end | (x, `D s) :: q, Path_cdata n -> (x, `D s) :: iter q (Path_cdata (n-1)) | ((x, `E (name,atts,subs) as xml) :: q, Path_node (name2, n, next)) when name = name2 -> if n = 0 then (match next with None -> begin match pos with `FirstChild -> (x, `E(name,atts,node::subs)) :: q | `After -> xml :: node :: q end | Some p -> [x, `E (name, atts, iter subs p)] @ q ) else xml :: iter q (Path_node (name2, n-1, next)) | xml :: q, p -> xml :: iter q p | [], _ -> assert false in match iter [t] path with [t] -> t | _ -> assert false let patch_xmlnode t path op = try match op with PMove (newpath, pos) -> dbg (string_of_patch_operation (path, op)); let removed, t = remove_xmlnode t path in dbg ("node removed"); insert_xmlnode t removed newpath pos | _ -> let apply xml op = match xml, op with | _, PReplace tree -> [xmlnode_of_xmltree tree] | _, PInsert (tree, `After) -> [ xml ; xmlnode_of_xmltree tree ] | (_, `D _), PInsert (_, `FirstChild) -> assert false | (x, `E (tag,atts,subs)), PInsert (tree, `FirstChild) -> [ (x, `E (tag, atts, xmlnode_of_xmltree tree :: subs)) ] | _, PDelete -> [] | (x, _), PUpdateCData s -> [(x, `D s)] | (x, `D _), PUpdateNode (name, atts) -> [x, `E (name,atts,[])] | (x, `E (_,_,subs)), PUpdateNode (name, atts) -> [x, `E (name,atts,subs)] | (_,_), PMove (_, _) -> assert false in let rec iter xmls path = match xmls, path with ((x, `D _) as xml):: q, Path_cdata 0 -> (apply xml op) @ q | (x, `D s) :: q, Path_cdata n -> (x, `D s) :: iter q (Path_cdata (n-1)) | ((x, `E (name,atts,subs) as xml) :: q, Path_node (name2, n, next)) when name = name2 -> if n = 0 then (match next with None -> (apply xml op) @ q | Some p -> [x, `E (name, atts, iter subs p)] @ q ) else xml :: iter q (Path_node (name2, n-1, next)) | xml :: q, p -> xml :: iter q p | [], _ -> assert false in match iter [t] path with [t] -> t | _ -> assert false with Failure msg -> failwith ("Error: "^msg^" when applying path:\n"^ (string_of_patch_operation (path, op))) let patch_of_action (t1, to_move, patch) action = dbg ("patch_of_action: "^string_of_action action); dbg ("to_move = "^(String.concat ", " (List.map string_of_int (Intset.elements to_move)))); match action with | Replace (n2, i) -> let xmltree2 = n2.xml in let (path, _) = path_of_id to_move t1 i in let op = PReplace xmltree2 in let t1 = patch_xmlnode t1 path op in (t1, to_move, (path, op) :: patch) | Move (i, parent, new_parent, rank) -> let (path, _) = path_of_id to_move t1 i in let (new_path, pos) = path_of_id to_move t1 ~rank ~skip: i new_parent in let op = PMove (new_path, pos) in let t1 = patch_xmlnode t1 path op in let to_move = Intset.remove i to_move in (t1, to_move, (path, op) :: patch) | MoveRank (i, parent, rank) -> let (path, _) = path_of_id to_move t1 i in let (new_path, pos) = path_of_id to_move t1 ~rank ~skip: i parent in let op = PMove (new_path, pos) in let t1_patched = patch_xmlnode t1 path op in let to_move = Intset.remove i to_move in if t1_patched = t1 then (t1, to_move, patch) else (t1_patched, to_move, (path, op) :: patch) | Insert (n2, i, rank) -> let xmltree2 = n2.xml in let (path, pos) = path_of_id to_move t1 ~rank i in let op = PInsert (xmltree2, pos) in let t1 = patch_xmlnode t1 path op in (t1, to_move, (path, op) :: patch) | Delete i -> begin match path_of_id to_move t1 i.number with | exception _ -> (* the node may have already been deleted when its parent was, see sort_actions: we delete first higher nodes, to prevent deleting all children then parent *) let to_move = Intset.remove i.number to_move in (t1, to_move, patch) | (path,_) -> let op = PDelete in let t1 = patch_xmlnode t1 path op in let to_move = Intset.remove i.number to_move in (t1, to_move, (path, op) :: patch) end | Edit (n1, n2) -> let (path,_) = path_of_id to_move t1 n1.number in let op = match n1.xml, n2.xml with _ , `D s2 -> PUpdateCData s2 | `E (_,_,_), `E (name,atts,_) -> PUpdateNode (name, atts) | `D _, `E (name,atts,subs) -> PUpdateNode (name, atts) in let t1 = patch_xmlnode t1 path op in (t1, to_move, (path, op) :: patch) let rec xmltree_of_xmlnode = function (_, `D s) -> `D s | (_, `E (tag,atts,subs)) -> `E (tag, atts, List.map xmltree_of_xmlnode subs) let nodes_to_move = let add acc = function Move (i,_,_,_) | MoveRank (i,_,_) -> Intset.add i acc | Delete i -> Intset.add i.number acc | _ -> acc in List.fold_left add Intset.empty let patch_of_actions t1 t2 actions = let to_move = nodes_to_move actions in let nodes1 = xmlnode_of_t t1.nodes in on_dbg (fun () -> file_of_string ~file: "/tmp/before_patch.dot" (dot_of_xmlnode nodes1)) (); let (nodes1, to_move, l) = List.fold_left patch_of_action (nodes1, to_move, []) actions in assert (Intset.is_empty to_move); on_dbg (fun () -> file_of_string ~file: "/tmp/patch_result.dot" (dot_of_xmlnode nodes1)) (); let t1 = xmltree_of_xmlnode nodes1 in on_dbg (fun () -> let t2 = xmltree_of_xmlnode (xmlnode_of_t t2.nodes) in file_of_string ~file: "/tmp/xml1.xml" (string_of_xml t1) ; file_of_string ~file: "/tmp/xml2.xml" (string_of_xml t2) ; ) (); (List.rev l, t1) let diff_with_final_tree ?cut xml1 xml2 = let t1 = t_of_xml ?cut xml1 in let t2 = t_of_xml ?cut xml2 in on_dbg (fun () -> file_of_string ~file: "/tmp/t1.dot" (dot_of_t t1); file_of_string ~file: "/tmp/t2.dot" (dot_of_t t2); ) (); let actions = compute t1 t2 in dbg ("actions=\n "^(String.concat "\n " (List.map string_of_action actions))); patch_of_actions t1 t2 actions let diff ?cut xml1 xml2 = fst (diff_with_final_tree ?cut xml1 xml2)