Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Page
Library
Module
Module type
Parameter
Class
Class type
Source
xml.ml1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343(*********************************************************************************) (* OCaml-Stk *) (* *) (* Copyright (C) 2023-2024 INRIA All rights reserved. *) (* Author: Maxence Guesdon, INRIA Saclay *) (* *) (* This program is free software; you can redistribute it and/or modify *) (* it under the terms of the GNU General Public License as *) (* published by the Free Software Foundation, version 3 of the License. *) (* *) (* This program is distributed in the hope that it will be useful, *) (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) (* GNU General Public License for more details. *) (* *) (* You should have received a copy of the GNU General Public *) (* License along with this program; if not, write to the Free Software *) (* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA *) (* 02111-1307 USA *) (* *) (* As a special exception, you have permission to link this program *) (* with the OCaml compiler and distribute executables, as long as you *) (* follow the requirements of the GNU GPL in regard to all of the *) (* software in the executable aside from the OCaml compiler. *) (* *) (* Contact: Maxence.Guesdon@inria.fr *) (* *) (*********************************************************************************) (** XML documents *) module Log = (val (Log.create_src "xml")) open Log let empty_iri = Iri.of_string "" module QName = struct type t = Iri.t * string let compare (iri1, s1) (iri2, s2) = match String.compare s1 s2 with 0 -> Iri.compare iri1 iri2 | n -> n let to_string (ns, ln) = Printf.sprintf "[%s]%s" (Iri.to_string ns) ln let pp ppf x = Format.pp_print_string ppf (to_string x) end let qname_equal ?(no_ns=false) (ns1,n1) (ns2,n2) = match String.compare n1 n2 with | 0 -> no_ns || Iri.equal ns1 ns2 | _ -> false module Attributes = Map.Make(QName) module P = struct module Attributes = Attributes type attr_value = string Types.with_loc_option type data = unit let compare_name = QName.compare let compare_attr_value (s1,_) (s2,_) = String.compare s1 s2 let compare_data _ _ = 0 let default_data () = () let version_name = let iri = Iri.of_string "" in fun () -> (iri, "version") let default_version = Xtmpl.Xml.P.default_version let default_attr_value () = "", None let pp_name ppf (iri, str) = Format.fprintf ppf "%a%s" Iri.pp iri str let pp_attr_value = Xtmpl.Xml.P.pp_attr_value let pp_attributes = None end include (Xtmpl.Types.Make(P)) module TXml = struct type t = doc let compare = compare_doc let wrapper = None let transition = None end module PXml = Stk.Props.Add_prop_type(TXml) let mk_prop_xml = PXml.mk_prop ~default:(doc_empty()) let opt_att node ?(try_no_ns=true) x = match Attributes.find_opt x node.atts with | None when try_no_ns -> Attributes.find_opt (empty_iri, snd x) node.atts | x -> x let get_att node ?try_no_ns iri def = match opt_att node ?try_no_ns iri with | None -> def | Some v -> v let map_att node ?try_no_ns iri f def = match opt_att node ?try_no_ns iri with | None -> def | Some (x,_) -> try f x with _ -> def let map_opt_att node ?try_no_ns iri f = match opt_att node ?try_no_ns iri with | None -> None | Some (x,_) -> try Some (f x) with _ -> None let int_att node ?try_no_ns iri def = map_att node ?try_no_ns iri int_of_string def let opt_int_att node ?try_no_ns iri = map_opt_att node ?try_no_ns iri int_of_string let xhtml_ns_str = "http://www.w3.org/1999/xhtml" let xhtml_ns = Iri.of_string xhtml_ns_str let xhtml_ str = (xhtml_ns, str) let xhtml_id = xhtml_ "id" let xhtml_name = xhtml_ "name" let xhtml_href = xhtml_ "href" let xhtml_class = xhtml_ "class" let xhtml_a = xhtml_ "a" let node_id node = match opt_att node ~try_no_ns:true xhtml_id with | Some (a, _) -> Some a | None -> match opt_att node ~try_no_ns:true xhtml_name with | Some (a, _) -> Some a | None -> None let mime_html = Ldp.Ct.to_mime (Ldp.Types.content_type_of_string "text/html") let html_mime_types = [ Ldp.Ct.mime_xhtml ; mime_html ] let mime_is_xml m = m = mime_html || m = Ldp.Ct.mime_xhtml || (match snd m with "xml" -> true | _ -> false) || (let s = Ldp.Ct.mime_to_string m in let len = String.length s in len >= 4 && String.sub s (len - 4) 4 = "+xml") module Of_xtmpl = struct module X = Xtmpl.Xml let map_name base att ns = function | ("xmlns",_) | ("","xmlns") -> (Iri.of_string "xmlns", "") | ("",ln) -> if att then (Iri.of_string "", ln) else (base, ln) | (n,ln) -> try let iri = Stk.Smap.find n ns in (iri, ln) with Not_found -> warn (fun m -> m "Unknown namespace %S, using base %S" n (Iri.to_string base)) ; (base, ln) let set_ns base ns attr = X.Name_map.fold (fun (n,ln) (str,_) (base, ns) -> match n, ln with | ("","xmlns") -> (Iri.of_string str, ns) | ("xmlns", ln) -> (base, Stk.Smap.add ln (Iri.of_string str) ns) | _ -> (base,ns) ) attr (base, ns) let map_atts base ns (atts:X.attributes) = X.Name_map.fold (fun name (str,loc) acc -> let str = (str, loc) in Attributes.add (map_name base true ns name) str acc) atts Attributes.empty let map_proc_inst base ns (pi:X.proc_inst) = pi_ ?loc:pi.loc (map_name base false ns pi.app) pi.args let rec map_tree base ns = function | X.D c -> cdata ?loc:c.loc ~quoted:c.quoted c.text | X.C c -> comment ?loc:c.loc c.comment | X.PI p -> PI (map_proc_inst base ns p) | X.E n -> let (base, ns) = set_ns base ns n.atts in let loc = n.loc in let name = map_name base false ns n.name in let atts = map_atts base ns n.atts in let subs = map_trees base ns n.subs in node ?loc name ~atts subs and map_trees base ns (xmls:X.tree list) = List.map (map_tree base ns) xmls let map_prolog_misc base ns = function | X.PC c -> PC c | PPI pi -> PPI (map_proc_inst base ns pi) let map_prolog base ns (p:X.prolog) = let decl = match p.X.decl with | None -> None | Some d -> let atts = map_atts base ns d.atts in Some(xml_decl ?loc:d.loc atts) in let doctype = match p.X.doctype with | None -> None | Some d -> Some (doctype ?loc:d.loc (map_name base false ns d.name) d.args) in let misc = List.map (map_prolog_misc base ns) p.X.misc in prolog ?decl ?doctype misc let map_doc ?(base=xhtml_ns) (d:X.doc) = let prolog = map_prolog base Stk.Smap.empty d.prolog in let elements = map_trees base Stk.Smap.empty d.elements in doc prolog elements end let doc_from_string ?base ?param str = Of_xtmpl.map_doc ?base (Xtmpl.Xml.doc_from_string ?param str) let from_string ?(base=Iri.of_string "") ?param ?(ns=Stk.Smap.empty) str = Of_xtmpl.map_trees base ns (Xtmpl.Xml.from_string ?param str) let split_text = let rec iter s len acc start pos = if pos >= len then let acc = if start < pos then (String.sub s start (pos-start)) :: acc else acc in List.rev acc else match String.get s pos with | ' '|'\t'|'\r' as c -> let acc = if start < pos then (String.sub s start (pos-start)) :: acc else acc in let acc = String.make 1 c :: acc in iter s len acc (pos+1) (pos+1) | '\n' -> let acc = if start < pos then (String.sub s start (pos-start)) :: acc else acc in let acc = "\n" :: acc in iter s len acc (pos+1) (pos+1) | c -> if start < pos then iter s len acc start (pos+1) else iter s len acc pos (pos+1) in fun text-> let len = String.length text in iter text len [] 0 0 let normalize_xmls = let rec iter = function | E node -> let subs = List.flatten (List.map iter node.subs) in [ E { node with subs } ] | (PI _ | C _) as x -> [ x ] | D { text ; loc } -> let str = Stk.Utf8.normalize text in let chunks = split_text str in let build_cdata (pos, acc) text = let loc, next_pos = match pos, text with | None, _ -> None, None | Some p, _ -> let p2 = { p with Lexing.pos_cnum = p.Lexing.pos_cnum + String.length text } in let loc = (p, p2) in let next_pos = match text with | "\n" -> { p2 with Lexing.pos_lnum = p2.pos_lnum + 1; pos_bol = p2.pos_bol + p2.pos_cnum ; pos_cnum = 0 ; } | _ -> p2 in Some loc, Some next_pos in let acc = D { text ; loc ; quoted = false } :: acc in (next_pos, acc) in let pos = match loc with None -> None | Some (p,_) -> Some p in let (_, l) = List.fold_left build_cdata (pos, []) chunks in List.rev l in fun xmls -> List.flatten (List.map iter xmls) let xhtml_base xmls base = let find str xmls = let pred = function | E { name } -> QName.compare name (xhtml_ str) = 0 | _ -> false in List.find_opt pred xmls in match find "html" xmls with | Some (E { subs }) -> ( match find "head" subs with | Some (E { subs }) -> ( match find "base" subs with | None -> base | Some (E node) -> ( match opt_att node (Iri.of_string "", "href") with | None -> base | Some (str,loc) -> try let iri = Iri.of_string str in Iri.resolve ~base iri with e -> Log.err (fun m -> m "%a: %s" Types.pp_loc_option loc (Printexc.to_string e)); base ) | _ -> base ) | _ -> base ) | _ -> base let html_self_closing_elements = Xtmpl.Xml.SSet.of_list [ "area"; "base"; "br" ; "col" ; "embed"; "hr" ; "img"; "input"; "link" ; "meta" ; "param" ; "source" ; "track" ; "wbr" ; (* < html 5: *) "command" ; "keygen" ; "menuitem" ; ]