Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Page
Library
Module
Module type
Parameter
Class
Class type
Source
doc.ml1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211(*********************************************************************************) (* OCaml-Stk *) (* *) (* Copyright (C) 2023-2024 INRIA All rights reserved. *) (* Author: Maxence Guesdon, INRIA Saclay *) (* *) (* This program is free software; you can redistribute it and/or modify *) (* it under the terms of the GNU General Public License as *) (* published by the Free Software Foundation, version 3 of the License. *) (* *) (* This program is distributed in the hope that it will be useful, *) (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) (* GNU General Public License for more details. *) (* *) (* You should have received a copy of the GNU General Public *) (* License along with this program; if not, write to the Free Software *) (* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA *) (* 02111-1307 USA *) (* *) (* As a special exception, you have permission to link this program *) (* with the OCaml compiler and distribute executables, as long as you *) (* follow the requirements of the GNU GPL in regard to all of the *) (* software in the executable aside from the OCaml compiler. 
*) (* *) (* Contact: Maxence.Guesdon@inria.fr *) (* *) (*********************************************************************************)

(** Maps keyed by widgets. *)
module WMap = Stk.Widget.Map

(** A node of the document tree. *)
type node = {
    xml : Xml.tree option ; (** XML tree attached to the node, if any *)
    widget : Stk.Widget.widget option ; (** widget attached to the node, if any *)
    subs : node list ; (** child nodes *)
    id : string option ; (** optional identifier of the node *)
    container : Stk.Widget.widget ;
      (** NOTE(review): presumably the widget holding this node; confirm *)
    props : Css.C.t ; (** CSS properties attached to the node *)
    mutable display : bool ;
      (** NOTE(review): presumably whether the node is displayed; confirm *)
    mutable parent : node option ; (** parent node, if any *)
  }

(** [node_widget n] is the widget attached to [n], if any. *)
let node_widget { widget ; _ } = widget

(** [node_widget_type n] is the [typ] of the widget attached to [n];
    [None] when [n] has no widget. *)
let node_widget_type { widget ; _ } =
  match widget with
  | Some w -> w#typ
  | None -> None

(** [node_size n] is the [(w, h)] pair read from the geometry of the widget
    attached to [n]; [None] when [n] has no widget. *)
let node_size n =
  match node_widget n with
  | None -> None
  | Some w ->
      let g = w#geometry in
      Some (g.Stk.G.w, g.h)

(** [node_geometry n] is the geometry of the widget attached to [n];
    [None] when [n] has no widget. *)
let node_geometry n =
  match node_widget n with
  | None -> None
  | Some w -> Some w#geometry

(** [string_of_node node] is the qualified name of an element node, the text
    of a text node, and ["NONE"] in every other case. *)
let string_of_node { xml ; _ } =
  match xml with
  | Some (Xml.D { text }) -> text
  | Some (Xml.E { name }) -> Xml.QName.to_string name
  | Some _ | None -> "NONE"

(** [get_content_text node] concatenates, in document order, the text of all
    text nodes found in the XML tree of [node]. Comments and processing
    instructions contribute nothing. The result is the empty string when
    [node] has no XML tree. *)
let get_content_text node =
  match node.xml with
  | None -> ""
  | Some tree ->
      let buf = Buffer.create 256 in
      let rec collect = function
        | Xml.D { text } -> Buffer.add_string buf text
        | Xml.E { subs } -> List.iter collect subs
        | Xml.C _ | Xml.PI _ -> ()
      in
      collect tree ;
      Buffer.contents buf

(** [node_parent ?until ?itself node] looks for a node above [node].

    Without [until], the direct parent of [node] is returned and [itself]
    is not consulted.

    With [~until:pred], the closest ancestor satisfying [pred] is returned.
    [node] is a candidate only when [itself] is [true] (default [false]).
    [None] is returned when the root is reached without a match. *)
let rec node_parent ?until ?(itself=false) node =
  match until with
  | None -> node.parent
  | Some pred ->
      if itself && pred node then
        Some node
      else
        match node.parent with
        | None -> None
        | Some up -> node_parent ~until:pred ~itself:true up

(** Text index: the whole text of a list of nodes, together with a tree
    recording which span of that text each node covers. *)
module TI =
  struct
    module X = Xml

    (** [L] is a text leaf, [N] an element. [pos] and [len] are expressed in
        the unit counted by {!utf8_length}. In [N], [nodes] holds the text
        nodes found below the element, in document order. *)
    type tree =
      | L of { text: string ; pos: int ; len: int; node: node}
      | N of { pos: int; len: int; nodes : node list ; children: tree list}

    type t = {
        fulltext : string ; (** concatenation of all indexed text *)
        tree : tree list ; (** index trees, in document order *)
      }

    (** [utf8_length str] adds one for each item that
        [Uutf.String.fold_utf_8] reports while folding over [str]. *)
    let utf8_length str =
      Uutf.String.fold_utf_8 (fun count _ _ -> succ count) 0 str

    (** [build_text_index nodes] walks [nodes] depth-first and returns the
        accumulated text with its index. *)
    let build_text_index nodes =
      let buf = Buffer.create 256 in
      (* The accumulator is (next position, length gathered at this level,
         text nodes seen so far in reverse order, trees built so far in
         reverse order). *)
      let rec visit ((pos, len, seen, trees) as acc) node =
        match node.xml with
        | None -> visit_all acc node.subs
        | Some (X.C _ | X.PI _) -> acc
        (* Nodes that are not displayed are deliberately not skipped:
             | Some (X.D _ | X.E _) when not node.display -> acc
           JS textContent must return content of all elements, without taking
           into account their css properties:
           https://www.w3schools.com/jsref/prop_node_textcontent.asp *)
        | Some (X.D { text }) ->
            let n = utf8_length text in
            Buffer.add_string buf text ;
            let leaf = L { text ; pos ; len = n ; node } in
            (pos + n, len + n, node :: seen, leaf :: trees)
        | Some (X.E _) ->
            (* Index the children with a fresh level accumulator, starting
               at the current position. *)
            let (end_pos, sub_len, sub_seen, sub_trees) =
              visit_all (pos, 0, [], []) node.subs
            in
            let inner =
              N { pos ;
                  len = sub_len ;
                  nodes = List.rev sub_seen ;
                  children = List.rev sub_trees ;
                }
            in
            (end_pos, len + sub_len, sub_seen @ seen, inner :: trees)
      and visit_all acc l = List.fold_left visit acc l in
      let (_pos, _len, _seen, trees) = visit_all (0, 0, [], []) nodes in
      let fulltext = Buffer.contents buf in
      [%debug "index fulltext: %s" fulltext] ;
      { fulltext ; tree = List.rev trees }
  end

(** A document: its root nodes, lookup tables and cached text index. *)
type doc = {
    doc_nodes : node list ; (** root nodes *)
    doc_item_map : node WMap.t ; (** nodes indexed by widget *)
    doc_id_map : node Stk.Smap.t ; (** nodes indexed by a string key; see {!find_node_by_id} *)
    mutable doc_filter : string option ;
    mutable doc_text_index : TI.t option ;
      (** text index, filled by {!build_doc_text_index} *)
  }

(** The document with no node, empty maps, no filter and no text index. *)
let doc_empty = {
    doc_nodes = [] ;
    doc_item_map = WMap.empty ;
    doc_id_map = Stk.Smap.empty ;
    doc_filter = None ;
    doc_text_index = None ;
  }

(** [find_node doc pred] is the first node of [doc] satisfying [pred], in
    depth-first order with a node tested before its children. *)
let find_node doc pred =
  let rec search node =
    if pred node then Some node else List.find_map search node.subs
  in
  List.find_map search doc.doc_nodes

(* Former implementation, walking the tree:
let find_node_by_id =
  let pred id node =
    match node.id with
    | Some i -> i = id
    | _ -> false
  in
  fun doc id -> find_node doc (pred id)
*)

(** [find_node_by_id doc id] is the node bound to [id] in the id map of
    [doc], if any. *)
let find_node_by_id { doc_id_map ; _ } id = Stk.Smap.find_opt id doc_id_map

(** [find_first_node_by_tag doc tag] is the first element node of [doc], in
    {!find_node} order, whose name has [tag] as second component. *)
let find_first_node_by_tag doc tag =
  let has_tag node =
    match node.xml with
    | Some (Xml.E { name = (_, local) }) -> String.equal local tag
    | Some _ | None -> false
  in
  find_node doc has_tag

(** [build_doc_text_index ?rebuild ?ct doc] stores a text index in [doc].

    Nothing is done when an index is already present, unless [rebuild] is
    [true] (default [false]).

    When [ct] is given and its mime type belongs to [Xml.html_mime_types],
    only the first [body] element is indexed. If no such element exists, a
    warning is logged and all root nodes are indexed. In every other case
    all root nodes are indexed. *)
let build_doc_text_index ?(rebuild=false) ?ct doc =
  let up_to_date = Option.is_some doc.doc_text_index && not rebuild in
  if not up_to_date then
    begin
      let is_html =
        match ct with
        | None -> false
        | Some ct -> List.mem (Ldp.Ct.to_mime ct) Xml.html_mime_types
      in
      let roots =
        if not is_html then
          doc.doc_nodes
        else
          match find_first_node_by_tag doc "body" with
          | Some body -> [body]
          | None ->
              Log.warn (fun m -> m "HTML-like document but no <body> found");
              doc.doc_nodes
      in
      doc.doc_text_index <- Some (TI.build_text_index roots)
    end