Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Page
Library
Module
Module type
Parameter
Class
Class type
Source
rosetta.ml1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118type yuscii_encoding = [`UTF_7] type encoding = [yuscii_encoding | Uuuu.encoding | Coin.encoding] let invalid_arg fmt = Format.ksprintf (fun s -> invalid_arg s) fmt let try_or f g x = try f x with _ -> g x let ( <.> ) f g x = f (g x) let cast x = (x :> encoding) let yuscii_encoding_of_string = function | "UTF-7" | "csUTF7" -> `UTF_7 | s -> invalid_arg "Invalid_character-sets: %s" s let encoding_of_string = try_or (cast <.> yuscii_encoding_of_string) @@ try_or (cast <.> Uuuu.encoding_of_string) @@ try_or (cast <.> Coin.encoding_of_string) @@ invalid_arg "Invalid_character-sets: %s" let encoding_to_string = function | #Uuuu.encoding as encoding -> Uuuu.encoding_to_string encoding | #Coin.encoding as encoding -> Coin.encoding_to_string encoding | `UTF_7 -> "UTF-7" type ('kind, 'decoder) tag = | UTF_7 : ([> yuscii_encoding], Yuscii.decoder) tag | ISO8859 : ([> Uuuu.encoding], 'kind Uuuu.decoder) tag | KOI8 : ([> Coin.encoding], 'kind Coin.decoder) tag type 'kind pack = | V : ('kind, 'decoder) tag * 'decoder -> 'kind pack type src = [`Channel of in_channel | `String of string | `Manual] type decode = [`Await | `End | `Uchar of Uchar.t | `Malformed of string] type 'kind decoder = {src: src; pack: 'kind pack} constraint 'kind = [< encoding] let src {pack= V (kind, decoder); _} source off len = match kind with | UTF_7 -> Yuscii.src decoder source off len | ISO8859 -> Uuuu.src decoder source off len | KOI8 -> Coin.src decoder source off len let decode {pack= V (kind, decoder); _} = match kind with | UTF_7 -> Yuscii.decode decoder | ISO8859 -> Uuuu.decode decoder | KOI8 -> Coin.decode decoder let decoder : ([< encoding] as 'kind) -> src -> 'kind decoder = fun kind src -> match kind with | #Uuuu.encoding as k -> {src; pack= V (ISO8859, Uuuu.decoder k src)} | #Coin.encoding as k -> {src; pack= V (KOI8, Coin.decoder k src)} | #yuscii_encoding -> {src; pack= V (UTF_7, Yuscii.decoder src)} let decoder_byte_count {pack= V (kind, decoder); _} = match kind with | UTF_7 -> Yuscii.decoder_byte_count decoder | ISO8859 -> Uuuu.decoder_byte_count decoder | KOI8 -> Coin.decoder_byte_count decoder let decoder_src {src; _} = src let decoder_kind {pack= V (kind, decoder); _} = match kind with | UTF_7 -> `UTF_7 | ISO8859 -> (Uuuu.decoder_kind decoder :> encoding) | KOI8 -> (Coin.decoder_kind decoder :> encoding) let to_utf_8_string ?(rep= Uchar.rep) ~charset ?(off= 0) ?len str = let len = match len with | Some len -> len | None -> String.length str - off in match String.lowercase_ascii charset with | "utf-8" -> Some (String.sub str off len) | _ -> let ( let* ) = Option.bind in let* encoding = try Some (encoding_of_string charset) with _ -> None in let decoder = decoder encoding (`String (String.sub str off len)) in let buf = Buffer.create 0x7ff in let rec go decoder = match decode decoder with | `Uchar uchr -> Buffer.add_utf_8_uchar buf uchr; go decoder | `End -> Some (Buffer.contents buf) | `Malformed _ -> Buffer.add_utf_8_uchar buf rep; go decoder | `Await -> assert false in go decoder module String = struct type 'a folder = 'a -> int -> [`Malformed of string | `Uchar of Uchar.t] -> 'a let fold kind ?off ?len folder acc str = let off, len = match (off, len) with | Some off, Some len -> (off, len) | None, Some len -> (0, len) | Some off, None -> (off, String.length str - off) | None, None -> (0, String.length str) in let acc = ref acc in let decoder = decoder kind (`String (String.sub str off len)) in let rec go decoder = match decode decoder with | (`Uchar _ | `Malformed _) as res -> acc := folder !acc (decoder_byte_count decoder) res ; go decoder | `End -> !acc | `Await -> assert false in go decoder end