package camomile

  1. Overview
  2. Docs
Legend:
Library
Module
Module type
Parameter
Class
Class type

Character Information

type general_category_type = [
  1. | `Lu
    (*

    Letter, Uppercase

    *)
  2. | `Ll
    (*

    Letter, Lowercase

    *)
  3. | `Lt
    (*

    Letter, Titlecase

    *)
  4. | `Mn
    (*

    Mark, Non-Spacing

    *)
  5. | `Mc
    (*

    Mark, Spacing Combining

    *)
  6. | `Me
    (*

    Mark, Enclosing

    *)
  7. | `Nd
    (*

    Number, Decimal Digit

    *)
  8. | `Nl
    (*

    Number, Letter

    *)
  9. | `No
    (*

    Number, Other

    *)
  10. | `Zs
    (*

    Separator, Space

    *)
  11. | `Zl
    (*

    Separator, Line

    *)
  12. | `Zp
    (*

    Separator, Paragraph

    *)
  13. | `Cc
    (*

    Other, Control

    *)
  14. | `Cf
    (*

    Other, Format

    *)
  15. | `Cs
    (*

    Other, Surrogate

    *)
  16. | `Co
    (*

    Other, Private Use

    *)
  17. | `Cn
    (*

    Other, Not Assigned

    *)
  18. | `Lm
    (*

    Letter, Modifier

    *)
  19. | `Lo
    (*

    Letter, Other

    *)
  20. | `Pc
    (*

    Punctuation, Connector

    *)
  21. | `Pd
    (*

    Punctuation, Dash

    *)
  22. | `Ps
    (*

    Punctuation, Open

    *)
  23. | `Pe
    (*

    Punctuation, Close

    *)
  24. | `Pi
    (*

    Punctuation, Initial quote

    *)
  25. | `Pf
    (*

    Punctuation, Final quote

    *)
  26. | `Po
    (*

    Punctuation, Other

    *)
  27. | `Sm
    (*

    Symbol, Math

    *)
  28. | `Sc
    (*

    Symbol, Currency

    *)
  29. | `Sk
    (*

    Symbol, Modifier

    *)
  30. | `So
    (*

    Symbol, Other

    *)
]

Type of Unicode general character categories. Each variant specifies one category, as follows:

  • `Lu : Letter, Uppercase
  • `Ll : Letter, Lowercase
  • `Lt : Letter, Titlecase
  • `Mn : Mark, Non-Spacing
  • `Mc : Mark, Spacing Combining
  • `Me : Mark, Enclosing
  • `Nd : Number, Decimal Digit
  • `Nl : Number, Letter
  • `No : Number, Other
  • `Zs : Separator, Space
  • `Zl : Separator, Line
  • `Zp : Separator, Paragraph
  • `Cc : Other, Control
  • `Cf : Other, Format
  • `Cs : Other, Surrogate
  • `Co : Other, Private Use
  • `Cn : Other, Not Assigned
  • `Lm : Letter, Modifier
  • `Lo : Letter, Other
  • `Pc : Punctuation, Connector
  • `Pd : Punctuation, Dash
  • `Ps : Punctuation, Open
  • `Pe : Punctuation, Close
  • `Pi : Punctuation, Initial quote
  • `Pf : Punctuation, Final quote
  • `Po : Punctuation, Other
  • `Sm : Symbol, Math
  • `Sc : Symbol, Currency
  • `Sk : Symbol, Modifier
  • `So : Symbol, Other
val general_category : UChar.t -> general_category_type
val load_general_category_map : unit -> general_category_type UMap.t
type character_property_type = [
  1. | `Math
    (*

    Derived Core Properties

    *)
  2. | `Alphabetic
  3. | `Lowercase
  4. | `Uppercase
  5. | `ID_Start
  6. | `ID_Continue
  7. | `XID_Start
  8. | `XID_Continue
  9. | `Default_Ignorable_Code_Point
  10. | `Grapheme_Extend
  11. | `Grapheme_Base
  12. | `Bidi_Control
    (*

    Extended Properties

    *)
  13. | `White_Space
  14. | `Hyphen
  15. | `Quotation_Mark
  16. | `Terminal_Punctuation
  17. | `Other_Math
  18. | `Hex_Digit
  19. | `Ascii_Hex_Digit
  20. | `Other_Alphabetic
  21. | `Ideographic
  22. | `Diacritic
  23. | `Extender
  24. | `Other_Lowercase
  25. | `Other_Uppercase
  26. | `Noncharacter_Code_Point
  27. | `Other_Grapheme_Extend
  28. | `IDS_Binary_Operator
  29. | `IDS_Trinary_Operator
  30. | `Radical
  31. | `Unified_Ideograph
  32. | `Other_default_Ignorable_Code_Point
  33. | `Deprecated
  34. | `Soft_Dotted
  35. | `Logical_Order_Exception
]

Type of character properties

val load_property_tbl : character_property_type -> UCharTbl.Bool.t

Load the table for the given character type.

val load_property_tbl_by_name : string -> UCharTbl.Bool.t

Load the table for the character type with the given name. The name is obtained by removing the leading ` from the corresponding polymorphic variant tag.

val load_property_set : character_property_type -> USet.t

Load the set of characters of the given character type.

val load_property_set_by_name : string -> USet.t

Load the set of characters of the character type with the given name. The name is obtained by removing the leading ` from the corresponding polymorphic variant tag.

type script_type = [
  1. | `Common
  2. | `Inherited
  3. | `Latin
  4. | `Greek
  5. | `Cyrillic
  6. | `Armenian
  7. | `Hebrew
  8. | `Arabic
  9. | `Syriac
  10. | `Thaana
  11. | `Devanagari
  12. | `Bengali
  13. | `Gurmukhi
  14. | `Gujarati
  15. | `Oriya
  16. | `Tamil
  17. | `Telugu
  18. | `Kannada
  19. | `Malayalam
  20. | `Sinhala
  21. | `Thai
  22. | `Lao
  23. | `Tibetan
  24. | `Myanmar
  25. | `Georgian
  26. | `Hangul
  27. | `Ethiopic
  28. | `Cherokee
  29. | `Canadian_Aboriginal
  30. | `Ogham
  31. | `Runic
  32. | `Khmer
  33. | `Mongolian
  34. | `Hiragana
  35. | `Katakana
  36. | `Bopomofo
  37. | `Han
  38. | `Yi
  39. | `Old_Italic
  40. | `Gothic
  41. | `Deseret
  42. | `Tagalog
  43. | `Hanunoo
  44. | `Buhid
  45. | `Tagbanwa
]

Type of scripts

val script : UChar.t -> script_type
val load_script_map : unit -> script_type UMap.t
type version_type = [
  1. | `Nc
    (*

    undefined code point

    *)
  2. | `v1_0
  3. | `v1_1
  4. | `v2_0
  5. | `v2_1
  6. | `v3_0
  7. | `v3_1
  8. | `v3_2
]

age

val age : UChar.t -> version_type

age c is the Unicode version in which c was introduced.

val older : version_type -> version_type -> bool

older v1 v2 is true if v1 is older than (or the same version as) v2. Everything is older than `Nc.

casing

val load_to_lower1_tbl : unit -> UChar.t UCharTbl.t
val load_to_upper1_tbl : unit -> UChar.t UCharTbl.t
val load_to_title1_tbl : unit -> UChar.t UCharTbl.t
type casemap_condition = [
  1. | `Locale of string
  2. | `FinalSigma
  3. | `AfterSoftDotted
  4. | `MoreAbove
  5. | `BeforeDot
  6. | `Not of casemap_condition
]
type special_casing_property = {
  1. lower : UChar.t list;
  2. title : UChar.t list;
  3. upper : UChar.t list;
  4. condition : casemap_condition list;
}
val load_conditional_casing_tbl : unit -> special_casing_property list UCharTbl.t
val load_casefolding_tbl : unit -> UChar.t list UCharTbl.t
val combined_class : UChar.t -> int

Combined class. A combined class is an integer in the range 0 to 255 that shows how this character interacts with other combined characters.

Decomposition

type decomposition_type = [
  1. | `Canon
  2. | `Font
  3. | `NoBreak
  4. | `Initial
  5. | `Medial
  6. | `Final
  7. | `Isolated
  8. | `Circle
  9. | `Super
  10. | `Sub
  11. | `Vertical
  12. | `Wide
  13. | `Narrow
  14. | `Small
  15. | `Square
  16. | `Fraction
  17. | `Compat
]

Types of decomposition.

type decomposition_info = [
  1. | `Canonform
    (*

    Already in the canonical form

    *)
  2. | `HangulSyllable
    (*

    Hangul is treated algorithmically.

    *)
  3. | `Composite of decomposition_type * UChar.t list
    (*

    `Composite (dtype, text) means the given character is decomposed into text by dtype decomposition.

    *)
]
val load_decomposition_tbl : unit -> decomposition_info UCharTbl.t

Canonical Composition

val load_composition_tbl : unit -> (UChar.t * UChar.t) list UCharTbl.t

The return value [(u_1, u'_1); ...; (u_n, u'_n)] means that, for the given character u, the sequence u u_i forms the canonical composition u'_i. If u is a Hangul jamo, composition returns [].

val load_composition_exclusion_tbl : unit -> UCharTbl.Bool.t

Whether the given composed character is used in NFC or NFKC