Initial comit - Clone

This commit is contained in:
2024-03-05 22:01:20 +01:00
commit 385cf8e5aa
727 changed files with 164567 additions and 0 deletions

36
lib/util/buff.ml Normal file
View File

@@ -0,0 +1,36 @@
(* $Id: buff.ml,v 5.1 2006-10-15 15:39:39 ddr Exp $ *)
module Make () = struct
let buff = ref (Bytes.create 80)
let store len x =
if len >= Bytes.length !buff then
buff := Bytes.extend !buff 0 (Bytes.length !buff);
Bytes.set !buff len x;
succ len
(* gstore and mstore axillary function *)
let unsafe_gstore len s si slen =
let newlen = len + slen in
(if newlen > Bytes.length !buff then
let more = max slen (Bytes.length !buff) in
buff := Bytes.extend !buff 0 more);
Bytes.blit_string s si !buff len slen;
newlen
let mstore len s = unsafe_gstore len s 0 (String.length s)
let gstore len s si slen =
unsafe_gstore len s si (min slen (String.length s - si))
let get len = Bytes.sub_string !buff 0 len
end
(* Global buffer *)
module BB = Make ()
let buff = BB.buff
let store = BB.store
let mstore = BB.mstore
let gstore = BB.gstore
let get = BB.get

42
lib/util/buff.mli Normal file
View File

@@ -0,0 +1,42 @@
(** Functor building a local implementation of the buffer. *)
module Make () : sig
val buff : bytes ref
(** Internal representation of the buffer *)
val store : int -> char -> int
(** [store i c] stores a character [c] at the position [i] inside the buffer.
Automatically extends buffer if needed. Returns the position that follows
inserted character ([i+1]) in buffer. Should be used either with position
0 or with position returned by previous calls of store functions. *)
val mstore : int -> string -> int
(** [mstore i s] stores a string [s] starting from the postion [i] inside the
buffer. Automatically extends buffer if needed. Returns the position that
follows inserted string in buffer. Should be used either with position 0
or with position returned by previous calls of store functions.*)
val gstore : int -> string -> int -> int -> int
(** [gstore i s si len] stores substring of [s] from [si] position with length
[len] inside the buffer starting from the postion [i]. Automatically extends
buffer if needed. Returns the position that follows inserted substring in
buffer. Should be used either with position 0 or with position returned
by previous calls of store functions.*)
val get : int -> string
(** [get len] returns buffer's content until position [len] *)
end
val buff : bytes ref
(** Variable [buff] for the global buffer *)
val get : int -> string
(** Function [get] for the global buffer. *)
val store : int -> char -> int
(** Function [store] for the global buffer. *)
val mstore : int -> string -> int
(** Function [mstore] for the global buffer. *)
val gstore : int -> string -> int -> int -> int
(** Function [gstore] for the global buffer. *)

66
lib/util/calendar.ml Normal file
View File

@@ -0,0 +1,66 @@
(* TODO this is probably buggy,
because geneweb uses month|day = 0 for incomplete dates *)
(** Convert [Adef.date] to Calendars.d *)
let to_calendars : Def.dmy -> Calendars.d =
fun { Def.day; month; year; delta; _ } -> { Calendars.day; month; year; delta }
(** Convert Calendars.d to [Adef.date] *)
let of_calendars : ?prec:Def.precision -> Calendars.d -> Def.dmy =
fun ?(prec = Def.Sure) { Calendars.day; month; year; delta } ->
{ Def.day; month; year; delta; prec }
let sdn_of_gregorian (d : Def.dmy) =
Calendars.sdn_of_gregorian @@ to_calendars d
let gregorian_of_sdn prec sdn =
of_calendars ~prec @@ Calendars.gregorian_of_sdn sdn
let sdn_of_julian (d : Def.dmy) = Calendars.sdn_of_julian @@ to_calendars d
let julian_of_sdn prec sdn = of_calendars ~prec @@ Calendars.julian_of_sdn sdn
let sdn_of_french (d : Def.dmy) = Calendars.sdn_of_french @@ to_calendars d
let french_of_sdn prec sdn = of_calendars ~prec @@ Calendars.french_of_sdn sdn
let sdn_of_hebrew (d : Def.dmy) = Calendars.sdn_of_hebrew @@ to_calendars d
let hebrew_of_sdn prec sdn = of_calendars ~prec @@ Calendars.hebrew_of_sdn sdn
let dmy_of_dmy2 : Def.dmy2 -> Def.dmy =
fun { Def.day2; month2; year2; delta2 } ->
{
Def.day = day2;
month = month2;
year = year2;
prec = Def.Sure;
delta = delta2;
}
let aux fn (d : Def.dmy) : Def.dmy =
let aux2 d2 =
let d = of_calendars @@ fn @@ to_calendars @@ dmy_of_dmy2 d2 in
{
Def.day2 = d.Def.day;
month2 = d.Def.month;
year2 = d.Def.year;
delta2 = d.Def.delta;
}
in
let prec =
match d.Def.prec with
| Def.OrYear d2 -> Def.OrYear (aux2 d2)
| Def.YearInt d2 -> Def.YearInt (aux2 d2)
| prec -> prec
in
of_calendars ~prec @@ fn @@ to_calendars d
let gregorian_of_julian = aux Calendars.gregorian_of_julian
let julian_of_gregorian = aux Calendars.julian_of_gregorian
let gregorian_of_french = aux Calendars.gregorian_of_french
let french_of_gregorian = aux Calendars.french_of_gregorian
let gregorian_of_hebrew = aux Calendars.gregorian_of_hebrew
let hebrew_of_gregorian = aux Calendars.hebrew_of_gregorian
type moon_phase = Calendars.moon_phase =
| NewMoon
| FirstQuarter
| FullMoon
| LastQuarter
let moon_phase_of_sdn = Calendars.moon_phase_of_sdn

51
lib/util/calendar.mli Normal file
View File

@@ -0,0 +1,51 @@
val gregorian_of_sdn : Def.precision -> int -> Def.dmy
(** Returns date of gregorian calendar from SDN and specified precision. *)
val julian_of_sdn : Def.precision -> int -> Def.dmy
(** Returns date of julian calendar from SDN and specified precision. *)
val french_of_sdn : Def.precision -> int -> Def.dmy
(** Returns date of french calendar from SDN and specified precision. *)
val hebrew_of_sdn : Def.precision -> int -> Def.dmy
(** Returns date of hebrew calendar from SDN and specified precision. *)
val sdn_of_gregorian : Def.dmy -> int
(** Returns SDN of the date of gregorian calendar. *)
val sdn_of_julian : Def.dmy -> int
(** Returns SDN of the date of julian calendar. *)
val sdn_of_french : Def.dmy -> int
(** Returns SDN of the date of french calendar. *)
val sdn_of_hebrew : Def.dmy -> int
(** Returns SDN of the date of hebrew calendar. *)
val gregorian_of_julian : Def.dmy -> Def.dmy
(** Converts julian calendar's date to gregorian. *)
val julian_of_gregorian : Def.dmy -> Def.dmy
(** Converts gregorian calendar's date to julian date. *)
val gregorian_of_french : Def.dmy -> Def.dmy
(** Converts french calendar's date to gregorian date. *)
val french_of_gregorian : Def.dmy -> Def.dmy
(** Converts gregorian calendar's date to french date. *)
val gregorian_of_hebrew : Def.dmy -> Def.dmy
(** Converts hebrew calendar's date to gregorian date. *)
val hebrew_of_gregorian : Def.dmy -> Def.dmy
(** Converts gregorian calendar's date to hebrew date. *)
(** Moon phases *)
type moon_phase = NewMoon | FirstQuarter | FullMoon | LastQuarter
val moon_phase_of_sdn : int -> (moon_phase * int * int) option * int
(** Returns information about moon phase from the given SDN.
Result [(Some (mph,h,m), day)] describes moon phase [mph], hour [h] and minute
[m] when this phase appears and days [day] since last New Moon phase (moon's age).
If result is [(None,_)], it tells that there wasn't any moon's phase (one
of the mentionned in [moon_phase]) this day. *)

222
lib/util/date.ml Normal file
View File

@@ -0,0 +1,222 @@
(* Copyright (c) 1998-2007 INRIA *)
open Def
type cdate = Adef.cdate =
| Cgregorian of int
| Cjulian of int
| Cfrench of int
| Chebrew of int
| Ctext of string
| Cdate of date
| Cnone
(* compress concrete date if it's possible *)
let compress d =
let simple =
match d.prec with
| Sure | About | Maybe | Before | After ->
d.day >= 0 && d.month >= 0 && d.year > 0 && d.year < 2500 && d.delta = 0
| OrYear _ | YearInt _ -> false
in
if simple then
let p =
match d.prec with
| About -> 1
| Maybe -> 2
| Before -> 3
| After -> 4
| Sure | OrYear _ | YearInt _ -> 0
in
Some ((((((p * 32) + d.day) * 13) + d.month) * 2500) + d.year)
else None
(* uncompress concrete date *)
let uncompress x =
let year, x = (x mod 2500, x / 2500) in
let month, x = (x mod 13, x / 13) in
let day, x = (x mod 32, x / 32) in
let prec =
match x with
| 1 -> About
| 2 -> Maybe
| 3 -> Before
| 4 -> After
| _ -> Sure
in
{ day; month; year; prec; delta = 0 }
let date_of_cdate = function
| Cgregorian i -> Dgreg (uncompress i, Dgregorian)
| Cjulian i -> Dgreg (uncompress i, Djulian)
| Cfrench i -> Dgreg (uncompress i, Dfrench)
| Chebrew i -> Dgreg (uncompress i, Dhebrew)
| Cdate d -> d
| Ctext t -> Dtext t
| Cnone -> failwith "date_of_cdate"
let cdate_of_date d =
match d with
| Dgreg (g, cal) -> (
match compress g with
| Some i -> (
match cal with
| Dgregorian -> Cgregorian i
| Djulian -> Cjulian i
| Dfrench -> Cfrench i
| Dhebrew -> Chebrew i)
| None -> Cdate d)
| Dtext t -> Ctext t
let cdate_of_od = function Some d -> cdate_of_date d | None -> Cnone
let od_of_cdate od =
match od with Cnone -> None | _ -> Some (date_of_cdate od)
let cdate_None = cdate_of_od None
let dmy_of_dmy2 dmy2 =
{
day = dmy2.day2;
month = dmy2.month2;
year = dmy2.year2;
prec = Sure;
delta = dmy2.delta2;
}
let leap_year a = if a mod 100 = 0 then a / 100 mod 4 = 0 else a mod 4 = 0
let nb_days_in_month m a =
if m = 2 && leap_year a then 29
else if m >= 1 && m <= 12 then
[| 31; 28; 31; 30; 31; 30; 31; 31; 30; 31; 30; 31 |].(m - 1)
else 0
(* TODO use SDN instead *)
let time_elapsed d1 d2 =
let prec =
match (d1.prec, d2.prec) with
| Sure, Sure -> Sure
| (Maybe | Sure | About), (Maybe | Sure | About) -> Maybe
| (About | Maybe | Sure | Before), (After | Sure | Maybe | About) -> After
| (About | Maybe | Sure | After), (Before | Sure | Maybe | About) -> Before
| _ -> Maybe
in
match d1 with
| { day = 0; month = 0; year = a1 } ->
{ day = 0; month = 0; year = d2.year - a1; prec; delta = 0 }
| { day = 0; month = m1; year = a1 } -> (
match d2 with
| { day = 0; month = 0; year = a2 } ->
{ day = 0; month = 0; year = a2 - a1; prec; delta = 0 }
| { day = 0; month = m2; year = a2 } ->
let month, r = if m1 <= m2 then (m2 - m1, 0) else (m2 - m1 + 12, 1) in
let year = a2 - a1 - r in
{ day = 0; month; year; prec; delta = 0 }
| { month = m2; year = a2 } ->
let month, r = if m1 <= m2 then (m2 - m1, 0) else (m2 - m1 + 12, 1) in
let year = a2 - a1 - r in
{ day = 0; month; year; prec; delta = 0 })
| { day = j1; month = m1; year = a1 } -> (
match d2 with
| { day = 0; month = 0; year = a2 } ->
{ day = 0; month = 0; year = a2 - a1; prec; delta = 0 }
| { day = 0; month = m2; year = a2 } ->
let month, r = if m1 <= m2 then (m2 - m1, 0) else (m2 - m1 + 12, 1) in
let year = a2 - a1 - r in
{ day = 0; month; year; prec; delta = 0 }
| { day = j2; month = m2; year = a2 } ->
let day, r =
if j1 <= j2 then (j2 - j1, 0)
else (j2 - j1 + nb_days_in_month m1 a1, 1)
in
let month, r =
if m1 + r <= m2 then (m2 - m1 - r, 0) else (m2 - m1 - r + 12, 1)
in
let year = a2 - a1 - r in
{ day; month; year; prec; delta = 0 })
let time_elapsed_opt d1 d2 =
match (d1.prec, d2.prec) with
| After, After | Before, Before -> None
| _ -> Some (time_elapsed d1 d2)
(* TODO use SDN to compare date (?) *)
(* use strict = false to compare date as if they are points on a timeline.
use strict = true to compare date by taking precision in account. This makes some dates not comparable, do not use to sort a list *)
let rec compare_dmy_opt ?(strict = false) dmy1 dmy2 =
match compare dmy1.year dmy2.year with
| 0 -> compare_month_or_day ~is_day:false strict dmy1 dmy2
| x -> eval_strict strict dmy1 dmy2 x
and compare_month_or_day ~is_day strict dmy1 dmy2 =
(* compare a known month|day with a unknown one (0) *)
let compare_with_unknown_value ~strict ~unkonwn ~known =
match unkonwn.prec with
| After -> Some 1
| Before -> Some (-1)
| _other -> if strict then None else compare_prec false unkonwn known
in
(* if we are comparing months the next comparison to do is on days
else if we are comparing days it is compare_prec *)
let x, y, next_comparison =
if is_day then (dmy1.day, dmy2.day, compare_prec)
else (dmy1.month, dmy2.month, compare_month_or_day ~is_day:true)
in
(* 0 means month|day is unknow*)
match (x, y) with
| 0, 0 -> compare_prec strict dmy1 dmy2
| 0, _ -> compare_with_unknown_value ~strict ~unkonwn:dmy1 ~known:dmy2
| _, 0 ->
(* swap dmy1 and dmy2 *)
Option.map Int.neg
@@ compare_with_unknown_value ~strict ~unkonwn:dmy2 ~known:dmy1
| m1, m2 -> (
match Int.compare m1 m2 with
| 0 -> next_comparison strict dmy1 dmy2
| x -> eval_strict strict dmy1 dmy2 x)
and compare_prec strict dmy1 dmy2 =
match (dmy1.prec, dmy2.prec) with
| (Sure | About | Maybe), (Sure | About | Maybe) -> Some 0
| After, After | Before, Before -> Some 0
| OrYear dmy1, OrYear dmy2 | YearInt dmy1, YearInt dmy2 ->
compare_dmy_opt ~strict (dmy_of_dmy2 dmy1) (dmy_of_dmy2 dmy2)
| _, After | Before, _ -> Some (-1)
| After, _ | _, Before -> Some 1
| _ -> Some 0
and eval_strict strict dmy1 dmy2 x =
if strict then
match x with
| -1 when dmy1.prec = After || dmy2.prec = Before -> None
| 1 when dmy1.prec = Before || dmy2.prec = After -> None
| x -> Some x
else Some x
exception Not_comparable
let compare_dmy ?(strict = false) dmy1 dmy2 =
match compare_dmy_opt ~strict dmy1 dmy2 with
| None -> raise Not_comparable
| Some x -> x
let compare_date ?(strict = false) d1 d2 =
match (d1, d2) with
| Dgreg (dmy1, _), Dgreg (dmy2, _) -> compare_dmy ~strict dmy1 dmy2
| Dgreg (_, _), Dtext _ -> if strict then raise Not_comparable else 1
| Dtext _, Dgreg (_, _) -> if strict then raise Not_comparable else -1
| Dtext _, Dtext _ -> if strict then raise Not_comparable else 0
let cdate_to_dmy_opt cdate =
match od_of_cdate cdate with
| Some (Dgreg (d, _)) -> Some d
| Some (Dtext _) | None -> None
let cdate_of_death = function
| Death (_, cd) -> Some cd
| NotDead | DeadYoung | DeadDontKnowWhen | DontKnowIfDead | OfCourseDead ->
None
let dmy_of_death death = Option.bind (cdate_of_death death) cdate_to_dmy_opt
let date_of_death death = Option.bind (cdate_of_death death) od_of_cdate

88
lib/util/date.mli Normal file
View File

@@ -0,0 +1,88 @@
(* Copyright (c) 1998-2007 INRIA *)
open Def
val leap_year : int -> bool
(** Says if the given year is a leap year. *)
val nb_days_in_month : int -> int -> int
(** Returns number of days for the given month and year for
gregorian calendar. Takes into account leap years. *)
val time_elapsed : Def.dmy -> Def.dmy -> Def.dmy
(** [time_elapsed start stop]
Compute the time elapsed between [start] and [stop].
If [stop] is prior to [start], resulting [dmy]'s field
are negative (but correct).
Resulting [prec] can be:
- [Sure] for exact duration
- [Before] for "less than" duration
- [After] for "more than" duration
- [Maybe] for other cases
Used to compare only gregorian calendar's dates.
*)
val time_elapsed_opt : Def.dmy -> Def.dmy -> Def.dmy option
(** Same as [time_elapsed], but will return [None]
if computation is not possible
(e.g. time_elapsed_opt /1839 /1859). *)
(* TODO add date_of_burial/event? *)
val dmy_of_death : Def.death -> Adef.dmy option
val date_of_death : Def.death -> Adef.date option
(** Returns date of death if present. *)
val dmy_of_dmy2 : dmy2 -> dmy
(** [dmy_of_dmy2 dmy2]
Convert a [dmy2] to [dmy] using [Sure] as precision. *)
exception Not_comparable
(** [Not_comparable] is raised by [compare_dmy] and [compare_date] when
[strict] mode is used and precision of dates are incompatibles to
have a reliable result (e.g. is [compare_dmy 2019 07/2019]) or when
one of the date in [compare_date] is [Dtext]. *)
val compare_dmy : ?strict:bool -> dmy -> dmy -> int
(** [compare_dmy ?strict d1 d2]
Return a negative integer if [d1] is prior to [d2],
[0] if [d1] is equal to [d2],
and a positive integer if [d2] is prior to [d1].
[strict] parameter enable or disable strict mode, and
is false by default (see [Not_comparable])
*)
val compare_dmy_opt : ?strict:bool -> dmy -> dmy -> int option
(** [compare_dmy_opt ?strict d1 d2]
Same as [compare_dmy], but do not raise an exception
*)
val compare_date : ?strict:bool -> date -> date -> int
(** [compare_date d1 d2]
If both [d1] and [d2] are [Dgreg] date, uses [compare_dmy]
to compare them.
[Dtext] dates are always considered prior to any [Dgreg] date,
and equal to any other [Dtext] date.
[strict] parameter enable or disable strict mode, and
is false by default (see [Not_comparable])
*)
val cdate_None : cdate
(** Absent compressed date *)
val date_of_cdate : cdate -> date
(** Convert [cdate] to [date]; fail if [cdate] is [Cnone] *)
val od_of_cdate : cdate -> date option
(** Optional date from [cdate] *)
val cdate_to_dmy_opt : cdate -> dmy option
(** [cdate_to_dmy_opt d] is [Some dmy] iff [d] resolve to [Dgreg (dmy,_)] *)
val cdate_of_date : date -> cdate
(** Convert [date] to [cdate] *)
val cdate_of_od : date option -> cdate
(** Optional date to [cdate] *)
(* TODO date_to_dmy? *)

17
lib/util/dune.in Normal file
View File

@@ -0,0 +1,17 @@
(library
(name geneweb_util)
(public_name geneweb.util)
(wrapped false)
(preprocess (action (run %{bin:cppo} %%%CPPO_D%%% -V OCAML:%{ocaml_version} %{input-file})))
(libraries
calendars
stdlib-shims
unidecode
geneweb.def
re
unix
uucp
uunf
uutf
)
)

295
lib/util/futil.ml Normal file
View File

@@ -0,0 +1,295 @@
(* Copyright (c) 2006-2007 INRIA *)
open Adef
open Def
external identity : 'a -> 'a = "%identity"
let map_cdate fd d =
match Date.od_of_cdate d with
| Some d -> Date.cdate_of_date (fd d)
| None -> d
let map_title_strings ?(fd = identity) f t =
let t_name =
match t.t_name with
| Tmain -> Tmain
| Tname s -> Tname (f s)
| Tnone -> Tnone
in
let t_ident = f t.t_ident in
let t_place = f t.t_place in
{
t_name;
t_ident;
t_place;
t_date_start = map_cdate fd t.t_date_start;
t_date_end = map_cdate fd t.t_date_end;
t_nth = t.t_nth;
}
let map_pers_event ?(fd = identity) fp fs e =
let epers_name =
match e.epers_name with
| ( Epers_Birth | Epers_Baptism | Epers_Death | Epers_Burial
| Epers_Cremation | Epers_Accomplishment | Epers_Acquisition
| Epers_Adhesion | Epers_BaptismLDS | Epers_BarMitzvah | Epers_BatMitzvah
| Epers_Benediction | Epers_ChangeName | Epers_Circumcision
| Epers_Confirmation | Epers_ConfirmationLDS | Epers_Decoration
| Epers_DemobilisationMilitaire | Epers_Diploma | Epers_Distinction
| Epers_Dotation | Epers_DotationLDS | Epers_Education | Epers_Election
| Epers_Emigration | Epers_Excommunication | Epers_FamilyLinkLDS
| Epers_FirstCommunion | Epers_Funeral | Epers_Graduate
| Epers_Hospitalisation | Epers_Illness | Epers_Immigration
| Epers_ListePassenger | Epers_MilitaryDistinction
| Epers_MilitaryPromotion | Epers_MilitaryService
| Epers_MobilisationMilitaire | Epers_Naturalisation | Epers_Occupation
| Epers_Ordination | Epers_Property | Epers_Recensement | Epers_Residence
| Epers_Retired | Epers_ScellentChildLDS | Epers_ScellentParentLDS
| Epers_ScellentSpouseLDS | Epers_VenteBien | Epers_Will ) as evt ->
evt
| Epers_Name s -> Epers_Name (fs s)
in
let epers_date = map_cdate fd e.epers_date in
let epers_place = fs e.epers_place in
let epers_reason = fs e.epers_reason in
let epers_note = fs e.epers_note in
let epers_src = fs e.epers_src in
let epers_witnesses = Array.map (fun (p, w) -> (fp p, w)) e.epers_witnesses in
{
epers_name;
epers_date;
epers_place;
epers_reason;
epers_note;
epers_src;
epers_witnesses;
}
let map_fam_event ?(fd = identity) fp fs e =
let efam_name =
match e.efam_name with
| ( Efam_Marriage | Efam_NoMarriage | Efam_NoMention | Efam_Engage
| Efam_Divorce | Efam_Separated | Efam_Annulation | Efam_MarriageBann
| Efam_MarriageContract | Efam_MarriageLicense | Efam_PACS
| Efam_Residence ) as evt ->
evt
| Efam_Name s -> Efam_Name (fs s)
in
let efam_date = map_cdate fd e.efam_date in
let efam_place = fs e.efam_place in
let efam_reason = fs e.efam_reason in
let efam_note = fs e.efam_note in
let efam_src = fs e.efam_src in
let efam_witnesses = Array.map (fun (p, w) -> (fp p, w)) e.efam_witnesses in
{
efam_name;
efam_date;
efam_place;
efam_reason;
efam_note;
efam_src;
efam_witnesses;
}
let map_relation_ps fp fs r =
{
r_type = r.r_type;
r_fath = (match r.r_fath with Some x -> Some (fp x) | None -> None);
r_moth = (match r.r_moth with Some x -> Some (fp x) | None -> None);
r_sources = fs r.r_sources;
}
let map_death fd = function
| (NotDead | DeadYoung | DeadDontKnowWhen | DontKnowIfDead | OfCourseDead) as
x ->
x
| Death (r, d) -> Death (r, map_cdate fd d)
let map_burial fd = function
| UnknownBurial -> UnknownBurial
| Buried d -> Buried (map_cdate fd d)
| Cremated d -> Cremated (map_cdate fd d)
let map_person_ps ?(fd = identity) fp fs p =
{
first_name = fs p.first_name;
surname = fs p.surname;
occ = p.occ;
image = fs p.image;
first_names_aliases = List.map fs p.first_names_aliases;
surnames_aliases = List.map fs p.surnames_aliases;
public_name = fs p.public_name;
qualifiers = List.map fs p.qualifiers;
titles = List.map (map_title_strings ~fd fs) p.titles;
rparents = List.map (map_relation_ps fp fs) p.rparents;
related = List.map fp p.related;
aliases = List.map fs p.aliases;
occupation = fs p.occupation;
sex = p.sex;
access = p.access;
birth = map_cdate fd p.birth;
birth_place = fs p.birth_place;
birth_note = fs p.birth_note;
birth_src = fs p.birth_src;
baptism = map_cdate fd p.baptism;
baptism_place = fs p.baptism_place;
baptism_note = fs p.baptism_note;
baptism_src = fs p.baptism_src;
death = map_death fd p.death;
death_place = fs p.death_place;
death_note = fs p.death_note;
death_src = fs p.death_src;
burial = map_burial fd p.burial;
burial_place = fs p.burial_place;
burial_note = fs p.burial_note;
burial_src = fs p.burial_src;
pevents = List.map (map_pers_event ~fd fp fs) p.pevents;
notes = fs p.notes;
psources = fs p.psources;
key_index = p.key_index;
}
let map_ascend_f ff a =
match a.parents with
| Some f -> { parents = Some (ff f); consang = a.consang }
| None -> { parents = None; consang = a.consang }
let map_union_f ff u = { family = Array.map ff u.family }
let map_divorce fd = function
| (NotDivorced | Separated) as x -> x
| Divorced d -> Divorced (map_cdate fd d)
let map_family_ps ?(fd = identity) fp ff fs fam =
{
marriage = map_cdate fd fam.marriage;
marriage_place = fs fam.marriage_place;
marriage_note = fs fam.marriage_note;
marriage_src = fs fam.marriage_src;
witnesses = Array.map fp fam.witnesses;
relation = fam.relation;
divorce = map_divorce fd fam.divorce;
fevents = List.map (map_fam_event ~fd fp fs) fam.fevents;
comment = fs fam.comment;
origin_file = fs fam.origin_file;
fsources = fs fam.fsources;
fam_index = ff fam.fam_index;
}
let parent multi parent =
if not multi then Adef.parent parent else Adef.multi_parent parent
let map_couple_p multi_parents fp cpl =
parent multi_parents (Array.map fp (parent_array cpl))
let map_descend_p fp des = { children = Array.map fp des.children }
let gen_person_misc_names sou empty_string quest_string first_name surname
public_name qualifiers aliases first_names_aliases surnames_aliases titles
husbands father_titles_places =
if first_name = quest_string || surname = quest_string then []
else
let s_first_name = Mutil.nominative @@ sou first_name in
let s_surname = Mutil.nominative @@ sou surname in
let s_titles_names =
List.fold_left
(fun acc t ->
match t.t_name with Tmain | Tnone -> acc | Tname x -> sou x :: acc)
[] titles
in
let s_public_names =
if public_name = empty_string then s_titles_names
else sou public_name :: s_titles_names
in
let s_first_names =
s_first_name
:: (* List.rev_append *)
List.rev_map sou first_names_aliases (* public_names *)
in
let s_surnames =
s_surname
:: Mutil.list_rev_map_append sou surnames_aliases
(Mutil.list_rev_map_append sou qualifiers
@@ Mutil.surnames_pieces s_surname)
in
let s_surnames =
Array.fold_left
(fun s_list (husband_surname, husband_surnames_aliases) ->
if husband_surname = quest_string then
Mutil.list_rev_map_append sou husband_surnames_aliases s_list
else
let s_husband_surname = Mutil.nominative @@ sou husband_surname in
s_husband_surname
:: Mutil.list_rev_map_append sou husband_surnames_aliases
(List.rev_append
(Mutil.surnames_pieces s_husband_surname)
s_list))
s_surnames husbands
in
(* (public names) *)
let s_list = s_public_names in
(* + (first names) x (surnames) *)
let s_list =
List.fold_left
(fun list f ->
List.fold_left (fun list s -> Name.concat f s :: list) list s_surnames)
s_list s_first_names
in
(* + (first names + (title | (public name)) ) x (titles places) *)
let s_list =
(* let first_names = first_name :: first_names_aliases in *)
List.fold_left
(fun list t ->
let s = t.t_place in
if s = empty_string then list
else
let s = sou s in
let s_first_names =
match t.t_name with
| Tname f -> sou f :: s_first_names
| Tmain | Tnone ->
if public_name = empty_string then s_first_names
else sou public_name :: s_first_names
in
List.fold_left
(fun list f -> Name.concat f s :: list)
list s_first_names)
s_list titles
in
(* + (first names) x (father's title places) *)
let list =
if father_titles_places = [] then s_list
else
List.fold_left
(fun list t ->
let s = t.t_place in
if s = empty_string then list
else
let s = sou s in
List.fold_left
(fun list f -> Name.concat f s :: list)
list s_first_names)
s_list father_titles_places
in
let list = Mutil.list_rev_map_append sou aliases list in
list
let rec eq_lists eq l1 l2 =
match (l1, l2) with
| x1 :: l1, x2 :: l2 -> eq x1 x2 && eq_lists eq l1 l2
| [], [] -> true
| _ -> false
let eq_title_names eq tn1 tn2 =
match (tn1, tn2) with
| Tname i1, Tname i2 -> eq i1 i2
| Tmain, Tmain | Tnone, Tnone -> true
| _ -> false
let eq_titles eq t1 t2 =
eq_title_names eq t1.t_name t2.t_name
&& eq t1.t_ident t2.t_ident && eq t1.t_place t2.t_place
&& t1.t_date_start = t2.t_date_start
&& t1.t_date_end = t2.t_date_end
&& t1.t_nth = t2.t_nth

124
lib/util/futil.mli Normal file
View File

@@ -0,0 +1,124 @@
(* Copyright (c) 2006-2007 INRIA *)
open Def
val map_title_strings :
?fd:(Def.date -> Def.date) -> ('a -> 'b) -> 'a gen_title -> 'b gen_title
(** Convert generic type used to represent name, id and the place of [Def.gen_title] into
another one. If [fd] is present, apply it on the date of the start and date of the end of a title *)
val map_pers_event :
?fd:(Def.date -> Def.date) ->
('a -> 'c) ->
('b -> 'd) ->
('a, 'b) gen_pers_event ->
('c, 'd) gen_pers_event
(** Convert:
- Generic type used to represent witnesses of [Def.gen_pers_event] into another one.
- Generic type used to represent name, place, reason, note and source of [Def.gen_pers_event]
into another one.
If [fd] is present, apply it on date of the personal event. *)
val map_fam_event :
?fd:(Def.date -> Def.date) ->
('a -> 'c) ->
('b -> 'd) ->
('a, 'b) gen_fam_event ->
('c, 'd) gen_fam_event
(** Convert:
- Generic type used to represent witnesses of [Def.gen_fam_event] into another one.
- Generic type used to represent name, place, reason, note and source of [Def.gen_fam_event]
into another one.
If [fd] is present, apply it on date of the familial event. *)
val map_relation_ps :
('a -> 'c) -> ('b -> 'd) -> ('a, 'b) gen_relation -> ('c, 'd) gen_relation
(** Convert:
- Generic type used to represent father and mother inside [Def.gen_relation] into another one.
- Generic type used to represent sources of [Def.gen_relation] into another one. *)
val map_person_ps :
?fd:(Def.date -> Def.date) ->
('b -> 'd) ->
('c -> 'e) ->
('a, 'b, 'c) gen_person ->
('a, 'd, 'e) gen_person
(** Convert:
- Generic type used to represent related persons (parents, witnesses of a personal event, etc.)
of [Def.gen_person] into another one.
- Generic type used to represent another large part of information of [Def.gen_person]
into another one.
If [fd] is present, apply it on every date (birth, death, titles,, personal events, etc.).
Generic type that is used to represent indexation key isn't converted. *)
val map_ascend_f : ('a -> 'b) -> 'a gen_ascend -> 'b gen_ascend
(** Convert generic type used to represent family inside [Def.gen_ascend] into
another one. *)
val map_union_f : ('a -> 'b) -> 'a gen_union -> 'b gen_union
(** Convert generic type used to represent one of the famillies inside [Def.gen_union] into
another one. *)
val map_family_ps :
?fd:(Def.date -> Def.date) ->
('a -> 'b) ->
('c -> 'd) ->
('e -> 'f) ->
('a, 'c, 'e) gen_family ->
('b, 'd, 'f) gen_family
(** Convert:
- Generic type used to represent faimily indexation key into another one.
- Generic type used to represent witnesses (of the marriage or of a famillial events, etc.)
of [Def.gen_family] into another one.
- Generic type used to represent another large part of information of [Def.gen_family]
into another one.
If [fd] is present, apply it on it on every date (marriage, divorce, famillial events, etc.).*)
val map_couple_p : bool -> ('a -> 'b) -> 'a gen_couple -> 'b gen_couple
(** Convert generic type used to represent father and mother inside [Def.gen_couple] into
another one. If first argument is true then use multi-parent functionality. *)
val parent : bool -> 'a array -> 'a gen_couple
(** @deprecated Use [Adef.parent] instead. *)
val map_descend_p : ('a -> 'b) -> 'a gen_descend -> 'b gen_descend
(** Convert generic type used to represent children inside [Def.gen_descend] into
another one.*)
val eq_lists : ('a -> 'b -> bool) -> 'a list -> 'b list -> bool
(** Says if two lists with different element's type are equal with given comparison
function. *)
val eq_titles : ('a -> 'b -> bool) -> 'a gen_title -> 'b gen_title -> bool
(** Says if two titles with different types are equal with given comparison
function. *)
val eq_title_names :
('a -> 'b -> bool) -> 'a gen_title_name -> 'b gen_title_name -> bool
(** Says if two title names with different types are equal with given comparison
function. *)
val gen_person_misc_names :
('a -> string) ->
'a ->
'a ->
'a ->
'a ->
'a ->
'a list ->
'a list ->
'a list ->
'a list ->
'a Def.gen_title list ->
('a * 'a list) array ->
'a Def.gen_title list ->
string list
(** Return a list of string corresponding to various
mix between all kind of names.
It can contain duplicates. Strings are used raw (not lowered).
*)

43
lib/util/lock.ml Normal file
View File

@@ -0,0 +1,43 @@
(* Copyright (c) 1998-2007 INRIA *)
let no_lock_flag = ref false
let print_error_and_exit () =
Printf.printf "\nSorry. Impossible to lock base.\n";
flush stdout;
exit 2
let print_try_again () =
Printf.eprintf "Base locked. Try again.\n";
flush stdout
let control ~onerror lname wait f =
if !no_lock_flag then f ()
else
try
let fd = Unix.openfile lname [ Unix.O_RDWR; Unix.O_CREAT ] 0o666 in
(try Unix.chmod lname 0o666 with _ -> ());
try
if Sys.unix then
if wait then Unix.lockf fd Unix.F_LOCK 0
else Unix.lockf fd Unix.F_TLOCK 0;
let r = f () in
Unix.close fd;
r
with e ->
Unix.close fd;
raise e
with
| Unix.Unix_error _ -> onerror ()
| e -> raise e
let control_retry ~onerror lname f =
control lname false
~onerror:(fun () ->
Printf.eprintf "Base is locked. Waiting... ";
flush stderr;
control lname true ~onerror (fun () ->
Printf.eprintf "Ok\n";
flush stderr;
f ()))
f

17
lib/util/lock.mli Normal file
View File

@@ -0,0 +1,17 @@
val no_lock_flag : bool ref
(** Flag that indicates if the lock should be used. *)
val print_error_and_exit : unit -> unit
(** Print lock error message and terminate program. *)
val print_try_again : unit -> unit
(** Print message about locked database. *)
val control : onerror:(unit -> 'a) -> string -> bool -> (unit -> 'a) -> 'a
(** [control ~onerror lname wait f] opens file [lname], puts a write lock on it and then calls [f].
If [wait] is true then if it tries to access locked file it will be blocked until these lock is removed. Otherwise
it will fail, and function [onerror] will be called. If flag [no_lock_flag] is set then returns [f ()] immediatly. *)
val control_retry : onerror:(unit -> 'a) -> string -> (unit -> 'a) -> 'a
(** Tries to call [control] without blocking. If it fail (lock is put) then call again [control]
and waits untill lock is removed. If it fails with another reason calls [onerror]. *)

1244
lib/util/mutil.ml Normal file

File diff suppressed because it is too large Load Diff

446
lib/util/mutil.mli Normal file
View File

@@ -0,0 +1,446 @@
(* Copyright (c) 2006-2007 INRIA *)
val verbose : bool ref
(** Global variable that indicates either
servers should be in verbose mode. *)
val list_iter_first : (bool -> 'a -> unit) -> 'a list -> unit
(** [list_iter_first f l] iter over first element with [f true] and over others with [f false]. *)
val strip_all_trailing_spaces : string -> string
(** Remove all trailing spaces in string *)
(* [decline dform dformat] encode name that could be declined (like in the czech language)
and its declination form in more comprehensible for computer format.
Declination form [dform] is one of the follows:
- 'n' for nominative
- 'a' for accusative
- 'g' for genitif
Declination format [dformat] describes how does a name changes throughout different
declination forms comparing to the nominative form.
See {{: https://geneweb.tuxfamily.org/wiki/declension }Declination in Geneweb} for more details.
Example 1: [decline 'a' "Vladana:a:Vladanu:g:Vladany"] returns encoding "@(@(a)@(a?Vladanu:g?Vladany:Vladana))."
Example 2: [decline 'a' "Vladana:a:-u:g:-y"] returns encoding "@(@(a)Vladan@(a?u:g?y:a))"
@deprecated *)
val decline : char -> string -> string
val nominative : string -> string
(** Encodes name for nominative declination format.
@deprecated *)
val mkdir_p : ?perm:int -> string -> unit
(** [mkdir_p ?perm dir]
Create the directory [dir].
No error if existing, make parent directories as needed.
*)
val remove_dir : string -> unit
(** Remove every file in the directory and then remove the directory itself *)
val lock_file : string -> string
(** Returns the name of a lock file (with extension .lck). Result is generally used as an
argument for [Lock.control] function. *)
val initial : string -> int
(** Returns position of first capital letter in the name (0 if no capitals). *)
val input_particles : string -> string list
(** [input_particles fname] read file and returns list of lines.
Empty lines are skipped. *)
val surnames_pieces : string -> string list
(** Divide surnames on pieces. Every separated word that contains at least 4 character
forms one piece. Words that contains less than 4 characters or words "saint" and "sainte"
are considered as the particles and are attached to the another word to form a piece.
If string contains less than two pieces, returns an empty list. *)
val utf_8_of_iso_8859_1 : string -> string
(** Convert encoded string with ISO 8859-1 to UTF 8 *)
val iso_8859_1_of_utf_8 : string -> string
(** Convert encoded string with UTF 8 to ISO 8859-1 *)
val roman_of_arabian : int -> string
(** Convert arabic number (int) to roman (string). Number should be < 4000. *)
val arabian_of_roman : string -> int
(** Convert roman number (string) to arabic (int). Number should be less or equal
to MMMCMXCIX (3999). *)
val fallback : (string * string) list ref
val read_fallback : string -> string -> unit
(** reads a file lexicon.gwf which defines a possible fallback language for
each of the available languages.
Most of the lines of this file are commented and can be uncommented by
the user who can place the new file in bases/etc/lang *)
val input_lexicon :
string -> (string, string) Hashtbl.t -> (unit -> in_channel) -> unit
(** [input_lexicon lang ht open_file] open {i lexicon.txt} file with [open_file ()],
parse it and fill [ht] where key is the keyword and value is
a coresponding traduction associated to a [lang] language code.
The second parameter defines a set of fallback languages if a translation is not
available in a given language.
If traduction line has a form [->: kw] it associates to the current section name the value
associated to [kw] section name inside [ht] (keyword alias). *)
module StrSet : Set.S with type elt = string
(** Set of strings *)
val tr : char -> char -> string -> string
(** [tr c1 c2 str]
Return a new string which is the same as [str] with all occurences of [c1]
replaced by [c2]. If [str] does not contain [c1] [str] is returned untouched.
*)
val unsafe_tr : char -> char -> string -> string
(** [unsafe_tr c1 c2 str]
Update [str] in place. Replace all occurences of [c1] by [c2].
*)
val array_to_list_map : ('a -> 'b) -> 'a array -> 'b list
(** [array_to_list_map fn a] is almost like [Array.to_list a |> List.map fn]
but is more efficient.
The list is constructed backward,
so if [fn] have side effects it may not behave as excepted.
*)
val array_to_list_rev_map : ('a -> 'b) -> 'a array -> 'b list
(** [array_to_list_revmap fn a] is almost like [Array.to_list a |> List.rev_map fn]
but is more efficient.
*)
val array_assoc : 'k -> ('k * 'v) array -> 'v
(** [array_assoc k arr]
returns the value associated with key [k] in the array of pairs [arr].
That is, [array_assoc k [| ... ; (k,v) ; ... |] = v]
if [(k,v)] is the leftmost binding of a in array [arr].
Raise [Not_found] if there is no value associated with [k] in [arr]. *)
val start_with : string -> int -> string -> bool
(** [start_with prefix off str]
Test if [str] starts with [prefix] (at offset [off]).
Raise [Invalid_argument] if [off] is not a valid index in [str].
*)
val start_with_wildcard : string -> int -> string -> bool
(** [start_with_wildcard prefix off str]
Test if [str] starts with [prefix] (at offset [off]).
Occurences of ['_'] in [prefix] will match both ['_']
and [' '] in [str] and trailing ['_'] of [prefix]
is treated as an optional ['_'] [' '].
Raise [Invalid_argument] if [off] is not a valid index in [str].
*)
val contains : string -> string -> bool
(** [contains str sub] Test [sub] is contained in [str].
*)
val compile_particles : string list -> Re.re
(** [compile_particles list]
Compile [list] so it can be used with [get_particle]
or [compare_after_particle] function. *)
val get_particle : Re.re -> string -> string
(** [get_particle particles name]
Return [p] where [p] is in [particles] and is prefix of [name].
If no such [p] exists, empty string [""] is returned. *)
val compare_after_particle : Re.re -> string -> string -> int
(** [compare_after_particle particles s1 s2]
compare strings [s1] [s2] starting from the first character after
particle's match. If they are equal, compare particles. *)
val rm : string -> unit
(** [rm fname]
Remove [fname]. If [fname] does not exists, do nothing.
*)
val mv : string -> string -> unit
(** [mv src dst]
Move [src] to [dst]. If [src] does not exists, do nothing.
*)
val string_of_int_sep : string -> int -> string
(** [string_of_int_sep "," 1000000] is ["1,000,000"]
*)
val list_compare : ('a -> 'a -> int) -> 'a list -> 'a list -> int
(** [list_compare cmp l1 l2]
Comparison function for lists, using [cmp] to compare each elements
*)
val list_find_map : ('a -> 'b option) -> 'a list -> 'b option
(** [list_find_map fn list]
OCaml Stdlib's [List.find_map] (introduced in 4.10.0)
backported into GeneWeb
*)
val array_find_map : ('a -> 'b option) -> 'a array -> 'b option
(** [array_find_map f a] applies [f] to the elements of [a] in order, and returns the
first result of the form [Some v], or [None] if none exist.
TODO OCaml 4.13; use Stdlib
*)
val list_rev_iter : ('a -> unit) -> 'a list -> unit
(** [list_rev_iter f l] gives the same result as [List.rev l |> List.iter fn],
but without creating intermediate list (not tail-recursive).
*)
val list_last : 'a list -> 'a
(** [list_last list]
Return the last element of the list.
Raises [Failure] if the list is empty.
*)
val list_slice : int -> int -> 'a list -> 'a list
(** [list_slice from_ to_ list]
Extracts elements from [a]-nth (starts with zero, inclusive)
to [b]-nth (exclusive). If [list] is not long enough, result
will be shorter than requested, but the function will not fail.
*)
val check_magic : string -> in_channel -> bool
(** [check_magic magic ic]
Read (and consume) the [magic] string at the beggining of [ic]
and return [true].
If [ic] does not start with [magic], reset the reading position
of [ic] to where is was before you call [check_magic] and return [false].
*)
val executable_magic : string
(** Magic string are either get from {i GW_EXECUTABLE_MAGIC} environement variable
either generated from the md5sum of the running executable.
It can be used for volatile files which can be easily corrupted
by any change in program or data representation.
*)
val random_magic : string
(** Magic string generated from 30 random bits.
It should be different each time you launch the program.
*)
val array_except : 'a -> 'a array -> 'a array
(** [array_except value array]
Return a new array containing all the elements
from [array] except the first occurence of [value]
*)
val default_particles : string list
(** List of default particles used in GeneWeb *)
val array_forall2 : ('a -> 'b -> bool) -> 'a array -> 'b array -> bool
(** [array_forall2 p a b]
Checks if all elements of the arrays satisfy the predicate [p].
That is, it returns [(p a1 b1) && (p a2 b2) && ... && (p an bn)].
Raise Invalid_argument if the two lists are determined to have different lengths.
*)
val list_replace : 'a -> 'a -> 'a list -> 'a list
(** [list_replace old_v new_v list]
Return the same list as [list] were the first occurence of [old_v]
has been replaced by [new_v]. If [old_v] is unbound, the list is
returned unchanged.
*)
val list_except : 'a -> 'a list -> 'a list
(** [list_except x list]
Return a list containing all the elements from [list]
except the first occurence of [x]. *)
val list_index : 'a -> 'a list -> int
(** [list_index element list]
Finds the index of [element] in list.
Raises [Not_found] if it does not exists.
*)
val list_ref_append : 'a list ref -> 'a -> unit
(** [list_ref_append tl hd]
Add [hd] at the beginning of [tl] ref.
*)
val input_file_ic : in_channel -> string
(** Read the content of a file.
Starts from the position where it is when calling [input_file_ic],
and read until the end of the file.
This function avoid crashes with text files on Windows platform.
If the channel is opened on a file that is not a regular file,
the result is meaningless.
*)
val normalize_utf_8 : string -> string
(** [normalize_utf_8 s]
Return [s] normalized using
{{:http://www.unicode.org/glossary/#normalization_form_c}NFC}
with all malformed UTF-8 character replaced by
{{:http://unicode.org/glossary/#replacement_character}the replacement character}
*)
val list_map_sort_uniq : ('a -> 'b) -> 'a list -> 'b list
(** [list_map_sort_uniq f l] apply [f] to every element and return
sorted with Merge Sort algorithm list where every element is unique. *)
val list_rev_map_append : ('a -> 'b) -> 'a list -> 'b list -> 'b list
(** [list_rev_map_append f l1 l2] apply [f] to every element in [l1], reverse it and
concat with [l2]. *)
val read_or_create_channel :
?magic:string ->
?wait:bool ->
string ->
(in_channel -> 'a) ->
(out_channel -> 'a) ->
'a
(** [read_or_create_channel ?magic fname read write]
If [fname] exists (and starts with [magic] if this one is provided),
[read] function is used on the file.
If it does not, or does not start with [magic], or if [read] raise an exception,
[write] function is used on the file.
This function takes care of locking and closing files so you must not take care of
that in [read]/[write].
It also takes care of writing [magic] at the beginning of the file before calling
[write]
On Windows, file is not locked.
*)
val read_or_create_value :
?magic:string -> ?wait:bool -> string -> (unit -> 'a) -> 'a
(** [read_or_create_value ?magic fname create]
If [fname] exists (and starts and ends with [magic] if this one is provided),
return the unmarshalled value.
If it does not, or does not start with [magic], or if unmarshalling raise an exception,
[create] function is used to produce the value to be marshalled.
On Windows, file is not locked.
*)
val bench : string -> (unit -> 'a) -> 'a
(** [bench name fn]
Execute [fn], print stats about time and memory allocation, return [fn] result.
*)
val print_callstack : ?max:int -> unit -> unit
(** Prints call stack on stderr with at most [max] entries. *)
val encode : string -> Adef.encoded_string
(** [encode s]
Encodes the string [s] in another string
where spaces and special characters are coded. This allows
to put such strings in html links <a href=...>. This is
the same encoding done by Web browsers in forms.
*)
val decode : Adef.encoded_string -> string
(** [decode s]
Does the inverse job than [code],
restoring the initial string. The heading and trailing spaces
are stripped.
*)
val gen_decode : bool -> Adef.encoded_string -> string
(** Like above but heading and trailing spaces are stripped
only if bool parameter is [true]. [decode] = [gen_decode true].
*)
val extract_param : string -> char -> string list -> string
(** [extract_param name stopc request] can be used to extract some
parameter from a browser [request] (list of strings); [name]
is a string which should match the beginning of a request line,
[stopc] is a character ending the request line. For example, the
string request has been obtained by: [extract_param "GET /" ' '].
Answers the empty string if the parameter is not found. *)
val sprintf_date : Unix.tm -> Adef.safe_string
(** Print a date using "%04d-%02d-%02d %02d:%02d:%02d" format
Example : 2021-12-13 22:35:08. *)
val rev_input_line : in_channel -> int -> bytes ref * int ref -> string * int
(** [rev_input_line ic pos (rbytes, rpos)]
Read characters in reverse order from the given input channel,
until a newline character is encountered.
Return the string of all characters read, without the newline
character at the end, and the position of the first character
of the returned line (to be used with next [rev_input_line] call).
[rpos] and [rbytes] are intermediate between [ic] and reading functions.
At the beginig when [!rpos = 0] and [rbytes] is empty, initialise buffer with
the size = 1024, then reads last 1024 characters from [ci]. When [rpos] comes
down to 0, resize buffer *2 and reads 2048 characters before 1024 last
characters. [rpos] and [rbytes] must be the same in each subsequents calls
Raises [End_of_file] if the beginning of the file is reached
at the beginning of line.
*)
val search_file_opt : string list -> string -> string option
(** [search_file directories file]
Search for a [file] in different [directories] and return
then first result or [None] if not found
*)
val search_asset_opt : string -> string option
(** [search_asset fname]
Searches for a file in assets directories.
i.e. directories previously registered with [Secure.add_assets] *)
val eq_key : string * string * int -> string * string * int -> bool
(** [eq_key (fn1, sn1, oc1) (fn2, sn2, oc2)]
Tests if two persons would have the same key
*)
val ls_r : string list -> string list
(** [ls_r dirs]
List directories (and subdirectories) contents of [dirs], including [dirs] themselves.
*)
val rm_rf : string -> unit
(** [rm_rf dir]
Remove directory [dir] and everything inside [dir].
*)
val filter_map : ('a -> 'b option) -> 'a list -> 'b list
(** [filter_map fn list] is a combination of map and filter.
Not tail-recursive.
*)
val rev_iter : ('a -> unit) -> 'a list -> unit
(** [rev_iter fn list] is like [List.iter fn (List.rev list)].
Not tail-recursive.
*)
val groupby :
key:('a -> 'k) -> value:('a -> 'v) -> 'a list -> ('k * 'v list) list
(** [groupby ~key ~value list]
Group the elements returning the same key together.
Ordering of elements is unspecified.
*)
val digest : string -> string
(** [digest s]
Returns the (128 bits long, using MD5 algorithm) digest of [s].
*)
val empty_person : 'string -> 'string -> (unit, _, 'string) Def.gen_person
(** [empty_person empty quest] returns a Def.gen_person with
[first_name] and [surname] initialized to [quest],
other 'string field initialized to [empty], and
only empty arrays/lists.
*)
val empty_family : 'string -> (_, unit, 'string) Def.gen_family
(** [empty_family empty] returns a Def.gen_person with string field initialized
initialized with [empty] and only empty arrays/lists.
*)
val good_name : string -> bool
(** test the base name fir accepted characters: a..z, A..Z, - *)

269
lib/util/name.ml Normal file
View File

@@ -0,0 +1,269 @@
(* Copyright (c) 1998-2007 INRIA *)
(* La liste des caractères interdits *)
let forbidden_char = [ ':'; '@'; '#'; '='; '$' ]
(* Name.lower *)
let unaccent_utf_8 lower s i =
let fns =
if lower then fun n s -> (String.lowercase_ascii s, n) else fun n s -> (s, n)
in
let fnc =
if lower then fun n c -> (String.make 1 @@ Char.lowercase_ascii c, n)
else fun n c -> (String.make 1 c, n)
in
let s, n =
Unidecode.decode fns fnc
(fun n -> (String.sub s i (n - i), n))
s i (String.length s)
in
if lower then (String.lowercase_ascii s, n) else (s, n)
let next_chars_if_equiv s i t j =
if i >= String.length s || j >= String.length t then None
else
let s1, i1 = unaccent_utf_8 true s i in
let t1, j1 = unaccent_utf_8 true t j in
if s1 = t1 then Some (i1, j1) else None
(* Name.lower:
- uppercase -> lowercase
- no accents
- chars no letters and no numbers (except '.') => spaces (stripped)
Key comparison (first name, surname, number) applies "lower" equality
on first names and surnames *)
let lower s =
let rec copy special i len =
if i = String.length s then Buff.get len
else if Char.code s.[i] < 0x80 then
match s.[i] with
| ('a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '.') as c ->
let len = if special then Buff.store len ' ' else len in
let c = Char.lowercase_ascii c in
copy false (i + 1) (Buff.store len c)
| _ -> copy (len <> 0) (i + 1) len
else
let len = if special then Buff.store len ' ' else len in
let t, j = unaccent_utf_8 true s i in
copy false j (Buff.mstore len t)
in
copy false 0 0
let title s =
let t = ref true in
let cmap u =
let r = if !t then Uucp.Case.Map.to_upper u else Uucp.Case.Map.to_lower u in
t := not (Uucp.Alpha.is_alphabetic u);
r
in
Utf8.cmap_utf_8 cmap s
(* Name.abbrev *)
(* List of abbreviations. If abbreviation is mapped to [Some s] should be remplaced by
[s]. If mapped to None, it should be removed from name. *)
let abbrev_list =
[
("a", None);
("af", None);
("d", None);
("de", None);
("di", None);
("ier", Some "i");
("of", None);
("saint", Some "st");
("sainte", Some "ste");
("van", None);
("von", None);
("zu", None);
("zur", None);
]
(* Checks if the word starting at [i] in [s] is [p]. *)
let is_word s i p =
let rec is_word s i p ip =
if ip = String.length p then
if i = String.length s then true else if s.[i] = ' ' then true else false
else if i = String.length s then false
else if s.[i] = p.[ip] then is_word s (i + 1) p (ip + 1)
else false
in
is_word s i p 0
(* Checks if word that starts at position [i] in [s] is one of abbreviation *)
let rec search_abbrev s i = function
| (w, a) :: pl ->
if is_word s i w then Some (String.length w, a) else search_abbrev s i pl
| [] -> None
(* Name.abbrev: suppress lowercase particles, shorten "saint" into "st" *)
let abbrev s =
let rec copy can_start_abbrev i len =
if i >= String.length s then Buff.get len
else
match s.[i] with
| ' ' -> copy true (i + 1) (Buff.store len ' ')
| c ->
if can_start_abbrev then
match search_abbrev s i abbrev_list with
| None -> copy false (i + 1) (Buff.store len c)
| Some (n, Some a) -> copy false (i + n) (Buff.mstore len a)
| Some (n, None) -> copy true (i + n + 1) len
else copy false (i + 1) (Buff.store len c)
in
copy true 0 0
(* Name.strip *)
(* Name.strip_c = name without the charater c given as parameter *)
let strip_c s c =
let rec copy i len =
if i = String.length s then Buff.get len
else if s.[i] = c then copy (i + 1) len
else copy (i + 1) (Buff.store len s.[i])
in
copy 0 0
let strip s = strip_c s ' '
(* String without any forbidden caracters defined in forbidden_char *)
(* ******************************************************************** *)
(* [Fonc] purge : string -> string *)
(* ******************************************************************** *)
(** [Description] : Supprime tous les caractères interdits (défini par
forbidden_char) présents dans la chaine passée en
argument.
[Args] :
- s : string que l'on veut purger
[Retour] :
- string : retourne la chaîne délestée des caractères interdits
[Rem] : Exporté en clair hors de ce module. *)
let purge s = List.fold_left strip_c s forbidden_char
(* Name.crush *)
(* If string starting from [i] contains roman number then returns the next position,
else returns None. *)
let roman_number s i =
let rec loop i =
if i = String.length s then Some i
else if s.[i] = ' ' then Some i
else match s.[i] with 'i' | 'v' | 'x' | 'l' -> loop (i + 1) | _ -> None
in
if i = 0 || s.[i - 1] = ' ' then loop i else None
(* Name.crush, a custom sonnex/soundex-like phonetic algorithm:
- no spaces
- roman numbers are keeped
- vowels are suppressed, except in words starting with a vowel,
where this vowel is converted into "e"
- "k" and "q" replaced by "c"
- "y" replaced by "i"
- "z" replaced by "s"
- "ph" replaced by "f"
- others "h" deleted
- s at end of words are deleted
- no double lowercase consons *)
let crush s =
let rec copy i len first_vowel =
if i = String.length s then Buff.get len
else if s.[i] = ' ' then copy (i + 1) len true
else
match roman_number s i with
| Some j ->
let rec loop i len =
if i = j then copy j len true
else loop (i + 1) (Buff.store len s.[i])
in
loop i len
| _ -> (
match s.[i] with
| 'a' | 'e' | 'i' | 'o' | 'u' | 'y' ->
let len = if first_vowel then Buff.store len 'e' else len in
copy (i + 1) len false
| 'h' ->
let len =
if i > 0 && s.[i - 1] = 'p' then Buff.store (len - 1) 'f'
else len
in
copy (i + 1) len first_vowel
| ('s' | 'z') when i = String.length s - 1 || s.[i + 1] = ' ' ->
let len =
let rec loop i len =
if
i > 0 && len > 0
&& s.[i] = Bytes.get !Buff.buff len
&& (s.[i] = 's' || s.[i] = 'z')
then loop (i - 1) (len - 1)
else len + 1
in
loop (i - 1) (len - 1)
in
copy (i + 1) len false
| 's' when i = String.length s - 1 || s.[i + 1] = ' ' ->
copy (i + 1) len false
| c ->
if i > 0 && s.[i - 1] = c then copy (i + 1) len false
else
let c = match c with 'k' | 'q' -> 'c' | 'z' -> 's' | c -> c in
copy (i + 1) (Buff.store len c) false)
in
copy 0 0 true
(* strip_lower *)
(* strip_lower = strip o lower, as first comparison of names.
First names and Surnames comparison is strip_lower equality. *)
let strip_lower s = strip (lower s)
(* crush_lower *)
(* crush_lower = crush o abbrev o lower, as second comparison of names.
In index by names, the "names" are crush_lowers *)
let crush_lower s = crush (abbrev (lower s))
(* concat two strings using Bytes module *)
let concat_aux fn l1 sn l2 =
let b = Bytes.create (l1 + l2 + 1) in
Bytes.blit_string fn 0 b 0 l1;
Bytes.blit_string sn 0 b (l1 + 1) l2;
Bytes.unsafe_set b l1 ' ';
Bytes.unsafe_to_string b
let concat fn sn = concat_aux fn (String.length fn) sn (String.length sn)
let contains_forbidden_char s = List.exists (String.contains s) forbidden_char
(* Copy/paste from String.split_on_char adapted to our needs *)
let split_sname_callback fn s =
let open String in
let j = ref (length s) in
for i = length s - 1 downto 0 do
if match unsafe_get s i with ' ' | '-' -> true | _ -> false then (
fn (i + 1) (!j - i - 1);
j := i)
done;
fn 0 !j
(* Copy/paste from String.split_on_char adapted to our needs *)
let split_fname_callback fn s =
let open String in
let j = ref (length s) in
for i = length s - 1 downto 0 do
if unsafe_get s i = ' ' then (
fn (i + 1) (!j - i - 1);
j := i)
done;
fn 0 !j
let split_sname s =
let r = ref [] in
split_sname_callback (fun i j -> r := String.sub s i j :: !r) s;
!r
let split_fname s =
let r = ref [] in
split_fname_callback (fun i j -> r := String.sub s i j :: !r) s;
!r

86
lib/util/name.mli Normal file
View File

@@ -0,0 +1,86 @@
(* Copyright (c) 1998-2007 INRIA *)
val forbidden_char : char list
(** List of forbidden to use characters *)
val unaccent_utf_8 : bool -> string -> int -> string * int
(** [unaccent_utf_8 lower s i] checks UTF-8 characher that starts at position [i] inside [s]
and returns couple (cs,np) where [cs] is ASCII representation of this character (characters
between 0x00 and 0x7F) and [np] it's a position of next utf8 character inside [s]. If [lower]
is true then [cs] will contain only lowercase letters.
Example : unaccent_utf_8 "aÈa" 1 -> ("e",3) *)
val next_chars_if_equiv : string -> int -> string -> int -> (int * int) option
(** [next_chars_if_equiv s1 i1 s2 i2] checks if UTF-8 characters that start at position
[i1] inside [s1] and at [i2] inside [s2] are equivalent (have the same ASCII representation).
In this case returns position of the next charecter for each of them. Otherwise, returns None. *)
val lower : string -> string
(** Convert every letter to lowercase and use *unidecode* library to
represent unicode characters with ASCII. Non-alphanumeric characters
(except '.') are remplaced by space. *)
val title : string -> string
(** Apply uppercasing to the first letter of each name (sequence of alphabetic characters) part,
and lowercasing to the rest of the text. *)
val abbrev : string -> string
(** Remplace by an abbreviation or remove particles inside the name *)
val strip : string -> string
(** Removes all the spaces inside the name *)
val strip_c : string -> char -> string
(** [strip_c s c] removes all the occurences of [c] inside the name *)
val purge : string -> string
(** Removes all the forbiden characters from [forbidden_char] inside the name *)
val crush : string -> string
(** A custom sonnex/soundex-like phonetic algorithm:
- no spaces
- roman numbers are keeped
- vowels are suppressed, except in words starting with a vowel,
where this vowel is converted into "e"
- "k" and "q" replaced by "c"
- "y" replaced by "i"
- "z" replaced by "s"
- "ph" replaced by "f"
- others "h" deleted
- s at end of words are deleted
- no double lowercase consons *)
val strip_lower : string -> string
(** Equivalent to [strip o lower]. Used as:
- First comparison of names.
- Comparison for first names and surnames. *)
val crush_lower : string -> string
(** Equivalent to [crush o abbrev o lower]. Used as:
- Second comparison of names.
- Key when index by names *)
val concat : string -> string -> string
(** [concat fn sn] is [fn ^ " " ^ sn] but faster. *)
val contains_forbidden_char : string -> bool
(** [contains_forbidden_char s] is [true] iif s contains forbidden characters *)
val split_sname_callback : (int -> int -> unit) -> string -> unit
(** [split_sname_callback fn s]
Same as [split_sname], but call [fn] with substring indexes instead of building
a list *)
val split_fname_callback : (int -> int -> unit) -> string -> unit
(** [split_fname_callback fn s]
Same as [split_fname], but call [fn] with substring indexes instead of building
a list *)
val split_sname : string -> string list
(** [split_sname s] split the surname [s] in parts composing it.
e.g. [split_sname base "Foo-Bar"] is [[ "Foo" ; "Bar"]] *)
val split_fname : string -> string list
(** [split_fname s] split the string [s] representing multiple first names
into this list of firstname.
e.g. [split_fname base "Foo-Bar Baz"] is [[ "Foo-Bar" ; "Baz"]] *)

58
lib/util/pqueue.ml Normal file
View File

@@ -0,0 +1,58 @@
(* Copyright (c) 1998-2007 INRIA *)
module type OrderedType = sig
type t
val leq : t -> t -> bool
end
module type S = sig
type elt
type t
val empty : t
val is_empty : t -> bool
val add : elt -> t -> t
val take : t -> elt * t
val union : t -> t -> t
end
module Make (Ord : OrderedType) = struct
type elt = Ord.t
type t = tree list
and tree = { node : elt; rank : int; list : t }
let link t1 t2 =
if Ord.leq t1.node t2.node then
{ node = t1.node; rank = t1.rank + 1; list = t2 :: t1.list }
else { node = t2.node; rank = t2.rank + 1; list = t1 :: t2.list }
let rec ins t = function
| [] -> [ t ]
| t' :: ts -> if t.rank < t'.rank then t :: t' :: ts else ins (link t t') ts
let rec union fts1 fts2 =
match (fts1, fts2) with
| [], ts -> ts
| ts, [] -> ts
| t1 :: ts1, t2 :: ts2 ->
if t1.rank < t2.rank then t1 :: union ts1 fts2
else if t2.rank < t1.rank then t2 :: union fts1 ts2
else ins (link t1 t2) (union ts1 ts2)
let empty : t = []
let is_empty (q : t) = q = []
let add x q = ins { node = x; rank = 0; list = [] } q
let rec getMin = function
| [] -> raise Not_found
| [ t ] -> (t, [])
| t :: ts ->
let t', ts' = getMin ts in
if Ord.leq t.node t'.node then (t, ts) else (t', t :: ts')
let take ts =
let t, ts = getMin ts in
(t.node, union (List.rev t.list) ts)
end

48
lib/util/pqueue.mli Normal file
View File

@@ -0,0 +1,48 @@
(* Copyright (c) 1998-2007 INRIA *)
(** Module [Pqueue]: priority queues. *)
(** This module implements priority queues, given a total ordering function
over the elements inserted. All operations are purely applicative
(no side effects).
The implementation uses binomial queues from Chris Okasaki.
"add", "take" and "union" are in o(log n) in the worst case. *)
(** The input signature of the functor [Pqueue.Make].
[t] is the type of the inserted elements.
[leq] is a total ordering function over the elements.
This is a two-argument function [f] returning [True] if the
first argument is less or equal to the second one. *)
module type OrderedType = sig
type t
val leq : t -> t -> bool
end
(** Output signature for priority queue *)
module type S = sig
type elt
(** Type of elementes contained in priority queues. *)
type t
(** Type of priority queues. *)
val empty : t
(** The empty queue. *)
val is_empty : t -> bool
(** [is_empty q] checks the emptiness of [q]. *)
val add : elt -> t -> t
(** [add x h] adds a new element [x] in heap [h]. *)
val take : t -> elt * t
(** [take x] removes the minimum element of [x] and returns it;
raises [Not_found] when [x] is empty. *)
val union : t -> t -> t
(** [union q1 q2] returns heap constructed by union of [q1] [q2] *)
end
(** Functor that creates instance of priority queue from given element type. *)
module Make (Ord : OrderedType) : S with type elt = Ord.t

46
lib/util/progrBar.ml Normal file
View File

@@ -0,0 +1,46 @@
(* $Id: progrBar.ml,v 5.4 2007-02-01 10:28:55 ddr Exp $ *)
(* bar size in characters *)
let size = 60
let draw_rep = 5
let draw = "|/-\\"
let empty = ref '.'
let full = ref '#'
let draw_len = String.length draw
let pb_cnt = size * draw_rep * draw_len
let start () =
for _i = 1 to size do
Printf.eprintf "%c" !empty
done;
Printf.eprintf "\013"
let run cnt max_cnt =
let pb_cnt = if max_cnt < pb_cnt then size * draw_len else pb_cnt in
let already_disp = cnt * size / max_cnt in
let to_disp = (cnt + 1) * size / max_cnt in
for _i = already_disp + 1 to to_disp do
Printf.eprintf "%c" !full
done;
let already_disp = cnt * pb_cnt / max_cnt in
let to_disp = (cnt + 1) * pb_cnt / max_cnt in
(if cnt = max_cnt - 1 then Printf.eprintf " \008"
else if to_disp > already_disp then
let k = to_disp mod draw_len in
let k = if k < 0 then draw_len + k else k in
Printf.eprintf "%c\008" draw.[k]);
flush stderr
let suspend () =
Printf.eprintf "%c\n" !full;
flush stderr
let restart cnt max_cnt =
start ();
for i = 0 to cnt do
run i max_cnt
done
let finish () =
Printf.eprintf "\n";
flush stderr

24
lib/util/progrBar.mli Normal file
View File

@@ -0,0 +1,24 @@
(* $Id: progrBar.mli,v 5.3 2007-02-01 10:28:55 ddr Exp $ *)
val empty : char ref
(** Character that represents not passed part of progression bar *)
val full : char ref
(** Character that represents passed part of progression bar *)
val start : unit -> unit
(** Prints empty bar with carriage return. *)
val run : int -> int -> unit
(** [run i len] modifies progression bar that is now filled proportionally to
[i] by comparison with [len]. *)
val finish : unit -> unit
(** Stop printing progression bar and prints a new line. *)
val suspend : unit -> unit
(** Stop printing progression bar and prints a new line. *)
val restart : int -> int -> unit
(** [restart i len] restart progression bar. It's equivalent to call successively
[run] from 0 to [i]. *)

103
lib/util/secure.ml Normal file
View File

@@ -0,0 +1,103 @@
(* $Id: secure.ml,v 5.2 2007-01-19 01:53:17 ddr Exp $ *)
(* Copyright (c) 1998-2007 INRIA *)
(* secure open; forbids to access anywhere in the machine;
this is an extra security: the program should check for
correct open instead of hoping Secure do it for it *)
let ok_r = ref []
let assets_r = ref [ "gw" ]
let bd_r = ref (Filename.concat Filename.current_dir_name "bases")
(* [decompose: string -> string list] decompose a path into a list of
directory and a basename. "a/b/c" -> [ "a" ; "b"; "c" ] *)
let decompose =
let rec loop r s =
let b = Filename.basename s in
if b = "" || b = Filename.current_dir_name || b = Filename.dir_sep then
let d = Filename.dirname s in
if d = "" || d = Filename.current_dir_name then r
else if d = s then d :: r
else loop r d
else if b = s then b :: r
else loop (b :: r) (Filename.dirname s)
in
loop []
(* add asset to the list of allowed to acces assets *)
let add_assets d =
if not (List.mem d !assets_r) then (
assets_r := List.rev (d :: List.rev !assets_r);
ok_r := decompose d :: !ok_r)
(* set base dir to which acces could be allowed *)
let set_base_dir d =
let ok = decompose d in
bd_r := d;
ok_r := ok :: (List.filter (( <> ) ok)) !ok_r
(* get all assets *)
let assets () = !assets_r
let base_dir () = !bd_r
(* [list_check_prefix d df] returns either [None] if [d] is not a prefix of
[df], or [Some suffix], where [df = d @ suffix] *)
let list_check_prefix d df =
let rec loop = function
| x :: xl, y :: yl -> if x = y then loop (xl, yl) else None
| [], df -> Some df
| _, [] -> None
in
loop (d, df)
(** Check if a filename is safe to read:
* it must not contain the '\000' character
* it must either be relative to the local directory OR
included in one of the allowed directories (base_dir or assets)
* the relative part does not contain the '..' directory
*)
let check fname =
if String.contains fname '\000' then false
else
let df = decompose fname in
let rec loop = function
| d :: dl -> (
match list_check_prefix d df with
| Some bf when not (List.mem Filename.parent_dir_name bf) -> true
| _ -> loop dl)
| [] ->
if Filename.is_relative fname then
not (List.mem Filename.parent_dir_name df)
else false
in
loop !ok_r
let check_open fname =
if not (check fname) then (
if Sys.unix then (
Printf.eprintf "*** secure rejects open %s\n" (String.escaped fname);
flush stderr);
raise (Sys_error "invalid access"))
(* The following functions perform a [check] before opening the file,
preventing potential attacks on the system.
*)
let open_in fname =
check_open fname;
Stdlib.open_in fname
let open_in_bin fname =
check_open fname;
Stdlib.open_in_bin fname
let open_out fname =
check_open fname;
Stdlib.open_out fname
let open_out_bin fname =
check_open fname;
Stdlib.open_out_bin fname
let open_out_gen mode perm fname =
check_open fname;
Stdlib.open_out_gen mode perm fname

36
lib/util/secure.mli Normal file
View File

@@ -0,0 +1,36 @@
(* Copyright (c) 1998-2007 INRIA *)
val assets : unit -> string list
(** Returns list of allowed to acces assets *)
val base_dir : unit -> string
(** Returns directory where databases are installed to which acces is allowed *)
val add_assets : string -> unit
(** Add new asset to the [assets] list *)
val set_base_dir : string -> unit
(** Set base directory *)
val check : string -> bool
(** Check if a filename is safe to read:
- it must not contain the '\000' character
- it must either be relative to the local directory OR
included in one of the allowed directories (base_dir or assets)
- the relative part does not contain the '..' directory
*)
val open_in : string -> in_channel
(** Secured version of [open_in] *)
val open_in_bin : string -> in_channel
(** Secured version of [open_in_bin] *)
val open_out : string -> out_channel
(** Secured version of [open_out] *)
val open_out_bin : string -> out_channel
(** Secured version of [open_out_bin] *)
val open_out_gen : open_flag list -> int -> string -> out_channel
(** Secured version of [open_out_gen] *)

209
lib/util/utf8.ml Normal file
View File

@@ -0,0 +1,209 @@
(* TODO: replace with Unidecode.nbc
when version constraint [= 0.2.0] will be removed *)
let nbc c =
if Char.code c < 0x80 then 1
else if Char.code c < 0xC0 then invalid_arg "nbc"
else if Char.code c < 0xE0 then 2
else if Char.code c < 0xF0 then 3
else if Char.code c < 0xF8 then 4
else if Char.code c < 0xFC then 5
else if Char.code c < 0xFE then 6
else invalid_arg "nbc"
let next s i = i + nbc s.[i]
let get s i =
let rec loop i k = if k = 0 then i else loop (next s i) (pred k) in
loop 0 i
let length s =
let rec loop i len =
if i < String.length s then loop (next s i) (succ len) else len
in
loop 0 0
let sub ?pad str start len =
let strlen = String.length str in
let n, i =
let rec loop n i =
if n = len || strlen <= i then (n, i) else loop (n + 1) (next str i)
in
loop 0 start
in
if n = len then String.sub str start (i - start)
else
match pad with
| None -> raise (Invalid_argument "str_sub")
| Some pad ->
let bytes = Bytes.make (i - start + len - n) pad in
Bytes.blit
(Bytes.unsafe_of_string str)
start bytes 0 (String.length str);
Bytes.unsafe_to_string bytes
(**/**)
(* cmap_utf_8 code code comes from
http://erratique.ch/software/uucp/doc/Uucp.Case.html *)
let cmap_utf_8 cmap s =
let b = Buffer.create (String.length s * 2) in
let add_map _ _ u =
let u = match u with `Malformed _ -> Uutf.u_rep | `Uchar u -> u in
match cmap u with
| `Self -> Uutf.Buffer.add_utf_8 b u
| `Uchars us -> List.iter (Uutf.Buffer.add_utf_8 b) us
in
Uutf.String.fold_utf_8 add_map () s;
Buffer.contents b
(**/**)
let lowercase s = cmap_utf_8 Uucp.Case.Map.to_lower s
let uppercase s = cmap_utf_8 Uucp.Case.Map.to_upper s
let capitalize_fst s =
let first = ref true in
let cmap u =
if !first then (
first := false;
Uucp.Case.Map.to_upper u)
else `Self
in
cmap_utf_8 cmap s
let capitalize s =
let first = ref true in
let cmap u =
if !first then (
first := false;
Uucp.Case.Map.to_upper u)
else Uucp.Case.Map.to_lower u
in
cmap_utf_8 cmap s
module C = struct
type t = Str of string | Chr of char | Empty
let unaccent trimmed s i0 len =
let rec loop i =
if i < len then
match
match Char.code @@ String.unsafe_get s i with
(* A..Z *)
| ( 0x41 | 0x42 | 0x43 | 0x44 | 0x45 | 0x46 | 0x47 | 0x48 | 0x49
| 0x4A | 0x4B | 0x4C | 0x4D | 0x4E | 0x4F | 0x50 | 0x51 | 0x52
| 0x53 | 0x54 | 0x55 | 0x56 | 0x57 | 0x58 | 0x59 | 0x5A ) as c ->
(Chr (Char.unsafe_chr @@ (c + 32)), i, succ i)
(* a..z *)
| ( 0x61 | 0x62 | 0x63 | 0x64 | 0x65 | 0x66 | 0x67 | 0x68 | 0x69
| 0x6A | 0x6B | 0x6C | 0x6D | 0x6E | 0x6F | 0x70 | 0x71 | 0x72
| 0x73 | 0x74 | 0x75 | 0x76 | 0x77 | 0x78 | 0x79 | 0x7A
(* 0..9 *)
| 0x30 | 0x31 | 0x32 | 0x33 | 0x34 | 0x35 | 0x36 | 0x37 | 0x38
| 0x39 ) as c ->
(Chr (Char.unsafe_chr c), i, succ i)
(* '-' | ' ' | '\'' *)
| (0x2D | 0x20 | 0x27) as c ->
((if trimmed then Chr (Char.unsafe_chr c) else Empty), i, succ i)
| _ ->
Unidecode.decode
(fun n -> function
| "" -> (Empty, i, n)
| s -> (Str (String.lowercase_ascii s), i, n))
(fun n c ->
match Char.lowercase_ascii c with
| ('a' .. 'z' | '0' .. '9') as c -> (Chr c, i, n)
| ('-' | ' ' | '\'') as c ->
((if trimmed then Chr c else Empty), i, n)
| _ -> (Empty, i, n))
(fun n -> (Empty, i, n))
s i len
with
| Empty, _, n -> loop n
| x, i, n -> (x, i, n)
else (Empty, i0, len)
in
loop i0
(* See BatUTF8.look source (from batteries) *)
let cp s i =
Uchar.of_int
@@
let n = Char.code (String.unsafe_get s i) in
if n < 0x80 then n
else if n <= 0xdf then
((n - 0xc0) lsl 6) lor (0x7f land Char.code (String.unsafe_get s (i + 1)))
else if n <= 0xef then
let n' = n - 0xe0 in
let m = Char.code (String.unsafe_get s (i + 1)) in
let n' = (n' lsl 6) lor (0x7f land m) in
let m = Char.code (String.unsafe_get s (i + 2)) in
(n' lsl 6) lor (0x7f land m)
else
let n' = n - 0xf0 in
let m = Char.code (String.unsafe_get s (i + 1)) in
let n' = (n' lsl 6) lor (0x7f land m) in
let m = Char.code (String.unsafe_get s (i + 2)) in
let n' = (n' lsl 6) lor (0x7f land m) in
let m = Char.code (String.unsafe_get s (i + 3)) in
(n' lsl 6) lor (0x7f land m)
(* compare bytes (UTF-8 charachter) delimited by intevals [i1,j1] and [i2,j2] *)
let cmp_substring s1 i1 j1 s2 i2 j2 =
let l1 = j1 - i1 in
let l2 = j2 - i2 in
if l1 = 1 && l2 = 1 then
(* Optimize ASCII characters comparison *)
Char.compare
(Char.lowercase_ascii @@ String.get s1 i1)
(Char.lowercase_ascii @@ String.get s2 i2)
else
let c1 = cp s1 i1 in
let c2 = cp s2 i2 in
let c1 =
match Uucp.Case.Fold.fold c1 with `Self -> [ c1 ] | `Uchars us -> us
in
let c2 =
match Uucp.Case.Fold.fold c2 with `Self -> [ c2 ] | `Uchars us -> us
in
Stdlib.compare c1 c2
(* See [Utf8.compare] *)
let compare n1 n2 =
let trimmed1 = ref false in
let trimmed2 = ref false in
let rec loop i1 i2 =
if i1 >= String.length n1 && i2 >= String.length n2 then i1 - i2
else if i1 >= String.length n1 then -1
else if i2 >= String.length n2 then 1
else
let c1, i1, ii1 = unaccent !trimmed1 n1 i1 (String.length n1) in
let c2, i2, ii2 = unaccent !trimmed2 n2 i2 (String.length n2) in
let () = trimmed1 := true in
let () = trimmed2 := true in
let cmp_aux = function
| 0 -> (
match cmp_substring n1 i1 ii1 n2 i2 ii2 with
| 0 -> loop ii1 ii2
| x -> x)
| x -> x
in
match (c1, c2) with
| Str s1, Str s2 -> cmp_aux (String.compare s1 s2)
| Chr c1, Chr c2 -> cmp_aux (Char.compare c1 c2)
| Empty, Empty -> cmp_aux 0
| (Str _ | Chr _), Empty -> -1
| Empty, (Str _ | Chr _) -> 1
| Str s1, Chr c2 -> (
match Char.compare (String.unsafe_get s1 0) c2 with
| 0 -> 1
| x -> x)
| Chr c1, Str s2 -> (
match Char.compare c1 (String.unsafe_get s2 0) with
| 0 -> -1
| x -> x)
in
if n1 = n2 then 0 else loop 0 0
end
let compare = C.compare

68
lib/util/utf8.mli Normal file
View File

@@ -0,0 +1,68 @@
val nbc : char -> int
(** Return the number of bytes composing the UTF8 character starting with [c] *)
val next : string -> int -> int
(** [Utf8.next s i] returns the index of the character comming after
the one which starts at [i]. *)
val get : string -> int -> int
(** [Utf8.get s n] returns the index where the [n]-th character
starts in string [s]. *)
val length : string -> int
(** Return the length (number of characters, not bytes)
of the given string. *)
val sub : ?pad:char -> string -> int -> int -> string
(** [sub ?pad s start len]
Return a fresh UTF8-friendly substring of [len] characters, padded if needed.
Be careful [start] is the index of the byte where to start in [s],
not the [start-th] UTF8-character. *)
val cmap_utf_8 :
(Uchar.t -> [< `Self | `Uchars of Uchar.t list ]) -> string -> string
(** [cmap_utf_8 cmap s] returns the UTF-8 encoded string
resulting from applying the character map [cmap] to every character
of the UTF-8 encoded string [s]. *)
val lowercase : string -> string
(** Returns UTF-8 encoded string with all uppercase letters translated to lowercase *)
val uppercase : string -> string
(** Returns UTF-8 encoded string with all lowercase letters translated to uppercase *)
val capitalize_fst : string -> string
(** Returns UTF-8 encoded string where the first letter is capitalised *)
val capitalize : string -> string
(** Returns UTF-8 encoded string where the first letter is capitalised and others minimalised *)
module C : sig
(** Utf8 char type. *)
type t = Str of string | Chr of char | Empty
val unaccent : bool -> string -> int -> int -> t * int * int
(** [unaccent trimmed s i0 len]
Returns [(t, start, next)]: next UTF-8 character in string [s] starting at position [i0].
The diacritic marks are removed, character is also case lowered, and any character
returning [Empty] (unsupported or reported as empty) is ignored: the next character in [s]
will be picked except if you reach [len]. In that case, [Empty] is returned.
[start] is the byte offset in [s] where the resulting character [t] starts.
[next] is the offset of the byte after [t].
*)
val cp : string -> int -> Uchar.t
(** [cp s i] returns the Unicode code point of the character starting
at [i]-th byte. *)
end
val compare : string -> string -> int
(** [compare a b] compare normalized version of [a] and [b]
It is case insensitive.
It starts with unaccented comparison of [a] and [b],
and refine the result with accents comparison.
Here is an exemple of how letters would be sorted:
[A À Á  B C Ç Č D E É L Ł Ô Ö Ø Œ P Q R * . ?]
*)