Initial comit - Clone
This commit is contained in:
36
lib/util/buff.ml
Normal file
36
lib/util/buff.ml
Normal file
@@ -0,0 +1,36 @@
|
||||
(* $Id: buff.ml,v 5.1 2006-10-15 15:39:39 ddr Exp $ *)
|
||||
|
||||
module Make () = struct
|
||||
let buff = ref (Bytes.create 80)
|
||||
|
||||
let store len x =
|
||||
if len >= Bytes.length !buff then
|
||||
buff := Bytes.extend !buff 0 (Bytes.length !buff);
|
||||
Bytes.set !buff len x;
|
||||
succ len
|
||||
|
||||
(* gstore and mstore axillary function *)
|
||||
let unsafe_gstore len s si slen =
|
||||
let newlen = len + slen in
|
||||
(if newlen > Bytes.length !buff then
|
||||
let more = max slen (Bytes.length !buff) in
|
||||
buff := Bytes.extend !buff 0 more);
|
||||
Bytes.blit_string s si !buff len slen;
|
||||
newlen
|
||||
|
||||
let mstore len s = unsafe_gstore len s 0 (String.length s)
|
||||
|
||||
let gstore len s si slen =
|
||||
unsafe_gstore len s si (min slen (String.length s - si))
|
||||
|
||||
let get len = Bytes.sub_string !buff 0 len
|
||||
end
|
||||
|
||||
(* Global buffer *)
|
||||
module BB = Make ()
|
||||
|
||||
let buff = BB.buff
|
||||
let store = BB.store
|
||||
let mstore = BB.mstore
|
||||
let gstore = BB.gstore
|
||||
let get = BB.get
|
||||
42
lib/util/buff.mli
Normal file
42
lib/util/buff.mli
Normal file
@@ -0,0 +1,42 @@
|
||||
(** Functor building a local implementation of the buffer. *)
|
||||
module Make () : sig
|
||||
val buff : bytes ref
|
||||
(** Internal representation of the buffer *)
|
||||
|
||||
val store : int -> char -> int
|
||||
(** [store i c] stores a character [c] at the position [i] inside the buffer.
|
||||
Automatically extends buffer if needed. Returns the position that follows
|
||||
inserted character ([i+1]) in buffer. Should be used either with position
|
||||
0 or with position returned by previous calls of store functions. *)
|
||||
|
||||
val mstore : int -> string -> int
|
||||
(** [mstore i s] stores a string [s] starting from the postion [i] inside the
|
||||
buffer. Automatically extends buffer if needed. Returns the position that
|
||||
follows inserted string in buffer. Should be used either with position 0
|
||||
or with position returned by previous calls of store functions.*)
|
||||
|
||||
val gstore : int -> string -> int -> int -> int
|
||||
(** [gstore i s si len] stores substring of [s] from [si] position with length
|
||||
[len] inside the buffer starting from the postion [i]. Automatically extends
|
||||
buffer if needed. Returns the position that follows inserted substring in
|
||||
buffer. Should be used either with position 0 or with position returned
|
||||
by previous calls of store functions.*)
|
||||
|
||||
val get : int -> string
|
||||
(** [get len] returns buffer's content until position [len] *)
|
||||
end
|
||||
|
||||
val buff : bytes ref
|
||||
(** Variable [buff] for the global buffer *)
|
||||
|
||||
val get : int -> string
|
||||
(** Function [get] for the global buffer. *)
|
||||
|
||||
val store : int -> char -> int
|
||||
(** Function [store] for the global buffer. *)
|
||||
|
||||
val mstore : int -> string -> int
|
||||
(** Function [mstore] for the global buffer. *)
|
||||
|
||||
val gstore : int -> string -> int -> int -> int
|
||||
(** Function [gstore] for the global buffer. *)
|
||||
66
lib/util/calendar.ml
Normal file
66
lib/util/calendar.ml
Normal file
@@ -0,0 +1,66 @@
|
||||
(* TODO this is probably buggy,
|
||||
because geneweb uses month|day = 0 for incomplete dates *)
|
||||
(** Convert [Adef.date] to Calendars.d *)
|
||||
let to_calendars : Def.dmy -> Calendars.d =
|
||||
fun { Def.day; month; year; delta; _ } -> { Calendars.day; month; year; delta }
|
||||
|
||||
(** Convert Calendars.d to [Adef.date] *)
|
||||
let of_calendars : ?prec:Def.precision -> Calendars.d -> Def.dmy =
|
||||
fun ?(prec = Def.Sure) { Calendars.day; month; year; delta } ->
|
||||
{ Def.day; month; year; delta; prec }
|
||||
|
||||
let sdn_of_gregorian (d : Def.dmy) =
|
||||
Calendars.sdn_of_gregorian @@ to_calendars d
|
||||
|
||||
let gregorian_of_sdn prec sdn =
|
||||
of_calendars ~prec @@ Calendars.gregorian_of_sdn sdn
|
||||
|
||||
let sdn_of_julian (d : Def.dmy) = Calendars.sdn_of_julian @@ to_calendars d
|
||||
let julian_of_sdn prec sdn = of_calendars ~prec @@ Calendars.julian_of_sdn sdn
|
||||
let sdn_of_french (d : Def.dmy) = Calendars.sdn_of_french @@ to_calendars d
|
||||
let french_of_sdn prec sdn = of_calendars ~prec @@ Calendars.french_of_sdn sdn
|
||||
let sdn_of_hebrew (d : Def.dmy) = Calendars.sdn_of_hebrew @@ to_calendars d
|
||||
let hebrew_of_sdn prec sdn = of_calendars ~prec @@ Calendars.hebrew_of_sdn sdn
|
||||
|
||||
let dmy_of_dmy2 : Def.dmy2 -> Def.dmy =
|
||||
fun { Def.day2; month2; year2; delta2 } ->
|
||||
{
|
||||
Def.day = day2;
|
||||
month = month2;
|
||||
year = year2;
|
||||
prec = Def.Sure;
|
||||
delta = delta2;
|
||||
}
|
||||
|
||||
let aux fn (d : Def.dmy) : Def.dmy =
|
||||
let aux2 d2 =
|
||||
let d = of_calendars @@ fn @@ to_calendars @@ dmy_of_dmy2 d2 in
|
||||
{
|
||||
Def.day2 = d.Def.day;
|
||||
month2 = d.Def.month;
|
||||
year2 = d.Def.year;
|
||||
delta2 = d.Def.delta;
|
||||
}
|
||||
in
|
||||
let prec =
|
||||
match d.Def.prec with
|
||||
| Def.OrYear d2 -> Def.OrYear (aux2 d2)
|
||||
| Def.YearInt d2 -> Def.YearInt (aux2 d2)
|
||||
| prec -> prec
|
||||
in
|
||||
of_calendars ~prec @@ fn @@ to_calendars d
|
||||
|
||||
let gregorian_of_julian = aux Calendars.gregorian_of_julian
|
||||
let julian_of_gregorian = aux Calendars.julian_of_gregorian
|
||||
let gregorian_of_french = aux Calendars.gregorian_of_french
|
||||
let french_of_gregorian = aux Calendars.french_of_gregorian
|
||||
let gregorian_of_hebrew = aux Calendars.gregorian_of_hebrew
|
||||
let hebrew_of_gregorian = aux Calendars.hebrew_of_gregorian
|
||||
|
||||
type moon_phase = Calendars.moon_phase =
|
||||
| NewMoon
|
||||
| FirstQuarter
|
||||
| FullMoon
|
||||
| LastQuarter
|
||||
|
||||
let moon_phase_of_sdn = Calendars.moon_phase_of_sdn
|
||||
51
lib/util/calendar.mli
Normal file
51
lib/util/calendar.mli
Normal file
@@ -0,0 +1,51 @@
|
||||
val gregorian_of_sdn : Def.precision -> int -> Def.dmy
|
||||
(** Returns date of gregorian calendar from SDN and specified precision. *)
|
||||
|
||||
val julian_of_sdn : Def.precision -> int -> Def.dmy
|
||||
(** Returns date of julian calendar from SDN and specified precision. *)
|
||||
|
||||
val french_of_sdn : Def.precision -> int -> Def.dmy
|
||||
(** Returns date of french calendar from SDN and specified precision. *)
|
||||
|
||||
val hebrew_of_sdn : Def.precision -> int -> Def.dmy
|
||||
(** Returns date of hebrew calendar from SDN and specified precision. *)
|
||||
|
||||
val sdn_of_gregorian : Def.dmy -> int
|
||||
(** Returns SDN of the date of gregorian calendar. *)
|
||||
|
||||
val sdn_of_julian : Def.dmy -> int
|
||||
(** Returns SDN of the date of julian calendar. *)
|
||||
|
||||
val sdn_of_french : Def.dmy -> int
|
||||
(** Returns SDN of the date of french calendar. *)
|
||||
|
||||
val sdn_of_hebrew : Def.dmy -> int
|
||||
(** Returns SDN of the date of hebrew calendar. *)
|
||||
|
||||
val gregorian_of_julian : Def.dmy -> Def.dmy
|
||||
(** Converts julian calendar's date to gregorian. *)
|
||||
|
||||
val julian_of_gregorian : Def.dmy -> Def.dmy
|
||||
(** Converts gregorian calendar's date to julian date. *)
|
||||
|
||||
val gregorian_of_french : Def.dmy -> Def.dmy
|
||||
(** Converts french calendar's date to gregorian date. *)
|
||||
|
||||
val french_of_gregorian : Def.dmy -> Def.dmy
|
||||
(** Converts gregorian calendar's date to french date. *)
|
||||
|
||||
val gregorian_of_hebrew : Def.dmy -> Def.dmy
|
||||
(** Converts hebrew calendar's date to gregorian date. *)
|
||||
|
||||
val hebrew_of_gregorian : Def.dmy -> Def.dmy
|
||||
(** Converts gregorian calendar's date to hebrew date. *)
|
||||
|
||||
(** Moon phases *)
|
||||
type moon_phase = NewMoon | FirstQuarter | FullMoon | LastQuarter
|
||||
|
||||
val moon_phase_of_sdn : int -> (moon_phase * int * int) option * int
|
||||
(** Returns information about moon phase from the given SDN.
|
||||
Result [(Some (mph,h,m), day)] describes moon phase [mph], hour [h] and minute
|
||||
[m] when this phase appears and days [day] since last New Moon phase (moon's age).
|
||||
If result is [(None,_)], it tells that there wasn't any moon's phase (one
|
||||
of the mentionned in [moon_phase]) this day. *)
|
||||
222
lib/util/date.ml
Normal file
222
lib/util/date.ml
Normal file
@@ -0,0 +1,222 @@
|
||||
(* Copyright (c) 1998-2007 INRIA *)
|
||||
|
||||
open Def
|
||||
|
||||
type cdate = Adef.cdate =
|
||||
| Cgregorian of int
|
||||
| Cjulian of int
|
||||
| Cfrench of int
|
||||
| Chebrew of int
|
||||
| Ctext of string
|
||||
| Cdate of date
|
||||
| Cnone
|
||||
|
||||
(* compress concrete date if it's possible *)
|
||||
let compress d =
|
||||
let simple =
|
||||
match d.prec with
|
||||
| Sure | About | Maybe | Before | After ->
|
||||
d.day >= 0 && d.month >= 0 && d.year > 0 && d.year < 2500 && d.delta = 0
|
||||
| OrYear _ | YearInt _ -> false
|
||||
in
|
||||
if simple then
|
||||
let p =
|
||||
match d.prec with
|
||||
| About -> 1
|
||||
| Maybe -> 2
|
||||
| Before -> 3
|
||||
| After -> 4
|
||||
| Sure | OrYear _ | YearInt _ -> 0
|
||||
in
|
||||
Some ((((((p * 32) + d.day) * 13) + d.month) * 2500) + d.year)
|
||||
else None
|
||||
|
||||
(* uncompress concrete date *)
|
||||
let uncompress x =
|
||||
let year, x = (x mod 2500, x / 2500) in
|
||||
let month, x = (x mod 13, x / 13) in
|
||||
let day, x = (x mod 32, x / 32) in
|
||||
let prec =
|
||||
match x with
|
||||
| 1 -> About
|
||||
| 2 -> Maybe
|
||||
| 3 -> Before
|
||||
| 4 -> After
|
||||
| _ -> Sure
|
||||
in
|
||||
{ day; month; year; prec; delta = 0 }
|
||||
|
||||
let date_of_cdate = function
|
||||
| Cgregorian i -> Dgreg (uncompress i, Dgregorian)
|
||||
| Cjulian i -> Dgreg (uncompress i, Djulian)
|
||||
| Cfrench i -> Dgreg (uncompress i, Dfrench)
|
||||
| Chebrew i -> Dgreg (uncompress i, Dhebrew)
|
||||
| Cdate d -> d
|
||||
| Ctext t -> Dtext t
|
||||
| Cnone -> failwith "date_of_cdate"
|
||||
|
||||
let cdate_of_date d =
|
||||
match d with
|
||||
| Dgreg (g, cal) -> (
|
||||
match compress g with
|
||||
| Some i -> (
|
||||
match cal with
|
||||
| Dgregorian -> Cgregorian i
|
||||
| Djulian -> Cjulian i
|
||||
| Dfrench -> Cfrench i
|
||||
| Dhebrew -> Chebrew i)
|
||||
| None -> Cdate d)
|
||||
| Dtext t -> Ctext t
|
||||
|
||||
let cdate_of_od = function Some d -> cdate_of_date d | None -> Cnone
|
||||
|
||||
let od_of_cdate od =
|
||||
match od with Cnone -> None | _ -> Some (date_of_cdate od)
|
||||
|
||||
let cdate_None = cdate_of_od None
|
||||
|
||||
let dmy_of_dmy2 dmy2 =
|
||||
{
|
||||
day = dmy2.day2;
|
||||
month = dmy2.month2;
|
||||
year = dmy2.year2;
|
||||
prec = Sure;
|
||||
delta = dmy2.delta2;
|
||||
}
|
||||
|
||||
let leap_year a = if a mod 100 = 0 then a / 100 mod 4 = 0 else a mod 4 = 0
|
||||
|
||||
let nb_days_in_month m a =
|
||||
if m = 2 && leap_year a then 29
|
||||
else if m >= 1 && m <= 12 then
|
||||
[| 31; 28; 31; 30; 31; 30; 31; 31; 30; 31; 30; 31 |].(m - 1)
|
||||
else 0
|
||||
|
||||
(* TODO use SDN instead *)
|
||||
let time_elapsed d1 d2 =
|
||||
let prec =
|
||||
match (d1.prec, d2.prec) with
|
||||
| Sure, Sure -> Sure
|
||||
| (Maybe | Sure | About), (Maybe | Sure | About) -> Maybe
|
||||
| (About | Maybe | Sure | Before), (After | Sure | Maybe | About) -> After
|
||||
| (About | Maybe | Sure | After), (Before | Sure | Maybe | About) -> Before
|
||||
| _ -> Maybe
|
||||
in
|
||||
match d1 with
|
||||
| { day = 0; month = 0; year = a1 } ->
|
||||
{ day = 0; month = 0; year = d2.year - a1; prec; delta = 0 }
|
||||
| { day = 0; month = m1; year = a1 } -> (
|
||||
match d2 with
|
||||
| { day = 0; month = 0; year = a2 } ->
|
||||
{ day = 0; month = 0; year = a2 - a1; prec; delta = 0 }
|
||||
| { day = 0; month = m2; year = a2 } ->
|
||||
let month, r = if m1 <= m2 then (m2 - m1, 0) else (m2 - m1 + 12, 1) in
|
||||
let year = a2 - a1 - r in
|
||||
{ day = 0; month; year; prec; delta = 0 }
|
||||
| { month = m2; year = a2 } ->
|
||||
let month, r = if m1 <= m2 then (m2 - m1, 0) else (m2 - m1 + 12, 1) in
|
||||
let year = a2 - a1 - r in
|
||||
{ day = 0; month; year; prec; delta = 0 })
|
||||
| { day = j1; month = m1; year = a1 } -> (
|
||||
match d2 with
|
||||
| { day = 0; month = 0; year = a2 } ->
|
||||
{ day = 0; month = 0; year = a2 - a1; prec; delta = 0 }
|
||||
| { day = 0; month = m2; year = a2 } ->
|
||||
let month, r = if m1 <= m2 then (m2 - m1, 0) else (m2 - m1 + 12, 1) in
|
||||
let year = a2 - a1 - r in
|
||||
{ day = 0; month; year; prec; delta = 0 }
|
||||
| { day = j2; month = m2; year = a2 } ->
|
||||
let day, r =
|
||||
if j1 <= j2 then (j2 - j1, 0)
|
||||
else (j2 - j1 + nb_days_in_month m1 a1, 1)
|
||||
in
|
||||
let month, r =
|
||||
if m1 + r <= m2 then (m2 - m1 - r, 0) else (m2 - m1 - r + 12, 1)
|
||||
in
|
||||
let year = a2 - a1 - r in
|
||||
{ day; month; year; prec; delta = 0 })
|
||||
|
||||
let time_elapsed_opt d1 d2 =
|
||||
match (d1.prec, d2.prec) with
|
||||
| After, After | Before, Before -> None
|
||||
| _ -> Some (time_elapsed d1 d2)
|
||||
|
||||
(* TODO use SDN to compare date (?) *)
|
||||
(* use strict = false to compare date as if they are points on a timeline.
|
||||
use strict = true to compare date by taking precision in account. This makes some dates not comparable, do not use to sort a list *)
|
||||
let rec compare_dmy_opt ?(strict = false) dmy1 dmy2 =
|
||||
match compare dmy1.year dmy2.year with
|
||||
| 0 -> compare_month_or_day ~is_day:false strict dmy1 dmy2
|
||||
| x -> eval_strict strict dmy1 dmy2 x
|
||||
|
||||
and compare_month_or_day ~is_day strict dmy1 dmy2 =
|
||||
(* compare a known month|day with a unknown one (0) *)
|
||||
let compare_with_unknown_value ~strict ~unkonwn ~known =
|
||||
match unkonwn.prec with
|
||||
| After -> Some 1
|
||||
| Before -> Some (-1)
|
||||
| _other -> if strict then None else compare_prec false unkonwn known
|
||||
in
|
||||
(* if we are comparing months the next comparison to do is on days
|
||||
else if we are comparing days it is compare_prec *)
|
||||
let x, y, next_comparison =
|
||||
if is_day then (dmy1.day, dmy2.day, compare_prec)
|
||||
else (dmy1.month, dmy2.month, compare_month_or_day ~is_day:true)
|
||||
in
|
||||
(* 0 means month|day is unknow*)
|
||||
match (x, y) with
|
||||
| 0, 0 -> compare_prec strict dmy1 dmy2
|
||||
| 0, _ -> compare_with_unknown_value ~strict ~unkonwn:dmy1 ~known:dmy2
|
||||
| _, 0 ->
|
||||
(* swap dmy1 and dmy2 *)
|
||||
Option.map Int.neg
|
||||
@@ compare_with_unknown_value ~strict ~unkonwn:dmy2 ~known:dmy1
|
||||
| m1, m2 -> (
|
||||
match Int.compare m1 m2 with
|
||||
| 0 -> next_comparison strict dmy1 dmy2
|
||||
| x -> eval_strict strict dmy1 dmy2 x)
|
||||
|
||||
and compare_prec strict dmy1 dmy2 =
|
||||
match (dmy1.prec, dmy2.prec) with
|
||||
| (Sure | About | Maybe), (Sure | About | Maybe) -> Some 0
|
||||
| After, After | Before, Before -> Some 0
|
||||
| OrYear dmy1, OrYear dmy2 | YearInt dmy1, YearInt dmy2 ->
|
||||
compare_dmy_opt ~strict (dmy_of_dmy2 dmy1) (dmy_of_dmy2 dmy2)
|
||||
| _, After | Before, _ -> Some (-1)
|
||||
| After, _ | _, Before -> Some 1
|
||||
| _ -> Some 0
|
||||
|
||||
and eval_strict strict dmy1 dmy2 x =
|
||||
if strict then
|
||||
match x with
|
||||
| -1 when dmy1.prec = After || dmy2.prec = Before -> None
|
||||
| 1 when dmy1.prec = Before || dmy2.prec = After -> None
|
||||
| x -> Some x
|
||||
else Some x
|
||||
|
||||
exception Not_comparable
|
||||
|
||||
let compare_dmy ?(strict = false) dmy1 dmy2 =
|
||||
match compare_dmy_opt ~strict dmy1 dmy2 with
|
||||
| None -> raise Not_comparable
|
||||
| Some x -> x
|
||||
|
||||
let compare_date ?(strict = false) d1 d2 =
|
||||
match (d1, d2) with
|
||||
| Dgreg (dmy1, _), Dgreg (dmy2, _) -> compare_dmy ~strict dmy1 dmy2
|
||||
| Dgreg (_, _), Dtext _ -> if strict then raise Not_comparable else 1
|
||||
| Dtext _, Dgreg (_, _) -> if strict then raise Not_comparable else -1
|
||||
| Dtext _, Dtext _ -> if strict then raise Not_comparable else 0
|
||||
|
||||
let cdate_to_dmy_opt cdate =
|
||||
match od_of_cdate cdate with
|
||||
| Some (Dgreg (d, _)) -> Some d
|
||||
| Some (Dtext _) | None -> None
|
||||
|
||||
let cdate_of_death = function
|
||||
| Death (_, cd) -> Some cd
|
||||
| NotDead | DeadYoung | DeadDontKnowWhen | DontKnowIfDead | OfCourseDead ->
|
||||
None
|
||||
|
||||
let dmy_of_death death = Option.bind (cdate_of_death death) cdate_to_dmy_opt
|
||||
let date_of_death death = Option.bind (cdate_of_death death) od_of_cdate
|
||||
88
lib/util/date.mli
Normal file
88
lib/util/date.mli
Normal file
@@ -0,0 +1,88 @@
|
||||
(* Copyright (c) 1998-2007 INRIA *)
|
||||
|
||||
open Def
|
||||
|
||||
val leap_year : int -> bool
|
||||
(** Says if the given year is a leap year. *)
|
||||
|
||||
val nb_days_in_month : int -> int -> int
|
||||
(** Returns number of days for the given month and year for
|
||||
gregorian calendar. Takes into account leap years. *)
|
||||
|
||||
val time_elapsed : Def.dmy -> Def.dmy -> Def.dmy
|
||||
(** [time_elapsed start stop]
|
||||
Compute the time elapsed between [start] and [stop].
|
||||
If [stop] is prior to [start], resulting [dmy]'s field
|
||||
are negative (but correct).
|
||||
Resulting [prec] can be:
|
||||
- [Sure] for exact duration
|
||||
- [Before] for "less than" duration
|
||||
- [After] for "more than" duration
|
||||
- [Maybe] for other cases
|
||||
Used to compare only gregorian calendar's dates.
|
||||
*)
|
||||
|
||||
val time_elapsed_opt : Def.dmy -> Def.dmy -> Def.dmy option
|
||||
(** Same as [time_elapsed], but will return [None]
|
||||
if computation is not possible
|
||||
(e.g. time_elapsed_opt /1839 /1859). *)
|
||||
|
||||
(* TODO add date_of_burial/event? *)
|
||||
val dmy_of_death : Def.death -> Adef.dmy option
|
||||
|
||||
val date_of_death : Def.death -> Adef.date option
|
||||
(** Returns date of death if present. *)
|
||||
|
||||
val dmy_of_dmy2 : dmy2 -> dmy
|
||||
(** [dmy_of_dmy2 dmy2]
|
||||
Convert a [dmy2] to [dmy] using [Sure] as precision. *)
|
||||
|
||||
exception Not_comparable
|
||||
(** [Not_comparable] is raised by [compare_dmy] and [compare_date] when
|
||||
[strict] mode is used and precision of dates are incompatibles to
|
||||
have a reliable result (e.g. is [compare_dmy 2019 07/2019]) or when
|
||||
one of the date in [compare_date] is [Dtext]. *)
|
||||
|
||||
val compare_dmy : ?strict:bool -> dmy -> dmy -> int
|
||||
(** [compare_dmy ?strict d1 d2]
|
||||
Return a negative integer if [d1] is prior to [d2],
|
||||
[0] if [d1] is equal to [d2],
|
||||
and a positive integer if [d2] is prior to [d1].
|
||||
[strict] parameter enable or disable strict mode, and
|
||||
is false by default (see [Not_comparable])
|
||||
*)
|
||||
|
||||
val compare_dmy_opt : ?strict:bool -> dmy -> dmy -> int option
|
||||
(** [compare_dmy_opt ?strict d1 d2]
|
||||
Same as [compare_dmy], but do not raise an exception
|
||||
*)
|
||||
|
||||
val compare_date : ?strict:bool -> date -> date -> int
|
||||
(** [compare_date d1 d2]
|
||||
If both [d1] and [d2] are [Dgreg] date, uses [compare_dmy]
|
||||
to compare them.
|
||||
[Dtext] dates are always considered prior to any [Dgreg] date,
|
||||
and equal to any other [Dtext] date.
|
||||
[strict] parameter enable or disable strict mode, and
|
||||
is false by default (see [Not_comparable])
|
||||
*)
|
||||
|
||||
val cdate_None : cdate
|
||||
(** Absent compressed date *)
|
||||
|
||||
val date_of_cdate : cdate -> date
|
||||
(** Convert [cdate] to [date]; fail if [cdate] is [Cnone] *)
|
||||
|
||||
val od_of_cdate : cdate -> date option
|
||||
(** Optional date from [cdate] *)
|
||||
|
||||
val cdate_to_dmy_opt : cdate -> dmy option
|
||||
(** [cdate_to_dmy_opt d] is [Some dmy] iff [d] resolve to [Dgreg (dmy,_)] *)
|
||||
|
||||
val cdate_of_date : date -> cdate
|
||||
(** Convert [date] to [cdate] *)
|
||||
|
||||
val cdate_of_od : date option -> cdate
|
||||
(** Optional date to [cdate] *)
|
||||
|
||||
(* TODO date_to_dmy? *)
|
||||
17
lib/util/dune.in
Normal file
17
lib/util/dune.in
Normal file
@@ -0,0 +1,17 @@
|
||||
(library
|
||||
(name geneweb_util)
|
||||
(public_name geneweb.util)
|
||||
(wrapped false)
|
||||
(preprocess (action (run %{bin:cppo} %%%CPPO_D%%% -V OCAML:%{ocaml_version} %{input-file})))
|
||||
(libraries
|
||||
calendars
|
||||
stdlib-shims
|
||||
unidecode
|
||||
geneweb.def
|
||||
re
|
||||
unix
|
||||
uucp
|
||||
uunf
|
||||
uutf
|
||||
)
|
||||
)
|
||||
295
lib/util/futil.ml
Normal file
295
lib/util/futil.ml
Normal file
@@ -0,0 +1,295 @@
|
||||
(* Copyright (c) 2006-2007 INRIA *)
|
||||
|
||||
open Adef
|
||||
open Def
|
||||
|
||||
external identity : 'a -> 'a = "%identity"
|
||||
|
||||
let map_cdate fd d =
|
||||
match Date.od_of_cdate d with
|
||||
| Some d -> Date.cdate_of_date (fd d)
|
||||
| None -> d
|
||||
|
||||
let map_title_strings ?(fd = identity) f t =
|
||||
let t_name =
|
||||
match t.t_name with
|
||||
| Tmain -> Tmain
|
||||
| Tname s -> Tname (f s)
|
||||
| Tnone -> Tnone
|
||||
in
|
||||
let t_ident = f t.t_ident in
|
||||
let t_place = f t.t_place in
|
||||
{
|
||||
t_name;
|
||||
t_ident;
|
||||
t_place;
|
||||
t_date_start = map_cdate fd t.t_date_start;
|
||||
t_date_end = map_cdate fd t.t_date_end;
|
||||
t_nth = t.t_nth;
|
||||
}
|
||||
|
||||
let map_pers_event ?(fd = identity) fp fs e =
|
||||
let epers_name =
|
||||
match e.epers_name with
|
||||
| ( Epers_Birth | Epers_Baptism | Epers_Death | Epers_Burial
|
||||
| Epers_Cremation | Epers_Accomplishment | Epers_Acquisition
|
||||
| Epers_Adhesion | Epers_BaptismLDS | Epers_BarMitzvah | Epers_BatMitzvah
|
||||
| Epers_Benediction | Epers_ChangeName | Epers_Circumcision
|
||||
| Epers_Confirmation | Epers_ConfirmationLDS | Epers_Decoration
|
||||
| Epers_DemobilisationMilitaire | Epers_Diploma | Epers_Distinction
|
||||
| Epers_Dotation | Epers_DotationLDS | Epers_Education | Epers_Election
|
||||
| Epers_Emigration | Epers_Excommunication | Epers_FamilyLinkLDS
|
||||
| Epers_FirstCommunion | Epers_Funeral | Epers_Graduate
|
||||
| Epers_Hospitalisation | Epers_Illness | Epers_Immigration
|
||||
| Epers_ListePassenger | Epers_MilitaryDistinction
|
||||
| Epers_MilitaryPromotion | Epers_MilitaryService
|
||||
| Epers_MobilisationMilitaire | Epers_Naturalisation | Epers_Occupation
|
||||
| Epers_Ordination | Epers_Property | Epers_Recensement | Epers_Residence
|
||||
| Epers_Retired | Epers_ScellentChildLDS | Epers_ScellentParentLDS
|
||||
| Epers_ScellentSpouseLDS | Epers_VenteBien | Epers_Will ) as evt ->
|
||||
evt
|
||||
| Epers_Name s -> Epers_Name (fs s)
|
||||
in
|
||||
let epers_date = map_cdate fd e.epers_date in
|
||||
let epers_place = fs e.epers_place in
|
||||
let epers_reason = fs e.epers_reason in
|
||||
let epers_note = fs e.epers_note in
|
||||
let epers_src = fs e.epers_src in
|
||||
let epers_witnesses = Array.map (fun (p, w) -> (fp p, w)) e.epers_witnesses in
|
||||
{
|
||||
epers_name;
|
||||
epers_date;
|
||||
epers_place;
|
||||
epers_reason;
|
||||
epers_note;
|
||||
epers_src;
|
||||
epers_witnesses;
|
||||
}
|
||||
|
||||
let map_fam_event ?(fd = identity) fp fs e =
|
||||
let efam_name =
|
||||
match e.efam_name with
|
||||
| ( Efam_Marriage | Efam_NoMarriage | Efam_NoMention | Efam_Engage
|
||||
| Efam_Divorce | Efam_Separated | Efam_Annulation | Efam_MarriageBann
|
||||
| Efam_MarriageContract | Efam_MarriageLicense | Efam_PACS
|
||||
| Efam_Residence ) as evt ->
|
||||
evt
|
||||
| Efam_Name s -> Efam_Name (fs s)
|
||||
in
|
||||
let efam_date = map_cdate fd e.efam_date in
|
||||
let efam_place = fs e.efam_place in
|
||||
let efam_reason = fs e.efam_reason in
|
||||
let efam_note = fs e.efam_note in
|
||||
let efam_src = fs e.efam_src in
|
||||
let efam_witnesses = Array.map (fun (p, w) -> (fp p, w)) e.efam_witnesses in
|
||||
{
|
||||
efam_name;
|
||||
efam_date;
|
||||
efam_place;
|
||||
efam_reason;
|
||||
efam_note;
|
||||
efam_src;
|
||||
efam_witnesses;
|
||||
}
|
||||
|
||||
let map_relation_ps fp fs r =
|
||||
{
|
||||
r_type = r.r_type;
|
||||
r_fath = (match r.r_fath with Some x -> Some (fp x) | None -> None);
|
||||
r_moth = (match r.r_moth with Some x -> Some (fp x) | None -> None);
|
||||
r_sources = fs r.r_sources;
|
||||
}
|
||||
|
||||
let map_death fd = function
|
||||
| (NotDead | DeadYoung | DeadDontKnowWhen | DontKnowIfDead | OfCourseDead) as
|
||||
x ->
|
||||
x
|
||||
| Death (r, d) -> Death (r, map_cdate fd d)
|
||||
|
||||
let map_burial fd = function
|
||||
| UnknownBurial -> UnknownBurial
|
||||
| Buried d -> Buried (map_cdate fd d)
|
||||
| Cremated d -> Cremated (map_cdate fd d)
|
||||
|
||||
let map_person_ps ?(fd = identity) fp fs p =
|
||||
{
|
||||
first_name = fs p.first_name;
|
||||
surname = fs p.surname;
|
||||
occ = p.occ;
|
||||
image = fs p.image;
|
||||
first_names_aliases = List.map fs p.first_names_aliases;
|
||||
surnames_aliases = List.map fs p.surnames_aliases;
|
||||
public_name = fs p.public_name;
|
||||
qualifiers = List.map fs p.qualifiers;
|
||||
titles = List.map (map_title_strings ~fd fs) p.titles;
|
||||
rparents = List.map (map_relation_ps fp fs) p.rparents;
|
||||
related = List.map fp p.related;
|
||||
aliases = List.map fs p.aliases;
|
||||
occupation = fs p.occupation;
|
||||
sex = p.sex;
|
||||
access = p.access;
|
||||
birth = map_cdate fd p.birth;
|
||||
birth_place = fs p.birth_place;
|
||||
birth_note = fs p.birth_note;
|
||||
birth_src = fs p.birth_src;
|
||||
baptism = map_cdate fd p.baptism;
|
||||
baptism_place = fs p.baptism_place;
|
||||
baptism_note = fs p.baptism_note;
|
||||
baptism_src = fs p.baptism_src;
|
||||
death = map_death fd p.death;
|
||||
death_place = fs p.death_place;
|
||||
death_note = fs p.death_note;
|
||||
death_src = fs p.death_src;
|
||||
burial = map_burial fd p.burial;
|
||||
burial_place = fs p.burial_place;
|
||||
burial_note = fs p.burial_note;
|
||||
burial_src = fs p.burial_src;
|
||||
pevents = List.map (map_pers_event ~fd fp fs) p.pevents;
|
||||
notes = fs p.notes;
|
||||
psources = fs p.psources;
|
||||
key_index = p.key_index;
|
||||
}
|
||||
|
||||
let map_ascend_f ff a =
|
||||
match a.parents with
|
||||
| Some f -> { parents = Some (ff f); consang = a.consang }
|
||||
| None -> { parents = None; consang = a.consang }
|
||||
|
||||
let map_union_f ff u = { family = Array.map ff u.family }
|
||||
|
||||
let map_divorce fd = function
|
||||
| (NotDivorced | Separated) as x -> x
|
||||
| Divorced d -> Divorced (map_cdate fd d)
|
||||
|
||||
let map_family_ps ?(fd = identity) fp ff fs fam =
|
||||
{
|
||||
marriage = map_cdate fd fam.marriage;
|
||||
marriage_place = fs fam.marriage_place;
|
||||
marriage_note = fs fam.marriage_note;
|
||||
marriage_src = fs fam.marriage_src;
|
||||
witnesses = Array.map fp fam.witnesses;
|
||||
relation = fam.relation;
|
||||
divorce = map_divorce fd fam.divorce;
|
||||
fevents = List.map (map_fam_event ~fd fp fs) fam.fevents;
|
||||
comment = fs fam.comment;
|
||||
origin_file = fs fam.origin_file;
|
||||
fsources = fs fam.fsources;
|
||||
fam_index = ff fam.fam_index;
|
||||
}
|
||||
|
||||
let parent multi parent =
|
||||
if not multi then Adef.parent parent else Adef.multi_parent parent
|
||||
|
||||
let map_couple_p multi_parents fp cpl =
|
||||
parent multi_parents (Array.map fp (parent_array cpl))
|
||||
|
||||
let map_descend_p fp des = { children = Array.map fp des.children }
|
||||
|
||||
let gen_person_misc_names sou empty_string quest_string first_name surname
|
||||
public_name qualifiers aliases first_names_aliases surnames_aliases titles
|
||||
husbands father_titles_places =
|
||||
if first_name = quest_string || surname = quest_string then []
|
||||
else
|
||||
let s_first_name = Mutil.nominative @@ sou first_name in
|
||||
let s_surname = Mutil.nominative @@ sou surname in
|
||||
let s_titles_names =
|
||||
List.fold_left
|
||||
(fun acc t ->
|
||||
match t.t_name with Tmain | Tnone -> acc | Tname x -> sou x :: acc)
|
||||
[] titles
|
||||
in
|
||||
let s_public_names =
|
||||
if public_name = empty_string then s_titles_names
|
||||
else sou public_name :: s_titles_names
|
||||
in
|
||||
let s_first_names =
|
||||
s_first_name
|
||||
:: (* List.rev_append *)
|
||||
List.rev_map sou first_names_aliases (* public_names *)
|
||||
in
|
||||
let s_surnames =
|
||||
s_surname
|
||||
:: Mutil.list_rev_map_append sou surnames_aliases
|
||||
(Mutil.list_rev_map_append sou qualifiers
|
||||
@@ Mutil.surnames_pieces s_surname)
|
||||
in
|
||||
let s_surnames =
|
||||
Array.fold_left
|
||||
(fun s_list (husband_surname, husband_surnames_aliases) ->
|
||||
if husband_surname = quest_string then
|
||||
Mutil.list_rev_map_append sou husband_surnames_aliases s_list
|
||||
else
|
||||
let s_husband_surname = Mutil.nominative @@ sou husband_surname in
|
||||
s_husband_surname
|
||||
:: Mutil.list_rev_map_append sou husband_surnames_aliases
|
||||
(List.rev_append
|
||||
(Mutil.surnames_pieces s_husband_surname)
|
||||
s_list))
|
||||
s_surnames husbands
|
||||
in
|
||||
(* (public names) *)
|
||||
let s_list = s_public_names in
|
||||
(* + (first names) x (surnames) *)
|
||||
let s_list =
|
||||
List.fold_left
|
||||
(fun list f ->
|
||||
List.fold_left (fun list s -> Name.concat f s :: list) list s_surnames)
|
||||
s_list s_first_names
|
||||
in
|
||||
(* + (first names + (title | (public name)) ) x (titles places) *)
|
||||
let s_list =
|
||||
(* let first_names = first_name :: first_names_aliases in *)
|
||||
List.fold_left
|
||||
(fun list t ->
|
||||
let s = t.t_place in
|
||||
if s = empty_string then list
|
||||
else
|
||||
let s = sou s in
|
||||
let s_first_names =
|
||||
match t.t_name with
|
||||
| Tname f -> sou f :: s_first_names
|
||||
| Tmain | Tnone ->
|
||||
if public_name = empty_string then s_first_names
|
||||
else sou public_name :: s_first_names
|
||||
in
|
||||
List.fold_left
|
||||
(fun list f -> Name.concat f s :: list)
|
||||
list s_first_names)
|
||||
s_list titles
|
||||
in
|
||||
(* + (first names) x (father's title places) *)
|
||||
let list =
|
||||
if father_titles_places = [] then s_list
|
||||
else
|
||||
List.fold_left
|
||||
(fun list t ->
|
||||
let s = t.t_place in
|
||||
if s = empty_string then list
|
||||
else
|
||||
let s = sou s in
|
||||
List.fold_left
|
||||
(fun list f -> Name.concat f s :: list)
|
||||
list s_first_names)
|
||||
s_list father_titles_places
|
||||
in
|
||||
let list = Mutil.list_rev_map_append sou aliases list in
|
||||
list
|
||||
|
||||
let rec eq_lists eq l1 l2 =
|
||||
match (l1, l2) with
|
||||
| x1 :: l1, x2 :: l2 -> eq x1 x2 && eq_lists eq l1 l2
|
||||
| [], [] -> true
|
||||
| _ -> false
|
||||
|
||||
let eq_title_names eq tn1 tn2 =
|
||||
match (tn1, tn2) with
|
||||
| Tname i1, Tname i2 -> eq i1 i2
|
||||
| Tmain, Tmain | Tnone, Tnone -> true
|
||||
| _ -> false
|
||||
|
||||
let eq_titles eq t1 t2 =
|
||||
eq_title_names eq t1.t_name t2.t_name
|
||||
&& eq t1.t_ident t2.t_ident && eq t1.t_place t2.t_place
|
||||
&& t1.t_date_start = t2.t_date_start
|
||||
&& t1.t_date_end = t2.t_date_end
|
||||
&& t1.t_nth = t2.t_nth
|
||||
124
lib/util/futil.mli
Normal file
124
lib/util/futil.mli
Normal file
@@ -0,0 +1,124 @@
|
||||
(* Copyright (c) 2006-2007 INRIA *)
|
||||
|
||||
open Def
|
||||
|
||||
val map_title_strings :
|
||||
?fd:(Def.date -> Def.date) -> ('a -> 'b) -> 'a gen_title -> 'b gen_title
|
||||
(** Convert generic type used to represent name, id and the place of [Def.gen_title] into
|
||||
another one. If [fd] is present, apply it on the date of the start and date of the end of a title *)
|
||||
|
||||
val map_pers_event :
|
||||
?fd:(Def.date -> Def.date) ->
|
||||
('a -> 'c) ->
|
||||
('b -> 'd) ->
|
||||
('a, 'b) gen_pers_event ->
|
||||
('c, 'd) gen_pers_event
|
||||
(** Convert:
|
||||
|
||||
- Generic type used to represent witnesses of [Def.gen_pers_event] into another one.
|
||||
- Generic type used to represent name, place, reason, note and source of [Def.gen_pers_event]
|
||||
into another one.
|
||||
If [fd] is present, apply it on date of the personal event. *)
|
||||
|
||||
val map_fam_event :
|
||||
?fd:(Def.date -> Def.date) ->
|
||||
('a -> 'c) ->
|
||||
('b -> 'd) ->
|
||||
('a, 'b) gen_fam_event ->
|
||||
('c, 'd) gen_fam_event
|
||||
(** Convert:
|
||||
|
||||
- Generic type used to represent witnesses of [Def.gen_fam_event] into another one.
|
||||
- Generic type used to represent name, place, reason, note and source of [Def.gen_fam_event]
|
||||
into another one.
|
||||
If [fd] is present, apply it on date of the familial event. *)
|
||||
|
||||
val map_relation_ps :
|
||||
('a -> 'c) -> ('b -> 'd) -> ('a, 'b) gen_relation -> ('c, 'd) gen_relation
|
||||
(** Convert:
|
||||
|
||||
- Generic type used to represent father and mother inside [Def.gen_relation] into another one.
|
||||
- Generic type used to represent sources of [Def.gen_relation] into another one. *)
|
||||
|
||||
val map_person_ps :
|
||||
?fd:(Def.date -> Def.date) ->
|
||||
('b -> 'd) ->
|
||||
('c -> 'e) ->
|
||||
('a, 'b, 'c) gen_person ->
|
||||
('a, 'd, 'e) gen_person
|
||||
(** Convert:
|
||||
|
||||
- Generic type used to represent related persons (parents, witnesses of a personal event, etc.)
|
||||
of [Def.gen_person] into another one.
|
||||
- Generic type used to represent another large part of information of [Def.gen_person]
|
||||
into another one.
|
||||
If [fd] is present, apply it on every date (birth, death, titles,, personal events, etc.).
|
||||
Generic type that is used to represent indexation key isn't converted. *)
|
||||
|
||||
val map_ascend_f : ('a -> 'b) -> 'a gen_ascend -> 'b gen_ascend
|
||||
(** Convert generic type used to represent family inside [Def.gen_ascend] into
|
||||
another one. *)
|
||||
|
||||
val map_union_f : ('a -> 'b) -> 'a gen_union -> 'b gen_union
|
||||
(** Convert generic type used to represent one of the famillies inside [Def.gen_union] into
|
||||
another one. *)
|
||||
|
||||
val map_family_ps :
|
||||
?fd:(Def.date -> Def.date) ->
|
||||
('a -> 'b) ->
|
||||
('c -> 'd) ->
|
||||
('e -> 'f) ->
|
||||
('a, 'c, 'e) gen_family ->
|
||||
('b, 'd, 'f) gen_family
|
||||
(** Convert:
|
||||
|
||||
- Generic type used to represent faimily indexation key into another one.
|
||||
- Generic type used to represent witnesses (of the marriage or of a famillial events, etc.)
|
||||
of [Def.gen_family] into another one.
|
||||
- Generic type used to represent another large part of information of [Def.gen_family]
|
||||
into another one.
|
||||
If [fd] is present, apply it on it on every date (marriage, divorce, famillial events, etc.).*)
|
||||
|
||||
val map_couple_p : bool -> ('a -> 'b) -> 'a gen_couple -> 'b gen_couple
|
||||
(** Convert generic type used to represent father and mother inside [Def.gen_couple] into
|
||||
another one. If first argument is true then use multi-parent functionality. *)
|
||||
|
||||
val parent : bool -> 'a array -> 'a gen_couple
|
||||
(** @deprecated Use [Adef.parent] instead. *)
|
||||
|
||||
val map_descend_p : ('a -> 'b) -> 'a gen_descend -> 'b gen_descend
|
||||
(** Convert generic type used to represent children inside [Def.gen_descend] into
|
||||
another one.*)
|
||||
|
||||
val eq_lists : ('a -> 'b -> bool) -> 'a list -> 'b list -> bool
|
||||
(** Says if two lists with different element's type are equal with given comparison
|
||||
function. *)
|
||||
|
||||
val eq_titles : ('a -> 'b -> bool) -> 'a gen_title -> 'b gen_title -> bool
|
||||
(** Says if two titles with different types are equal with given comparison
|
||||
function. *)
|
||||
|
||||
val eq_title_names :
|
||||
('a -> 'b -> bool) -> 'a gen_title_name -> 'b gen_title_name -> bool
|
||||
(** Says if two title names with different types are equal with given comparison
|
||||
function. *)
|
||||
|
||||
val gen_person_misc_names :
|
||||
('a -> string) ->
|
||||
'a ->
|
||||
'a ->
|
||||
'a ->
|
||||
'a ->
|
||||
'a ->
|
||||
'a list ->
|
||||
'a list ->
|
||||
'a list ->
|
||||
'a list ->
|
||||
'a Def.gen_title list ->
|
||||
('a * 'a list) array ->
|
||||
'a Def.gen_title list ->
|
||||
string list
|
||||
(** Return a list of string corresponding to various
|
||||
mix between all kind of names.
|
||||
It can contain duplicates. Strings are used raw (not lowered).
|
||||
*)
|
||||
43
lib/util/lock.ml
Normal file
43
lib/util/lock.ml
Normal file
@@ -0,0 +1,43 @@
|
||||
(* Copyright (c) 1998-2007 INRIA *)
|
||||
|
||||
let no_lock_flag = ref false
|
||||
|
||||
let print_error_and_exit () =
|
||||
Printf.printf "\nSorry. Impossible to lock base.\n";
|
||||
flush stdout;
|
||||
exit 2
|
||||
|
||||
let print_try_again () =
|
||||
Printf.eprintf "Base locked. Try again.\n";
|
||||
flush stdout
|
||||
|
||||
let control ~onerror lname wait f =
|
||||
if !no_lock_flag then f ()
|
||||
else
|
||||
try
|
||||
let fd = Unix.openfile lname [ Unix.O_RDWR; Unix.O_CREAT ] 0o666 in
|
||||
(try Unix.chmod lname 0o666 with _ -> ());
|
||||
try
|
||||
if Sys.unix then
|
||||
if wait then Unix.lockf fd Unix.F_LOCK 0
|
||||
else Unix.lockf fd Unix.F_TLOCK 0;
|
||||
let r = f () in
|
||||
Unix.close fd;
|
||||
r
|
||||
with e ->
|
||||
Unix.close fd;
|
||||
raise e
|
||||
with
|
||||
| Unix.Unix_error _ -> onerror ()
|
||||
| e -> raise e
|
||||
|
||||
let control_retry ~onerror lname f =
|
||||
control lname false
|
||||
~onerror:(fun () ->
|
||||
Printf.eprintf "Base is locked. Waiting... ";
|
||||
flush stderr;
|
||||
control lname true ~onerror (fun () ->
|
||||
Printf.eprintf "Ok\n";
|
||||
flush stderr;
|
||||
f ()))
|
||||
f
|
||||
17
lib/util/lock.mli
Normal file
17
lib/util/lock.mli
Normal file
@@ -0,0 +1,17 @@
|
||||
val no_lock_flag : bool ref
|
||||
(** Flag that indicates if the lock should be used. *)
|
||||
|
||||
val print_error_and_exit : unit -> unit
|
||||
(** Print lock error message and terminate program. *)
|
||||
|
||||
val print_try_again : unit -> unit
|
||||
(** Print message about locked database. *)
|
||||
|
||||
val control : onerror:(unit -> 'a) -> string -> bool -> (unit -> 'a) -> 'a
|
||||
(** [control ~onerror lname wait f] opens file [lname], puts a write lock on it and then calls [f].
|
||||
If [wait] is true then if it tries to access locked file it will be blocked until these lock is removed. Otherwise
|
||||
it will fail, and function [onerror] will be called. If flag [no_lock_flag] is set then returns [f ()] immediatly. *)
|
||||
|
||||
val control_retry : onerror:(unit -> 'a) -> string -> (unit -> 'a) -> 'a
|
||||
(** Tries to call [control] without blocking. If it fail (lock is put) then call again [control]
|
||||
and waits untill lock is removed. If it fails with another reason calls [onerror]. *)
|
||||
1244
lib/util/mutil.ml
Normal file
1244
lib/util/mutil.ml
Normal file
File diff suppressed because it is too large
Load Diff
446
lib/util/mutil.mli
Normal file
446
lib/util/mutil.mli
Normal file
@@ -0,0 +1,446 @@
|
||||
(* Copyright (c) 2006-2007 INRIA *)
|
||||
|
||||
val verbose : bool ref
|
||||
(** Global variable that indicates either
|
||||
servers should be in verbose mode. *)
|
||||
|
||||
val list_iter_first : (bool -> 'a -> unit) -> 'a list -> unit
|
||||
(** [list_iter_first f l] iter over first element with [f true] and over others with [f false]. *)
|
||||
|
||||
val strip_all_trailing_spaces : string -> string
|
||||
(** Remove all trailing spaces in string *)
|
||||
|
||||
(* [decline dform dformat] encode name that could be declined (like in the czech language)
|
||||
and its declination form in more comprehensible for computer format.
|
||||
Declination form [dform] is one of the follows:
|
||||
- 'n' for nominative
|
||||
- 'a' for accusative
|
||||
- 'g' for genitif
|
||||
Declination format [dformat] describes how does a name changes throughout different
|
||||
declination forms comparing to the nominative form.
|
||||
See {{: https://geneweb.tuxfamily.org/wiki/declension }Declination in Geneweb} for more details.
|
||||
Example 1: [decline 'a' "Vladana:a:Vladanu:g:Vladany"] returns encoding "@(@(a)@(a?Vladanu:g?Vladany:Vladana))."
|
||||
Example 2: [decline 'a' "Vladana:a:-u:g:-y"] returns encoding "@(@(a)Vladan@(a?u:g?y:a))"
|
||||
@deprecated *)
|
||||
val decline : char -> string -> string
|
||||
|
||||
val nominative : string -> string
|
||||
(** Encodes name for nominative declination format.
|
||||
@deprecated *)
|
||||
|
||||
val mkdir_p : ?perm:int -> string -> unit
|
||||
(** [mkdir_p ?perm dir]
|
||||
Create the directory [dir].
|
||||
No error if existing, make parent directories as needed.
|
||||
*)
|
||||
|
||||
val remove_dir : string -> unit
|
||||
(** Remove every file in the directory and then remove the directory itself *)
|
||||
|
||||
val lock_file : string -> string
|
||||
(** Returns the name of a lock file (with extension .lck). Result is generally used as an
|
||||
argument for [Lock.control] function. *)
|
||||
|
||||
val initial : string -> int
|
||||
(** Returns position of first capital letter in the name (0 if no capitals). *)
|
||||
|
||||
val input_particles : string -> string list
|
||||
(** [input_particles fname] read file and returns list of lines.
|
||||
Empty lines are skipped. *)
|
||||
|
||||
val surnames_pieces : string -> string list
|
||||
(** Divide surnames on pieces. Every separated word that contains at least 4 character
|
||||
forms one piece. Words that contains less than 4 characters or words "saint" and "sainte"
|
||||
are considered as the particles and are attached to the another word to form a piece.
|
||||
If string contains less than two pieces, returns an empty list. *)
|
||||
|
||||
val utf_8_of_iso_8859_1 : string -> string
|
||||
(** Convert encoded string with ISO 8859-1 to UTF 8 *)
|
||||
|
||||
val iso_8859_1_of_utf_8 : string -> string
|
||||
(** Convert encoded string with UTF 8 to ISO 8859-1 *)
|
||||
|
||||
val roman_of_arabian : int -> string
|
||||
(** Convert arabic number (int) to roman (string). Number should be < 4000. *)
|
||||
|
||||
val arabian_of_roman : string -> int
|
||||
(** Convert roman number (string) to arabic (int). Number should be less or equal
|
||||
to MMMCMXCIX (3999). *)
|
||||
|
||||
val fallback : (string * string) list ref
|
||||
|
||||
val read_fallback : string -> string -> unit
|
||||
(** reads a file lexicon.gwf which defines a possible fallback language for
|
||||
each of the available languages.
|
||||
Most of the lines of this file are commented and can be uncommented by
|
||||
the user who can place the new file in bases/etc/lang *)
|
||||
|
||||
val input_lexicon :
|
||||
string -> (string, string) Hashtbl.t -> (unit -> in_channel) -> unit
|
||||
(** [input_lexicon lang ht open_file] open {i lexicon.txt} file with [open_file ()],
|
||||
parse it and fill [ht] where key is the keyword and value is
|
||||
a coresponding traduction associated to a [lang] language code.
|
||||
The second parameter defines a set of fallback languages if a translation is not
|
||||
available in a given language.
|
||||
If traduction line has a form [->: kw] it associates to the current section name the value
|
||||
associated to [kw] section name inside [ht] (keyword alias). *)
|
||||
|
||||
module StrSet : Set.S with type elt = string
|
||||
(** Set of strings *)
|
||||
|
||||
val tr : char -> char -> string -> string
|
||||
(** [tr c1 c2 str]
|
||||
Return a new string which is the same as [str] with all occurences of [c1]
|
||||
replaced by [c2]. If [str] does not contain [c1] [str] is returned untouched.
|
||||
*)
|
||||
|
||||
val unsafe_tr : char -> char -> string -> string
|
||||
(** [unsafe_tr c1 c2 str]
|
||||
Update [str] in place. Replace all occurences of [c1] by [c2].
|
||||
*)
|
||||
|
||||
val array_to_list_map : ('a -> 'b) -> 'a array -> 'b list
|
||||
(** [array_to_list_map fn a] is almost like [Array.to_list a |> List.map fn]
|
||||
but is more efficient.
|
||||
|
||||
The list is constructed backward,
|
||||
so if [fn] have side effects it may not behave as excepted.
|
||||
*)
|
||||
|
||||
val array_to_list_rev_map : ('a -> 'b) -> 'a array -> 'b list
|
||||
(** [array_to_list_revmap fn a] is almost like [Array.to_list a |> List.rev_map fn]
|
||||
but is more efficient.
|
||||
*)
|
||||
|
||||
val array_assoc : 'k -> ('k * 'v) array -> 'v
|
||||
(** [array_assoc k arr]
|
||||
returns the value associated with key [k] in the array of pairs [arr].
|
||||
That is, [array_assoc k [| ... ; (k,v) ; ... |] = v]
|
||||
if [(k,v)] is the leftmost binding of a in array [arr].
|
||||
Raise [Not_found] if there is no value associated with [k] in [arr]. *)
|
||||
|
||||
val start_with : string -> int -> string -> bool
|
||||
(** [start_with prefix off str]
|
||||
Test if [str] starts with [prefix] (at offset [off]).
|
||||
|
||||
Raise [Invalid_argument] if [off] is not a valid index in [str].
|
||||
*)
|
||||
|
||||
val start_with_wildcard : string -> int -> string -> bool
|
||||
(** [start_with_wildcard prefix off str]
|
||||
Test if [str] starts with [prefix] (at offset [off]).
|
||||
Occurences of ['_'] in [prefix] will match both ['_']
|
||||
and [' '] in [str] and trailing ['_'] of [prefix]
|
||||
is treated as an optional ['_'] [' '].
|
||||
|
||||
Raise [Invalid_argument] if [off] is not a valid index in [str].
|
||||
*)
|
||||
|
||||
val contains : string -> string -> bool
|
||||
(** [contains str sub] Test [sub] is contained in [str].
|
||||
*)
|
||||
|
||||
val compile_particles : string list -> Re.re
|
||||
(** [compile_particles list]
|
||||
Compile [list] so it can be used with [get_particle]
|
||||
or [compare_after_particle] function. *)
|
||||
|
||||
val get_particle : Re.re -> string -> string
|
||||
(** [get_particle particles name]
|
||||
Return [p] where [p] is in [particles] and is prefix of [name].
|
||||
If no such [p] exists, empty string [""] is returned. *)
|
||||
|
||||
val compare_after_particle : Re.re -> string -> string -> int
|
||||
(** [compare_after_particle particles s1 s2]
|
||||
compare strings [s1] [s2] starting from the first character after
|
||||
particle's match. If they are equal, compare particles. *)
|
||||
|
||||
val rm : string -> unit
|
||||
(** [rm fname]
|
||||
Remove [fname]. If [fname] does not exists, do nothing.
|
||||
*)
|
||||
|
||||
val mv : string -> string -> unit
|
||||
(** [mv src dst]
|
||||
Move [src] to [dst]. If [src] does not exists, do nothing.
|
||||
*)
|
||||
|
||||
val string_of_int_sep : string -> int -> string
|
||||
(** [string_of_int_sep "," 1000000] is ["1,000,000"]
|
||||
*)
|
||||
|
||||
val list_compare : ('a -> 'a -> int) -> 'a list -> 'a list -> int
|
||||
(** [list_compare cmp l1 l2]
|
||||
Comparison function for lists, using [cmp] to compare each elements
|
||||
*)
|
||||
|
||||
val list_find_map : ('a -> 'b option) -> 'a list -> 'b option
|
||||
(** [list_find_map fn list]
|
||||
OCaml Stdlib's [List.find_map] (introduced in 4.10.0)
|
||||
backported into GeneWeb
|
||||
*)
|
||||
|
||||
val array_find_map : ('a -> 'b option) -> 'a array -> 'b option
|
||||
(** [array_find_map f a] applies [f] to the elements of [a] in order, and returns the
|
||||
first result of the form [Some v], or [None] if none exist.
|
||||
TODO OCaml 4.13; use Stdlib
|
||||
*)
|
||||
|
||||
val list_rev_iter : ('a -> unit) -> 'a list -> unit
|
||||
(** [list_rev_iter f l] gives the same result as [List.rev l |> List.iter fn],
|
||||
but without creating intermediate list (not tail-recursive).
|
||||
*)
|
||||
|
||||
val list_last : 'a list -> 'a
|
||||
(** [list_last list]
|
||||
Return the last element of the list.
|
||||
Raises [Failure] if the list is empty.
|
||||
*)
|
||||
|
||||
val list_slice : int -> int -> 'a list -> 'a list
|
||||
(** [list_slice from_ to_ list]
|
||||
Extracts elements from [a]-nth (starts with zero, inclusive)
|
||||
to [b]-nth (exclusive). If [list] is not long enough, result
|
||||
will be shorter than requested, but the function will not fail.
|
||||
*)
|
||||
|
||||
val check_magic : string -> in_channel -> bool
|
||||
(** [check_magic magic ic]
|
||||
Read (and consume) the [magic] string at the beggining of [ic]
|
||||
and return [true].
|
||||
If [ic] does not start with [magic], reset the reading position
|
||||
of [ic] to where is was before you call [check_magic] and return [false].
|
||||
*)
|
||||
|
||||
val executable_magic : string
|
||||
(** Magic string are either get from {i GW_EXECUTABLE_MAGIC} environement variable
|
||||
either generated from the md5sum of the running executable.
|
||||
It can be used for volatile files which can be easily corrupted
|
||||
by any change in program or data representation.
|
||||
*)
|
||||
|
||||
val random_magic : string
|
||||
(** Magic string generated from 30 random bits.
|
||||
It should be different each time you launch the program.
|
||||
*)
|
||||
|
||||
val array_except : 'a -> 'a array -> 'a array
|
||||
(** [array_except value array]
|
||||
Return a new array containing all the elements
|
||||
from [array] except the first occurence of [value]
|
||||
*)
|
||||
|
||||
val default_particles : string list
|
||||
(** List of default particles used in GeneWeb *)
|
||||
|
||||
val array_forall2 : ('a -> 'b -> bool) -> 'a array -> 'b array -> bool
|
||||
(** [array_forall2 p a b]
|
||||
Checks if all elements of the arrays satisfy the predicate [p].
|
||||
That is, it returns [(p a1 b1) && (p a2 b2) && ... && (p an bn)].
|
||||
Raise Invalid_argument if the two lists are determined to have different lengths.
|
||||
*)
|
||||
|
||||
val list_replace : 'a -> 'a -> 'a list -> 'a list
|
||||
(** [list_replace old_v new_v list]
|
||||
Return the same list as [list] were the first occurence of [old_v]
|
||||
has been replaced by [new_v]. If [old_v] is unbound, the list is
|
||||
returned unchanged.
|
||||
*)
|
||||
|
||||
val list_except : 'a -> 'a list -> 'a list
|
||||
(** [list_except x list]
|
||||
Return a list containing all the elements from [list]
|
||||
except the first occurence of [x]. *)
|
||||
|
||||
val list_index : 'a -> 'a list -> int
|
||||
(** [list_index element list]
|
||||
Finds the index of [element] in list.
|
||||
Raises [Not_found] if it does not exists.
|
||||
*)
|
||||
|
||||
val list_ref_append : 'a list ref -> 'a -> unit
|
||||
(** [list_ref_append tl hd]
|
||||
Add [hd] at the beginning of [tl] ref.
|
||||
*)
|
||||
|
||||
val input_file_ic : in_channel -> string
|
||||
(** Read the content of a file.
|
||||
Starts from the position where it is when calling [input_file_ic],
|
||||
and read until the end of the file.
|
||||
|
||||
This function avoid crashes with text files on Windows platform.
|
||||
|
||||
If the channel is opened on a file that is not a regular file,
|
||||
the result is meaningless.
|
||||
*)
|
||||
|
||||
val normalize_utf_8 : string -> string
|
||||
(** [normalize_utf_8 s]
|
||||
Return [s] normalized using
|
||||
{{:http://www.unicode.org/glossary/#normalization_form_c}NFC}
|
||||
with all malformed UTF-8 character replaced by
|
||||
{{:http://unicode.org/glossary/#replacement_character}the replacement character}
|
||||
*)
|
||||
|
||||
val list_map_sort_uniq : ('a -> 'b) -> 'a list -> 'b list
|
||||
(** [list_map_sort_uniq f l] apply [f] to every element and return
|
||||
sorted with Merge Sort algorithm list where every element is unique. *)
|
||||
|
||||
val list_rev_map_append : ('a -> 'b) -> 'a list -> 'b list -> 'b list
|
||||
(** [list_rev_map_append f l1 l2] apply [f] to every element in [l1], reverse it and
|
||||
concat with [l2]. *)
|
||||
|
||||
val read_or_create_channel :
|
||||
?magic:string ->
|
||||
?wait:bool ->
|
||||
string ->
|
||||
(in_channel -> 'a) ->
|
||||
(out_channel -> 'a) ->
|
||||
'a
|
||||
(** [read_or_create_channel ?magic fname read write]
|
||||
|
||||
If [fname] exists (and starts with [magic] if this one is provided),
|
||||
[read] function is used on the file.
|
||||
If it does not, or does not start with [magic], or if [read] raise an exception,
|
||||
[write] function is used on the file.
|
||||
|
||||
This function takes care of locking and closing files so you must not take care of
|
||||
that in [read]/[write].
|
||||
It also takes care of writing [magic] at the beginning of the file before calling
|
||||
[write]
|
||||
|
||||
On Windows, file is not locked.
|
||||
*)
|
||||
|
||||
val read_or_create_value :
|
||||
?magic:string -> ?wait:bool -> string -> (unit -> 'a) -> 'a
|
||||
(** [read_or_create_value ?magic fname create]
|
||||
|
||||
If [fname] exists (and starts and ends with [magic] if this one is provided),
|
||||
return the unmarshalled value.
|
||||
If it does not, or does not start with [magic], or if unmarshalling raise an exception,
|
||||
[create] function is used to produce the value to be marshalled.
|
||||
|
||||
On Windows, file is not locked.
|
||||
*)
|
||||
|
||||
val bench : string -> (unit -> 'a) -> 'a
|
||||
(** [bench name fn]
|
||||
Execute [fn], print stats about time and memory allocation, return [fn] result.
|
||||
*)
|
||||
|
||||
val print_callstack : ?max:int -> unit -> unit
|
||||
(** Prints call stack on stderr with at most [max] entries. *)
|
||||
|
||||
val encode : string -> Adef.encoded_string
|
||||
(** [encode s]
|
||||
Encodes the string [s] in another string
|
||||
where spaces and special characters are coded. This allows
|
||||
to put such strings in html links <a href=...>. This is
|
||||
the same encoding done by Web browsers in forms.
|
||||
*)
|
||||
|
||||
val decode : Adef.encoded_string -> string
|
||||
(** [decode s]
|
||||
Does the inverse job than [code],
|
||||
restoring the initial string. The heading and trailing spaces
|
||||
are stripped.
|
||||
*)
|
||||
|
||||
val gen_decode : bool -> Adef.encoded_string -> string
|
||||
(** Like above but heading and trailing spaces are stripped
|
||||
only if bool parameter is [true]. [decode] = [gen_decode true].
|
||||
*)
|
||||
|
||||
val extract_param : string -> char -> string list -> string
|
||||
(** [extract_param name stopc request] can be used to extract some
|
||||
parameter from a browser [request] (list of strings); [name]
|
||||
is a string which should match the beginning of a request line,
|
||||
[stopc] is a character ending the request line. For example, the
|
||||
string request has been obtained by: [extract_param "GET /" ' '].
|
||||
Answers the empty string if the parameter is not found. *)
|
||||
|
||||
val sprintf_date : Unix.tm -> Adef.safe_string
|
||||
(** Print a date using "%04d-%02d-%02d %02d:%02d:%02d" format
|
||||
Example : 2021-12-13 22:35:08. *)
|
||||
|
||||
val rev_input_line : in_channel -> int -> bytes ref * int ref -> string * int
|
||||
(** [rev_input_line ic pos (rbytes, rpos)]
|
||||
Read characters in reverse order from the given input channel,
|
||||
until a newline character is encountered.
|
||||
Return the string of all characters read, without the newline
|
||||
character at the end, and the position of the first character
|
||||
of the returned line (to be used with next [rev_input_line] call).
|
||||
|
||||
[rpos] and [rbytes] are intermediate between [ic] and reading functions.
|
||||
At the beginig when [!rpos = 0] and [rbytes] is empty, initialise buffer with
|
||||
the size = 1024, then reads last 1024 characters from [ci]. When [rpos] comes
|
||||
down to 0, resize buffer *2 and reads 2048 characters before 1024 last
|
||||
characters. [rpos] and [rbytes] must be the same in each subsequents calls
|
||||
|
||||
Raises [End_of_file] if the beginning of the file is reached
|
||||
at the beginning of line.
|
||||
*)
|
||||
|
||||
val search_file_opt : string list -> string -> string option
|
||||
(** [search_file directories file]
|
||||
Search for a [file] in different [directories] and return
|
||||
then first result or [None] if not found
|
||||
*)
|
||||
|
||||
val search_asset_opt : string -> string option
|
||||
(** [search_asset fname]
|
||||
Searches for a file in assets directories.
|
||||
i.e. directories previously registered with [Secure.add_assets] *)
|
||||
|
||||
val eq_key : string * string * int -> string * string * int -> bool
|
||||
(** [eq_key (fn1, sn1, oc1) (fn2, sn2, oc2)]
|
||||
Tests if two persons would have the same key
|
||||
*)
|
||||
|
||||
val ls_r : string list -> string list
|
||||
(** [ls_r dirs]
|
||||
List directories (and subdirectories) contents of [dirs], including [dirs] themselves.
|
||||
*)
|
||||
|
||||
val rm_rf : string -> unit
|
||||
(** [rm_rf dir]
|
||||
Remove directory [dir] and everything inside [dir].
|
||||
*)
|
||||
|
||||
val filter_map : ('a -> 'b option) -> 'a list -> 'b list
|
||||
(** [filter_map fn list] is a combination of map and filter.
|
||||
Not tail-recursive.
|
||||
*)
|
||||
|
||||
val rev_iter : ('a -> unit) -> 'a list -> unit
|
||||
(** [rev_iter fn list] is like [List.iter fn (List.rev list)].
|
||||
Not tail-recursive.
|
||||
*)
|
||||
|
||||
val groupby :
|
||||
key:('a -> 'k) -> value:('a -> 'v) -> 'a list -> ('k * 'v list) list
|
||||
(** [groupby ~key ~value list]
|
||||
Group the elements returning the same key together.
|
||||
Ordering of elements is unspecified.
|
||||
*)
|
||||
|
||||
val digest : string -> string
|
||||
(** [digest s]
|
||||
Returns the (128 bits long, using MD5 algorithm) digest of [s].
|
||||
*)
|
||||
|
||||
val empty_person : 'string -> 'string -> (unit, _, 'string) Def.gen_person
|
||||
(** [empty_person empty quest] returns a Def.gen_person with
|
||||
[first_name] and [surname] initialized to [quest],
|
||||
other 'string field initialized to [empty], and
|
||||
only empty arrays/lists.
|
||||
*)
|
||||
|
||||
val empty_family : 'string -> (_, unit, 'string) Def.gen_family
|
||||
(** [empty_family empty] returns a Def.gen_person with string field initialized
|
||||
initialized with [empty] and only empty arrays/lists.
|
||||
*)
|
||||
|
||||
val good_name : string -> bool
|
||||
(** test the base name fir accepted characters: a..z, A..Z, - *)
|
||||
269
lib/util/name.ml
Normal file
269
lib/util/name.ml
Normal file
@@ -0,0 +1,269 @@
|
||||
(* Copyright (c) 1998-2007 INRIA *)
|
||||
|
||||
(* La liste des caractères interdits *)
|
||||
let forbidden_char = [ ':'; '@'; '#'; '='; '$' ]
|
||||
|
||||
(* Name.lower *)
|
||||
|
||||
let unaccent_utf_8 lower s i =
|
||||
let fns =
|
||||
if lower then fun n s -> (String.lowercase_ascii s, n) else fun n s -> (s, n)
|
||||
in
|
||||
let fnc =
|
||||
if lower then fun n c -> (String.make 1 @@ Char.lowercase_ascii c, n)
|
||||
else fun n c -> (String.make 1 c, n)
|
||||
in
|
||||
let s, n =
|
||||
Unidecode.decode fns fnc
|
||||
(fun n -> (String.sub s i (n - i), n))
|
||||
s i (String.length s)
|
||||
in
|
||||
if lower then (String.lowercase_ascii s, n) else (s, n)
|
||||
|
||||
let next_chars_if_equiv s i t j =
|
||||
if i >= String.length s || j >= String.length t then None
|
||||
else
|
||||
let s1, i1 = unaccent_utf_8 true s i in
|
||||
let t1, j1 = unaccent_utf_8 true t j in
|
||||
if s1 = t1 then Some (i1, j1) else None
|
||||
|
||||
(* Name.lower:
|
||||
- uppercase -> lowercase
|
||||
- no accents
|
||||
- chars no letters and no numbers (except '.') => spaces (stripped)
|
||||
Key comparison (first name, surname, number) applies "lower" equality
|
||||
on first names and surnames *)
|
||||
let lower s =
|
||||
let rec copy special i len =
|
||||
if i = String.length s then Buff.get len
|
||||
else if Char.code s.[i] < 0x80 then
|
||||
match s.[i] with
|
||||
| ('a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '.') as c ->
|
||||
let len = if special then Buff.store len ' ' else len in
|
||||
let c = Char.lowercase_ascii c in
|
||||
copy false (i + 1) (Buff.store len c)
|
||||
| _ -> copy (len <> 0) (i + 1) len
|
||||
else
|
||||
let len = if special then Buff.store len ' ' else len in
|
||||
let t, j = unaccent_utf_8 true s i in
|
||||
copy false j (Buff.mstore len t)
|
||||
in
|
||||
copy false 0 0
|
||||
|
||||
let title s =
|
||||
let t = ref true in
|
||||
let cmap u =
|
||||
let r = if !t then Uucp.Case.Map.to_upper u else Uucp.Case.Map.to_lower u in
|
||||
t := not (Uucp.Alpha.is_alphabetic u);
|
||||
r
|
||||
in
|
||||
Utf8.cmap_utf_8 cmap s
|
||||
|
||||
(* Name.abbrev *)
|
||||
|
||||
(* List of abbreviations. If abbreviation is mapped to [Some s] should be remplaced by
|
||||
[s]. If mapped to None, it should be removed from name. *)
|
||||
let abbrev_list =
|
||||
[
|
||||
("a", None);
|
||||
("af", None);
|
||||
("d", None);
|
||||
("de", None);
|
||||
("di", None);
|
||||
("ier", Some "i");
|
||||
("of", None);
|
||||
("saint", Some "st");
|
||||
("sainte", Some "ste");
|
||||
("van", None);
|
||||
("von", None);
|
||||
("zu", None);
|
||||
("zur", None);
|
||||
]
|
||||
|
||||
(* Checks if the word starting at [i] in [s] is [p]. *)
|
||||
let is_word s i p =
|
||||
let rec is_word s i p ip =
|
||||
if ip = String.length p then
|
||||
if i = String.length s then true else if s.[i] = ' ' then true else false
|
||||
else if i = String.length s then false
|
||||
else if s.[i] = p.[ip] then is_word s (i + 1) p (ip + 1)
|
||||
else false
|
||||
in
|
||||
is_word s i p 0
|
||||
|
||||
(* Checks if word that starts at position [i] in [s] is one of abbreviation *)
|
||||
let rec search_abbrev s i = function
|
||||
| (w, a) :: pl ->
|
||||
if is_word s i w then Some (String.length w, a) else search_abbrev s i pl
|
||||
| [] -> None
|
||||
|
||||
(* Name.abbrev: suppress lowercase particles, shorten "saint" into "st" *)
|
||||
let abbrev s =
|
||||
let rec copy can_start_abbrev i len =
|
||||
if i >= String.length s then Buff.get len
|
||||
else
|
||||
match s.[i] with
|
||||
| ' ' -> copy true (i + 1) (Buff.store len ' ')
|
||||
| c ->
|
||||
if can_start_abbrev then
|
||||
match search_abbrev s i abbrev_list with
|
||||
| None -> copy false (i + 1) (Buff.store len c)
|
||||
| Some (n, Some a) -> copy false (i + n) (Buff.mstore len a)
|
||||
| Some (n, None) -> copy true (i + n + 1) len
|
||||
else copy false (i + 1) (Buff.store len c)
|
||||
in
|
||||
copy true 0 0
|
||||
|
||||
(* Name.strip *)
|
||||
|
||||
(* Name.strip_c = name without the charater c given as parameter *)
|
||||
let strip_c s c =
|
||||
let rec copy i len =
|
||||
if i = String.length s then Buff.get len
|
||||
else if s.[i] = c then copy (i + 1) len
|
||||
else copy (i + 1) (Buff.store len s.[i])
|
||||
in
|
||||
copy 0 0
|
||||
|
||||
let strip s = strip_c s ' '
|
||||
|
||||
(* String without any forbidden caracters defined in forbidden_char *)
|
||||
(* ******************************************************************** *)
|
||||
(* [Fonc] purge : string -> string *)
|
||||
|
||||
(* ******************************************************************** *)
|
||||
|
||||
(** [Description] : Supprime tous les caractères interdits (défini par
|
||||
forbidden_char) présents dans la chaine passée en
|
||||
argument.
|
||||
[Args] :
|
||||
- s : string que l'on veut purger
|
||||
[Retour] :
|
||||
- string : retourne la chaîne délestée des caractères interdits
|
||||
[Rem] : Exporté en clair hors de ce module. *)
|
||||
let purge s = List.fold_left strip_c s forbidden_char
|
||||
|
||||
(* Name.crush *)
|
||||
|
||||
(* If string starting from [i] contains roman number then returns the next position,
|
||||
else returns None. *)
|
||||
let roman_number s i =
|
||||
let rec loop i =
|
||||
if i = String.length s then Some i
|
||||
else if s.[i] = ' ' then Some i
|
||||
else match s.[i] with 'i' | 'v' | 'x' | 'l' -> loop (i + 1) | _ -> None
|
||||
in
|
||||
if i = 0 || s.[i - 1] = ' ' then loop i else None
|
||||
|
||||
(* Name.crush, a custom sonnex/soundex-like phonetic algorithm:
|
||||
- no spaces
|
||||
- roman numbers are keeped
|
||||
- vowels are suppressed, except in words starting with a vowel,
|
||||
where this vowel is converted into "e"
|
||||
- "k" and "q" replaced by "c"
|
||||
- "y" replaced by "i"
|
||||
- "z" replaced by "s"
|
||||
- "ph" replaced by "f"
|
||||
- others "h" deleted
|
||||
- s at end of words are deleted
|
||||
- no double lowercase consons *)
|
||||
let crush s =
|
||||
let rec copy i len first_vowel =
|
||||
if i = String.length s then Buff.get len
|
||||
else if s.[i] = ' ' then copy (i + 1) len true
|
||||
else
|
||||
match roman_number s i with
|
||||
| Some j ->
|
||||
let rec loop i len =
|
||||
if i = j then copy j len true
|
||||
else loop (i + 1) (Buff.store len s.[i])
|
||||
in
|
||||
loop i len
|
||||
| _ -> (
|
||||
match s.[i] with
|
||||
| 'a' | 'e' | 'i' | 'o' | 'u' | 'y' ->
|
||||
let len = if first_vowel then Buff.store len 'e' else len in
|
||||
copy (i + 1) len false
|
||||
| 'h' ->
|
||||
let len =
|
||||
if i > 0 && s.[i - 1] = 'p' then Buff.store (len - 1) 'f'
|
||||
else len
|
||||
in
|
||||
copy (i + 1) len first_vowel
|
||||
| ('s' | 'z') when i = String.length s - 1 || s.[i + 1] = ' ' ->
|
||||
let len =
|
||||
let rec loop i len =
|
||||
if
|
||||
i > 0 && len > 0
|
||||
&& s.[i] = Bytes.get !Buff.buff len
|
||||
&& (s.[i] = 's' || s.[i] = 'z')
|
||||
then loop (i - 1) (len - 1)
|
||||
else len + 1
|
||||
in
|
||||
loop (i - 1) (len - 1)
|
||||
in
|
||||
copy (i + 1) len false
|
||||
| 's' when i = String.length s - 1 || s.[i + 1] = ' ' ->
|
||||
copy (i + 1) len false
|
||||
| c ->
|
||||
if i > 0 && s.[i - 1] = c then copy (i + 1) len false
|
||||
else
|
||||
let c = match c with 'k' | 'q' -> 'c' | 'z' -> 's' | c -> c in
|
||||
copy (i + 1) (Buff.store len c) false)
|
||||
in
|
||||
copy 0 0 true
|
||||
|
||||
(* strip_lower *)
|
||||
|
||||
(* strip_lower = strip o lower, as first comparison of names.
|
||||
First names and Surnames comparison is strip_lower equality. *)
|
||||
let strip_lower s = strip (lower s)
|
||||
|
||||
(* crush_lower *)
|
||||
|
||||
(* crush_lower = crush o abbrev o lower, as second comparison of names.
|
||||
In index by names, the "names" are crush_lowers *)
|
||||
let crush_lower s = crush (abbrev (lower s))
|
||||
|
||||
(* concat two strings using Bytes module *)
|
||||
let concat_aux fn l1 sn l2 =
|
||||
let b = Bytes.create (l1 + l2 + 1) in
|
||||
Bytes.blit_string fn 0 b 0 l1;
|
||||
Bytes.blit_string sn 0 b (l1 + 1) l2;
|
||||
Bytes.unsafe_set b l1 ' ';
|
||||
Bytes.unsafe_to_string b
|
||||
|
||||
let concat fn sn = concat_aux fn (String.length fn) sn (String.length sn)
|
||||
let contains_forbidden_char s = List.exists (String.contains s) forbidden_char
|
||||
|
||||
(* Copy/paste from String.split_on_char adapted to our needs *)
|
||||
let split_sname_callback fn s =
|
||||
let open String in
|
||||
let j = ref (length s) in
|
||||
for i = length s - 1 downto 0 do
|
||||
if match unsafe_get s i with ' ' | '-' -> true | _ -> false then (
|
||||
fn (i + 1) (!j - i - 1);
|
||||
j := i)
|
||||
done;
|
||||
fn 0 !j
|
||||
|
||||
(* Copy/paste from String.split_on_char adapted to our needs *)
|
||||
let split_fname_callback fn s =
|
||||
let open String in
|
||||
let j = ref (length s) in
|
||||
for i = length s - 1 downto 0 do
|
||||
if unsafe_get s i = ' ' then (
|
||||
fn (i + 1) (!j - i - 1);
|
||||
j := i)
|
||||
done;
|
||||
fn 0 !j
|
||||
|
||||
let split_sname s =
|
||||
let r = ref [] in
|
||||
split_sname_callback (fun i j -> r := String.sub s i j :: !r) s;
|
||||
!r
|
||||
|
||||
let split_fname s =
|
||||
let r = ref [] in
|
||||
split_fname_callback (fun i j -> r := String.sub s i j :: !r) s;
|
||||
!r
|
||||
86
lib/util/name.mli
Normal file
86
lib/util/name.mli
Normal file
@@ -0,0 +1,86 @@
|
||||
(* Copyright (c) 1998-2007 INRIA *)
|
||||
|
||||
val forbidden_char : char list
|
||||
(** List of forbidden to use characters *)
|
||||
|
||||
val unaccent_utf_8 : bool -> string -> int -> string * int
|
||||
(** [unaccent_utf_8 lower s i] checks UTF-8 characher that starts at position [i] inside [s]
|
||||
and returns couple (cs,np) where [cs] is ASCII representation of this character (characters
|
||||
between 0x00 and 0x7F) and [np] it's a position of next utf8 character inside [s]. If [lower]
|
||||
is true then [cs] will contain only lowercase letters.
|
||||
Example : unaccent_utf_8 "aÈa" 1 -> ("e",3) *)
|
||||
|
||||
val next_chars_if_equiv : string -> int -> string -> int -> (int * int) option
|
||||
(** [next_chars_if_equiv s1 i1 s2 i2] checks if UTF-8 characters that start at position
|
||||
[i1] inside [s1] and at [i2] inside [s2] are equivalent (have the same ASCII representation).
|
||||
In this case returns position of the next charecter for each of them. Otherwise, returns None. *)
|
||||
|
||||
val lower : string -> string
|
||||
(** Convert every letter to lowercase and use *unidecode* library to
|
||||
represent unicode characters with ASCII. Non-alphanumeric characters
|
||||
(except '.') are remplaced by space. *)
|
||||
|
||||
val title : string -> string
|
||||
(** Apply uppercasing to the first letter of each name (sequence of alphabetic characters) part,
|
||||
and lowercasing to the rest of the text. *)
|
||||
|
||||
val abbrev : string -> string
|
||||
(** Remplace by an abbreviation or remove particles inside the name *)
|
||||
|
||||
val strip : string -> string
|
||||
(** Removes all the spaces inside the name *)
|
||||
|
||||
val strip_c : string -> char -> string
|
||||
(** [strip_c s c] removes all the occurences of [c] inside the name *)
|
||||
|
||||
val purge : string -> string
|
||||
(** Removes all the forbiden characters from [forbidden_char] inside the name *)
|
||||
|
||||
val crush : string -> string
|
||||
(** A custom sonnex/soundex-like phonetic algorithm:
|
||||
- no spaces
|
||||
- roman numbers are keeped
|
||||
- vowels are suppressed, except in words starting with a vowel,
|
||||
where this vowel is converted into "e"
|
||||
- "k" and "q" replaced by "c"
|
||||
- "y" replaced by "i"
|
||||
- "z" replaced by "s"
|
||||
- "ph" replaced by "f"
|
||||
- others "h" deleted
|
||||
- s at end of words are deleted
|
||||
- no double lowercase consons *)
|
||||
|
||||
val strip_lower : string -> string
|
||||
(** Equivalent to [strip o lower]. Used as:
|
||||
- First comparison of names.
|
||||
- Comparison for first names and surnames. *)
|
||||
|
||||
val crush_lower : string -> string
|
||||
(** Equivalent to [crush o abbrev o lower]. Used as:
|
||||
- Second comparison of names.
|
||||
- Key when index by names *)
|
||||
|
||||
val concat : string -> string -> string
|
||||
(** [concat fn sn] is [fn ^ " " ^ sn] but faster. *)
|
||||
|
||||
val contains_forbidden_char : string -> bool
|
||||
(** [contains_forbidden_char s] is [true] iif s contains forbidden characters *)
|
||||
|
||||
val split_sname_callback : (int -> int -> unit) -> string -> unit
|
||||
(** [split_sname_callback fn s]
|
||||
Same as [split_sname], but call [fn] with substring indexes instead of building
|
||||
a list *)
|
||||
|
||||
val split_fname_callback : (int -> int -> unit) -> string -> unit
|
||||
(** [split_fname_callback fn s]
|
||||
Same as [split_fname], but call [fn] with substring indexes instead of building
|
||||
a list *)
|
||||
|
||||
val split_sname : string -> string list
|
||||
(** [split_sname s] split the surname [s] in parts composing it.
|
||||
e.g. [split_sname base "Foo-Bar"] is [[ "Foo" ; "Bar"]] *)
|
||||
|
||||
val split_fname : string -> string list
|
||||
(** [split_fname s] split the string [s] representing multiple first names
|
||||
into this list of firstname.
|
||||
e.g. [split_fname base "Foo-Bar Baz"] is [[ "Foo-Bar" ; "Baz"]] *)
|
||||
58
lib/util/pqueue.ml
Normal file
58
lib/util/pqueue.ml
Normal file
@@ -0,0 +1,58 @@
|
||||
(* Copyright (c) 1998-2007 INRIA *)
|
||||
|
||||
module type OrderedType = sig
|
||||
type t
|
||||
|
||||
val leq : t -> t -> bool
|
||||
end
|
||||
|
||||
module type S = sig
|
||||
type elt
|
||||
type t
|
||||
|
||||
val empty : t
|
||||
val is_empty : t -> bool
|
||||
val add : elt -> t -> t
|
||||
val take : t -> elt * t
|
||||
val union : t -> t -> t
|
||||
end
|
||||
|
||||
module Make (Ord : OrderedType) = struct
|
||||
type elt = Ord.t
|
||||
|
||||
type t = tree list
|
||||
and tree = { node : elt; rank : int; list : t }
|
||||
|
||||
let link t1 t2 =
|
||||
if Ord.leq t1.node t2.node then
|
||||
{ node = t1.node; rank = t1.rank + 1; list = t2 :: t1.list }
|
||||
else { node = t2.node; rank = t2.rank + 1; list = t1 :: t2.list }
|
||||
|
||||
let rec ins t = function
|
||||
| [] -> [ t ]
|
||||
| t' :: ts -> if t.rank < t'.rank then t :: t' :: ts else ins (link t t') ts
|
||||
|
||||
let rec union fts1 fts2 =
|
||||
match (fts1, fts2) with
|
||||
| [], ts -> ts
|
||||
| ts, [] -> ts
|
||||
| t1 :: ts1, t2 :: ts2 ->
|
||||
if t1.rank < t2.rank then t1 :: union ts1 fts2
|
||||
else if t2.rank < t1.rank then t2 :: union fts1 ts2
|
||||
else ins (link t1 t2) (union ts1 ts2)
|
||||
|
||||
let empty : t = []
|
||||
let is_empty (q : t) = q = []
|
||||
let add x q = ins { node = x; rank = 0; list = [] } q
|
||||
|
||||
let rec getMin = function
|
||||
| [] -> raise Not_found
|
||||
| [ t ] -> (t, [])
|
||||
| t :: ts ->
|
||||
let t', ts' = getMin ts in
|
||||
if Ord.leq t.node t'.node then (t, ts) else (t', t :: ts')
|
||||
|
||||
let take ts =
|
||||
let t, ts = getMin ts in
|
||||
(t.node, union (List.rev t.list) ts)
|
||||
end
|
||||
48
lib/util/pqueue.mli
Normal file
48
lib/util/pqueue.mli
Normal file
@@ -0,0 +1,48 @@
|
||||
(* Copyright (c) 1998-2007 INRIA *)
|
||||
|
||||
(** Module [Pqueue]: priority queues. *)
|
||||
|
||||
(** This module implements priority queues, given a total ordering function
|
||||
over the elements inserted. All operations are purely applicative
|
||||
(no side effects).
|
||||
The implementation uses binomial queues from Chris Okasaki.
|
||||
"add", "take" and "union" are in o(log n) in the worst case. *)
|
||||
|
||||
(** The input signature of the functor [Pqueue.Make].
|
||||
[t] is the type of the inserted elements.
|
||||
[leq] is a total ordering function over the elements.
|
||||
This is a two-argument function [f] returning [True] if the
|
||||
first argument is less or equal to the second one. *)
|
||||
module type OrderedType = sig
|
||||
type t
|
||||
|
||||
val leq : t -> t -> bool
|
||||
end
|
||||
|
||||
(** Output signature for priority queue *)
|
||||
module type S = sig
|
||||
type elt
|
||||
(** Type of elementes contained in priority queues. *)
|
||||
|
||||
type t
|
||||
(** Type of priority queues. *)
|
||||
|
||||
val empty : t
|
||||
(** The empty queue. *)
|
||||
|
||||
val is_empty : t -> bool
|
||||
(** [is_empty q] checks the emptiness of [q]. *)
|
||||
|
||||
val add : elt -> t -> t
|
||||
(** [add x h] adds a new element [x] in heap [h]. *)
|
||||
|
||||
val take : t -> elt * t
|
||||
(** [take x] removes the minimum element of [x] and returns it;
|
||||
raises [Not_found] when [x] is empty. *)
|
||||
|
||||
val union : t -> t -> t
|
||||
(** [union q1 q2] returns heap constructed by union of [q1] [q2] *)
|
||||
end
|
||||
|
||||
(** Functor that creates instance of priority queue from given element type. *)
|
||||
module Make (Ord : OrderedType) : S with type elt = Ord.t
|
||||
46
lib/util/progrBar.ml
Normal file
46
lib/util/progrBar.ml
Normal file
@@ -0,0 +1,46 @@
|
||||
(* $Id: progrBar.ml,v 5.4 2007-02-01 10:28:55 ddr Exp $ *)
|
||||
|
||||
(* bar size in characters *)
|
||||
let size = 60
|
||||
let draw_rep = 5
|
||||
let draw = "|/-\\"
|
||||
let empty = ref '.'
|
||||
let full = ref '#'
|
||||
let draw_len = String.length draw
|
||||
let pb_cnt = size * draw_rep * draw_len
|
||||
|
||||
let start () =
|
||||
for _i = 1 to size do
|
||||
Printf.eprintf "%c" !empty
|
||||
done;
|
||||
Printf.eprintf "\013"
|
||||
|
||||
let run cnt max_cnt =
|
||||
let pb_cnt = if max_cnt < pb_cnt then size * draw_len else pb_cnt in
|
||||
let already_disp = cnt * size / max_cnt in
|
||||
let to_disp = (cnt + 1) * size / max_cnt in
|
||||
for _i = already_disp + 1 to to_disp do
|
||||
Printf.eprintf "%c" !full
|
||||
done;
|
||||
let already_disp = cnt * pb_cnt / max_cnt in
|
||||
let to_disp = (cnt + 1) * pb_cnt / max_cnt in
|
||||
(if cnt = max_cnt - 1 then Printf.eprintf " \008"
|
||||
else if to_disp > already_disp then
|
||||
let k = to_disp mod draw_len in
|
||||
let k = if k < 0 then draw_len + k else k in
|
||||
Printf.eprintf "%c\008" draw.[k]);
|
||||
flush stderr
|
||||
|
||||
let suspend () =
|
||||
Printf.eprintf "%c\n" !full;
|
||||
flush stderr
|
||||
|
||||
let restart cnt max_cnt =
|
||||
start ();
|
||||
for i = 0 to cnt do
|
||||
run i max_cnt
|
||||
done
|
||||
|
||||
let finish () =
|
||||
Printf.eprintf "\n";
|
||||
flush stderr
|
||||
24
lib/util/progrBar.mli
Normal file
24
lib/util/progrBar.mli
Normal file
@@ -0,0 +1,24 @@
|
||||
(* $Id: progrBar.mli,v 5.3 2007-02-01 10:28:55 ddr Exp $ *)
|
||||
|
||||
val empty : char ref
|
||||
(** Character that represents not passed part of progression bar *)
|
||||
|
||||
val full : char ref
|
||||
(** Character that represents passed part of progression bar *)
|
||||
|
||||
val start : unit -> unit
|
||||
(** Prints empty bar with carriage return. *)
|
||||
|
||||
val run : int -> int -> unit
|
||||
(** [run i len] modifies progression bar that is now filled proportionally to
|
||||
[i] by comparison with [len]. *)
|
||||
|
||||
val finish : unit -> unit
|
||||
(** Stop printing progression bar and prints a new line. *)
|
||||
|
||||
val suspend : unit -> unit
|
||||
(** Stop printing progression bar and prints a new line. *)
|
||||
|
||||
val restart : int -> int -> unit
|
||||
(** [restart i len] restart progression bar. It's equivalent to call successively
|
||||
[run] from 0 to [i]. *)
|
||||
103
lib/util/secure.ml
Normal file
103
lib/util/secure.ml
Normal file
@@ -0,0 +1,103 @@
|
||||
(* $Id: secure.ml,v 5.2 2007-01-19 01:53:17 ddr Exp $ *)
|
||||
(* Copyright (c) 1998-2007 INRIA *)
|
||||
|
||||
(* secure open; forbids to access anywhere in the machine;
|
||||
this is an extra security: the program should check for
|
||||
correct open instead of hoping Secure do it for it *)
|
||||
|
||||
let ok_r = ref []
|
||||
let assets_r = ref [ "gw" ]
|
||||
let bd_r = ref (Filename.concat Filename.current_dir_name "bases")
|
||||
|
||||
(* [decompose: string -> string list] decompose a path into a list of
|
||||
directory and a basename. "a/b/c" -> [ "a" ; "b"; "c" ] *)
|
||||
let decompose =
|
||||
let rec loop r s =
|
||||
let b = Filename.basename s in
|
||||
if b = "" || b = Filename.current_dir_name || b = Filename.dir_sep then
|
||||
let d = Filename.dirname s in
|
||||
if d = "" || d = Filename.current_dir_name then r
|
||||
else if d = s then d :: r
|
||||
else loop r d
|
||||
else if b = s then b :: r
|
||||
else loop (b :: r) (Filename.dirname s)
|
||||
in
|
||||
loop []
|
||||
|
||||
(* add asset to the list of allowed to acces assets *)
|
||||
let add_assets d =
|
||||
if not (List.mem d !assets_r) then (
|
||||
assets_r := List.rev (d :: List.rev !assets_r);
|
||||
ok_r := decompose d :: !ok_r)
|
||||
|
||||
(* set base dir to which acces could be allowed *)
|
||||
let set_base_dir d =
|
||||
let ok = decompose d in
|
||||
bd_r := d;
|
||||
ok_r := ok :: (List.filter (( <> ) ok)) !ok_r
|
||||
|
||||
(* get all assets *)
|
||||
let assets () = !assets_r
|
||||
let base_dir () = !bd_r
|
||||
|
||||
(* [list_check_prefix d df] returns either [None] if [d] is not a prefix of
|
||||
[df], or [Some suffix], where [df = d @ suffix] *)
|
||||
let list_check_prefix d df =
|
||||
let rec loop = function
|
||||
| x :: xl, y :: yl -> if x = y then loop (xl, yl) else None
|
||||
| [], df -> Some df
|
||||
| _, [] -> None
|
||||
in
|
||||
loop (d, df)
|
||||
|
||||
(** Check if a filename is safe to read:
|
||||
* it must not contain the '\000' character
|
||||
* it must either be relative to the local directory OR
|
||||
included in one of the allowed directories (base_dir or assets)
|
||||
* the relative part does not contain the '..' directory
|
||||
*)
|
||||
let check fname =
|
||||
if String.contains fname '\000' then false
|
||||
else
|
||||
let df = decompose fname in
|
||||
let rec loop = function
|
||||
| d :: dl -> (
|
||||
match list_check_prefix d df with
|
||||
| Some bf when not (List.mem Filename.parent_dir_name bf) -> true
|
||||
| _ -> loop dl)
|
||||
| [] ->
|
||||
if Filename.is_relative fname then
|
||||
not (List.mem Filename.parent_dir_name df)
|
||||
else false
|
||||
in
|
||||
loop !ok_r
|
||||
|
||||
let check_open fname =
|
||||
if not (check fname) then (
|
||||
if Sys.unix then (
|
||||
Printf.eprintf "*** secure rejects open %s\n" (String.escaped fname);
|
||||
flush stderr);
|
||||
raise (Sys_error "invalid access"))
|
||||
|
||||
(* The following functions perform a [check] before opening the file,
|
||||
preventing potential attacks on the system.
|
||||
*)
|
||||
let open_in fname =
|
||||
check_open fname;
|
||||
Stdlib.open_in fname
|
||||
|
||||
let open_in_bin fname =
|
||||
check_open fname;
|
||||
Stdlib.open_in_bin fname
|
||||
|
||||
let open_out fname =
|
||||
check_open fname;
|
||||
Stdlib.open_out fname
|
||||
|
||||
let open_out_bin fname =
|
||||
check_open fname;
|
||||
Stdlib.open_out_bin fname
|
||||
|
||||
let open_out_gen mode perm fname =
|
||||
check_open fname;
|
||||
Stdlib.open_out_gen mode perm fname
|
||||
36
lib/util/secure.mli
Normal file
36
lib/util/secure.mli
Normal file
@@ -0,0 +1,36 @@
|
||||
(* Copyright (c) 1998-2007 INRIA *)
|
||||
|
||||
val assets : unit -> string list
|
||||
(** Returns list of allowed to acces assets *)
|
||||
|
||||
val base_dir : unit -> string
|
||||
(** Returns directory where databases are installed to which acces is allowed *)
|
||||
|
||||
val add_assets : string -> unit
|
||||
(** Add new asset to the [assets] list *)
|
||||
|
||||
val set_base_dir : string -> unit
|
||||
(** Set base directory *)
|
||||
|
||||
val check : string -> bool
|
||||
(** Check if a filename is safe to read:
|
||||
- it must not contain the '\000' character
|
||||
- it must either be relative to the local directory OR
|
||||
included in one of the allowed directories (base_dir or assets)
|
||||
- the relative part does not contain the '..' directory
|
||||
*)
|
||||
|
||||
val open_in : string -> in_channel
|
||||
(** Secured version of [open_in] *)
|
||||
|
||||
val open_in_bin : string -> in_channel
|
||||
(** Secured version of [open_in_bin] *)
|
||||
|
||||
val open_out : string -> out_channel
|
||||
(** Secured version of [open_out] *)
|
||||
|
||||
val open_out_bin : string -> out_channel
|
||||
(** Secured version of [open_out_bin] *)
|
||||
|
||||
val open_out_gen : open_flag list -> int -> string -> out_channel
|
||||
(** Secured version of [open_out_gen] *)
|
||||
209
lib/util/utf8.ml
Normal file
209
lib/util/utf8.ml
Normal file
@@ -0,0 +1,209 @@
|
||||
(* TODO: replace with Unidecode.nbc
|
||||
when version constraint [= 0.2.0] will be removed *)
|
||||
let nbc c =
|
||||
if Char.code c < 0x80 then 1
|
||||
else if Char.code c < 0xC0 then invalid_arg "nbc"
|
||||
else if Char.code c < 0xE0 then 2
|
||||
else if Char.code c < 0xF0 then 3
|
||||
else if Char.code c < 0xF8 then 4
|
||||
else if Char.code c < 0xFC then 5
|
||||
else if Char.code c < 0xFE then 6
|
||||
else invalid_arg "nbc"
|
||||
|
||||
let next s i = i + nbc s.[i]
|
||||
|
||||
let get s i =
|
||||
let rec loop i k = if k = 0 then i else loop (next s i) (pred k) in
|
||||
loop 0 i
|
||||
|
||||
let length s =
|
||||
let rec loop i len =
|
||||
if i < String.length s then loop (next s i) (succ len) else len
|
||||
in
|
||||
loop 0 0
|
||||
|
||||
let sub ?pad str start len =
|
||||
let strlen = String.length str in
|
||||
let n, i =
|
||||
let rec loop n i =
|
||||
if n = len || strlen <= i then (n, i) else loop (n + 1) (next str i)
|
||||
in
|
||||
loop 0 start
|
||||
in
|
||||
if n = len then String.sub str start (i - start)
|
||||
else
|
||||
match pad with
|
||||
| None -> raise (Invalid_argument "str_sub")
|
||||
| Some pad ->
|
||||
let bytes = Bytes.make (i - start + len - n) pad in
|
||||
Bytes.blit
|
||||
(Bytes.unsafe_of_string str)
|
||||
start bytes 0 (String.length str);
|
||||
Bytes.unsafe_to_string bytes
|
||||
|
||||
(**/**)
|
||||
|
||||
(* cmap_utf_8 code code comes from
|
||||
http://erratique.ch/software/uucp/doc/Uucp.Case.html *)
|
||||
let cmap_utf_8 cmap s =
|
||||
let b = Buffer.create (String.length s * 2) in
|
||||
let add_map _ _ u =
|
||||
let u = match u with `Malformed _ -> Uutf.u_rep | `Uchar u -> u in
|
||||
match cmap u with
|
||||
| `Self -> Uutf.Buffer.add_utf_8 b u
|
||||
| `Uchars us -> List.iter (Uutf.Buffer.add_utf_8 b) us
|
||||
in
|
||||
Uutf.String.fold_utf_8 add_map () s;
|
||||
Buffer.contents b
|
||||
|
||||
(**/**)
|
||||
|
||||
let lowercase s = cmap_utf_8 Uucp.Case.Map.to_lower s
|
||||
let uppercase s = cmap_utf_8 Uucp.Case.Map.to_upper s
|
||||
|
||||
let capitalize_fst s =
|
||||
let first = ref true in
|
||||
let cmap u =
|
||||
if !first then (
|
||||
first := false;
|
||||
Uucp.Case.Map.to_upper u)
|
||||
else `Self
|
||||
in
|
||||
cmap_utf_8 cmap s
|
||||
|
||||
let capitalize s =
|
||||
let first = ref true in
|
||||
let cmap u =
|
||||
if !first then (
|
||||
first := false;
|
||||
Uucp.Case.Map.to_upper u)
|
||||
else Uucp.Case.Map.to_lower u
|
||||
in
|
||||
cmap_utf_8 cmap s
|
||||
|
||||
module C = struct
|
||||
type t = Str of string | Chr of char | Empty
|
||||
|
||||
let unaccent trimmed s i0 len =
|
||||
let rec loop i =
|
||||
if i < len then
|
||||
match
|
||||
match Char.code @@ String.unsafe_get s i with
|
||||
(* A..Z *)
|
||||
| ( 0x41 | 0x42 | 0x43 | 0x44 | 0x45 | 0x46 | 0x47 | 0x48 | 0x49
|
||||
| 0x4A | 0x4B | 0x4C | 0x4D | 0x4E | 0x4F | 0x50 | 0x51 | 0x52
|
||||
| 0x53 | 0x54 | 0x55 | 0x56 | 0x57 | 0x58 | 0x59 | 0x5A ) as c ->
|
||||
(Chr (Char.unsafe_chr @@ (c + 32)), i, succ i)
|
||||
(* a..z *)
|
||||
| ( 0x61 | 0x62 | 0x63 | 0x64 | 0x65 | 0x66 | 0x67 | 0x68 | 0x69
|
||||
| 0x6A | 0x6B | 0x6C | 0x6D | 0x6E | 0x6F | 0x70 | 0x71 | 0x72
|
||||
| 0x73 | 0x74 | 0x75 | 0x76 | 0x77 | 0x78 | 0x79 | 0x7A
|
||||
(* 0..9 *)
|
||||
| 0x30 | 0x31 | 0x32 | 0x33 | 0x34 | 0x35 | 0x36 | 0x37 | 0x38
|
||||
| 0x39 ) as c ->
|
||||
(Chr (Char.unsafe_chr c), i, succ i)
|
||||
(* '-' | ' ' | '\'' *)
|
||||
| (0x2D | 0x20 | 0x27) as c ->
|
||||
((if trimmed then Chr (Char.unsafe_chr c) else Empty), i, succ i)
|
||||
| _ ->
|
||||
Unidecode.decode
|
||||
(fun n -> function
|
||||
| "" -> (Empty, i, n)
|
||||
| s -> (Str (String.lowercase_ascii s), i, n))
|
||||
(fun n c ->
|
||||
match Char.lowercase_ascii c with
|
||||
| ('a' .. 'z' | '0' .. '9') as c -> (Chr c, i, n)
|
||||
| ('-' | ' ' | '\'') as c ->
|
||||
((if trimmed then Chr c else Empty), i, n)
|
||||
| _ -> (Empty, i, n))
|
||||
(fun n -> (Empty, i, n))
|
||||
s i len
|
||||
with
|
||||
| Empty, _, n -> loop n
|
||||
| x, i, n -> (x, i, n)
|
||||
else (Empty, i0, len)
|
||||
in
|
||||
loop i0
|
||||
|
||||
(* See BatUTF8.look source (from batteries) *)
|
||||
let cp s i =
|
||||
Uchar.of_int
|
||||
@@
|
||||
let n = Char.code (String.unsafe_get s i) in
|
||||
if n < 0x80 then n
|
||||
else if n <= 0xdf then
|
||||
((n - 0xc0) lsl 6) lor (0x7f land Char.code (String.unsafe_get s (i + 1)))
|
||||
else if n <= 0xef then
|
||||
let n' = n - 0xe0 in
|
||||
let m = Char.code (String.unsafe_get s (i + 1)) in
|
||||
let n' = (n' lsl 6) lor (0x7f land m) in
|
||||
let m = Char.code (String.unsafe_get s (i + 2)) in
|
||||
(n' lsl 6) lor (0x7f land m)
|
||||
else
|
||||
let n' = n - 0xf0 in
|
||||
let m = Char.code (String.unsafe_get s (i + 1)) in
|
||||
let n' = (n' lsl 6) lor (0x7f land m) in
|
||||
let m = Char.code (String.unsafe_get s (i + 2)) in
|
||||
let n' = (n' lsl 6) lor (0x7f land m) in
|
||||
let m = Char.code (String.unsafe_get s (i + 3)) in
|
||||
(n' lsl 6) lor (0x7f land m)
|
||||
|
||||
(* compare bytes (UTF-8 charachter) delimited by intevals [i1,j1] and [i2,j2] *)
|
||||
let cmp_substring s1 i1 j1 s2 i2 j2 =
|
||||
let l1 = j1 - i1 in
|
||||
let l2 = j2 - i2 in
|
||||
if l1 = 1 && l2 = 1 then
|
||||
(* Optimize ASCII characters comparison *)
|
||||
Char.compare
|
||||
(Char.lowercase_ascii @@ String.get s1 i1)
|
||||
(Char.lowercase_ascii @@ String.get s2 i2)
|
||||
else
|
||||
let c1 = cp s1 i1 in
|
||||
let c2 = cp s2 i2 in
|
||||
let c1 =
|
||||
match Uucp.Case.Fold.fold c1 with `Self -> [ c1 ] | `Uchars us -> us
|
||||
in
|
||||
let c2 =
|
||||
match Uucp.Case.Fold.fold c2 with `Self -> [ c2 ] | `Uchars us -> us
|
||||
in
|
||||
Stdlib.compare c1 c2
|
||||
|
||||
(* See [Utf8.compare] *)
|
||||
let compare n1 n2 =
|
||||
let trimmed1 = ref false in
|
||||
let trimmed2 = ref false in
|
||||
let rec loop i1 i2 =
|
||||
if i1 >= String.length n1 && i2 >= String.length n2 then i1 - i2
|
||||
else if i1 >= String.length n1 then -1
|
||||
else if i2 >= String.length n2 then 1
|
||||
else
|
||||
let c1, i1, ii1 = unaccent !trimmed1 n1 i1 (String.length n1) in
|
||||
let c2, i2, ii2 = unaccent !trimmed2 n2 i2 (String.length n2) in
|
||||
let () = trimmed1 := true in
|
||||
let () = trimmed2 := true in
|
||||
let cmp_aux = function
|
||||
| 0 -> (
|
||||
match cmp_substring n1 i1 ii1 n2 i2 ii2 with
|
||||
| 0 -> loop ii1 ii2
|
||||
| x -> x)
|
||||
| x -> x
|
||||
in
|
||||
match (c1, c2) with
|
||||
| Str s1, Str s2 -> cmp_aux (String.compare s1 s2)
|
||||
| Chr c1, Chr c2 -> cmp_aux (Char.compare c1 c2)
|
||||
| Empty, Empty -> cmp_aux 0
|
||||
| (Str _ | Chr _), Empty -> -1
|
||||
| Empty, (Str _ | Chr _) -> 1
|
||||
| Str s1, Chr c2 -> (
|
||||
match Char.compare (String.unsafe_get s1 0) c2 with
|
||||
| 0 -> 1
|
||||
| x -> x)
|
||||
| Chr c1, Str s2 -> (
|
||||
match Char.compare c1 (String.unsafe_get s2 0) with
|
||||
| 0 -> -1
|
||||
| x -> x)
|
||||
in
|
||||
if n1 = n2 then 0 else loop 0 0
|
||||
end
|
||||
|
||||
let compare = C.compare
|
||||
68
lib/util/utf8.mli
Normal file
68
lib/util/utf8.mli
Normal file
@@ -0,0 +1,68 @@
|
||||
val nbc : char -> int
|
||||
(** Return the number of bytes composing the UTF8 character starting with [c] *)
|
||||
|
||||
val next : string -> int -> int
|
||||
(** [Utf8.next s i] returns the index of the character comming after
|
||||
the one which starts at [i]. *)
|
||||
|
||||
val get : string -> int -> int
|
||||
(** [Utf8.get s n] returns the index where the [n]-th character
|
||||
starts in string [s]. *)
|
||||
|
||||
val length : string -> int
|
||||
(** Return the length (number of characters, not bytes)
|
||||
of the given string. *)
|
||||
|
||||
val sub : ?pad:char -> string -> int -> int -> string
|
||||
(** [sub ?pad s start len]
|
||||
Return a fresh UTF8-friendly substring of [len] characters, padded if needed.
|
||||
Be careful [start] is the index of the byte where to start in [s],
|
||||
not the [start-th] UTF8-character. *)
|
||||
|
||||
val cmap_utf_8 :
|
||||
(Uchar.t -> [< `Self | `Uchars of Uchar.t list ]) -> string -> string
|
||||
(** [cmap_utf_8 cmap s] returns the UTF-8 encoded string
|
||||
resulting from applying the character map [cmap] to every character
|
||||
of the UTF-8 encoded string [s]. *)
|
||||
|
||||
val lowercase : string -> string
|
||||
(** Returns UTF-8 encoded string with all uppercase letters translated to lowercase *)
|
||||
|
||||
val uppercase : string -> string
|
||||
(** Returns UTF-8 encoded string with all lowercase letters translated to uppercase *)
|
||||
|
||||
val capitalize_fst : string -> string
|
||||
(** Returns UTF-8 encoded string where the first letter is capitalised *)
|
||||
|
||||
val capitalize : string -> string
|
||||
(** Returns UTF-8 encoded string where the first letter is capitalised and others minimalised *)
|
||||
|
||||
module C : sig
|
||||
(** Utf8 char type. *)
|
||||
type t = Str of string | Chr of char | Empty
|
||||
|
||||
val unaccent : bool -> string -> int -> int -> t * int * int
|
||||
(** [unaccent trimmed s i0 len]
|
||||
Returns [(t, start, next)]: next UTF-8 character in string [s] starting at position [i0].
|
||||
The diacritic marks are removed, character is also case lowered, and any character
|
||||
returning [Empty] (unsupported or reported as empty) is ignored: the next character in [s]
|
||||
will be picked except if you reach [len]. In that case, [Empty] is returned.
|
||||
|
||||
[start] is the byte offset in [s] where the resulting character [t] starts.
|
||||
[next] is the offset of the byte after [t].
|
||||
*)
|
||||
|
||||
val cp : string -> int -> Uchar.t
|
||||
(** [cp s i] returns the Unicode code point of the character starting
|
||||
at [i]-th byte. *)
|
||||
end
|
||||
|
||||
val compare : string -> string -> int
|
||||
(** [compare a b] compare normalized version of [a] and [b]
|
||||
It is case insensitive.
|
||||
It starts with unaccented comparison of [a] and [b],
|
||||
and refine the result with accents comparison.
|
||||
|
||||
Here is an exemple of how letters would be sorted:
|
||||
[A À Á  B C Ç Č D E É L Ł Ô Ö Ø Œ P Q R * . ?]
|
||||
*)
|
||||
Reference in New Issue
Block a user