Files
Geneweb/lib/gwdb-legacy/outbase.ml
2024-03-05 22:01:20 +01:00

391 lines
14 KiB
OCaml

(* Copyright (c) 2006-2007 INRIA *)
open Dbdisk
let load_ascends_array base = base.data.ascends.load_array ()
let load_unions_array base = base.data.unions.load_array ()
let load_couples_array base = base.data.couples.load_array ()
let load_descends_array base = base.data.descends.load_array ()
let load_strings_array base = base.data.strings.load_array ()
let close_base base = base.func.cleanup ()
let save_mem = ref false
let verbose = Mutil.verbose
let trace s =
if !verbose then (
Printf.eprintf "*** %s\n" s;
flush stderr)
let count_error computed found =
Printf.eprintf "Count error. Computed %d. Found %d.\n" computed found;
flush stderr;
exit 2
let output_index_aux oc_inx oc_inx_acc ni =
let bpos = pos_out oc_inx in
(* output name index (circular hash table) in the "names.inx" and position for hashed value in the "names.acc" *)
Dutil.output_value_no_sharing oc_inx ni;
let epos =
Iovalue.output_array_access oc_inx_acc (Array.get ni) (Array.length ni) bpos
in
if epos <> pos_out oc_inx then count_error epos (pos_out oc_inx)
let make_name_index base =
let t = Array.make Dutil.table_size [] in
for i = 0 to base.data.persons.len - 1 do
let p = base.data.persons.get i in
(* not ? ? *)
if p.first_name <> 1 && p.first_name <> 1 then
List.iter (fun i -> Array.set t i @@ (p.key_index :: Array.get t i))
@@ Mutil.list_map_sort_uniq Dutil.name_index
@@ Dutil.dsk_person_misc_names base p (fun p -> p.titles)
done;
Array.map Array.of_list t
let create_name_index oc_inx oc_inx_acc base =
output_index_aux oc_inx oc_inx_acc (make_name_index base)
module IntSet = Set.Make (struct
type t = int
let compare = compare
end)
let make_strings_of_fsname_aux split get base =
let t = Array.make Dutil.table_size IntSet.empty in
let add_name (key : string) (value : int) =
let key = Dutil.name_index key in
let set = Array.get t key in
let set' = IntSet.add value set in
if set == set' then () else Array.set t key set'
in
for i = 0 to base.data.persons.len - 1 do
let p = Dutil.poi base i in
let aux istr =
if istr <> 1 then (
let s = base.data.strings.get istr in
add_name s istr;
split (fun i j -> add_name (String.sub s i j) istr) s)
in
aux (get p)
done;
Array.map
(fun set ->
let a = Array.make (IntSet.cardinal set) 0 in
let i = ref 0 in
IntSet.iter
(fun e ->
Array.set a !i e;
incr i)
set;
a)
t
let make_strings_of_fname =
make_strings_of_fsname_aux Name.split_fname_callback (fun p -> p.first_name)
let make_strings_of_sname =
make_strings_of_fsname_aux Name.split_sname_callback (fun p -> p.surname)
let create_strings_of_sname oc_inx oc_inx_acc base =
output_index_aux oc_inx oc_inx_acc (make_strings_of_sname base)
let create_strings_of_fname oc_inx oc_inx_acc base =
output_index_aux oc_inx oc_inx_acc (make_strings_of_fname base)
let is_prime a =
let rec loop b =
if a / b < b then true else if a mod b = 0 then false else loop (b + 1)
in
loop 2
let rec prime_after n = if is_prime n then n else prime_after (n + 1)
let output_strings_hash tmp_strings_inx base =
let oc = Secure.open_out_bin tmp_strings_inx in
let () = base.data.strings.load_array () in
let strings_array = base.data.strings in
let taba =
Array.make
(min Sys.max_array_length (prime_after (max 2 (10 * strings_array.len))))
(-1)
in
let tabl = Array.make strings_array.len (-1) in
for i = 0 to strings_array.len - 1 do
let ia = Hashtbl.hash (base.data.strings.get i) mod Array.length taba in
(* store last associated value associated to the same hash *)
tabl.(i) <- taba.(ia);
taba.(ia) <- i
done;
output_binary_int oc (Array.length taba);
for i = 0 to Array.length taba - 1 do
output_binary_int oc taba.(i)
done;
for i = 0 to Array.length tabl - 1 do
output_binary_int oc tabl.(i)
done;
close_out oc
(* Associate istr to persons.
A person is associated with its first name/surname and aliases
*)
let output_name_index_aux cmp get base names_inx names_dat =
let ht = Dutil.IntHT.create 0 in
for i = 0 to base.data.persons.len - 1 do
let p = base.data.persons.get i in
let k = get p in
match Dutil.IntHT.find_opt ht k with
| Some list -> Dutil.IntHT.replace ht k (p.key_index :: list)
| None -> Dutil.IntHT.add ht k [ p.key_index ]
done;
let a = Array.make (Dutil.IntHT.length ht) (0, []) in
ignore
@@ Dutil.IntHT.fold
(fun k v i ->
Array.set a i (k, v);
succ i)
ht 0;
(* sort by name behind the int order *)
Array.sort (fun (k, _) (k', _) -> cmp k k') a;
let oc_n_dat = Secure.open_out_bin names_dat in
let bt2 =
Array.map
(fun (k, ipl) ->
let off = pos_out oc_n_dat in
output_binary_int oc_n_dat (List.length ipl);
List.iter (output_binary_int oc_n_dat) ipl;
(k, off))
a
in
close_out oc_n_dat;
let oc_n_inx = Secure.open_out_bin names_inx in
Dutil.output_value_no_sharing oc_n_inx (bt2 : (int * int) array);
close_out oc_n_inx
let output_surname_index base tmp_snames_inx tmp_snames_dat =
output_name_index_aux
(Dutil.compare_snames_i base.data)
(fun p -> p.surname)
base tmp_snames_inx tmp_snames_dat
let output_first_name_index base tmp_fnames_inx tmp_fnames_dat =
output_name_index_aux
(Dutil.compare_fnames_i base.data)
(fun p -> p.first_name)
base tmp_fnames_inx tmp_fnames_dat
let output_particles_file particles fname =
let oc = open_out fname in
List.iter (fun s -> Printf.fprintf oc "%s\n" (Mutil.tr ' ' '_' s)) particles;
close_out oc
let output base =
(* create database directory *)
let bname = base.data.bdir in
if not (Sys.file_exists bname) then Unix.mkdir bname 0o755;
(* temporary files *)
let tmp_particles = Filename.concat bname "1particles.txt" in
let tmp_base = Filename.concat bname "1base" in
let tmp_base_acc = Filename.concat bname "1base.acc" in
let tmp_names_inx = Filename.concat bname "1names.inx" in
let tmp_names_acc = Filename.concat bname "1names.acc" in
let tmp_snames_inx = Filename.concat bname "1snames.inx" in
let tmp_snames_dat = Filename.concat bname "1snames.dat" in
let tmp_fnames_inx = Filename.concat bname "1fnames.inx" in
let tmp_fnames_dat = Filename.concat bname "1fnames.dat" in
let tmp_strings_inx = Filename.concat bname "1strings.inx" in
let tmp_notes = Filename.concat bname "1notes" in
let tmp_notes_d = Filename.concat bname "1notes_d" in
load_ascends_array base;
load_unions_array base;
load_couples_array base;
load_descends_array base;
load_strings_array base;
let oc = Secure.open_out_bin tmp_base in
let oc_acc = Secure.open_out_bin tmp_base_acc in
let output_array arrname arr =
let bpos = pos_out oc in
if !verbose then Printf.eprintf "*** saving %s array\n" arrname;
flush stderr;
arr.output_array oc;
let epos = Iovalue.output_array_access oc_acc arr.get arr.len bpos in
if epos <> pos_out oc then count_error epos (pos_out oc)
in
(try
(* output header of "base" *)
output_string oc Dutil.magic_GnWb0024;
output_binary_int oc base.data.persons.len;
output_binary_int oc base.data.families.len;
output_binary_int oc base.data.strings.len;
let array_start_indexes = pos_out oc in
output_binary_int oc 0;
output_binary_int oc 0;
output_binary_int oc 0;
output_binary_int oc 0;
output_binary_int oc 0;
output_binary_int oc 0;
output_binary_int oc 0;
Dutil.output_value_no_sharing oc
(base.data.bnotes.Def.norigin_file : string);
(* output arrays in the "base" and position for each element in the "base.acc" *)
let persons_array_pos = pos_out oc in
output_array "persons" base.data.persons;
let ascends_array_pos = pos_out oc in
output_array "ascends" base.data.ascends;
let unions_array_pos = pos_out oc in
output_array "unions" base.data.unions;
let families_array_pos = pos_out oc in
output_array "families" base.data.families;
let couples_array_pos = pos_out oc in
output_array "couples" base.data.couples;
let descends_array_pos = pos_out oc in
output_array "descends" base.data.descends;
let strings_array_pos = pos_out oc in
output_array "strings" base.data.strings;
(* output arrays position in the header *)
seek_out oc array_start_indexes;
output_binary_int oc persons_array_pos;
output_binary_int oc ascends_array_pos;
output_binary_int oc unions_array_pos;
output_binary_int oc families_array_pos;
output_binary_int oc couples_array_pos;
output_binary_int oc descends_array_pos;
output_binary_int oc strings_array_pos;
base.data.families.clear_array ();
base.data.descends.clear_array ();
close_out oc;
close_out oc_acc;
(let oc_inx = Secure.open_out_bin tmp_names_inx in
let oc_inx_acc = Secure.open_out_bin tmp_names_acc in
try
trace "create name index";
output_binary_int oc_inx 0;
(* room for sname index *)
output_binary_int oc_inx 0;
(* room for fname index *)
create_name_index oc_inx oc_inx_acc base;
base.data.ascends.clear_array ();
base.data.unions.clear_array ();
base.data.couples.clear_array ();
if !save_mem then (
trace "compacting";
Gc.compact ());
let surname_pos = pos_out oc_inx in
trace "create strings of sname";
create_strings_of_sname oc_inx oc_inx_acc base;
let first_name_pos = pos_out oc_inx in
trace "create strings of fname";
create_strings_of_fname oc_inx oc_inx_acc base;
seek_out oc_inx 0;
(* sname index *)
output_binary_int oc_inx surname_pos;
seek_out oc_inx 1;
(* fname index *)
output_binary_int oc_inx first_name_pos;
close_out oc_inx;
close_out oc_inx_acc;
if !save_mem then (
trace "compacting";
Gc.compact ());
Gc.compact ();
trace "create string index";
output_strings_hash tmp_strings_inx base;
if !save_mem then (
trace "compacting";
Gc.compact ());
trace "create surname index";
output_surname_index base tmp_snames_inx tmp_snames_dat;
if !save_mem then (
trace "compacting";
Gc.compact ());
trace "create first name index";
output_first_name_index base tmp_fnames_inx tmp_fnames_dat;
let s = base.data.bnotes.Def.nread "" Def.RnAll in
(if s = "" then ()
else
let oc_not = Secure.open_out tmp_notes in
output_string oc_not s;
close_out oc_not);
List.iter
(fun f ->
let s = base.data.bnotes.Def.nread f Def.RnAll in
let fname = Filename.concat tmp_notes_d (f ^ ".txt") in
Mutil.mkdir_p (Filename.dirname fname);
let oc = open_out fname in
output_string oc s;
close_out oc)
(List.rev (base.data.bnotes.Def.efiles ()));
output_particles_file base.data.particles_txt tmp_particles
with e ->
(try close_out oc_inx with _ -> ());
(try close_out oc_inx_acc with _ -> ());
raise e);
trace "ok";
let nbp =
let rec loop i acc =
if i = base.data.persons.len then acc
else
let p = base.data.persons.get i in
let acc =
if
p.key_index = -1
|| (0 = p.surname || 1 = p.surname)
&& (0 = p.first_name || 1 = p.first_name)
then acc
else acc + 1
in
loop (i + 1) acc
in
loop 0 0
in
let oc = Secure.open_out_bin @@ Filename.concat bname "nb_persons" in
output_value oc nbp;
close_out oc
with e ->
(try close_out oc with _ -> ());
(try close_out oc_acc with _ -> ());
Mutil.rm tmp_base;
Mutil.rm tmp_base_acc;
Mutil.rm tmp_names_inx;
Mutil.rm tmp_names_acc;
Mutil.rm tmp_strings_inx;
Mutil.remove_dir tmp_notes_d;
raise e);
close_base base;
Mutil.rm (Filename.concat bname "base");
Sys.rename tmp_base (Filename.concat bname "base");
Mutil.rm (Filename.concat bname "base.acc");
Sys.rename tmp_base_acc (Filename.concat bname "base.acc");
Mutil.rm (Filename.concat bname "names.inx");
Sys.rename tmp_names_inx (Filename.concat bname "names.inx");
Mutil.rm (Filename.concat bname "names.acc");
Sys.rename tmp_names_acc (Filename.concat bname "names.acc");
Mutil.rm (Filename.concat bname "snames.dat");
Sys.rename tmp_snames_dat (Filename.concat bname "snames.dat");
Mutil.rm (Filename.concat bname "snames.inx");
Sys.rename tmp_snames_inx (Filename.concat bname "snames.inx");
Mutil.rm (Filename.concat bname "fnames.dat");
Sys.rename tmp_fnames_dat (Filename.concat bname "fnames.dat");
Mutil.rm (Filename.concat bname "fnames.inx");
Sys.rename tmp_fnames_inx (Filename.concat bname "fnames.inx");
Mutil.rm (Filename.concat bname "strings.inx");
Sys.rename tmp_strings_inx (Filename.concat bname "strings.inx");
Sys.rename tmp_particles (Filename.concat bname "particles.txt");
Mutil.rm (Filename.concat bname "notes");
if Sys.file_exists tmp_notes then
Sys.rename tmp_notes (Filename.concat bname "notes");
if Sys.file_exists tmp_notes_d then (
let notes_d = Filename.concat bname "notes_d" in
Mutil.remove_dir notes_d;
Sys.rename tmp_notes_d notes_d);
Mutil.rm (Filename.concat bname "patches");
Mutil.rm (Filename.concat bname "patches~");
Mutil.rm (Filename.concat bname "synchro_patches");
Mutil.rm (Filename.concat bname "notes_link");
Mutil.rm (Filename.concat bname "restrict");
Mutil.rm (Filename.concat bname "tstab_visitor");
Mutil.rm (Filename.concat bname "nb_persons");
(* FIXME: should not be present in this part of the code? *)
Mutil.rm (Filename.concat bname "tstab");
Mutil.rm (Filename.concat bname "tstab_visitor")