Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/index-format/granular_map.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ module type S = sig
val find_opt : key -> 'a t -> 'a option
val choose_opt : 'a t -> (key * 'a) option
val iter : (key -> 'a -> unit) -> 'a t -> unit
val iter_in_memory : (key -> 'a -> unit) -> 'a t -> unit
val fold : (key -> 'a -> 'acc -> 'acc) -> 'a t -> 'acc -> 'acc
val map : ('a -> 'b) -> 'a t -> 'b t
val is_empty : 'a t -> bool
Expand Down Expand Up @@ -191,6 +192,15 @@ module Make (Ord : Map.OrderedType) = struct
f v d;
iter f r

let rec iter_in_memory f s =
if not (Granular_marshal.is_on_disk s) then
match fetch s with
| Empty -> ()
| Node { l; v; d; r; _ } ->
iter_in_memory f l;
f v d;
iter_in_memory f r

let rec map f s =
match fetch s with
| Empty -> empty ()
Expand Down
1 change: 1 addition & 0 deletions src/index-format/granular_map.mli
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ module type S = sig
val find_opt : key -> 'a t -> 'a option
val choose_opt : 'a t -> (key * 'a) option
val iter : (key -> 'a -> unit) -> 'a t -> unit
val iter_in_memory : (key -> 'a -> unit) -> 'a t -> unit
val fold : (key -> 'a -> 'acc -> 'acc) -> 'a t -> 'acc -> 'acc
val map : ('a -> 'b) -> 'a t -> 'b t
val is_empty : 'a t -> bool
Expand Down
72 changes: 54 additions & 18 deletions src/index-format/granular_marshal.ml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
module Cache = Hashtbl.Make (Int)

type store = { filename : string; cache : any_link Cache.t }
type store = { filename : string; cache : cache }

and cache = any_link Cache.t

and any_link = Link : 'a link * 'a link Type.Id.t -> any_link

Expand All @@ -11,6 +13,7 @@ and 'a repr =
| Serialized of { loc : int }
| Serialized_reused of { loc : int }
| On_disk of { store : store; loc : int; schema : 'a schema }
| On_disk_ptr of { filename : string; loc : int; id : int }
| In_memory of 'a
| In_memory_reused of 'a
| Duplicate of 'a link
Expand All @@ -24,11 +27,39 @@ let schema_no_sublinks : _ schema = fun _ _ -> ()

let link v = ref (In_memory v)

let is_on_disk lnk =
match !lnk with
| On_disk _ | On_disk_ptr _ -> true
| _ -> false

let rec normalize lnk =
match !lnk with
| Duplicate lnk -> normalize lnk
| _ -> lnk

module Cache_cache = File_cache.Make (struct
type t = cache
let read _filename = Cache.create 0

let cache_name = "Cache_cache"
end)

let ptr_size = 8

let binstring_of_int v =
String.init ptr_size (fun i -> Char.chr ((v lsr i lsl 3) land 255))

let int_of_binstring s =
Array.fold_right
(fun v acc -> (acc lsl 8) + v)
(Array.init ptr_size (fun i -> Char.code s.[i]))
0

let fetch_id filename =
In_channel.with_open_bin filename (fun fd ->
seek_in fd (String.length Config.index_magic_number);
int_of_binstring (really_input_string fd ptr_size))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks wrong to use with_open here, instead of checking an already open file descriptor: we have no guarantee that the file won't change in between the next time we open it to actually read from it. We can assume that the file contents will only be updated by creating a fresh new file (which guarantees a previously opened fd still see the old contents), so if we have checked a file descriptor there's no risk of a race :)


let read_loc store fd loc schema =
seek_in fd loc;
let v = Marshal.from_channel fd in
Expand All @@ -53,6 +84,14 @@ let read_loc store fd loc schema =
lnk := On_disk { store; loc; schema };
Cache.add store.cache loc (Link (lnk, type_id)))
| In_memory _ | In_memory_reused _ | On_disk _ | Duplicate _ -> ()
| On_disk_ptr { filename; loc; id } ->
let pointed_index_id = fetch_id filename in
if pointed_index_id = id then
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is too eager: We would want to delay this check in fetch when we have no choice but to read from the file (otherwise we are checking the id too early and the file may not have the same id when we re-open it to read it during fetch)

let store = { filename; cache = Cache_cache.read filename } in
lnk := On_disk { store; loc; schema }
else
failwith
"Granular_marshal.read_loc: pointing to an outdated index file"
| Placeholder -> invalid_arg "Granular_marshal.read_loc: Placeholder")
}
in
Expand Down Expand Up @@ -88,7 +127,7 @@ let fetch_loc store loc schema =
let rec fetch lnk =
match !lnk with
| In_memory v | In_memory_reused v -> v
| Serialized _ | Serialized_reused _ | Small _ ->
| Serialized _ | Serialized_reused _ | Small _ | On_disk_ptr _ ->
invalid_arg "Granular_marshal.fetch: serialized"
| Placeholder -> invalid_arg "Granular_marshal.fetch: during a write"
| Duplicate original_lnk ->
Expand Down Expand Up @@ -118,25 +157,20 @@ let cache (type a) (module Key : Hashtbl.HashedType with type t = a) =
lnk := Duplicate original_lnk
| exception Not_found -> H.add cache key lnk

let ptr_size = 8

let binstring_of_int v =
String.init ptr_size (fun i -> Char.chr ((v lsr i lsl 3) land 255))

let int_of_binstring s =
Array.fold_right
(fun v acc -> (acc lsl 8) + v)
(Array.init ptr_size (fun i -> Char.code s.[i]))
0

let write ?(flags = []) fd root_schema root_value =
let write ?(flags = []) fd root_schema root_value rand_state =
let id =
let buf = Bytes.create 8 in
Bytes.set_int64_be buf 0 (Random.State.int64 rand_state Int64.max_int);
Bytes.to_string buf
in
output_string fd id;
let pt_root = pos_out fd in
output_string fd (String.make ptr_size '\000');
let rec iter size ~placeholders ~restore =
{ yield =
(fun (type a) (lnk : a link) _type_id (schema : a schema) : unit ->
match !lnk with
| Serialized _ | Serialized_reused _ | Small _ -> ()
| Serialized _ | Serialized_reused _ | Small _ | On_disk_ptr _ -> ()
| Placeholder -> failwith "big nono"
| In_memory_reused v -> write_child_reused lnk schema v
| Duplicate original_lnk ->
Expand All @@ -146,8 +180,9 @@ let write ?(flags = []) fd root_schema root_value =
| _ -> failwith "Granular_marshal.write: duplicate not reused");
lnk := !original_lnk
| In_memory v -> write_child lnk schema v size ~placeholders ~restore
| On_disk _ ->
write_child lnk schema (fetch lnk) size ~placeholders ~restore)
| On_disk { store; loc; _ } ->
let id = fetch_id store.filename in
lnk := On_disk_ptr { filename = store.filename; loc; id })
}
and write_child : type a. a link -> a schema -> a -> _ =
fun lnk schema v size ~placeholders ~restore ->
Expand Down Expand Up @@ -185,7 +220,8 @@ let write ?(flags = []) fd root_schema root_value =
output_string fd (binstring_of_int root_loc)

let read filename fd root_schema =
let store = { filename; cache = Cache.create 0 } in
let store = { filename; cache = Cache_cache.read filename } in
let _id = really_input_string fd 8 in
let root_loc = int_of_binstring (really_input_string fd 8) in
let root_value = read_loc store fd root_loc root_schema in
root_value
9 changes: 5 additions & 4 deletions src/index-format/granular_marshal.mli
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ val reuse : 'a link -> unit
the same value, resulting in a compressed file. *)
val cache : 'a. (module Hashtbl.HashedType with type t = 'a) -> 'a link -> unit

(** [is_on_disk link] tests if [link] is stored in another index file. *)
val is_on_disk : 'a link -> bool

(** [fetch lnk] returns the value pointed by the link [lnk].

We of course have [fetch (link v) = v] and [link (fetch lnk) = lnk]. *)
Expand Down Expand Up @@ -53,11 +56,9 @@ and iter = { yield : 'a. 'a link -> 'a link Type.Id.t -> 'a schema -> unit }
(** A schema usable when the ['a] value does not contain any links. *)
val schema_no_sublinks : 'a schema

(** [write oc schema value] writes the [value] in the output channel [oc],
creating unmarshalling boundaries on every link in [value] specified
by the [schema]. *)
(** [write oc schema value rand_state] writes the [value] in the output channel [oc], creating unmarshalling boundaries on every link in [value] specified by the [schema]. [rand_state] is used to generate random index ID. *)
val write :
?flags:Marshal.extern_flags list -> out_channel -> 'a schema -> 'a -> unit
?flags:Marshal.extern_flags list -> out_channel -> 'a schema -> 'a -> Random.State.t -> unit

(** [read ic schema] reads the value marshalled in the input channel [ic],
stopping the unmarshalling on every link boundary indicated by the [schema].
Expand Down
10 changes: 10 additions & 0 deletions src/index-format/granular_set.ml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ module type S = sig
val union : t -> t -> t
val map : (elt -> elt) -> t -> t
val iter : (elt -> unit) -> t -> unit
val iter_in_memory : (elt -> unit) -> t -> unit
val cardinal : t -> int
val elements : t -> elt list
val fold : ('acc -> elt -> 'acc) -> 'acc -> t -> 'acc
Expand Down Expand Up @@ -266,6 +267,15 @@ module Make (Ord : Set.OrderedType) = struct
f v;
iter f r

let rec iter_in_memory f t =
if not (Granular_marshal.is_on_disk t) then
match fetch t with
| Empty -> ()
| Node { l; v; r; _ } ->
iter_in_memory f l;
f v;
iter_in_memory f r

let type_id = Type.Id.make ()

let rec schema iter f m =
Expand Down
1 change: 1 addition & 0 deletions src/index-format/granular_set.mli
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ module type S = sig
val union : t -> t -> t
val map : (elt -> elt) -> t -> t
val iter : (elt -> unit) -> t -> unit
val iter_in_memory : (elt -> unit) -> t -> unit
val cardinal : t -> int
val elements : t -> elt list
val fold : ('acc -> elt -> 'acc) -> 'acc -> t -> 'acc
Expand Down
6 changes: 4 additions & 2 deletions src/index-format/index_format.ml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ let index_schema (iter : Granular_marshal.iter) index =
let compress index =
let cache = Lid.cache () in
let compress_map_set =
Uid_map.iter (fun _ -> Lid_set.iter (Lid.deduplicate cache))
Uid_map.iter_in_memory (fun _ ->
Lid_set.iter_in_memory (Lid.deduplicate cache))
in
compress_map_set index.defs;
compress_map_set index.approximated;
Expand Down Expand Up @@ -130,10 +131,11 @@ let magic_number = Config.index_magic_number

let write ~file index =
let index = compress index in
let rand_state = Random.State.make_self_init () in
Misc.output_to_file_via_temporary ~mode:[ Open_binary ] file
(fun _temp_file_name oc ->
output_string oc magic_number;
Granular_marshal.write oc index_schema (index : index))
Granular_marshal.write oc index_schema (index : index) rand_state)

type file_content = Cmt of Cmt_format.cmt_infos | Index of index | Unknown

Expand Down
Loading