-
Notifications
You must be signed in to change notification settings - Fork 250
Add support for pointing indexes in ocaml-index #2051
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Tim-ats-d
wants to merge
9
commits into
ocaml:main
Choose a base branch
from
Tim-ats-d:pointing-indexes
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
acc2ec6
Progress.
Tim-ats-d c4a34e4
Clean up.
Tim-ats-d 7567b8f
Use shared cache sync with filesystem for ocaml-index file reading.
Tim-ats-d 75cf152
Use shared cache in `Granular_marshal.read`.
Tim-ats-d bc62cca
Attach a random number to each index file in order to prevent reading…
Tim-ats-d a8ee6a6
More precise error message.
Tim-ats-d 8411fc7
Use 8 bytes to store index file random ID.
Tim-ats-d d8c00da
Make `is_on_disk` behavior correct.
Tim-ats-d 38b82cd
Use a random state instead of initialize a global one.
Tim-ats-d File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,8 @@ | ||
| module Cache = Hashtbl.Make (Int) | ||
|
|
||
| type store = { filename : string; cache : any_link Cache.t } | ||
| type store = { filename : string; cache : cache } | ||
|
|
||
| and cache = any_link Cache.t | ||
|
|
||
| and any_link = Link : 'a link * 'a link Type.Id.t -> any_link | ||
|
|
||
|
|
@@ -11,6 +13,7 @@ and 'a repr = | |
| | Serialized of { loc : int } | ||
| | Serialized_reused of { loc : int } | ||
| | On_disk of { store : store; loc : int; schema : 'a schema } | ||
| | On_disk_ptr of { filename : string; loc : int; id : int } | ||
| | In_memory of 'a | ||
| | In_memory_reused of 'a | ||
| | Duplicate of 'a link | ||
|
|
@@ -24,11 +27,39 @@ let schema_no_sublinks : _ schema = fun _ _ -> () | |
|
|
||
| let link v = ref (In_memory v) | ||
|
|
||
| let is_on_disk lnk = | ||
| match !lnk with | ||
| | On_disk _ | On_disk_ptr _ -> true | ||
| | _ -> false | ||
|
|
||
| let rec normalize lnk = | ||
| match !lnk with | ||
| | Duplicate lnk -> normalize lnk | ||
| | _ -> lnk | ||
|
|
||
| module Cache_cache = File_cache.Make (struct | ||
| type t = cache | ||
| let read _filename = Cache.create 0 | ||
|
|
||
| let cache_name = "Cache_cache" | ||
| end) | ||
|
|
||
| let ptr_size = 8 | ||
|
|
||
| let binstring_of_int v = | ||
| String.init ptr_size (fun i -> Char.chr ((v lsr i lsl 3) land 255)) | ||
|
|
||
| let int_of_binstring s = | ||
| Array.fold_right | ||
| (fun v acc -> (acc lsl 8) + v) | ||
| (Array.init ptr_size (fun i -> Char.code s.[i])) | ||
| 0 | ||
|
|
||
| let fetch_id filename = | ||
| In_channel.with_open_bin filename (fun fd -> | ||
| seek_in fd (String.length Config.index_magic_number); | ||
| int_of_binstring (really_input_string fd ptr_size)) | ||
|
|
||
| let read_loc store fd loc schema = | ||
| seek_in fd loc; | ||
| let v = Marshal.from_channel fd in | ||
|
|
@@ -53,6 +84,14 @@ let read_loc store fd loc schema = | |
| lnk := On_disk { store; loc; schema }; | ||
| Cache.add store.cache loc (Link (lnk, type_id))) | ||
| | In_memory _ | In_memory_reused _ | On_disk _ | Duplicate _ -> () | ||
| | On_disk_ptr { filename; loc; id } -> | ||
| let pointed_index_id = fetch_id filename in | ||
| if pointed_index_id = id then | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is too eager: We would want to delay this check in |
||
| let store = { filename; cache = Cache_cache.read filename } in | ||
| lnk := On_disk { store; loc; schema } | ||
| else | ||
| failwith | ||
| "Granular_marshal.read_loc: pointing to an outdated index file" | ||
| | Placeholder -> invalid_arg "Granular_marshal.read_loc: Placeholder") | ||
| } | ||
| in | ||
|
|
@@ -88,7 +127,7 @@ let fetch_loc store loc schema = | |
| let rec fetch lnk = | ||
| match !lnk with | ||
| | In_memory v | In_memory_reused v -> v | ||
| | Serialized _ | Serialized_reused _ | Small _ -> | ||
| | Serialized _ | Serialized_reused _ | Small _ | On_disk_ptr _ -> | ||
| invalid_arg "Granular_marshal.fetch: serialized" | ||
| | Placeholder -> invalid_arg "Granular_marshal.fetch: during a write" | ||
| | Duplicate original_lnk -> | ||
|
|
@@ -118,25 +157,20 @@ let cache (type a) (module Key : Hashtbl.HashedType with type t = a) = | |
| lnk := Duplicate original_lnk | ||
| | exception Not_found -> H.add cache key lnk | ||
|
|
||
| let ptr_size = 8 | ||
|
|
||
| let binstring_of_int v = | ||
| String.init ptr_size (fun i -> Char.chr ((v lsr i lsl 3) land 255)) | ||
|
|
||
| let int_of_binstring s = | ||
| Array.fold_right | ||
| (fun v acc -> (acc lsl 8) + v) | ||
| (Array.init ptr_size (fun i -> Char.code s.[i])) | ||
| 0 | ||
|
|
||
| let write ?(flags = []) fd root_schema root_value = | ||
| let write ?(flags = []) fd root_schema root_value rand_state = | ||
| let id = | ||
| let buf = Bytes.create 8 in | ||
| Bytes.set_int64_be buf 0 (Random.State.int64 rand_state Int64.max_int); | ||
| Bytes.to_string buf | ||
| in | ||
| output_string fd id; | ||
| let pt_root = pos_out fd in | ||
| output_string fd (String.make ptr_size '\000'); | ||
| let rec iter size ~placeholders ~restore = | ||
| { yield = | ||
| (fun (type a) (lnk : a link) _type_id (schema : a schema) : unit -> | ||
| match !lnk with | ||
| | Serialized _ | Serialized_reused _ | Small _ -> () | ||
| | Serialized _ | Serialized_reused _ | Small _ | On_disk_ptr _ -> () | ||
| | Placeholder -> failwith "big nono" | ||
| | In_memory_reused v -> write_child_reused lnk schema v | ||
| | Duplicate original_lnk -> | ||
|
|
@@ -146,8 +180,9 @@ let write ?(flags = []) fd root_schema root_value = | |
| | _ -> failwith "Granular_marshal.write: duplicate not reused"); | ||
| lnk := !original_lnk | ||
| | In_memory v -> write_child lnk schema v size ~placeholders ~restore | ||
| | On_disk _ -> | ||
| write_child lnk schema (fetch lnk) size ~placeholders ~restore) | ||
| | On_disk { store; loc; _ } -> | ||
| let id = fetch_id store.filename in | ||
| lnk := On_disk_ptr { filename = store.filename; loc; id }) | ||
| } | ||
| and write_child : type a. a link -> a schema -> a -> _ = | ||
| fun lnk schema v size ~placeholders ~restore -> | ||
|
|
@@ -185,7 +220,8 @@ let write ?(flags = []) fd root_schema root_value = | |
| output_string fd (binstring_of_int root_loc) | ||
|
|
||
| let read filename fd root_schema = | ||
| let store = { filename; cache = Cache.create 0 } in | ||
| let store = { filename; cache = Cache_cache.read filename } in | ||
| let _id = really_input_string fd 8 in | ||
| let root_loc = int_of_binstring (really_input_string fd 8) in | ||
| let root_value = read_loc store fd root_loc root_schema in | ||
| root_value | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks wrong to use
with_openhere, instead of checking an already open file descriptor: we have no guarantee that the file won't change in between the next time we open it to actually read from it. We can assume that the file contents will only be updated by creating a fresh new file (which guarantees a previously opened fd still see the old contents), so if we have checked a file descriptor there's no risk of a race :)