Scan the file-system lazily (#732)

Fix #228
Fix #718
This commit is contained in:
Jérémie Dimino 2018-05-01 16:55:31 +01:00 committed by GitHub
parent 744c182356
commit b54c438fda
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 104 additions and 64 deletions

View File

@ -23,6 +23,8 @@ next
- Fix attaching index.mld to documentation (#731, fixes #717 @rgrinberg)
- Scan the file system lazily (#732, fixes #718 and #228, @diml)
1.0+beta20 (10/04/2018)
-----------------------

View File

@ -3,25 +3,32 @@ open! Import
module Dir = struct
type t =
{ path : Path.t
; files : String.Set.t
; sub_dirs : t String.Map.t
; ignored : bool
; contents : contents Lazy.t
}
and contents =
{ files : String.Set.t
; sub_dirs : t String.Map.t
}
let contents t = Lazy.force t.contents
let path t = t.path
let files t = t.files
let sub_dirs t = t.sub_dirs
let ignored t = t.ignored
let files t = (contents t).files
let sub_dirs t = (contents t).sub_dirs
let file_paths t =
Path.Set.of_string_set t.files ~f:(Path.relative t.path)
Path.Set.of_string_set (files t) ~f:(Path.relative t.path)
let sub_dir_names t =
String.Map.foldi t.sub_dirs ~init:String.Set.empty
String.Map.foldi (sub_dirs t) ~init:String.Set.empty
~f:(fun s _ acc -> String.Set.add acc s)
let sub_dir_paths t =
String.Map.foldi t.sub_dirs ~init:Path.Set.empty
String.Map.foldi (sub_dirs t) ~init:Path.Set.empty
~f:(fun s _ acc -> Path.Set.add acc (Path.relative t.path s))
let rec fold t ~traverse_ignored_dirs ~init:acc ~f =
@ -29,13 +36,13 @@ module Dir = struct
acc
else
let acc = f t acc in
String.Map.fold t.sub_dirs ~init:acc ~f:(fun t acc ->
String.Map.fold (sub_dirs t) ~init:acc ~f:(fun t acc ->
fold t ~traverse_ignored_dirs ~init:acc ~f)
end
type t =
{ root : Dir.t
; dirs : Dir.t Path.Map.t
; dirs : (Path.t, Dir.t) Hashtbl.t
}
let root t = t.root
@ -47,69 +54,86 @@ let ignore_file fn ~is_directory =
let load ?(extra_ignored_subtrees=Path.Set.empty) path =
let rec walk path ~ignored : Dir.t =
let files, sub_dirs =
Path.readdir path
|> List.filter_partition_map ~f:(fun fn ->
let path = Path.relative path fn in
let is_directory = Path.is_directory path in
if ignore_file fn ~is_directory then
Skip
else if is_directory then
Right (fn, path)
let contents = lazy (
let files, sub_dirs =
Path.readdir path
|> List.filter_partition_map ~f:(fun fn ->
let path = Path.relative path fn in
let is_directory = Path.is_directory path in
if ignore_file fn ~is_directory then
Skip
else if is_directory then
Right (fn, path)
else
Left fn)
in
let files = String.Set.of_list files in
let ignored_sub_dirs =
if not ignored && String.Set.mem files "jbuild-ignore" then
let ignore_file = Path.relative path "jbuild-ignore" in
let files =
Io.lines_of_file ignore_file
in
let remove_subdirs index fn =
if Filename.dirname fn = Filename.current_dir_name then
true
else begin
Loc.(warn (of_pos ( Path.to_string ignore_file
, index + 1, 0, String.length fn))
"subdirectory expression %s ignored" fn);
false
end
in
String.Set.of_list (List.filteri ~f:remove_subdirs files)
else
Left fn)
in
let files = String.Set.of_list files in
let ignored_sub_dirs =
if not ignored && String.Set.mem files "jbuild-ignore" then
let ignore_file = Path.relative path "jbuild-ignore" in
let files =
Io.lines_of_file ignore_file
in
let remove_subdirs index fn =
if Filename.dirname fn = Filename.current_dir_name then
true
else begin
Loc.(warn (of_pos ( Path.to_string ignore_file
, index + 1, 0, String.length fn))
"subdirectory expression %s ignored" fn);
false
end
in
String.Set.of_list (List.filteri ~f:remove_subdirs files)
else
String.Set.empty
in
let sub_dirs =
List.map sub_dirs ~f:(fun (fn, path) ->
let ignored =
ignored
|| String.Set.mem ignored_sub_dirs fn
|| Path.Set.mem extra_ignored_subtrees path
in
(fn, walk path ~ignored))
|> String.Map.of_list_exn
String.Set.empty
in
let sub_dirs =
List.map sub_dirs ~f:(fun (fn, path) ->
let ignored =
ignored
|| String.Set.mem ignored_sub_dirs fn
|| Path.Set.mem extra_ignored_subtrees path
in
(fn, walk path ~ignored))
|> String.Map.of_list_exn
in
{ Dir. files; sub_dirs })
in
{ path
; files
; sub_dirs
; contents
; ignored
}
in
let root = walk path ~ignored:false in
let dirs =
Dir.fold root ~init:Path.Map.empty ~traverse_ignored_dirs:true
~f:(fun dir acc ->
Path.Map.add acc dir.path dir)
in
{ root
; dirs
}
let dirs = Hashtbl.create 1024 in
Hashtbl.add dirs Path.root root;
{ root; dirs }
let fold t ~traverse_ignored_dirs ~init ~f =
Dir.fold t.root ~traverse_ignored_dirs ~init ~f
let find_dir t path = Path.Map.find t.dirs path
let rec find_dir t path =
if not (Path.is_local path) then
None
else
match Hashtbl.find t.dirs path with
| Some _ as res -> res
| None ->
match
let open Option.O in
find_dir t (Path.parent path)
>>= fun parent ->
String.Map.find (Dir.sub_dirs parent) (Path.basename path)
with
| Some dir as res ->
Hashtbl.add t.dirs path dir;
res
| None ->
(* We don't cache failures in [t.dirs]. The expectation is
that these only happen when the user writes an invalid path
in a jbuild file, so there is no need to cache them. *)
None
let files_of t path =
match find_dir t path with
@ -118,12 +142,14 @@ let files_of t path =
Path.Set.of_string_set (Dir.files dir) ~f:(Path.relative path)
let file_exists t path fn =
match Path.Map.find t.dirs path with
match find_dir t path with
| None -> false
| Some { files; _ } -> String.Set.mem files fn
| Some dir -> String.Set.mem (Dir.files dir) fn
let dir_exists t path = Option.is_some (find_dir t path)
let exists t path =
Path.Map.mem t.dirs path ||
dir_exists t path ||
file_exists t (Path.parent path) (Path.basename path)
let files_recursively_in t ?(prefix_with=Path.root) path =

View File

@ -1,3 +1,5 @@
(** Dune representation of the source tree *)
open! Import
module Dir : sig
@ -22,10 +24,17 @@ module Dir : sig
-> 'a
end
(** A [t] value represent a view of the source tree. It is lazily
constructed by scanning the file system and interpreting [.dune-fs]
files, as well as [jbuild-ignore] files for backward
compatibility. *)
type t
val load : ?extra_ignored_subtrees:Path.Set.t -> Path.t -> t
(** Passing [~traverse_ignored_dirs:true] to this functions causes the
whole source tree to be deeply scanned, including ignored
directories. *)
val fold
: t
-> traverse_ignored_dirs:bool
@ -39,7 +48,10 @@ val find_dir : t -> Path.t -> Dir.t option
val files_of : t -> Path.t -> Path.Set.t
(** [true] iff the path is either a directory or a file *)
val exists : t -> Path.t -> bool
(** [true] iff the path is a file *)
val file_exists : t -> Path.t -> string -> bool
val files_recursively_in : t -> ?prefix_with:Path.t -> Path.t -> Path.Set.t