From 508c90201f31c8347ddc7083bae846057cab31e7 Mon Sep 17 00:00:00 2001 From: Jeremie Dimino Date: Fri, 4 Aug 2017 11:41:58 +0100 Subject: [PATCH] Replace timestamp checks by file contents checks --- src/action.ml | 37 ++++++++++++++---- src/action_intf.ml | 1 + src/alias.ml | 6 ++- src/build_system.ml | 92 ++++++++++----------------------------------- src/sexp.ml | 6 +++ src/sexp.mli | 1 + src/utils.ml | 77 +++++++++++++++++++++++++++++++++++++ src/utils.mli | 13 +++++++ 8 files changed, 153 insertions(+), 80 deletions(-) diff --git a/src/action.ml b/src/action.ml index af4a4dd1..40d2829e 100644 --- a/src/action.ml +++ b/src/action.ml @@ -91,6 +91,7 @@ struct | Rename (x, y) -> List [Atom "rename"; path x; path y] | Remove_tree x -> List [Atom "remove-tree"; path x] | Mkdir x -> List [Atom "mkdir"; path x] + | Digest_files paths -> List [Atom "digest-files"; List (List.map paths ~f:path)] end module Make_mapper @@ -124,6 +125,7 @@ module Make_mapper | Rename (x, y) -> Rename (f_path x, f_path y) | Remove_tree x -> Remove_tree (f_path x) | Mkdir x -> Mkdir (f_path x) + | Digest_files x -> Digest_files (List.map x ~f:f_path) end module type Ast = Action_intf.Ast @@ -337,13 +339,16 @@ module Unexpanded = struct Rename (E.path ~dir ~f x, E.path ~dir ~f y) | Remove_tree x -> Remove_tree (E.path ~dir ~f x) - | Mkdir x -> + | Mkdir x -> begin match x with | Inl path -> Mkdir path | Inr tmpl -> let path = E.path ~dir ~f x in check_mkdir (SW.loc tmpl) path; Mkdir path + end + | Digest_files x -> + Digest_files (List.map x ~f:(E.path ~dir ~f)) end module E = struct @@ -439,6 +444,8 @@ module Unexpanded = struct | Inl path -> check_mkdir (SW.loc x) path | Inr _ -> ()); Mkdir res + | Digest_files x -> + Digest_files (List.map x ~f:(E.path ~dir ~f)) end let fold_one_step t ~init:acc ~f = @@ -460,7 +467,8 @@ let fold_one_step t ~init:acc ~f = | Update_file _ | Rename _ | Remove_tree _ - | Mkdir _ -> acc + | Mkdir _ + | Digest_files _ -> acc include Make_mapper(Ast)(Ast) @@ -506,6 +514,12 @@ let run ~ectx ~dir ~env_extra ~stdout_to ~stderr_to prog args = ~purpose:ectx.purpose (Path.reach_for_running ~from:dir prog) args +let exec_echo stdout_to str = + return + (match stdout_to with + | None -> print_string str; flush stdout + | Some (_, oc) -> output_string oc str) + let rec exec t ~ectx ~dir ~env_extra ~stdout_to ~stderr_to = match t with | Run (prog, args) -> @@ -521,11 +535,7 @@ let rec exec t ~ectx ~dir ~env_extra ~stdout_to ~stderr_to = redirect ~ectx ~dir outputs Config.dev_null t ~env_extra ~stdout_to ~stderr_to | Progn l -> exec_list l ~ectx ~dir ~env_extra ~stdout_to ~stderr_to - | Echo str -> - return - (match stdout_to with - | None -> print_string str; flush stdout - | Some (_, oc) -> output_string oc str) + | Echo str -> exec_echo stdout_to str | Cat fn -> Io.with_file_in (Path.to_string fn) ~f:(fun ic -> let oc = @@ -603,6 +613,16 @@ let rec exec t ~ectx ~dir ~env_extra ~stdout_to ~stderr_to = | Local path -> Path.Local.mkdir_p path); return () + | Digest_files paths -> + let s = + let data = + List.map paths ~f:(fun fn -> + (fn, Utils.Cached_digest.file fn)) + in + Digest.string + (Marshal.to_string data []) + in + exec_echo stdout_to s and redirect outputs fn t ~ectx ~dir ~env_extra ~stdout_to ~stderr_to = let fn = Path.to_string fn in @@ -682,6 +702,7 @@ module Infer = struct | Setenv (_, _, t) | Ignore (_, t) -> infer acc t | Progn l -> List.fold_left l ~init:acc ~f:infer + | Digest_files l -> List.fold_left l ~init:acc ~f:(+<) | Echo _ | System _ | Bash _ @@ -723,6 +744,7 @@ module Infer = struct | Setenv (_, _, t) | Ignore (_, t) -> partial acc t | Progn l -> List.fold_left l ~init:acc ~f:partial + | Digest_files l -> List.fold_left l ~init:acc ~f:(+ partial_with_all_targets acc t | Progn l -> List.fold_left l ~init:acc ~f:partial_with_all_targets + | Digest_files l -> List.fold_left l ~init:acc ~f:(+>> - Build.create_file alias.file) + Build.action ~targets:[alias.file] + (Redirect (Stdout, + alias.file, + Digest_files + (Path.Set.elements deps)))) in rule :: acc) diff --git a/src/build_system.ml b/src/build_system.ml index 8135371e..e9bbd10b 100644 --- a/src/build_system.ml +++ b/src/build_system.ml @@ -121,54 +121,17 @@ type t = { (* File specification by targets *) files : (Path.t, File_spec.packed) Hashtbl.t ; contexts : Context.t list - ; (* Table from target to digest of [(deps, targets, action)] *) + ; (* Table from target to digest of + [(deps (filename + contents), targets (filename only), action)] *) trace : (Path.t, Digest.t) Hashtbl.t - ; timestamps : (Path.t, float) Hashtbl.t ; mutable local_mkdirs : Path.Local.Set.t } - let all_targets t = Hashtbl.fold t.files ~init:[] ~f:(fun ~key ~data:_ acc -> key :: acc) -let timestamp t fn = - match Hashtbl.find t.timestamps fn with - | Some _ as x -> x - | None -> - match Unix.lstat (Path.to_string fn) with - | exception _ -> None - | stat -> - let ts = stat.st_mtime in - Hashtbl.add t.timestamps ~key:fn ~data:ts; - Some ts - -type limit_timestamp = - { missing_files : bool - ; limit : float option - } - -let merge_timestamp t fns ~merge = - let init = - { missing_files = false - ; limit = None - } - in - List.fold_left fns ~init - ~f:(fun acc fn -> - match timestamp t fn with - | None -> { acc with missing_files = true } - | Some ts -> - { acc with - limit = - match acc.limit with - | None -> Some ts - | Some ts' -> Some (merge ts ts') - }) - -let min_timestamp t fns = merge_timestamp t fns ~merge:min -let max_timestamp t fns = merge_timestamp t fns ~merge:max - let find_file_exn t file = - Hashtbl.find_exn t.files file ~string_of_key:(fun fn -> sprintf "%S" (Path.to_string fn)) + Hashtbl.find_exn t.files file + ~string_of_key:(fun fn -> sprintf "%S" (Path.to_string fn)) ~table_desc:(fun _ -> "") let is_target t file = Hashtbl.mem t.files file @@ -390,14 +353,13 @@ let create_file_specs t targets rule ~copy_source = module Pre_rule = Build_interpret.Rule -let refresh_targets_timestamps_after_rule_execution t targets = +let clear_targets_digests_after_rule_execution targets = let missing = List.fold_left targets ~init:Pset.empty ~f:(fun acc fn -> match Unix.lstat (Path.to_string fn) with | exception _ -> Pset.add fn acc - | stat -> - let ts = stat.st_mtime in - Hashtbl.replace t.timestamps ~key:fn ~data:ts; + | (_ : Unix.stats) -> + Utils.Cached_digest.remove fn; acc) in if not (Pset.is_empty missing) then @@ -480,7 +442,8 @@ let compile_rule t ~all_targets_by_dir ?(copy_source=false) pre_rule = let targets_as_list = Pset.elements targets in let hash = let trace = - (all_deps_as_list, + (List.map all_deps_as_list ~f:(fun fn -> + (fn, Utils.Cached_digest.file fn)), targets_as_list, Option.map context ~f:(fun c -> c.name), action) @@ -493,7 +456,7 @@ let compile_rule t ~all_targets_by_dir ?(copy_source=false) pre_rule = else None in - let rule_changed = + let deps_or_rule_changed = List.fold_left targets_as_list ~init:false ~f:(fun acc fn -> match Hashtbl.find t.trace fn with | None -> @@ -503,29 +466,13 @@ let compile_rule t ~all_targets_by_dir ?(copy_source=false) pre_rule = Hashtbl.replace t.trace ~key:fn ~data:hash; acc || prev_hash <> hash) in - let targets_min_ts = min_timestamp t targets_as_list in - let deps_max_ts = max_timestamp t all_deps_as_list in - if rule_changed || - match deps_max_ts, targets_min_ts with - | _, { missing_files = true; _ } -> - (* Missing targets -> rebuild *) - true - | _, { missing_files = false; limit = None } -> - (* CR-someday jdimino: no target, this should be a user error *) - true - | { missing_files = true; _ }, _ -> - Sexp.code_error - "Dependencies missing after waiting for them" - [ "all_deps", Sexp.To_sexp.list Path.sexp_of_t all_deps_as_list ] - | { limit = None; missing_files = false }, - { missing_files = false; _ } -> - (* No dependencies, no need to do anything if the rule hasn't changed and targets - are here. *) - false - | { limit = Some deps_max; missing_files = false }, - { limit = Some targets_min; missing_files = false } -> - targets_min < deps_max - then ( + let targets_missing = + List.exists targets_as_list ~f:(fun fn -> + match Unix.lstat (Path.to_string fn) with + | exception _ -> true + | (_ : Unix.stats) -> false) + in + if deps_or_rule_changed || targets_missing then ( (* Do not remove files that are just updated, otherwise this would break incremental compilation *) let targets_to_remove = @@ -557,7 +504,7 @@ let compile_rule t ~all_targets_by_dir ?(copy_source=false) pre_rule = Option.iter sandbox_dir ~f:Path.rm_rf; (* All went well, these targets are no longer pending *) pending_targets := Pset.diff !pending_targets targets_to_remove; - refresh_targets_timestamps_after_rule_execution t targets_as_list + clear_targets_digests_after_rule_execution targets_as_list ) else return () in @@ -606,6 +553,7 @@ module Trace = struct let file = "_build/.db" let dump (trace : t) = + Utils.Cached_digest.dump (); let sexp = Sexp.List ( Hashtbl.fold trace ~init:Pmap.empty ~f:(fun ~key ~data acc -> @@ -618,6 +566,7 @@ module Trace = struct Io.write_file file (Sexp.to_string sexp) let load () = + Utils.Cached_digest.load (); let trace = Hashtbl.create 1024 in if Sys.file_exists file then begin let sexp = Sexp_lexer.Load.single file in @@ -676,7 +625,6 @@ let create ~contexts ~file_tree ~rules = { contexts ; files = Hashtbl.create 1024 ; trace = Trace.load () - ; timestamps = Hashtbl.create 1024 ; local_mkdirs = Path.Local.Set.empty } in List.iter rules ~f:(compile_rule t ~all_targets_by_dir ~copy_source:false); diff --git a/src/sexp.ml b/src/sexp.ml index b3bc019c..674d49a2 100644 --- a/src/sexp.ml +++ b/src/sexp.ml @@ -152,6 +152,7 @@ module type Combinators = sig val float : float t val bool : bool t val pair : 'a t -> 'b t -> ('a * 'b) t + val triple : 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t val list : 'a t -> 'a list t val array : 'a t -> 'a array t val option : 'a t -> 'a option t @@ -168,6 +169,7 @@ module To_sexp = struct let float f = Atom (string_of_float f) let bool b = Atom (string_of_bool b) let pair fa fb (a, b) = List [fa a; fb b] + let triple fa fb fc (a, b, c) = List [fa a; fb b; fc c] let list f l = List (List.map l ~f) let array f a = list f (Array.to_list a) let option f = function @@ -228,6 +230,10 @@ module Of_sexp = struct | List (_, [a; b]) -> (fa a, fb b) | sexp -> of_sexp_error sexp "S-expression of the form (_ _) expected" + let triple fa fb fc = function + | List (_, [a; b; c]) -> (fa a, fb b, fc c) + | sexp -> of_sexp_error sexp "S-expression of the form (_ _ _) expected" + let list f = function | Atom _ as sexp -> of_sexp_error sexp "List expected" | List (_, l) -> List.map l ~f diff --git a/src/sexp.mli b/src/sexp.mli index 31bd4520..244ed08a 100644 --- a/src/sexp.mli +++ b/src/sexp.mli @@ -41,6 +41,7 @@ module type Combinators = sig val float : float t val bool : bool t val pair : 'a t -> 'b t -> ('a * 'b) t + val triple : 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t val list : 'a t -> 'a list t val array : 'a t -> 'a array t val option : 'a t -> 'a option t diff --git a/src/utils.ml b/src/utils.ml index 0223395e..d0c1ea00 100644 --- a/src/utils.ml +++ b/src/utils.ml @@ -135,3 +135,80 @@ let obj_name_of_basename fn = match String.index fn '.' with | None -> fn | Some i -> String.sub fn ~pos:0 ~len:i + +module Cached_digest = struct + type file = + { mutable digest : Digest.t + ; mutable timestamp : float + ; mutable timestamp_checked : bool + } + + let cache = Hashtbl.create 1024 + + let file fn = + match Hashtbl.find cache fn with + | Some x -> + if x.timestamp_checked then + x.digest + else begin + let mtime = (Unix.stat (Path.to_string fn)).st_mtime in + if mtime <> x.timestamp then begin + let digest = Digest.file (Path.to_string fn) in + x.digest <- digest; + x.timestamp <- mtime; + end; + x.timestamp_checked <- true; + x.digest + end + | None -> + let digest = Digest.file (Path.to_string fn) in + Hashtbl.add cache ~key:fn + ~data:{ digest + ; timestamp = (Unix.stat (Path.to_string fn)).st_mtime + ; timestamp_checked = true + }; + digest + + let remove fn = + match Hashtbl.find cache fn with + | None -> () + | Some file -> file.timestamp_checked <- false + + let db_file = "_build/.digest-db" + + let dump () = + let module Pmap = Path.Map in + let sexp = + Sexp.List ( + Hashtbl.fold cache ~init:Pmap.empty ~f:(fun ~key ~data acc -> + Pmap.add acc ~key ~data) + |> Path.Map.bindings + |> List.map ~f:(fun (path, file) -> + Sexp.List [ Atom (Path.to_string path) + ; Atom (Digest.to_hex file.digest) + ; Atom (Int64.to_string (Int64.bits_of_float file.timestamp)) + ])) + in + if Sys.file_exists "_build" then + Io.write_file db_file (Sexp.to_string sexp) + + let load () = + if Sys.file_exists db_file then begin + let sexp = Sexp_lexer.Load.single db_file in + let bindings = + let open Sexp.Of_sexp in + list + (triple + Path.t + (fun s -> Digest.from_hex (string s)) + (fun s -> Int64.float_of_bits (Int64.of_string (string s))) + ) sexp + in + List.iter bindings ~f:(fun (path, digest, timestamp) -> + Hashtbl.add cache ~key:path + ~data:{ digest + ; timestamp + ; timestamp_checked = false + }); + end +end diff --git a/src/utils.mli b/src/utils.mli index 540b800f..e889026b 100644 --- a/src/utils.mli +++ b/src/utils.mli @@ -43,3 +43,16 @@ val find_deps : dir:Path.t -> 'a String_map.t -> string -> 'a - [obj_name_of_basename "toto.pp.ml" = "toto"] *) val obj_name_of_basename : string -> string + +(** Digest files with caching *) +module Cached_digest : sig + (** Digest the contents of the following file *) + val file : Path.t -> Digest.t + + (** Clear the following digest from the cache *) + val remove : Path.t -> unit + + (** Dump/load the cache to/from the disk *) + val dump : unit -> unit + val load : unit -> unit +end