From ad3a95655d315ae6df19c2e624226d2010becff2 Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Tue, 19 Jun 2018 15:36:17 +0700 Subject: [PATCH 1/9] Dune & Jbuild validation for atoms Atoms can now be constructed and pretty printed with a syntax = Jbuild | Dune. The syntax controls validation that will be used to make sure we are printing something/reading valid Signed-off-by: Rudi Grinberg --- src/dune_project.ml | 3 +- src/loc.ml | 3 +- src/stdune/sexp.ml | 9 +++--- src/usexp/atom.ml | 55 +++++++++++++++++++++++++++++++++++ src/usexp/atom.mli | 15 ++++++++++ src/usexp/lexer.mli | 4 --- src/usexp/lexer.mll | 8 ++--- src/usexp/usexp.ml | 53 +++++++++------------------------ src/usexp/usexp.mli | 13 ++++----- test/unit-tests/sexp.mlt | 8 ++--- test/unit-tests/sexp_tests.ml | 6 ++-- 11 files changed, 107 insertions(+), 70 deletions(-) create mode 100644 src/usexp/atom.ml create mode 100644 src/usexp/atom.mli diff --git a/src/dune_project.ml b/src/dune_project.ml index 52b149a0..31878120 100644 --- a/src/dune_project.ml +++ b/src/dune_project.ml @@ -149,8 +149,7 @@ module Lang = struct in let ver = Sexp.Of_sexp.parse Syntax.Version.t Univ_map.empty - (Atom (ver_loc, Sexp.Atom.of_string ver)) - in + (Atom (ver_loc, Sexp.Atom.of_string_exn Sexp.Atom.Dune ver)) in match Hashtbl.find langs name with | None -> Loc.fail name_loc "Unknown language %S.%s" name diff --git a/src/loc.ml b/src/loc.ml index 28d7a1dc..f1b78fba 100644 --- a/src/loc.ml +++ b/src/loc.ml @@ -11,7 +11,8 @@ let int n = Usexp.Atom (Usexp.Atom.of_int n) let string = Usexp.atom_or_quoted_string let record l = let open Usexp in - List (List.map l ~f:(fun (n, v) -> List [Atom(Atom.of_string n); v])) + List (List.map l ~f:(fun (n, v) -> + List [Atom(Atom.of_string_exn Atom.Dune n); v])) let sexp_of_position_no_file (p : Lexing.position) = record diff --git a/src/stdune/sexp.ml b/src/stdune/sexp.ml index 69dad943..8e8e96a6 100644 --- a/src/stdune/sexp.ml +++ b/src/stdune/sexp.ml @@ -34,7 +34,8 @@ module To_sexp = struct let string_set set = list atom (String.Set.to_list set) let string_map f map = list (pair atom f) (String.Map.to_list map) let record l = - List (List.map l ~f:(fun (n, v) -> List [Atom(Atom.of_string n); v])) + List (List.map l ~f:(fun (n, v) -> + List [Atom(Atom.of_string_exn Usexp.Atom.Dune n); v])) let string_hashtbl f h = string_map f (Hashtbl.foldi h ~init:String.Map.empty ~f:(fun key data acc -> @@ -54,7 +55,7 @@ module To_sexp = struct let record_fields (l : field list) = List (List.filter_map l ~f:(fun (k, v) -> - Option.map v ~f:(fun v -> List[Atom (Atom.of_string k); v]))) + Option.map v ~f:(fun v -> List[Atom (Atom.of_string_exn Atom.Dune k); v]))) let unknown _ = unsafe_atom_of_string "" end @@ -296,13 +297,13 @@ module Of_sexp = struct let string = plain_string (fun ~loc:_ x -> x) let int = basic "Integer" (fun s -> - match int_of_string s with + match int_of_string (s Atom.Dune) with | x -> Ok x | exception _ -> Error ()) let float = basic "Float" (fun s -> - match float_of_string s with + match float_of_string (s Atom.Dune) with | x -> Ok x | exception _ -> Error ()) diff --git a/src/usexp/atom.ml b/src/usexp/atom.ml new file mode 100644 index 00000000..471301e1 --- /dev/null +++ b/src/usexp/atom.ml @@ -0,0 +1,55 @@ +type t = A of string [@@unboxed] + +let invalid_argf fmt = Printf.ksprintf invalid_arg fmt + +type syntax = Jbuild | Dune + +let string_of_syntax = function + | Jbuild -> "jbuild" + | Dune -> "dune" + +let (is_valid_jbuild, is_valid_dune) = + let rec jbuild s i len = + i = len || + match String.unsafe_get s i with + | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false + | _ -> jbuild s (i + 1) len + in + let rec dune s i len = + i = len || + match String.unsafe_get s i with + | '%' | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false + | _ -> dune s (i + 1) len + in + let make looper s = + let len = String.length s in + len > 0 && looper s 0 len + in + (make jbuild, make dune) + +let of_string syn s = + match syn with + | Jbuild when is_valid_jbuild s -> Some (A s) + | Dune when is_valid_dune s -> Some (A s) + | _ -> None + +let of_string_exn syn s = + match of_string syn s with + | Some s -> s + | None -> + invalid_argf "'%s' is not a valid %s atom" s (string_of_syntax syn) + +let to_string (A t) syntax = + match syntax with + | Jbuild -> t + | Dune -> + if is_valid_dune t then + t + else + invalid_argf "Jbuild atom '%s' is not a valid dune atom" t + +let of_int i = of_string_exn Dune (string_of_int i) +let of_float x = of_string_exn Dune (string_of_float x) +let of_bool x = of_string_exn Dune (string_of_bool x) +let of_int64 i = of_string_exn Dune (Int64.to_string i) +let of_digest d = of_string_exn Dune (Digest.to_hex d) diff --git a/src/usexp/atom.mli b/src/usexp/atom.mli new file mode 100644 index 00000000..40bd6658 --- /dev/null +++ b/src/usexp/atom.mli @@ -0,0 +1,15 @@ +type t = private A of string [@@unboxed] + +type syntax = Jbuild | Dune + +val of_string : syntax -> string -> t option + +val of_string_exn : syntax -> string -> t + +val to_string : t -> syntax -> string + +val of_int : int -> t +val of_float : float -> t +val of_bool : bool -> t +val of_int64 : Int64.t -> t +val of_digest : Digest.t -> t diff --git a/src/usexp/lexer.mli b/src/usexp/lexer.mli index 5015e1ad..22407e45 100644 --- a/src/usexp/lexer.mli +++ b/src/usexp/lexer.mli @@ -1,7 +1,3 @@ -module Atom : sig - type t = A of string [@@unboxed] -end - module Token : sig type t = | Atom of Atom.t diff --git a/src/usexp/lexer.mll b/src/usexp/lexer.mll index 69912d31..ef072f06 100644 --- a/src/usexp/lexer.mll +++ b/src/usexp/lexer.mll @@ -1,8 +1,4 @@ { -module Atom = struct - type t = A of string [@@unboxed] -end - module Token = struct type t = | Atom of Atom.t @@ -128,7 +124,7 @@ and jbuild_atom acc start = parse error lexbuf "Internal error in the S-expression parser, \ please report upstream."; lexbuf.lex_start_p <- start; - Token.Atom (A acc) + Token.Atom (Atom.of_string_exn Jbuild acc) } and quoted_string mode = parse @@ -248,7 +244,7 @@ and token = parse Quoted_string s } | atom_char_dune+ as s - { Token.Atom (A s) } + { Token.Atom (Atom.of_string_exn Dune s) } | eof { Eof } diff --git a/src/usexp/usexp.ml b/src/usexp/usexp.ml index 48a3dd32..0252bc66 100644 --- a/src/usexp/usexp.ml +++ b/src/usexp/usexp.ml @@ -10,34 +10,7 @@ module Bytes = struct UnlabeledBytes.blit_string src src_pos dst dst_pos len end -module Atom = struct - type t = Lexer.Atom.t = A of string [@@unboxed] - - let is_valid = - let rec loop s i len = - i = len || - match String.unsafe_get s i with - | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false - | _ -> loop s (i + 1) len - in - fun s -> - let len = String.length s in - len > 0 && loop s 0 len - - (* XXX eventually we want to report a nice error message to the user - at the point the conversion is made. *) - let of_string s = - if is_valid s then A s - else invalid_arg(Printf.sprintf "Usexp.Atom.of_string: %S" s) - - let of_int i = A (string_of_int i) - let of_float x = A (string_of_float x) - let of_bool x = A (string_of_bool x) - let of_int64 i = A (Int64.to_string i) - let of_digest d = A (Digest.to_hex d) - - let to_string (A s) = s -end +module Atom = Atom type t = | Atom of Atom.t @@ -46,15 +19,14 @@ type t = type sexp = t -let atom s = - if Atom.is_valid s then Atom (A s) - else invalid_arg "Usexp.atom" +let atom s = Atom (Atom.of_string_exn Dune s) -let unsafe_atom_of_string s = Atom(A s) +let unsafe_atom_of_string s = atom s let atom_or_quoted_string s = - if Atom.is_valid s then Atom (A s) - else Quoted_string s + match Atom.of_string Atom.Dune s with + | None -> Quoted_string s + | Some a -> Atom a let quote_length s = let n = ref 0 in @@ -117,13 +89,13 @@ let quoted s = Bytes.unsafe_to_string s' let rec to_string = function - | Atom (A s) -> s + | Atom a -> Atom.to_string a Atom.Dune | Quoted_string s -> quoted s | List l -> Printf.sprintf "(%s)" (List.map l ~f:to_string |> String.concat ~sep:" ") let rec pp ppf = function - | Atom (A s) -> - Format.pp_print_string ppf s + | Atom s -> + Format.pp_print_string ppf (Atom.to_string s Atom.Dune) | Quoted_string s -> Format.pp_print_string ppf (quoted s) | List [] -> @@ -164,7 +136,7 @@ let pp_print_quoted_string ppf s = Format.pp_print_string ppf (quoted s) let rec pp_split_strings ppf = function - | Atom (A s) -> Format.pp_print_string ppf s + | Atom s -> Format.pp_print_string ppf (Atom.to_string s Atom.Dune) | Quoted_string s -> pp_print_quoted_string ppf s | List [] -> Format.pp_print_string ppf "()" @@ -249,8 +221,9 @@ module Ast = struct | List of Loc.t * t list let atom_or_quoted_string loc s = - if Atom.is_valid s then Atom (loc, A s) - else Quoted_string (loc, s) + match Atom.of_string Atom.Dune s with + | None -> Quoted_string (loc, s) + | Some a -> Atom (loc, a) let loc (Atom (loc, _) | Quoted_string (loc, _) | List (loc, _)) = loc diff --git a/src/usexp/usexp.mli b/src/usexp/usexp.mli index 6ade200b..781625e0 100644 --- a/src/usexp/usexp.mli +++ b/src/usexp/usexp.mli @@ -7,20 +7,19 @@ module Atom : sig (** Acceptable atoms are composed of chars in the range ['!' .. '~'] excluding [' ' '"' '(' ')' ';' '\\'], and must be nonempty. *) - val is_valid : string -> bool - (** [is_valid s] checks that [s] respects the constraints to be an atom. *) + type syntax = Jbuild | Dune - val of_string : string -> t - (** Convert a string to an atom. If the string contains invalid - characters, raise [Invalid_argument]. *) + val of_string : syntax -> string -> t option + + val of_string_exn : syntax -> string -> t + + val to_string : t -> syntax -> string val of_int : int -> t val of_float : float -> t val of_bool : bool -> t val of_int64 : Int64.t -> t val of_digest : Digest.t -> t - - val to_string : t -> string end module Loc : sig diff --git a/test/unit-tests/sexp.mlt b/test/unit-tests/sexp.mlt index 67e516e1..821f4488 100644 --- a/test/unit-tests/sexp.mlt +++ b/test/unit-tests/sexp.mlt @@ -140,7 +140,7 @@ parse {|"$bar%foo%"|} parse {|\%{foo}|} [%%expect{| -- : parse_result = Same (Ok [\%{foo}]) +Exception: Invalid_argument "'\\%{foo}' is not a valid dune atom". |}] parse {|\${foo}|} @@ -150,15 +150,15 @@ parse {|\${foo}|} parse {|\$bar%foo%|} [%%expect{| -- : parse_result = Same (Ok [\$bar%foo%]) +Exception: Invalid_argument "'\\$bar%foo%' is not a valid dune atom". |}] parse {|\$bar\%foo%|} [%%expect{| -- : parse_result = Same (Ok [\$bar\%foo%]) +Exception: Invalid_argument "'\\$bar\\%foo%' is not a valid dune atom". |}] parse {|\$bar\%foo%{bar}|} [%%expect{| -- : parse_result = Same (Ok [\$bar\%foo%{bar}]) +Exception: Invalid_argument "'\\$bar\\%foo%{bar}' is not a valid dune atom". |}] diff --git a/test/unit-tests/sexp_tests.ml b/test/unit-tests/sexp_tests.ml index 4fd2bc3a..2d4e3fbf 100644 --- a/test/unit-tests/sexp_tests.ml +++ b/test/unit-tests/sexp_tests.ml @@ -23,13 +23,15 @@ let () = | Atom _ -> true | _ -> false in - if Usexp.Atom.is_valid s <> parser_recognizes_as_atom then begin + let valid_dune_atom = + Option.is_some (Usexp.Atom.of_string Dune s) in + if valid_dune_atom <> parser_recognizes_as_atom then begin Printf.eprintf "Usexp.Atom.is_valid error:\n\ - s = %S\n\ - Usexp.Atom.is_valid s = %B\n\ - parser_recognizes_as_atom = %B\n" - s (Usexp.Atom.is_valid s) parser_recognizes_as_atom; + s valid_dune_atom parser_recognizes_as_atom; exit 1 end; if printed_as_atom && not parser_recognizes_as_atom then begin From 46eba2ef988cb9db83ec1c4529dfde9ef3377feb Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Tue, 19 Jun 2018 21:49:49 +0630 Subject: [PATCH 2/9] Improve validation of jbuild atoms Signed-off-by: Rudi Grinberg --- src/usexp/atom.ml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/usexp/atom.ml b/src/usexp/atom.ml index 471301e1..c53eaa97 100644 --- a/src/usexp/atom.ml +++ b/src/usexp/atom.ml @@ -12,8 +12,12 @@ let (is_valid_jbuild, is_valid_dune) = let rec jbuild s i len = i = len || match String.unsafe_get s i with + | '#' -> disallow_next '|' s (i + 1) len + | '|' -> disallow_next '#' s (i + 1) len | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false | _ -> jbuild s (i + 1) len + and disallow_next c s i len = + i = len || String.unsafe_get s i <> c && jbuild s i len in let rec dune s i len = i = len || From 5618be7ab04c5cadaa2b05cbc6630349e12a3c24 Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Tue, 19 Jun 2018 21:51:28 +0630 Subject: [PATCH 3/9] Fix 4.02.3 compat Signed-off-by: Rudi Grinberg --- src/stdune/sexp.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/stdune/sexp.ml b/src/stdune/sexp.ml index 8e8e96a6..a4636715 100644 --- a/src/stdune/sexp.ml +++ b/src/stdune/sexp.ml @@ -299,13 +299,13 @@ module Of_sexp = struct basic "Integer" (fun s -> match int_of_string (s Atom.Dune) with | x -> Ok x - | exception _ -> Error ()) + | exception _ -> Result.Error ()) let float = basic "Float" (fun s -> match float_of_string (s Atom.Dune) with | x -> Ok x - | exception _ -> Error ()) + | exception _ -> Result.Error ()) let pair a b = enter @@ -334,7 +334,7 @@ module Of_sexp = struct let string_map t = list (pair string t) >>= fun bindings -> match String.Map.of_list bindings with - | Ok x -> return x + | Result.Ok x -> return x | Error (key, _v1, _v2) -> loc >>= fun loc -> of_sexp_errorf loc "key %s present multiple times" key From 99fbac26ab33442b4f8323d2815f761b6513eb19 Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Wed, 20 Jun 2018 11:59:18 +0630 Subject: [PATCH 4/9] Remove constructor side validation And make the tests reflect back Invalid_argument Signed-off-by: Rudi Grinberg --- src/dune_project.ml | 2 +- src/loc.ml | 3 +-- src/stdune/sexp.ml | 5 ++--- src/usexp/atom.ml | 34 ++++++++++++---------------------- src/usexp/atom.mli | 4 ++-- src/usexp/lexer.mll | 7 ++++--- src/usexp/usexp.ml | 16 +++++++++------- src/usexp/usexp.mli | 4 ++-- test/unit-tests/sexp.mlt | 33 +++++++++++++++++++++++++++------ test/unit-tests/sexp_tests.ml | 3 +-- 10 files changed, 61 insertions(+), 50 deletions(-) diff --git a/src/dune_project.ml b/src/dune_project.ml index 31878120..a24003ab 100644 --- a/src/dune_project.ml +++ b/src/dune_project.ml @@ -149,7 +149,7 @@ module Lang = struct in let ver = Sexp.Of_sexp.parse Syntax.Version.t Univ_map.empty - (Atom (ver_loc, Sexp.Atom.of_string_exn Sexp.Atom.Dune ver)) in + (Atom (ver_loc, Sexp.Atom.of_string ver)) in match Hashtbl.find langs name with | None -> Loc.fail name_loc "Unknown language %S.%s" name diff --git a/src/loc.ml b/src/loc.ml index f1b78fba..28d7a1dc 100644 --- a/src/loc.ml +++ b/src/loc.ml @@ -11,8 +11,7 @@ let int n = Usexp.Atom (Usexp.Atom.of_int n) let string = Usexp.atom_or_quoted_string let record l = let open Usexp in - List (List.map l ~f:(fun (n, v) -> - List [Atom(Atom.of_string_exn Atom.Dune n); v])) + List (List.map l ~f:(fun (n, v) -> List [Atom(Atom.of_string n); v])) let sexp_of_position_no_file (p : Lexing.position) = record diff --git a/src/stdune/sexp.ml b/src/stdune/sexp.ml index a4636715..694452e0 100644 --- a/src/stdune/sexp.ml +++ b/src/stdune/sexp.ml @@ -34,8 +34,7 @@ module To_sexp = struct let string_set set = list atom (String.Set.to_list set) let string_map f map = list (pair atom f) (String.Map.to_list map) let record l = - List (List.map l ~f:(fun (n, v) -> - List [Atom(Atom.of_string_exn Usexp.Atom.Dune n); v])) + List (List.map l ~f:(fun (n, v) -> List [Atom(Atom.of_string n); v])) let string_hashtbl f h = string_map f (Hashtbl.foldi h ~init:String.Map.empty ~f:(fun key data acc -> @@ -55,7 +54,7 @@ module To_sexp = struct let record_fields (l : field list) = List (List.filter_map l ~f:(fun (k, v) -> - Option.map v ~f:(fun v -> List[Atom (Atom.of_string_exn Atom.Dune k); v]))) + Option.map v ~f:(fun v -> List[Atom (Atom.of_string k); v]))) let unknown _ = unsafe_atom_of_string "" end diff --git a/src/usexp/atom.ml b/src/usexp/atom.ml index c53eaa97..3034ff73 100644 --- a/src/usexp/atom.ml +++ b/src/usexp/atom.ml @@ -4,10 +4,6 @@ let invalid_argf fmt = Printf.ksprintf invalid_arg fmt type syntax = Jbuild | Dune -let string_of_syntax = function - | Jbuild -> "jbuild" - | Dune -> "dune" - let (is_valid_jbuild, is_valid_dune) = let rec jbuild s i len = i = len || @@ -31,29 +27,23 @@ let (is_valid_jbuild, is_valid_dune) = in (make jbuild, make dune) -let of_string syn s = - match syn with - | Jbuild when is_valid_jbuild s -> Some (A s) - | Dune when is_valid_dune s -> Some (A s) - | _ -> None - -let of_string_exn syn s = - match of_string syn s with - | Some s -> s - | None -> - invalid_argf "'%s' is not a valid %s atom" s (string_of_syntax syn) +let of_string s = A s let to_string (A t) syntax = match syntax with - | Jbuild -> t + | Jbuild -> + if is_valid_jbuild t then + t + else + invalid_argf "Dune atom '%s' cannot be printed" t | Dune -> if is_valid_dune t then t else - invalid_argf "Jbuild atom '%s' is not a valid dune atom" t + invalid_argf "Jbuild atom '%s' cannot be printed" t -let of_int i = of_string_exn Dune (string_of_int i) -let of_float x = of_string_exn Dune (string_of_float x) -let of_bool x = of_string_exn Dune (string_of_bool x) -let of_int64 i = of_string_exn Dune (Int64.to_string i) -let of_digest d = of_string_exn Dune (Digest.to_hex d) +let of_int i = of_string (string_of_int i) +let of_float x = of_string (string_of_float x) +let of_bool x = of_string (string_of_bool x) +let of_digest d = of_string (Digest.to_hex d) +let of_int64 i = of_string (Int64.to_string i) diff --git a/src/usexp/atom.mli b/src/usexp/atom.mli index 40bd6658..2036a159 100644 --- a/src/usexp/atom.mli +++ b/src/usexp/atom.mli @@ -2,9 +2,9 @@ type t = private A of string [@@unboxed] type syntax = Jbuild | Dune -val of_string : syntax -> string -> t option +val is_valid_dune : string -> bool -val of_string_exn : syntax -> string -> t +val of_string : string -> t val to_string : t -> syntax -> string diff --git a/src/usexp/lexer.mll b/src/usexp/lexer.mll index ef072f06..2c9098b0 100644 --- a/src/usexp/lexer.mll +++ b/src/usexp/lexer.mll @@ -77,7 +77,7 @@ let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F'] let atom_char_jbuild = [^ ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\012'] let atom_char_dune = - [^ ';' '(' ')' '"' '\000'-'\032' '\127'-'\255'] + [^ '%' ';' '(' ')' '"' '\000'-'\032' '\127'-'\255'] (* rule for jbuild files *) rule jbuild_token = parse @@ -124,7 +124,7 @@ and jbuild_atom acc start = parse error lexbuf "Internal error in the S-expression parser, \ please report upstream."; lexbuf.lex_start_p <- start; - Token.Atom (Atom.of_string_exn Jbuild acc) + Token.Atom (Atom.of_string acc) } and quoted_string mode = parse @@ -244,7 +244,8 @@ and token = parse Quoted_string s } | atom_char_dune+ as s - { Token.Atom (Atom.of_string_exn Dune s) } + { Token.Atom (Atom.of_string s) } + | _ as c { error lexbuf (Printf.sprintf "Invalid atom character '%c'" c) } | eof { Eof } diff --git a/src/usexp/usexp.ml b/src/usexp/usexp.ml index 0252bc66..ce9fca58 100644 --- a/src/usexp/usexp.ml +++ b/src/usexp/usexp.ml @@ -19,14 +19,15 @@ type t = type sexp = t -let atom s = Atom (Atom.of_string_exn Dune s) +let atom s = Atom (Atom.of_string s) let unsafe_atom_of_string s = atom s let atom_or_quoted_string s = - match Atom.of_string Atom.Dune s with - | None -> Quoted_string s - | Some a -> Atom a + if Atom.is_valid_dune s then + Atom (Atom.of_string s) + else + Quoted_string s let quote_length s = let n = ref 0 in @@ -221,9 +222,10 @@ module Ast = struct | List of Loc.t * t list let atom_or_quoted_string loc s = - match Atom.of_string Atom.Dune s with - | None -> Quoted_string (loc, s) - | Some a -> Atom (loc, a) + match atom_or_quoted_string s with + | Atom a -> Atom (loc, a) + | Quoted_string s -> Quoted_string (loc, s) + | List _ -> assert false let loc (Atom (loc, _) | Quoted_string (loc, _) | List (loc, _)) = loc diff --git a/src/usexp/usexp.mli b/src/usexp/usexp.mli index 781625e0..398ab39a 100644 --- a/src/usexp/usexp.mli +++ b/src/usexp/usexp.mli @@ -9,9 +9,9 @@ module Atom : sig type syntax = Jbuild | Dune - val of_string : syntax -> string -> t option + val is_valid_dune : string -> bool - val of_string_exn : syntax -> string -> t + val of_string : string -> t val to_string : t -> syntax -> string diff --git a/test/unit-tests/sexp.mlt b/test/unit-tests/sexp.mlt index 821f4488..880f372b 100644 --- a/test/unit-tests/sexp.mlt +++ b/test/unit-tests/sexp.mlt @@ -52,8 +52,9 @@ let parse s = let f ~lexer = try Ok (Sexp.parse_string ~fname:"" ~mode:Many ~lexer s) - with Sexp.Parse_error e -> - Error (Sexp.Parse_error.message e) + with + | Sexp.Parse_error e -> Error (Sexp.Parse_error.message e) + | Invalid_argument e -> Error e in let jbuild = f ~lexer:Sexp.Lexer.jbuild_token in let dune = f ~lexer:Sexp.Lexer.token in @@ -140,7 +141,12 @@ parse {|"$bar%foo%"|} parse {|\%{foo}|} [%%expect{| -Exception: Invalid_argument "'\\%{foo}' is not a valid dune atom". +- : parse_result = +Different + {jbuild = + Ok + []; + dune = Error "Invalid atom character '%'"} |}] parse {|\${foo}|} @@ -150,15 +156,30 @@ parse {|\${foo}|} parse {|\$bar%foo%|} [%%expect{| -Exception: Invalid_argument "'\\$bar%foo%' is not a valid dune atom". +- : parse_result = +Different + {jbuild = + Ok + []; + dune = Error "Invalid atom character '%'"} |}] parse {|\$bar\%foo%|} [%%expect{| -Exception: Invalid_argument "'\\$bar\\%foo%' is not a valid dune atom". +- : parse_result = +Different + {jbuild = + Ok + []; + dune = Error "Invalid atom character '%'"} |}] parse {|\$bar\%foo%{bar}|} [%%expect{| -Exception: Invalid_argument "'\\$bar\\%foo%{bar}' is not a valid dune atom". +- : parse_result = +Different + {jbuild = + Ok + []; + dune = Error "Invalid atom character '%'"} |}] diff --git a/test/unit-tests/sexp_tests.ml b/test/unit-tests/sexp_tests.ml index 2d4e3fbf..2d733007 100644 --- a/test/unit-tests/sexp_tests.ml +++ b/test/unit-tests/sexp_tests.ml @@ -23,8 +23,7 @@ let () = | Atom _ -> true | _ -> false in - let valid_dune_atom = - Option.is_some (Usexp.Atom.of_string Dune s) in + let valid_dune_atom = Usexp.Atom.is_valid_dune s in if valid_dune_atom <> parser_recognizes_as_atom then begin Printf.eprintf "Usexp.Atom.is_valid error:\n\ From a1d714f9d4ed73c1e38f3ed8ec34df438112b8b8 Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Wed, 20 Jun 2018 15:24:29 +0630 Subject: [PATCH 5/9] Fix definition of is_valid_jbuild Signed-off-by: Rudi Grinberg --- src/usexp/atom.ml | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/usexp/atom.ml b/src/usexp/atom.ml index 3034ff73..0935d0b8 100644 --- a/src/usexp/atom.ml +++ b/src/usexp/atom.ml @@ -4,28 +4,29 @@ let invalid_argf fmt = Printf.ksprintf invalid_arg fmt type syntax = Jbuild | Dune -let (is_valid_jbuild, is_valid_dune) = - let rec jbuild s i len = - i = len || - match String.unsafe_get s i with - | '#' -> disallow_next '|' s (i + 1) len - | '|' -> disallow_next '#' s (i + 1) len - | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false - | _ -> jbuild s (i + 1) len - and disallow_next c s i len = - i = len || String.unsafe_get s i <> c && jbuild s i len - in - let rec dune s i len = +let is_valid_dune = + let rec loop s i len = i = len || match String.unsafe_get s i with | '%' | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false - | _ -> dune s (i + 1) len + | _ -> loop s (i + 1) len in - let make looper s = + fun s -> let len = String.length s in - len > 0 && looper s 0 len + len > 0 && loop s 0 len + +let is_valid_jbuild str = + let len = String.length str in + len > 0 && + let rec loop ix = + match str.[ix] with + | '"' | '(' | ')' | ';' -> true + | '|' -> ix > 0 && let next = ix - 1 in str.[next] = '#' || loop next + | '#' -> ix > 0 && let next = ix - 1 in str.[next] = '|' || loop next + | ' ' | '\t' | '\n' | '\012' | '\r' -> true + | _ -> ix > 0 && loop (ix - 1) in - (make jbuild, make dune) + not (loop (len - 1)) let of_string s = A s From 8a87b5b5bfb8438c78960ce822b7a083b59120a6 Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Wed, 20 Jun 2018 15:31:18 +0630 Subject: [PATCH 6/9] Move validation to a `print` function Signed-off-by: Rudi Grinberg --- src/stdune/sexp.ml | 4 ++-- src/usexp/atom.ml | 3 ++- src/usexp/atom.mli | 3 ++- src/usexp/usexp.ml | 6 +++--- src/usexp/usexp.mli | 3 +-- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/stdune/sexp.ml b/src/stdune/sexp.ml index 694452e0..e945cd85 100644 --- a/src/stdune/sexp.ml +++ b/src/stdune/sexp.ml @@ -296,13 +296,13 @@ module Of_sexp = struct let string = plain_string (fun ~loc:_ x -> x) let int = basic "Integer" (fun s -> - match int_of_string (s Atom.Dune) with + match int_of_string s with | x -> Ok x | exception _ -> Result.Error ()) let float = basic "Float" (fun s -> - match float_of_string (s Atom.Dune) with + match float_of_string s with | x -> Ok x | exception _ -> Result.Error ()) diff --git a/src/usexp/atom.ml b/src/usexp/atom.ml index 0935d0b8..80c24213 100644 --- a/src/usexp/atom.ml +++ b/src/usexp/atom.ml @@ -29,8 +29,9 @@ let is_valid_jbuild str = not (loop (len - 1)) let of_string s = A s +let to_string (A s) = s -let to_string (A t) syntax = +let print (A t) syntax = match syntax with | Jbuild -> if is_valid_jbuild t then diff --git a/src/usexp/atom.mli b/src/usexp/atom.mli index 2036a159..3922f43c 100644 --- a/src/usexp/atom.mli +++ b/src/usexp/atom.mli @@ -5,8 +5,9 @@ type syntax = Jbuild | Dune val is_valid_dune : string -> bool val of_string : string -> t +val to_string : t -> string -val to_string : t -> syntax -> string +val print : t -> syntax -> string val of_int : int -> t val of_float : float -> t diff --git a/src/usexp/usexp.ml b/src/usexp/usexp.ml index ce9fca58..e7d4a1e1 100644 --- a/src/usexp/usexp.ml +++ b/src/usexp/usexp.ml @@ -90,13 +90,13 @@ let quoted s = Bytes.unsafe_to_string s' let rec to_string = function - | Atom a -> Atom.to_string a Atom.Dune + | Atom a -> Atom.print a Atom.Dune | Quoted_string s -> quoted s | List l -> Printf.sprintf "(%s)" (List.map l ~f:to_string |> String.concat ~sep:" ") let rec pp ppf = function | Atom s -> - Format.pp_print_string ppf (Atom.to_string s Atom.Dune) + Format.pp_print_string ppf (Atom.print s Atom.Dune) | Quoted_string s -> Format.pp_print_string ppf (quoted s) | List [] -> @@ -137,7 +137,7 @@ let pp_print_quoted_string ppf s = Format.pp_print_string ppf (quoted s) let rec pp_split_strings ppf = function - | Atom s -> Format.pp_print_string ppf (Atom.to_string s Atom.Dune) + | Atom s -> Format.pp_print_string ppf (Atom.print s Atom.Dune) | Quoted_string s -> pp_print_quoted_string ppf s | List [] -> Format.pp_print_string ppf "()" diff --git a/src/usexp/usexp.mli b/src/usexp/usexp.mli index 398ab39a..12b05e76 100644 --- a/src/usexp/usexp.mli +++ b/src/usexp/usexp.mli @@ -12,8 +12,7 @@ module Atom : sig val is_valid_dune : string -> bool val of_string : string -> t - - val to_string : t -> syntax -> string + val to_string : t -> string val of_int : int -> t val of_float : float -> t From 7d2c7d95795e981403a4d5e837ce6e696e4aace2 Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Wed, 20 Jun 2018 15:54:52 +0630 Subject: [PATCH 7/9] Parameterize sexp_tests on dune and jbuild syntax Signed-off-by: Rudi Grinberg --- src/usexp/atom.ml | 23 +++++---- src/usexp/atom.mli | 1 + src/usexp/usexp.mli | 2 +- test/unit-tests/sexp.mlt | 8 +-- test/unit-tests/sexp_tests.ml | 91 ++++++++++++++++++++--------------- 5 files changed, 70 insertions(+), 55 deletions(-) diff --git a/src/usexp/atom.ml b/src/usexp/atom.ml index 80c24213..307857ce 100644 --- a/src/usexp/atom.ml +++ b/src/usexp/atom.ml @@ -31,18 +31,17 @@ let is_valid_jbuild str = let of_string s = A s let to_string (A s) = s -let print (A t) syntax = - match syntax with - | Jbuild -> - if is_valid_jbuild t then - t - else - invalid_argf "Dune atom '%s' cannot be printed" t - | Dune -> - if is_valid_dune t then - t - else - invalid_argf "Jbuild atom '%s' cannot be printed" t +let is_valid (A t) = function + | Jbuild -> is_valid_jbuild t + | Dune -> is_valid_dune t + +let print ((A s) as t) syntax = + if is_valid t syntax then + s + else + match syntax with + | Jbuild -> invalid_argf "atom '%s' cannot be printed in jbuild syntax" s + | Dune -> invalid_argf "atom '%s' cannot be in dune syntax" s let of_int i = of_string (string_of_int i) let of_float x = of_string (string_of_float x) diff --git a/src/usexp/atom.mli b/src/usexp/atom.mli index 3922f43c..9b63bc49 100644 --- a/src/usexp/atom.mli +++ b/src/usexp/atom.mli @@ -3,6 +3,7 @@ type t = private A of string [@@unboxed] type syntax = Jbuild | Dune val is_valid_dune : string -> bool +val is_valid : t -> syntax -> bool val of_string : string -> t val to_string : t -> string diff --git a/src/usexp/usexp.mli b/src/usexp/usexp.mli index 12b05e76..ec79fb5a 100644 --- a/src/usexp/usexp.mli +++ b/src/usexp/usexp.mli @@ -9,7 +9,7 @@ module Atom : sig type syntax = Jbuild | Dune - val is_valid_dune : string -> bool + val is_valid : t -> syntax -> bool val of_string : string -> t val to_string : t -> string diff --git a/test/unit-tests/sexp.mlt b/test/unit-tests/sexp.mlt index 880f372b..8b521688 100644 --- a/test/unit-tests/sexp.mlt +++ b/test/unit-tests/sexp.mlt @@ -145,7 +145,7 @@ parse {|\%{foo}|} Different {jbuild = Ok - []; + []; dune = Error "Invalid atom character '%'"} |}] @@ -160,7 +160,7 @@ parse {|\$bar%foo%|} Different {jbuild = Ok - []; + []; dune = Error "Invalid atom character '%'"} |}] @@ -170,7 +170,7 @@ parse {|\$bar\%foo%|} Different {jbuild = Ok - []; + []; dune = Error "Invalid atom character '%'"} |}] @@ -180,6 +180,6 @@ parse {|\$bar\%foo%{bar}|} Different {jbuild = Ok - []; + []; dune = Error "Invalid atom character '%'"} |}] diff --git a/test/unit-tests/sexp_tests.ml b/test/unit-tests/sexp_tests.ml index 2d733007..adec99fe 100644 --- a/test/unit-tests/sexp_tests.ml +++ b/test/unit-tests/sexp_tests.ml @@ -4,43 +4,58 @@ let () = Printexc.record_backtrace true (* Test that all strings of length <= 3 such that [Usexp.Atom.is_valid s] are recignized as atoms by the parser *) + +let string_of_syntax (x : Usexp.Atom.syntax) = + match x with + | Dune -> "dune" + | Jbuild -> "jbuild" + let () = - for len = 0 to 3 do - let s = Bytes.create len in - for i = 0 to 1 lsl (len * 8) - 1 do - if len > 0 then Bytes.set s 0 (Char.chr ( i land 0xff)); - if len > 1 then Bytes.set s 1 (Char.chr ((i lsr 4) land 0xff)); - if len > 2 then Bytes.set s 2 (Char.chr ((i lsr 8) land 0xff)); - let s = Bytes.unsafe_to_string s in - let parser_recognizes_as_atom = - match Usexp.parse_string ~fname:"" ~mode:Single s with - | exception _ -> false - | Atom (_, A s') -> s = s' - | _ -> false - in - let printed_as_atom = - match Usexp.atom_or_quoted_string s with - | Atom _ -> true - | _ -> false - in - let valid_dune_atom = Usexp.Atom.is_valid_dune s in - if valid_dune_atom <> parser_recognizes_as_atom then begin - Printf.eprintf - "Usexp.Atom.is_valid error:\n\ - - s = %S\n\ - - Usexp.Atom.is_valid s = %B\n\ - - parser_recognizes_as_atom = %B\n" - s valid_dune_atom parser_recognizes_as_atom; - exit 1 - end; - if printed_as_atom && not parser_recognizes_as_atom then begin - Printf.eprintf - "Usexp.Atom.atom_or_quoted_string error:\n\ - - s = %S\n\ - - printed_as_atom = %B\n\ - - parser_recognizes_as_atom = %B\n" - s printed_as_atom parser_recognizes_as_atom; - exit 1 - end + [ Usexp.Atom.Dune, Usexp.Lexer.token, (fun s -> Usexp.Atom.is_valid s Dune) + ; Jbuild, Usexp.Lexer.jbuild_token, (fun s -> Usexp.Atom.is_valid s Jbuild) + ] + |> List.iter ~f:(fun (syntax, lexer, validator) -> + for len = 0 to 3 do + let s = Bytes.create len in + for i = 0 to 1 lsl (len * 8) - 1 do + if len > 0 then Bytes.set s 0 (Char.chr ( i land 0xff)); + if len > 1 then Bytes.set s 1 (Char.chr ((i lsr 4) land 0xff)); + if len > 2 then Bytes.set s 2 (Char.chr ((i lsr 8) land 0xff)); + let s = Bytes.unsafe_to_string s in + let parser_recognizes_as_atom = + match Usexp.parse_string ~lexer ~fname:"" ~mode:Single s with + | exception _ -> false + | Atom (_, A s') -> s = s' + | _ -> false + in + let printed_as_atom = + match Usexp.atom_or_quoted_string s with + | Atom _ -> true + | _ -> false + in + let valid_dune_atom = validator (Usexp.Atom.of_string s) in + if valid_dune_atom <> parser_recognizes_as_atom then begin + Printf.eprintf + "Usexp.Atom.is_valid error:\n\ + - syntax = %s\n\ + - s = %S\n\ + - Usexp.Atom.is_valid s = %B\n\ + - parser_recognizes_as_atom = %B\n" + (string_of_syntax syntax) s valid_dune_atom + parser_recognizes_as_atom; + exit 1 + end; + if printed_as_atom && not parser_recognizes_as_atom then begin + Printf.eprintf + "Usexp.Atom.atom_or_quoted_string error:\n\ + - syntax = %s\n\ + - s = %S\n\ + - printed_as_atom = %B\n\ + - parser_recognizes_as_atom = %B\n" + (string_of_syntax syntax) s printed_as_atom + parser_recognizes_as_atom; + exit 1 + end + done done - done + ) From f1804701584e73cfc35194502cd9b28ab405f4f3 Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Wed, 20 Jun 2018 16:02:51 +0630 Subject: [PATCH 8/9] Remove outdated comment about atoms Signed-off-by: Rudi Grinberg --- src/usexp/usexp.mli | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/usexp/usexp.mli b/src/usexp/usexp.mli index ec79fb5a..79792ced 100644 --- a/src/usexp/usexp.mli +++ b/src/usexp/usexp.mli @@ -4,8 +4,6 @@ module Atom : sig type t = private A of string [@@unboxed] - (** Acceptable atoms are composed of chars in the range ['!' .. '~'] excluding - [' ' '"' '(' ')' ';' '\\'], and must be nonempty. *) type syntax = Jbuild | Dune From 53d9c6446890a424ea62f42780c854191c3031d6 Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Wed, 20 Jun 2018 16:45:55 +0630 Subject: [PATCH 9/9] Print atom using atom constructor Signed-off-by: Rudi Grinberg --- test/unit-tests/sexp.mlt | 61 ++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/test/unit-tests/sexp.mlt b/test/unit-tests/sexp.mlt index 8b521688..79db5495 100644 --- a/test/unit-tests/sexp.mlt +++ b/test/unit-tests/sexp.mlt @@ -2,9 +2,21 @@ open Stdune;; open Sexp.Of_sexp;; -let pp_sexp_ast ppf sexp = - Sexp.pp ppf (Sexp.Ast.remove_locs sexp) +let pp_sexp_ast = + let rec subst_atoms ~f (s : Sexp.t) = + match s with + | Atom a -> f a + | Quoted_string _ -> s + | List xs -> List (List.map ~f:(subst_atoms ~f) xs) + in + fun ppf sexp -> + sexp + |> Sexp.Ast.remove_locs + |> subst_atoms ~f:(fun (A s) -> + List [(Sexp.atom "atom"); Sexp.atom_or_quoted_string s]) + |> Sexp.pp ppf ;; + #install_printer pp_sexp_ast;; [%%expect{| val pp_sexp_ast : Format.formatter -> Stdune.Sexp.Ast.t -> unit = @@ -20,7 +32,7 @@ let sexp = Sexp.parse_string ~fname:"" ~mode:Single {| (foo 2)) |} [%%expect{| -val sexp : Usexp.Ast.t = ((foo 1) (foo 2)) +val sexp : Usexp.Ast.t = (((atom foo) (atom 1)) ((atom foo) (atom 2))) |}] let of_sexp = record (field "foo" int) @@ -75,26 +87,34 @@ val parse : string -> parse_result = parse {| # ## x##y x||y a#b|c#d copy# |} [%%expect{| -- : parse_result = Same (Ok [#; ##; x##y; x||y; a#b|c#d; copy#]) +- : parse_result = +Same + (Ok + [(atom #); (atom ##); (atom x##y); (atom x||y); (atom a#b|c#d); + (atom copy#)]) |}] parse {|x #| comment |# y|} [%%expect{| - : parse_result = -Different {jbuild = Ok [x; y]; dune = Ok [x; #|; comment; |#; y]} +Different + {jbuild = Ok [(atom x); (atom y)]; + dune = Ok [(atom x); (atom #|); (atom comment); (atom |#); (atom y)]} |}] parse {|x#|y|} [%%expect{| - : parse_result = -Different {jbuild = Error "jbuild_atoms cannot contain #|"; dune = Ok [x#|y]} +Different + {jbuild = Error "jbuild_atoms cannot contain #|"; dune = Ok [(atom x#|y)]} |}] parse {|x|#y|} [%%expect{| - : parse_result = -Different {jbuild = Error "jbuild_atoms cannot contain |#"; dune = Ok [x|#y]} +Different + {jbuild = Error "jbuild_atoms cannot contain |#"; dune = Ok [(atom x|#y)]} |}] parse {|"\a"|} @@ -139,28 +159,23 @@ parse {|"$bar%foo%"|} - : parse_result = Same (Ok ["$bar%foo%"]) |}] +parse {|\${foo}|} +[%%expect{| +- : parse_result = Same (Ok [(atom \${foo})]) +|}] + parse {|\%{foo}|} [%%expect{| - : parse_result = Different - {jbuild = - Ok - []; - dune = Error "Invalid atom character '%'"} -|}] - -parse {|\${foo}|} -[%%expect{| -- : parse_result = Same (Ok [\${foo}]) + {jbuild = Ok [(atom "\\%{foo}")]; dune = Error "Invalid atom character '%'"} |}] parse {|\$bar%foo%|} [%%expect{| - : parse_result = Different - {jbuild = - Ok - []; + {jbuild = Ok [(atom "\\$bar%foo%")]; dune = Error "Invalid atom character '%'"} |}] @@ -168,9 +183,7 @@ parse {|\$bar\%foo%|} [%%expect{| - : parse_result = Different - {jbuild = - Ok - []; + {jbuild = Ok [(atom "\\$bar\\%foo%")]; dune = Error "Invalid atom character '%'"} |}] @@ -178,8 +191,6 @@ parse {|\$bar\%foo%{bar}|} [%%expect{| - : parse_result = Different - {jbuild = - Ok - []; + {jbuild = Ok [(atom "\\$bar\\%foo%{bar}")]; dune = Error "Invalid atom character '%'"} |}]