diff --git a/src/dune_project.ml b/src/dune_project.ml index 52b149a0..31878120 100644 --- a/src/dune_project.ml +++ b/src/dune_project.ml @@ -149,8 +149,7 @@ module Lang = struct in let ver = Sexp.Of_sexp.parse Syntax.Version.t Univ_map.empty - (Atom (ver_loc, Sexp.Atom.of_string ver)) - in + (Atom (ver_loc, Sexp.Atom.of_string_exn Sexp.Atom.Dune ver)) in match Hashtbl.find langs name with | None -> Loc.fail name_loc "Unknown language %S.%s" name diff --git a/src/loc.ml b/src/loc.ml index 28d7a1dc..f1b78fba 100644 --- a/src/loc.ml +++ b/src/loc.ml @@ -11,7 +11,8 @@ let int n = Usexp.Atom (Usexp.Atom.of_int n) let string = Usexp.atom_or_quoted_string let record l = let open Usexp in - List (List.map l ~f:(fun (n, v) -> List [Atom(Atom.of_string n); v])) + List (List.map l ~f:(fun (n, v) -> + List [Atom(Atom.of_string_exn Atom.Dune n); v])) let sexp_of_position_no_file (p : Lexing.position) = record diff --git a/src/stdune/sexp.ml b/src/stdune/sexp.ml index 69dad943..8e8e96a6 100644 --- a/src/stdune/sexp.ml +++ b/src/stdune/sexp.ml @@ -34,7 +34,8 @@ module To_sexp = struct let string_set set = list atom (String.Set.to_list set) let string_map f map = list (pair atom f) (String.Map.to_list map) let record l = - List (List.map l ~f:(fun (n, v) -> List [Atom(Atom.of_string n); v])) + List (List.map l ~f:(fun (n, v) -> + List [Atom(Atom.of_string_exn Usexp.Atom.Dune n); v])) let string_hashtbl f h = string_map f (Hashtbl.foldi h ~init:String.Map.empty ~f:(fun key data acc -> @@ -54,7 +55,7 @@ module To_sexp = struct let record_fields (l : field list) = List (List.filter_map l ~f:(fun (k, v) -> - Option.map v ~f:(fun v -> List[Atom (Atom.of_string k); v]))) + Option.map v ~f:(fun v -> List[Atom (Atom.of_string_exn Atom.Dune k); v]))) let unknown _ = unsafe_atom_of_string "" end @@ -296,13 +297,13 @@ module Of_sexp = struct let string = plain_string (fun ~loc:_ x -> x) let int = basic "Integer" (fun s -> - match int_of_string s with + match int_of_string (s Atom.Dune) with | x -> Ok x | exception _ -> Error ()) let float = basic "Float" (fun s -> - match float_of_string s with + match float_of_string (s Atom.Dune) with | x -> Ok x | exception _ -> Error ()) diff --git a/src/usexp/atom.ml b/src/usexp/atom.ml new file mode 100644 index 00000000..471301e1 --- /dev/null +++ b/src/usexp/atom.ml @@ -0,0 +1,55 @@ +type t = A of string [@@unboxed] + +let invalid_argf fmt = Printf.ksprintf invalid_arg fmt + +type syntax = Jbuild | Dune + +let string_of_syntax = function + | Jbuild -> "jbuild" + | Dune -> "dune" + +let (is_valid_jbuild, is_valid_dune) = + let rec jbuild s i len = + i = len || + match String.unsafe_get s i with + | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false + | _ -> jbuild s (i + 1) len + in + let rec dune s i len = + i = len || + match String.unsafe_get s i with + | '%' | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false + | _ -> dune s (i + 1) len + in + let make looper s = + let len = String.length s in + len > 0 && looper s 0 len + in + (make jbuild, make dune) + +let of_string syn s = + match syn with + | Jbuild when is_valid_jbuild s -> Some (A s) + | Dune when is_valid_dune s -> Some (A s) + | _ -> None + +let of_string_exn syn s = + match of_string syn s with + | Some s -> s + | None -> + invalid_argf "'%s' is not a valid %s atom" s (string_of_syntax syn) + +let to_string (A t) syntax = + match syntax with + | Jbuild -> t + | Dune -> + if is_valid_dune t then + t + else + invalid_argf "Jbuild atom '%s' is not a valid dune atom" t + +let of_int i = of_string_exn Dune (string_of_int i) +let of_float x = of_string_exn Dune (string_of_float x) +let of_bool x = of_string_exn Dune (string_of_bool x) +let of_int64 i = of_string_exn Dune (Int64.to_string i) +let of_digest d = of_string_exn Dune (Digest.to_hex d) diff --git a/src/usexp/atom.mli b/src/usexp/atom.mli new file mode 100644 index 00000000..40bd6658 --- /dev/null +++ b/src/usexp/atom.mli @@ -0,0 +1,15 @@ +type t = private A of string [@@unboxed] + +type syntax = Jbuild | Dune + +val of_string : syntax -> string -> t option + +val of_string_exn : syntax -> string -> t + +val to_string : t -> syntax -> string + +val of_int : int -> t +val of_float : float -> t +val of_bool : bool -> t +val of_int64 : Int64.t -> t +val of_digest : Digest.t -> t diff --git a/src/usexp/lexer.mli b/src/usexp/lexer.mli index 5015e1ad..22407e45 100644 --- a/src/usexp/lexer.mli +++ b/src/usexp/lexer.mli @@ -1,7 +1,3 @@ -module Atom : sig - type t = A of string [@@unboxed] -end - module Token : sig type t = | Atom of Atom.t diff --git a/src/usexp/lexer.mll b/src/usexp/lexer.mll index 69912d31..ef072f06 100644 --- a/src/usexp/lexer.mll +++ b/src/usexp/lexer.mll @@ -1,8 +1,4 @@ { -module Atom = struct - type t = A of string [@@unboxed] -end - module Token = struct type t = | Atom of Atom.t @@ -128,7 +124,7 @@ and jbuild_atom acc start = parse error lexbuf "Internal error in the S-expression parser, \ please report upstream."; lexbuf.lex_start_p <- start; - Token.Atom (A acc) + Token.Atom (Atom.of_string_exn Jbuild acc) } and quoted_string mode = parse @@ -248,7 +244,7 @@ and token = parse Quoted_string s } | atom_char_dune+ as s - { Token.Atom (A s) } + { Token.Atom (Atom.of_string_exn Dune s) } | eof { Eof } diff --git a/src/usexp/usexp.ml b/src/usexp/usexp.ml index 48a3dd32..0252bc66 100644 --- a/src/usexp/usexp.ml +++ b/src/usexp/usexp.ml @@ -10,34 +10,7 @@ module Bytes = struct UnlabeledBytes.blit_string src src_pos dst dst_pos len end -module Atom = struct - type t = Lexer.Atom.t = A of string [@@unboxed] - - let is_valid = - let rec loop s i len = - i = len || - match String.unsafe_get s i with - | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false - | _ -> loop s (i + 1) len - in - fun s -> - let len = String.length s in - len > 0 && loop s 0 len - - (* XXX eventually we want to report a nice error message to the user - at the point the conversion is made. *) - let of_string s = - if is_valid s then A s - else invalid_arg(Printf.sprintf "Usexp.Atom.of_string: %S" s) - - let of_int i = A (string_of_int i) - let of_float x = A (string_of_float x) - let of_bool x = A (string_of_bool x) - let of_int64 i = A (Int64.to_string i) - let of_digest d = A (Digest.to_hex d) - - let to_string (A s) = s -end +module Atom = Atom type t = | Atom of Atom.t @@ -46,15 +19,14 @@ type t = type sexp = t -let atom s = - if Atom.is_valid s then Atom (A s) - else invalid_arg "Usexp.atom" +let atom s = Atom (Atom.of_string_exn Dune s) -let unsafe_atom_of_string s = Atom(A s) +let unsafe_atom_of_string s = atom s let atom_or_quoted_string s = - if Atom.is_valid s then Atom (A s) - else Quoted_string s + match Atom.of_string Atom.Dune s with + | None -> Quoted_string s + | Some a -> Atom a let quote_length s = let n = ref 0 in @@ -117,13 +89,13 @@ let quoted s = Bytes.unsafe_to_string s' let rec to_string = function - | Atom (A s) -> s + | Atom a -> Atom.to_string a Atom.Dune | Quoted_string s -> quoted s | List l -> Printf.sprintf "(%s)" (List.map l ~f:to_string |> String.concat ~sep:" ") let rec pp ppf = function - | Atom (A s) -> - Format.pp_print_string ppf s + | Atom s -> + Format.pp_print_string ppf (Atom.to_string s Atom.Dune) | Quoted_string s -> Format.pp_print_string ppf (quoted s) | List [] -> @@ -164,7 +136,7 @@ let pp_print_quoted_string ppf s = Format.pp_print_string ppf (quoted s) let rec pp_split_strings ppf = function - | Atom (A s) -> Format.pp_print_string ppf s + | Atom s -> Format.pp_print_string ppf (Atom.to_string s Atom.Dune) | Quoted_string s -> pp_print_quoted_string ppf s | List [] -> Format.pp_print_string ppf "()" @@ -249,8 +221,9 @@ module Ast = struct | List of Loc.t * t list let atom_or_quoted_string loc s = - if Atom.is_valid s then Atom (loc, A s) - else Quoted_string (loc, s) + match Atom.of_string Atom.Dune s with + | None -> Quoted_string (loc, s) + | Some a -> Atom (loc, a) let loc (Atom (loc, _) | Quoted_string (loc, _) | List (loc, _)) = loc diff --git a/src/usexp/usexp.mli b/src/usexp/usexp.mli index 6ade200b..781625e0 100644 --- a/src/usexp/usexp.mli +++ b/src/usexp/usexp.mli @@ -7,20 +7,19 @@ module Atom : sig (** Acceptable atoms are composed of chars in the range ['!' .. '~'] excluding [' ' '"' '(' ')' ';' '\\'], and must be nonempty. *) - val is_valid : string -> bool - (** [is_valid s] checks that [s] respects the constraints to be an atom. *) + type syntax = Jbuild | Dune - val of_string : string -> t - (** Convert a string to an atom. If the string contains invalid - characters, raise [Invalid_argument]. *) + val of_string : syntax -> string -> t option + + val of_string_exn : syntax -> string -> t + + val to_string : t -> syntax -> string val of_int : int -> t val of_float : float -> t val of_bool : bool -> t val of_int64 : Int64.t -> t val of_digest : Digest.t -> t - - val to_string : t -> string end module Loc : sig diff --git a/test/unit-tests/sexp.mlt b/test/unit-tests/sexp.mlt index 67e516e1..821f4488 100644 --- a/test/unit-tests/sexp.mlt +++ b/test/unit-tests/sexp.mlt @@ -140,7 +140,7 @@ parse {|"$bar%foo%"|} parse {|\%{foo}|} [%%expect{| -- : parse_result = Same (Ok [\%{foo}]) +Exception: Invalid_argument "'\\%{foo}' is not a valid dune atom". |}] parse {|\${foo}|} @@ -150,15 +150,15 @@ parse {|\${foo}|} parse {|\$bar%foo%|} [%%expect{| -- : parse_result = Same (Ok [\$bar%foo%]) +Exception: Invalid_argument "'\\$bar%foo%' is not a valid dune atom". |}] parse {|\$bar\%foo%|} [%%expect{| -- : parse_result = Same (Ok [\$bar\%foo%]) +Exception: Invalid_argument "'\\$bar\\%foo%' is not a valid dune atom". |}] parse {|\$bar\%foo%{bar}|} [%%expect{| -- : parse_result = Same (Ok [\$bar\%foo%{bar}]) +Exception: Invalid_argument "'\\$bar\\%foo%{bar}' is not a valid dune atom". |}] diff --git a/test/unit-tests/sexp_tests.ml b/test/unit-tests/sexp_tests.ml index 4fd2bc3a..2d4e3fbf 100644 --- a/test/unit-tests/sexp_tests.ml +++ b/test/unit-tests/sexp_tests.ml @@ -23,13 +23,15 @@ let () = | Atom _ -> true | _ -> false in - if Usexp.Atom.is_valid s <> parser_recognizes_as_atom then begin + let valid_dune_atom = + Option.is_some (Usexp.Atom.of_string Dune s) in + if valid_dune_atom <> parser_recognizes_as_atom then begin Printf.eprintf "Usexp.Atom.is_valid error:\n\ - s = %S\n\ - Usexp.Atom.is_valid s = %B\n\ - parser_recognizes_as_atom = %B\n" - s (Usexp.Atom.is_valid s) parser_recognizes_as_atom; + s valid_dune_atom parser_recognizes_as_atom; exit 1 end; if printed_as_atom && not parser_recognizes_as_atom then begin