Dune & Jbuild validation for atoms
Atoms can now be constructed and pretty printed with a syntax = Jbuild | Dune. The syntax controls validation that will be used to make sure we are printing something/reading valid Signed-off-by: Rudi Grinberg <rudi.grinberg@gmail.com>
This commit is contained in:
parent
2ed5f3fcfd
commit
ad3a95655d
|
@ -149,8 +149,7 @@ module Lang = struct
|
||||||
in
|
in
|
||||||
let ver =
|
let ver =
|
||||||
Sexp.Of_sexp.parse Syntax.Version.t Univ_map.empty
|
Sexp.Of_sexp.parse Syntax.Version.t Univ_map.empty
|
||||||
(Atom (ver_loc, Sexp.Atom.of_string ver))
|
(Atom (ver_loc, Sexp.Atom.of_string_exn Sexp.Atom.Dune ver)) in
|
||||||
in
|
|
||||||
match Hashtbl.find langs name with
|
match Hashtbl.find langs name with
|
||||||
| None ->
|
| None ->
|
||||||
Loc.fail name_loc "Unknown language %S.%s" name
|
Loc.fail name_loc "Unknown language %S.%s" name
|
||||||
|
|
|
@ -11,7 +11,8 @@ let int n = Usexp.Atom (Usexp.Atom.of_int n)
|
||||||
let string = Usexp.atom_or_quoted_string
|
let string = Usexp.atom_or_quoted_string
|
||||||
let record l =
|
let record l =
|
||||||
let open Usexp in
|
let open Usexp in
|
||||||
List (List.map l ~f:(fun (n, v) -> List [Atom(Atom.of_string n); v]))
|
List (List.map l ~f:(fun (n, v) ->
|
||||||
|
List [Atom(Atom.of_string_exn Atom.Dune n); v]))
|
||||||
|
|
||||||
let sexp_of_position_no_file (p : Lexing.position) =
|
let sexp_of_position_no_file (p : Lexing.position) =
|
||||||
record
|
record
|
||||||
|
|
|
@ -34,7 +34,8 @@ module To_sexp = struct
|
||||||
let string_set set = list atom (String.Set.to_list set)
|
let string_set set = list atom (String.Set.to_list set)
|
||||||
let string_map f map = list (pair atom f) (String.Map.to_list map)
|
let string_map f map = list (pair atom f) (String.Map.to_list map)
|
||||||
let record l =
|
let record l =
|
||||||
List (List.map l ~f:(fun (n, v) -> List [Atom(Atom.of_string n); v]))
|
List (List.map l ~f:(fun (n, v) ->
|
||||||
|
List [Atom(Atom.of_string_exn Usexp.Atom.Dune n); v]))
|
||||||
let string_hashtbl f h =
|
let string_hashtbl f h =
|
||||||
string_map f
|
string_map f
|
||||||
(Hashtbl.foldi h ~init:String.Map.empty ~f:(fun key data acc ->
|
(Hashtbl.foldi h ~init:String.Map.empty ~f:(fun key data acc ->
|
||||||
|
@ -54,7 +55,7 @@ module To_sexp = struct
|
||||||
|
|
||||||
let record_fields (l : field list) =
|
let record_fields (l : field list) =
|
||||||
List (List.filter_map l ~f:(fun (k, v) ->
|
List (List.filter_map l ~f:(fun (k, v) ->
|
||||||
Option.map v ~f:(fun v -> List[Atom (Atom.of_string k); v])))
|
Option.map v ~f:(fun v -> List[Atom (Atom.of_string_exn Atom.Dune k); v])))
|
||||||
|
|
||||||
let unknown _ = unsafe_atom_of_string "<unknown>"
|
let unknown _ = unsafe_atom_of_string "<unknown>"
|
||||||
end
|
end
|
||||||
|
@ -296,13 +297,13 @@ module Of_sexp = struct
|
||||||
let string = plain_string (fun ~loc:_ x -> x)
|
let string = plain_string (fun ~loc:_ x -> x)
|
||||||
let int =
|
let int =
|
||||||
basic "Integer" (fun s ->
|
basic "Integer" (fun s ->
|
||||||
match int_of_string s with
|
match int_of_string (s Atom.Dune) with
|
||||||
| x -> Ok x
|
| x -> Ok x
|
||||||
| exception _ -> Error ())
|
| exception _ -> Error ())
|
||||||
|
|
||||||
let float =
|
let float =
|
||||||
basic "Float" (fun s ->
|
basic "Float" (fun s ->
|
||||||
match float_of_string s with
|
match float_of_string (s Atom.Dune) with
|
||||||
| x -> Ok x
|
| x -> Ok x
|
||||||
| exception _ -> Error ())
|
| exception _ -> Error ())
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,55 @@
|
||||||
|
type t = A of string [@@unboxed]
|
||||||
|
|
||||||
|
let invalid_argf fmt = Printf.ksprintf invalid_arg fmt
|
||||||
|
|
||||||
|
type syntax = Jbuild | Dune
|
||||||
|
|
||||||
|
let string_of_syntax = function
|
||||||
|
| Jbuild -> "jbuild"
|
||||||
|
| Dune -> "dune"
|
||||||
|
|
||||||
|
let (is_valid_jbuild, is_valid_dune) =
|
||||||
|
let rec jbuild s i len =
|
||||||
|
i = len ||
|
||||||
|
match String.unsafe_get s i with
|
||||||
|
| '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false
|
||||||
|
| _ -> jbuild s (i + 1) len
|
||||||
|
in
|
||||||
|
let rec dune s i len =
|
||||||
|
i = len ||
|
||||||
|
match String.unsafe_get s i with
|
||||||
|
| '%' | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false
|
||||||
|
| _ -> dune s (i + 1) len
|
||||||
|
in
|
||||||
|
let make looper s =
|
||||||
|
let len = String.length s in
|
||||||
|
len > 0 && looper s 0 len
|
||||||
|
in
|
||||||
|
(make jbuild, make dune)
|
||||||
|
|
||||||
|
let of_string syn s =
|
||||||
|
match syn with
|
||||||
|
| Jbuild when is_valid_jbuild s -> Some (A s)
|
||||||
|
| Dune when is_valid_dune s -> Some (A s)
|
||||||
|
| _ -> None
|
||||||
|
|
||||||
|
let of_string_exn syn s =
|
||||||
|
match of_string syn s with
|
||||||
|
| Some s -> s
|
||||||
|
| None ->
|
||||||
|
invalid_argf "'%s' is not a valid %s atom" s (string_of_syntax syn)
|
||||||
|
|
||||||
|
let to_string (A t) syntax =
|
||||||
|
match syntax with
|
||||||
|
| Jbuild -> t
|
||||||
|
| Dune ->
|
||||||
|
if is_valid_dune t then
|
||||||
|
t
|
||||||
|
else
|
||||||
|
invalid_argf "Jbuild atom '%s' is not a valid dune atom" t
|
||||||
|
|
||||||
|
let of_int i = of_string_exn Dune (string_of_int i)
|
||||||
|
let of_float x = of_string_exn Dune (string_of_float x)
|
||||||
|
let of_bool x = of_string_exn Dune (string_of_bool x)
|
||||||
|
let of_int64 i = of_string_exn Dune (Int64.to_string i)
|
||||||
|
let of_digest d = of_string_exn Dune (Digest.to_hex d)
|
|
@ -0,0 +1,15 @@
|
||||||
|
type t = private A of string [@@unboxed]
|
||||||
|
|
||||||
|
type syntax = Jbuild | Dune
|
||||||
|
|
||||||
|
val of_string : syntax -> string -> t option
|
||||||
|
|
||||||
|
val of_string_exn : syntax -> string -> t
|
||||||
|
|
||||||
|
val to_string : t -> syntax -> string
|
||||||
|
|
||||||
|
val of_int : int -> t
|
||||||
|
val of_float : float -> t
|
||||||
|
val of_bool : bool -> t
|
||||||
|
val of_int64 : Int64.t -> t
|
||||||
|
val of_digest : Digest.t -> t
|
|
@ -1,7 +1,3 @@
|
||||||
module Atom : sig
|
|
||||||
type t = A of string [@@unboxed]
|
|
||||||
end
|
|
||||||
|
|
||||||
module Token : sig
|
module Token : sig
|
||||||
type t =
|
type t =
|
||||||
| Atom of Atom.t
|
| Atom of Atom.t
|
||||||
|
|
|
@ -1,8 +1,4 @@
|
||||||
{
|
{
|
||||||
module Atom = struct
|
|
||||||
type t = A of string [@@unboxed]
|
|
||||||
end
|
|
||||||
|
|
||||||
module Token = struct
|
module Token = struct
|
||||||
type t =
|
type t =
|
||||||
| Atom of Atom.t
|
| Atom of Atom.t
|
||||||
|
@ -128,7 +124,7 @@ and jbuild_atom acc start = parse
|
||||||
error lexbuf "Internal error in the S-expression parser, \
|
error lexbuf "Internal error in the S-expression parser, \
|
||||||
please report upstream.";
|
please report upstream.";
|
||||||
lexbuf.lex_start_p <- start;
|
lexbuf.lex_start_p <- start;
|
||||||
Token.Atom (A acc)
|
Token.Atom (Atom.of_string_exn Jbuild acc)
|
||||||
}
|
}
|
||||||
|
|
||||||
and quoted_string mode = parse
|
and quoted_string mode = parse
|
||||||
|
@ -248,7 +244,7 @@ and token = parse
|
||||||
Quoted_string s
|
Quoted_string s
|
||||||
}
|
}
|
||||||
| atom_char_dune+ as s
|
| atom_char_dune+ as s
|
||||||
{ Token.Atom (A s) }
|
{ Token.Atom (Atom.of_string_exn Dune s) }
|
||||||
| eof
|
| eof
|
||||||
{ Eof }
|
{ Eof }
|
||||||
|
|
||||||
|
|
|
@ -10,34 +10,7 @@ module Bytes = struct
|
||||||
UnlabeledBytes.blit_string src src_pos dst dst_pos len
|
UnlabeledBytes.blit_string src src_pos dst dst_pos len
|
||||||
end
|
end
|
||||||
|
|
||||||
module Atom = struct
|
module Atom = Atom
|
||||||
type t = Lexer.Atom.t = A of string [@@unboxed]
|
|
||||||
|
|
||||||
let is_valid =
|
|
||||||
let rec loop s i len =
|
|
||||||
i = len ||
|
|
||||||
match String.unsafe_get s i with
|
|
||||||
| '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false
|
|
||||||
| _ -> loop s (i + 1) len
|
|
||||||
in
|
|
||||||
fun s ->
|
|
||||||
let len = String.length s in
|
|
||||||
len > 0 && loop s 0 len
|
|
||||||
|
|
||||||
(* XXX eventually we want to report a nice error message to the user
|
|
||||||
at the point the conversion is made. *)
|
|
||||||
let of_string s =
|
|
||||||
if is_valid s then A s
|
|
||||||
else invalid_arg(Printf.sprintf "Usexp.Atom.of_string: %S" s)
|
|
||||||
|
|
||||||
let of_int i = A (string_of_int i)
|
|
||||||
let of_float x = A (string_of_float x)
|
|
||||||
let of_bool x = A (string_of_bool x)
|
|
||||||
let of_int64 i = A (Int64.to_string i)
|
|
||||||
let of_digest d = A (Digest.to_hex d)
|
|
||||||
|
|
||||||
let to_string (A s) = s
|
|
||||||
end
|
|
||||||
|
|
||||||
type t =
|
type t =
|
||||||
| Atom of Atom.t
|
| Atom of Atom.t
|
||||||
|
@ -46,15 +19,14 @@ type t =
|
||||||
|
|
||||||
type sexp = t
|
type sexp = t
|
||||||
|
|
||||||
let atom s =
|
let atom s = Atom (Atom.of_string_exn Dune s)
|
||||||
if Atom.is_valid s then Atom (A s)
|
|
||||||
else invalid_arg "Usexp.atom"
|
|
||||||
|
|
||||||
let unsafe_atom_of_string s = Atom(A s)
|
let unsafe_atom_of_string s = atom s
|
||||||
|
|
||||||
let atom_or_quoted_string s =
|
let atom_or_quoted_string s =
|
||||||
if Atom.is_valid s then Atom (A s)
|
match Atom.of_string Atom.Dune s with
|
||||||
else Quoted_string s
|
| None -> Quoted_string s
|
||||||
|
| Some a -> Atom a
|
||||||
|
|
||||||
let quote_length s =
|
let quote_length s =
|
||||||
let n = ref 0 in
|
let n = ref 0 in
|
||||||
|
@ -117,13 +89,13 @@ let quoted s =
|
||||||
Bytes.unsafe_to_string s'
|
Bytes.unsafe_to_string s'
|
||||||
|
|
||||||
let rec to_string = function
|
let rec to_string = function
|
||||||
| Atom (A s) -> s
|
| Atom a -> Atom.to_string a Atom.Dune
|
||||||
| Quoted_string s -> quoted s
|
| Quoted_string s -> quoted s
|
||||||
| List l -> Printf.sprintf "(%s)" (List.map l ~f:to_string |> String.concat ~sep:" ")
|
| List l -> Printf.sprintf "(%s)" (List.map l ~f:to_string |> String.concat ~sep:" ")
|
||||||
|
|
||||||
let rec pp ppf = function
|
let rec pp ppf = function
|
||||||
| Atom (A s) ->
|
| Atom s ->
|
||||||
Format.pp_print_string ppf s
|
Format.pp_print_string ppf (Atom.to_string s Atom.Dune)
|
||||||
| Quoted_string s ->
|
| Quoted_string s ->
|
||||||
Format.pp_print_string ppf (quoted s)
|
Format.pp_print_string ppf (quoted s)
|
||||||
| List [] ->
|
| List [] ->
|
||||||
|
@ -164,7 +136,7 @@ let pp_print_quoted_string ppf s =
|
||||||
Format.pp_print_string ppf (quoted s)
|
Format.pp_print_string ppf (quoted s)
|
||||||
|
|
||||||
let rec pp_split_strings ppf = function
|
let rec pp_split_strings ppf = function
|
||||||
| Atom (A s) -> Format.pp_print_string ppf s
|
| Atom s -> Format.pp_print_string ppf (Atom.to_string s Atom.Dune)
|
||||||
| Quoted_string s -> pp_print_quoted_string ppf s
|
| Quoted_string s -> pp_print_quoted_string ppf s
|
||||||
| List [] ->
|
| List [] ->
|
||||||
Format.pp_print_string ppf "()"
|
Format.pp_print_string ppf "()"
|
||||||
|
@ -249,8 +221,9 @@ module Ast = struct
|
||||||
| List of Loc.t * t list
|
| List of Loc.t * t list
|
||||||
|
|
||||||
let atom_or_quoted_string loc s =
|
let atom_or_quoted_string loc s =
|
||||||
if Atom.is_valid s then Atom (loc, A s)
|
match Atom.of_string Atom.Dune s with
|
||||||
else Quoted_string (loc, s)
|
| None -> Quoted_string (loc, s)
|
||||||
|
| Some a -> Atom (loc, a)
|
||||||
|
|
||||||
let loc (Atom (loc, _) | Quoted_string (loc, _) | List (loc, _)) = loc
|
let loc (Atom (loc, _) | Quoted_string (loc, _) | List (loc, _)) = loc
|
||||||
|
|
||||||
|
|
|
@ -7,20 +7,19 @@ module Atom : sig
|
||||||
(** Acceptable atoms are composed of chars in the range ['!' .. '~'] excluding
|
(** Acceptable atoms are composed of chars in the range ['!' .. '~'] excluding
|
||||||
[' ' '"' '(' ')' ';' '\\'], and must be nonempty. *)
|
[' ' '"' '(' ')' ';' '\\'], and must be nonempty. *)
|
||||||
|
|
||||||
val is_valid : string -> bool
|
type syntax = Jbuild | Dune
|
||||||
(** [is_valid s] checks that [s] respects the constraints to be an atom. *)
|
|
||||||
|
|
||||||
val of_string : string -> t
|
val of_string : syntax -> string -> t option
|
||||||
(** Convert a string to an atom. If the string contains invalid
|
|
||||||
characters, raise [Invalid_argument]. *)
|
val of_string_exn : syntax -> string -> t
|
||||||
|
|
||||||
|
val to_string : t -> syntax -> string
|
||||||
|
|
||||||
val of_int : int -> t
|
val of_int : int -> t
|
||||||
val of_float : float -> t
|
val of_float : float -> t
|
||||||
val of_bool : bool -> t
|
val of_bool : bool -> t
|
||||||
val of_int64 : Int64.t -> t
|
val of_int64 : Int64.t -> t
|
||||||
val of_digest : Digest.t -> t
|
val of_digest : Digest.t -> t
|
||||||
|
|
||||||
val to_string : t -> string
|
|
||||||
end
|
end
|
||||||
|
|
||||||
module Loc : sig
|
module Loc : sig
|
||||||
|
|
|
@ -140,7 +140,7 @@ parse {|"$bar%foo%"|}
|
||||||
|
|
||||||
parse {|\%{foo}|}
|
parse {|\%{foo}|}
|
||||||
[%%expect{|
|
[%%expect{|
|
||||||
- : parse_result = Same (Ok [\%{foo}])
|
Exception: Invalid_argument "'\\%{foo}' is not a valid dune atom".
|
||||||
|}]
|
|}]
|
||||||
|
|
||||||
parse {|\${foo}|}
|
parse {|\${foo}|}
|
||||||
|
@ -150,15 +150,15 @@ parse {|\${foo}|}
|
||||||
|
|
||||||
parse {|\$bar%foo%|}
|
parse {|\$bar%foo%|}
|
||||||
[%%expect{|
|
[%%expect{|
|
||||||
- : parse_result = Same (Ok [\$bar%foo%])
|
Exception: Invalid_argument "'\\$bar%foo%' is not a valid dune atom".
|
||||||
|}]
|
|}]
|
||||||
|
|
||||||
parse {|\$bar\%foo%|}
|
parse {|\$bar\%foo%|}
|
||||||
[%%expect{|
|
[%%expect{|
|
||||||
- : parse_result = Same (Ok [\$bar\%foo%])
|
Exception: Invalid_argument "'\\$bar\\%foo%' is not a valid dune atom".
|
||||||
|}]
|
|}]
|
||||||
|
|
||||||
parse {|\$bar\%foo%{bar}|}
|
parse {|\$bar\%foo%{bar}|}
|
||||||
[%%expect{|
|
[%%expect{|
|
||||||
- : parse_result = Same (Ok [\$bar\%foo%{bar}])
|
Exception: Invalid_argument "'\\$bar\\%foo%{bar}' is not a valid dune atom".
|
||||||
|}]
|
|}]
|
||||||
|
|
|
@ -23,13 +23,15 @@ let () =
|
||||||
| Atom _ -> true
|
| Atom _ -> true
|
||||||
| _ -> false
|
| _ -> false
|
||||||
in
|
in
|
||||||
if Usexp.Atom.is_valid s <> parser_recognizes_as_atom then begin
|
let valid_dune_atom =
|
||||||
|
Option.is_some (Usexp.Atom.of_string Dune s) in
|
||||||
|
if valid_dune_atom <> parser_recognizes_as_atom then begin
|
||||||
Printf.eprintf
|
Printf.eprintf
|
||||||
"Usexp.Atom.is_valid error:\n\
|
"Usexp.Atom.is_valid error:\n\
|
||||||
- s = %S\n\
|
- s = %S\n\
|
||||||
- Usexp.Atom.is_valid s = %B\n\
|
- Usexp.Atom.is_valid s = %B\n\
|
||||||
- parser_recognizes_as_atom = %B\n"
|
- parser_recognizes_as_atom = %B\n"
|
||||||
s (Usexp.Atom.is_valid s) parser_recognizes_as_atom;
|
s valid_dune_atom parser_recognizes_as_atom;
|
||||||
exit 1
|
exit 1
|
||||||
end;
|
end;
|
||||||
if printed_as_atom && not parser_recognizes_as_atom then begin
|
if printed_as_atom && not parser_recognizes_as_atom then begin
|
||||||
|
|
Loading…
Reference in New Issue