Merge pull request #891 from rgrinberg/atom-jbuild-dune
Dune & Jbuild validation for atoms
This commit is contained in:
commit
411552e4c7
|
@ -149,8 +149,7 @@ module Lang = struct
|
|||
in
|
||||
let ver =
|
||||
Sexp.Of_sexp.parse Syntax.Version.t Univ_map.empty
|
||||
(Atom (ver_loc, Sexp.Atom.of_string ver))
|
||||
in
|
||||
(Atom (ver_loc, Sexp.Atom.of_string ver)) in
|
||||
match Hashtbl.find langs name with
|
||||
| None ->
|
||||
Loc.fail name_loc "Unknown language %S.%s" name
|
||||
|
|
|
@ -298,13 +298,13 @@ module Of_sexp = struct
|
|||
basic "Integer" (fun s ->
|
||||
match int_of_string s with
|
||||
| x -> Ok x
|
||||
| exception _ -> Error ())
|
||||
| exception _ -> Result.Error ())
|
||||
|
||||
let float =
|
||||
basic "Float" (fun s ->
|
||||
match float_of_string s with
|
||||
| x -> Ok x
|
||||
| exception _ -> Error ())
|
||||
| exception _ -> Result.Error ())
|
||||
|
||||
let pair a b =
|
||||
enter
|
||||
|
@ -333,7 +333,7 @@ module Of_sexp = struct
|
|||
let string_map t =
|
||||
list (pair string t) >>= fun bindings ->
|
||||
match String.Map.of_list bindings with
|
||||
| Ok x -> return x
|
||||
| Result.Ok x -> return x
|
||||
| Error (key, _v1, _v2) ->
|
||||
loc >>= fun loc ->
|
||||
of_sexp_errorf loc "key %s present multiple times" key
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
type t = A of string [@@unboxed]
|
||||
|
||||
let invalid_argf fmt = Printf.ksprintf invalid_arg fmt
|
||||
|
||||
type syntax = Jbuild | Dune
|
||||
|
||||
let is_valid_dune =
|
||||
let rec loop s i len =
|
||||
i = len ||
|
||||
match String.unsafe_get s i with
|
||||
| '%' | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false
|
||||
| _ -> loop s (i + 1) len
|
||||
in
|
||||
fun s ->
|
||||
let len = String.length s in
|
||||
len > 0 && loop s 0 len
|
||||
|
||||
let is_valid_jbuild str =
|
||||
let len = String.length str in
|
||||
len > 0 &&
|
||||
let rec loop ix =
|
||||
match str.[ix] with
|
||||
| '"' | '(' | ')' | ';' -> true
|
||||
| '|' -> ix > 0 && let next = ix - 1 in str.[next] = '#' || loop next
|
||||
| '#' -> ix > 0 && let next = ix - 1 in str.[next] = '|' || loop next
|
||||
| ' ' | '\t' | '\n' | '\012' | '\r' -> true
|
||||
| _ -> ix > 0 && loop (ix - 1)
|
||||
in
|
||||
not (loop (len - 1))
|
||||
|
||||
let of_string s = A s
|
||||
let to_string (A s) = s
|
||||
|
||||
let is_valid (A t) = function
|
||||
| Jbuild -> is_valid_jbuild t
|
||||
| Dune -> is_valid_dune t
|
||||
|
||||
let print ((A s) as t) syntax =
|
||||
if is_valid t syntax then
|
||||
s
|
||||
else
|
||||
match syntax with
|
||||
| Jbuild -> invalid_argf "atom '%s' cannot be printed in jbuild syntax" s
|
||||
| Dune -> invalid_argf "atom '%s' cannot be in dune syntax" s
|
||||
|
||||
let of_int i = of_string (string_of_int i)
|
||||
let of_float x = of_string (string_of_float x)
|
||||
let of_bool x = of_string (string_of_bool x)
|
||||
let of_digest d = of_string (Digest.to_hex d)
|
||||
let of_int64 i = of_string (Int64.to_string i)
|
|
@ -0,0 +1,17 @@
|
|||
type t = private A of string [@@unboxed]
|
||||
|
||||
type syntax = Jbuild | Dune
|
||||
|
||||
val is_valid_dune : string -> bool
|
||||
val is_valid : t -> syntax -> bool
|
||||
|
||||
val of_string : string -> t
|
||||
val to_string : t -> string
|
||||
|
||||
val print : t -> syntax -> string
|
||||
|
||||
val of_int : int -> t
|
||||
val of_float : float -> t
|
||||
val of_bool : bool -> t
|
||||
val of_int64 : Int64.t -> t
|
||||
val of_digest : Digest.t -> t
|
|
@ -1,7 +1,3 @@
|
|||
module Atom : sig
|
||||
type t = A of string [@@unboxed]
|
||||
end
|
||||
|
||||
module Token : sig
|
||||
type t =
|
||||
| Atom of Atom.t
|
||||
|
|
|
@ -1,8 +1,4 @@
|
|||
{
|
||||
module Atom = struct
|
||||
type t = A of string [@@unboxed]
|
||||
end
|
||||
|
||||
module Token = struct
|
||||
type t =
|
||||
| Atom of Atom.t
|
||||
|
@ -81,7 +77,7 @@ let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
|
|||
let atom_char_jbuild =
|
||||
[^ ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\012']
|
||||
let atom_char_dune =
|
||||
[^ ';' '(' ')' '"' '\000'-'\032' '\127'-'\255']
|
||||
[^ '%' ';' '(' ')' '"' '\000'-'\032' '\127'-'\255']
|
||||
|
||||
(* rule for jbuild files *)
|
||||
rule jbuild_token = parse
|
||||
|
@ -128,7 +124,7 @@ and jbuild_atom acc start = parse
|
|||
error lexbuf "Internal error in the S-expression parser, \
|
||||
please report upstream.";
|
||||
lexbuf.lex_start_p <- start;
|
||||
Token.Atom (A acc)
|
||||
Token.Atom (Atom.of_string acc)
|
||||
}
|
||||
|
||||
and quoted_string mode = parse
|
||||
|
@ -248,7 +244,8 @@ and token = parse
|
|||
Quoted_string s
|
||||
}
|
||||
| atom_char_dune+ as s
|
||||
{ Token.Atom (A s) }
|
||||
{ Token.Atom (Atom.of_string s) }
|
||||
| _ as c { error lexbuf (Printf.sprintf "Invalid atom character '%c'" c) }
|
||||
| eof
|
||||
{ Eof }
|
||||
|
||||
|
|
|
@ -10,34 +10,7 @@ module Bytes = struct
|
|||
UnlabeledBytes.blit_string src src_pos dst dst_pos len
|
||||
end
|
||||
|
||||
module Atom = struct
|
||||
type t = Lexer.Atom.t = A of string [@@unboxed]
|
||||
|
||||
let is_valid =
|
||||
let rec loop s i len =
|
||||
i = len ||
|
||||
match String.unsafe_get s i with
|
||||
| '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false
|
||||
| _ -> loop s (i + 1) len
|
||||
in
|
||||
fun s ->
|
||||
let len = String.length s in
|
||||
len > 0 && loop s 0 len
|
||||
|
||||
(* XXX eventually we want to report a nice error message to the user
|
||||
at the point the conversion is made. *)
|
||||
let of_string s =
|
||||
if is_valid s then A s
|
||||
else invalid_arg(Printf.sprintf "Usexp.Atom.of_string: %S" s)
|
||||
|
||||
let of_int i = A (string_of_int i)
|
||||
let of_float x = A (string_of_float x)
|
||||
let of_bool x = A (string_of_bool x)
|
||||
let of_int64 i = A (Int64.to_string i)
|
||||
let of_digest d = A (Digest.to_hex d)
|
||||
|
||||
let to_string (A s) = s
|
||||
end
|
||||
module Atom = Atom
|
||||
|
||||
type t =
|
||||
| Atom of Atom.t
|
||||
|
@ -46,15 +19,15 @@ type t =
|
|||
|
||||
type sexp = t
|
||||
|
||||
let atom s =
|
||||
if Atom.is_valid s then Atom (A s)
|
||||
else invalid_arg "Usexp.atom"
|
||||
let atom s = Atom (Atom.of_string s)
|
||||
|
||||
let unsafe_atom_of_string s = Atom(A s)
|
||||
let unsafe_atom_of_string s = atom s
|
||||
|
||||
let atom_or_quoted_string s =
|
||||
if Atom.is_valid s then Atom (A s)
|
||||
else Quoted_string s
|
||||
if Atom.is_valid_dune s then
|
||||
Atom (Atom.of_string s)
|
||||
else
|
||||
Quoted_string s
|
||||
|
||||
let quote_length s =
|
||||
let n = ref 0 in
|
||||
|
@ -117,13 +90,13 @@ let quoted s =
|
|||
Bytes.unsafe_to_string s'
|
||||
|
||||
let rec to_string = function
|
||||
| Atom (A s) -> s
|
||||
| Atom a -> Atom.print a Atom.Dune
|
||||
| Quoted_string s -> quoted s
|
||||
| List l -> Printf.sprintf "(%s)" (List.map l ~f:to_string |> String.concat ~sep:" ")
|
||||
|
||||
let rec pp ppf = function
|
||||
| Atom (A s) ->
|
||||
Format.pp_print_string ppf s
|
||||
| Atom s ->
|
||||
Format.pp_print_string ppf (Atom.print s Atom.Dune)
|
||||
| Quoted_string s ->
|
||||
Format.pp_print_string ppf (quoted s)
|
||||
| List [] ->
|
||||
|
@ -164,7 +137,7 @@ let pp_print_quoted_string ppf s =
|
|||
Format.pp_print_string ppf (quoted s)
|
||||
|
||||
let rec pp_split_strings ppf = function
|
||||
| Atom (A s) -> Format.pp_print_string ppf s
|
||||
| Atom s -> Format.pp_print_string ppf (Atom.print s Atom.Dune)
|
||||
| Quoted_string s -> pp_print_quoted_string ppf s
|
||||
| List [] ->
|
||||
Format.pp_print_string ppf "()"
|
||||
|
@ -249,8 +222,10 @@ module Ast = struct
|
|||
| List of Loc.t * t list
|
||||
|
||||
let atom_or_quoted_string loc s =
|
||||
if Atom.is_valid s then Atom (loc, A s)
|
||||
else Quoted_string (loc, s)
|
||||
match atom_or_quoted_string s with
|
||||
| Atom a -> Atom (loc, a)
|
||||
| Quoted_string s -> Quoted_string (loc, s)
|
||||
| List _ -> assert false
|
||||
|
||||
let loc (Atom (loc, _) | Quoted_string (loc, _) | List (loc, _)) = loc
|
||||
|
||||
|
|
|
@ -4,23 +4,19 @@
|
|||
|
||||
module Atom : sig
|
||||
type t = private A of string [@@unboxed]
|
||||
(** Acceptable atoms are composed of chars in the range ['!' .. '~'] excluding
|
||||
[' ' '"' '(' ')' ';' '\\'], and must be nonempty. *)
|
||||
|
||||
val is_valid : string -> bool
|
||||
(** [is_valid s] checks that [s] respects the constraints to be an atom. *)
|
||||
type syntax = Jbuild | Dune
|
||||
|
||||
val of_string : string -> t
|
||||
(** Convert a string to an atom. If the string contains invalid
|
||||
characters, raise [Invalid_argument]. *)
|
||||
val is_valid : t -> syntax -> bool
|
||||
|
||||
val of_string : string -> t
|
||||
val to_string : t -> string
|
||||
|
||||
val of_int : int -> t
|
||||
val of_float : float -> t
|
||||
val of_bool : bool -> t
|
||||
val of_int64 : Int64.t -> t
|
||||
val of_digest : Digest.t -> t
|
||||
|
||||
val to_string : t -> string
|
||||
end
|
||||
|
||||
module Loc : sig
|
||||
|
|
|
@ -2,9 +2,21 @@
|
|||
open Stdune;;
|
||||
open Sexp.Of_sexp;;
|
||||
|
||||
let pp_sexp_ast ppf sexp =
|
||||
Sexp.pp ppf (Sexp.Ast.remove_locs sexp)
|
||||
let pp_sexp_ast =
|
||||
let rec subst_atoms ~f (s : Sexp.t) =
|
||||
match s with
|
||||
| Atom a -> f a
|
||||
| Quoted_string _ -> s
|
||||
| List xs -> List (List.map ~f:(subst_atoms ~f) xs)
|
||||
in
|
||||
fun ppf sexp ->
|
||||
sexp
|
||||
|> Sexp.Ast.remove_locs
|
||||
|> subst_atoms ~f:(fun (A s) ->
|
||||
List [(Sexp.atom "atom"); Sexp.atom_or_quoted_string s])
|
||||
|> Sexp.pp ppf
|
||||
;;
|
||||
|
||||
#install_printer pp_sexp_ast;;
|
||||
[%%expect{|
|
||||
val pp_sexp_ast : Format.formatter -> Stdune.Sexp.Ast.t -> unit = <fun>
|
||||
|
@ -20,7 +32,7 @@ let sexp = Sexp.parse_string ~fname:"" ~mode:Single {|
|
|||
(foo 2))
|
||||
|}
|
||||
[%%expect{|
|
||||
val sexp : Usexp.Ast.t = ((foo 1) (foo 2))
|
||||
val sexp : Usexp.Ast.t = (((atom foo) (atom 1)) ((atom foo) (atom 2)))
|
||||
|}]
|
||||
|
||||
let of_sexp = record (field "foo" int)
|
||||
|
@ -52,8 +64,9 @@ let parse s =
|
|||
let f ~lexer =
|
||||
try
|
||||
Ok (Sexp.parse_string ~fname:"" ~mode:Many ~lexer s)
|
||||
with Sexp.Parse_error e ->
|
||||
Error (Sexp.Parse_error.message e)
|
||||
with
|
||||
| Sexp.Parse_error e -> Error (Sexp.Parse_error.message e)
|
||||
| Invalid_argument e -> Error e
|
||||
in
|
||||
let jbuild = f ~lexer:Sexp.Lexer.jbuild_token in
|
||||
let dune = f ~lexer:Sexp.Lexer.token in
|
||||
|
@ -74,26 +87,34 @@ val parse : string -> parse_result = <fun>
|
|||
|
||||
parse {| # ## x##y x||y a#b|c#d copy# |}
|
||||
[%%expect{|
|
||||
- : parse_result = Same (Ok [#; ##; x##y; x||y; a#b|c#d; copy#])
|
||||
- : parse_result =
|
||||
Same
|
||||
(Ok
|
||||
[(atom #); (atom ##); (atom x##y); (atom x||y); (atom a#b|c#d);
|
||||
(atom copy#)])
|
||||
|}]
|
||||
|
||||
|
||||
parse {|x #| comment |# y|}
|
||||
[%%expect{|
|
||||
- : parse_result =
|
||||
Different {jbuild = Ok [x; y]; dune = Ok [x; #|; comment; |#; y]}
|
||||
Different
|
||||
{jbuild = Ok [(atom x); (atom y)];
|
||||
dune = Ok [(atom x); (atom #|); (atom comment); (atom |#); (atom y)]}
|
||||
|}]
|
||||
|
||||
parse {|x#|y|}
|
||||
[%%expect{|
|
||||
- : parse_result =
|
||||
Different {jbuild = Error "jbuild_atoms cannot contain #|"; dune = Ok [x#|y]}
|
||||
Different
|
||||
{jbuild = Error "jbuild_atoms cannot contain #|"; dune = Ok [(atom x#|y)]}
|
||||
|}]
|
||||
|
||||
parse {|x|#y|}
|
||||
[%%expect{|
|
||||
- : parse_result =
|
||||
Different {jbuild = Error "jbuild_atoms cannot contain |#"; dune = Ok [x|#y]}
|
||||
Different
|
||||
{jbuild = Error "jbuild_atoms cannot contain |#"; dune = Ok [(atom x|#y)]}
|
||||
|}]
|
||||
|
||||
parse {|"\a"|}
|
||||
|
@ -138,27 +159,38 @@ parse {|"$bar%foo%"|}
|
|||
- : parse_result = Same (Ok ["$bar%foo%"])
|
||||
|}]
|
||||
|
||||
parse {|\%{foo}|}
|
||||
[%%expect{|
|
||||
- : parse_result = Same (Ok [\%{foo}])
|
||||
|}]
|
||||
|
||||
parse {|\${foo}|}
|
||||
[%%expect{|
|
||||
- : parse_result = Same (Ok [\${foo}])
|
||||
- : parse_result = Same (Ok [(atom \${foo})])
|
||||
|}]
|
||||
|
||||
parse {|\%{foo}|}
|
||||
[%%expect{|
|
||||
- : parse_result =
|
||||
Different
|
||||
{jbuild = Ok [(atom "\\%{foo}")]; dune = Error "Invalid atom character '%'"}
|
||||
|}]
|
||||
|
||||
parse {|\$bar%foo%|}
|
||||
[%%expect{|
|
||||
- : parse_result = Same (Ok [\$bar%foo%])
|
||||
- : parse_result =
|
||||
Different
|
||||
{jbuild = Ok [(atom "\\$bar%foo%")];
|
||||
dune = Error "Invalid atom character '%'"}
|
||||
|}]
|
||||
|
||||
parse {|\$bar\%foo%|}
|
||||
[%%expect{|
|
||||
- : parse_result = Same (Ok [\$bar\%foo%])
|
||||
- : parse_result =
|
||||
Different
|
||||
{jbuild = Ok [(atom "\\$bar\\%foo%")];
|
||||
dune = Error "Invalid atom character '%'"}
|
||||
|}]
|
||||
|
||||
parse {|\$bar\%foo%{bar}|}
|
||||
[%%expect{|
|
||||
- : parse_result = Same (Ok [\$bar\%foo%{bar}])
|
||||
- : parse_result =
|
||||
Different
|
||||
{jbuild = Ok [(atom "\\$bar\\%foo%{bar}")];
|
||||
dune = Error "Invalid atom character '%'"}
|
||||
|}]
|
||||
|
|
|
@ -4,42 +4,58 @@ let () = Printexc.record_backtrace true
|
|||
|
||||
(* Test that all strings of length <= 3 such that [Usexp.Atom.is_valid
|
||||
s] are recignized as atoms by the parser *)
|
||||
|
||||
let string_of_syntax (x : Usexp.Atom.syntax) =
|
||||
match x with
|
||||
| Dune -> "dune"
|
||||
| Jbuild -> "jbuild"
|
||||
|
||||
let () =
|
||||
for len = 0 to 3 do
|
||||
let s = Bytes.create len in
|
||||
for i = 0 to 1 lsl (len * 8) - 1 do
|
||||
if len > 0 then Bytes.set s 0 (Char.chr ( i land 0xff));
|
||||
if len > 1 then Bytes.set s 1 (Char.chr ((i lsr 4) land 0xff));
|
||||
if len > 2 then Bytes.set s 2 (Char.chr ((i lsr 8) land 0xff));
|
||||
let s = Bytes.unsafe_to_string s in
|
||||
let parser_recognizes_as_atom =
|
||||
match Usexp.parse_string ~fname:"" ~mode:Single s with
|
||||
| exception _ -> false
|
||||
| Atom (_, A s') -> s = s'
|
||||
| _ -> false
|
||||
in
|
||||
let printed_as_atom =
|
||||
match Usexp.atom_or_quoted_string s with
|
||||
| Atom _ -> true
|
||||
| _ -> false
|
||||
in
|
||||
if Usexp.Atom.is_valid s <> parser_recognizes_as_atom then begin
|
||||
Printf.eprintf
|
||||
"Usexp.Atom.is_valid error:\n\
|
||||
- s = %S\n\
|
||||
- Usexp.Atom.is_valid s = %B\n\
|
||||
- parser_recognizes_as_atom = %B\n"
|
||||
s (Usexp.Atom.is_valid s) parser_recognizes_as_atom;
|
||||
exit 1
|
||||
end;
|
||||
if printed_as_atom && not parser_recognizes_as_atom then begin
|
||||
Printf.eprintf
|
||||
"Usexp.Atom.atom_or_quoted_string error:\n\
|
||||
- s = %S\n\
|
||||
- printed_as_atom = %B\n\
|
||||
- parser_recognizes_as_atom = %B\n"
|
||||
s printed_as_atom parser_recognizes_as_atom;
|
||||
exit 1
|
||||
end
|
||||
[ Usexp.Atom.Dune, Usexp.Lexer.token, (fun s -> Usexp.Atom.is_valid s Dune)
|
||||
; Jbuild, Usexp.Lexer.jbuild_token, (fun s -> Usexp.Atom.is_valid s Jbuild)
|
||||
]
|
||||
|> List.iter ~f:(fun (syntax, lexer, validator) ->
|
||||
for len = 0 to 3 do
|
||||
let s = Bytes.create len in
|
||||
for i = 0 to 1 lsl (len * 8) - 1 do
|
||||
if len > 0 then Bytes.set s 0 (Char.chr ( i land 0xff));
|
||||
if len > 1 then Bytes.set s 1 (Char.chr ((i lsr 4) land 0xff));
|
||||
if len > 2 then Bytes.set s 2 (Char.chr ((i lsr 8) land 0xff));
|
||||
let s = Bytes.unsafe_to_string s in
|
||||
let parser_recognizes_as_atom =
|
||||
match Usexp.parse_string ~lexer ~fname:"" ~mode:Single s with
|
||||
| exception _ -> false
|
||||
| Atom (_, A s') -> s = s'
|
||||
| _ -> false
|
||||
in
|
||||
let printed_as_atom =
|
||||
match Usexp.atom_or_quoted_string s with
|
||||
| Atom _ -> true
|
||||
| _ -> false
|
||||
in
|
||||
let valid_dune_atom = validator (Usexp.Atom.of_string s) in
|
||||
if valid_dune_atom <> parser_recognizes_as_atom then begin
|
||||
Printf.eprintf
|
||||
"Usexp.Atom.is_valid error:\n\
|
||||
- syntax = %s\n\
|
||||
- s = %S\n\
|
||||
- Usexp.Atom.is_valid s = %B\n\
|
||||
- parser_recognizes_as_atom = %B\n"
|
||||
(string_of_syntax syntax) s valid_dune_atom
|
||||
parser_recognizes_as_atom;
|
||||
exit 1
|
||||
end;
|
||||
if printed_as_atom && not parser_recognizes_as_atom then begin
|
||||
Printf.eprintf
|
||||
"Usexp.Atom.atom_or_quoted_string error:\n\
|
||||
- syntax = %s\n\
|
||||
- s = %S\n\
|
||||
- printed_as_atom = %B\n\
|
||||
- parser_recognizes_as_atom = %B\n"
|
||||
(string_of_syntax syntax) s printed_as_atom
|
||||
parser_recognizes_as_atom;
|
||||
exit 1
|
||||
end
|
||||
done
|
||||
done
|
||||
done
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue