diff --git a/src/usexp/lexer.mll b/src/usexp/lexer.mll index cb5d6343..69912d31 100644 --- a/src/usexp/lexer.mll +++ b/src/usexp/lexer.mll @@ -75,10 +75,14 @@ type block_string_line_kind = let comment = ';' [^ '\n' '\r']* let newline = '\r'? '\n' let blank = [' ' '\t' '\012'] -let atom_char = [^ ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\012'] let digit = ['0'-'9'] let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F'] +let atom_char_jbuild = + [^ ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\012'] +let atom_char_dune = + [^ ';' '(' ')' '"' '\000'-'\032' '\127'-'\255'] + (* rule for jbuild files *) rule jbuild_token = parse | newline @@ -116,7 +120,7 @@ and jbuild_atom acc start = parse { lexbuf.lex_start_p <- start; error lexbuf "jbuild_atoms cannot contain |#" } - | ('#'+ | '|'+ | (atom_char # ['|' '#'])) as s + | ('#'+ | '|'+ | (atom_char_jbuild # ['|' '#'])) as s { jbuild_atom (if acc = "" then s else acc ^ s) start lexbuf } | "" @@ -243,7 +247,7 @@ and token = parse lexbuf.lex_start_p <- start; Quoted_string s } - | atom_char+ as s + | atom_char_dune+ as s { Token.Atom (A s) } | eof { Eof } diff --git a/src/usexp/usexp.ml b/src/usexp/usexp.ml index 11bfd71d..48a3dd32 100644 --- a/src/usexp/usexp.ml +++ b/src/usexp/usexp.ml @@ -17,7 +17,7 @@ module Atom = struct let rec loop s i len = i = len || match String.unsafe_get s i with - | '"' | '(' | ')' | ';' | ' ' | '\t' | '\n' | '\012' | '\r' -> false + | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false | _ -> loop s (i + 1) len in fun s -> @@ -52,19 +52,8 @@ let atom s = let unsafe_atom_of_string s = Atom(A s) -let should_be_atom = - let rec loop s i len = - i = len || - match String.unsafe_get s i with - | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false - | _ -> loop s (i + 1) len - in - fun s -> - let len = String.length s in - len > 0 && loop s 0 len - let atom_or_quoted_string s = - if should_be_atom s then Atom (A s) + if Atom.is_valid s then Atom (A s) else Quoted_string s let quote_length s = @@ -260,7 +249,7 @@ module Ast = struct | List of Loc.t * t list let atom_or_quoted_string loc s = - if should_be_atom s then Atom (loc, A s) + if Atom.is_valid s then Atom (loc, A s) else Quoted_string (loc, s) let loc (Atom (loc, _) | Quoted_string (loc, _) | List (loc, _)) = loc diff --git a/test/unit-tests/sexp.mlt b/test/unit-tests/sexp.mlt index 77a4f2a0..a460ec76 100644 --- a/test/unit-tests/sexp.mlt +++ b/test/unit-tests/sexp.mlt @@ -137,3 +137,28 @@ parse {|"$bar%foo%"|} [%%expect{| - : parse_result = Same (Ok ["$bar%foo%"]) |}] + +parse {|\%{foo}|} +[%%expect{| +- : parse_result = Same (Ok [\%{foo}]) +|}] + +parse {|\${foo}|} +[%%expect{| +- : parse_result = Same (Ok [\${foo}]) +|}] + +parse {|\$bar%foo%|} +[%%expect{| +- : parse_result = Same (Ok [\$bar%foo%]) +|}] + +parse {|\$bar\%foo%|} +[%%expect{| +- : parse_result = Same (Ok [\$bar\%foo%]) +|}] + +parse {|\$bar\%foo%{bar}|} +[%%expect{| +- : parse_result = Same (Ok [\$bar\%foo%{bar}]) +|}]