From bafb710a5b77aa4809424e23026c58dba47390ae Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Fri, 15 Jun 2018 14:50:05 +0700 Subject: [PATCH 1/6] Remove duplication between Atom.is_valid and should_be_atom Signed-off-by: Rudi Grinberg --- src/usexp/lexer.mll | 3 ++- src/usexp/usexp.ml | 17 +++-------------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/src/usexp/lexer.mll b/src/usexp/lexer.mll index cb5d6343..c53bab93 100644 --- a/src/usexp/lexer.mll +++ b/src/usexp/lexer.mll @@ -75,7 +75,8 @@ type block_string_line_kind = let comment = ';' [^ '\n' '\r']* let newline = '\r'? '\n' let blank = [' ' '\t' '\012'] -let atom_char = [^ ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\012'] +let atom_char = + [^ ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\000'-'\032' '\127'-'\255'] let digit = ['0'-'9'] let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F'] diff --git a/src/usexp/usexp.ml b/src/usexp/usexp.ml index 11bfd71d..48a3dd32 100644 --- a/src/usexp/usexp.ml +++ b/src/usexp/usexp.ml @@ -17,7 +17,7 @@ module Atom = struct let rec loop s i len = i = len || match String.unsafe_get s i with - | '"' | '(' | ')' | ';' | ' ' | '\t' | '\n' | '\012' | '\r' -> false + | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false | _ -> loop s (i + 1) len in fun s -> @@ -52,19 +52,8 @@ let atom s = let unsafe_atom_of_string s = Atom(A s) -let should_be_atom = - let rec loop s i len = - i = len || - match String.unsafe_get s i with - | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false - | _ -> loop s (i + 1) len - in - fun s -> - let len = String.length s in - len > 0 && loop s 0 len - let atom_or_quoted_string s = - if should_be_atom s then Atom (A s) + if Atom.is_valid s then Atom (A s) else Quoted_string s let quote_length s = @@ -260,7 +249,7 @@ module Ast = struct | List of Loc.t * t list let atom_or_quoted_string loc s = - if should_be_atom s then Atom (loc, A s) + if Atom.is_valid s then Atom (loc, A s) else Quoted_string (loc, s) let loc (Atom (loc, _) | Quoted_string (loc, _) | List (loc, _)) = loc From 5dcefb2e5b031966860be852d5ffaefa0a959e22 Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Fri, 15 Jun 2018 15:05:09 +0700 Subject: [PATCH 2/6] Forbid '%' from appearing in atoms Signed-off-by: Rudi Grinberg --- src/usexp/lexer.mll | 2 +- src/usexp/usexp.ml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/usexp/lexer.mll b/src/usexp/lexer.mll index c53bab93..eab1eaf9 100644 --- a/src/usexp/lexer.mll +++ b/src/usexp/lexer.mll @@ -76,7 +76,7 @@ let comment = ';' [^ '\n' '\r']* let newline = '\r'? '\n' let blank = [' ' '\t' '\012'] let atom_char = - [^ ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\000'-'\032' '\127'-'\255'] + [^ '%' ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\000'-'\032' '\127'-'\255'] let digit = ['0'-'9'] let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F'] diff --git a/src/usexp/usexp.ml b/src/usexp/usexp.ml index 48a3dd32..684b3751 100644 --- a/src/usexp/usexp.ml +++ b/src/usexp/usexp.ml @@ -17,7 +17,7 @@ module Atom = struct let rec loop s i len = i = len || match String.unsafe_get s i with - | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false + | '%' | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false | _ -> loop s (i + 1) len in fun s -> From 19908abf4df0af574b44abcc5aedd02e789abe4a Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Tue, 19 Jun 2018 00:59:48 +0700 Subject: [PATCH 3/6] Add '%' back to the atom char set Signed-off-by: Rudi Grinberg --- src/usexp/lexer.mll | 2 +- src/usexp/usexp.ml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/usexp/lexer.mll b/src/usexp/lexer.mll index eab1eaf9..c53bab93 100644 --- a/src/usexp/lexer.mll +++ b/src/usexp/lexer.mll @@ -76,7 +76,7 @@ let comment = ';' [^ '\n' '\r']* let newline = '\r'? '\n' let blank = [' ' '\t' '\012'] let atom_char = - [^ '%' ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\000'-'\032' '\127'-'\255'] + [^ ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\000'-'\032' '\127'-'\255'] let digit = ['0'-'9'] let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F'] diff --git a/src/usexp/usexp.ml b/src/usexp/usexp.ml index 684b3751..48a3dd32 100644 --- a/src/usexp/usexp.ml +++ b/src/usexp/usexp.ml @@ -17,7 +17,7 @@ module Atom = struct let rec loop s i len = i = len || match String.unsafe_get s i with - | '%' | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false + | '"' | '(' | ')' | ';' | '\000'..'\032' | '\127'..'\255' -> false | _ -> loop s (i + 1) len in fun s -> From ec8860d0be80b02052a5113588469158291fdfb1 Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Tue, 19 Jun 2018 01:00:50 +0700 Subject: [PATCH 4/6] Add more tests with regards to escaping Signed-off-by: Rudi Grinberg --- test/unit-tests/sexp.mlt | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/test/unit-tests/sexp.mlt b/test/unit-tests/sexp.mlt index 77a4f2a0..a460ec76 100644 --- a/test/unit-tests/sexp.mlt +++ b/test/unit-tests/sexp.mlt @@ -137,3 +137,28 @@ parse {|"$bar%foo%"|} [%%expect{| - : parse_result = Same (Ok ["$bar%foo%"]) |}] + +parse {|\%{foo}|} +[%%expect{| +- : parse_result = Same (Ok [\%{foo}]) +|}] + +parse {|\${foo}|} +[%%expect{| +- : parse_result = Same (Ok [\${foo}]) +|}] + +parse {|\$bar%foo%|} +[%%expect{| +- : parse_result = Same (Ok [\$bar%foo%]) +|}] + +parse {|\$bar\%foo%|} +[%%expect{| +- : parse_result = Same (Ok [\$bar\%foo%]) +|}] + +parse {|\$bar\%foo%{bar}|} +[%%expect{| +- : parse_result = Same (Ok [\$bar\%foo%{bar}]) +|}] From 8432ee42cf43eb1b0c300f189c4db459719771a6 Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Tue, 19 Jun 2018 01:14:19 +0700 Subject: [PATCH 5/6] Separate atom_char for jbuild and dune files Dune files have a more strict definition of atoms Signed-off-by: Rudi Grinberg --- src/usexp/lexer.mll | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/usexp/lexer.mll b/src/usexp/lexer.mll index c53bab93..3df3e106 100644 --- a/src/usexp/lexer.mll +++ b/src/usexp/lexer.mll @@ -75,11 +75,14 @@ type block_string_line_kind = let comment = ';' [^ '\n' '\r']* let newline = '\r'? '\n' let blank = [' ' '\t' '\012'] -let atom_char = - [^ ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\000'-'\032' '\127'-'\255'] let digit = ['0'-'9'] let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F'] +let atom_char_jbuild = + [^ ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\012'] +let atom_char_dune = + [^ ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\000'-'\032' '\127'-'\255'] + (* rule for jbuild files *) rule jbuild_token = parse | newline @@ -117,7 +120,7 @@ and jbuild_atom acc start = parse { lexbuf.lex_start_p <- start; error lexbuf "jbuild_atoms cannot contain |#" } - | ('#'+ | '|'+ | (atom_char # ['|' '#'])) as s + | ('#'+ | '|'+ | (atom_char_jbuild # ['|' '#'])) as s { jbuild_atom (if acc = "" then s else acc ^ s) start lexbuf } | "" @@ -244,7 +247,7 @@ and token = parse lexbuf.lex_start_p <- start; Quoted_string s } - | atom_char+ as s + | atom_char_dune+ as s { Token.Atom (A s) } | eof { Eof } From 63c170304597ab983ee91f8df228525b63b048a7 Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Tue, 19 Jun 2018 15:40:36 +0700 Subject: [PATCH 6/6] Simplify the dune atom char set Signed-off-by: Rudi Grinberg --- src/usexp/lexer.mll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/usexp/lexer.mll b/src/usexp/lexer.mll index 3df3e106..69912d31 100644 --- a/src/usexp/lexer.mll +++ b/src/usexp/lexer.mll @@ -81,7 +81,7 @@ let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F'] let atom_char_jbuild = [^ ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\012'] let atom_char_dune = - [^ ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\000'-'\032' '\127'-'\255'] + [^ ';' '(' ')' '"' '\000'-'\032' '\127'-'\255'] (* rule for jbuild files *) rule jbuild_token = parse