Strengthen the lexing of escape sequences (#872)
Unknown escape sequences such as \a are no longer allowed. Previously they were interpreted as a literal \a. This makes it possible to introduce new escape sequences in the future if needed. Signed-off-by: Jeremie Dimino <jeremie@dimino.org>
This commit is contained in:
parent
58a47e4ff8
commit
8727eb6c3c
|
@ -33,6 +33,16 @@ let error ?(delta=0) lexbuf message =
|
||||||
; message
|
; message
|
||||||
})
|
})
|
||||||
|
|
||||||
|
(* The difference between the old and new syntax is that the old
|
||||||
|
syntax allows a backslash followed by any character other than 'n',
|
||||||
|
'x', ... and interprets it literally. The new syntax is stricter in
|
||||||
|
order to allow introducing new escape sequences in the future if
|
||||||
|
needed. *)
|
||||||
|
type escape_mode =
|
||||||
|
| In_block_comment (* Inside #|...|# comments (old syntax) *)
|
||||||
|
| Old_syntax
|
||||||
|
| New_syntax
|
||||||
|
|
||||||
let eval_decimal_char c = Char.code c - Char.code '0'
|
let eval_decimal_char c = Char.code c - Char.code '0'
|
||||||
|
|
||||||
let eval_decimal_escape c1 c2 c3 =
|
let eval_decimal_escape c1 c2 c3 =
|
||||||
|
@ -82,7 +92,7 @@ rule jbuild_token = parse
|
||||||
| '"'
|
| '"'
|
||||||
{ Buffer.clear escaped_buf;
|
{ Buffer.clear escaped_buf;
|
||||||
let start = Lexing.lexeme_start_p lexbuf in
|
let start = Lexing.lexeme_start_p lexbuf in
|
||||||
let s = quoted_string true lexbuf in
|
let s = quoted_string Old_syntax lexbuf in
|
||||||
lexbuf.lex_start_p <- start;
|
lexbuf.lex_start_p <- start;
|
||||||
Quoted_string s
|
Quoted_string s
|
||||||
}
|
}
|
||||||
|
@ -117,35 +127,34 @@ and jbuild_atom acc start = parse
|
||||||
Token.Atom (A acc)
|
Token.Atom (A acc)
|
||||||
}
|
}
|
||||||
|
|
||||||
(* If [strict] is false, ignore errors *)
|
and quoted_string mode = parse
|
||||||
and quoted_string strict = parse
|
|
||||||
| '"'
|
| '"'
|
||||||
{ Buffer.contents escaped_buf }
|
{ Buffer.contents escaped_buf }
|
||||||
| '\\'
|
| '\\'
|
||||||
{ match escape_sequence strict lexbuf with
|
{ match escape_sequence mode lexbuf with
|
||||||
| Newline -> quoted_string_after_escaped_newline strict lexbuf
|
| Newline -> quoted_string_after_escaped_newline mode lexbuf
|
||||||
| Other -> quoted_string strict lexbuf
|
| Other -> quoted_string mode lexbuf
|
||||||
}
|
}
|
||||||
| newline as s
|
| newline as s
|
||||||
{ Lexing.new_line lexbuf;
|
{ Lexing.new_line lexbuf;
|
||||||
Buffer.add_string escaped_buf s;
|
Buffer.add_string escaped_buf s;
|
||||||
quoted_string strict lexbuf
|
quoted_string mode lexbuf
|
||||||
}
|
}
|
||||||
| _ as c
|
| _ as c
|
||||||
{ Buffer.add_char escaped_buf c;
|
{ Buffer.add_char escaped_buf c;
|
||||||
quoted_string strict lexbuf
|
quoted_string mode lexbuf
|
||||||
}
|
}
|
||||||
| eof
|
| eof
|
||||||
{ if strict then
|
{ if mode <> In_block_comment then
|
||||||
error lexbuf "unterminated quoted string";
|
error lexbuf "unterminated quoted string";
|
||||||
Buffer.contents escaped_buf
|
Buffer.contents escaped_buf
|
||||||
}
|
}
|
||||||
|
|
||||||
and quoted_string_after_escaped_newline strict = parse
|
and quoted_string_after_escaped_newline mode = parse
|
||||||
| [' ' '\t']*
|
| [' ' '\t']*
|
||||||
{ quoted_string strict lexbuf }
|
{ quoted_string mode lexbuf }
|
||||||
|
|
||||||
and escape_sequence strict = parse
|
and escape_sequence mode = parse
|
||||||
| newline
|
| newline
|
||||||
{ Lexing.new_line lexbuf;
|
{ Lexing.new_line lexbuf;
|
||||||
Newline }
|
Newline }
|
||||||
|
@ -163,14 +172,14 @@ and escape_sequence strict = parse
|
||||||
}
|
}
|
||||||
| (digit as c1) (digit as c2) (digit as c3)
|
| (digit as c1) (digit as c2) (digit as c3)
|
||||||
{ let v = eval_decimal_escape c1 c2 c3 in
|
{ let v = eval_decimal_escape c1 c2 c3 in
|
||||||
if strict && v > 255 then
|
if mode <> In_block_comment && v > 255 then
|
||||||
error lexbuf "escape sequence in quoted string out of range"
|
error lexbuf "escape sequence in quoted string out of range"
|
||||||
~delta:(-1);
|
~delta:(-1);
|
||||||
Buffer.add_char escaped_buf (Char.chr v);
|
Buffer.add_char escaped_buf (Char.chr v);
|
||||||
Other
|
Other
|
||||||
}
|
}
|
||||||
| digit* as s
|
| digit* as s
|
||||||
{ if strict then
|
{ if mode <> In_block_comment then
|
||||||
error lexbuf "unterminated decimal escape sequence" ~delta:(-1);
|
error lexbuf "unterminated decimal escape sequence" ~delta:(-1);
|
||||||
Buffer.add_char escaped_buf '\\';
|
Buffer.add_char escaped_buf '\\';
|
||||||
Buffer.add_string escaped_buf s;
|
Buffer.add_string escaped_buf s;
|
||||||
|
@ -182,19 +191,21 @@ and escape_sequence strict = parse
|
||||||
Other
|
Other
|
||||||
}
|
}
|
||||||
| 'x' hexdigit* as s
|
| 'x' hexdigit* as s
|
||||||
{ if strict then
|
{ if mode <> In_block_comment then
|
||||||
error lexbuf "unterminated hexadecimal escape sequence" ~delta:(-1);
|
error lexbuf "unterminated hexadecimal escape sequence" ~delta:(-1);
|
||||||
Buffer.add_char escaped_buf '\\';
|
Buffer.add_char escaped_buf '\\';
|
||||||
Buffer.add_string escaped_buf s;
|
Buffer.add_string escaped_buf s;
|
||||||
Other
|
Other
|
||||||
}
|
}
|
||||||
| _ as c
|
| _ as c
|
||||||
{ Buffer.add_char escaped_buf '\\';
|
{ if mode = New_syntax then
|
||||||
|
error lexbuf "unknown escape sequence" ~delta:(-1);
|
||||||
|
Buffer.add_char escaped_buf '\\';
|
||||||
Buffer.add_char escaped_buf c;
|
Buffer.add_char escaped_buf c;
|
||||||
Other
|
Other
|
||||||
}
|
}
|
||||||
| eof
|
| eof
|
||||||
{ if strict then
|
{ if mode <> In_block_comment then
|
||||||
error lexbuf "unterminated escape sequence" ~delta:(-1);
|
error lexbuf "unterminated escape sequence" ~delta:(-1);
|
||||||
Other
|
Other
|
||||||
}
|
}
|
||||||
|
@ -202,7 +213,7 @@ and escape_sequence strict = parse
|
||||||
and jbuild_block_comment = parse
|
and jbuild_block_comment = parse
|
||||||
| '"'
|
| '"'
|
||||||
{ Buffer.clear escaped_buf;
|
{ Buffer.clear escaped_buf;
|
||||||
ignore (quoted_string false lexbuf : string);
|
ignore (quoted_string In_block_comment lexbuf : string);
|
||||||
jbuild_block_comment lexbuf
|
jbuild_block_comment lexbuf
|
||||||
}
|
}
|
||||||
| "|#"
|
| "|#"
|
||||||
|
@ -243,7 +254,7 @@ and dune_quoted_string = parse
|
||||||
| "\\>"
|
| "\\>"
|
||||||
{ block_string_start Raw lexbuf }
|
{ block_string_start Raw lexbuf }
|
||||||
| ""
|
| ""
|
||||||
{ quoted_string true lexbuf }
|
{ quoted_string New_syntax lexbuf }
|
||||||
|
|
||||||
and block_string_start kind = parse
|
and block_string_start kind = parse
|
||||||
| newline as s
|
| newline as s
|
||||||
|
@ -270,7 +281,7 @@ and block_string = parse
|
||||||
block_string_after_newline lexbuf
|
block_string_after_newline lexbuf
|
||||||
}
|
}
|
||||||
| '\\'
|
| '\\'
|
||||||
{ match escape_sequence true lexbuf with
|
{ match escape_sequence New_syntax lexbuf with
|
||||||
| Newline -> block_string_after_newline lexbuf
|
| Newline -> block_string_after_newline lexbuf
|
||||||
| Other -> block_string lexbuf
|
| Other -> block_string lexbuf
|
||||||
}
|
}
|
||||||
|
|
|
@ -95,3 +95,15 @@ parse {|x|#y|}
|
||||||
- : parse_result =
|
- : parse_result =
|
||||||
Different {jbuild = Error "jbuild_atoms cannot contain |#"; dune = Ok [x|#y]}
|
Different {jbuild = Error "jbuild_atoms cannot contain |#"; dune = Ok [x|#y]}
|
||||||
|}]
|
|}]
|
||||||
|
|
||||||
|
parse {|"\a"|}
|
||||||
|
[%%expect{|
|
||||||
|
- : parse_result =
|
||||||
|
Different {jbuild = Ok ["\\a"]; dune = Error "unknown escape sequence"}
|
||||||
|
|}]
|
||||||
|
|
||||||
|
parse {|"\%{x}"|}
|
||||||
|
[%%expect{|
|
||||||
|
- : parse_result =
|
||||||
|
Different {jbuild = Ok ["\\%{x}"]; dune = Error "unknown escape sequence"}
|
||||||
|
|}]
|
||||||
|
|
Loading…
Reference in New Issue