From cd6d5dba53efd0d5037d8a55affc7b6d52f7fc80 Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Wed, 20 Jun 2018 20:00:35 +0630 Subject: [PATCH] Simplify jbuild lexer and move types around The types should exist in the lexer module where they're used Signed-off-by: Rudi Grinberg --- src/usexp/dune_lexer.mll | 6 +++++- src/usexp/jbuild_lexer.mll | 25 ++++++++++++++++--------- src/usexp/lexer0.ml | 14 -------------- src/usexp/lexer0.mli | 9 --------- 4 files changed, 21 insertions(+), 33 deletions(-) diff --git a/src/usexp/dune_lexer.mll b/src/usexp/dune_lexer.mll index e6e4dcf0..2c3a10a2 100644 --- a/src/usexp/dune_lexer.mll +++ b/src/usexp/dune_lexer.mll @@ -1,5 +1,9 @@ { - open Lexer0 +open Lexer0 + +type block_string_line_kind = + | With_escape_sequences + | Raw } let comment = ';' [^ '\n' '\r']* diff --git a/src/usexp/jbuild_lexer.mll b/src/usexp/jbuild_lexer.mll index 4a2d546e..4cfade63 100644 --- a/src/usexp/jbuild_lexer.mll +++ b/src/usexp/jbuild_lexer.mll @@ -1,5 +1,14 @@ { open Lexer0 + +(* The difference between the old and new syntax is that the old + syntax allows a backslash followed by any character other than 'n', + 'x', ... and interprets it as is. The new syntax is stricter in + order to allow introducing new escape sequences in the future if + needed. 
*) +type escape_mode = + | In_block_comment (* Inside #|...|# comments (old syntax) *) + | In_quoted_string } let comment = ';' [^ '\n' '\r']* @@ -24,7 +33,7 @@ rule token = parse | '"' { Buffer.clear escaped_buf; let start = Lexing.lexeme_start_p lexbuf in - let s = quoted_string Old_syntax lexbuf in + let s = quoted_string In_quoted_string lexbuf in lexbuf.lex_start_p <- start; Quoted_string s } @@ -77,7 +86,7 @@ and quoted_string mode = parse quoted_string mode lexbuf } | eof - { if mode <> In_block_comment then + { if mode = In_quoted_string then error lexbuf "unterminated quoted string"; Buffer.contents escaped_buf } @@ -120,14 +129,14 @@ and escape_sequence mode = parse } | (digit as c1) (digit as c2) (digit as c3) { let v = eval_decimal_escape c1 c2 c3 in - if mode <> In_block_comment && v > 255 then + if mode = In_quoted_string && v > 255 then error lexbuf "escape sequence in quoted string out of range" ~delta:(-1); Buffer.add_char escaped_buf (Char.chr v); Other } | digit* as s - { if mode <> In_block_comment then + { if mode = In_quoted_string then error lexbuf "unterminated decimal escape sequence" ~delta:(-1); Buffer.add_char escaped_buf '\\'; Buffer.add_string escaped_buf s; @@ -139,21 +148,19 @@ and escape_sequence mode = parse Other } | 'x' hexdigit* as s - { if mode <> In_block_comment then + { if mode = In_quoted_string then error lexbuf "unterminated hexadecimal escape sequence" ~delta:(-1); Buffer.add_char escaped_buf '\\'; Buffer.add_string escaped_buf s; Other } | _ as c - { if mode = New_syntax then - error lexbuf "unknown escape sequence" ~delta:(-1); - Buffer.add_char escaped_buf '\\'; + { Buffer.add_char escaped_buf '\\'; Buffer.add_char escaped_buf c; Other } | eof - { if mode <> In_block_comment then + { if mode = In_quoted_string then error lexbuf "unterminated escape sequence" ~delta:(-1); Other } diff --git a/src/usexp/lexer0.ml b/src/usexp/lexer0.ml index 5ebc545a..b361acc2 100644 --- a/src/usexp/lexer0.ml +++ b/src/usexp/lexer0.ml @@ 
-31,24 +31,10 @@ let error ?(delta=0) lexbuf message = let escaped_buf = Buffer.create 256 -(* The difference between the old and new syntax is that the old - syntax allows backslash following by any characters other than 'n', - 'x', ... and interpret it as it. The new syntax is stricter in - order to allow introducing new escape sequence in the future if - needed. *) -type escape_mode = - | In_block_comment (* Inside #|...|# comments (old syntax) *) - | Old_syntax - | New_syntax - type escape_sequence = | Newline | Other -type block_string_line_kind = - | With_escape_sequences - | Raw - let eval_decimal_char c = Char.code c - Char.code '0' let eval_decimal_escape c1 c2 c3 = diff --git a/src/usexp/lexer0.mli b/src/usexp/lexer0.mli index fc15c06a..b442888d 100644 --- a/src/usexp/lexer0.mli +++ b/src/usexp/lexer0.mli @@ -24,19 +24,10 @@ val escaped_buf : Buffer.t exception Error of Error.t -type escape_mode = - | In_block_comment (* Inside #|...|# comments (old syntax) *) - | Old_syntax - | New_syntax - type escape_sequence = | Newline | Other -type block_string_line_kind = - | With_escape_sequences - | Raw - val eval_decimal_char : char -> int val eval_decimal_escape : char -> char -> char -> int