dune/src/sexp_lexer.mll

180 lines
4.4 KiB
OCaml

{
type stack =
| Empty
| Open of Lexing.position * stack
| Sexp of Sexp.t * Sexp.Locs.t * stack
let error = Loc.fail_lex
let make_list =
let rec loop lexbuf acc acc_locs = function
| Empty ->
error lexbuf "right parenthesis without matching left parenthesis"
| Open (start, stack) ->
Sexp (List acc,
List ({ start; stop = Lexing.lexeme_end_p lexbuf }, acc_locs),
stack)
| Sexp (sexp, locs, stack) -> loop lexbuf (sexp :: acc) (locs :: acc_locs) stack
in
fun lexbuf stack -> loop lexbuf [] [] stack
let new_sexp loop stack lexbuf =
match stack with
| Sexp (sexp, locs, Empty) -> Some (sexp, locs)
| _ -> loop stack lexbuf
let atom_loc lexbuf : Sexp.Locs.t =
Atom
{ start = Lexing.lexeme_start_p lexbuf
; stop = Lexing.lexeme_end_p lexbuf
}
let char_for_backslash = function
| 'n' -> '\010'
| 'r' -> '\013'
| 'b' -> '\008'
| 't' -> '\009'
| c -> c
let dec_code c1 c2 c3 =
100 * (Char.code c1 - 48) + 10 * (Char.code c2 - 48) + (Char.code c3 - 48)
let hex_code c1 c2 =
let d1 = Char.code c1 in
let val1 =
if d1 >= 97 then d1 - 87
else if d1 >= 65 then d1 - 55
else d1 - 48 in
let d2 = Char.code c2 in
let val2 =
if d2 >= 97 then d2 - 87
else if d2 >= 65 then d2 - 55
else d2 - 48 in
val1 * 16 + val2
let escaped_buf = Buffer.create 256
}
let lf = '\010'
let lf_cr = ['\010' '\013']
let dos_newline = "\013\010"
let blank = [' ' '\009' '\012']
let unquoted = [^ ';' '(' ')' '"'] # blank # lf_cr
let digit = ['0'-'9']
let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
rule main stack = parse
| lf | dos_newline
{ Lexing.new_line lexbuf; main stack lexbuf }
| blank+
{ main stack lexbuf }
| (';' (_ # lf_cr)*)
{ main stack lexbuf }
| '('
{ main (Open (Lexing.lexeme_start_p lexbuf, stack)) lexbuf }
| ')'
{ new_sexp main (make_list lexbuf stack) lexbuf }
| '"'
{ Buffer.clear escaped_buf;
scan_string escaped_buf (Lexing.lexeme_start_p lexbuf) stack lexbuf
}
| unquoted* as s
{ new_sexp main (Sexp (Atom s, atom_loc lexbuf, stack)) lexbuf }
| eof
{ match stack with
| Empty -> None
| _ -> error lexbuf "unterminated s-expression" }
| _
{ error lexbuf "syntax error" }
and scan_string buf start stack = parse
| '"'
{ new_sexp main
(Sexp (Atom (Buffer.contents buf),
Atom { start; stop = Lexing.lexeme_end_p lexbuf },
stack))
lexbuf
}
| '\\' lf
{
Lexing.new_line lexbuf;
scan_string_after_escaped_newline buf start stack lexbuf
}
| '\\' dos_newline
{
Lexing.new_line lexbuf;
scan_string_after_escaped_newline buf start stack lexbuf
}
| '\\' (['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] as c)
{
Buffer.add_char buf (char_for_backslash c);
scan_string buf start stack lexbuf
}
| '\\' (digit as c1) (digit as c2) (digit as c3)
{
let v = dec_code c1 c2 c3 in
if v > 255 then error lexbuf "illegal escape";
Buffer.add_char buf (Char.chr v);
scan_string buf start stack lexbuf
}
| '\\' 'x' (hexdigit as c1) (hexdigit as c2)
{
let v = hex_code c1 c2 in
Buffer.add_char buf (Char.chr v);
scan_string buf start stack lexbuf
}
| '\\' (_ as c)
{
Buffer.add_char buf '\\';
Buffer.add_char buf c;
scan_string buf start stack lexbuf
}
| lf
{
Lexing.new_line lexbuf;
Buffer.add_char buf '\n';
scan_string buf start stack lexbuf
}
| ([^ '\\' '"'] # lf)+ as s
{
Buffer.add_string buf s;
scan_string buf start stack lexbuf
}
| eof
{
error lexbuf "unterminated string"
}
and scan_string_after_escaped_newline buf start stack = parse
| [' ' '\t']*
{ scan_string buf start stack lexbuf }
| ""
{ scan_string buf start stack lexbuf }
and trailing = parse
| lf | dos_newline
{ Lexing.new_line lexbuf; trailing lexbuf }
| blank+
{ trailing lexbuf }
| (';' (_ # lf_cr)*)
{ trailing lexbuf }
| eof
{ () }
| _
{ error lexbuf "garbage after s-expression" }
{
let single lexbuf =
match main Empty lexbuf with
| None -> error lexbuf "no s-expression found"
| Some sexp -> trailing lexbuf; sexp
let many lexbuf =
let rec loop acc =
match main Empty lexbuf with
| None -> List.rev acc
| Some sexp -> loop (sexp :: acc)
in
loop []
}