Simplify the parser
Replace the current generated parser by an ocamllex lexer + a simple parser. The new code is: - much simpler and smaller - shouldn't cause ocamlopt to stack overflow anymore on BSD systems - slightly slower but not that much Signed-off-by: Jeremie Dimino <jdimino@janestreet.com>
This commit is contained in:
parent
115ee93dd6
commit
39e74826f4
3
Makefile
3
Makefile
|
@ -54,9 +54,6 @@ livedoc:
|
|||
update-jbuilds: $(BIN)
|
||||
$(BIN) build --dev @doc/runtest --auto-promote
|
||||
|
||||
update-sexp-parser:
|
||||
$(BIN) build --dev @update-sexp-parser --auto-promote
|
||||
|
||||
.PHONY: default install uninstall reinstall clean test doc
|
||||
.PHONY: promote accept-corrections opam-release
|
||||
|
||||
|
|
2
src/dune
2
src/dune
|
@ -12,7 +12,7 @@
|
|||
ocaml_config))
|
||||
(synopsis "Internal Dune library, do not use!")))
|
||||
|
||||
(ocamllex (meta_lexer glob_lexer))
|
||||
(ocamllex (meta_lexer glob_lexer dune_lexer))
|
||||
|
||||
(rule
|
||||
((targets (setup.ml))
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
rule is_script = parse
|
||||
| "(* -*- tuareg -*- *)" { true }
|
||||
| "" { false }
|
|
@ -16,9 +16,6 @@ module Dune_file = struct
|
|||
| Plain x -> x.path
|
||||
| Ocaml_script p -> p
|
||||
|
||||
let ocaml_script_prefix = "(* -*- tuareg -*- *)"
|
||||
let ocaml_script_prefix_len = String.length ocaml_script_prefix
|
||||
|
||||
let extract_ignored_subdirs =
|
||||
let stanza =
|
||||
let open Sexp.Of_sexp in
|
||||
|
@ -50,39 +47,14 @@ module Dune_file = struct
|
|||
in
|
||||
(ignored_subdirs, sexps)
|
||||
|
||||
let load file =
|
||||
Io.with_file_in file ~f:(fun ic ->
|
||||
let open Sexp in
|
||||
let state = Parser.create ~fname:(Path.to_string file) ~mode:Many in
|
||||
let buf = Bytes.create Io.buf_len in
|
||||
let rec loop stack =
|
||||
match input ic buf 0 Io.buf_len with
|
||||
| 0 -> stack
|
||||
| n -> loop (Parser.feed_subbytes state buf ~pos:0 ~len:n stack)
|
||||
in
|
||||
let finish stack =
|
||||
let sexps = Parser.feed_eoi state stack in
|
||||
let load ?lexer file =
|
||||
Io.with_lexbuf_from_file file ~f:(fun lb ->
|
||||
if Dune_lexer.is_script lb then
|
||||
(Ocaml_script file, String.Set.empty)
|
||||
else
|
||||
let sexps = Usexp.Parser.parse lb ?lexer ~mode:Many in
|
||||
let ignored_subdirs, sexps = extract_ignored_subdirs sexps in
|
||||
(Plain { path = file; sexps },
|
||||
ignored_subdirs)
|
||||
in
|
||||
let rec loop0 stack i =
|
||||
match input ic buf i (Io.buf_len - i) with
|
||||
| 0 ->
|
||||
finish (Parser.feed_subbytes state buf ~pos:0 ~len:i stack)
|
||||
| n ->
|
||||
let i = i + n in
|
||||
if i < ocaml_script_prefix_len then
|
||||
loop0 stack i
|
||||
else if Bytes.sub_string buf 0 ocaml_script_prefix_len
|
||||
[@warning "-6"]
|
||||
= ocaml_script_prefix then
|
||||
(Ocaml_script file, String.Set.empty)
|
||||
else
|
||||
let stack = Parser.feed_subbytes state buf ~pos:0 ~len:i stack in
|
||||
finish (loop stack)
|
||||
in
|
||||
loop0 Parser.Stack.empty 0)
|
||||
(Plain { path = file; sexps }, ignored_subdirs))
|
||||
end
|
||||
|
||||
let load_jbuild_ignore path =
|
||||
|
|
|
@ -46,11 +46,17 @@ let report_with_backtrace exn =
|
|||
hint on candidates)
|
||||
in
|
||||
{ p with loc = Some loc; pp }
|
||||
| Usexp.Parser.Error e ->
|
||||
let pos = Usexp.Parser.Error.position e in
|
||||
let msg = Usexp.Parser.Error.message e in
|
||||
let pos = { pos with pos_fname = !map_fname pos.pos_fname } in
|
||||
let loc = { Loc. start = pos; stop = pos } in
|
||||
| Sexp.Parse_error e ->
|
||||
let loc = Sexp.Parse_error.loc e in
|
||||
let msg = Sexp.Parse_error.message e in
|
||||
let map_pos (pos : Lexing.position) =
|
||||
{ pos with pos_fname = !map_fname pos.pos_fname }
|
||||
in
|
||||
let loc : Loc.t =
|
||||
{ start = map_pos loc.start
|
||||
; stop = map_pos loc.stop
|
||||
}
|
||||
in
|
||||
{ p with
|
||||
loc = Some loc
|
||||
; pp = fun ppf -> Format.fprintf ppf "@{<error>Error@}: %s\n" msg
|
||||
|
|
|
@ -84,19 +84,11 @@ let buf_len = 65_536
|
|||
module Sexp = struct
|
||||
open Sexp
|
||||
|
||||
let load path ~mode =
|
||||
with_file_in path ~f:(fun ic ->
|
||||
let state = Parser.create ~fname:(Path.to_string path) ~mode in
|
||||
let buf = Bytes.create buf_len in
|
||||
let rec loop stack =
|
||||
match input ic buf 0 buf_len with
|
||||
| 0 -> Parser.feed_eoi state stack
|
||||
| n -> loop (Parser.feed_subbytes state buf ~pos:0 ~len:n stack)
|
||||
in
|
||||
loop Parser.Stack.empty)
|
||||
let load ?lexer path ~mode =
|
||||
with_lexbuf_from_file path ~f:(Usexp.Parser.parse ~mode ?lexer)
|
||||
|
||||
let load_many_as_one path =
|
||||
match load path ~mode:Many with
|
||||
let load_many_as_one ?lexer path =
|
||||
match load ?lexer path ~mode:Many with
|
||||
| [] -> Ast.List (Loc.in_file (Path.to_string path), [])
|
||||
| x :: l ->
|
||||
let last = Option.value (List.last l) ~default:x in
|
||||
|
|
|
@ -27,8 +27,8 @@ val copy_file : src:Path.t -> dst:Path.t -> unit
|
|||
val read_all : in_channel -> string
|
||||
|
||||
module Sexp : sig
|
||||
val load : Path.t -> mode:'a Sexp.Parser.Mode.t -> 'a
|
||||
val load_many_as_one : Path.t -> Sexp.Ast.t
|
||||
val load : ?lexer:Usexp.Lexer.t -> Path.t -> mode:'a Sexp.Parser.Mode.t -> 'a
|
||||
val load_many_as_one : ?lexer:Usexp.Lexer.t -> Path.t -> Sexp.Ast.t
|
||||
end
|
||||
|
||||
(**/**)
|
||||
|
|
|
@ -5,9 +5,4 @@
|
|||
(synopsis "[Internal] S-expression library")
|
||||
(public_name dune.usexp)))
|
||||
|
||||
(rule
|
||||
(with-stdout-to table.ml.gen (run gen/gen_parser_automaton.exe)))
|
||||
|
||||
(alias
|
||||
((name update-sexp-parser)
|
||||
(action (diff table.ml table.ml.gen))))
|
||||
(ocamllex (lexer))
|
||||
|
|
|
@ -1,4 +0,0 @@
|
|||
(executable
|
||||
((name gen_parser_automaton)
|
||||
(libraries (base stdio gen_parsexp_lib))
|
||||
(preprocess (pps (ppx_sexp_conv ppx_compare)))))
|
|
@ -1,112 +0,0 @@
|
|||
(* Parsing of S-expression. The parsing is written as an automaton for which
|
||||
we provide different implementations of actions.
|
||||
*)
|
||||
|
||||
open Base
|
||||
open Stdio
|
||||
open Gen_parsexp_lib.Automaton
|
||||
open Gen_parsexp_lib.Automaton.Table
|
||||
|
||||
(* Sharing of transitions *)
|
||||
module Sharing = struct
|
||||
let create_assign_id () =
|
||||
let cache = Hashtbl.Poly.create () in
|
||||
(cache,
|
||||
fun x ->
|
||||
if not (Hashtbl.mem cache x) then
|
||||
Hashtbl.add_exn cache ~key:x ~data:(Hashtbl.length cache))
|
||||
|
||||
let share (table : t) =
|
||||
let transitions, assign_transition_id =
|
||||
create_assign_id ()
|
||||
in
|
||||
let transitions_eoi, assign_transition_eoi_id =
|
||||
create_assign_id ()
|
||||
in
|
||||
Array.iter table.transitions ~f:assign_transition_id;
|
||||
Array.iter table.transitions_eoi ~f:assign_transition_eoi_id;
|
||||
(transitions, transitions_eoi)
|
||||
end
|
||||
|
||||
let gen_code oc (t : t) =
|
||||
let (named_transitions, named_transitions_eoi) = Sharing.share t in
|
||||
let pr fmt = Out_channel.fprintf oc (Caml.(^^) fmt "\n") in
|
||||
pr "(* generated by %s *)" Caml.Sys.argv.(0);
|
||||
pr "";
|
||||
pr "open Parser_automaton_internal";
|
||||
pr "";
|
||||
pr "let raise = Parser_automaton_internal.Error.raise";
|
||||
pr "";
|
||||
let ordered_ids tbl =
|
||||
Hashtbl.fold tbl ~init:[] ~f:(fun ~key:x ~data:id acc -> (id, x) :: acc)
|
||||
|> List.sort ~cmp:(fun (id1, _) (id2, _) -> compare id1 id2)
|
||||
in
|
||||
List.iter (ordered_ids named_transitions)
|
||||
~f:(fun (id, tr) ->
|
||||
match tr with
|
||||
| Error error ->
|
||||
pr "let tr_%02d _state _char _stack =" id;
|
||||
pr " raise _state ~at_eof:false %s" (Error.to_string error)
|
||||
| Ok { action = (eps_actions, action); goto; advance } ->
|
||||
let eps_actions =
|
||||
List.filter_map ~f:Epsilon_action.to_runtime_function eps_actions
|
||||
in
|
||||
let action = Action.to_runtime_function action in
|
||||
pr "let tr_%02d state %schar stack =" id
|
||||
(if Option.is_none action &&
|
||||
not ([%compare.equal: goto_state] goto End_block_comment) then
|
||||
"_"
|
||||
else
|
||||
"");
|
||||
List.iter eps_actions ~f:(pr " let stack = %s state stack in");
|
||||
(match action with
|
||||
| None -> ()
|
||||
| Some s -> pr " let stack = %s state char stack in" s);
|
||||
(match goto with
|
||||
| State n -> pr " set_automaton_state state %d;" n
|
||||
| End_block_comment ->
|
||||
pr " let stack = end_block_comment state char stack in";
|
||||
pr " set_automaton_state state \
|
||||
(if block_comment_depth state <> 0 then %d else %d);"
|
||||
(State.to_int (Block_comment Normal)) (State.to_int Whitespace));
|
||||
pr " %s state;"
|
||||
(match advance with
|
||||
| Advance -> "advance"
|
||||
| Advance_eol -> "advance_eol");
|
||||
pr " stack"
|
||||
);
|
||||
pr "";
|
||||
List.iter (ordered_ids named_transitions_eoi) ~f:(fun (id, tr) ->
|
||||
match tr with
|
||||
| Error error ->
|
||||
pr "let tr_eoi_%02d state _stack =" id;
|
||||
pr " raise state ~at_eof:true %s" (Error.to_string error)
|
||||
| Ok eps_actions ->
|
||||
pr "let tr_eoi_%02d state stack =" id;
|
||||
let eps_actions =
|
||||
List.filter_map eps_actions ~f:Epsilon_action.to_runtime_function
|
||||
in
|
||||
List.iter eps_actions ~f:(pr " let stack = %s state stack in");
|
||||
pr " eps_eoi_check state stack");
|
||||
pr "";
|
||||
let pr_table ?(per_line=1) suffix tbl ids =
|
||||
pr "let transitions%s =" suffix;
|
||||
let len = Array.length tbl in
|
||||
let lines = len / per_line in
|
||||
assert (per_line * lines = len);
|
||||
for l = 0 to lines - 1 do
|
||||
Out_channel.fprintf oc (if l = 0 then " [|" else " ;");
|
||||
for col = 0 to per_line - 1 do
|
||||
if col > 0 then Out_channel.fprintf oc ";";
|
||||
let i = l * per_line + col in
|
||||
Out_channel.fprintf oc " tr%s_%02d" suffix (Hashtbl.find_exn ids tbl.(i))
|
||||
done;
|
||||
Out_channel.fprintf oc "\n"
|
||||
done;
|
||||
pr " |]";
|
||||
pr "";
|
||||
in
|
||||
pr_table "" t.transitions named_transitions ~per_line:8;
|
||||
pr_table "_eoi" t.transitions_eoi named_transitions_eoi
|
||||
|
||||
let () = gen_code Caml.stdout table
|
|
@ -1,383 +0,0 @@
|
|||
(* Abstract version of the parsing automaton.
|
||||
|
||||
It is used in two places:
|
||||
|
||||
- to define the automaton. At runtime, we only use integer for states and a table of
|
||||
functions for transitions
|
||||
|
||||
- for tests
|
||||
*)
|
||||
|
||||
open Base
|
||||
|
||||
module State = struct
|
||||
module Quoted_string = struct
|
||||
type t =
|
||||
| Normal
|
||||
| After_backslash
|
||||
| After_backslash_cr
|
||||
| After_backslash_digit
|
||||
| After_backslash_2digits
|
||||
| After_backslash_x
|
||||
| After_backslash_x_hex
|
||||
| Ignoring_blanks
|
||||
[@@deriving enumerate, compare, sexp_of]
|
||||
end
|
||||
|
||||
module Block_comment = struct
|
||||
type t =
|
||||
| Normal
|
||||
| After_pipe
|
||||
| After_hash
|
||||
| Quoted_string of Quoted_string.t
|
||||
[@@deriving enumerate, compare, sexp_of]
|
||||
end
|
||||
|
||||
module Unquoted_string = struct
|
||||
type t =
|
||||
| Normal
|
||||
| After_hash
|
||||
| After_pipe
|
||||
[@@deriving enumerate, compare, sexp_of]
|
||||
end
|
||||
|
||||
type t =
|
||||
| Whitespace
|
||||
| Error
|
||||
| After_cr
|
||||
| Unquoted_string of Unquoted_string.t
|
||||
| Line_comment
|
||||
| After_hash
|
||||
| Quoted_string of Quoted_string.t
|
||||
| Block_comment of Block_comment.t
|
||||
[@@deriving enumerate, compare, sexp_of]
|
||||
|
||||
let to_int t =
|
||||
let rec loop i t l =
|
||||
match l with
|
||||
| [] -> assert false
|
||||
| x :: l -> if [%compare.equal: t] t x then i else loop (i + 1) t l
|
||||
in
|
||||
loop 0 t all
|
||||
|
||||
let of_int i = List.nth_exn all i
|
||||
|
||||
let count = List.length all
|
||||
|
||||
let initial = to_int Whitespace
|
||||
let () = assert (initial = 0) (* This is assumed in parser_automaton_internal.ml *)
|
||||
let () = assert (to_int Error = 1) (* This is assumed in parser_automaton_internal.ml *)
|
||||
|
||||
let old_parser_approx_cont_state = function
|
||||
| Whitespace -> "Parsing_toplevel_whitespace"
|
||||
| After_cr -> "Parsing_nested_whitespace"
|
||||
| Unquoted_string _
|
||||
| Quoted_string _ -> "Parsing_atom"
|
||||
| After_hash -> "Parsing_atom"
|
||||
| Block_comment _ -> "Parsing_block_comment"
|
||||
| Line_comment -> "Parsing_toplevel_whitespace"
|
||||
(* This cannot happen with the old parser so the result is a dummy value *)
|
||||
| Error -> "Parsing_toplevel_whitespace"
|
||||
end
|
||||
|
||||
module Error = struct
|
||||
(* Subset of the [Parser_automaton_internal.Public.Error.Reason.t] type *)
|
||||
type t =
|
||||
| Unexpected_char_parsing_hex_escape
|
||||
| Unexpected_char_parsing_dec_escape
|
||||
| Unterminated_quoted_string
|
||||
| Unterminated_block_comment
|
||||
| Comment_token_in_unquoted_atom
|
||||
| Unexpected_character_after_cr
|
||||
| Automaton_in_error_state
|
||||
[@@deriving compare, sexp_of, hash, variants]
|
||||
|
||||
let to_string = Variants.to_name
|
||||
end
|
||||
|
||||
(* Action associated to transitions. Actions correspond to the similarly named functions
|
||||
in ../parser_automaton_internal.mli. *)
|
||||
module Action = struct
|
||||
|
||||
type t =
|
||||
| Nop
|
||||
| Opening
|
||||
| Closing
|
||||
| Add_atom_char
|
||||
| Add_quoted_atom_char
|
||||
| Add_first_char
|
||||
| Add_escaped
|
||||
| Add_hex_escape_char
|
||||
| Add_dec_escape_char
|
||||
| Add_last_hex_escape_char
|
||||
| Add_last_dec_escape_char
|
||||
| Add_token_char
|
||||
| Comment_add_last_dec_escape_char
|
||||
| Push_quoted_atom
|
||||
| Start_quoted_string
|
||||
| Start_block_comment
|
||||
| Start_sexp_comment
|
||||
| Start_line_comment
|
||||
[@@deriving compare, sexp_of, hash, variants]
|
||||
|
||||
let to_runtime_function = function
|
||||
| Nop -> None
|
||||
| t -> Some (String.uncapitalize (Variants.to_name t))
|
||||
end
|
||||
|
||||
(* Action associated to epsilon transitions, i.e. transitions that do not consume a
|
||||
character.
|
||||
|
||||
Having epsilon actions makes the definition of the automaton much simpler. *)
|
||||
module Epsilon_action = struct
|
||||
type t =
|
||||
| Nop
|
||||
| Push_atom
|
||||
| Add_first_char_hash
|
||||
| Add_escaped_cr
|
||||
| End_line_comment
|
||||
[@@deriving compare, sexp_of, hash, variants]
|
||||
|
||||
let to_runtime_function = function
|
||||
| Nop -> None
|
||||
| End_line_comment -> Some "end_line_comment"
|
||||
| t -> Some ("eps_" ^ String.uncapitalize (Variants.to_name t))
|
||||
end
|
||||
|
||||
module Transition = struct
|
||||
type t =
|
||||
| T of Action.t * State.t
|
||||
| E of Epsilon_action.t * State.t
|
||||
| Error of Error.t
|
||||
| End_block_comment (* can't be a normal transition, as the new state isn't known
|
||||
statically *)
|
||||
[@@deriving compare]
|
||||
end
|
||||
|
||||
module Final_transition = struct
|
||||
type t =
|
||||
| Eoi_check
|
||||
| E of Epsilon_action.t * State.t
|
||||
| Error of Error.t
|
||||
end
|
||||
|
||||
module type Automaton = sig
|
||||
val transition : State.t * char -> Transition.t
|
||||
val transition_eoi : State.t -> Final_transition.t
|
||||
end
|
||||
|
||||
(* Definition of the automaton, compiled later to a transition table. *)
|
||||
module Automaton : Automaton = struct
|
||||
module Quoted_string_transition = struct
|
||||
type t =
|
||||
| T of Action.t * State.Quoted_string.t
|
||||
| E of Epsilon_action.t * State.Quoted_string.t
|
||||
| Error of Error.t
|
||||
| End_of_quoted_string
|
||||
end
|
||||
|
||||
type context = In_block_comment | In_atom
|
||||
|
||||
let quoted_string_transition
|
||||
: context -> State.Quoted_string.t * char -> Quoted_string_transition.t
|
||||
= fun context x ->
|
||||
(* Distinguising atom and block comments is to optimize block comments. But
|
||||
we musn't optimize the exception on things like \321. *)
|
||||
let if_atom then_ else_ : Action.t =
|
||||
match context with
|
||||
| In_atom -> then_
|
||||
| In_block_comment -> else_
|
||||
in
|
||||
let if_atom_eps then_ else_ : Epsilon_action.t =
|
||||
match context with
|
||||
| In_atom -> then_
|
||||
| In_block_comment -> else_
|
||||
in
|
||||
match x with
|
||||
| Normal, '"' -> End_of_quoted_string
|
||||
| Normal, '\\' -> T (Add_token_char, After_backslash)
|
||||
| Normal, _ -> T (if_atom Add_quoted_atom_char Add_token_char, Normal)
|
||||
|
||||
| After_backslash, '\n' ->
|
||||
T (Add_token_char, Ignoring_blanks)
|
||||
| After_backslash, '\r' ->
|
||||
T (Add_token_char, After_backslash_cr)
|
||||
| After_backslash, 'x' ->
|
||||
T (Add_token_char, After_backslash_x)
|
||||
| After_backslash, '0'..'9' ->
|
||||
T (Add_dec_escape_char, After_backslash_digit)
|
||||
| After_backslash, _ ->
|
||||
T (if_atom Add_escaped Add_token_char, Normal)
|
||||
|
||||
| After_backslash_cr, '\n' ->
|
||||
T (Add_token_char, Ignoring_blanks)
|
||||
| After_backslash_cr, _ ->
|
||||
E (if_atom_eps Add_escaped_cr Nop, Normal)
|
||||
|
||||
| After_backslash_x, ('0'..'9' | 'a'..'f' | 'A'..'F') ->
|
||||
T (if_atom Add_hex_escape_char Add_token_char , After_backslash_x_hex)
|
||||
| After_backslash_x, _ ->
|
||||
Error Unexpected_char_parsing_hex_escape
|
||||
|
||||
| After_backslash_x_hex, ('0'..'9' | 'a'..'f' | 'A'..'F') ->
|
||||
T (if_atom Add_last_hex_escape_char Add_token_char, Normal)
|
||||
| After_backslash_x_hex, _ ->
|
||||
Error Unexpected_char_parsing_hex_escape
|
||||
|
||||
| After_backslash_digit, '0'..'9' ->
|
||||
T (Add_dec_escape_char, After_backslash_2digits)
|
||||
| After_backslash_digit, _ ->
|
||||
Error Unexpected_char_parsing_dec_escape
|
||||
|
||||
| After_backslash_2digits, '0'..'9' ->
|
||||
T (if_atom Add_last_dec_escape_char Comment_add_last_dec_escape_char, Normal)
|
||||
| After_backslash_2digits, _ ->
|
||||
Error Unexpected_char_parsing_dec_escape
|
||||
|
||||
| Ignoring_blanks, (' '|'\t') ->
|
||||
T (Add_token_char, Ignoring_blanks)
|
||||
| Ignoring_blanks, _ ->
|
||||
E (Nop, Normal)
|
||||
|
||||
module Block_comment_transition = struct
|
||||
type t =
|
||||
| T of Action.t * State.Block_comment.t
|
||||
| E of Epsilon_action.t * State.Block_comment.t
|
||||
| Error of Error.t
|
||||
| End_comment
|
||||
end
|
||||
|
||||
let block_comment_transition
|
||||
: State.Block_comment.t * char -> Block_comment_transition.t
|
||||
= function
|
||||
| Quoted_string state, c -> (
|
||||
match quoted_string_transition In_block_comment (state, c) with
|
||||
| End_of_quoted_string -> T (Add_token_char, Normal)
|
||||
| T (action, state) -> T (action, Quoted_string state)
|
||||
| E (action, state) -> E (action, Quoted_string state)
|
||||
| Error error -> Error error
|
||||
)
|
||||
| After_hash, '|' -> T (Start_block_comment, Normal)
|
||||
| After_pipe, '#' -> End_comment
|
||||
| _, '"' -> T (Add_token_char, Quoted_string Normal)
|
||||
| _, '|' -> T (Add_token_char, After_pipe)
|
||||
| _, '#' -> T (Add_token_char, After_hash)
|
||||
| _, _ -> T (Add_token_char, Normal)
|
||||
|
||||
let transition : State.t * char -> Transition.t = function
|
||||
| Whitespace, '(' -> T (Opening, Whitespace)
|
||||
| Whitespace, ')' -> T (Closing, Whitespace)
|
||||
| Whitespace, '\r' -> T (Nop, After_cr)
|
||||
| Whitespace, (' ' | '\t' | '\012' | '\n') -> T (Nop, Whitespace)
|
||||
| Whitespace, ';' -> T (Start_line_comment, Line_comment)
|
||||
| Whitespace, '"' -> T (Start_quoted_string, Quoted_string (Normal))
|
||||
| Whitespace, '#' -> T (Nop, After_hash)
|
||||
| Whitespace, '|' -> T (Add_first_char, Unquoted_string After_pipe)
|
||||
| Whitespace, _ -> T (Add_first_char, Unquoted_string Normal)
|
||||
|
||||
| After_cr, '\n' -> T (Nop, Whitespace)
|
||||
| After_cr, _ -> Error Unexpected_character_after_cr
|
||||
|
||||
| Unquoted_string _, (';'|'('|')'|'"'|' '|'\t'|'\012'|'\r'|'\n') ->
|
||||
E (Push_atom, Whitespace)
|
||||
| Unquoted_string After_hash, '|'
|
||||
| Unquoted_string After_pipe, '#' -> Error Comment_token_in_unquoted_atom
|
||||
| Unquoted_string _, '#' -> T (Add_atom_char, Unquoted_string After_hash)
|
||||
| Unquoted_string _, '|' -> T (Add_atom_char, Unquoted_string After_pipe)
|
||||
| Unquoted_string _, _ -> T (Add_atom_char, Unquoted_string Normal)
|
||||
|
||||
| Line_comment, ('\r' | '\n') -> E (End_line_comment, Whitespace)
|
||||
| Line_comment, _ -> T (Add_token_char, Line_comment)
|
||||
|
||||
| After_hash, ';' -> T (Start_sexp_comment, Whitespace)
|
||||
| After_hash, '|' -> T (Start_block_comment, Block_comment Normal)
|
||||
| After_hash, _ -> E (Add_first_char_hash, Unquoted_string Normal)
|
||||
|
||||
| Quoted_string state, c -> (
|
||||
match quoted_string_transition In_atom (state, c) with
|
||||
| End_of_quoted_string -> T (Push_quoted_atom, Whitespace)
|
||||
| T (action, state) -> T (action, Quoted_string state)
|
||||
| E (action, state) -> E (action, Quoted_string state)
|
||||
| Error error -> Error error
|
||||
)
|
||||
|
||||
| Block_comment state, c -> (
|
||||
match block_comment_transition (state, c) with
|
||||
| T (action, state) -> T (action, Block_comment state)
|
||||
| E (action, state) -> E (action, Block_comment state)
|
||||
| End_comment -> End_block_comment
|
||||
| Error error -> Error error
|
||||
)
|
||||
|
||||
| Error, _ -> Error Automaton_in_error_state
|
||||
|
||||
let transition_eoi : State.t -> Final_transition.t = function
|
||||
| Whitespace -> Eoi_check
|
||||
| After_cr -> Error Unexpected_character_after_cr
|
||||
| Unquoted_string _ -> E (Push_atom, Whitespace)
|
||||
| Line_comment -> E (End_line_comment, Whitespace)
|
||||
| After_hash -> E (Add_first_char_hash, Unquoted_string Normal)
|
||||
| Quoted_string _ -> Error Unterminated_quoted_string
|
||||
| Block_comment _ -> Error Unterminated_block_comment
|
||||
| Error -> Error Automaton_in_error_state
|
||||
end
|
||||
|
||||
module Table = struct
|
||||
type action = Epsilon_action.t list * Action.t [@@deriving compare, sexp_of, hash]
|
||||
type goto_state = State of int | End_block_comment [@@deriving compare, sexp_of, hash]
|
||||
type advance = Advance | Advance_eol [@@deriving compare, sexp_of, hash]
|
||||
type transition = { action : action; goto : goto_state; advance : advance }
|
||||
[@@deriving compare, sexp_of, hash]
|
||||
type 'a or_error =
|
||||
| Ok of 'a
|
||||
| Error of Error.t
|
||||
[@@deriving compare, sexp_of, hash]
|
||||
|
||||
type t =
|
||||
{ transitions : transition or_error array
|
||||
; transitions_eoi : Epsilon_action.t list or_error array
|
||||
}
|
||||
|
||||
let advance = function
|
||||
| '\n' -> Advance_eol
|
||||
| _ -> Advance
|
||||
|
||||
let compile (module A : Automaton) =
|
||||
let rec squash acc state c =
|
||||
match A.transition (state, c) with
|
||||
| T (action, state) -> Ok { action = (List.rev acc, action)
|
||||
; goto = State (State.to_int state)
|
||||
; advance = advance c
|
||||
}
|
||||
| E (action, state) -> squash (action :: acc) state c
|
||||
| Error error -> Error error
|
||||
| End_block_comment -> Ok { action = (List.rev acc, Nop)
|
||||
; goto = End_block_comment
|
||||
; advance = advance c
|
||||
}
|
||||
in
|
||||
let rec squash_eoi acc state =
|
||||
match A.transition_eoi state with
|
||||
| Eoi_check -> Ok (List.rev acc)
|
||||
| E (eps_action, state) -> squash_eoi (eps_action :: acc) state
|
||||
| Error error -> Error error
|
||||
in
|
||||
let transitions =
|
||||
Array.create ~len:(State.count * 256)
|
||||
(Ok { action = ([], Action.Nop)
|
||||
; goto = State 0
|
||||
; advance = Advance
|
||||
})
|
||||
in
|
||||
let transitions_eoi = Array.create (Ok []) ~len:State.count in
|
||||
for s = 0 to State.count - 1 do
|
||||
let state = State.of_int s in
|
||||
for c = 0 to 255 do
|
||||
transitions.(s * 256 + c) <- squash [] state (Char.of_int_exn c);
|
||||
done;
|
||||
transitions_eoi.(s) <- squash_eoi [] state
|
||||
done;
|
||||
{ transitions; transitions_eoi }
|
||||
end
|
||||
|
||||
let table = Table.compile (module Automaton)
|
|
@ -1,10 +0,0 @@
|
|||
(library
|
||||
((name gen_parsexp_lib)
|
||||
(libraries (base))
|
||||
(preprocess (pps (ppx_sexp_conv
|
||||
ppx_compare
|
||||
ppx_enumerate
|
||||
ppx_hash
|
||||
ppx_variants_conv)))))
|
||||
|
||||
(jbuild_version 1)
|
|
@ -0,0 +1,27 @@
|
|||
module Atom : sig
|
||||
type t = A of string [@@unboxed]
|
||||
end
|
||||
|
||||
module Token : sig
|
||||
type t =
|
||||
| Atom of Atom.t
|
||||
| Quoted_string of string
|
||||
| Lparen
|
||||
| Rparen
|
||||
| Sexp_comment
|
||||
| Eof
|
||||
end
|
||||
|
||||
type t = Lexing.lexbuf -> Token.t
|
||||
|
||||
val token : t
|
||||
|
||||
module Error : sig
|
||||
type t =
|
||||
{ start : Lexing.position
|
||||
; stop : Lexing.position
|
||||
; message : string
|
||||
}
|
||||
end
|
||||
|
||||
exception Error of Error.t
|
|
@ -0,0 +1,208 @@
|
|||
{
|
||||
module Atom = struct
|
||||
type t = A of string [@@unboxed]
|
||||
end
|
||||
|
||||
module Token = struct
|
||||
type t =
|
||||
| Atom of Atom.t
|
||||
| Quoted_string of string
|
||||
| Lparen
|
||||
| Rparen
|
||||
| Sexp_comment
|
||||
| Eof
|
||||
end
|
||||
|
||||
type t = Lexing.lexbuf -> Token.t
|
||||
|
||||
module Error = struct
|
||||
type t =
|
||||
{ start : Lexing.position
|
||||
; stop : Lexing.position
|
||||
; message : string
|
||||
}
|
||||
end
|
||||
|
||||
exception Error of Error.t
|
||||
|
||||
let error ?(delta=0) lexbuf message =
|
||||
let start = Lexing.lexeme_start_p lexbuf in
|
||||
raise
|
||||
(Error { start = { start with pos_cnum = start.pos_cnum + delta }
|
||||
; stop = Lexing.lexeme_end_p lexbuf
|
||||
; message
|
||||
})
|
||||
|
||||
let eval_decimal_char c = Char.code c - Char.code '0'
|
||||
|
||||
let eval_decimal_escape c1 c2 c3 =
|
||||
(eval_decimal_char c1) * 100 +
|
||||
(eval_decimal_char c2) * 10 +
|
||||
(eval_decimal_char c3)
|
||||
|
||||
let eval_hex_char c =
|
||||
match c with
|
||||
| '0'..'9' -> Char.code c - Char.code '0'
|
||||
| 'a'..'f' -> Char.code c - Char.code 'a' + 10
|
||||
| 'A'..'F' -> Char.code c - Char.code 'A' + 10
|
||||
| _ -> -1
|
||||
|
||||
let eval_hex_escape c1 c2 =
|
||||
(eval_hex_char c1) * 16 +
|
||||
(eval_hex_char c2)
|
||||
|
||||
type escape_sequence =
|
||||
| Newline
|
||||
| Other
|
||||
|
||||
let escaped_buf = Buffer.create 256
|
||||
}
|
||||
|
||||
let comment = ';' [^ '\n' '\r']*
|
||||
let newline = '\r'? '\n'
|
||||
let blank = [' ' '\t' '\012']
|
||||
let atom_char = [^ ';' '(' ')' '"' ' ' '\t' '\r' '\n' '\012' '|' '#']
|
||||
let digit = ['0'-'9']
|
||||
let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
|
||||
|
||||
rule token = parse
|
||||
| newline
|
||||
{ Lexing.new_line lexbuf; token lexbuf }
|
||||
| blank+ | comment
|
||||
{ token lexbuf }
|
||||
| '('
|
||||
{ Token.Lparen }
|
||||
| ')'
|
||||
{ Rparen }
|
||||
| '"'
|
||||
{ Buffer.clear escaped_buf;
|
||||
let start = Lexing.lexeme_start_p lexbuf in
|
||||
let s = quoted_string true lexbuf in
|
||||
lexbuf.lex_start_p <- start;
|
||||
Quoted_string s
|
||||
}
|
||||
| "#|"
|
||||
{ block_comment lexbuf }
|
||||
| "#;"
|
||||
{ Sexp_comment }
|
||||
| eof
|
||||
{ Eof }
|
||||
| ""
|
||||
{ atom "" (Lexing.lexeme_start_p lexbuf) lexbuf }
|
||||
|
||||
and atom acc start = parse
|
||||
| '#'+ '|'
|
||||
{ lexbuf.lex_start_p <- start;
|
||||
error lexbuf "atoms cannot contain #|"
|
||||
}
|
||||
| '|'+ '#'
|
||||
{ lexbuf.lex_start_p <- start;
|
||||
error lexbuf "atoms cannot contain |#"
|
||||
}
|
||||
| ('#'+ | '|'+ | atom_char+) as s
|
||||
{ atom (if acc = "" then s else acc ^ s) start lexbuf
|
||||
}
|
||||
| ""
|
||||
{ if acc = "" then
|
||||
error lexbuf "Internal error in the S-expression parser, \
|
||||
please report upstream.";
|
||||
lexbuf.lex_start_p <- start;
|
||||
Token.Atom (A acc)
|
||||
}
|
||||
|
||||
(* If [strict] is false, ignore errors *)
|
||||
and quoted_string strict = parse
|
||||
| '"'
|
||||
{ Buffer.contents escaped_buf }
|
||||
| '\\'
|
||||
{ match escape_sequence strict lexbuf with
|
||||
| Newline -> quoted_string_after_escaped_newline strict lexbuf
|
||||
| Other -> quoted_string strict lexbuf
|
||||
}
|
||||
| newline as s
|
||||
{ Lexing.new_line lexbuf;
|
||||
Buffer.add_string escaped_buf s;
|
||||
quoted_string strict lexbuf
|
||||
}
|
||||
| _ as c
|
||||
{ Buffer.add_char escaped_buf c;
|
||||
quoted_string strict lexbuf
|
||||
}
|
||||
| eof
|
||||
{ if strict then
|
||||
error lexbuf "unterminated quoted string";
|
||||
Buffer.contents escaped_buf
|
||||
}
|
||||
|
||||
and quoted_string_after_escaped_newline strict = parse
|
||||
| [' ' '\t']*
|
||||
{ quoted_string strict lexbuf }
|
||||
|
||||
and escape_sequence strict = parse
|
||||
| newline
|
||||
{ Newline }
|
||||
| ['\\' '\'' '"' 'n' 't' 'b' 'r'] as c
|
||||
{ let c =
|
||||
match c with
|
||||
| 'n' -> '\n'
|
||||
| 'r' -> '\r'
|
||||
| 'b' -> '\b'
|
||||
| 't' -> '\t'
|
||||
| _ -> c
|
||||
in
|
||||
Buffer.add_char escaped_buf c;
|
||||
Other
|
||||
}
|
||||
| (digit as c1) (digit as c2) (digit as c3)
|
||||
{ let v = eval_decimal_escape c1 c2 c3 in
|
||||
if strict && v > 255 then
|
||||
error lexbuf "escape sequence in quoted string out of range"
|
||||
~delta:(-1);
|
||||
Buffer.add_char escaped_buf (Char.chr v);
|
||||
Other
|
||||
}
|
||||
| digit* as s
|
||||
{ if strict then
|
||||
error lexbuf "unterminated decimal escape sequence" ~delta:(-1);
|
||||
Buffer.add_char escaped_buf '\\';
|
||||
Buffer.add_string escaped_buf s;
|
||||
Other
|
||||
}
|
||||
| 'x' (hexdigit as c1) (hexdigit as c2)
|
||||
{ let v = eval_hex_escape c1 c2 in
|
||||
Buffer.add_char escaped_buf (Char.chr v);
|
||||
Other
|
||||
}
|
||||
| 'x' hexdigit* as s
|
||||
{ if strict then
|
||||
error lexbuf "unterminated hexadecimal escape sequence" ~delta:(-1);
|
||||
Buffer.add_char escaped_buf '\\';
|
||||
Buffer.add_string escaped_buf s;
|
||||
Other
|
||||
}
|
||||
| _ as c
|
||||
{ Buffer.add_char escaped_buf '\\';
|
||||
Buffer.add_char escaped_buf c;
|
||||
Other
|
||||
}
|
||||
| eof
|
||||
{ if strict then
|
||||
error lexbuf "unterminated escape sequence" ~delta:(-1);
|
||||
Other
|
||||
}
|
||||
|
||||
and block_comment = parse
|
||||
| '"'
|
||||
{ Buffer.clear escaped_buf;
|
||||
ignore (quoted_string false lexbuf : string);
|
||||
block_comment lexbuf
|
||||
}
|
||||
| "|#"
|
||||
{ token lexbuf
|
||||
}
|
||||
| eof
|
||||
{ error lexbuf "unterminated block comment"
|
||||
}
|
||||
| _
|
||||
{ block_comment lexbuf
|
||||
}
|
|
@ -1,377 +0,0 @@
|
|||
type stack =
|
||||
| Empty
|
||||
| Open of Lexing.position * stack
|
||||
| Sexp of Sexp_ast.t * stack
|
||||
let empty_stack = Empty
|
||||
|
||||
type 'a mode =
|
||||
| Single : Sexp_ast.t mode
|
||||
| Many : Sexp_ast.t list mode
|
||||
|
||||
type 'a state =
|
||||
{ mutable automaton_state : int
|
||||
; mutable depth : int
|
||||
; (* Number of opened #| when parsing a block comment *)
|
||||
mutable block_comment_depth : int
|
||||
; (* Number of full sexp to ignore. This correspond to the number of consecutive #;.
|
||||
We don't have to track nested #; because they do not change the result. *)
|
||||
mutable ignoring : int
|
||||
; (* Only meaningful when [ignoring] > 0. Number of opened parentheses of the
|
||||
outermost sexp comment. *)
|
||||
mutable ignoring_depth : int
|
||||
; (* When parsing an escape sequence of the form "\\NNN" or "\\XX", this accumulates
|
||||
the computed number *)
|
||||
mutable escaped_value : int
|
||||
; (* Buffer for accumulating atoms *)
|
||||
atom_buffer : Buffer.t
|
||||
; fname : string
|
||||
; mutable full_sexps : int
|
||||
; mutable offset : int (* global offset *)
|
||||
; mutable line_number : int
|
||||
; mutable bol_offset : int (* offset of beginning of line *)
|
||||
; (* Starting positions of the current token *)
|
||||
mutable token_start_pos : Lexing.position
|
||||
; mode : 'a mode
|
||||
}
|
||||
|
||||
(* these magic numbers are checked in gen_parser_automaton.ml:
|
||||
let () = assert (initial = 0)
|
||||
let () = assert (to_int Error = 1) *)
|
||||
let initial_state = 0
|
||||
let error_state = 1
|
||||
|
||||
let new_state ~fname mode =
|
||||
{ depth = 0
|
||||
; automaton_state = initial_state
|
||||
; block_comment_depth = 0
|
||||
; ignoring = 0
|
||||
; ignoring_depth = 0
|
||||
; escaped_value = 0
|
||||
; atom_buffer = Buffer.create 128
|
||||
; mode = mode
|
||||
; full_sexps = 0
|
||||
; offset = 0
|
||||
; line_number = 1
|
||||
; bol_offset = 0
|
||||
; fname = fname
|
||||
; token_start_pos = { pos_fname = fname; pos_cnum = 0; pos_lnum = 1; pos_bol = 0 }
|
||||
}
|
||||
|
||||
let mode t = t.mode
|
||||
|
||||
let offset t = t.offset
|
||||
let line t = t.line_number
|
||||
let column t = t.offset - t.bol_offset
|
||||
|
||||
let position t =
|
||||
{ Lexing.
|
||||
pos_fname = t.fname
|
||||
; pos_cnum = t.offset
|
||||
; pos_lnum = t.line_number
|
||||
; pos_bol = t.bol_offset
|
||||
}
|
||||
|
||||
let has_unclosed_paren state = state.depth > 0
|
||||
|
||||
let set_error_state state = state.automaton_state <- error_state
|
||||
|
||||
let sexp_of_stack : stack -> Sexp_ast.t = function
|
||||
| Sexp (sexp, Empty) -> sexp
|
||||
| _ -> failwith "Parser_automaton.sexp_of_stack"
|
||||
|
||||
let sexps_of_stack =
|
||||
let rec loop acc : stack -> Sexp_ast.t list = function
|
||||
| Empty -> acc
|
||||
| Open _ -> failwith "Parser_automaton.sexps_of_stack"
|
||||
| Sexp (sexp, stack) -> loop (sexp :: acc) stack
|
||||
in
|
||||
fun stack -> loop [] stack
|
||||
|
||||
let automaton_state state = state.automaton_state
|
||||
|
||||
module Error = struct
|
||||
type t =
|
||||
{ position : Lexing.position
|
||||
; message : string
|
||||
}
|
||||
|
||||
exception Parse_error of t
|
||||
|
||||
module Reason = struct
|
||||
(* To be kept in sync with the Error module in gen/gen_parser_automaton.ml *)
|
||||
type t =
|
||||
| Unexpected_char_parsing_hex_escape
|
||||
| Unexpected_char_parsing_dec_escape
|
||||
| Unterminated_quoted_string
|
||||
| Unterminated_block_comment
|
||||
| Escape_sequence_out_of_range
|
||||
| Unclosed_paren
|
||||
| Too_many_sexps
|
||||
| Closed_paren_without_opened
|
||||
| Comment_token_in_unquoted_atom
|
||||
| Sexp_comment_without_sexp
|
||||
| Unexpected_character_after_cr
|
||||
| No_sexp_found_in_input
|
||||
| Automaton_in_error_state
|
||||
end
|
||||
|
||||
let raise state ~at_eof (reason : Reason.t) =
|
||||
set_error_state state;
|
||||
let message =
|
||||
(* These messages where choosen such that we can build the various Sexplib parsing
|
||||
functions on top of Parsexp and keep the same exceptions.
|
||||
|
||||
At the time of writing this, a simple layer on top of parsexp to implement the
|
||||
sexplib API is passing all the sexplib tests.
|
||||
|
||||
Note that parsexp matches the semantic of Sexp.parse which is slightly
|
||||
different from the ocamllex/ocamlyacc based parser of Sexplib. The latter one
|
||||
is less tested and assumed to be less used. *)
|
||||
match reason with
|
||||
| Unexpected_char_parsing_hex_escape ->
|
||||
"unterminated hexadecimal escape sequence"
|
||||
| Unexpected_char_parsing_dec_escape ->
|
||||
"unterminated decimal escape sequence"
|
||||
| Unterminated_quoted_string ->
|
||||
"unterminated quoted string"
|
||||
| Unterminated_block_comment ->
|
||||
"unterminated block comment"
|
||||
| Escape_sequence_out_of_range ->
|
||||
"escape sequence in quoted string out of range"
|
||||
| Unclosed_paren ->
|
||||
"unclosed parentheses at end of input"
|
||||
| Too_many_sexps ->
|
||||
"s-expression followed by data"
|
||||
| Closed_paren_without_opened ->
|
||||
"unexpected character: ')'"
|
||||
| Comment_token_in_unquoted_atom ->
|
||||
if Buffer.contents state.atom_buffer = "|" then
|
||||
"illegal end of comment"
|
||||
else
|
||||
"comment tokens in unquoted atom"
|
||||
| Sexp_comment_without_sexp ->
|
||||
"unterminated sexp comment"
|
||||
| Unexpected_character_after_cr ->
|
||||
if at_eof then
|
||||
"unexpected end of input after carriage return"
|
||||
else
|
||||
"unexpected character after carriage return"
|
||||
| No_sexp_found_in_input ->
|
||||
"no s-expression found in input"
|
||||
| Automaton_in_error_state ->
|
||||
failwith "Parsexp.Parser_automaton: parser is dead"
|
||||
in
|
||||
let position = position state in
|
||||
raise (Parse_error { position; message })
|
||||
|
||||
let position t = t.position
|
||||
let message t = t.message
|
||||
end
|
||||
|
||||
exception Parse_error = Error.Parse_error
|
||||
|
||||
type 'a action = 'a state -> char -> stack -> stack
|
||||
type 'a epsilon_action = 'a state -> stack -> stack
|
||||
|
||||
let current_pos ?(delta=0) state : Lexing.position =
|
||||
let offset = state.offset + delta in
|
||||
{ pos_fname = state.fname
|
||||
; pos_lnum = state.line_number
|
||||
; pos_cnum = offset
|
||||
; pos_bol = state.bol_offset
|
||||
}
|
||||
|
||||
let set_automaton_state state x = state.automaton_state <- x
|
||||
|
||||
let advance state = state.offset <- state.offset + 1
|
||||
|
||||
let advance_eol state =
|
||||
let newline_offset = state.offset in
|
||||
state.offset <- newline_offset + 1;
|
||||
state.bol_offset <- state.offset;
|
||||
state.line_number <- state.line_number + 1
|
||||
|
||||
let block_comment_depth state = state.block_comment_depth
|
||||
|
||||
let add_token_char _state _char stack = stack
|
||||
|
||||
let add_atom_char state c stack =
|
||||
Buffer.add_char state.atom_buffer c;
|
||||
stack
|
||||
|
||||
let add_quoted_atom_char state c stack =
|
||||
Buffer.add_char state.atom_buffer c;
|
||||
add_token_char state c stack
|
||||
|
||||
let check_new_sexp_allowed : type a. a state -> unit = fun state ->
|
||||
let is_single = match state.mode with Single -> true | _ -> false in
|
||||
if is_single && state.full_sexps > 0 && state.ignoring = 0 then
|
||||
Error.raise state ~at_eof:false Too_many_sexps
|
||||
|
||||
let add_first_char state char stack =
|
||||
check_new_sexp_allowed state;
|
||||
Buffer.add_char state.atom_buffer char;
|
||||
state.token_start_pos <- current_pos state;
|
||||
stack
|
||||
|
||||
let eps_add_first_char_hash state stack =
|
||||
check_new_sexp_allowed state;
|
||||
Buffer.add_char state.atom_buffer '#';
|
||||
state.token_start_pos <- current_pos state ~delta:(-1);
|
||||
stack
|
||||
|
||||
let start_quoted_string state _char stack =
|
||||
check_new_sexp_allowed state;
|
||||
state.token_start_pos <- current_pos state;
|
||||
stack
|
||||
|
||||
let add_escaped state c stack =
|
||||
let c' =
|
||||
match c with
|
||||
| 'n' -> '\n'
|
||||
| 'r' -> '\r'
|
||||
| 'b' -> '\b'
|
||||
| 't' -> '\t'
|
||||
| '\\' | '\'' | '"' -> c
|
||||
| _ -> Buffer.add_char state.atom_buffer '\\'; c
|
||||
in
|
||||
Buffer.add_char state.atom_buffer c';
|
||||
add_token_char state c stack
|
||||
|
||||
let eps_add_escaped_cr state stack =
|
||||
Buffer.add_char state.atom_buffer '\r';
|
||||
stack
|
||||
|
||||
let dec_val c = Char.code c - Char.code '0'
|
||||
|
||||
let hex_val c =
|
||||
match c with
|
||||
| '0'..'9' -> Char.code c - Char.code '0'
|
||||
| 'a'..'f' -> Char.code c - Char.code 'a' + 10
|
||||
| _ -> Char.code c - Char.code 'A' + 10
|
||||
|
||||
let add_dec_escape_char state c stack =
|
||||
state.escaped_value <- state.escaped_value * 10 + dec_val c;
|
||||
add_token_char state c stack
|
||||
|
||||
let add_last_dec_escape_char state c stack =
|
||||
let value = state.escaped_value * 10 + dec_val c in
|
||||
state.escaped_value <- 0;
|
||||
if value > 255 then Error.raise state ~at_eof:false Escape_sequence_out_of_range;
|
||||
Buffer.add_char state.atom_buffer (Char.unsafe_chr value);
|
||||
add_token_char state c stack
|
||||
|
||||
let comment_add_last_dec_escape_char state c stack =
|
||||
let value = state.escaped_value * 10 + dec_val c in
|
||||
state.escaped_value <- 0;
|
||||
if value > 255 then Error.raise state ~at_eof:false Escape_sequence_out_of_range;
|
||||
add_token_char state c stack
|
||||
|
||||
let add_hex_escape_char state c stack =
|
||||
state.escaped_value <- (state.escaped_value lsl 4) lor hex_val c;
|
||||
add_token_char state c stack
|
||||
|
||||
let add_last_hex_escape_char state c stack =
|
||||
let value = (state.escaped_value lsl 4) lor hex_val c in
|
||||
state.escaped_value <- 0;
|
||||
Buffer.add_char state.atom_buffer (Char.unsafe_chr value);
|
||||
add_token_char state c stack
|
||||
|
||||
let opening state _char stack =
|
||||
check_new_sexp_allowed state;
|
||||
state.depth <- state.depth + 1;
|
||||
if state.ignoring = 0 then
|
||||
Open (current_pos state, stack)
|
||||
else
|
||||
stack
|
||||
|
||||
let sexp_or_comment_added ~is_comment state stack ~delta:_ =
|
||||
if state.ignoring lor state.depth <> 0 then
|
||||
stack
|
||||
else begin
|
||||
if not is_comment then state.full_sexps <- state.full_sexps + 1;
|
||||
stack
|
||||
end
|
||||
|
||||
let sexp_added state stack ~delta =
|
||||
let stack = sexp_or_comment_added ~is_comment:false state stack ~delta in
|
||||
if state.ignoring <> 0 && state.ignoring_depth = state.depth then begin
|
||||
state.ignoring <- state.ignoring - 1;
|
||||
stack
|
||||
end else stack
|
||||
|
||||
let rec make_list stop acc : stack -> stack = function
|
||||
| Empty -> assert false
|
||||
| Open (start, stack) -> Sexp (List ({ start; stop }, acc), stack)
|
||||
| Sexp (sexp, stack) -> make_list stop (sexp :: acc) stack
|
||||
|
||||
let closing state _char stack =
|
||||
if state.depth > 0 then begin
|
||||
let stack =
|
||||
if state.ignoring = 0 then
|
||||
make_list (current_pos state ~delta:1) [] stack
|
||||
else
|
||||
stack
|
||||
in
|
||||
if state.ignoring <> 0 && state.ignoring_depth = state.depth then
|
||||
Error.raise state ~at_eof:false Sexp_comment_without_sexp;
|
||||
state.depth <- state.depth - 1;
|
||||
sexp_added state stack ~delta:1
|
||||
end else
|
||||
Error.raise state ~at_eof:false Closed_paren_without_opened
|
||||
|
||||
let make_loc ?(delta=0) state : Sexp_ast.Loc.t =
|
||||
{ start = state.token_start_pos
|
||||
; stop = current_pos state ~delta
|
||||
}
|
||||
|
||||
let eps_push_atom state stack =
|
||||
let str = Buffer.contents state.atom_buffer in
|
||||
Buffer.clear state.atom_buffer;
|
||||
let stack =
|
||||
if state.ignoring = 0 then
|
||||
Sexp (Atom (make_loc state, A str), stack)
|
||||
else
|
||||
stack
|
||||
in
|
||||
sexp_added state stack ~delta:0
|
||||
|
||||
let push_quoted_atom state _char stack =
|
||||
let str = Buffer.contents state.atom_buffer in
|
||||
Buffer.clear state.atom_buffer;
|
||||
let stack =
|
||||
if state.ignoring = 0 then
|
||||
Sexp (Quoted_string (make_loc state ~delta:1, str), stack)
|
||||
else
|
||||
stack
|
||||
in
|
||||
sexp_added state stack ~delta:1
|
||||
|
||||
let start_sexp_comment state _char stack =
|
||||
if state.ignoring = 0 || state.ignoring_depth = state.depth then begin
|
||||
state.ignoring <- state.ignoring + 1;
|
||||
state.ignoring_depth <- state.depth
|
||||
end;
|
||||
stack
|
||||
|
||||
let start_block_comment state _char stack =
|
||||
state.block_comment_depth <- state.block_comment_depth + 1;
|
||||
stack
|
||||
|
||||
let end_block_comment state _char stack =
|
||||
state.block_comment_depth <- state.block_comment_depth - 1;
|
||||
stack
|
||||
|
||||
let start_line_comment _state _char stack = stack
|
||||
let end_line_comment _state stack = stack
|
||||
|
||||
let eps_eoi_check : type a. a state -> stack -> stack = fun state stack ->
|
||||
if state.depth > 0 then Error.raise state ~at_eof:true Unclosed_paren;
|
||||
if state.ignoring > 0 then Error.raise state ~at_eof:true Sexp_comment_without_sexp;
|
||||
if state.full_sexps = 0 then (
|
||||
match state.mode with
|
||||
| Many -> ()
|
||||
| Single ->
|
||||
Error.raise state ~at_eof:true No_sexp_found_in_input
|
||||
);
|
||||
stack
|
|
@ -1,132 +0,0 @@
|
|||
(** Internal bits used by the generated automaton, not part of the public API *)
|
||||
|
||||
(** Internal state of the automaton *)
|
||||
type 'a state
|
||||
|
||||
type 'a mode =
|
||||
| Single : Sexp_ast.t mode
|
||||
| Many : Sexp_ast.t list mode
|
||||
|
||||
type stack
|
||||
val empty_stack : stack
|
||||
|
||||
val new_state
|
||||
: fname:string
|
||||
-> 'a mode
|
||||
-> 'a state
|
||||
|
||||
val mode : 'a state -> 'a mode
|
||||
|
||||
(** Number of characters fed to the parser *)
|
||||
val offset : _ state -> int
|
||||
|
||||
(** Position in the text *)
|
||||
val line : _ state -> int
|
||||
val column : _ state -> int
|
||||
|
||||
(** Whether there are some unclosed parentheses *)
|
||||
val has_unclosed_paren : _ state -> bool
|
||||
|
||||
val set_error_state : _ state -> unit
|
||||
|
||||
val sexp_of_stack : stack -> Sexp_ast.t
|
||||
val sexps_of_stack : stack -> Sexp_ast.t list
|
||||
|
||||
val automaton_state : _ state -> int
|
||||
|
||||
module Error : sig
|
||||
type t
|
||||
|
||||
val position : t -> Lexing.position
|
||||
val message : t -> string
|
||||
|
||||
module Reason : sig
|
||||
type t =
|
||||
| Unexpected_char_parsing_hex_escape
|
||||
| Unexpected_char_parsing_dec_escape
|
||||
| Unterminated_quoted_string
|
||||
| Unterminated_block_comment
|
||||
| Escape_sequence_out_of_range
|
||||
| Unclosed_paren
|
||||
| Too_many_sexps
|
||||
| Closed_paren_without_opened
|
||||
| Comment_token_in_unquoted_atom
|
||||
| Sexp_comment_without_sexp
|
||||
| Unexpected_character_after_cr
|
||||
| No_sexp_found_in_input
|
||||
| Automaton_in_error_state
|
||||
end
|
||||
|
||||
val raise : _ state -> at_eof:bool -> Reason.t -> _
|
||||
end
|
||||
|
||||
exception Parse_error of Error.t
|
||||
|
||||
val set_automaton_state : _ state -> int -> unit
|
||||
|
||||
(** Advance the position counters. [advance_eol] is for when we read a newline
|
||||
character. *)
|
||||
val advance : _ state -> unit
|
||||
val advance_eol : _ state -> unit
|
||||
|
||||
(** Number of opened #| *)
|
||||
val block_comment_depth : _ state -> int
|
||||
|
||||
type 'a action = 'a state -> char -> stack -> stack
|
||||
type 'a epsilon_action = 'a state -> stack -> stack
|
||||
|
||||
(** Add a character to the atom buffer. [add_quoted_atom_char] does the same for quoted
|
||||
atoms *)
|
||||
val add_atom_char : _ action
|
||||
val add_quoted_atom_char : _ action
|
||||
|
||||
(** Add a character that just follows a '\\' and the '\\' itself if necessary. *)
|
||||
val add_escaped : _ action
|
||||
|
||||
(** [escaped_value <- escaped_value * 10 + (char - '0')]
|
||||
|
||||
These functions make the assumption that [char] is between '0' and '9'.
|
||||
[add_dec_escape_char] also assumes the result doesn't overflow. The automaton
|
||||
definition must make sure this is the case.
|
||||
|
||||
[add_last_dec_escape_char] also adds the resulting character to the atom buffer.
|
||||
*)
|
||||
val add_dec_escape_char : _ action
|
||||
val add_last_dec_escape_char : _ action
|
||||
|
||||
(** Same but for quoted strings inside comments. Useful because it can fail. *)
|
||||
val comment_add_last_dec_escape_char : _ action
|
||||
|
||||
(** Same as [add_dec_escape_char] but for hexadicemal escape sequences *)
|
||||
val add_hex_escape_char : _ action
|
||||
val add_last_hex_escape_char : _ action
|
||||
|
||||
(** Ignore one more full sexp to come *)
|
||||
val start_sexp_comment : _ action
|
||||
|
||||
(** Add the first char of an unquoted atom. *)
|
||||
val add_first_char : _ action
|
||||
val start_quoted_string : _ action
|
||||
|
||||
(** Takes note of a control character in quoted atoms or the uninterpreted characters of
|
||||
comments, for which there is no corresponding [add_*] call (a backslash and the x in
|
||||
"\xff" or any character in a line comment). This does not get called for the opening
|
||||
([start_quoted_string]) or closing ([push_quoted_atom]) quotes themselves.
|
||||
*)
|
||||
val add_token_char : _ action
|
||||
|
||||
val opening : _ action
|
||||
val closing : _ action
|
||||
val push_quoted_atom : _ action
|
||||
|
||||
val start_block_comment : _ action
|
||||
val end_block_comment : _ action
|
||||
|
||||
val start_line_comment : _ action
|
||||
val end_line_comment : _ epsilon_action
|
||||
|
||||
val eps_push_atom : _ epsilon_action
|
||||
val eps_add_first_char_hash : _ epsilon_action
|
||||
val eps_eoi_check : _ epsilon_action
|
||||
|
||||
val eps_add_escaped_cr : _ epsilon_action
|
|
@ -1,13 +0,0 @@
|
|||
module Loc = struct
|
||||
type t =
|
||||
{ start : Lexing.position
|
||||
; stop : Lexing.position
|
||||
}
|
||||
end
|
||||
|
||||
type atom = A of string [@@unboxed]
|
||||
|
||||
type t =
|
||||
| Atom of Loc.t * atom
|
||||
| Quoted_string of Loc.t * string
|
||||
| List of Loc.t * t list
|
1319
src/usexp/table.ml
1319
src/usexp/table.ml
File diff suppressed because it is too large
Load Diff
|
@ -1,2 +0,0 @@
|
|||
val transitions : Obj.t Parser_automaton_internal.action array
|
||||
val transitions_eoi : Obj.t Parser_automaton_internal.epsilon_action array
|
|
@ -10,10 +10,8 @@ module Bytes = struct
|
|||
UnlabeledBytes.blit_string src src_pos dst dst_pos len
|
||||
end
|
||||
|
||||
module A = Parser_automaton_internal
|
||||
|
||||
module Atom = struct
|
||||
type t = Sexp_ast.atom = A of string [@@unboxed]
|
||||
type t = Lexer.Atom.t = A of string [@@unboxed]
|
||||
|
||||
let is_valid str =
|
||||
let len = String.length str in
|
||||
|
@ -241,7 +239,10 @@ let prepare_formatter ppf =
|
|||
}
|
||||
|
||||
module Loc = struct
|
||||
include Sexp_ast.Loc
|
||||
type t =
|
||||
{ start : Lexing.position
|
||||
; stop : Lexing.position
|
||||
}
|
||||
|
||||
let in_file fn =
|
||||
let pos : Lexing.position =
|
||||
|
@ -257,7 +258,7 @@ module Loc = struct
|
|||
end
|
||||
|
||||
module Ast = struct
|
||||
type t = Sexp_ast.t =
|
||||
type t =
|
||||
| Atom of Loc.t * Atom.t
|
||||
| Quoted_string of Loc.t * string
|
||||
| List of Loc.t * t list
|
||||
|
@ -272,32 +273,6 @@ let loc (Atom (loc, _) | Quoted_string (loc, _) | List (loc, _)) = loc
|
|||
| Atom (_, s) -> Atom s
|
||||
| Quoted_string (_, s) -> Quoted_string s
|
||||
| List (_, l) -> List (List.map l ~f:remove_locs)
|
||||
|
||||
module Token = struct
|
||||
type t =
|
||||
| Atom of Loc.t * Atom.t
|
||||
| String of Loc.t * string
|
||||
| Lparen of Loc.t
|
||||
| Rparen of Loc.t
|
||||
end
|
||||
|
||||
let tokenize =
|
||||
let rec loop acc t =
|
||||
match t with
|
||||
| Atom (loc, s) -> Token.Atom (loc, s) :: acc
|
||||
| Quoted_string (loc, s) -> Token.String (loc, s) :: acc
|
||||
| List (loc, l) ->
|
||||
let shift (pos : Lexing.position) delta =
|
||||
{ pos with pos_cnum = pos.pos_cnum + delta }
|
||||
in
|
||||
let l_loc = { loc with stop = shift loc.start 1 } in
|
||||
let r_loc = { loc with start = shift loc.stop (-1) } in
|
||||
let acc = Token.Lparen l_loc :: acc in
|
||||
let acc = List.fold_left l ~init:acc ~f:loop in
|
||||
let acc = Token.Rparen r_loc :: acc in
|
||||
acc
|
||||
in
|
||||
fun t -> loop [] t |> List.rev
|
||||
end
|
||||
|
||||
let rec add_loc t ~loc : Ast.t =
|
||||
|
@ -306,78 +281,89 @@ let rec add_loc t ~loc : Ast.t =
|
|||
| Quoted_string s -> Quoted_string (loc, s)
|
||||
| List l -> List (loc, List.map l ~f:(add_loc ~loc))
|
||||
|
||||
module Parse_error = struct
|
||||
include Lexer.Error
|
||||
|
||||
let loc t : Loc.t = { start = t.start; stop = t.stop }
|
||||
let message t = t.message
|
||||
end
|
||||
exception Parse_error = Lexer.Error
|
||||
|
||||
module Lexer = Lexer
|
||||
|
||||
module Parser = struct
|
||||
module Error = A.Error
|
||||
exception Error = A.Parse_error
|
||||
let error (loc : Loc.t) message =
|
||||
raise (Parse_error
|
||||
{ start = loc.start
|
||||
; stop = loc.stop
|
||||
; message
|
||||
})
|
||||
|
||||
let make_loc lexbuf : Loc.t =
|
||||
{ start = Lexing.lexeme_start_p lexbuf
|
||||
; stop = Lexing.lexeme_end_p lexbuf
|
||||
}
|
||||
|
||||
module Mode = struct
|
||||
type 'a t = 'a A.mode =
|
||||
type 'a t =
|
||||
| Single : Ast.t t
|
||||
| Many : Ast.t list t
|
||||
|
||||
let make_result : type a. a t -> Lexing.lexbuf -> Ast.t list -> a
|
||||
= fun t lexbuf sexps ->
|
||||
match t with
|
||||
| Many -> sexps
|
||||
| Single ->
|
||||
match sexps with
|
||||
| [sexp] -> sexp
|
||||
| [] -> error (make_loc lexbuf) "no s-expression found in input"
|
||||
| _ :: sexp :: _ ->
|
||||
error (Ast.loc sexp) "too many s-expressions found in input"
|
||||
end
|
||||
|
||||
module Stack = struct
|
||||
type t = A.stack
|
||||
let empty = A.empty_stack
|
||||
end
|
||||
let rec loop depth lexer lexbuf acc =
|
||||
match (lexer lexbuf : Lexer.Token.t) with
|
||||
| Atom a ->
|
||||
let loc = make_loc lexbuf in
|
||||
loop depth lexer lexbuf (Ast.Atom (loc, a) :: acc)
|
||||
| Quoted_string s ->
|
||||
let loc = make_loc lexbuf in
|
||||
loop depth lexer lexbuf (Quoted_string (loc, s) :: acc)
|
||||
| Lparen ->
|
||||
let start = Lexing.lexeme_start_p lexbuf in
|
||||
let sexps = loop (depth + 1) lexer lexbuf [] in
|
||||
let stop = Lexing.lexeme_end_p lexbuf in
|
||||
loop depth lexer lexbuf (List ({ start; stop }, sexps) :: acc)
|
||||
| Rparen ->
|
||||
if depth = 0 then
|
||||
error (make_loc lexbuf)
|
||||
"right parenthesis without matching left parenthesis";
|
||||
List.rev acc
|
||||
| Sexp_comment ->
|
||||
let sexps =
|
||||
let loc = make_loc lexbuf in
|
||||
match loop depth lexer lexbuf [] with
|
||||
| _ :: sexps -> sexps
|
||||
| [] -> error loc "s-expression missing after #;"
|
||||
in
|
||||
List.rev_append acc sexps
|
||||
| Eof ->
|
||||
if depth > 0 then
|
||||
error (make_loc lexbuf)
|
||||
"unclosed parenthesis at end of input";
|
||||
List.rev acc
|
||||
|
||||
type 'a t = 'a A.state
|
||||
let create ~fname ~mode = A.new_state ~fname mode
|
||||
|
||||
let feed : type a. a A.action = fun state char stack ->
|
||||
let idx = (A.automaton_state state lsl 8) lor (Char.code char) in
|
||||
(* We need an Obj.magic as the type of the array can't be generalized.
|
||||
This problem will go away when we get immutable arrays. *)
|
||||
(Obj.magic (Table.transitions.(idx) : Obj.t A.action) : a A.action) state char stack
|
||||
[@@inline always]
|
||||
|
||||
let feed_eoi : type a. a t -> Stack.t -> a = fun state stack ->
|
||||
let stack =
|
||||
(Obj.magic (Table.transitions_eoi.(A.automaton_state state)
|
||||
: Obj.t A.epsilon_action)
|
||||
: a A.epsilon_action) state stack
|
||||
in
|
||||
A.set_error_state state;
|
||||
match A.mode state with
|
||||
| Mode.Single -> A.sexp_of_stack stack
|
||||
| Mode.Many -> A.sexps_of_stack stack
|
||||
|
||||
let rec feed_substring_unsafe str state stack i stop =
|
||||
if i < stop then
|
||||
let c = String.unsafe_get str i in
|
||||
let stack = feed state c stack in
|
||||
feed_substring_unsafe str state stack (i + 1) stop
|
||||
else
|
||||
stack
|
||||
|
||||
let rec feed_subbytes_unsafe str state stack i stop =
|
||||
if i < stop then
|
||||
let c = Bytes.unsafe_get str i in
|
||||
let stack = feed state c stack in
|
||||
feed_subbytes_unsafe str state stack (i + 1) stop
|
||||
else
|
||||
stack
|
||||
|
||||
let feed_substring state str ~pos ~len stack =
|
||||
let str_len = String.length str in
|
||||
if pos < 0 || len < 0 || pos > str_len - len then
|
||||
invalid_arg "Jbuilder_sexp.feed_substring";
|
||||
feed_substring_unsafe str state stack pos (pos + len)
|
||||
|
||||
let feed_subbytes state str ~pos ~len stack =
|
||||
let str_len = Bytes.length str in
|
||||
if pos < 0 || len < 0 || pos > str_len - len then
|
||||
invalid_arg "Jbuilder_sexp.feed_subbytes";
|
||||
feed_subbytes_unsafe str state stack pos (pos + len)
|
||||
|
||||
let feed_string state str stack =
|
||||
feed_substring_unsafe str state stack 0 (String.length str)
|
||||
|
||||
let feed_bytes state str stack =
|
||||
feed_subbytes_unsafe str state stack 0 (Bytes.length str)
|
||||
let parse ~mode ?(lexer=Lexer.token) lexbuf =
|
||||
loop 0 lexer lexbuf []
|
||||
|> Mode.make_result mode lexbuf
|
||||
end
|
||||
|
||||
let parse_string ~fname ~mode str =
|
||||
let p = Parser.create ~fname ~mode in
|
||||
let stack = Parser.feed_string p str Parser.Stack.empty in
|
||||
Parser.feed_eoi p stack
|
||||
let parse_string ~fname ~mode ?lexer str =
|
||||
let lb = Lexing.from_string str in
|
||||
lb.lex_curr_p <-
|
||||
{ pos_fname = fname
|
||||
; pos_lnum = 1
|
||||
; pos_bol = 0
|
||||
; pos_cnum = 0
|
||||
};
|
||||
Parser.parse ~mode ?lexer lb
|
||||
|
|
|
@ -74,63 +74,53 @@ module Ast : sig
|
|||
val loc : t -> Loc.t
|
||||
|
||||
val remove_locs : t -> sexp
|
||||
|
||||
module Token : sig
|
||||
type t =
|
||||
| Atom of Loc.t * Atom.t
|
||||
| String of Loc.t * string
|
||||
| Lparen of Loc.t
|
||||
| Rparen of Loc.t
|
||||
end
|
||||
|
||||
val tokenize : t -> Token.t list
|
||||
end with type sexp := t
|
||||
|
||||
val add_loc : t -> loc:Loc.t -> Ast.t
|
||||
|
||||
module Parser : sig
|
||||
module Error : sig
|
||||
type t
|
||||
module Parse_error : sig
|
||||
type t
|
||||
|
||||
val position : t -> Lexing.position
|
||||
val message : t -> string
|
||||
val loc : t -> Loc.t
|
||||
val message : t -> string
|
||||
end
|
||||
|
||||
(** Exception raised in case of a parsing error *)
|
||||
exception Parse_error of Parse_error.t
|
||||
|
||||
module Lexer : sig
|
||||
module Token : sig
|
||||
type t =
|
||||
| Atom of Atom.t
|
||||
| Quoted_string of string
|
||||
| Lparen
|
||||
| Rparen
|
||||
| Sexp_comment (** "#;", only used in the jbuild syntax *)
|
||||
| Eof
|
||||
end
|
||||
|
||||
(** Exception raised in case of a parsing error *)
|
||||
exception Error of Error.t
|
||||
type t = Lexing.lexbuf -> Token.t
|
||||
|
||||
val token : t
|
||||
end
|
||||
|
||||
module Parser : sig
|
||||
module Mode : sig
|
||||
type sexp = t
|
||||
type 'a t =
|
||||
| Single : Ast.t t
|
||||
| Many : Ast.t list t
|
||||
end with type sexp := t
|
||||
|
||||
module Stack : sig
|
||||
(** Parser stack. The stack is not in [state] for optimization purposes. *)
|
||||
type t
|
||||
|
||||
val empty : t
|
||||
end
|
||||
|
||||
type 'a t
|
||||
|
||||
(** Create a new parser state. [fname] is the filename the input is from. *)
|
||||
val create : fname:string -> mode:'a Mode.t -> 'a t
|
||||
|
||||
(** Feed one character to the parser. In case of error, it raises [Parse_error] *)
|
||||
val feed : _ t -> char -> Stack.t -> Stack.t
|
||||
|
||||
(** Instruct the parser that the end of input was reached. In case of error, it raises
|
||||
[Parse_error] *)
|
||||
val feed_eoi : 'a t -> Stack.t -> 'a
|
||||
|
||||
(** {3 Convenience functions} *)
|
||||
|
||||
val feed_string : _ t -> string -> Stack.t -> Stack.t
|
||||
val feed_substring : _ t -> string -> pos:int -> len:int -> Stack.t -> Stack.t
|
||||
val feed_bytes : _ t -> bytes -> Stack.t -> Stack.t
|
||||
val feed_subbytes : _ t -> bytes -> pos:int -> len:int -> Stack.t -> Stack.t
|
||||
val parse
|
||||
: mode:'a Mode.t
|
||||
-> ?lexer:Lexer.t
|
||||
-> Lexing.lexbuf
|
||||
-> 'a
|
||||
end
|
||||
|
||||
val parse_string : fname:string -> mode:'a Parser.Mode.t -> string -> 'a
|
||||
val parse_string
|
||||
: fname:string
|
||||
-> mode:'a Parser.Mode.t
|
||||
-> ?lexer:Lexer.t
|
||||
-> string
|
||||
-> 'a
|
||||
|
|
|
@ -38,3 +38,33 @@ let x = of_sexp sexp
|
|||
val of_sexp : int list Stdune.Sexp.Of_sexp.t = <fun>
|
||||
val x : int list = [1; 2]
|
||||
|}]
|
||||
|
||||
let parse s =
|
||||
try
|
||||
Sexp.parse_string ~fname:"" ~mode:Many s
|
||||
with Sexp.Parse_error e ->
|
||||
failwith (Sexp.Parse_error.message e)
|
||||
[%%expect{|
|
||||
val parse : string -> Usexp.Ast.t list = <fun>
|
||||
|}]
|
||||
|
||||
parse {| # ## x##y x||y a#b|c#d copy# |}
|
||||
[%%expect{|
|
||||
- : Usexp.Ast.t list = [#; ##; x##y; x||y; a#b|c#d; copy#]
|
||||
|}]
|
||||
|
||||
|
||||
parse {|x #| comment |# y|}
|
||||
[%%expect{|
|
||||
- : Usexp.Ast.t list = [x; y]
|
||||
|}]
|
||||
|
||||
parse {|x#|y|}
|
||||
[%%expect{|
|
||||
Exception: Failure "atoms cannot contain #|".
|
||||
|}]
|
||||
|
||||
parse {|x|#y|}
|
||||
[%%expect{|
|
||||
Exception: Failure "atoms cannot contain |#".
|
||||
|}]
|
||||
|
|
Loading…
Reference in New Issue