Import the Sexp parser generator (#608)
This commit is contained in:
parent
338f4c9ff2
commit
8e4c9fce4c
3
Makefile
3
Makefile
|
@ -54,5 +54,8 @@ livedoc:
|
||||||
update-jbuilds: $(BIN)
|
update-jbuilds: $(BIN)
|
||||||
$(BIN) build --dev @doc/runtest --auto-promote
|
$(BIN) build --dev @doc/runtest --auto-promote
|
||||||
|
|
||||||
|
update-sexp-parser:
|
||||||
|
$(BIN) build --dev @update-sexp-parser --auto-promote
|
||||||
|
|
||||||
.PHONY: default install uninstall reinstall clean test doc
|
.PHONY: default install uninstall reinstall clean test doc
|
||||||
.PHONY: promote accept-corrections
|
.PHONY: promote accept-corrections
|
||||||
|
|
|
@ -0,0 +1,112 @@
|
||||||
|
(* Parsing of S-expression. The parsing is written as an automaton for which
|
||||||
|
we provide different implementations of actions.
|
||||||
|
*)
|
||||||
|
|
||||||
|
open Base
|
||||||
|
open Stdio
|
||||||
|
open Gen_parsexp_lib.Automaton
|
||||||
|
open Gen_parsexp_lib.Automaton.Table
|
||||||
|
|
||||||
|
(* Sharing of transitions *)
|
||||||
|
module Sharing = struct
|
||||||
|
let create_assign_id () =
|
||||||
|
let cache = Hashtbl.Poly.create () in
|
||||||
|
(cache,
|
||||||
|
fun x ->
|
||||||
|
if not (Hashtbl.mem cache x) then
|
||||||
|
Hashtbl.add_exn cache ~key:x ~data:(Hashtbl.length cache))
|
||||||
|
|
||||||
|
let share (table : t) =
|
||||||
|
let transitions, assign_transition_id =
|
||||||
|
create_assign_id ()
|
||||||
|
in
|
||||||
|
let transitions_eoi, assign_transition_eoi_id =
|
||||||
|
create_assign_id ()
|
||||||
|
in
|
||||||
|
Array.iter table.transitions ~f:assign_transition_id;
|
||||||
|
Array.iter table.transitions_eoi ~f:assign_transition_eoi_id;
|
||||||
|
(transitions, transitions_eoi)
|
||||||
|
end
|
||||||
|
|
||||||
|
let gen_code oc (t : t) =
|
||||||
|
let (named_transitions, named_transitions_eoi) = Sharing.share t in
|
||||||
|
let pr fmt = Out_channel.fprintf oc (Caml.(^^) fmt "\n") in
|
||||||
|
pr "(* generated by %s *)" Caml.Sys.argv.(0);
|
||||||
|
pr "";
|
||||||
|
pr "open Parser_automaton_internal";
|
||||||
|
pr "";
|
||||||
|
pr "let raise = Parser_automaton_internal.Error.raise";
|
||||||
|
pr "";
|
||||||
|
let ordered_ids tbl =
|
||||||
|
Hashtbl.fold tbl ~init:[] ~f:(fun ~key:x ~data:id acc -> (id, x) :: acc)
|
||||||
|
|> List.sort ~cmp:(fun (id1, _) (id2, _) -> compare id1 id2)
|
||||||
|
in
|
||||||
|
List.iter (ordered_ids named_transitions)
|
||||||
|
~f:(fun (id, tr) ->
|
||||||
|
match tr with
|
||||||
|
| Error error ->
|
||||||
|
pr "let tr_%02d _state _char _stack =" id;
|
||||||
|
pr " raise _state ~at_eof:false %s" (Error.to_string error)
|
||||||
|
| Ok { action = (eps_actions, action); goto; advance } ->
|
||||||
|
let eps_actions =
|
||||||
|
List.filter_map ~f:Epsilon_action.to_runtime_function eps_actions
|
||||||
|
in
|
||||||
|
let action = Action.to_runtime_function action in
|
||||||
|
pr "let tr_%02d state %schar stack =" id
|
||||||
|
(if Option.is_none action &&
|
||||||
|
not ([%compare.equal: goto_state] goto End_block_comment) then
|
||||||
|
"_"
|
||||||
|
else
|
||||||
|
"");
|
||||||
|
List.iter eps_actions ~f:(pr " let stack = %s state stack in");
|
||||||
|
(match action with
|
||||||
|
| None -> ()
|
||||||
|
| Some s -> pr " let stack = %s state char stack in" s);
|
||||||
|
(match goto with
|
||||||
|
| State n -> pr " set_automaton_state state %d;" n
|
||||||
|
| End_block_comment ->
|
||||||
|
pr " let stack = end_block_comment state char stack in";
|
||||||
|
pr " set_automaton_state state \
|
||||||
|
(if block_comment_depth state <> 0 then %d else %d);"
|
||||||
|
(State.to_int (Block_comment Normal)) (State.to_int Whitespace));
|
||||||
|
pr " %s state;"
|
||||||
|
(match advance with
|
||||||
|
| Advance -> "advance"
|
||||||
|
| Advance_eol -> "advance_eol");
|
||||||
|
pr " stack"
|
||||||
|
);
|
||||||
|
pr "";
|
||||||
|
List.iter (ordered_ids named_transitions_eoi) ~f:(fun (id, tr) ->
|
||||||
|
match tr with
|
||||||
|
| Error error ->
|
||||||
|
pr "let tr_eoi_%02d state _stack =" id;
|
||||||
|
pr " raise state ~at_eof:true %s" (Error.to_string error)
|
||||||
|
| Ok eps_actions ->
|
||||||
|
pr "let tr_eoi_%02d state stack =" id;
|
||||||
|
let eps_actions =
|
||||||
|
List.filter_map eps_actions ~f:Epsilon_action.to_runtime_function
|
||||||
|
in
|
||||||
|
List.iter eps_actions ~f:(pr " let stack = %s state stack in");
|
||||||
|
pr " eps_eoi_check state stack");
|
||||||
|
pr "";
|
||||||
|
let pr_table ?(per_line=1) suffix tbl ids =
|
||||||
|
pr "let transitions%s =" suffix;
|
||||||
|
let len = Array.length tbl in
|
||||||
|
let lines = len / per_line in
|
||||||
|
assert (per_line * lines = len);
|
||||||
|
for l = 0 to lines - 1 do
|
||||||
|
Out_channel.fprintf oc (if l = 0 then " [|" else " ;");
|
||||||
|
for col = 0 to per_line - 1 do
|
||||||
|
if col > 0 then Out_channel.fprintf oc ";";
|
||||||
|
let i = l * per_line + col in
|
||||||
|
Out_channel.fprintf oc " tr%s_%02d" suffix (Hashtbl.find_exn ids tbl.(i))
|
||||||
|
done;
|
||||||
|
Out_channel.fprintf oc "\n"
|
||||||
|
done;
|
||||||
|
pr " |]";
|
||||||
|
pr "";
|
||||||
|
in
|
||||||
|
pr_table "" t.transitions named_transitions ~per_line:8;
|
||||||
|
pr_table "_eoi" t.transitions_eoi named_transitions_eoi
|
||||||
|
|
||||||
|
let () = gen_code Caml.stdout table
|
|
@ -0,0 +1,4 @@
|
||||||
|
(executable
|
||||||
|
((name gen_parser_automaton)
|
||||||
|
(libraries (base stdio gen_parsexp_lib))
|
||||||
|
(preprocess (pps (ppx_sexp_conv ppx_compare)))))
|
|
@ -0,0 +1,383 @@
|
||||||
|
(* Abstract version of the parsing automaton.
|
||||||
|
|
||||||
|
It is used in two places:
|
||||||
|
|
||||||
|
- to define the automaton. At runtime, we only use integer for states and a table of
|
||||||
|
functions for transitions
|
||||||
|
|
||||||
|
- for tests
|
||||||
|
*)
|
||||||
|
|
||||||
|
open Base
|
||||||
|
|
||||||
|
module State = struct
|
||||||
|
module Quoted_string = struct
|
||||||
|
type t =
|
||||||
|
| Normal
|
||||||
|
| After_backslash
|
||||||
|
| After_backslash_cr
|
||||||
|
| After_backslash_digit
|
||||||
|
| After_backslash_2digits
|
||||||
|
| After_backslash_x
|
||||||
|
| After_backslash_x_hex
|
||||||
|
| Ignoring_blanks
|
||||||
|
[@@deriving enumerate, compare, sexp_of]
|
||||||
|
end
|
||||||
|
|
||||||
|
module Block_comment = struct
|
||||||
|
type t =
|
||||||
|
| Normal
|
||||||
|
| After_pipe
|
||||||
|
| After_hash
|
||||||
|
| Quoted_string of Quoted_string.t
|
||||||
|
[@@deriving enumerate, compare, sexp_of]
|
||||||
|
end
|
||||||
|
|
||||||
|
module Unquoted_string = struct
|
||||||
|
type t =
|
||||||
|
| Normal
|
||||||
|
| After_hash
|
||||||
|
| After_pipe
|
||||||
|
[@@deriving enumerate, compare, sexp_of]
|
||||||
|
end
|
||||||
|
|
||||||
|
type t =
|
||||||
|
| Whitespace
|
||||||
|
| Error
|
||||||
|
| After_cr
|
||||||
|
| Unquoted_string of Unquoted_string.t
|
||||||
|
| Line_comment
|
||||||
|
| After_hash
|
||||||
|
| Quoted_string of Quoted_string.t
|
||||||
|
| Block_comment of Block_comment.t
|
||||||
|
[@@deriving enumerate, compare, sexp_of]
|
||||||
|
|
||||||
|
let to_int t =
|
||||||
|
let rec loop i t l =
|
||||||
|
match l with
|
||||||
|
| [] -> assert false
|
||||||
|
| x :: l -> if [%compare.equal: t] t x then i else loop (i + 1) t l
|
||||||
|
in
|
||||||
|
loop 0 t all
|
||||||
|
|
||||||
|
let of_int i = List.nth_exn all i
|
||||||
|
|
||||||
|
let count = List.length all
|
||||||
|
|
||||||
|
let initial = to_int Whitespace
|
||||||
|
let () = assert (initial = 0) (* This is assumed in parser_automaton_internal.ml *)
|
||||||
|
let () = assert (to_int Error = 1) (* This is assumed in parser_automaton_internal.ml *)
|
||||||
|
|
||||||
|
let old_parser_approx_cont_state = function
|
||||||
|
| Whitespace -> "Parsing_toplevel_whitespace"
|
||||||
|
| After_cr -> "Parsing_nested_whitespace"
|
||||||
|
| Unquoted_string _
|
||||||
|
| Quoted_string _ -> "Parsing_atom"
|
||||||
|
| After_hash -> "Parsing_atom"
|
||||||
|
| Block_comment _ -> "Parsing_block_comment"
|
||||||
|
| Line_comment -> "Parsing_toplevel_whitespace"
|
||||||
|
(* This cannot happen with the old parser so the result is a dummy value *)
|
||||||
|
| Error -> "Parsing_toplevel_whitespace"
|
||||||
|
end
|
||||||
|
|
||||||
|
module Error = struct
|
||||||
|
(* Subset of the [Parser_automaton_internal.Public.Error.Reason.t] type *)
|
||||||
|
type t =
|
||||||
|
| Unexpected_char_parsing_hex_escape
|
||||||
|
| Unexpected_char_parsing_dec_escape
|
||||||
|
| Unterminated_quoted_string
|
||||||
|
| Unterminated_block_comment
|
||||||
|
| Comment_token_in_unquoted_atom
|
||||||
|
| Unexpected_character_after_cr
|
||||||
|
| Automaton_in_error_state
|
||||||
|
[@@deriving compare, sexp_of, hash, variants]
|
||||||
|
|
||||||
|
let to_string = Variants.to_name
|
||||||
|
end
|
||||||
|
|
||||||
|
(* Action associated to transitions. Actions correspond to the similarly named functions
|
||||||
|
in ../parser_automaton_internal.mli. *)
|
||||||
|
module Action = struct
|
||||||
|
|
||||||
|
type t =
|
||||||
|
| Nop
|
||||||
|
| Opening
|
||||||
|
| Closing
|
||||||
|
| Add_atom_char
|
||||||
|
| Add_quoted_atom_char
|
||||||
|
| Add_first_char
|
||||||
|
| Add_escaped
|
||||||
|
| Add_hex_escape_char
|
||||||
|
| Add_dec_escape_char
|
||||||
|
| Add_last_hex_escape_char
|
||||||
|
| Add_last_dec_escape_char
|
||||||
|
| Add_token_char
|
||||||
|
| Comment_add_last_dec_escape_char
|
||||||
|
| Push_quoted_atom
|
||||||
|
| Start_quoted_string
|
||||||
|
| Start_block_comment
|
||||||
|
| Start_sexp_comment
|
||||||
|
| Start_line_comment
|
||||||
|
[@@deriving compare, sexp_of, hash, variants]
|
||||||
|
|
||||||
|
let to_runtime_function = function
|
||||||
|
| Nop -> None
|
||||||
|
| t -> Some (String.uncapitalize (Variants.to_name t))
|
||||||
|
end
|
||||||
|
|
||||||
|
(* Action associated to epsilon transitions, i.e. transitions that do not consume a
|
||||||
|
character.
|
||||||
|
|
||||||
|
Having epsilon actions makes the definition of the automaton much simpler. *)
|
||||||
|
module Epsilon_action = struct
|
||||||
|
type t =
|
||||||
|
| Nop
|
||||||
|
| Push_atom
|
||||||
|
| Add_first_char_hash
|
||||||
|
| Add_escaped_cr
|
||||||
|
| End_line_comment
|
||||||
|
[@@deriving compare, sexp_of, hash, variants]
|
||||||
|
|
||||||
|
let to_runtime_function = function
|
||||||
|
| Nop -> None
|
||||||
|
| End_line_comment -> Some "end_line_comment"
|
||||||
|
| t -> Some ("eps_" ^ String.uncapitalize (Variants.to_name t))
|
||||||
|
end
|
||||||
|
|
||||||
|
module Transition = struct
|
||||||
|
type t =
|
||||||
|
| T of Action.t * State.t
|
||||||
|
| E of Epsilon_action.t * State.t
|
||||||
|
| Error of Error.t
|
||||||
|
| End_block_comment (* can't be a normal transition, as the new state isn't known
|
||||||
|
statically *)
|
||||||
|
[@@deriving compare]
|
||||||
|
end
|
||||||
|
|
||||||
|
module Final_transition = struct
|
||||||
|
type t =
|
||||||
|
| Eoi_check
|
||||||
|
| E of Epsilon_action.t * State.t
|
||||||
|
| Error of Error.t
|
||||||
|
end
|
||||||
|
|
||||||
|
module type Automaton = sig
|
||||||
|
val transition : State.t * char -> Transition.t
|
||||||
|
val transition_eoi : State.t -> Final_transition.t
|
||||||
|
end
|
||||||
|
|
||||||
|
(* Definition of the automaton, compiled later to a transition table. *)
|
||||||
|
module Automaton : Automaton = struct
|
||||||
|
module Quoted_string_transition = struct
|
||||||
|
type t =
|
||||||
|
| T of Action.t * State.Quoted_string.t
|
||||||
|
| E of Epsilon_action.t * State.Quoted_string.t
|
||||||
|
| Error of Error.t
|
||||||
|
| End_of_quoted_string
|
||||||
|
end
|
||||||
|
|
||||||
|
type context = In_block_comment | In_atom
|
||||||
|
|
||||||
|
let quoted_string_transition
|
||||||
|
: context -> State.Quoted_string.t * char -> Quoted_string_transition.t
|
||||||
|
= fun context x ->
|
||||||
|
(* Distinguising atom and block comments is to optimize block comments. But
|
||||||
|
we musn't optimize the exception on things like \321. *)
|
||||||
|
let if_atom then_ else_ : Action.t =
|
||||||
|
match context with
|
||||||
|
| In_atom -> then_
|
||||||
|
| In_block_comment -> else_
|
||||||
|
in
|
||||||
|
let if_atom_eps then_ else_ : Epsilon_action.t =
|
||||||
|
match context with
|
||||||
|
| In_atom -> then_
|
||||||
|
| In_block_comment -> else_
|
||||||
|
in
|
||||||
|
match x with
|
||||||
|
| Normal, '"' -> End_of_quoted_string
|
||||||
|
| Normal, '\\' -> T (Add_token_char, After_backslash)
|
||||||
|
| Normal, _ -> T (if_atom Add_quoted_atom_char Add_token_char, Normal)
|
||||||
|
|
||||||
|
| After_backslash, '\n' ->
|
||||||
|
T (Add_token_char, Ignoring_blanks)
|
||||||
|
| After_backslash, '\r' ->
|
||||||
|
T (Add_token_char, After_backslash_cr)
|
||||||
|
| After_backslash, 'x' ->
|
||||||
|
T (Add_token_char, After_backslash_x)
|
||||||
|
| After_backslash, '0'..'9' ->
|
||||||
|
T (Add_dec_escape_char, After_backslash_digit)
|
||||||
|
| After_backslash, _ ->
|
||||||
|
T (if_atom Add_escaped Add_token_char, Normal)
|
||||||
|
|
||||||
|
| After_backslash_cr, '\n' ->
|
||||||
|
T (Add_token_char, Ignoring_blanks)
|
||||||
|
| After_backslash_cr, _ ->
|
||||||
|
E (if_atom_eps Add_escaped_cr Nop, Normal)
|
||||||
|
|
||||||
|
| After_backslash_x, ('0'..'9' | 'a'..'f' | 'A'..'F') ->
|
||||||
|
T (if_atom Add_hex_escape_char Add_token_char , After_backslash_x_hex)
|
||||||
|
| After_backslash_x, _ ->
|
||||||
|
Error Unexpected_char_parsing_hex_escape
|
||||||
|
|
||||||
|
| After_backslash_x_hex, ('0'..'9' | 'a'..'f' | 'A'..'F') ->
|
||||||
|
T (if_atom Add_last_hex_escape_char Add_token_char, Normal)
|
||||||
|
| After_backslash_x_hex, _ ->
|
||||||
|
Error Unexpected_char_parsing_hex_escape
|
||||||
|
|
||||||
|
| After_backslash_digit, '0'..'9' ->
|
||||||
|
T (Add_dec_escape_char, After_backslash_2digits)
|
||||||
|
| After_backslash_digit, _ ->
|
||||||
|
Error Unexpected_char_parsing_dec_escape
|
||||||
|
|
||||||
|
| After_backslash_2digits, '0'..'9' ->
|
||||||
|
T (if_atom Add_last_dec_escape_char Comment_add_last_dec_escape_char, Normal)
|
||||||
|
| After_backslash_2digits, _ ->
|
||||||
|
Error Unexpected_char_parsing_dec_escape
|
||||||
|
|
||||||
|
| Ignoring_blanks, (' '|'\t') ->
|
||||||
|
T (Add_token_char, Ignoring_blanks)
|
||||||
|
| Ignoring_blanks, _ ->
|
||||||
|
E (Nop, Normal)
|
||||||
|
|
||||||
|
module Block_comment_transition = struct
|
||||||
|
type t =
|
||||||
|
| T of Action.t * State.Block_comment.t
|
||||||
|
| E of Epsilon_action.t * State.Block_comment.t
|
||||||
|
| Error of Error.t
|
||||||
|
| End_comment
|
||||||
|
end
|
||||||
|
|
||||||
|
let block_comment_transition
|
||||||
|
: State.Block_comment.t * char -> Block_comment_transition.t
|
||||||
|
= function
|
||||||
|
| Quoted_string state, c -> (
|
||||||
|
match quoted_string_transition In_block_comment (state, c) with
|
||||||
|
| End_of_quoted_string -> T (Add_token_char, Normal)
|
||||||
|
| T (action, state) -> T (action, Quoted_string state)
|
||||||
|
| E (action, state) -> E (action, Quoted_string state)
|
||||||
|
| Error error -> Error error
|
||||||
|
)
|
||||||
|
| After_hash, '|' -> T (Start_block_comment, Normal)
|
||||||
|
| After_pipe, '#' -> End_comment
|
||||||
|
| _, '"' -> T (Add_token_char, Quoted_string Normal)
|
||||||
|
| _, '|' -> T (Add_token_char, After_pipe)
|
||||||
|
| _, '#' -> T (Add_token_char, After_hash)
|
||||||
|
| _, _ -> T (Add_token_char, Normal)
|
||||||
|
|
||||||
|
let transition : State.t * char -> Transition.t = function
|
||||||
|
| Whitespace, '(' -> T (Opening, Whitespace)
|
||||||
|
| Whitespace, ')' -> T (Closing, Whitespace)
|
||||||
|
| Whitespace, '\r' -> T (Nop, After_cr)
|
||||||
|
| Whitespace, (' ' | '\t' | '\012' | '\n') -> T (Nop, Whitespace)
|
||||||
|
| Whitespace, ';' -> T (Start_line_comment, Line_comment)
|
||||||
|
| Whitespace, '"' -> T (Start_quoted_string, Quoted_string (Normal))
|
||||||
|
| Whitespace, '#' -> T (Nop, After_hash)
|
||||||
|
| Whitespace, '|' -> T (Add_first_char, Unquoted_string After_pipe)
|
||||||
|
| Whitespace, _ -> T (Add_first_char, Unquoted_string Normal)
|
||||||
|
|
||||||
|
| After_cr, '\n' -> T (Nop, Whitespace)
|
||||||
|
| After_cr, _ -> Error Unexpected_character_after_cr
|
||||||
|
|
||||||
|
| Unquoted_string _, (';'|'('|')'|'"'|' '|'\t'|'\012'|'\r'|'\n') ->
|
||||||
|
E (Push_atom, Whitespace)
|
||||||
|
| Unquoted_string After_hash, '|'
|
||||||
|
| Unquoted_string After_pipe, '#' -> Error Comment_token_in_unquoted_atom
|
||||||
|
| Unquoted_string _, '#' -> T (Add_atom_char, Unquoted_string After_hash)
|
||||||
|
| Unquoted_string _, '|' -> T (Add_atom_char, Unquoted_string After_pipe)
|
||||||
|
| Unquoted_string _, _ -> T (Add_atom_char, Unquoted_string Normal)
|
||||||
|
|
||||||
|
| Line_comment, ('\r' | '\n') -> E (End_line_comment, Whitespace)
|
||||||
|
| Line_comment, _ -> T (Add_token_char, Line_comment)
|
||||||
|
|
||||||
|
| After_hash, ';' -> T (Start_sexp_comment, Whitespace)
|
||||||
|
| After_hash, '|' -> T (Start_block_comment, Block_comment Normal)
|
||||||
|
| After_hash, _ -> E (Add_first_char_hash, Unquoted_string Normal)
|
||||||
|
|
||||||
|
| Quoted_string state, c -> (
|
||||||
|
match quoted_string_transition In_atom (state, c) with
|
||||||
|
| End_of_quoted_string -> T (Push_quoted_atom, Whitespace)
|
||||||
|
| T (action, state) -> T (action, Quoted_string state)
|
||||||
|
| E (action, state) -> E (action, Quoted_string state)
|
||||||
|
| Error error -> Error error
|
||||||
|
)
|
||||||
|
|
||||||
|
| Block_comment state, c -> (
|
||||||
|
match block_comment_transition (state, c) with
|
||||||
|
| T (action, state) -> T (action, Block_comment state)
|
||||||
|
| E (action, state) -> E (action, Block_comment state)
|
||||||
|
| End_comment -> End_block_comment
|
||||||
|
| Error error -> Error error
|
||||||
|
)
|
||||||
|
|
||||||
|
| Error, _ -> Error Automaton_in_error_state
|
||||||
|
|
||||||
|
let transition_eoi : State.t -> Final_transition.t = function
|
||||||
|
| Whitespace -> Eoi_check
|
||||||
|
| After_cr -> Error Unexpected_character_after_cr
|
||||||
|
| Unquoted_string _ -> E (Push_atom, Whitespace)
|
||||||
|
| Line_comment -> E (End_line_comment, Whitespace)
|
||||||
|
| After_hash -> E (Add_first_char_hash, Unquoted_string Normal)
|
||||||
|
| Quoted_string _ -> Error Unterminated_quoted_string
|
||||||
|
| Block_comment _ -> Error Unterminated_block_comment
|
||||||
|
| Error -> Error Automaton_in_error_state
|
||||||
|
end
|
||||||
|
|
||||||
|
module Table = struct
|
||||||
|
type action = Epsilon_action.t list * Action.t [@@deriving compare, sexp_of, hash]
|
||||||
|
type goto_state = State of int | End_block_comment [@@deriving compare, sexp_of, hash]
|
||||||
|
type advance = Advance | Advance_eol [@@deriving compare, sexp_of, hash]
|
||||||
|
type transition = { action : action; goto : goto_state; advance : advance }
|
||||||
|
[@@deriving compare, sexp_of, hash]
|
||||||
|
type 'a or_error =
|
||||||
|
| Ok of 'a
|
||||||
|
| Error of Error.t
|
||||||
|
[@@deriving compare, sexp_of, hash]
|
||||||
|
|
||||||
|
type t =
|
||||||
|
{ transitions : transition or_error array
|
||||||
|
; transitions_eoi : Epsilon_action.t list or_error array
|
||||||
|
}
|
||||||
|
|
||||||
|
let advance = function
|
||||||
|
| '\n' -> Advance_eol
|
||||||
|
| _ -> Advance
|
||||||
|
|
||||||
|
let compile (module A : Automaton) =
|
||||||
|
let rec squash acc state c =
|
||||||
|
match A.transition (state, c) with
|
||||||
|
| T (action, state) -> Ok { action = (List.rev acc, action)
|
||||||
|
; goto = State (State.to_int state)
|
||||||
|
; advance = advance c
|
||||||
|
}
|
||||||
|
| E (action, state) -> squash (action :: acc) state c
|
||||||
|
| Error error -> Error error
|
||||||
|
| End_block_comment -> Ok { action = (List.rev acc, Nop)
|
||||||
|
; goto = End_block_comment
|
||||||
|
; advance = advance c
|
||||||
|
}
|
||||||
|
in
|
||||||
|
let rec squash_eoi acc state =
|
||||||
|
match A.transition_eoi state with
|
||||||
|
| Eoi_check -> Ok (List.rev acc)
|
||||||
|
| E (eps_action, state) -> squash_eoi (eps_action :: acc) state
|
||||||
|
| Error error -> Error error
|
||||||
|
in
|
||||||
|
let transitions =
|
||||||
|
Array.create ~len:(State.count * 256)
|
||||||
|
(Ok { action = ([], Action.Nop)
|
||||||
|
; goto = State 0
|
||||||
|
; advance = Advance
|
||||||
|
})
|
||||||
|
in
|
||||||
|
let transitions_eoi = Array.create (Ok []) ~len:State.count in
|
||||||
|
for s = 0 to State.count - 1 do
|
||||||
|
let state = State.of_int s in
|
||||||
|
for c = 0 to 255 do
|
||||||
|
transitions.(s * 256 + c) <- squash [] state (Char.of_int_exn c);
|
||||||
|
done;
|
||||||
|
transitions_eoi.(s) <- squash_eoi [] state
|
||||||
|
done;
|
||||||
|
{ transitions; transitions_eoi }
|
||||||
|
end
|
||||||
|
|
||||||
|
let table = Table.compile (module Automaton)
|
|
@ -0,0 +1,10 @@
|
||||||
|
(library
|
||||||
|
((name gen_parsexp_lib)
|
||||||
|
(libraries (base))
|
||||||
|
(preprocess (pps (ppx_sexp_conv
|
||||||
|
ppx_compare
|
||||||
|
ppx_enumerate
|
||||||
|
ppx_hash
|
||||||
|
ppx_variants_conv)))))
|
||||||
|
|
||||||
|
(jbuild_version 1)
|
|
@ -1,3 +1,10 @@
|
||||||
(jbuild_version 1)
|
(jbuild_version 1)
|
||||||
|
|
||||||
(library ((name usexp)))
|
(library ((name usexp)))
|
||||||
|
|
||||||
|
(rule
|
||||||
|
(with-stdout-to table.ml.gen (run gen/gen_parser_automaton.exe)))
|
||||||
|
|
||||||
|
(alias
|
||||||
|
((name update-sexp-parser)
|
||||||
|
(action (diff table.ml table.ml.gen))))
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
(* This file was copied and adapted from the src/parser_automaton.ml file of
|
(* generated by ./gen/gen_parser_automaton.exe *)
|
||||||
https://github.com/janestreet/parsexp *)
|
|
||||||
|
|
||||||
open Parser_automaton_internal
|
open Parser_automaton_internal
|
||||||
|
|
||||||
|
@ -1317,3 +1316,4 @@ let transitions_eoi =
|
||||||
; tr_eoi_07
|
; tr_eoi_07
|
||||||
; tr_eoi_07
|
; tr_eoi_07
|
||||||
|]
|
|]
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue