From 9734b2e6d04d2f487cc8cf427e9becf052d79019 Mon Sep 17 00:00:00 2001 From: Jeremie Dimino Date: Sat, 14 Jul 2018 03:58:23 +0100 Subject: [PATCH] Make the preprocessor more generic Signed-off-by: Jeremie Dimino --- src/let-syntax/dune | 2 +- src/let-syntax/lexer.mll | 192 +++++++++++++++++++++++++++++++++++++++ src/let-syntax/pp.mll | 186 ------------------------------------- 3 files changed, 193 insertions(+), 187 deletions(-) create mode 100644 src/let-syntax/lexer.mll delete mode 100644 src/let-syntax/pp.mll diff --git a/src/let-syntax/dune b/src/let-syntax/dune index 0d7b1143..8185cce5 100644 --- a/src/let-syntax/dune +++ b/src/let-syntax/dune @@ -1,2 +1,2 @@ (executable (name pp)) -(ocamllex pp) +(ocamllex lexer) diff --git a/src/let-syntax/lexer.mll b/src/let-syntax/lexer.mll new file mode 100644 index 00000000..d7d16588 --- /dev/null +++ b/src/let-syntax/lexer.mll @@ -0,0 +1,192 @@ +{ +open StdLabels +open Printf + +type mode = Generated_code | Source_code + +type t = + { fname : string + ; output_fname : string + ; (* Line number in generated file *) + mutable line : int + ; oc : out_channel + ; mutable mode : mode + ; buf : Buffer.t + } + +let lexeme_len lb = + Lexing.lexeme_end lb - Lexing.lexeme_start lb + +let fail t lb msg = + let start = Lexing.lexeme_start_p lb in + let stop = Lexing.lexeme_end_p lb in + Printf.eprintf + "File %S, line %d, characters %d-%d:\n\ + Error: %s\n%!" + t.fname start.pos_lnum (start.pos_cnum - start.pos_bol) + (stop.pos_cnum - start.pos_bol) + msg; + exit 1 + +let ps t s = + String.iter s ~f:(function + | '\n' -> t.line <- t.line + 1 + | _ -> ()); + output_string t.oc s +let pc t = function + | '\n' -> t.line <- t.line + 1; output_char t.oc '\n' + | c -> output_char t.oc c +let npc t n c = for _ = 1 to n do pc t c done +let pf t fmt = ksprintf (ps t) fmt + +let enter_generated_code t = + t.mode <- Generated_code; + pc t '\n'; + pf t "# %d %S\n" t.line t.output_fname + +let enter_source_code t (pos : Lexing.position) = + t.mode <- Source_code; + pc t '\n'; + pf t "# %d %S\n" pos.pos_lnum t.fname; + let col = pos.pos_cnum - pos.pos_bol in + npc t col ' ' + +let pass_through t lb = + if t.mode = Generated_code then + enter_source_code t (Lexing.lexeme_start_p lb); + ps t (Lexing.lexeme lb) + +type and_or_in = And | In +} + +let space = [' ' '\t'] +let newline = '\n' | "\r\n" +let id = ['a'-'z' '_'] ['A'-'Z' 'a'-'z' '_' '\'' '0'-'9']* + +rule main t col = parse + | eof + { In } + | newline + { pass_through t lexbuf; + Lexing.new_line lexbuf; + after_newline t col lexbuf + } + | " in" newline + { pass_through t lexbuf; + Lexing.new_line lexbuf; + after_in_and_newline t col lexbuf + } + | _ + { pass_through t lexbuf; + main t col lexbuf + } + | id as id + { pass_through t lexbuf; + match id with + | "let" -> after_let t col (Lexing.lexeme_start_p lexbuf) lexbuf + | _ -> main t col lexbuf + } + +and after_let t col pos = parse + | "%" (id as id) + { if id <> "map" then begin + pass_through t lexbuf; + main t col lexbuf + end else begin + let col' = pos.Lexing.pos_cnum - pos.Lexing.pos_bol in + let rec loop i = + let pos = Lexing.lexeme_end_p lexbuf in + enter_generated_code t; + let id = sprintf "__x%d__" i in + ps t id; + lhs t lexbuf; + let pattern = Buffer.contents t.buf in + Buffer.clear t.buf; + (id, (pos, pattern)) :: + match main t col' lexbuf with + | And -> loop (i + 1) + | In -> [] + in + let ids, patterns = List.split (loop 1) in + enter_generated_code t; + ps t "Let_syntax.(fun f -> const f"; + List.iter ids ~f:(pf t "$ %s"); + ps t ") @@ fun "; + List.iter patterns ~f:(fun (pos, pattern) -> + pc t '('; + enter_source_code t pos; + ps t pattern; + pc t ')'); + ps t "->"; + t.mode <- Generated_code; + main t col lexbuf + end + } + | "" + { main t col lexbuf } + +and after_newline t col = parse + | space* + { pass_through t lexbuf; + if lexeme_len lexbuf = col then + after_indent t col lexbuf + else + main t col lexbuf + } + +and after_indent t col = parse + | id as id + { pass_through t lexbuf; + match id with + | "and" -> And + | "in" -> In + | _ -> fail t lexbuf "'and' or 'in' keyword expected" + } + +and after_in_and_newline t col = parse + | space* newline + { pass_through t lexbuf; + Lexing.new_line lexbuf; + after_in_and_newline t col lexbuf + } + | space* + { pass_through t lexbuf; + if lexeme_len lexbuf = col then + In + else + main t col lexbuf + } + +and lhs t = parse + | eof + { () + } + | newline as s + { Buffer.add_string t.buf s; + Lexing.new_line lexbuf; + lhs t lexbuf + } + | "=" + { pass_through t lexbuf + } + | _ as c + { Buffer.add_char t.buf c; + lhs t lexbuf + } + +{ + let process_file ~fname ~output_fname ~oc = + let (And | In) = + let t = + { fname + ; output_fname + ; line = 1 + ; oc + ; mode = Generated_code + ; buf = Buffer.create 512 + } + in + main t (-1) (Lexing.from_channel (open_in_bin fname)) + in + () +} diff --git a/src/let-syntax/pp.mll b/src/let-syntax/pp.mll deleted file mode 100644 index 560460b2..00000000 --- a/src/let-syntax/pp.mll +++ /dev/null @@ -1,186 +0,0 @@ -{ -open StdLabels -open Printf - -let fname = Sys.argv.(1) -let fname_gen = fname ^ ".generated" - -let lexeme_len lb = - Lexing.lexeme_end lb - Lexing.lexeme_start lb - -let fail lb msg = - let start = Lexing.lexeme_start_p lb in - let stop = Lexing.lexeme_end_p lb in - Printf.eprintf - "File %S, line %d, characters %d-%d:\n\ - Error: %s\n%!" - fname start.pos_lnum (start.pos_cnum - start.pos_bol) - (stop.pos_cnum - start.pos_bol) - msg; - exit 1 - -(* Line number in generated file *) -let lnum = ref 1 - -let ps s = - String.iter s ~f:(function - | '\n' -> incr lnum - | _ -> ()); - print_string s -let pc = function - | '\n' -> incr lnum; print_char '\n' - | c -> print_char c -let npc n c = for _ = 1 to n do pc c done -let pf fmt = ksprintf ps fmt - -let gen_id = - let n = ref 0 in - fun () -> - incr n; - sprintf "__x%d__" !n - -let buf = Buffer.create 512 -let add_lexeme lb = Buffer.add_string buf (Lexing.lexeme lb) - -type mode = Generated_code | Source_code - -let mode = ref Generated_code - -let enter_generated_code () = - mode := Generated_code; - pc '\n'; - pf "# %d %S\n" !lnum fname_gen - -let enter_source_code (pos : Lexing.position) = - mode := Source_code; - pc '\n'; - pf "# %d %S\n" pos.pos_lnum fname; - let col = pos.pos_cnum - pos.pos_bol in - npc col ' ' - -let pass_through lb = - if !mode = Generated_code then - enter_source_code (Lexing.lexeme_start_p lb); - ps (Lexing.lexeme lb) - -type and_or_in = And | In -} - -let space = [' ' '\t'] -let newline = '\n' | "\r\n" -let id = ['a'-'z' '_'] ['A'-'Z' 'a'-'z' '_' '\'' '0'-'9']* - -rule main col = parse - | eof - { In } - | newline - { pass_through lexbuf; - Lexing.new_line lexbuf; - after_newline col lexbuf - } - | " in" newline - { pass_through lexbuf; - Lexing.new_line lexbuf; - after_in_and_newline col lexbuf - } - | _ - { pass_through lexbuf; - main col lexbuf - } - | id as id - { pass_through lexbuf; - match id with - | "let" -> after_let col (Lexing.lexeme_start_p lexbuf) lexbuf - | _ -> main col lexbuf - } - -and after_let col pos = parse - | "%" (id as id) - { if id <> "map" then begin - pass_through lexbuf; - main col lexbuf - end else begin - let col' = pos.Lexing.pos_cnum - pos.Lexing.pos_bol in - let rec loop () = - let pos = Lexing.lexeme_end_p lexbuf in - enter_generated_code (); - let id = gen_id () in - ps id; - lhs lexbuf; - let pattern = Buffer.contents buf in - Buffer.clear buf; - (id, (pos, pattern)) :: - match main col' lexbuf with - | And -> loop () - | In -> [] - in - let ids, patterns = List.split (loop ()) in - enter_generated_code (); - ps "Let_syntax.(fun f -> const f"; - List.iter ids ~f:(pf "$ %s"); - ps ") @@ fun "; - List.iter patterns ~f:(fun (pos, pattern) -> - pc '('; - enter_source_code pos; - ps pattern; - pc ')'); - ps "->"; - mode := Generated_code; - main col lexbuf - end - } - | "" - { main col lexbuf } - -and after_newline col = parse - | space* - { pass_through lexbuf; - if lexeme_len lexbuf = col then - after_indent col lexbuf - else - main col lexbuf - } - -and after_indent col = parse - | id as id - { pass_through lexbuf; - match id with - | "and" -> And - | "in" -> In - | _ -> fail lexbuf "'and' or 'in' keyword expected" - } - -and after_in_and_newline col = parse - | space* newline - { pass_through lexbuf; - Lexing.new_line lexbuf; - after_in_and_newline col lexbuf - } - | space* - { pass_through lexbuf; - if lexeme_len lexbuf = col then - In - else - main col lexbuf - } - -and lhs = parse - | eof - { () - } - | newline as s - { Buffer.add_string buf s; - Lexing.new_line lexbuf; - lhs lexbuf - } - | "=" - { pass_through lexbuf - } - | _ as c - { Buffer.add_char buf c; - lhs lexbuf - } - -{ - let (And | In) = main (-1) (Lexing.from_channel (open_in_bin fname)) -}