forked from ocaml/ocaml
-
Notifications
You must be signed in to change notification settings - Fork 0
/
compact.ml
232 lines (198 loc) · 7.33 KB
/
compact.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
(* Compaction of an automata *)
open Lexgen
(* Code for memory actions *)
let code = Table.create 0
(* instructions are 2 8-bits integers, a 0xff byte means return *)
let emit_int i = Table.emit code i
let ins_mem i c = match i with
| Copy (dst, src) -> dst::src::c
| Set dst -> dst::0xff::c
let ins_tag i c = match i with
| SetTag (dst, src) -> dst::src::c
| EraseTag dst -> dst::0xff::c
let do_emit_code c =
let r = Table.size code in
List.iter emit_int c ;
emit_int 0xff ;
r
let memory = Hashtbl.create 101
let mem_emit_code c =
try Hashtbl.find memory c with
| Not_found ->
let r = do_emit_code c in
Hashtbl.add memory c r ;
r
(* Code address 0 is the empty code (ie do nothing) *)
let _ = mem_emit_code []
let emit_tag_code c = mem_emit_code (List.fold_right ins_tag c [])
and emit_mem_code c =mem_emit_code (List.fold_right ins_mem c [])
(*******************************************)
(* Compact the transition and check arrays *)
(*******************************************)
(* Determine the integer occurring most frequently in an array *)
let most_frequent_elt v =
let frequencies = Hashtbl.create 17 in
let max_freq = ref 0 in
let most_freq = ref (v.(0)) in
for i = 0 to Array.length v - 1 do
let e = v.(i) in
let r =
try
Hashtbl.find frequencies e
with Not_found ->
let r = ref 1 in Hashtbl.add frequencies e r; r in
incr r;
if !r > !max_freq then begin max_freq := !r; most_freq := e end
done;
!most_freq
(* Transform an array into a list of (position, non-default element) *)
let non_default_elements def v =
let rec nondef i =
if i >= Array.length v then [] else begin
let e = v.(i) in
if e = def then nondef(i+1) else (i, e) :: nondef(i+1)
end in
nondef 0
type t_compact =
{mutable c_trans : int array ;
mutable c_check : int array ;
mutable c_last_used : int ; }
let create_compact () =
{ c_trans = Array.make 1024 0 ;
c_check = Array.make 1024 (-1) ;
c_last_used = 0 ; }
let reset_compact c =
c.c_trans <- Array.make 1024 0 ;
c.c_check <- Array.make 1024 (-1) ;
c.c_last_used <- 0
(* One compacted table for transitions, one other for memory actions *)
let trans = create_compact ()
and moves = create_compact ()
let grow_compact c =
let old_trans = c.c_trans
and old_check = c.c_check in
let n = Array.length old_trans in
c.c_trans <- Array.make (2*n) 0;
Array.blit old_trans 0 c.c_trans 0 c.c_last_used;
c.c_check <- Array.make (2*n) (-1);
Array.blit old_check 0 c.c_check 0 c.c_last_used
let do_pack state_num orig compact =
let default = most_frequent_elt orig in
let nondef = non_default_elements default orig in
let rec pack_from b =
while
b + 257 > Array.length compact.c_trans
do
grow_compact compact
done;
let rec try_pack = function
[] -> b
| (pos, _v) :: rem ->
if compact.c_check.(b + pos) = -1 then
try_pack rem
else pack_from (b+1) in
try_pack nondef in
let base = pack_from 0 in
List.iter
(fun (pos, v) ->
compact.c_trans.(base + pos) <- v;
compact.c_check.(base + pos) <- state_num)
nondef;
if base + 257 > compact.c_last_used then
compact.c_last_used <- base + 257;
(base, default)
let pack_moves state_num move_t =
let move_v = Array.make 257 0
and move_m = Array.make 257 0 in
for i = 0 to 256 do
let act,c = move_t.(i) in
move_v.(i) <- (match act with Backtrack -> -1 | Goto n -> n) ;
move_m.(i) <- emit_mem_code c
done ;
let pk_trans = do_pack state_num move_v trans
and pk_moves = do_pack state_num move_m moves in
pk_trans, pk_moves
(* Build the tables *)
type lex_tables =
{ tbl_base: int array; (* Perform / Shift *)
tbl_backtrk: int array; (* No_remember / Remember *)
tbl_default: int array; (* Default transition *)
tbl_trans: int array; (* Transitions (compacted) *)
tbl_check: int array; (* Check (compacted) *)
(* code addresses are managed in a similar fashion as transitions *)
tbl_base_code : int array; (* code ptr / base for Shift *)
tbl_backtrk_code : int array; (* nothing / code when Remember *)
(* moves to execute before transitions (compacted) *)
tbl_default_code : int array;
tbl_trans_code : int array;
tbl_check_code : int array;
(* byte code itself *)
tbl_code: int array;}
let compact_tables state_v =
let n = Array.length state_v in
let base = Array.make n 0
and backtrk = Array.make n (-1)
and default = Array.make n 0
and base_code = Array.make n 0
and backtrk_code = Array.make n 0
and default_code = Array.make n 0 in
for i = 0 to n - 1 do
match state_v.(i) with
| Perform (n,c) ->
base.(i) <- -(n+1) ;
base_code.(i) <- emit_tag_code c
| Shift(trans, move) ->
begin match trans with
| No_remember -> ()
| Remember (n,c) ->
backtrk.(i) <- n ;
backtrk_code.(i) <- emit_tag_code c
end;
let (b_trans, d_trans),(b_moves,d_moves) = pack_moves i move in
base.(i) <- b_trans; default.(i) <- d_trans ;
base_code.(i) <- b_moves; default_code.(i) <- d_moves ;
done;
let code = Table.trim code in
let tables =
if Array.length code > 1 then
{ tbl_base = base;
tbl_backtrk = backtrk;
tbl_default = default;
tbl_trans = Array.sub trans.c_trans 0 trans.c_last_used;
tbl_check = Array.sub trans.c_check 0 trans.c_last_used;
tbl_base_code = base_code ;
tbl_backtrk_code = backtrk_code;
tbl_default_code = default_code;
tbl_trans_code = Array.sub moves.c_trans 0 moves.c_last_used;
tbl_check_code = Array.sub moves.c_check 0 moves.c_last_used;
tbl_code = code}
else (* when no memory moves, do not emit related tables *)
{ tbl_base = base;
tbl_backtrk = backtrk;
tbl_default = default;
tbl_trans = Array.sub trans.c_trans 0 trans.c_last_used;
tbl_check = Array.sub trans.c_check 0 trans.c_last_used;
tbl_base_code = [||] ;
tbl_backtrk_code = [||];
tbl_default_code = [||];
tbl_trans_code = [||];
tbl_check_code = [||];
tbl_code = [||]}
in
reset_compact trans ;
reset_compact moves ;
tables