forked from kyordhel/GPSRCmdGen
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Grammar.cs
441 lines (392 loc) · 14.7 KB
/
Grammar.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
namespace GPSRCmdGen
{
/// <summary>
/// A grammar which can be used to produce random sentences.
/// </summary>
public class Grammar : ITiered
{
#region Variables
/// <summary>
/// Stores the set of production rules (accessible by rule name)
/// </summary>
private Dictionary<string, ProductionRule> productionRules;
#endregion
#region Constructors
/// <summary>
/// Initializes a new instance of the <see cref="GPSRCmdGen.Grammar"/> class.
/// </summary>
public Grammar(){
this.productionRules = new Dictionary<string, ProductionRule> ();
this.Tier = DifficultyDegree.Unknown;
}
#endregion
#region Properties
/// <summary>
/// Gets or sets the name of the grammar.
/// </summary>
public string Name{ get; set; }
/// <summary>
/// Gets the difficulty degree (tier) of the grammar
/// </summary>
public DifficultyDegree Tier{ get; set; }
/// <summary>
/// Gets the set of production rules (accessible by rule name)
/// </summary>
internal Dictionary<string, ProductionRule> ProductionRules {
get{ return this.productionRules;}
}
#endregion
#region Methods
/// <summary>
/// Retrieves the non-terminal symbol within the input string pointed by cc
/// </summary>
/// <param name="s">S.</param>
/// <param name="cc">A read header that points to the first character at
/// the right of.</param>
/// <returns>The non-terminal symbol found.</returns>
private string FetchNonTerminal (string s, ref int cc)
{
char c;
int bcc = cc++;
while (cc < s.Length) {
c = s [cc];
if (((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')) || (c == '_') )
++cc;
else
break;
}
return s.Substring (bcc, cc - bcc);
}
/// <summary>
/// Gets a random replacement for the provided non-terminal symbol.
/// </summary>
/// <param name="nonTerminal">The non-terminal symbol for which a
/// random replacement will be searched</param>
/// <param name="rnd">Random number generator used to choose a replacement</param>
/// <returns>A replacement string. If the non-terminal symbol does not
/// belong to the grammar or contains no productions, an empty string
/// is returned.</returns>
private string FindReplacement(string nonTerminal, Random rnd){
ProductionRule rule;
if (!this.productionRules.ContainsKey (nonTerminal))
return String.Empty;
rule = this.productionRules [nonTerminal];
int max = rule.Replacements.Count;
if (max < 1)
return String.Empty;
return rule.Replacements [rnd.Next (0, max)];
}
/// <summary>
/// Generates a random sentence.
/// </summary>
/// <returns>A randomly generated sentence.</returns>
public string GenerateSentence(){
return GenerateSentence (new Random (DateTime.Now.Millisecond));
}
/// <summary>
/// Generates a random sentence.
/// </summary>
/// <param name="rnd">Random number generator used to choose the
/// productions and generate the sentence</param>
/// <returns>A randomly generated sentence.</returns>
public string GenerateSentence(Random rnd){
string option = FindReplacement("$Main", rnd);
return SolveNonTerminals (option, rnd);
}
/// <summary>
/// Solves all the non-terminal symbols within the given sentence.
/// </summary>
/// <param name="sentence">A string with non-terminal symbols to replace.</param>
/// <param name="rnd">Random number generator used to choose the
/// productions and generate the sentence</param>
/// <returns>A string with terminal symbols only</returns>
private string SolveNonTerminals (string sentence, Random rnd)
{
return SolveNonTerminals(sentence, rnd, 0);
}
/// <summary>
/// Solves all the non-terminal symbols within the given sentence.
/// </summary>
/// <param name="sentence">A string with non-terminal symbols to replace.</param>
/// <param name="rnd">Random number generator used to choose the
/// productions and generate the sentence</param>
/// <param name="stackCounter">A counter that indicates how many times
/// this function has called itself. It is used to prevent a stack overflow.
/// When it reach 1000 the production is aborted.</param>
/// <returns>A string with terminal symbols only</returns>
/// <remarks>Recursive function</remarks>
private string SolveNonTerminals (string sentence, Random rnd, int stackCounter)
{
if (++stackCounter > 999)
throw new StackOverflowException ();
int bcc;
string replacement;
string nonTerminal;
// Search in sentence for non-terminals
int cc = 0;
while ( cc < sentence.Length) {
if (sentence [cc] != '$') {
++cc;
continue;
}
bcc = cc;
nonTerminal = FetchNonTerminal (sentence, ref cc);
replacement = FindReplacement (nonTerminal, rnd);
replacement = SolveNonTerminals (replacement, rnd, stackCounter);
sentence = sentence.Substring (0, bcc) + replacement + sentence.Substring (cc);
cc = bcc + replacement.Length;
}
return sentence;
}
/// <summary>
/// Returns a <see cref="System.String"/> that represents the current <see cref="GPSRCmdGen.Grammar"/>.
/// </summary>
/// <returns>A <see cref="System.String"/> that represents the current <see cref="GPSRCmdGen.Grammar"/>.</returns>
public override string ToString ()
{
return string.Format ("[Grammar: Name={0}, Tier={1}]", Name, Tier);
}
#endregion
#region Static variables
/// <summary>
/// Regular expression used to extract the name of a grammar
/// </summary>
private static readonly Regex rxGrammarNameXtractor;
/// <summary>
/// Regular expression used to extract the difficulty degree of a grammar
/// </summary>
private static readonly Regex rxGrammarTierXtractor;
#endregion
#region Static constructor
/// <summary>
/// Initializes the <see cref="GPSRCmdGen.Grammar"/> class.
/// </summary>
static Grammar(){
rxGrammarNameXtractor = new Regex(@"^\s*grammar\s+name\s+(?<name>.*)\s*", RegexOptions.Compiled | RegexOptions.IgnoreCase);
rxGrammarTierXtractor = new Regex(@"^\s*grammar\s+tier\s+(?<tier>\w+)\s*", RegexOptions.Compiled | RegexOptions.IgnoreCase);
}
#endregion
#region Static Methods
/// <summary>
/// Expands all production rules in a set of production rules.
/// Expansion consists in spliting all parentheses and OR operators in a
/// replacement into new production rules
/// </summary>
/// <param name="ruleDicc">A given rule dictionary having the non-terminal
/// symbol as key and the production rule as value.</param>
private static void ExpandRules(Dictionary<string, ProductionRule> ruleDicc)
{
List<ProductionRule> ruleList = new List<ProductionRule>(2 * ruleDicc.Count);
foreach (KeyValuePair<string,ProductionRule> p in ruleDicc)
ruleList.Add (p.Value);
for (int ix = 0; ix < ruleList.Count; ++ix) {
ExpandRule (ix, ruleList, ruleDicc);
}
}
/// <summary>
/// Expands the addressed production rule in a set of production rules.
/// Expansion consists in spliting all parentheses and OR operators in a
/// replacement into new production rules
/// </summary>
/// <param name="ix">The index of the rule to expand in the lsit of rules</param>
/// <param name="ruleList">The set of production rules of the grammar with
/// elements accessible by index in O(1).</param>
/// <param name="ruleDicc">The set of production rules of the grammar with
/// elements accessible by non-terminal symbol in O(1).</param>
private static void ExpandRule (int ix, List<ProductionRule> ruleList, Dictionary<string, ProductionRule> ruleDicc)
{
if (ix >= ruleList.Count)
return;
string replacement;
string nonTerminal;
int nonTerminalBaseIndex = 0;
ProductionRule pr = ruleList [ix];
for (int i = 0; i < pr.Replacements.Count; ++i) {
replacement = pr.Replacements [i];
// Find open parenthesis within replacement
for (int cc = 0; cc < replacement.Length; ++cc) {
if (replacement [cc] != '(')
continue;
// Open parenthesis was found!
// Increase reading header to the char at the right of the left par
int bcc = ++cc;
// and find the closing parenthesis
if (!ProductionRule.FindClosePar (replacement, ref cc))
break;
// Get the replacement (subchunk)
string subchunk = replacement.Substring (bcc, cc - bcc);
// Generate a Non-Terminal symbol (name) for the replacement
nonTerminal = GenerateNonTerminal(pr.NonTerminal, ref nonTerminalBaseIndex, ruleDicc);
// Create and add the production rule
ProductionRule cpr = ProductionRule.FromString(nonTerminal+" = "+subchunk);
ruleList.Add (cpr);
ruleDicc.Add (cpr.NonTerminal, cpr);
// Replace the subchunk with the Non-Terminal symbol
replacement = replacement.Substring (0, bcc-1) + nonTerminal + replacement.Substring (cc+1);
pr.Replacements [i] = replacement;
cc = bcc + nonTerminal.Length -2;
}
}
}
/// <summary>
/// Generates a new symbol for a non-terminal from a existing one
/// </summary>
/// <returns>The non terminal.</returns>
/// <param name="parentNonTerminal">The symbol of the non-terminal which is
/// going to be split and which will be used as base for generating the
/// new non-terminal symbol.</param>
/// <param name="ix">The index of the current production</param>
/// <param name="ruleDicc">The dictionary containing all the rules of the grammar</param>
private static string GenerateNonTerminal (string parentNonTerminal, ref int ix, Dictionary<string, ProductionRule> ruleDicc)
{
string nonTerminal;
string prefix = parentNonTerminal + "_";
do {
nonTerminal = prefix + ix.ToString ();
++ix;
} while(ruleDicc.ContainsKey(nonTerminal));
return nonTerminal;
}
/// <summary>
/// Parses a set of strings containing each a production rule, converting them into
/// a non-terminal symbol addressable set of expanded production rules.
/// </summary>
/// <param name="rules">The set of strings to parse</param>
/// <returns>A set of production rules of with elements accessible by its
/// non-terminal symbol</returns>
private static Dictionary<string, ProductionRule> ParseProductionRules(IEnumerable<string> rules)
{
ProductionRule pr;
Dictionary<string, ProductionRule> prsd;
prsd = new Dictionary<string, ProductionRule> ();
foreach (string line in rules) {
pr = ProductionRule.FromString (line);
if ((pr == null) || (pr.Replacements.Count < 1))
continue;
if (prsd.ContainsKey (pr.NonTerminal))
prsd[pr.NonTerminal].AddReplacements(pr);
else
prsd.Add (pr.NonTerminal, pr);
}
ExpandRules (prsd);
return prsd;
}
/// <summary>
/// Loads a grammar from a text file.
/// </summary>
/// <param name="filePath">The grammar file path.</param>
/// <returns>The grammar represented within the provided file, or null
/// if the grammar could not be loadder.</returns>
public static Grammar LoadFromFile (string filePath)
{
if (!File.Exists (filePath))
return null;
Grammar grammar = new Grammar ();
List<string> lines = new List<string>(File.ReadAllLines (filePath));
StripCommentsAndEmptyLines (lines, grammar);
grammar.productionRules = ParseProductionRules (lines);
if (!grammar.productionRules.ContainsKey ("$Main"))
return null;
return grammar;
}
/// <summary>
/// Removes a multi-line comment form a list of text lines
/// </summary>
/// <param name="lines">The list of text lines in the grammar text file.</param>
/// <param name="i">The index of the line in the list where the multi-line comment was found.</param>
/// <param name="j">The index within the line of the first character to the
/// right of the multi-line comment start symbol.</param>
private static void ParseMultiLineComment (List<string> lines, int i, int j)
{
lines [i] = lines [i].Substring (0, j-1);
if(lines[i].Length < 1)
lines.RemoveAt(i);
while (i < lines.Count) {
j = lines [i].IndexOf ("*/");
if (j != -1)
break;
lines.RemoveAt(i);
}
lines [i] = lines [i].Substring (j+2);
}
/// <summary>
/// Parses the single line comment looking for the name and tier
/// (difficulty degree) of the grammar
/// </summary>
/// <param name="grammar">The grammar file where the found data will be stored</param>
/// <param name="line">The string that contains the comment</param>
/// <param name="j">The position in the line where the comment starts</param>
private static void ParseSingleLineComment (Grammar grammar, string line, int j)
{
if (++j >= line.Length)
return;
Match m;
line = line.Substring (j);
if (String.IsNullOrEmpty (grammar.Name)) {
m = rxGrammarNameXtractor.Match (line);
if (m.Success)
grammar.Name = m.Result ("${name}");
}
m = rxGrammarTierXtractor.Match (line);
if (grammar.Tier == DifficultyDegree.Unknown) {
m = rxGrammarTierXtractor.Match (line);
try{
if(m.Success)
grammar.Tier = (DifficultyDegree)Enum.Parse(typeof(DifficultyDegree), m.Result("${tier}"));
}catch{
return;
}
}
}
/// <summary>
/// Strips comments from the content of a grammar text file
/// </summary>
/// <param name="lines">The set of lines read of a grammar text file</param>
/// <param name="i">The index of the current line being analized</param>
/// <param name="grammar">A grammar object on which additional information may
/// be dumped</param>
private static void StripComments(List<string> lines, int i, Grammar grammar){
for (int j = 0; j < lines[i].Length; ++j) {
// Double character commenting
if ((lines [i] [j] == '/') && ((j+1) < lines[i].Length)) {
++j;
if (lines [i] [j] == '/') {
ParseSingleLineComment (grammar, lines [i], j);
lines [i] = lines [i].Substring (0, j-1);
break;
} else if (lines [i] [j] == '*') {
ParseMultiLineComment (lines, i, j);
break;
}
}
// Single character commenting
else if ((lines [i] [j] == '#') || (lines [i] [j] == ';') || (lines [i] [j] == '%')) {
ParseSingleLineComment (grammar, lines[i], j);
lines [i] = lines [i].Substring (0, j);
break;
}
}
}
/// <summary>
/// Strips comments and empty lines from the content of a grammar text file
/// </summary>
/// <param name="lines">The set of lines read of a grammar text file</param>
/// <param name="grammar">A grammar object on which additional information may
/// be dumped</param>
private static void StripCommentsAndEmptyLines(List<string> lines, Grammar grammar)
{
for (int i = 0; i < lines.Count; ++i) {
lines [i] = lines [i].Trim ();
StripComments (lines, i, grammar);
if(lines[i].Length < 1)
lines.RemoveAt(i--);
}
}
#endregion
}
}