forked from NeonMan/gema
-
Notifications
You must be signed in to change notification settings - Fork 0
/
read.c
1967 lines (1878 loc) · 54.1 KB
/
read.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* read pattern definitions */
/* $Id: read.c,v 1.20 2004/09/18 22:57:06 dngray Exp $ */
/*********************************************************************
This file is part of "gema", the general-purpose macro translator,
written by David N. Gray <[email protected]> in 1994 and 1995.
You may do whatever you like with this, so long as you retain
an acknowledgment of the original source.
*********************************************************************/
/*
* $Log: read.c,v $
* Revision 1.20 2004/09/18 22:57:06 dngray
* Allow MAX_DOMAINS to be larger than 255
* (merged changes contributed by Alex Karahalios).
*
* Revision 1.19 2001/12/15 20:22:44 gray
* Modify use of hex character constants to work around a compiler bug on
* DEC Alpha OSF1. Clean up compiler warnings.
*
* Revision 1.18 2001/09/30 23:10:20 gray
* Fix uninitialized variable in skip_comment.
*
* Revision 1.17 1996/04/08 05:29:56 gray
* Fixed initialization of `fnnargs' so that ${varname} always works even when
* @var has never been used. Fixed interaction of comment and continuation
* lines. If the first line of a pattern file begins with "#!", ignore that
* line so that pattern files can be made directly executable. When '=' is
* missing, discard incomplete rule to avoid other errors. Warn when template
* begins with recursive argument in same domain (will just overflow stack).
*
* Revision 1.16 1995/08/27 21:03:46 gray
* Fix handling of space between identifiers with "-t" or "-w".
* Fix to not be prevented from using dispatch table when template begins with
* "\I" or when upper/lower case difference for key.
*
* Revision 1.15 1995/08/20 05:34:40 gray
* Treat variable in template as literal for argument terminator.
* Fix to not free patterns installed in multiple places.
* Fix bug in dispatch table setup.
*
* Revision 1.14 1995/08/13 05:35:49 gray
* New macro `char_kind' to ensure uniform handling of EOF -- fixes crash on
* EOF at end of inherited domain rule.
*
* Revision 1.13 1995/08/06 02:26:47 gray
* Fix bug on "\A" (regression in previous version).
* Add "<J>" (match lower case) and "<K>" (match upper case).
* Support @set-syntax{M;\'} for quoting a string.
*
* Revision 1.12 1995/07/27 05:31:34 gray
* Fix a couple of problems with spaces with "-w".
*
* Revision 1.11 1995/07/27 03:00:45 gray
* Fix handling of "\B" followed by an argument instead of a literal.
*
* Revision 1.10 1995/06/12 03:05:17 gray
* Fixed bug in handling of rule domain enclosed in angle brackets
* ("<...>:..."). Add new functions @get-switch and @out-column.
*
* Revision 1.9 1995/05/30 05:06:21 gray
* Fix occasional spurious warning about escape sequence in action
* continued on second line.
*
* Revision 1.8 1995/05/08 03:13:46 gray
* Add @expand-wild
*/
#if defined(_QC) || defined(_MSC_VER) /* Microsoft C or Quick C */
#pragma check_stack(off)
#endif
#include "pattern.h"
#include "util.h"
#include "patimp.h"
#include <ctype.h> /* for isalnum */
#include <string.h>
#include <assert.h>
#include "reg-expr.h"
#include "main.h" /* for EXS_SYNTAX */
#include "var.h" /* for get_var */
/* Global option flags, all initially FALSE.
   NOTE(review): presumably set from command-line switches and consulted by
   the matcher -- confirm against the code that reads them. */
boolean line_mode = FALSE;
boolean token_mode = FALSE;
boolean discard_unmatched = FALSE;
boolean debug_switch = FALSE;
/* Classification of a character read from a pattern definition.
   char_kind() maps a character code to one of these values through
   char_table, and both default_syntax_chars[] and syntax_chars[] are
   indexed by these values, so the order of the enumerators must stay in
   step with the characters in the default_syntax_chars initializer. */
enum char_kinds {
PI_LITERAL, /* character represents itself */
PI_LIT_CTRL, /* control character that represents itself but has
the same code as a pattern operator */
PI_CR, /* Carriage Return */
PI_ARG, /* template argument -- wild card match */
PI_1ARG, /* template argument -- match any one character */
PI_RARG, /* template argument with recursive translation */
PI_SEP, /* separates template and action */
PI_PUT, /* outputs an argument or variable in an action */
PI_QUOTE, /* take next character literally */
PI_ESC, /* escape character */
PI_CTRL, /* combines with next character for ASCII control */
PI_SPACE, /* matches one or more white space characters */
PI_BEGIN_ARG, /* begin argument list */
PI_ARG_SEP, /* separates arguments of action operators */
PI_END_ARG, /* end argument list */
PI_END, /* end of pattern */
PI_DOMAIN, /* terminates domain name preceding pattern */
PI_BEGIN_DOMAIN_ARG,
PI_END_DOMAIN_ARG,
PI_END_REGEXP,
PI_BEGIN_REGEXP,
PI_CHAR_OP, /* makes following character special */
PI_OP, /* introduces named command */
PI_COMMENT, /* rest of line is a comment */
PI_ABBREV_DOMAIN, /* single-character recursive argument domain */
PI_IGNORE, /* character that is completely ignored */
PI_IGNORED_SPACE, /* ignored unless needed as delimiter */
PI_QUOTE_STRING, /* take characters literally until matching quote */
PI_EOF, /* end of file */
Num_Char_Kinds /* must be last element of enumeration */
};
/* number of character codes classified by char_table */
#define NUMCHAR 256
/* Default syntax: element i is the character that has kind i (an
   enum char_kinds value) by default, or NUL where a kind has no default
   character.  Where one character appears twice, default_char_kind()
   reports the later position. */
static unsigned char default_syntax_chars[Num_Char_Kinds+1] =
".\1\r*?#=$\\\\^ {;};:<>//@@!\0\0\0\0\0";
/* Current syntax, indexed like default_syntax_chars; copied from the
   defaults by initialize_syntax() and modified by set_syntax(). */
static unsigned char syntax_chars[Num_Char_Kinds+1];
/* Kind (enum char_kinds value) of each character code.  When EOF is -1 the
   table starts with an extra entry for EOF, so it is indexed by code+1
   (see char_kind below).  Only the entries up to 0x20 are given here; the
   remaining elements are zero-initialized, and initialize_syntax() assigns
   the kinds for 0x20 and above. */
static char char_table[NUMCHAR+1] = {
#if EOF == (-1)
/* -1: */ PI_EOF,
#endif
/* 00: */ PI_LIT_CTRL,
/* 01: */ PI_LIT_CTRL,
/* 02: */ PI_LIT_CTRL,
/* 03: */ PI_LIT_CTRL,
/* 04: */ PI_LIT_CTRL,
/* 05: */ PI_LIT_CTRL,
/* 06: */ PI_LIT_CTRL,
/* 07: */ PI_LIT_CTRL,
/* 08: */ PI_LITERAL, /* ASCII BS */
/* 09: */ PI_LITERAL, /* ASCII HT */
/* 0A: */ PI_LITERAL, /* ASCII LF */
/* 0B: */ PI_LITERAL, /* VT */
/* 0C: */ PI_LITERAL, /* FF */
#if defined(MSDOS) || '\r' == '\n'
/* 0D: */ PI_LITERAL, /* CR */
#else
/* 0D: */ PI_CR, /* CR */
#endif
/* 0E: */ PI_LIT_CTRL,
/* 0F: */ PI_LIT_CTRL,
/* 10: */ PI_LIT_CTRL,
/* 11: */ PI_LIT_CTRL,
/* 12: */ PI_LIT_CTRL,
/* 13: */ PI_LIT_CTRL,
/* 14: */ PI_LIT_CTRL,
/* 15: */ PI_LITERAL, /* EBCDIC NL */
/* 16: */ PI_LIT_CTRL,
/* 17: */ PI_LIT_CTRL,
/* 18: */ PI_LIT_CTRL,
/* 19: */ PI_LIT_CTRL,
/* 1A: */ PI_LIT_CTRL,
/* 1B: */ PI_LITERAL, /* ASCII ESC */
/* 1C: */ PI_LIT_CTRL,
/* 1D: */ PI_LIT_CTRL,
/* 1E: */ PI_LIT_CTRL,
/* 1F: */ PI_LIT_CTRL,
/* 20: */ PI_LITERAL };
/* char_kind(ch): kind of character code `ch', which may be EOF.
   set_char_kind(ch,k): record `k' as the kind of character code `ch'. */
#if EOF == (-1)
/* char_table has a leading entry for EOF, so both macros index it at
   ch+1.  The argument is used as a raw array index: it must be EOF or a
   non-negative code below NUMCHAR. */
#define char_kind(ch) ((enum char_kinds)((char_table+1)[ch]))
#define set_char_kind(ch,k) char_table[(ch)+1] = (k)
#else
/* I don't know of any implementation where EOF is not -1, but the ANSI
standard does not require it. */
static enum char_kinds
char_kind(int ch){
return ( ch == EOF ? PI_EOF : (enum char_kinds) char_table[ch] );
}
#define set_char_kind(ch,k) char_table[ch] = (k)
#endif
boolean is_operator(int x) {
return char_kind(x) == PI_LIT_CTRL;
}
static enum char_kinds
default_char_kind( int pc ) {
const char* x;
x = strrchr((const char*)default_syntax_chars,(char)pc);
if ( x == NULL )
return PI_LITERAL;
else
return (enum char_kinds)(x - (const char*)default_syntax_chars);
}
boolean set_syntax( int type, const char* char_set ) {
enum char_kinds k;
const char* s;
switch(toupper(type)) {
case 'L': k = PI_LITERAL; break;
case 'Q': k = PI_QUOTE; break;
case 'M': k = PI_QUOTE_STRING; break;
case 'E': k = PI_ESC; break;
case 'C': k = PI_COMMENT; break;
case 'A': k = PI_ARG_SEP; break;
case 'T': k = PI_END; break;
case 'F': k = PI_OP; break;
case 'I': k = PI_IGNORE; break;
case 'S': k = PI_IGNORED_SPACE; break;
case 'D': k = PI_ABBREV_DOMAIN; break;
case 'K': k = PI_CHAR_OP; break;
default:
k = char_kind(type);
if ( k <= PI_LIT_CTRL ) {
k = default_char_kind(type);
if ( k <= PI_LIT_CTRL )
return FALSE;
}
break;
} /* end switch */
for ( s = char_set ; s[0] != '\0' ; s++ ) {
unsigned int ch = *(const unsigned char*)s;
unsigned char* scp = &syntax_chars[ char_kind(ch) ];
if ( *scp == ch )
*scp = '\0';
set_char_kind(ch,k);
syntax_chars[k] = ch;
}
return TRUE;
}
#if 0 /* old way */
/* Disabled code: an earlier scheme in which each special character was a
   fixed macro.  The same names are now enumerators of enum char_kinds and
   the characters live in default_syntax_chars / syntax_chars. */
/* special characters in external pattern definitions: */
#define PI_ARG '*' /* template argument -- wild card match */
#define PI_1ARG '?' /* template argument -- match any one character */
#define PI_RARG '#' /* template argument with recursive translation */
#define PI_SEP '=' /* separates template and action */
#define PI_PUT '$' /* outputs an argument or variable in an action */
#define PI_QUOTE '\\' /* take next character literally */
#define PI_ESC '\\' /* escape character */
#define PI_CTRL '^' /* combines with next character for ASCII control */
#define PI_SPACE ' ' /* matches one or more white space characters */
#define PI_END '\n' /* end of pattern */
#define PI_ALTEND ';' /* end of pattern */
#define PI_BEGIN_ARG '{' /* begin argument list */
#define PI_ARG_SEP ';' /* separates arguments of action operators */
#define PI_END_ARG '}' /* end argument list */
#define PI_DOMAIN ':' /* terminates domain name preceding pattern */
#define PI_BEGIN_DOMAIN_ARG '<'
#define PI_END_DOMAIN_ARG '>'
#define PI_BEGIN_REGEXP '/'
#define PI_END_REGEXP '/'
#define PI_OP '@' /* introduces named command */
#define PI_COMMENT '!' /* rest of line is a comment */
#endif
/* number of entries of `domains' currently in use */
int ndomains = 0;
/* When MAX_DOMAINS is 256 or more, a domain number is stored in an action
   as two bytes carrying 7 bits each, hence the 14-bit limit. */
#if MAX_DOMAINS > (1<<14)
#error "MAX_DOMAINS is too large; must fit in 14 bits."
#endif
/* table of domains, indexed by the number that find_domain() returns */
Domain domains[MAX_DOMAINS] = { NULL };
/* Strip leading and trailing padding from the name in buffer `x'.
   Padding is white space and the PT_SPACE and PT_ID_DELIM codes.
   Returns a pointer to the first non-padding character inside `x'; the
   trailing padding is cut off by storing a NUL in the buffer.  When the
   name begins with PT_RECUR only the leading padding is skipped and the
   buffer is not modified. */
char* trim_name( unsigned char* x ) {
  unsigned char* first = x;
  size_t len;

  for ( ; isspace(*first) || *first == PT_SPACE || *first == PT_ID_DELIM ;
        first++ )
    ;
  if ( *first == PT_RECUR )
    return (char*)first;

  len = strlen((const char*)first);
  while ( len > 0 ) {
    unsigned char last = first[len-1];
    if ( !( isspace(last) || last == PT_SPACE || last == PT_ID_DELIM ) )
      break;
    len--;
  }
  first[len] = '\0';
  return (char*)first;
}
/* Return the index in `domains' of the domain called `name', creating a
   new empty domain when none exists yet.  Names are compared with
   stricmp() when `case_insensitive' is set, otherwise with strcmp(); the
   most recently created domains are examined first.  If the table is
   already full, a message is printed and the program exits with status
   EXS_SYNTAX. */
static int find_domain( const char* name ) {
  int idx;
  Domain created;

  for ( idx = ndomains - 1 ; idx >= 0 ; idx-- ) {
    int differ = case_insensitive ? stricmp(name, domain_name(idx))
                                  : strcmp(name, domain_name(idx));
    if ( differ == 0 )
      return idx;
  }

  if ( ndomains >= MAX_DOMAINS ) {
    fprintf(stderr,"More than %d domain names; aborting.\n", MAX_DOMAINS);
    exit((int)EXS_SYNTAX);
    return -1; /* just to avoid warning from SGI compiler */
  }

  /* not found: append a fresh domain with no patterns */
  idx = ndomains++;
  created = (Domain)allocate(sizeof(struct domain_struct), MemoryPatterns);
  domains[idx] = created;
  created->name = str_dup(name);
  created->inherits = NULL;
  created->patterns.head = NULL;
  created->patterns.tail = NULL;
  created->patterns.dispatch = NULL;
  created->init_and_final_patterns = NULL;
  return idx;
}
/* Return the domain called `name'; find_domain() creates it if it does
   not exist yet. */
Domain get_domain( const char* name ) {
  const int index = find_domain(name);
  return domains[index];
}
/* Release one pattern: its template text, its action text, and then the
   pattern structure itself. */
static void
delete_pattern ( Pattern p ) {
  char* template_text = (char*)p->pattern;
  char* action_text = (char*)p->action;
  free(template_text);
  free(action_text);
  free(p);
}
/* Empty the pattern set `ps'.  Every sub-set reachable through the
   dispatch table is emptied recursively and freed, then the table itself
   is freed, then each pattern on the list is deleted.  `ps' itself is not
   freed; it is left with no dispatch table and an empty list. */
static void
delete_patterns ( Patterns ps ) {
  Pattern cur;

  if ( ps->dispatch != NULL ) {
    int slot;
    for ( slot = 0 ; slot < DISPATCH_SIZE ; slot++ ) {
      Patterns sub = ps->dispatch[slot];
      if ( sub == NULL )
        continue;
      delete_patterns(sub);
      free(sub);
    }
    free(ps->dispatch);
    ps->dispatch = NULL;
  }

  cur = ps->head;
  while ( cur != NULL ) {
    /* fetch the link before the node is freed */
    Pattern following = cur->next;
    delete_pattern(cur);
    cur = following;
  }
  ps->head = NULL;
  ps->tail = NULL;
}
/* Delete all patterns of domain number `n', including its list of
   init_and_final_patterns.  The domain structure and its name are freed,
   and `ndomains' decremented, only when `n' is the last domain in the
   table; any other domain keeps its (now empty) entry. */
void delete_domain(int n) {
  Domain dom;
  Pattern extra;

  assert( n < ndomains );
  dom = domains[n];
  delete_patterns( &dom->patterns );
  while ( (extra = dom->init_and_final_patterns) != NULL ) {
    dom->init_and_final_patterns = extra->next;
    delete_pattern(extra);
  }

  if ( n != ndomains-1 )
    return;
  ndomains--;
  free((char*)dom->name);
  free(dom);
}
/* Copy stream `in' to stream `out' up to end of file, writing the current
   quote character (syntax_chars[PI_QUOTE]) in front of every character
   whose kind is not PI_LITERAL.  A new line is written as the quote
   character followed by the letter 'n'. */
void quoted_copy( CIStream in, COStream out ) {
  const int quote_char = (int)syntax_chars[PI_QUOTE];
  int c;

  while ( (c = cis_getch(in)) != EOF ) {
    if ( char_kind(c) != PI_LITERAL ) {
      cos_putch(out, quote_char);
      if ( c == '\n' )
        c = 'n';
    }
    cos_putch(out, c);
  }
}
/* Return `s' as an ordinary C string, substituting the empty string for
   a NULL pointer so that the result can always be printed. */
const char* safe_string(const unsigned char* s) {
  return ( s != NULL ) ? (const char*)s : "";
}
/* Print one line of help on stderr for the character that currently
   represents kind `chartype', followed by `description'.  Nothing is
   printed when no character is assigned to that kind. */
static void
describe_character(enum char_kinds chartype, const char* description ) {
  const int current = (int)syntax_chars[(int)chartype];
  if ( current == '\0' )
    return;
  fprintf(stderr, "\t'%c' %s\n", current, description );
}
/* Print a summary of the pattern syntax, using the characters currently
   assigned to each role, followed by a list of all printable characters
   whose kind ranks above PI_CR.  `f' must be stderr: the per-character
   lines come from describe_character(), which writes to stderr whatever
   `f' is. */
void pattern_help( FILE* f ) {
  int i;

  assert( f == stderr );
  fprintf(f, "Pattern syntax:\n\t<template>%c<replacement>\n",
          syntax_chars[PI_SEP]);
  fputs("Text matching <template> is replaced by <replacement>.\n", f);
  fprintf(f, "Patterns are separated by a newline or '%c'.\n",
          syntax_chars[PI_END]);

  fputs("Special characters within a template:\n", f);
  describe_character( PI_ARG, "argument - match any number of characters" );
  describe_character( PI_1ARG, "argument - match any one character" );
  describe_character( PI_RARG, "argument, recursively translated" );
  describe_character( PI_BEGIN_REGEXP, "regular expression delimiter" );

  fputs("Special characters within the replacement:\n", f );
  describe_character( PI_PUT, "followed by digit, insert numbered argument" );
  describe_character( PI_OP, "prefixes name of function to call" );

  fputs("Within both template and replacement:\n", f);
  describe_character( PI_ESC, "is an escape character." );
  describe_character( PI_CTRL, "adds the Control key to the following character" );

  fputs( "Following are all of the characters with special meaning:\n\t", f);
#if 0
  for ( i = PI_ARG ; i < PI_EOF ; i++ ) {
    int ch = (int)syntax_chars[i];
    if ( isprint(ch) && strchr((const char*)syntax_chars+1+i, ch)==NULL ) {
      fputc( ch, f );
      fputc( ' ', f );
    }
  }
#else
  for ( i = 0 ; i < NUMCHAR ; i++ ) {
    if ( char_kind(i) <= PI_CR )
      continue;
    if ( !isgraph(i) )
      continue;
    fputc( i, f );
    fputc( ' ', f );
  }
#endif
  fputs( "\nSee the man page for further details.\n", f );
}
/* Consume characters from stream `s' for as long as the next one is
   white space; the first non-space character is left unread. */
void
skip_whitespace( CIStream s ) {
  for ( ; ; ) {
    if ( !isspace(cis_peek(s)) )
      return;
    (void)cis_getch(s);
  }
}
/* Translate the escape sequence that follows the escape or quote
   character `ch', reading the rest of the sequence from stream `s' and
   storing the resulting code(s) at `bp'.

   Returns the position just past the last byte stored.  An escaped end of
   line stores nothing: the line break and the leading white space of the
   next line are skipped and `bp' is returned unchanged.

   If `ch' is a quote character other than the current escape character,
   the next character is taken literally.  Otherwise the character after
   `ch' selects a control character, a hexadecimal (\xHH) or octal
   character code, a template operator (stored as a PT_ code, some of them
   prefixed with PT_AUX), or a quoted literal.  A resulting character that
   is_operator() reports as clashing with an operator code is preceded by
   PT_QUOTE.  Invalid sequences are reported through input_error(). */
static unsigned char*
escaped_char( int ch, unsigned char* bp, CIStream s ) {
  int nc;
  int pc;
  nc = cis_getch(s);
  if ( syntax_chars[PI_ESC] != ch && char_kind(ch) == PI_QUOTE )
    pc = nc;
  else
    switch(nc) {
      /* control characters */
#ifndef MSDOS
#if '\r' != '\n'
      case '\r':
        if ( cis_peek(s) != '\n' ) {
          pc = nc;
          break;
        }
        /* else fall-through to ignore redundant CR in MS-DOS files */
#endif
#endif
      case '\n': /* ignore new line and leading space on next line */
        skip_whitespace(s);
        return bp;
      case 'n': pc = '\n'; break;
      case 't': pc = '\t'; break;
      case 'a': pc = '\a'; break;
      case 'b': pc = '\b'; break;
      case 'f': pc = '\f'; break;
      case 'r': pc = '\r'; break;
      case 'v': pc = '\v'; break;
      case 's': pc = ' '; break;
#if 'A' == 0x41
      case 'e': pc = ((char)0x1B) ; break; /* ASCII Escape */
      case 'd': pc = ((char)0x7F) ; break; /* ASCII Delete */
      case 'c': { /* control */
        int xc;
        xc = cis_getch(s);
        pc = toupper(xc) ^ 0x40;
        break;
      }
#elif 'A' == 0xC1
      case 'e': pc = ((char)0x27) ; break; /* EBCDIC Escape */
      case 'd': pc = ((char)0x07) ; break; /* EBCDIC Delete */
#endif
#if 0 /* not needed */
      /* the following two are the same in ASCII and EBCDIC */
      case 'o': pc = ((char)0x0E) ; break; /* shift out */
      case 'i': pc = ((char)0x0F) ; break; /* shift in */
#endif
      case 'x': {
        /* exactly two hexadecimal digits */
        char cbuf[4];
        char* endp;
        cbuf[0] = (char)cis_getch(s);
        cbuf[1] = (char)cis_getch(s);
        cbuf[2] = '\0';
        pc = (int)strtol(cbuf,&endp,16);
        if ( *endp != '\0' )
          input_error(s, EXS_SYNTAX,
                      "Invalid escape sequence in pattern: \"%cx%s\"\n",
                      ch, cbuf);
        break;
      }
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7': {
        /* octal character code: accumulate three bits per digit.  Only
           '0'..'7' continue the number; an '8' or '9' is not an octal
           digit and is left in the stream as ordinary text. */
        int od;
        pc = nc - '0';
        while ( (od = cis_peek(s)) >= '0' && od <= '7' )
          pc = (pc << 3) + cis_getch(s) - '0';
        break;
      }
      /* template match operators */
      case 'S': pc = PT_SPACE; goto store;
      case 'W': pc = PT_SKIP_WHITE_SPACE; goto store;
      case 'I': pc = PT_ID_DELIM ; goto store;
      case 'X': pc = PT_WORD_DELIM ; goto store;
#if 0 /* changed my mind */
      case 'V': pc = PT_ARG_DELIM ; goto store;
#endif
      case 'N': pc = PT_LINE ; goto store;
      case 'B': pc = PTX_BEGIN_FILE ; goto aux;
      case 'E': pc = PTX_END_FILE ; goto aux;
      case 'L': pc = PTX_ONE_LINE ; goto aux;
      case 'C': pc = PTX_NO_CASE ; goto aux;
      case 'A': pc = PTX_INIT; goto aux;
      case 'Z': pc = PTX_FINAL; goto aux;
      case 'P': pc = PTX_POSITION; goto aux;
      case 'G': pc = PTX_NO_GOAL; goto aux;
      case 'J': pc = PTX_JOIN; goto aux;
      /* quoting literal character */
      default:
        if ( isalnum(nc) ||
             ( ch != '\\' && ch != syntax_chars[PI_QUOTE] ) )
          input_error(s, EXS_SYNTAX,
                      "Invalid escape sequence in pattern: \"%c%c\"\n",
                      ch, nc);
        pc = nc;
        break;
    } /* end switch */
  /* reached for every literal character, from either branch above */
  if ( is_operator(pc) )
    *bp++ = PT_QUOTE;
  goto store;
aux:
  *bp++ = PT_AUX;
store:
  *bp = pc;
  return bp+1;
}
/* One interned regular expression. */
struct regex_struct {
const char* source; /* copy of the expression text, made with str_dup */
unsigned char* compiled; /* buffer filled in by regexp_comp */
};
/* table of interned expressions; allocated on first use by intern_regexp */
static struct regex_struct * regex_table = NULL;
/* index of the last entry in use; -1 while the table is empty */
static int last_regex = -1;
/* capacity of regex_table */
#define MAX_NUM_REGEXP 255
/* Return the number under which the regular expression `exp' is stored
   in regex_table, compiling and adding it first if an identical source
   string has not been seen before.  `in' is used only for reporting
   errors.

   The compile buffer starts at 200 bytes and is doubled for as long as
   regexp_comp() reports that it is too short.  Running out of table
   entries or memory is fatal.  A syntax error in the expression is fatal
   unless `keep_going' is set, in which case the number of the substitute
   expression "\1" is returned instead.

   NOTE(review): after such an error the failed expression keeps its table
   entry, so a later identical expression is found by the lookup loop and
   its number returned although it never compiled -- confirm whether that
   is intended. */
int intern_regexp( unsigned char* exp, CIStream in ) {
  int i;
  if ( regex_table == NULL )
    /* room for MAX_NUM_REGEXP whole entries, not MAX_NUM_REGEXP pointers */
    regex_table = (struct regex_struct*)
      allocate( MAX_NUM_REGEXP * sizeof *regex_table, MemoryRegexp);
  for ( i = last_regex ; i >= 0 ; i-- )
    if ( strcmp( (const char*)exp, regex_table[i].source )==0 )
      return i;
  if ( last_regex >= (MAX_NUM_REGEXP-1) ) {
    input_error(in, EXS_SYNTAX, "More than %d unique regular expressions.\n",
                MAX_NUM_REGEXP);
    exit(exit_status);
    return -1; /* just to avoid warning from SGI compiler */
  }
  else {
    struct regex_struct* p;
    unsigned char* msg;
    size_t bufsize;
    bufsize = 200;
    p = regex_table + ++last_regex;
    p->source = str_dup((const char*)exp);
    p->compiled = allocate ( bufsize, MemoryRegexp);
    for ( ; ; ) {
      msg = regexp_comp(exp, p->compiled, bufsize);
      if ( msg == NULL ) /* OK */
        break;
      else if ( msg == regexp_dfa_buffer_too_short ) {
        unsigned char* grown;
        bufsize = bufsize * 2;
        /* keep the old pointer until the new one is known to be valid */
        grown = realloc( p->compiled, bufsize );
        if ( grown == NULL ) {
          input_error(in, EXS_MEM, "Out of memory for regular expression.\n");
          exit(exit_status);
        }
        p->compiled = grown;
        continue;
      }
      else {
        input_error(in, EXS_SYNTAX, "Error in regular expression: %s\n", msg);
        if ( ! keep_going ) {
          exit(exit_status);
        }
        return intern_regexp( (unsigned char*)"\1", in );
      }
    }
    return last_regex;
  }
}
/* Try regular expression number `regex_num' (as returned by
   intern_regexp) at the beginning of `text'.  Returns the position of the
   end of the match, or NULL when the expression does not match.
   `start_of_line' is handed on to regexp_exec(). */
unsigned char*
regex_match(int regex_num, const unsigned char* text, boolean start_of_line) {
  boolean found;
  assert( regex_num >= 0 && regex_num <= last_regex );
  found = regexp_exec( (unsigned char*)text, start_of_line, FALSE,
                       regex_table[regex_num].compiled );
  if ( found )
    return regexp_eopat[0];
  return NULL;
}
/* Return the literal character with which every match of regular
   expression number `regex_num' must begin, or PT_REGEXP when that cannot
   be determined from the source text.  A leading "^" and a following "\<"
   are skipped; the next character is the key unless it is a regular
   expression operator, the end of the string, or a character that
   is_operator() reports as clashing with a pattern operator code. */
static int
regexp_key( int regex_num ) {
  const char* s;
  int ch;
  s = regex_table[regex_num].source;
  if ( s[0] == '^' )
    s++;
  if ( s[0] == '\\' && s[1] == '<' )
    s += 2;
  /* Widen as unsigned: where plain char is signed, a byte of 0x80 or
     above would otherwise become a negative code and index char_table
     out of bounds through is_operator(). */
  ch = (unsigned char)s[0];
  /* strchr() also matches the terminating NUL, which covers an empty
     expression */
  if ( strchr(".*+\\[]", ch )==NULL && !is_operator(ch) )
    return ch;
  else return PT_REGEXP;
}
/* Table of the built-in action functions: the name written after the
   operator character, the opcode that represents it, and its number of
   arguments.  The table ends with an entry whose name is NULL.
   Some names are synonyms for one opcode (bind/push, pop/unbind,
   relpath/relative-path), while "getenv" and "var" each appear twice,
   with a different opcode for each argument count. */
static struct action_ops {
const char* name;
unsigned char code;
unsigned char nargs;
} action_operators[] =
{ { "abort", OP_ABORT, 0 },
{ "add", OP_ADD, 2 },
{ "and", OP_AND, 2 },
{ "append", OP_APPEND, 2 },
{ "bind", OP_BIND, 2 },
{ "center", OP_CENTER, 2 },
{ "char-int", OP_CHARINT, 1 },
{ "close", OP_CLOSE, 1 },
{ "cmpi", OP_STRI_CMP, 5 }, /* compare strings, case insensitive */
{ "cmpn", OP_NUM_CMP, 5 }, /* compare numbers */
{ "cmps", OP_STR_CMP, 5 }, /* compare strings, case sensitive */
{ "column", OP_COL, 0 },
{ "date", OP_DATE, 0 },
{ "datime", OP_DATIME, 0 },
{ "decr", OP_DECR, 1 },
{ "define", OP_DEFINE, 1 },
{ "div", OP_DIV, 2 },
{ "downcase", OP_DOWNCASE, 1 },
{ "end", OP_EXIT, 0 },
{ "err", OP_ERR, 1},
{ "exit-status", OP_EXIT_STATUS, 1 },
{ "expand-wild", OP_EXP_WILD, 1 },
{ "fail", OP_FAIL, 0 },
{ "file", OP_FILE, 0 },
{ "file-time", OP_MODTIME, 0 },
{ "fill-center", OP_FILL_CENTER, 2 },
{ "fill-left", OP_FILL_LEFT, 2 },
{ "fill-right", OP_FILL_RIGHT, 2 },
{ "getenv", OP_GETENV, 1 },
{ "getenv", OP_GETENV_DEFAULT, 2 },
{ "get-switch", OP_GET_SWITCH, 1 },
{ "incr", OP_INCR, 1 },
{ "inpath", OP_PATH, 0 },
{ "int-char", OP_INTCHAR, 1 },
{ "line", OP_LINE, 0 },
{ "left", OP_LEFT, 2 },
{ "length", OP_LENGTH, 1 },
{ "makepath", OP_COMBINEPATH, 3 },
{ "mergepath", OP_MERGEPATH, 3 },
{ "mul", OP_MUL, 2 },
{ "mod", OP_MOD, 2 },
{ "not", OP_NOT, 1 },
{ "or", OP_OR, 2 },
{ "out", OP_OUT, 1},
{ "outpath", OP_OUTFILE, 0 },
{ "out-column", OP_OUTCOL, 0 },
{ "push", OP_BIND, 2 },
{ "pop", OP_UNBIND, 1 },
{ "probe", OP_PROBE, 1 },
{ "quote", OP_QUOTE, 1 },
{ "radix", OP_RADIX, 3 },
{ "read", OP_READ, 1 },
{ "relpath", OP_RELPATH, 2 },
{ "relative-path", OP_RELPATH, 2 },
{ "repeat", OP_REPEAT, 2 },
{ "reset-syntax", OP_DEFAULT_SYNTAX, 0 },
{ "reverse", OP_REVERSE, 1 },
{ "right", OP_RIGHT, 2 },
{ "set", OP_SET, 2 },
#ifndef MSDOS
{ "set-locale", OP_LOCALE, 1 },
#endif
{ "set-switch", OP_SET_SWITCH, 2 },
{ "set-syntax", OP_SYNTAX, 2 }, /* @set-syntax{type;charset} */
{ "set-parm", OP_SET_PARM, 2 },
{ "set-wrap", OP_SET_WRAP, 2 },
{ "shell", OP_SHELL, 1},
{ "show-help", OP_HELP, 0},
{ "sub", OP_SUB, 2 },
{ "subst", OP_SUBST, 2 }, /* @subst{patterns;operand} */
{ "substring", OP_SUBSTRING, 3 }, /* @substring{skip,length,string} */
{ "tab", OP_TAB, 1 },
{ "terminate", OP_END_OR_FAIL, 0 },
{ "time", OP_TIME, 0 },
{ "unbind", OP_UNBIND, 1 },
{ "undefine", OP_UNDEFINE, 1 },
{ "upcase", OP_UPCASE, 1 },
{ "var", OP_VAR, 1 },
{ "var", OP_VAR_DFLT, 2 },
{ "version", OP_VERSION, 0 },
{ "wrap", OP_WRAP, 1 },
{ "write", OP_WRITE, 2 },
{ NULL, 0, 0 }
};
/* number of arguments for each built-in function, indexed by opcode;
   filled in from action_operators by initialize_syntax(): */
unsigned char fnnargs[OP_last_op] = { 0 };
/* Put the pattern syntax into its default state and fill in fnnargs.
   syntax_chars is reset from default_syntax_chars; every character code
   from 0x20 up becomes a literal, new line becomes the pattern
   terminator, and each character named in syntax_chars gets the kind of
   its position there.  Kinds of codes below 0x20 are not reset except
   where one of those assignments names them. */
void initialize_syntax(void) {
  int code;
  int kind;
  const struct action_ops * op;

  memcpy(syntax_chars, default_syntax_chars, sizeof(syntax_chars));

  for ( code = 0x20 ; code < NUMCHAR ; code++ )
    set_char_kind(code,PI_LITERAL);
  set_char_kind('\n',PI_END);

  /* later positions override earlier ones for a repeated character */
  for ( kind = 0 ; kind < Num_Char_Kinds ; kind++ ) {
    unsigned int ch = syntax_chars[kind];
    if ( ch == '\0' )
      continue;
    set_char_kind(ch,kind);
  }

  op = &action_operators[0];
  while ( op->name != NULL ) {
    fnnargs[op->code] = op->nargs;
    op++;
  }
}
#if 0 /* not needed after all */
/* Disabled code, kept for reference only. */
static int
end_delim(int c) {
/* if the argument is an opening delimiter, return the corresponding
closing delimiter, else NUL. */
const char* k;
/* The table is read as pairs of opening and closing delimiter.
   NOTE(review): the adjacent "" inside the literal just joins two string
   literals, so no double-quote pair is actually in the table. */
for ( k = "(){}[]`'''""<>" ; *k != '\0' ; k += 2 )
if ( k[0] == c )
return k[1];
return '\0';
}
#endif
/* Forward declaration: read_put() below calls read_action(), and
   read_action() in turn calls read_put(). */
static unsigned char*
read_action( CIStream s, unsigned char* bp, int nargs,
unsigned char* arg_keys );
/* Parse what follows the "put" character (syntax_chars[PI_PUT]) in an
   action, reading from stream `s'.

   `*app' is the current output position in the action buffer; bytes
   stored here are written through it and it is advanced past them.
   `nargs' is the number of template arguments that may be referenced.
   `arg_keys' is handed on to read_action(); when it is NULL the
   "${...}" form is not recognized.

   Returns one further code that this function does not store itself
   (NOTE(review): presumably the caller appends it to the buffer --
   confirm in read_action). */
int
read_put( CIStream s, unsigned char** app, int nargs,
unsigned char* arg_keys ) {
int ch;
int pc;
unsigned char* ap;
ap = *app;
ch = cis_getch(s);
if ( isdigit(ch) ) { /* "$1" is argument */
pc = ch - '0';
if ( pc == 0 ) /* $0 is special case */
pc = PT_MATCHED_TEXT;
else if ( pc < 1 || pc > nargs ) {
/* no such argument: report it and return the put character itself */
pc = syntax_chars[PI_PUT];
input_error(s, EXS_SYNTAX,
"Invalid argument number: \"%c%c\"\n", pc, ch);
}
else *ap++ = PT_PUT_ARG;
} /* "$x" is single-letter variable */
else if ( isalpha(ch) ) {
*ap++ = PT_VAR1;
pc = ch;
}
else if ( char_kind(ch) == PI_BEGIN_ARG && arg_keys != NULL ) {
/* "${varname}" */
int xc;
unsigned char* ap1;
*ap++ = PT_OP;
*ap++ = OP_VAR;
ap1 = ap; /* start of the name that read_action stores next */
ap = read_action( s, ap, nargs, arg_keys );
pc = PT_SEPARATOR;
xc = cis_prevch(s); /* the character that ended the name */
if ( xc == syntax_chars[PI_ARG_SEP] || char_kind(xc) == PI_ARG_SEP ) {
/* "${varname;default}": switch the opcode stored above to the
   two-argument form and read the default value */
(*app)[1] = OP_VAR_DFLT;
*ap++ = PT_SEPARATOR;
ap = read_action( s, ap, nargs, arg_keys );
xc = cis_prevch(s);
}
else {
int n;
char* end;
*ap = '\0'; /* terminate the name so that strtol can parse it */
n = (int)strtol((char*)ap1,&end,10);
if ( n > 0 && end == (char*)ap && n <= nargs ) {
/* an argument number instead of a variable */
/* overwrite the PT_OP stored above and back up over OP_VAR and the
   digits, so that only PT_PUT_ARG remains and `n' is returned */
ap1[-2] = PT_PUT_ARG;
ap = ap1-1;
pc = n;
}
}
if ( char_kind(xc) != PI_END_ARG )
input_error(s, EXS_SYNTAX, "Missing \"%c\" for \"%c%c\"\n",
syntax_chars[PI_END_ARG], syntax_chars[PI_PUT],
syntax_chars[PI_BEGIN_ARG]);
}
else {
/* anything else: report it, then keep both characters as they are */
input_error(s, EXS_SYNTAX, "Invalid variable reference: \"%c%c\"\n",
syntax_chars[PI_PUT], ch);
*ap++ = syntax_chars[PI_PUT];
pc = ch;
}
*app = ap;
return pc;
}
/* Discard the rest of the current line of stream `s', including the new
   line itself.  Returns TRUE when the line really ends there.  Returns
   FALSE when the last character before the new line (or end of file) is
   an escape character: the line is then continued, and the leading white
   space of the next line has been skipped as well. */
static boolean
skip_comment( CIStream s ) {
  int last = 0;
  int c;

  while ( (c = cis_getch(s)) != '\n' && c != EOF )
    last = c;

  if ( char_kind(last) != PI_ESC )
    return TRUE;
  /* line ends in "\"; continue with the next line. */
  skip_whitespace(s);
  return FALSE;
}
/* maximum length of template or action: */
#define BUFSIZE 1200
static unsigned char*
read_action( CIStream s, unsigned char* bp, int nargs,
unsigned char* arg_keys ) {
unsigned char* ap;
enum char_kinds kind;
for ( ap = bp ; ; ) {
int pc; /* code for action */
int ch; /* character read */
ch = cis_getch(s);
pc = ch; /* just to avoid warning from Gnu compiler */
kind = char_kind(ch);
dispatch:
switch ( kind ) {
case PI_COMMENT: /* ignore rest of line */
if( !skip_comment(s) )
continue;
/* else fall-through to end the line */
case PI_END:
case PI_ARG_SEP:
case PI_END_ARG:
case PI_EOF:
*ap = PT_END;
return ap;
case PI_BEGIN_ARG:
input_error(s, EXS_SYNTAX, "Unexpected '%c' encountered.\n", ch);
pc = ch;
break;
case PI_PUT: /* argument or variable */
pc = read_put( s, &ap, nargs, arg_keys );
break;
case PI_ARG:
case PI_RARG:
case PI_ABBREV_DOMAIN:
case PI_1ARG: {
int ai;
pc = ch;
for ( ai = 1 ; ai <= nargs ; ai++ )
if ( (int)arg_keys[ai] == ch ) {
*ap++ = PT_PUT_ARG;
pc = ai;
arg_keys[ai] = '\0';
break;
}
if ( pc == ch && nargs > 0 )
input_error(s, EXS_OK, "More '%c' in action than in template.\n", ch);
break;
}
case PI_CHAR_OP:
charop: {
pc = cis_getch(s);
kind = default_char_kind(pc);
if ( kind <= PI_LIT_CTRL ) {
input_error( s, EXS_SYNTAX, "Undefined control sequence: \"%c%c\"\n",
ch, pc);
break;
}
else {
ch = pc;
goto dispatch;
}
}
case PI_OP: {
int xc;
unsigned char* xp;
unsigned char* apcode;
*ap = (unsigned char)ch;
for ( xp = ap+1 ; ; xp++ ) {
xc = cis_peek(s);
if ( !isalnum(xc) && xc != '_' && xc != '-' ) {
const struct action_ops * tp;
char* name;
if ( xp == ap+1 && ch == syntax_chars[PI_CHAR_OP] &&
char_kind(xc) != PI_BEGIN_ARG )
goto charop;
*xp = '\0';
name = trim_name(ap+1);
for ( tp = &action_operators[0] ; ; tp++ ) {
if ( tp->name == NULL ) {
#if 1
int domain = find_domain(name);
*ap++ = PT_DOMAIN;
#if MAX_DOMAINS < 256
*ap++ = (unsigned char)(domain + 1);
#else
*ap++ = (unsigned char)((domain & 0x7f)|0x80);
*ap++ = (unsigned char)(((domain>>7) & 0x7f)|0x80);
#endif
if ( char_kind(xc) == PI_BEGIN_ARG ) {
int term_kind;
int term_char;
(void)cis_getch(s);
read_arg:
ap = read_action( s, ap, nargs, arg_keys );
term_char = cis_prevch(s);
term_kind = char_kind(term_char);
if ( term_kind != PI_END_ARG ) {
if ( term_kind == PI_ARG_SEP ||
term_char == syntax_chars[PI_ARG_SEP] ) {
input_error(s, EXS_SYNTAX,
"Arg separator \"%c\" in domain call \"%c%s%c\"\n",
term_char, ch,
domain_name(domain), xc);
*ap++ = term_char;
goto read_arg;
}
else input_error(s, EXS_SYNTAX,
"Missing \"%c\" for \"%c%s%c\"\n",
syntax_chars[PI_END_ARG], ch,
domain_name(domain), xc);
}
}
else input_error(s, EXS_SYNTAX,
"Error: missing '%c' after \"%c%s\"\n",
syntax_chars[PI_BEGIN_ARG],
ch, domain_name(domain));
pc = PT_SEPARATOR;
#else
input_error(s, EXS_SYNTAX,
"Error: undefined operator: \"%c%s\"\n",
ch, name);
ap = xp;
pc = ' ';
#endif
goto store;
}
if ( stricmp(name,tp->name) == 0 ) {
unsigned n = 0;