Merge branch 'take-2'

The take-2 branch started over with a new grammar based directly on
the grammar from the C99 specification. It doesn't try to capture
things like balanced sets of parentheses for macro arguments in the
grammar. Instead, it merely captures things as token lists and then
performs operations like parsing arguments and expanding macros on
those lists.

We merge it here since it's currently behaving better (passing the
entire test suite). But the code base has proven quite fragile:
several of the recently added test cases required additional
special cases in the take-2 branch while working trivially on master.

So this merge point may be useful in the future, since we might have a
cleaner code base by coming back to the state before this merge and
fixing it, rather than accepting all the fragile
imperative/list-munging code from the take-2 branch.
This commit is contained in:
Carl Worth
2010-05-29 06:03:32 -07:00
8 changed files with 1127 additions and 637 deletions
+1
View File
@@ -6,4 +6,5 @@ glcpp-parse.h
*~
tests/*.expected
tests/*.gcc
tests/*.glcpp
tests/*.out
+1 -1
View File
@@ -7,7 +7,7 @@ CFLAGS = -g
override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused
glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o xtalloc.o
gcc -o $@ -ltalloc $^
gcc -o $@ -ltalloc -lm $^
%.c %.h: %.y
bison --debug --defines=$*.h --output=$*.c $^
+16
View File
@@ -12,3 +12,19 @@ preprocessors". To fill in these details, I've been using the C99
standard (for which I had a convenient copy) as available from:
http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf
Known limitations
-----------------
Macro invocations cannot include embedded newlines.
The __LINE__, __FILE__, and __VERSION__ macros are not yet supported.
The argument of the 'defined' operator cannot yet include enclosing
parentheses.
The #error, #pragma, #extension, #version, and #line directives are not
yet supported.
A file that ends with a function-like macro name as the last
non-whitespace token will result in a parse error, (where it should be
passed through as is).
+69 -163
View File
@@ -32,21 +32,14 @@
%option reentrant noyywrap
%option extra-type="glcpp_parser_t *"
%x ST_DEFINE
%x ST_DEFINE_OBJ_OR_FUNC
%x ST_DEFINE_PARAMETER
%x ST_DEFINE_VALUE
%x ST_IF
%x ST_UNDEF
%x ST_UNDEF_END
SPACE [[:space:]]
NONSPACE [^[:space:]]
NEWLINE [\n]
HSPACE [ \t]
HASH ^{HSPACE}*#{HSPACE}*
IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]*
TOKEN [^[:space:](),]+
PUNCTUATION [][(){}.&*~!/%<>^|;,=+-]
OTHER [^][(){}.&*~!/%<>^|;,=#[:space:]+-]+
DECIMAL_INTEGER [1-9][0-9]*[uU]?
OCTAL_INTEGER 0[0-7]*[uU]?
@@ -54,210 +47,123 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]?
%%
{HASH}if{HSPACE}* {
BEGIN ST_IF;
return IF;
{HASH}define{HSPACE}+/{IDENTIFIER}"(" {
yyextra->space_tokens = 0;
return HASH_DEFINE_FUNC;
}
{HASH}elif{HSPACE}* {
BEGIN ST_IF;
return ELIF;
{HASH}define {
yyextra->space_tokens = 0;
return HASH_DEFINE_OBJ;
}
<ST_IF>{DECIMAL_INTEGER} {
yylval.ival = strtoll (yytext, NULL, 10);
return INTEGER;
{HASH}undef {
yyextra->space_tokens = 0;
return HASH_UNDEF;
}
<ST_IF>{OCTAL_INTEGER} {
yylval.ival = strtoll (yytext + 1, NULL, 8);
return INTEGER;
{HASH}if {
yyextra->space_tokens = 0;
return HASH_IF;
}
<ST_IF>{HEXADECIMAL_INTEGER} {
yylval.ival = strtoll (yytext + 2, NULL, 16);
return INTEGER;
{HASH}elif {
yyextra->space_tokens = 0;
return HASH_ELIF;
}
<ST_IF>"defined" {
return DEFINED;
{HASH}else {
yyextra->space_tokens = 0;
return HASH_ELSE;
}
<ST_IF>"<<" {
{HASH}endif {
yyextra->space_tokens = 0;
return HASH_ENDIF;
}
{HASH} {
yyextra->space_tokens = 0;
return HASH;
}
{DECIMAL_INTEGER} {
yylval.str = xtalloc_strdup (yyextra, yytext);
return INTEGER_STRING;
}
{OCTAL_INTEGER} {
yylval.str = xtalloc_strdup (yyextra, yytext);
return INTEGER_STRING;
}
{HEXADECIMAL_INTEGER} {
yylval.str = xtalloc_strdup (yyextra, yytext);
return INTEGER_STRING;
}
"<<" {
return LEFT_SHIFT;
}
<ST_IF>">>" {
">>" {
return RIGHT_SHIFT;
}
<ST_IF>"<=" {
"<=" {
return LESS_OR_EQUAL;
}
<ST_IF>">=" {
">=" {
return GREATER_OR_EQUAL;
}
<ST_IF>"==" {
"==" {
return EQUAL;
}
<ST_IF>"!=" {
"!=" {
return NOT_EQUAL;
}
<ST_IF>"&&" {
"&&" {
return AND;
}
<ST_IF>"||" {
"||" {
return OR;
}
<ST_IF>[-+*/%<>&^|()~] {
return yytext[0];
"##" {
return PASTE;
}
<ST_IF>{IDENTIFIER} {
yylval.str = xtalloc_strdup (yyextra, yytext);
return IDENTIFIER;
}
<ST_IF>{HSPACE}+
<ST_IF>\n {
BEGIN INITIAL;
return NEWLINE;
}
{HASH}endif{HSPACE}* {
return ENDIF;
}
{HASH}else{HSPACE}* {
return ELSE;
}
{HASH}undef{HSPACE}* {
BEGIN ST_UNDEF;
return UNDEF;
}
<ST_UNDEF>{IDENTIFIER} {
BEGIN ST_UNDEF_END;
yylval.str = xtalloc_strdup (yyextra, yytext);
return IDENTIFIER;
}
<ST_UNDEF_END>{HSPACE}*
<ST_UNDEF_END>\n {
BEGIN INITIAL;
}
/* We use the ST_DEFINE and ST_DEFINE_VALUE states so that we can
* pass a space token, (yes, a token for whitespace!), since
* the preprocessor specification requires distinguishing
* "#define foo()" from "#define foo ()".
*/
{HASH}define{HSPACE}* {
BEGIN ST_DEFINE;
return DEFINE;
}
<ST_DEFINE>{IDENTIFIER} {
BEGIN ST_DEFINE_OBJ_OR_FUNC;
yylval.str = xtalloc_strdup (yyextra, yytext);
return IDENTIFIER;
}
<ST_DEFINE_OBJ_OR_FUNC>\n {
BEGIN INITIAL;
return NEWLINE;
}
<ST_DEFINE_OBJ_OR_FUNC>{HSPACE}+ {
BEGIN ST_DEFINE_VALUE;
return SPACE;
}
<ST_DEFINE_OBJ_OR_FUNC>"(" {
BEGIN ST_DEFINE_PARAMETER;
return '(';
}
<ST_DEFINE_PARAMETER>{IDENTIFIER} {
yylval.str = xtalloc_strdup (yyextra, yytext);
return IDENTIFIER;
}
<ST_DEFINE_PARAMETER>"," {
return ',';
}
<ST_DEFINE_PARAMETER>")" {
BEGIN ST_DEFINE_VALUE;
return ')';
}
<ST_DEFINE_PARAMETER>{HSPACE}+
<ST_DEFINE_VALUE>{TOKEN} {
yylval.token.type = TOKEN;
yylval.token.value = xtalloc_strdup (yyextra, yytext);
return TOKEN;
}
<ST_DEFINE_VALUE>[(),] {
yylval.token.type = TOKEN;
yylval.token.value = xtalloc_strdup (yyextra, yytext);
return TOKEN;
}
<ST_DEFINE_VALUE>{HSPACE}+
<ST_DEFINE_VALUE>\n {
BEGIN INITIAL;
return NEWLINE;
"defined" {
return DEFINED;
}
{IDENTIFIER} {
int parameter_index;
yylval.str = xtalloc_strdup (yyextra, yytext);
switch (glcpp_parser_classify_token (yyextra, yylval.str,
&parameter_index))
{
case TOKEN_CLASS_IDENTIFIER:
return IDENTIFIER;
break;
case TOKEN_CLASS_IDENTIFIER_FINALIZED:
return IDENTIFIER_FINALIZED;
break;
case TOKEN_CLASS_FUNC_MACRO:
return FUNC_MACRO;
break;
case TOKEN_CLASS_OBJ_MACRO:
return OBJ_MACRO;
break;
}
return IDENTIFIER;
}
[(),] {
{PUNCTUATION} {
return yytext[0];
}
{TOKEN} {
yylval.token.type = TOKEN;
yylval.token.value = xtalloc_strdup (yyextra, yytext);
return TOKEN;
{OTHER}+ {
yylval.str = xtalloc_strdup (yyextra, yytext);
return OTHER;
}
{HSPACE}+ {
if (yyextra->space_tokens) {
return SPACE;
}
}
\n {
yyextra->need_newline = 1;
return NEWLINE;
}
{HSPACE}+
%%
+986 -456
View File
File diff suppressed because it is too large Load Diff
+35 -14
View File
@@ -44,21 +44,36 @@ typedef struct string_list {
string_node_t *tail;
} string_list_t;
typedef struct token {
typedef struct token token_t;
typedef struct token_list token_list_t;
/* Semantic value type shared by the scanner and the parser. It is
 * declared by hand here so that struct token (below) can embed a
 * YYSTYPE as its own value.
 */
typedef union YYSTYPE
{
/* Integer value. */
intmax_t ival;
/* String value; the scanner rules store an xtalloc_strdup copy of
 * the matched text here. */
char *str;
/* List of strings. */
string_list_t *string_list;
/* A single token. */
token_t *token;
/* A list of tokens. */
token_list_t *token_list;
} YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define YYSTYPE_IS_DECLARED 1
struct token {
int type;
char *value;
} token_t;
YYSTYPE value;
};
typedef struct token_node {
int type;
const char *value;
token_t *token;
struct token_node *next;
} token_node_t;
typedef struct token_list {
struct token_list {
token_node_t *head;
token_node_t *tail;
} token_list_t;
token_node_t *non_space_tail;
};
typedef struct argument_node {
token_list_t *argument;
@@ -111,16 +126,16 @@ typedef struct skip_node {
struct glcpp_parser {
yyscan_t scanner;
struct hash_table *defines;
expansion_node_t *expansions;
int just_printed_separator;
int need_newline;
string_list_t *active;
int space_tokens;
int newline_as_space;
int in_control_line;
int paren_count;
skip_node_t *skip_stack;
token_list_t *lex_from_list;
token_node_t *lex_from_node;
};
void
glcpp_parser_push_expansion_argument (glcpp_parser_t *parser,
int argument_index);
glcpp_parser_t *
glcpp_parser_create (void);
@@ -164,4 +179,10 @@ xtalloc_strndup (const void *t, const char *p, size_t n);
char *
xtalloc_asprintf (const void *t, const char *fmt, ...);
void *
_xtalloc_reference_loc (const void *context,
const void *ptr, const char *location);
#define xtalloc_reference(ctx, ptr) (_TALLOC_TYPEOF(ptr))_xtalloc_reference_loc((ctx),(ptr), __location__)
#endif
+4 -3
View File
@@ -2,8 +2,9 @@
for test in *.c; do
echo "Testing $test"
../glcpp < $test > $test.out
../glcpp < $test > $test.glcpp
grep -v '^$' < $test.glcpp > $test.out || true
gcc -E $test -o $test.gcc
grep -v '^#' < $test.gcc > $test.expected
diff -B -u $test.expected $test.out
grep -v '^#' < $test.gcc | grep -v '^$' > $test.expected || true
diff -u $test.expected $test.out
done
+15
View File
@@ -82,3 +82,18 @@ xtalloc_asprintf (const void *t, const char *fmt, ...)
va_end(ap);
return ret;
}
/* Checked variant of _talloc_reference_loc.
 *
 * Forwards 'context', 'ptr' and 'location' unchanged to
 * _talloc_reference_loc and hands back its result. Any NULL result is
 * treated as an allocation failure: a message is written to stderr and
 * the process exits with status 1, so callers never see NULL.
 */
void *
_xtalloc_reference_loc (const void *context,
			const void *ptr, const char *location)
{
	void *reference = _talloc_reference_loc (context, ptr, location);

	if (reference != NULL)
		return reference;

	/* No reference could be created; give up rather than return NULL. */
	fprintf (stderr, "Out of memory.\n");
	exit (1);
}