Merge branch 'take-2'
The take-2 branch started over with a new grammar based directly on the grammar from the C99 specification. It doesn't try to capture things like balanced sets of parentheses for macro arguments in the grammar. Instead, it merely captures things as token lists and then performs operations like parsing arguments and expanding macros on those lists. We merge it here since it's currently behaving better (it passes the entire test suite). But the code base has proven to be quite fragile. Several of the recently added test cases required additional special cases in the take-2 branch while working trivially on master. So this merge point may be useful in the future, since we might end up with a cleaner code base by coming back to the state before this merge and fixing it, rather than accepting all the fragile imperative/list-munging code from the take-2 branch.
This commit is contained in:
@@ -6,4 +6,5 @@ glcpp-parse.h
|
||||
*~
|
||||
tests/*.expected
|
||||
tests/*.gcc
|
||||
tests/*.glcpp
|
||||
tests/*.out
|
||||
|
||||
@@ -7,7 +7,7 @@ CFLAGS = -g
|
||||
override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused
|
||||
|
||||
glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o xtalloc.o
|
||||
gcc -o $@ -ltalloc $^
|
||||
gcc -o $@ -ltalloc -lm $^
|
||||
|
||||
%.c %.h: %.y
|
||||
bison --debug --defines=$*.h --output=$*.c $^
|
||||
|
||||
@@ -12,3 +12,19 @@ preprocessors". To fill in these details, I've been using the C99
|
||||
standard (for which I had a convenient copy) as available from:
|
||||
|
||||
http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf
|
||||
|
||||
Known limitations
|
||||
-----------------
|
||||
Macro invocations cannot include embedded newlines.
|
||||
|
||||
The __LINE__, __FILE__, and __VERSION__ macros are not yet supported.
|
||||
|
||||
The argument of the 'defined' operator cannot yet include enclosing
|
||||
parentheses.
|
||||
|
||||
The #error, #pragma, #extension, #version, and #line directives are not
|
||||
yet supported.
|
||||
|
||||
A file that ends with a function-like macro name as the last
|
||||
non-whitespace token will result in a parse error, (where it should be
|
||||
passed through as is).
|
||||
+69
-163
@@ -32,21 +32,14 @@
|
||||
%option reentrant noyywrap
|
||||
%option extra-type="glcpp_parser_t *"
|
||||
|
||||
%x ST_DEFINE
|
||||
%x ST_DEFINE_OBJ_OR_FUNC
|
||||
%x ST_DEFINE_PARAMETER
|
||||
%x ST_DEFINE_VALUE
|
||||
%x ST_IF
|
||||
%x ST_UNDEF
|
||||
%x ST_UNDEF_END
|
||||
|
||||
SPACE [[:space:]]
|
||||
NONSPACE [^[:space:]]
|
||||
NEWLINE [\n]
|
||||
HSPACE [ \t]
|
||||
HASH ^{HSPACE}*#{HSPACE}*
|
||||
IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]*
|
||||
TOKEN [^[:space:](),]+
|
||||
PUNCTUATION [][(){}.&*~!/%<>^|;,=+-]
|
||||
OTHER [^][(){}.&*~!/%<>^|;,=#[:space:]+-]+
|
||||
|
||||
DECIMAL_INTEGER [1-9][0-9]*[uU]?
|
||||
OCTAL_INTEGER 0[0-7]*[uU]?
|
||||
@@ -54,210 +47,123 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]?
|
||||
|
||||
%%
|
||||
|
||||
{HASH}if{HSPACE}* {
|
||||
BEGIN ST_IF;
|
||||
return IF;
|
||||
{HASH}define{HSPACE}+/{IDENTIFIER}"(" {
|
||||
yyextra->space_tokens = 0;
|
||||
return HASH_DEFINE_FUNC;
|
||||
}
|
||||
|
||||
{HASH}elif{HSPACE}* {
|
||||
BEGIN ST_IF;
|
||||
return ELIF;
|
||||
{HASH}define {
|
||||
yyextra->space_tokens = 0;
|
||||
return HASH_DEFINE_OBJ;
|
||||
}
|
||||
|
||||
<ST_IF>{DECIMAL_INTEGER} {
|
||||
yylval.ival = strtoll (yytext, NULL, 10);
|
||||
return INTEGER;
|
||||
{HASH}undef {
|
||||
yyextra->space_tokens = 0;
|
||||
return HASH_UNDEF;
|
||||
}
|
||||
|
||||
<ST_IF>{OCTAL_INTEGER} {
|
||||
yylval.ival = strtoll (yytext + 1, NULL, 8);
|
||||
return INTEGER;
|
||||
{HASH}if {
|
||||
yyextra->space_tokens = 0;
|
||||
return HASH_IF;
|
||||
}
|
||||
|
||||
<ST_IF>{HEXADECIMAL_INTEGER} {
|
||||
yylval.ival = strtoll (yytext + 2, NULL, 16);
|
||||
return INTEGER;
|
||||
{HASH}elif {
|
||||
yyextra->space_tokens = 0;
|
||||
return HASH_ELIF;
|
||||
}
|
||||
|
||||
<ST_IF>"defined" {
|
||||
return DEFINED;
|
||||
{HASH}else {
|
||||
yyextra->space_tokens = 0;
|
||||
return HASH_ELSE;
|
||||
}
|
||||
|
||||
<ST_IF>"<<" {
|
||||
{HASH}endif {
|
||||
yyextra->space_tokens = 0;
|
||||
return HASH_ENDIF;
|
||||
}
|
||||
|
||||
{HASH} {
|
||||
yyextra->space_tokens = 0;
|
||||
return HASH;
|
||||
}
|
||||
|
||||
{DECIMAL_INTEGER} {
|
||||
yylval.str = xtalloc_strdup (yyextra, yytext);
|
||||
return INTEGER_STRING;
|
||||
}
|
||||
|
||||
{OCTAL_INTEGER} {
|
||||
yylval.str = xtalloc_strdup (yyextra, yytext);
|
||||
return INTEGER_STRING;
|
||||
}
|
||||
|
||||
{HEXADECIMAL_INTEGER} {
|
||||
yylval.str = xtalloc_strdup (yyextra, yytext);
|
||||
return INTEGER_STRING;
|
||||
}
|
||||
|
||||
"<<" {
|
||||
return LEFT_SHIFT;
|
||||
}
|
||||
|
||||
<ST_IF>">>" {
|
||||
">>" {
|
||||
return RIGHT_SHIFT;
|
||||
}
|
||||
|
||||
<ST_IF>"<=" {
|
||||
"<=" {
|
||||
return LESS_OR_EQUAL;
|
||||
}
|
||||
|
||||
<ST_IF>">=" {
|
||||
">=" {
|
||||
return GREATER_OR_EQUAL;
|
||||
}
|
||||
|
||||
<ST_IF>"==" {
|
||||
"==" {
|
||||
return EQUAL;
|
||||
}
|
||||
|
||||
<ST_IF>"!=" {
|
||||
"!=" {
|
||||
return NOT_EQUAL;
|
||||
}
|
||||
|
||||
<ST_IF>"&&" {
|
||||
"&&" {
|
||||
return AND;
|
||||
}
|
||||
|
||||
<ST_IF>"||" {
|
||||
"||" {
|
||||
return OR;
|
||||
}
|
||||
|
||||
<ST_IF>[-+*/%<>&^|()~] {
|
||||
return yytext[0];
|
||||
"##" {
|
||||
return PASTE;
|
||||
}
|
||||
|
||||
<ST_IF>{IDENTIFIER} {
|
||||
yylval.str = xtalloc_strdup (yyextra, yytext);
|
||||
return IDENTIFIER;
|
||||
}
|
||||
|
||||
<ST_IF>{HSPACE}+
|
||||
|
||||
<ST_IF>\n {
|
||||
BEGIN INITIAL;
|
||||
return NEWLINE;
|
||||
}
|
||||
|
||||
{HASH}endif{HSPACE}* {
|
||||
return ENDIF;
|
||||
}
|
||||
|
||||
{HASH}else{HSPACE}* {
|
||||
return ELSE;
|
||||
}
|
||||
|
||||
{HASH}undef{HSPACE}* {
|
||||
BEGIN ST_UNDEF;
|
||||
return UNDEF;
|
||||
}
|
||||
|
||||
<ST_UNDEF>{IDENTIFIER} {
|
||||
BEGIN ST_UNDEF_END;
|
||||
yylval.str = xtalloc_strdup (yyextra, yytext);
|
||||
return IDENTIFIER;
|
||||
}
|
||||
|
||||
<ST_UNDEF_END>{HSPACE}*
|
||||
|
||||
<ST_UNDEF_END>\n {
|
||||
BEGIN INITIAL;
|
||||
}
|
||||
|
||||
/* We use the ST_DEFINE and ST_DEFINE_VALUE states so that we can
|
||||
* pass a space token, (yes, a token for whitespace!), since
|
||||
* the preprocessor specification requires distinguishing
|
||||
* "#define foo()" from "#define foo ()".
|
||||
*/
|
||||
{HASH}define{HSPACE}* {
|
||||
BEGIN ST_DEFINE;
|
||||
return DEFINE;
|
||||
}
|
||||
|
||||
<ST_DEFINE>{IDENTIFIER} {
|
||||
BEGIN ST_DEFINE_OBJ_OR_FUNC;
|
||||
yylval.str = xtalloc_strdup (yyextra, yytext);
|
||||
return IDENTIFIER;
|
||||
}
|
||||
|
||||
<ST_DEFINE_OBJ_OR_FUNC>\n {
|
||||
BEGIN INITIAL;
|
||||
return NEWLINE;
|
||||
}
|
||||
|
||||
<ST_DEFINE_OBJ_OR_FUNC>{HSPACE}+ {
|
||||
BEGIN ST_DEFINE_VALUE;
|
||||
return SPACE;
|
||||
}
|
||||
|
||||
<ST_DEFINE_OBJ_OR_FUNC>"(" {
|
||||
BEGIN ST_DEFINE_PARAMETER;
|
||||
return '(';
|
||||
}
|
||||
|
||||
<ST_DEFINE_PARAMETER>{IDENTIFIER} {
|
||||
yylval.str = xtalloc_strdup (yyextra, yytext);
|
||||
return IDENTIFIER;
|
||||
}
|
||||
|
||||
<ST_DEFINE_PARAMETER>"," {
|
||||
return ',';
|
||||
}
|
||||
|
||||
<ST_DEFINE_PARAMETER>")" {
|
||||
BEGIN ST_DEFINE_VALUE;
|
||||
return ')';
|
||||
}
|
||||
|
||||
<ST_DEFINE_PARAMETER>{HSPACE}+
|
||||
|
||||
<ST_DEFINE_VALUE>{TOKEN} {
|
||||
yylval.token.type = TOKEN;
|
||||
yylval.token.value = xtalloc_strdup (yyextra, yytext);
|
||||
return TOKEN;
|
||||
}
|
||||
|
||||
<ST_DEFINE_VALUE>[(),] {
|
||||
yylval.token.type = TOKEN;
|
||||
yylval.token.value = xtalloc_strdup (yyextra, yytext);
|
||||
return TOKEN;
|
||||
}
|
||||
|
||||
<ST_DEFINE_VALUE>{HSPACE}+
|
||||
|
||||
<ST_DEFINE_VALUE>\n {
|
||||
BEGIN INITIAL;
|
||||
return NEWLINE;
|
||||
"defined" {
|
||||
return DEFINED;
|
||||
}
|
||||
|
||||
{IDENTIFIER} {
|
||||
int parameter_index;
|
||||
yylval.str = xtalloc_strdup (yyextra, yytext);
|
||||
switch (glcpp_parser_classify_token (yyextra, yylval.str,
|
||||
&parameter_index))
|
||||
{
|
||||
case TOKEN_CLASS_IDENTIFIER:
|
||||
return IDENTIFIER;
|
||||
break;
|
||||
case TOKEN_CLASS_IDENTIFIER_FINALIZED:
|
||||
return IDENTIFIER_FINALIZED;
|
||||
break;
|
||||
case TOKEN_CLASS_FUNC_MACRO:
|
||||
return FUNC_MACRO;
|
||||
break;
|
||||
case TOKEN_CLASS_OBJ_MACRO:
|
||||
return OBJ_MACRO;
|
||||
break;
|
||||
|
||||
}
|
||||
return IDENTIFIER;
|
||||
}
|
||||
|
||||
[(),] {
|
||||
{PUNCTUATION} {
|
||||
return yytext[0];
|
||||
}
|
||||
|
||||
{TOKEN} {
|
||||
yylval.token.type = TOKEN;
|
||||
yylval.token.value = xtalloc_strdup (yyextra, yytext);
|
||||
return TOKEN;
|
||||
{OTHER}+ {
|
||||
yylval.str = xtalloc_strdup (yyextra, yytext);
|
||||
return OTHER;
|
||||
}
|
||||
|
||||
{HSPACE}+ {
|
||||
if (yyextra->space_tokens) {
|
||||
return SPACE;
|
||||
}
|
||||
}
|
||||
|
||||
\n {
|
||||
yyextra->need_newline = 1;
|
||||
return NEWLINE;
|
||||
}
|
||||
|
||||
{HSPACE}+
|
||||
|
||||
%%
|
||||
|
||||
+986
-456
File diff suppressed because it is too large
Load Diff
@@ -44,21 +44,36 @@ typedef struct string_list {
|
||||
string_node_t *tail;
|
||||
} string_list_t;
|
||||
|
||||
typedef struct token {
|
||||
typedef struct token token_t;
|
||||
typedef struct token_list token_list_t;
|
||||
|
||||
typedef union YYSTYPE
|
||||
{
|
||||
intmax_t ival;
|
||||
char *str;
|
||||
string_list_t *string_list;
|
||||
token_t *token;
|
||||
token_list_t *token_list;
|
||||
} YYSTYPE;
|
||||
|
||||
# define YYSTYPE_IS_TRIVIAL 1
|
||||
# define YYSTYPE_IS_DECLARED 1
|
||||
|
||||
struct token {
|
||||
int type;
|
||||
char *value;
|
||||
} token_t;
|
||||
YYSTYPE value;
|
||||
};
|
||||
|
||||
typedef struct token_node {
|
||||
int type;
|
||||
const char *value;
|
||||
token_t *token;
|
||||
struct token_node *next;
|
||||
} token_node_t;
|
||||
|
||||
typedef struct token_list {
|
||||
struct token_list {
|
||||
token_node_t *head;
|
||||
token_node_t *tail;
|
||||
} token_list_t;
|
||||
token_node_t *non_space_tail;
|
||||
};
|
||||
|
||||
typedef struct argument_node {
|
||||
token_list_t *argument;
|
||||
@@ -111,16 +126,16 @@ typedef struct skip_node {
|
||||
struct glcpp_parser {
|
||||
yyscan_t scanner;
|
||||
struct hash_table *defines;
|
||||
expansion_node_t *expansions;
|
||||
int just_printed_separator;
|
||||
int need_newline;
|
||||
string_list_t *active;
|
||||
int space_tokens;
|
||||
int newline_as_space;
|
||||
int in_control_line;
|
||||
int paren_count;
|
||||
skip_node_t *skip_stack;
|
||||
token_list_t *lex_from_list;
|
||||
token_node_t *lex_from_node;
|
||||
};
|
||||
|
||||
void
|
||||
glcpp_parser_push_expansion_argument (glcpp_parser_t *parser,
|
||||
int argument_index);
|
||||
|
||||
glcpp_parser_t *
|
||||
glcpp_parser_create (void);
|
||||
|
||||
@@ -164,4 +179,10 @@ xtalloc_strndup (const void *t, const char *p, size_t n);
|
||||
char *
|
||||
xtalloc_asprintf (const void *t, const char *fmt, ...);
|
||||
|
||||
void *
|
||||
_xtalloc_reference_loc (const void *context,
|
||||
const void *ptr, const char *location);
|
||||
|
||||
#define xtalloc_reference(ctx, ptr) (_TALLOC_TYPEOF(ptr))_xtalloc_reference_loc((ctx),(ptr), __location__)
|
||||
|
||||
#endif
|
||||
|
||||
+4
-3
@@ -2,8 +2,9 @@
|
||||
|
||||
for test in *.c; do
|
||||
echo "Testing $test"
|
||||
../glcpp < $test > $test.out
|
||||
../glcpp < $test > $test.glcpp
|
||||
grep -v '^$' < $test.glcpp > $test.out || true
|
||||
gcc -E $test -o $test.gcc
|
||||
grep -v '^#' < $test.gcc > $test.expected
|
||||
diff -B -u $test.expected $test.out
|
||||
grep -v '^#' < $test.gcc | grep -v '^$' > $test.expected || true
|
||||
diff -u $test.expected $test.out
|
||||
done
|
||||
|
||||
@@ -82,3 +82,18 @@ xtalloc_asprintf (const void *t, const char *fmt, ...)
|
||||
va_end(ap);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *
|
||||
_xtalloc_reference_loc (const void *context,
|
||||
const void *ptr, const char *location)
|
||||
{
|
||||
void *ret;
|
||||
|
||||
ret = _talloc_reference_loc (context, ptr, location);
|
||||
if (ret == NULL) {
|
||||
fprintf (stderr, "Out of memory.\n");
|
||||
exit (1);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user