From 3a37b8701cd3e0a86fef59910b20b2af7e4573f6 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 10 May 2010 11:44:09 -0700 Subject: [PATCH 001/148] Add the tiniest shell of a flex/bison-based parser. It doesn't really *do* anything yet---merlely parsing a stream of whitespace-separated tokens, (and not interpreting them at all). --- Makefile | 12 +++++++++++ glcpp-lex.l | 41 ++++++++++++++++++++++++++++++++++++ glcpp-parse.y | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++ glcpp.c | 28 +++++++++++++++++++++++++ 4 files changed, 138 insertions(+) create mode 100644 Makefile create mode 100644 glcpp-lex.l create mode 100644 glcpp-parse.y create mode 100644 glcpp.c diff --git a/Makefile b/Makefile new file mode 100644 index 00000000000..d8357dda0f0 --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +glcpp: glcpp.o glcpp-lex.o glcpp-parse.o + +%.c %.h: %.y + bison --defines=$*.h --output=$*.c $^ + +%.c: %.l + flex --outfile=$@ $< + +glcpp-lex.c: glcpp-parse.h + +clean: + rm -f glcpp-lex.c glcpp-parse.c *.o *~ diff --git a/glcpp-lex.l b/glcpp-lex.l new file mode 100644 index 00000000000..9779f2b92e6 --- /dev/null +++ b/glcpp-lex.l @@ -0,0 +1,41 @@ +%{ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include "glcpp-parse.h" +%} + +%option noyywrap + +%% + + /* Silently eat all whitespace. */ +[[:space:]]+ + + /* Any non-whitespace is a token. */ +[^[:space:]]+ { return TOKEN; } + +%% diff --git a/glcpp-parse.y b/glcpp-parse.y new file mode 100644 index 00000000000..739b2935b3f --- /dev/null +++ b/glcpp-parse.y @@ -0,0 +1,57 @@ +%{ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#define YYSTYPE int + +void +yyerror (const char *error); + +%} + +%token TOKEN + +%% + +input: /* empty */ + | tokens +; + + +tokens: token + | tokens token +; + +token: TOKEN +; + +%% + +void +yyerror (const char *error) +{ + fprintf (stderr, "Parse error: %s\n", error); +} diff --git a/glcpp.c b/glcpp.c new file mode 100644 index 00000000000..09641ceeadb --- /dev/null +++ b/glcpp.c @@ -0,0 +1,28 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +int +main (void) +{ + return yyparse (); +} From 38aa83560be3368b4e9784b3ef8f73144171ca45 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 10 May 2010 11:52:29 -0700 Subject: [PATCH 002/148] Make the lexer reentrant (to avoid "still reachable" memory). This allows the final program to be 100% "valgrind clean", (freeing all memory that it allocates). This will make it much easier to ensure that any allocation that parser actions perform are also cleaned up. --- glcpp-lex.l | 2 +- glcpp-parse.y | 7 +++++-- glcpp.c | 9 ++++++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 9779f2b92e6..276f50ddfe3 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -28,7 +28,7 @@ #include "glcpp-parse.h" %} -%option noyywrap +%option reentrant noyywrap %% diff --git a/glcpp-parse.y b/glcpp-parse.y index 739b2935b3f..9acd549b24c 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -28,10 +28,13 @@ #define YYSTYPE int void -yyerror (const char *error); +yyerror (const char *error, void *scanner); %} +%parse-param {void *scanner} +%lex-param {void *scanner} + %token TOKEN %% @@ -51,7 +54,7 @@ token: TOKEN %% void -yyerror (const char *error) +yyerror (const char *error, void *scanner) { fprintf (stderr, "Parse error: %s\n", error); } diff --git a/glcpp.c b/glcpp.c index 09641ceeadb..90a0e89cfa6 100644 --- a/glcpp.c +++ b/glcpp.c @@ -24,5 +24,12 @@ int main (void) { - return yyparse (); + int ret; + void *scanner; + + yylex_init (&scanner); + ret = yyparse (scanner); + yylex_destroy (scanner); + + return ret; } From a1e32bcff0a04dbff61f28c8e725cf2bf120bf85 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 10 May 2010 13:17:25 -0700 Subject: [PATCH 003/148] Add some compiler warnings and corresponding fixes. Most of the current problems were (mostly) harmless things like missing declarations, but there was at least one real error, (reversed argument order for yyerror). 
--- Makefile | 2 ++ glcpp-lex.l | 1 + glcpp-parse.y | 6 ++++-- glcpp.c | 2 ++ glcpp.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 glcpp.h diff --git a/Makefile b/Makefile index d8357dda0f0..d0ca78de74c 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,5 @@ +override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused + glcpp: glcpp.o glcpp-lex.o glcpp-parse.o %.c %.h: %.y diff --git a/glcpp-lex.l b/glcpp-lex.l index 276f50ddfe3..747e24056f4 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -25,6 +25,7 @@ #include #include +#include "glcpp.h" #include "glcpp-parse.h" %} diff --git a/glcpp-parse.y b/glcpp-parse.y index 9acd549b24c..a2d10942538 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -25,10 +25,12 @@ #include #include +#include "glcpp.h" + #define YYSTYPE int void -yyerror (const char *error, void *scanner); +yyerror (void *scanner, const char *error); %} @@ -54,7 +56,7 @@ token: TOKEN %% void -yyerror (const char *error, void *scanner) +yyerror (void *scanner, const char *error) { fprintf (stderr, "Parse error: %s\n", error); } diff --git a/glcpp.c b/glcpp.c index 90a0e89cfa6..eefac74be9a 100644 --- a/glcpp.c +++ b/glcpp.c @@ -21,6 +21,8 @@ * DEALINGS IN THE SOFTWARE. 
*/ +#include "glcpp.h" + int main (void) { diff --git a/glcpp.h b/glcpp.h new file mode 100644 index 00000000000..485387b8a5d --- /dev/null +++ b/glcpp.h @@ -0,0 +1,45 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef GLCPP_H +#define GLCPP_H + +/* Generated by glcpp-lex.l to glcpp-lex.c */ + +#define yyscan_t void* + +int +yylex_init (yyscan_t *scanner); + +int +yylex (yyscan_t scanner); + +int +yylex_destroy (yyscan_t scanner); + +/* Generated by glcpp-parse.y to glcpp-parse.c */ + +int +yyparse (void *scanner); + +#endif From a70e7bab2b492f64455c74f2222b363f37dc8dfa Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 10 May 2010 13:32:42 -0700 Subject: [PATCH 004/148] Add .gitignore file. To ignore generated source files (and glcpp binary). 
--- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000000..5bbd660f22b --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +glcpp +glcpp-lex.c +glcpp-parse.c +glcpp-parse.h +*.o +*~ From 633a692225fcdad15ce84776a7a18d7d008d52b3 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 10 May 2010 13:36:26 -0700 Subject: [PATCH 005/148] Add hash table implementation from glsl2 project. The preprocessor here is intended to become part of the glsl2 codebase eventually anyway. --- Makefile | 2 +- hash_table.c | 159 ++++++++++++++++++++++++++++++ hash_table.h | 125 ++++++++++++++++++++++++ main/imports.h | 6 ++ main/simple_list.h | 235 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 526 insertions(+), 1 deletion(-) create mode 100644 hash_table.c create mode 100644 hash_table.h create mode 100644 main/imports.h create mode 100644 main/simple_list.h diff --git a/Makefile b/Makefile index d0ca78de74c..0af7e05d1b2 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused -glcpp: glcpp.o glcpp-lex.o glcpp-parse.o +glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o %.c %.h: %.y bison --defines=$*.h --output=$*.c $^ diff --git a/hash_table.c b/hash_table.c new file mode 100644 index 00000000000..e89a2564d76 --- /dev/null +++ b/hash_table.c @@ -0,0 +1,159 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file hash_table.c + * \brief Implementation of a generic, opaque hash table data type. + * + * \author Ian Romanick + */ + +#include "main/imports.h" +#include "main/simple_list.h" +#include "hash_table.h" + +struct node { + struct node *next; + struct node *prev; +}; + +struct hash_table { + hash_func_t hash; + hash_compare_func_t compare; + + unsigned num_buckets; + struct node buckets[1]; +}; + + +struct hash_node { + struct node link; + const void *key; + void *data; +}; + + +struct hash_table * +hash_table_ctor(unsigned num_buckets, hash_func_t hash, + hash_compare_func_t compare) +{ + struct hash_table *ht; + unsigned i; + + + if (num_buckets < 16) { + num_buckets = 16; + } + + ht = _mesa_malloc(sizeof(*ht) + ((num_buckets - 1) + * sizeof(ht->buckets[0]))); + if (ht != NULL) { + ht->hash = hash; + ht->compare = compare; + ht->num_buckets = num_buckets; + + for (i = 0; i < num_buckets; i++) { + make_empty_list(& ht->buckets[i]); + } + } + + return ht; +} + + +void +hash_table_dtor(struct hash_table *ht) +{ + hash_table_clear(ht); + _mesa_free(ht); +} + + +void +hash_table_clear(struct hash_table *ht) +{ + struct node *node; + struct node *temp; + unsigned i; + + + for (i = 0; i < ht->num_buckets; i++) { + foreach_s(node, temp, & ht->buckets[i]) { + remove_from_list(node); + _mesa_free(node); + } + + assert(is_empty_list(& 
ht->buckets[i])); + } +} + + +void * +hash_table_find(struct hash_table *ht, const void *key) +{ + const unsigned hash_value = (*ht->hash)(key); + const unsigned bucket = hash_value % ht->num_buckets; + struct node *node; + + foreach(node, & ht->buckets[bucket]) { + struct hash_node *hn = (struct hash_node *) node; + + if ((*ht->compare)(hn->key, key) == 0) { + return hn->data; + } + } + + return NULL; +} + + +void +hash_table_insert(struct hash_table *ht, void *data, const void *key) +{ + const unsigned hash_value = (*ht->hash)(key); + const unsigned bucket = hash_value % ht->num_buckets; + struct hash_node *node; + + node = _mesa_calloc(sizeof(*node)); + + node->data = data; + node->key = key; + + insert_at_head(& ht->buckets[bucket], & node->link); +} + + +unsigned +hash_table_string_hash(const void *key) +{ + const char *str = (const char *) key; + unsigned hash = 5381; + + + while (*str != '\0') { + hash = (hash * 33) + *str; + str++; + } + + return hash; +} diff --git a/hash_table.h b/hash_table.h new file mode 100644 index 00000000000..b9dd343dee9 --- /dev/null +++ b/hash_table.h @@ -0,0 +1,125 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file hash_table.h + * \brief Implementation of a generic, opaque hash table data type. + * + * \author Ian Romanick + */ + +#ifndef HASH_TABLE_H +#define HASH_TABLE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +struct hash_table; + +typedef unsigned (*hash_func_t)(const void *key); +typedef int (*hash_compare_func_t)(const void *key1, const void *key2); + +/** + * Hash table constructor + * + * Creates a hash table with the specified number of buckets. The supplied + * \c hash and \c compare routines are used when adding elements to the table + * and when searching for elements in the table. + * + * \param num_buckets Number of buckets (bins) in the hash table. + * \param hash Function used to compute hash value of input keys. + * \param compare Function used to compare keys. + */ +extern struct hash_table *hash_table_ctor(unsigned num_buckets, + hash_func_t hash, hash_compare_func_t compare); + + +/** + * Release all memory associated with a hash table + * + * \warning + * This function cannot release memory occupied either by keys or data. + */ +extern void hash_table_dtor(struct hash_table *ht); + + +/** + * Flush all entries from a hash table + * + * \param ht Table to be cleared of its entries. 
+ */ +extern void hash_table_clear(struct hash_table *ht); + + +/** + * Search a hash table for a specific element + * + * \param ht Table to be searched + * \param key Key of the desired element + * + * \return + * The \c data value supplied to \c hash_table_insert when the element with + * the matching key was added. If no matching key exists in the table, + * \c NULL is returned. + */ +extern void *hash_table_find(struct hash_table *ht, const void *key); + + +/** + * Add an element to a hash table + */ +extern void hash_table_insert(struct hash_table *ht, void *data, + const void *key); + + +/** + * Compute hash value of a string + * + * Computes the hash value of a string using the DJB2 algorithm developed by + * Professor Daniel J. Bernstein. It was published on comp.lang.c once upon + * a time. I was unable to find the original posting in the archives. + * + * \param key Pointer to a NUL terminated string to be hashed. + * + * \sa hash_table_string_compare + */ +extern unsigned hash_table_string_hash(const void *key); + + +/** + * Compare two strings used as keys + * + * This is just a macro wrapper around \c strcmp. + * + * \sa hash_table_string_hash + */ +#define hash_table_string_compare ((hash_compare_func_t) strcmp) + +#ifdef __cplusplus +}; +#endif + +#endif /* HASH_TABLE_H */ diff --git a/main/imports.h b/main/imports.h new file mode 100644 index 00000000000..d2197342c04 --- /dev/null +++ b/main/imports.h @@ -0,0 +1,6 @@ +#include +#include + +#define _mesa_malloc(x) malloc(x) +#define _mesa_free(x) free(x) +#define _mesa_calloc(x) calloc(1,x) diff --git a/main/simple_list.h b/main/simple_list.h new file mode 100644 index 00000000000..5ef39e14cc6 --- /dev/null +++ b/main/simple_list.h @@ -0,0 +1,235 @@ +/** + * \file simple_list.h + * Simple macros for type-safe, intrusive lists. + * + * Intended to work with a list sentinal which is created as an empty + * list. Insert & delete are O(1). 
+ * + * \author + * (C) 1997, Keith Whitwell + */ + +/* + * Mesa 3-D graphics library + * Version: 3.5 + * + * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef _SIMPLE_LIST_H +#define _SIMPLE_LIST_H + +struct simple_node { + struct simple_node *next; + struct simple_node *prev; +}; + +/** + * Remove an element from list. + * + * \param elem element to remove. + */ +#define remove_from_list(elem) \ +do { \ + (elem)->next->prev = (elem)->prev; \ + (elem)->prev->next = (elem)->next; \ +} while (0) + +/** + * Insert an element to the list head. + * + * \param list list. + * \param elem element to insert. + */ +#define insert_at_head(list, elem) \ +do { \ + (elem)->prev = list; \ + (elem)->next = (list)->next; \ + (list)->next->prev = elem; \ + (list)->next = elem; \ +} while(0) + +/** + * Insert an element to the list tail. + * + * \param list list. + * \param elem element to insert. 
+ */ +#define insert_at_tail(list, elem) \ +do { \ + (elem)->next = list; \ + (elem)->prev = (list)->prev; \ + (list)->prev->next = elem; \ + (list)->prev = elem; \ +} while(0) + +/** + * Move an element to the list head. + * + * \param list list. + * \param elem element to move. + */ +#define move_to_head(list, elem) \ +do { \ + remove_from_list(elem); \ + insert_at_head(list, elem); \ +} while (0) + +/** + * Move an element to the list tail. + * + * \param list list. + * \param elem element to move. + */ +#define move_to_tail(list, elem) \ +do { \ + remove_from_list(elem); \ + insert_at_tail(list, elem); \ +} while (0) + +/** + * Consatinate a cyclic list to a list + * + * Appends the sequence of nodes starting with \c tail to the list \c head. + * A "cyclic list" is a list that does not have a sentinal node. This means + * that the data pointed to by \c tail is an actual node, not a dataless + * sentinal. Note that if \c tail constist of a single node, this macro + * behaves identically to \c insert_at_tail + * + * \param head Head of the list to be appended to. This may or may not + * be a cyclic list. + * \param tail Head of the cyclic list to be appended to \c head. + * \param temp Temporary \c simple_list used by the macro + * + * \sa insert_at_tail + */ +#define concat_list_and_cycle(head, tail, temp) \ +do { \ + (head)->prev->next = (tail); \ + (tail)->prev->next = (head); \ + (temp) = (head)->prev; \ + (head)->prev = (tail)->prev; \ + (tail)->prev = (temp); \ +} while (0) + +#define concat_list(head, next_list) \ +do { \ + (next_list)->next->prev = (head)->prev; \ + (next_list)->prev->next = (head); \ + (head)->prev->next = (next_list)->next; \ + (head)->prev = (next_list)->prev; \ +} while (0) + +/** + * Make a empty list empty. + * + * \param sentinal list (sentinal element). + */ +#define make_empty_list(sentinal) \ +do { \ + (sentinal)->next = sentinal; \ + (sentinal)->prev = sentinal; \ +} while (0) + +/** + * Get list first element. 
+ * + * \param list list. + * + * \return pointer to first element. + */ +#define first_elem(list) ((list)->next) + +/** + * Get list last element. + * + * \param list list. + * + * \return pointer to last element. + */ +#define last_elem(list) ((list)->prev) + +/** + * Get next element. + * + * \param elem element. + * + * \return pointer to next element. + */ +#define next_elem(elem) ((elem)->next) + +/** + * Get previous element. + * + * \param elem element. + * + * \return pointer to previous element. + */ +#define prev_elem(elem) ((elem)->prev) + +/** + * Test whether element is at end of the list. + * + * \param list list. + * \param elem element. + * + * \return non-zero if element is at end of list, or zero otherwise. + */ +#define at_end(list, elem) ((elem) == (list)) + +/** + * Test if a list is empty. + * + * \param list list. + * + * \return non-zero if list empty, or zero otherwise. + */ +#define is_empty_list(list) ((list)->next == (list)) + +/** + * Walk through the elements of a list. + * + * \param ptr pointer to the current element. + * \param list list. + * + * \note It should be followed by a { } block or a single statement, as in a \c + * for loop. + */ +#define foreach(ptr, list) \ + for( ptr=(list)->next ; ptr!=list ; ptr=(ptr)->next ) + +/** + * Walk through the elements of a list. + * + * Same as #foreach but lets you unlink the current value during a list + * traversal. Useful for freeing a list, element by element. + * + * \param ptr pointer to the current element. + * \param t temporary pointer. + * \param list list. + * + * \note It should be followed by a { } block or a single statement, as in a \c + * for loop. + */ +#define foreach_s(ptr, t, list) \ + for(ptr=(list)->next,t=(ptr)->next; list != ptr; ptr=t, t=(t)->next) + +#endif From 725c17a9266c1141508da623c8781412853b70e4 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 10 May 2010 16:14:59 -0700 Subject: [PATCH 006/148] Makefile: Enable debugging of parser. 
This compiles the debugging code for the parser. It's not active unless the yydebug variable is set to a non-zero value. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0af7e05d1b2..d37e9233ec0 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o %.c %.h: %.y - bison --defines=$*.h --output=$*.c $^ + bison --debug --defines=$*.h --output=$*.c $^ %.c: %.l flex --outfile=$@ $< From 0b27b5f05191f07ed31e65ff07e5233672f3c33a Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 10 May 2010 16:16:06 -0700 Subject: [PATCH 007/148] Implement #define By using the recently-imported hash_table implementation. --- glcpp-lex.l | 23 +++++++++++++++++++---- glcpp-parse.y | 51 ++++++++++++++++++++++++++++++++++++++++++++------- glcpp.c | 10 ++++++---- glcpp.h | 21 +++++++++++++++++++-- 4 files changed, 88 insertions(+), 17 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 747e24056f4..a220fef76bf 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -31,12 +31,27 @@ %option reentrant noyywrap +%x ST_DEFINE +%x ST_DEFVAL + +SPACE [[:space:]] +NONSPACE [^[:space:]] +NOTNEWLINE [^\n] +HSPACE [ \t] +HASH ^{HSPACE}*# +IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* +DEFVAL {NONSPACE}{NOTNEWLINE}* %% - /* Silently eat all whitespace. */ -[[:space:]]+ +{HASH}define { BEGIN ST_DEFINE; return DEFINE; } - /* Any non-whitespace is a token. 
*/ -[^[:space:]]+ { return TOKEN; } +{HSPACE}+ +{IDENTIFIER} { BEGIN ST_DEFVAL; yylval = strdup (yytext); return IDENTIFIER; } + +{SPACE}+ +{DEFVAL} { BEGIN INITIAL; yylval = strdup (yytext); return DEFVAL; } + + /* Anything we don't specifically recognize is a stream of tokens */ +{NONSPACE}+ { yylval = strdup (yytext); return TOKEN; } %% diff --git a/glcpp-parse.y b/glcpp-parse.y index a2d10942538..89dc46497f5 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -27,30 +27,46 @@ #include "glcpp.h" -#define YYSTYPE int +#define YYLEX_PARAM parser->scanner void yyerror (void *scanner, const char *error); %} -%parse-param {void *scanner} +%parse-param {glcpp_parser_t *parser} %lex-param {void *scanner} +%token DEFINE +%token DEFVAL +%token IDENTIFIER %token TOKEN %% input: /* empty */ - | tokens + | content ; - -tokens: token - | tokens token +content: token + | directive + | content token + | content directive ; -token: TOKEN +directive: DEFINE IDENTIFIER DEFVAL { + hash_table_insert (parser->defines, $3, $2); +} +; + +token: TOKEN { + char *value = hash_table_find (parser->defines, $1); + if (value) + printf ("%s", value); + else + printf ("%s", $1); + free ($1); +} ; %% @@ -60,3 +76,24 @@ yyerror (void *scanner, const char *error) { fprintf (stderr, "Parse error: %s\n", error); } + +void +glcpp_parser_init (glcpp_parser_t *parser) +{ + yylex_init (&parser->scanner); + parser->defines = hash_table_ctor (32, hash_table_string_hash, + hash_table_string_compare); +} + +int +glcpp_parser_parse (glcpp_parser_t *parser) +{ + return yyparse (parser); +} + +void +glcpp_parser_fini (glcpp_parser_t *parser) +{ + yylex_destroy (parser->scanner); + hash_table_dtor (parser->defines); +} diff --git a/glcpp.c b/glcpp.c index eefac74be9a..d6c89df2f95 100644 --- a/glcpp.c +++ b/glcpp.c @@ -26,12 +26,14 @@ int main (void) { + glcpp_parser_t parser; int ret; - void *scanner; - yylex_init (&scanner); - ret = yyparse (scanner); - yylex_destroy (scanner); + glcpp_parser_init (&parser); + 
+ ret = glcpp_parser_parse (&parser); + + glcpp_parser_fini (&parser); return ret; } diff --git a/glcpp.h b/glcpp.h index 485387b8a5d..5278e1b971b 100644 --- a/glcpp.h +++ b/glcpp.h @@ -24,10 +24,27 @@ #ifndef GLCPP_H #define GLCPP_H -/* Generated by glcpp-lex.l to glcpp-lex.c */ +#include "hash_table.h" +#define YYSTYPE char * #define yyscan_t void* +typedef struct { + yyscan_t scanner; + struct hash_table *defines; +} glcpp_parser_t; + +void +glcpp_parser_init (glcpp_parser_t *parser); + +int +glcpp_parser_parse (glcpp_parser_t *parser); + +void +glcpp_parser_fini (glcpp_parser_t *parser); + +/* Generated by glcpp-lex.l to glcpp-lex.c */ + int yylex_init (yyscan_t *scanner); @@ -40,6 +57,6 @@ yylex_destroy (yyscan_t scanner); /* Generated by glcpp-parse.y to glcpp-parse.c */ int -yyparse (void *scanner); +yyparse (glcpp_parser_t *parser); #endif From e8c790b3ceab06eb0433c3a234d3e16980f7ef19 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 10 May 2010 16:21:10 -0700 Subject: [PATCH 008/148] Add a very simple test for the pre-processor. Validate desired test cases by ensuring the output of glcpp matches the output of the gcc preprocessor, (ignoring any lines of the gcc output beginning with '#'). Only one test case so far with a trivial #define. 
--- .gitignore | 3 +++ Makefile | 4 ++++ tests/001-define.c | 2 ++ tests/glcpp-test | 9 +++++++++ 4 files changed, 18 insertions(+) create mode 100644 tests/001-define.c create mode 100755 tests/glcpp-test diff --git a/.gitignore b/.gitignore index 5bbd660f22b..d67bd38c93c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,6 @@ glcpp-parse.c glcpp-parse.h *.o *~ +tests/*.expected +tests/*.gcc +tests/*.out diff --git a/Makefile b/Makefile index d37e9233ec0..38cc1f314a9 100644 --- a/Makefile +++ b/Makefile @@ -10,5 +10,9 @@ glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o glcpp-lex.c: glcpp-parse.h +test: + @(cd tests; ./glcpp-test) + clean: rm -f glcpp-lex.c glcpp-parse.c *.o *~ + rm -f tests/*.out tests/*.gcc tests/*.expected diff --git a/tests/001-define.c b/tests/001-define.c new file mode 100644 index 00000000000..cbf2fee0e75 --- /dev/null +++ b/tests/001-define.c @@ -0,0 +1,2 @@ +#define foo 1 +foo diff --git a/tests/glcpp-test b/tests/glcpp-test new file mode 100755 index 00000000000..25685eeabe5 --- /dev/null +++ b/tests/glcpp-test @@ -0,0 +1,9 @@ +#!/bin/sh + +for test in *.c; do + echo "Testing $test" + ../glcpp < $test > $test.out + gcc -E $test -o $test.gcc + grep -v '^#' < $test.gcc > $test.expected + diff -u $test.expected $test.out +done From beb26e8ac3152c4a7be43d7ee068b50e17b3ba18 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 11 May 2010 12:04:42 -0700 Subject: [PATCH 009/148] Add README file describing glcpp. Mostly this is a place for me to write down the URLs of the GLSL and C99 specifications that I need to write this code. --- README | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 README diff --git a/README b/README new file mode 100644 index 00000000000..ba833a49ffb --- /dev/null +++ b/README @@ -0,0 +1,14 @@ +glcpp -- GLSL "C" preprocessor + +This is a simple preprocessor designed to provide the preprocessing +needs of the GLSL language. 
The requirements for this preprocessor are +specified in the GLSL 1.30 specification availble from: + +http://www.opengl.org/registry/doc/GLSLangSpec.Full.1.30.08.pdf + +This specification is not precise on some semantics, (for example, +#define and #if), defining these merely "as is standard for C++ +preprocessors". To fill in these details, I've been using the C99 +standard (for which I had a convenient copy) as available from: + +http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf From 49206ef4c8adba5427e9d9b5e0dfc11345262890 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 11 May 2010 12:29:22 -0700 Subject: [PATCH 010/148] Add test for chained #define directives. Where one macro is defined in terms of another macro. The current implementation does not yet deal with this correctly. --- tests/002-define-chain.c | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tests/002-define-chain.c diff --git a/tests/002-define-chain.c b/tests/002-define-chain.c new file mode 100644 index 00000000000..87d75c68751 --- /dev/null +++ b/tests/002-define-chain.c @@ -0,0 +1,3 @@ +#define foo 1 +#define bar foo +bar From c6d5af335121f6027cc46ef9c5aa77aa4e5906ca Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 11 May 2010 12:30:09 -0700 Subject: [PATCH 011/148] Fix to handle chained #define directives. The fix is as simple as adding a loop to continue to lookup values in the hash table until one of the following termination conditions: 1. The token we look up has no definition 2. We get back the original symbol we started with This second termination condition prevents infinite iteration. 
--- glcpp-parse.y | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 89dc46497f5..a3a661b8bef 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -32,6 +32,9 @@ void yyerror (void *scanner, const char *error); +const char * +_resolve_token (glcpp_parser_t *parser, const char *token); + %} %parse-param {glcpp_parser_t *parser} @@ -59,14 +62,7 @@ directive: DEFINE IDENTIFIER DEFVAL { } ; -token: TOKEN { - char *value = hash_table_find (parser->defines, $1); - if (value) - printf ("%s", value); - else - printf ("%s", $1); - free ($1); -} +token: TOKEN { printf ("%s", _resolve_token (parser, $1)); free ($1); } ; %% @@ -97,3 +93,22 @@ glcpp_parser_fini (glcpp_parser_t *parser) yylex_destroy (parser->scanner); hash_table_dtor (parser->defines); } + +const char * +_resolve_token (glcpp_parser_t *parser, const char *token) +{ + const char *orig = token; + const char *replacement; + + while (1) { + replacement = hash_table_find (parser->defines, token); + if (replacement == NULL) + break; + token = replacement; + if (strcmp (token, orig) == 0) + break; + } + + return token; +} + From 34db0d332e0a1477971b7c29c18899e7264f9bce Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 11 May 2010 12:35:06 -0700 Subject: [PATCH 012/148] Add a couple more tests for chained #define directives. One with the chained defines in the opposite order, and one with the potential to trigger an infinite-loop bug through mutual recursion. Each of these tests pass already. 
--- tests/003-define-chain-reverse.c | 3 +++ tests/004-define-recursive.c | 6 ++++++ 2 files changed, 9 insertions(+) create mode 100644 tests/003-define-chain-reverse.c create mode 100644 tests/004-define-recursive.c diff --git a/tests/003-define-chain-reverse.c b/tests/003-define-chain-reverse.c new file mode 100644 index 00000000000..a18b724eca0 --- /dev/null +++ b/tests/003-define-chain-reverse.c @@ -0,0 +1,3 @@ +#define bar foo +#define foo 1 +bar diff --git a/tests/004-define-recursive.c b/tests/004-define-recursive.c new file mode 100644 index 00000000000..2ac56ea3dcf --- /dev/null +++ b/tests/004-define-recursive.c @@ -0,0 +1,6 @@ +#define foo bar +#define bar baz +#define baz foo +foo +bar +baz From df2ab5b99237ab0b6760226554b133a5ccd11579 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 11 May 2010 12:39:29 -0700 Subject: [PATCH 013/148] Add tests defining a macro to be a literal and another macro. These 3 new tests are modeled after 3 existing tests but made slightly more complex since now instead of defining a new macro to be an existing macro, we define it to be replaced with two tokens, (one a literal, and one an existing macro). These tests all fail currently because the replacement lookup is currently happening on the basis of the entire replacement string rather than on a list of tokens.
--- tests/005-define-composite-chain.c | 3 +++ tests/006-define-composite-chain-reverse.c | 3 +++ tests/007-define-composite-recursive.c | 6 ++++++ 3 files changed, 12 insertions(+) create mode 100644 tests/005-define-composite-chain.c create mode 100644 tests/006-define-composite-chain-reverse.c create mode 100644 tests/007-define-composite-recursive.c diff --git a/tests/005-define-composite-chain.c b/tests/005-define-composite-chain.c new file mode 100644 index 00000000000..f5521df968d --- /dev/null +++ b/tests/005-define-composite-chain.c @@ -0,0 +1,3 @@ +#define foo 1 +#define bar a foo +bar diff --git a/tests/006-define-composite-chain-reverse.c b/tests/006-define-composite-chain-reverse.c new file mode 100644 index 00000000000..4bb91a1221a --- /dev/null +++ b/tests/006-define-composite-chain-reverse.c @@ -0,0 +1,3 @@ +#define bar a foo +#define foo 1 +bar diff --git a/tests/007-define-composite-recursive.c b/tests/007-define-composite-recursive.c new file mode 100644 index 00000000000..5784565bdf3 --- /dev/null +++ b/tests/007-define-composite-recursive.c @@ -0,0 +1,6 @@ +#define foo a bar +#define bar b baz +#define baz c foo +foo +bar +baz From 33cc400714f379ef13e876b4aedd0de8cb5d033d Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 12 May 2010 12:17:10 -0700 Subject: [PATCH 014/148] Fix defines involving both literals and other defined macros. We now store a list of tokens in our hash-table rather than a single string. This lets us replace each macro in the value as necessary. This code adds a link dependency on talloc which does exactly what we want in terms of memory management for a parser. The 3 tests added in the previous commit now pass. 
--- Makefile | 7 ++ glcpp-lex.l | 34 +++++++--- glcpp-parse.y | 181 ++++++++++++++++++++++++++++++++++++++++---------- glcpp.c | 10 +-- glcpp.h | 25 ++++--- 5 files changed, 203 insertions(+), 54 deletions(-) diff --git a/Makefile b/Makefile index 38cc1f314a9..83519328bf6 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,13 @@ +# Debug symbols by default, but let the user avoid that with something +# like "make CFLAGS=-O2" +CFLAGS = -g + +# But we use 'override' here so that "make CFLAGS=-O2" will still have +# all the warnings enabled. override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o + gcc -o $@ -ltalloc $^ %.c %.h: %.y bison --debug --defines=$*.h --output=$*.c $^ diff --git a/glcpp-lex.l b/glcpp-lex.l index a220fef76bf..f1a35607794 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -36,22 +36,40 @@ SPACE [[:space:]] NONSPACE [^[:space:]] -NOTNEWLINE [^\n] +NEWLINE [\n] HSPACE [ \t] HASH ^{HSPACE}*# IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* -DEFVAL {NONSPACE}{NOTNEWLINE}* +TOKEN {NONSPACE}+ + %% -{HASH}define { BEGIN ST_DEFINE; return DEFINE; } +{HASH}define{HSPACE}* { + BEGIN ST_DEFINE; + return DEFINE; +} -{HSPACE}+ -{IDENTIFIER} { BEGIN ST_DEFVAL; yylval = strdup (yytext); return IDENTIFIER; } +{IDENTIFIER} { + yylval.str = strdup (yytext); + return IDENTIFIER; +} -{SPACE}+ -{DEFVAL} { BEGIN INITIAL; yylval = strdup (yytext); return DEFVAL; } +{TOKEN} { + yylval.str = strdup (yytext); + return TOKEN; +} + +\n { + BEGIN INITIAL; + return NEWLINE; +} + +{SPACE}+ /* Anything we don't specifically recognize is a stream of tokens */ -{NONSPACE}+ { yylval = strdup (yytext); return TOKEN; } +{NONSPACE}+ { + yylval.str = strdup (yytext); + return TOKEN; +} %% diff --git a/glcpp-parse.y b/glcpp-parse.y index a3a661b8bef..eae96efb30a 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -24,61 +24,158 @@ #include #include +#include #include "glcpp.h" #define YYLEX_PARAM parser->scanner +struct glcpp_parser { + 
yyscan_t scanner; + struct hash_table *defines; +}; + void yyerror (void *scanner, const char *error); -const char * -_resolve_token (glcpp_parser_t *parser, const char *token); +void +_print_resolved_token (glcpp_parser_t *parser, const char *token); + +list_t * +_list_create (void *ctx); + +void +_list_append (list_t *list, const char *str); %} +%union { + char *str; + list_t *list; +} + %parse-param {glcpp_parser_t *parser} %lex-param {void *scanner} -%token DEFINE -%token DEFVAL -%token IDENTIFIER -%token TOKEN +%token DEFINE IDENTIFIER NEWLINE TOKEN +%type token IDENTIFIER TOKEN +%type replacement_list %% -input: /* empty */ - | content +input: + /* empty */ +| content ; -content: token - | directive - | content token - | content directive +content: + token { + _print_resolved_token (parser, $1); + free ($1); + } +| directive +| content token { + _print_resolved_token (parser, $2); + free ($2); + } +| content directive ; -directive: DEFINE IDENTIFIER DEFVAL { - hash_table_insert (parser->defines, $3, $2); +directive: + DEFINE IDENTIFIER replacement_list NEWLINE { + char *key = talloc_strdup ($3, $2); + free ($2); + hash_table_insert (parser->defines, $3, key); + printf ("\n"); + } +; + +replacement_list: + /* empty */ { + $$ = _list_create (parser); + } + +| replacement_list token { + _list_append ($1, $2); + free ($2); + $$ = $1; + } +; + +token: + TOKEN { $$ = $1; } +| IDENTIFIER { $$ = $1; } +; + +%% + +list_t * +_list_create (void *ctx) +{ + list_t *list; + + list = talloc (ctx, list_t); + if (list == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + + list->head = NULL; + list->tail = NULL; + + return list; } -; -token: TOKEN { printf ("%s", _resolve_token (parser, $1)); free ($1); } -; +void +_list_append (list_t *list, const char *str) +{ + node_t *node; -%% + node = talloc (list, node_t); + if (node == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + node->str = talloc_strdup (node, str); + if (node->str == NULL) { + 
fprintf (stderr, "Out of memory.\n"); + exit (1); + } + + node->next = NULL; + + if (list->head == NULL) { + list->head = node; + } else { + list->tail->next = node; + } + + list->tail = node; +} + void yyerror (void *scanner, const char *error) { fprintf (stderr, "Parse error: %s\n", error); } -void -glcpp_parser_init (glcpp_parser_t *parser) +glcpp_parser_t * +glcpp_parser_create (void) { + glcpp_parser_t *parser; + + parser = talloc (NULL, glcpp_parser_t); + if (parser == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + yylex_init (&parser->scanner); parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); + + return parser; } int @@ -88,27 +185,43 @@ glcpp_parser_parse (glcpp_parser_t *parser) } void -glcpp_parser_fini (glcpp_parser_t *parser) +glcpp_parser_destroy (glcpp_parser_t *parser) { yylex_destroy (parser->scanner); hash_table_dtor (parser->defines); + talloc_free (parser); } -const char * -_resolve_token (glcpp_parser_t *parser, const char *token) +static void +_print_resolved_recursive (glcpp_parser_t *parser, + const char *token, + const char *orig, + int *first) { - const char *orig = token; - const char *replacement; + list_t *replacement; + node_t *node; - while (1) { - replacement = hash_table_find (parser->defines, token); - if (replacement == NULL) - break; - token = replacement; - if (strcmp (token, orig) == 0) - break; + replacement = hash_table_find (parser->defines, token); + if (replacement == NULL) { + printf ("%s%s", *first ? "" : " ", token); + *first = 0; + } else { + for (node = replacement->head ; node ; node = node->next) { + token = node->str; + if (strcmp (token, orig) == 0) { + printf ("%s%s", *first ? 
"" : " ", token); + *first = 0; + } else { + _print_resolved_recursive (parser, token, orig, first); + } + } } - - return token; } +void +_print_resolved_token (glcpp_parser_t *parser, const char *token) +{ + int first = 1; + + _print_resolved_recursive (parser, token, token, &first); +} diff --git a/glcpp.c b/glcpp.c index d6c89df2f95..fcdc4ed8a0f 100644 --- a/glcpp.c +++ b/glcpp.c @@ -23,17 +23,19 @@ #include "glcpp.h" +extern int yydebug; + int main (void) { - glcpp_parser_t parser; + glcpp_parser_t *parser; int ret; - glcpp_parser_init (&parser); + parser = glcpp_parser_create (); - ret = glcpp_parser_parse (&parser); + ret = glcpp_parser_parse (parser); - glcpp_parser_fini (&parser); + glcpp_parser_destroy (parser); return ret; } diff --git a/glcpp.h b/glcpp.h index 5278e1b971b..6fea9333e85 100644 --- a/glcpp.h +++ b/glcpp.h @@ -26,22 +26,31 @@ #include "hash_table.h" -#define YYSTYPE char * #define yyscan_t void* -typedef struct { - yyscan_t scanner; - struct hash_table *defines; -} glcpp_parser_t; +/* Some data types used for parser value. */ -void -glcpp_parser_init (glcpp_parser_t *parser); + +typedef struct node { + const char *str; + struct node *next; +} node_t; + +typedef struct list { + node_t *head; + node_t *tail; +} list_t; + +typedef struct glcpp_parser glcpp_parser_t; + +glcpp_parser_t * +glcpp_parser_create (void); int glcpp_parser_parse (glcpp_parser_t *parser); void -glcpp_parser_fini (glcpp_parser_t *parser); +glcpp_parser_destroy (glcpp_parser_t *parser); /* Generated by glcpp-lex.l to glcpp-lex.c */ From 5070a20cd1e65d52856bd74558f9a34f8dca114f Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 12 May 2010 12:45:33 -0700 Subject: [PATCH 015/148] Convert lexer to talloc and add xtalloc wrappers. The lexer was previously using strdup (expecting the parser to free), but is now more consistent, easier to use, and slightly more efficent by using talloc along with the parser. 
Also, we add xtalloc and xtalloc_strdup wrappers around talloc and talloc_strdup to put all of the out-of-memory-checking code in one place. --- Makefile | 2 +- glcpp-lex.l | 7 ++++--- glcpp-parse.y | 39 ++++++++++---------------------------- glcpp.h | 12 +++++++++++- xtalloc.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 78 insertions(+), 34 deletions(-) create mode 100644 xtalloc.c diff --git a/Makefile b/Makefile index 83519328bf6..7233150a80b 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ CFLAGS = -g # all the warnings enabled. override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused -glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o +glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o xtalloc.o gcc -o $@ -ltalloc $^ %.c %.h: %.y diff --git a/glcpp-lex.l b/glcpp-lex.l index f1a35607794..ec91538a73c 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -30,6 +30,7 @@ %} %option reentrant noyywrap +%option extra-type="glcpp_parser_t *" %x ST_DEFINE %x ST_DEFVAL @@ -50,12 +51,12 @@ TOKEN {NONSPACE}+ } {IDENTIFIER} { - yylval.str = strdup (yytext); + yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } {TOKEN} { - yylval.str = strdup (yytext); + yylval.str = xtalloc_strdup (yyextra, yytext); return TOKEN; } @@ -68,7 +69,7 @@ TOKEN {NONSPACE}+ /* Anything we don't specifically recognize is a stream of tokens */ {NONSPACE}+ { - yylval.str = strdup (yytext); + yylval.str = xtalloc_strdup (yyextra, yytext); return TOKEN; } diff --git a/glcpp-parse.y b/glcpp-parse.y index eae96efb30a..1a7ec4970d5 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -71,21 +71,20 @@ input: content: token { _print_resolved_token (parser, $1); - free ($1); + talloc_free ($1); } | directive | content token { _print_resolved_token (parser, $2); - free ($2); + talloc_free ($2); } | content directive ; directive: DEFINE IDENTIFIER replacement_list NEWLINE { - char *key = talloc_strdup ($3, $2); - free ($2); - hash_table_insert 
(parser->defines, $3, key); + talloc_steal ($3, $2); + hash_table_insert (parser->defines, $3, $2); printf ("\n"); } ; @@ -97,7 +96,7 @@ replacement_list: | replacement_list token { _list_append ($1, $2); - free ($2); + talloc_free ($2); $$ = $1; } ; @@ -114,12 +113,7 @@ _list_create (void *ctx) { list_t *list; - list = talloc (ctx, list_t); - if (list == NULL) { - fprintf (stderr, "Out of memory.\n"); - exit (1); - } - + list = xtalloc (ctx, list_t); list->head = NULL; list->tail = NULL; @@ -131,17 +125,8 @@ _list_append (list_t *list, const char *str) { node_t *node; - node = talloc (list, node_t); - if (node == NULL) { - fprintf (stderr, "Out of memory.\n"); - exit (1); - } - - node->str = talloc_strdup (node, str); - if (node->str == NULL) { - fprintf (stderr, "Out of memory.\n"); - exit (1); - } + node = xtalloc (list, node_t); + node->str = xtalloc_strdup (node, str); node->next = NULL; @@ -165,13 +150,9 @@ glcpp_parser_create (void) { glcpp_parser_t *parser; - parser = talloc (NULL, glcpp_parser_t); - if (parser == NULL) { - fprintf (stderr, "Out of memory.\n"); - exit (1); - } + parser = xtalloc (NULL, glcpp_parser_t); - yylex_init (&parser->scanner); + yylex_init_extra (parser, &parser->scanner); parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); diff --git a/glcpp.h b/glcpp.h index 6fea9333e85..8472570ccb6 100644 --- a/glcpp.h +++ b/glcpp.h @@ -55,7 +55,7 @@ glcpp_parser_destroy (glcpp_parser_t *parser); /* Generated by glcpp-lex.l to glcpp-lex.c */ int -yylex_init (yyscan_t *scanner); +yylex_init_extra (glcpp_parser_t *parser, yyscan_t* scanner); int yylex (yyscan_t scanner); @@ -68,4 +68,14 @@ yylex_destroy (yyscan_t scanner); int yyparse (glcpp_parser_t *parser); +/* xtalloc - wrappers around talloc to check for out-of-memory */ + +#define xtalloc(ctx, type) (type *)xtalloc_named_const(ctx, sizeof(type), #type) + +void * +xtalloc_named_const (const void *context, size_t size, const char *name); + +char * 
+xtalloc_strdup (const void *t, const char *p); + #endif diff --git a/xtalloc.c b/xtalloc.c new file mode 100644 index 00000000000..849e12d3491 --- /dev/null +++ b/xtalloc.c @@ -0,0 +1,52 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include + +void * +xtalloc_named_const (const void *context, size_t size, const char *name) +{ + void *ret; + + ret = talloc_named_const (context, size, name); + if (ret == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + + return ret; +} + +char * +xtalloc_strdup (const void *t, const char *p) +{ + char *ret; + + ret = talloc_strdup (t, p); + if (ret == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + + return ret; +} From 39cd7c2f2e2d27a93ad63191f02adb56be31c0ce Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 12 May 2010 12:49:07 -0700 Subject: [PATCH 016/148] Add test for an empty definition. Happily this one passes without needing any additional code. --- tests/008-define-empty.c | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tests/008-define-empty.c diff --git a/tests/008-define-empty.c b/tests/008-define-empty.c new file mode 100644 index 00000000000..b1bd17ec215 --- /dev/null +++ b/tests/008-define-empty.c @@ -0,0 +1,2 @@ +#define foo +foo From 7bdd1f36d9f238e6af4846d46b9dd30fffc772a5 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 12 May 2010 12:51:31 -0700 Subject: [PATCH 017/148] Add test for #undef. Which hasn't been implemented yet, so this test fails. --- tests/009-undef.c | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 tests/009-undef.c diff --git a/tests/009-undef.c b/tests/009-undef.c new file mode 100644 index 00000000000..3fc1fb44243 --- /dev/null +++ b/tests/009-undef.c @@ -0,0 +1,4 @@ +#define foo 1 +foo +#undef foo +foo From cd27e6413a683d3ba1763ec68edfb1ff13193fc3 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 12 May 2010 13:11:50 -0700 Subject: [PATCH 018/148] Add support for the #undef macro. This isn't ideal for two reasons: 1. There's a bunch of stateful redundancy in the lexer that should be cleaned up. 2. 
The hash table does not provide a mechanism to delete an entry, so we waste memory to add a new NULL entry in front of the existing entry with the same key. But this does at least work, (it passes the recently added undef test case). --- glcpp-lex.l | 19 ++++++++++++++++++- glcpp-parse.y | 26 +++++++++++++++++++++----- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index ec91538a73c..9ec4deb7185 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -33,7 +33,7 @@ %option extra-type="glcpp_parser_t *" %x ST_DEFINE -%x ST_DEFVAL +%x ST_UNDEF SPACE [[:space:]] NONSPACE [^[:space:]] @@ -67,6 +67,23 @@ TOKEN {NONSPACE}+ {SPACE}+ +{HASH}undef{HSPACE}* { + BEGIN ST_UNDEF; + return UNDEF; +} + +{IDENTIFIER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; +} + +\n { + BEGIN INITIAL; + return NEWLINE; +} + +{SPACE}+ + /* Anything we don't specifically recognize is a stream of tokens */ {NONSPACE}+ { yylval.str = xtalloc_strdup (yyextra, yytext); diff --git a/glcpp-parse.y b/glcpp-parse.y index 1a7ec4970d5..29614fb1a4d 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -57,7 +57,7 @@ _list_append (list_t *list, const char *str); %parse-param {glcpp_parser_t *parser} %lex-param {void *scanner} -%token DEFINE IDENTIFIER NEWLINE TOKEN +%token DEFINE IDENTIFIER NEWLINE TOKEN UNDEF %type token IDENTIFIER TOKEN %type replacement_list @@ -73,19 +73,35 @@ content: _print_resolved_token (parser, $1); talloc_free ($1); } -| directive +| directive_with_newline | content token { _print_resolved_token (parser, $2); talloc_free ($2); } -| content directive +| content directive_with_newline +; + +directive_with_newline: + directive NEWLINE { + printf ("\n"); + } ; directive: - DEFINE IDENTIFIER replacement_list NEWLINE { + DEFINE IDENTIFIER replacement_list { talloc_steal ($3, $2); hash_table_insert (parser->defines, $3, $2); - printf ("\n"); + } +| UNDEF IDENTIFIER { + list_t *replacement = hash_table_find (parser->defines, $2); + if 
(replacement) { + /* XXX: Need hash table to support a real way + * to remove an element rather than prefixing + * a new node with data of NULL like this. */ + hash_table_insert (parser->defines, NULL, $2); + talloc_free (replacement); + } + talloc_free ($2); } ; From a68e668b17a00ed5714cdb1e7809b7ba4522d89d Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 12 May 2010 13:14:08 -0700 Subject: [PATCH 019/148] Add test case to define, undef, and then again define a macro. Happily, this is another test case that works just fine without any additional code. --- tests/010-undef-re-define.c | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 tests/010-undef-re-define.c diff --git a/tests/010-undef-re-define.c b/tests/010-undef-re-define.c new file mode 100644 index 00000000000..32ff73798b1 --- /dev/null +++ b/tests/010-undef-re-define.c @@ -0,0 +1,6 @@ +#define foo 1 +foo +#undef foo +foo +#define foo 2 +foo From 012295f94c4b02d2683072d9aa6ab56f81409507 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 12 May 2010 13:19:23 -0700 Subject: [PATCH 020/148] Simplify lexer significantly (remove all stateful lexing). We are able to remove all state by simply passing NEWLINE through as a token unconditionally (as opposed to only passing newline when on a directive line as we did previously).
--- glcpp-lex.l | 41 +++++++++-------------------------------- glcpp-parse.y | 6 ++++++ 2 files changed, 15 insertions(+), 32 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 9ec4deb7185..18d9050d715 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,9 +32,6 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" -%x ST_DEFINE -%x ST_UNDEF - SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -46,48 +43,28 @@ TOKEN {NONSPACE}+ %% {HASH}define{HSPACE}* { - BEGIN ST_DEFINE; return DEFINE; } -{IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -{TOKEN} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return TOKEN; -} - -\n { - BEGIN INITIAL; - return NEWLINE; -} - -{SPACE}+ - {HASH}undef{HSPACE}* { - BEGIN ST_UNDEF; return UNDEF; } -{IDENTIFIER} { + +{IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } -\n { - BEGIN INITIAL; - return NEWLINE; -} - -{SPACE}+ - - /* Anything we don't specifically recognize is a stream of tokens */ -{NONSPACE}+ { +{TOKEN} { yylval.str = xtalloc_strdup (yyextra, yytext); return TOKEN; } +\n { + return NEWLINE; +} + +{SPACE}+ + %% diff --git a/glcpp-parse.y b/glcpp-parse.y index 29614fb1a4d..9883a6f9532 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -74,11 +74,17 @@ content: talloc_free ($1); } | directive_with_newline +| NEWLINE { + printf ("\n"); + } | content token { _print_resolved_token (parser, $2); talloc_free ($2); } | content directive_with_newline +| content NEWLINE { + printf ("\n"); + } ; directive_with_newline: From 8bcb6f1777ff8f763c67552c111ce8e637d78410 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 12 May 2010 13:21:20 -0700 Subject: [PATCH 021/148] Remove some redundancy in the top-level production. Previously we had two copies of all top-level actions, (once in a list context and once in a non-list context). 
Much simpler to instead have a single list-context production with no action and then only have the actions in their own non-list contexts. --- glcpp-parse.y | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 9883a6f9532..91fc5b98fc5 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -65,7 +65,7 @@ _list_append (list_t *list, const char *str); input: /* empty */ -| content +| input content ; content: @@ -77,14 +77,6 @@ content: | NEWLINE { printf ("\n"); } -| content token { - _print_resolved_token (parser, $2); - talloc_free ($2); - } -| content directive_with_newline -| content NEWLINE { - printf ("\n"); - } ; directive_with_newline: From 9f62a7e9e25efd79ebf46c64166876436f88f08a Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 07:38:29 -0700 Subject: [PATCH 022/148] Make the lexer distinguish between identifiers and defined macros. This is just a minor style improvement for now. But the same mechanism, (having the lexer peek into the table of defined macros), will be essential when we add function-like macros in addition to the current object-like macros. 
--- glcpp-lex.l | 5 ++++- glcpp-parse.y | 51 ++++++++++++++++++++++++++++++++++----------------- glcpp.h | 4 ++++ 3 files changed, 42 insertions(+), 18 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 18d9050d715..3622db939e7 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -53,7 +53,10 @@ TOKEN {NONSPACE}+ {IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; + if (glcpp_parser_macro_defined (yyextra, yylval.str)) + return MACRO; + else + return IDENTIFIER; } {TOKEN} { diff --git a/glcpp-parse.y b/glcpp-parse.y index 91fc5b98fc5..4d6475497bf 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -39,7 +39,7 @@ void yyerror (void *scanner, const char *error); void -_print_resolved_token (glcpp_parser_t *parser, const char *token); +_print_expanded_macro (glcpp_parser_t *parser, const char *macro); list_t * _list_create (void *ctx); @@ -57,8 +57,8 @@ _list_append (list_t *list, const char *str); %parse-param {glcpp_parser_t *parser} %lex-param {void *scanner} -%token DEFINE IDENTIFIER NEWLINE TOKEN UNDEF -%type token IDENTIFIER TOKEN +%token DEFINE IDENTIFIER MACRO NEWLINE TOKEN UNDEF +%type IDENTIFIER MACRO TOKEN string %type replacement_list %% @@ -69,8 +69,16 @@ input: ; content: - token { - _print_resolved_token (parser, $1); + IDENTIFIER { + printf ("%s", $1); + talloc_free ($1); + } +| TOKEN { + printf ("%s", $1); + talloc_free ($1); + } +| MACRO { + _print_expanded_macro (parser, $1); talloc_free ($1); } | directive_with_newline @@ -90,7 +98,7 @@ directive: talloc_steal ($3, $2); hash_table_insert (parser->defines, $3, $2); } -| UNDEF IDENTIFIER { +| UNDEF MACRO { list_t *replacement = hash_table_find (parser->defines, $2); if (replacement) { /* XXX: Need hash table to support a real way @@ -108,16 +116,17 @@ replacement_list: $$ = _list_create (parser); } -| replacement_list token { +| replacement_list string { _list_append ($1, $2); talloc_free ($2); $$ = $1; } ; -token: - TOKEN { $$ = $1; } -| IDENTIFIER { $$ = $1; } +string: 
+ IDENTIFIER { $$ = $1; } +| MACRO { $$ = $1; } +| TOKEN { $$ = $1; } ; %% @@ -187,11 +196,17 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } +int +glcpp_parser_macro_defined (glcpp_parser_t *parser, const char *identifier) +{ + return (hash_table_find (parser->defines, identifier) != NULL); +} + static void -_print_resolved_recursive (glcpp_parser_t *parser, - const char *token, - const char *orig, - int *first) +_print_expanded_macro_recursive (glcpp_parser_t *parser, + const char *token, + const char *orig, + int *first) { list_t *replacement; node_t *node; @@ -207,16 +222,18 @@ _print_resolved_recursive (glcpp_parser_t *parser, printf ("%s%s", *first ? "" : " ", token); *first = 0; } else { - _print_resolved_recursive (parser, token, orig, first); + _print_expanded_macro_recursive (parser, + token, orig, + first); } } } } void -_print_resolved_token (glcpp_parser_t *parser, const char *token) +_print_expanded_macro (glcpp_parser_t *parser, const char *macro) { int first = 1; - _print_resolved_recursive (parser, token, token, &first); + _print_expanded_macro_recursive (parser, macro, macro, &first); } diff --git a/glcpp.h b/glcpp.h index 8472570ccb6..39d6d5d0ebb 100644 --- a/glcpp.h +++ b/glcpp.h @@ -52,6 +52,10 @@ glcpp_parser_parse (glcpp_parser_t *parser); void glcpp_parser_destroy (glcpp_parser_t *parser); +int +glcpp_parser_macro_defined (glcpp_parser_t *parser, + const char *identifier); + /* Generated by glcpp-lex.l to glcpp-lex.c */ int From 4abc3dec720933e78a266417cffb2ea7b16d497f Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 09:34:21 -0700 Subject: [PATCH 023/148] Add tests for the structure of function-like macros. These test only the most basic aspect of parsing of function-like macros. Specifically, none of the definitions of these function like macros use the arguments of the function. No function-like macros are implemented yet, so all of these fail for now. 
--- tests/011-define-func-empty.c | 2 ++ tests/012-define-func-no-args.c | 2 ++ tests/013-define-func-1-arg-unused.c | 2 ++ tests/014-define-func-2-arg-unused.c | 2 ++ 4 files changed, 8 insertions(+) create mode 100644 tests/011-define-func-empty.c create mode 100644 tests/012-define-func-no-args.c create mode 100644 tests/013-define-func-1-arg-unused.c create mode 100644 tests/014-define-func-2-arg-unused.c diff --git a/tests/011-define-func-empty.c b/tests/011-define-func-empty.c new file mode 100644 index 00000000000..d9ce13c2284 --- /dev/null +++ b/tests/011-define-func-empty.c @@ -0,0 +1,2 @@ +#define foo() +foo() diff --git a/tests/012-define-func-no-args.c b/tests/012-define-func-no-args.c new file mode 100644 index 00000000000..c2bb730b115 --- /dev/null +++ b/tests/012-define-func-no-args.c @@ -0,0 +1,2 @@ +#define foo() bar +foo() diff --git a/tests/013-define-func-1-arg-unused.c b/tests/013-define-func-1-arg-unused.c new file mode 100644 index 00000000000..f78fb8b118a --- /dev/null +++ b/tests/013-define-func-1-arg-unused.c @@ -0,0 +1,2 @@ +#define foo(x) 1 +foo(bar) diff --git a/tests/014-define-func-2-arg-unused.c b/tests/014-define-func-2-arg-unused.c new file mode 100644 index 00000000000..11feb2624b7 --- /dev/null +++ b/tests/014-define-func-2-arg-unused.c @@ -0,0 +1,2 @@ +#define foo(x,y) 1 +foo(bar,baz) From fcbbb4688641e46270ba0cd531639df9b964f697 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 09:36:23 -0700 Subject: [PATCH 024/148] Add support for the structure of function-like macros. We accept the structure of arguments in both macro definition and macro invocation, but we don't yet expand those arguments. This is just enough code to pass the recently-added tests, but does not yet provide any sort of useful function-like macro. 
--- Makefile | 2 +- glcpp-lex.l | 20 +++-- glcpp-parse.y | 214 ++++++++++++++++++++++++++++++++++++++++++++------ glcpp.h | 12 ++- 4 files changed, 214 insertions(+), 34 deletions(-) diff --git a/Makefile b/Makefile index 7233150a80b..c5472a86b3c 100644 --- a/Makefile +++ b/Makefile @@ -22,4 +22,4 @@ test: clean: rm -f glcpp-lex.c glcpp-parse.c *.o *~ - rm -f tests/*.out tests/*.gcc tests/*.expected + rm -f tests/*.out tests/*.gcc tests/*.expected tests/*~ diff --git a/glcpp-lex.l b/glcpp-lex.l index 3622db939e7..c6e545aa8ed 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -38,7 +38,7 @@ NEWLINE [\n] HSPACE [ \t] HASH ^{HSPACE}*# IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* -TOKEN {NONSPACE}+ +TOKEN [^[:space:](),]+ %% @@ -53,12 +53,22 @@ TOKEN {NONSPACE}+ {IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); - if (glcpp_parser_macro_defined (yyextra, yylval.str)) - return MACRO; - else - return IDENTIFIER; + switch (glcpp_parser_macro_type (yyextra, yylval.str)) + { + case MACRO_TYPE_UNDEFINED: + return IDENTIFIER; + break; + case MACRO_TYPE_OBJECT: + return OBJ_MACRO; + break; + case MACRO_TYPE_FUNCTION: + return FUNC_MACRO; + break; + } } +[(),] { return yytext[0]; } + {TOKEN} { yylval.str = xtalloc_strdup (yyextra, yytext); return TOKEN; diff --git a/glcpp-parse.y b/glcpp-parse.y index 4d6475497bf..2e40db525b8 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -24,12 +24,19 @@ #include #include +#include #include #include "glcpp.h" #define YYLEX_PARAM parser->scanner +typedef struct { + int is_function; + list_t *parameter_list; + list_t *replacement_list; +} macro_t; + struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; @@ -39,13 +46,32 @@ void yyerror (void *scanner, const char *error); void -_print_expanded_macro (glcpp_parser_t *parser, const char *macro); +_define_object_macro (glcpp_parser_t *parser, + const char *macro, + list_t *replacement_list); + +void +_define_function_macro (glcpp_parser_t *parser, + const char *macro, + list_t 
*parameter_list, + list_t *replacement_list); + +void +_print_expanded_object_macro (glcpp_parser_t *parser, const char *macro); + +void +_print_expanded_function_macro (glcpp_parser_t *parser, + const char *macro, + list_t *arguments); list_t * _list_create (void *ctx); void -_list_append (list_t *list, const char *str); +_list_append_item (list_t *list, const char *str); + +void +_list_append_list (list_t *list, list_t *tail); %} @@ -57,9 +83,9 @@ _list_append (list_t *list, const char *str); %parse-param {glcpp_parser_t *parser} %lex-param {void *scanner} -%token DEFINE IDENTIFIER MACRO NEWLINE TOKEN UNDEF -%type IDENTIFIER MACRO TOKEN string -%type replacement_list +%token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO TOKEN UNDEF +%type FUNC_MACRO IDENTIFIER OBJ_MACRO TOKEN string +%type argument argument_list parameter_list replacement_list %% @@ -77,16 +103,48 @@ content: printf ("%s", $1); talloc_free ($1); } -| MACRO { - _print_expanded_macro (parser, $1); - talloc_free ($1); - } +| macro | directive_with_newline | NEWLINE { printf ("\n"); } ; +macro: + FUNC_MACRO '(' argument_list ')' { + _print_expanded_function_macro (parser, $1, $3); + } +| OBJ_MACRO { + _print_expanded_object_macro (parser, $1); + talloc_free ($1); + } +; + +argument_list: + /* empty */ { + $$ = _list_create (parser); + } +| argument { + $$ = _list_create (parser); + _list_append_list ($$, $1); + } +| argument_list ',' argument { + _list_append_list ($1, $3); + $$ = $1; + } +; + +argument: + /* empty */ { + $$ = _list_create (parser); + } +| argument string { + _list_append_item ($1, $2); + talloc_free ($2); + } +| argument '(' argument ')' +; + directive_with_newline: directive NEWLINE { printf ("\n"); @@ -95,10 +153,23 @@ directive_with_newline: directive: DEFINE IDENTIFIER replacement_list { - talloc_steal ($3, $2); - hash_table_insert (parser->defines, $3, $2); + _define_object_macro (parser, $2, $3); } -| UNDEF MACRO { +| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list 
{ + _define_function_macro (parser, $2, $4, $6); + } +| UNDEF FUNC_MACRO { + list_t *replacement = hash_table_find (parser->defines, $2); + if (replacement) { + /* XXX: Need hash table to support a real way + * to remove an element rather than prefixing + * a new node with data of NULL like this. */ + hash_table_insert (parser->defines, NULL, $2); + talloc_free (replacement); + } + talloc_free ($2); + } +| UNDEF OBJ_MACRO { list_t *replacement = hash_table_find (parser->defines, $2); if (replacement) { /* XXX: Need hash table to support a real way @@ -115,17 +186,33 @@ replacement_list: /* empty */ { $$ = _list_create (parser); } - | replacement_list string { - _list_append ($1, $2); + _list_append_item ($1, $2); talloc_free ($2); $$ = $1; } ; +parameter_list: + /* empty */ { + $$ = _list_create (parser); + } +| IDENTIFIER { + $$ = _list_create (parser); + _list_append_item ($$, $1); + talloc_free ($1); + } +| parameter_list ',' IDENTIFIER { + _list_append_item ($1, $3); + talloc_free ($3); + $$ = $1; + } +; + string: IDENTIFIER { $$ = $1; } -| MACRO { $$ = $1; } +| FUNC_MACRO { $$ = $1; } +| OBJ_MACRO { $$ = $1; } | TOKEN { $$ = $1; } ; @@ -144,7 +231,19 @@ _list_create (void *ctx) } void -_list_append (list_t *list, const char *str) +_list_append_list (list_t *list, list_t *tail) +{ + if (list->head == NULL) { + list->head = tail->head; + } else { + list->tail->next = tail->head; + } + + list->tail = tail->tail; +} + +void +_list_append_item (list_t *list, const char *str) { node_t *node; @@ -196,10 +295,20 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } -int -glcpp_parser_macro_defined (glcpp_parser_t *parser, const char *identifier) +macro_type_t +glcpp_parser_macro_type (glcpp_parser_t *parser, const char *identifier) { - return (hash_table_find (parser->defines, identifier) != NULL); + macro_t *macro; + + macro = hash_table_find (parser->defines, identifier); + + if (macro == NULL) + return MACRO_TYPE_UNDEFINED; + + if 
(macro->is_function) + return MACRO_TYPE_FUNCTION; + else + return MACRO_TYPE_OBJECT; } static void @@ -208,15 +317,17 @@ _print_expanded_macro_recursive (glcpp_parser_t *parser, const char *orig, int *first) { - list_t *replacement; + macro_t *macro; node_t *node; - replacement = hash_table_find (parser->defines, token); - if (replacement == NULL) { + macro = hash_table_find (parser->defines, token); + if (macro == NULL) { printf ("%s%s", *first ? "" : " ", token); *first = 0; } else { - for (node = replacement->head ; node ; node = node->next) { + list_t *replacement_list = macro->replacement_list; + + for (node = replacement_list->head ; node ; node = node->next) { token = node->str; if (strcmp (token, orig) == 0) { printf ("%s%s", *first ? "" : " ", token); @@ -231,9 +342,62 @@ _print_expanded_macro_recursive (glcpp_parser_t *parser, } void -_print_expanded_macro (glcpp_parser_t *parser, const char *macro) +_define_object_macro (glcpp_parser_t *parser, + const char *identifier, + list_t *replacement_list) +{ + macro_t *macro; + + macro = xtalloc (parser, macro_t); + + macro->is_function = 0; + macro->parameter_list = NULL; + macro->replacement_list = talloc_steal (macro, replacement_list); + + hash_table_insert (parser->defines, macro, identifier); +} + +void +_define_function_macro (glcpp_parser_t *parser, + const char *identifier, + list_t *parameter_list, + list_t *replacement_list) +{ + macro_t *macro; + + macro = xtalloc (parser, macro_t); + + macro->is_function = 1; + macro->parameter_list = talloc_steal (macro, parameter_list); + macro->replacement_list = talloc_steal (macro, replacement_list); + + hash_table_insert (parser->defines, macro, identifier); +} + +void +_print_expanded_object_macro (glcpp_parser_t *parser, const char *identifier) { int first = 1; + macro_t *macro; - _print_expanded_macro_recursive (parser, macro, macro, &first); + macro = hash_table_find (parser->defines, identifier); + assert (! 
macro->is_function); + + _print_expanded_macro_recursive (parser, identifier, identifier, &first); +} + +void +_print_expanded_function_macro (glcpp_parser_t *parser, + const char *identifier, + list_t *arguments) +{ + int first = 1; + macro_t *macro; + + macro = hash_table_find (parser->defines, identifier); + assert (macro->is_function); + + /* XXX: Need to use argument list here in the expansion. */ + + _print_expanded_macro_recursive (parser, identifier, identifier, &first); } diff --git a/glcpp.h b/glcpp.h index 39d6d5d0ebb..69b3b840aed 100644 --- a/glcpp.h +++ b/glcpp.h @@ -52,9 +52,15 @@ glcpp_parser_parse (glcpp_parser_t *parser); void glcpp_parser_destroy (glcpp_parser_t *parser); -int -glcpp_parser_macro_defined (glcpp_parser_t *parser, - const char *identifier); +typedef enum { + MACRO_TYPE_UNDEFINED, + MACRO_TYPE_OBJECT, + MACRO_TYPE_FUNCTION +} macro_type_t; + +macro_type_t +glcpp_parser_macro_type (glcpp_parser_t *parser, + const char *identifier); /* Generated by glcpp-lex.l to glcpp-lex.c */ From db35d557a40b9fb56483f77da2fb98f541808dd0 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 08:47:32 -0700 Subject: [PATCH 025/148] Eliminate a reduce/reduce conflict in the function-like macro production. Previously, an empty argument could be parsed as either an "argument_list" directly or first as an "argument" and then an "argument_list". We fix this by removing the possibility of an empty "argument_list" directly. 
--- glcpp-parse.y | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 2e40db525b8..66725db69ed 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -121,10 +121,7 @@ macro: ; argument_list: - /* empty */ { - $$ = _list_create (parser); - } -| argument { + argument { $$ = _list_create (parser); _list_append_list ($$, $1); } From 67c27afc168f85ce6dc66820db864aaaef67f8ed Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 10:26:58 -0700 Subject: [PATCH 026/148] Add test for an object-like macro with a definition beginning with '(' Our current parser sees "#define foo (" as an identifier token followed by a '(' token and parses this as a function-like macro. That would be correct for "#define foo(" but the preprocessor specification treats this whitespace as significant here so this test currently fails. --- tests/015-define-object-with-parens.c | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tests/015-define-object-with-parens.c diff --git a/tests/015-define-object-with-parens.c b/tests/015-define-object-with-parens.c new file mode 100644 index 00000000000..7dcadfa24fd --- /dev/null +++ b/tests/015-define-object-with-parens.c @@ -0,0 +1,2 @@ +#define foo ( ) 1 +foo() From 0a93cbbe4f00e0bdd0c61119d3598e3a98a37505 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 10:29:07 -0700 Subject: [PATCH 027/148] Fix parsing of object-like macro with a definition that begins with '('. Previously our parser was incorrectly treating this case as a function-like macro. We fix this by conditionally passing a SPACE token from the lexer, (but only immediately after the identifier immediately after #define). 
--- glcpp-lex.l | 45 ++++++++++++++++++++++++++++++++++++++------- glcpp-parse.y | 32 ++++++++++++++++++++++---------- 2 files changed, 60 insertions(+), 17 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index c6e545aa8ed..3c9dda46d47 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,6 +32,9 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" +%x ST_DEFINE +%x ST_DEFVAL + SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -42,16 +45,42 @@ TOKEN [^[:space:](),]+ %% -{HASH}define{HSPACE}* { - return DEFINE; -} - -{HASH}undef{HSPACE}* { +{HASH}undef{HSPACE}* { return UNDEF; } + /* We use the ST_DEFINE and ST_DEFVAL states so that we can + * pass a space token, (yes, a token for whitespace!), since + * the preprocessor specification requires distinguishing + * "#define foo()" from "#define foo ()". + */ +{HASH}define{HSPACE}* { + BEGIN ST_DEFINE; + return DEFINE; +} -{IDENTIFIER} { +{IDENTIFIER} { + BEGIN ST_DEFVAL; + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; +} + +\n { + BEGIN INITIAL; + return NEWLINE; +} + +{HSPACE}+ { + BEGIN INITIAL; + return SPACE; +} + +"(" { + BEGIN INITIAL; + return '('; +} + +{IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); switch (glcpp_parser_macro_type (yyextra, yylval.str)) { @@ -67,7 +96,9 @@ TOKEN [^[:space:](),]+ } } -[(),] { return yytext[0]; } +[(),] { + return yytext[0]; +} {TOKEN} { yylval.str = xtalloc_strdup (yyextra, yytext); diff --git a/glcpp-parse.y b/glcpp-parse.y index 66725db69ed..dc352de55b6 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -83,8 +83,8 @@ _list_append_list (list_t *list, list_t *tail); %parse-param {glcpp_parser_t *parser} %lex-param {void *scanner} -%token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO TOKEN UNDEF -%type FUNC_MACRO IDENTIFIER OBJ_MACRO TOKEN string +%token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO SPACE TOKEN UNDEF +%type FUNC_MACRO IDENTIFIER OBJ_MACRO TOKEN word word_or_symbol %type argument argument_list 
parameter_list replacement_list %% @@ -105,9 +105,10 @@ content: } | macro | directive_with_newline -| NEWLINE { - printf ("\n"); - } +| NEWLINE { printf ("\n"); } +| '(' { printf ("("); } +| ')' { printf (")"); } +| ',' { printf (","); } ; macro: @@ -135,7 +136,7 @@ argument: /* empty */ { $$ = _list_create (parser); } -| argument string { +| argument word { _list_append_item ($1, $2); talloc_free ($2); } @@ -149,8 +150,12 @@ directive_with_newline: ; directive: - DEFINE IDENTIFIER replacement_list { - _define_object_macro (parser, $2, $3); + DEFINE IDENTIFIER { + list_t *list = _list_create (parser); + _define_object_macro (parser, $2, list); + } +| DEFINE IDENTIFIER SPACE replacement_list { + _define_object_macro (parser, $2, $4); } | DEFINE IDENTIFIER '(' parameter_list ')' replacement_list { _define_function_macro (parser, $2, $4, $6); @@ -183,7 +188,7 @@ replacement_list: /* empty */ { $$ = _list_create (parser); } -| replacement_list string { +| replacement_list word_or_symbol { _list_append_item ($1, $2); talloc_free ($2); $$ = $1; @@ -206,7 +211,14 @@ parameter_list: } ; -string: +word_or_symbol: + word { $$ = $1; } +| '(' { $$ = xtalloc_strdup (parser, "("); } +| ')' { $$ = xtalloc_strdup (parser, ")"); } +| ',' { $$ = xtalloc_strdup (parser, ","); } +; + +word: IDENTIFIER { $$ = $1; } | FUNC_MACRO { $$ = $1; } | OBJ_MACRO { $$ = $1; } From 27bc8930ba9ba67f2de29a03232a948316409ded Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 10:41:53 -0700 Subject: [PATCH 028/148] Add some whitespace variations to test 15. This shows two minor failures in our current parsing (resulting in whitespace-only changes, so not that significant): 1. We are inserting extra whitespace between tokens not originally separated by whitespace in the replacement list of a macro definition. 2. We are swallowing whitespace separating tokens in the general content. 
--- tests/015-define-object-with-parens.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/015-define-object-with-parens.c b/tests/015-define-object-with-parens.c index 7dcadfa24fd..10bf7e31a34 100644 --- a/tests/015-define-object-with-parens.c +++ b/tests/015-define-object-with-parens.c @@ -1,2 +1,4 @@ #define foo ( ) 1 foo() +#define bar () 2 +bar( ) From 462cce1852c80a2d71bfec1a2ead10fe0a9e2486 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 10:45:32 -0700 Subject: [PATCH 029/148] Makefile: Make "make test" depend on the main program. Otherwise, running "make test" can run an old version of the code, (even when new changes are sitting in the source waiting to be compiled). --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c5472a86b3c..550945abd30 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o xtalloc.o glcpp-lex.c: glcpp-parse.h -test: +test: glcpp @(cd tests; ./glcpp-test) clean: From 48b94da0994b44e41324a2419117dcd81facce8b Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 10:46:29 -0700 Subject: [PATCH 030/148] Make the lexer return SPACE tokens unconditionally. It seems strange to always be returning SPACE tokens, but since we were already needing to return a SPACE token in some cases, this actually simplifies our lexer. This also allows us to fix two whitespace-handling differences compared to "gcc -E" so that now the recent modification to the test suite passes once again. 
--- glcpp-lex.l | 29 +++-------------------------- glcpp-parse.y | 37 +++++++++++++++++++------------------ 2 files changed, 22 insertions(+), 44 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 3c9dda46d47..21b9e3530aa 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,9 +32,6 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" -%x ST_DEFINE -%x ST_DEFVAL - SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -55,31 +52,9 @@ TOKEN [^[:space:](),]+ * "#define foo()" from "#define foo ()". */ {HASH}define{HSPACE}* { - BEGIN ST_DEFINE; return DEFINE; } -{IDENTIFIER} { - BEGIN ST_DEFVAL; - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -\n { - BEGIN INITIAL; - return NEWLINE; -} - -{HSPACE}+ { - BEGIN INITIAL; - return SPACE; -} - -"(" { - BEGIN INITIAL; - return '('; -} - {IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); switch (glcpp_parser_macro_type (yyextra, yylval.str)) @@ -109,6 +84,8 @@ TOKEN [^[:space:](),]+ return NEWLINE; } -{SPACE}+ +{HSPACE}+ { + return SPACE; +} %% diff --git a/glcpp-parse.y b/glcpp-parse.y index dc352de55b6..7d1c3ab927f 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -109,6 +109,7 @@ content: | '(' { printf ("("); } | ')' { printf (")"); } | ',' { printf (","); } +| SPACE { printf (" "); } ; macro: @@ -157,8 +158,12 @@ directive: | DEFINE IDENTIFIER SPACE replacement_list { _define_object_macro (parser, $2, $4); } -| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list { - _define_function_macro (parser, $2, $4, $6); +| DEFINE IDENTIFIER '(' parameter_list ')' { + list_t *list = _list_create (parser); + _define_function_macro (parser, $2, $4, list); + } +| DEFINE IDENTIFIER '(' parameter_list ')' SPACE replacement_list { + _define_function_macro (parser, $2, $4, $7); } | UNDEF FUNC_MACRO { list_t *replacement = hash_table_find (parser->defines, $2); @@ -185,8 +190,10 @@ directive: ; replacement_list: - /* empty */ { + word_or_symbol { $$ = _list_create (parser); + 
_list_append_item ($$, $1); + talloc_free ($1); } | replacement_list word_or_symbol { _list_append_item ($1, $2); @@ -216,6 +223,7 @@ word_or_symbol: | '(' { $$ = xtalloc_strdup (parser, "("); } | ')' { $$ = xtalloc_strdup (parser, ")"); } | ',' { $$ = xtalloc_strdup (parser, ","); } +| SPACE { $$ = xtalloc_strdup (parser, " "); } ; word: @@ -323,29 +331,24 @@ glcpp_parser_macro_type (glcpp_parser_t *parser, const char *identifier) static void _print_expanded_macro_recursive (glcpp_parser_t *parser, const char *token, - const char *orig, - int *first) + const char *orig) { macro_t *macro; node_t *node; macro = hash_table_find (parser->defines, token); if (macro == NULL) { - printf ("%s%s", *first ? "" : " ", token); - *first = 0; + printf ("%s", token); } else { list_t *replacement_list = macro->replacement_list; for (node = replacement_list->head ; node ; node = node->next) { token = node->str; - if (strcmp (token, orig) == 0) { - printf ("%s%s", *first ? "" : " ", token); - *first = 0; - } else { + if (strcmp (token, orig) == 0) + printf ("%s", token); + else _print_expanded_macro_recursive (parser, - token, orig, - first); - } + token, orig); } } } @@ -386,13 +389,12 @@ _define_function_macro (glcpp_parser_t *parser, void _print_expanded_object_macro (glcpp_parser_t *parser, const char *identifier) { - int first = 1; macro_t *macro; macro = hash_table_find (parser->defines, identifier); assert (! macro->is_function); - _print_expanded_macro_recursive (parser, identifier, identifier, &first); + _print_expanded_macro_recursive (parser, identifier, identifier); } void @@ -400,7 +402,6 @@ _print_expanded_function_macro (glcpp_parser_t *parser, const char *identifier, list_t *arguments) { - int first = 1; macro_t *macro; macro = hash_table_find (parser->defines, identifier); @@ -408,5 +409,5 @@ _print_expanded_function_macro (glcpp_parser_t *parser, /* XXX: Need to use argument list here in the expansion. 
*/ - _print_expanded_macro_recursive (parser, identifier, identifier, &first); + _print_expanded_macro_recursive (parser, identifier, identifier); } From af71ba41bdecbe9f971752c32c514ca7b319f588 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 12:54:17 -0700 Subject: [PATCH 031/148] Add tests exercising substitution of arguments in function-like macros. This capability is the only thing that makes function-like macros interesting. This isn't supported yet so these tests fail for now. --- tests/016-define-func-1-arg.c | 2 ++ tests/017-define-func-2-args.c | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 tests/016-define-func-1-arg.c create mode 100644 tests/017-define-func-2-args.c diff --git a/tests/016-define-func-1-arg.c b/tests/016-define-func-1-arg.c new file mode 100644 index 00000000000..dea38d1fedd --- /dev/null +++ b/tests/016-define-func-1-arg.c @@ -0,0 +1,2 @@ +#define foo(x) ((x) + 1) +foo(bar) diff --git a/tests/017-define-func-2-args.c b/tests/017-define-func-2-args.c new file mode 100644 index 00000000000..c7253835278 --- /dev/null +++ b/tests/017-define-func-2-args.c @@ -0,0 +1,2 @@ +#define foo(x,y) ((x)*(y)) +foo(bar,baz) From dcc2ecd30d2ff68792f192c867b301a10872d86d Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 12:56:42 -0700 Subject: [PATCH 032/148] Implement substitution of macro arguments. Making the two recently-added tests for this functionality now pass. 
--- glcpp-parse.y | 169 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 141 insertions(+), 28 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 7d1c3ab927f..4b4a754f82b 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -73,6 +73,15 @@ _list_append_item (list_t *list, const char *str); void _list_append_list (list_t *list, list_t *tail); +int +_list_contains (list_t *list, const char *member, int *index); + +const char * +_list_member_at (list_t *list, int index); + +int +_list_length (list_t *list); + %} %union { @@ -277,6 +286,62 @@ _list_append_item (list_t *list, const char *str) list->tail = node; } + +int +_list_contains (list_t *list, const char *member, int *index) +{ + node_t *node; + int i; + + if (list == NULL) + return 0; + + for (i = 0, node = list->head; node; i++, node = node->next) { + if (strcmp (node->str, member) == 0) { + *index = i; + return 1; + } + } + + return 0; +} + +int +_list_length (list_t *list) +{ + int length = 0; + node_t *node; + + if (list == NULL) + return 0; + + for (node = list->head; node; node = node->next) + length++; + + return length; +} + +const char * +_list_member_at (list_t *list, int index) +{ + node_t *node; + int i; + + if (list == NULL) + return NULL; + + node = list->head; + for (i = 0; i < index; i++) { + node = node->next; + if (node == NULL) + break; + } + + if (node) + return node->str; + + return NULL; +} void yyerror (void *scanner, const char *error) @@ -328,31 +393,6 @@ glcpp_parser_macro_type (glcpp_parser_t *parser, const char *identifier) return MACRO_TYPE_OBJECT; } -static void -_print_expanded_macro_recursive (glcpp_parser_t *parser, - const char *token, - const char *orig) -{ - macro_t *macro; - node_t *node; - - macro = hash_table_find (parser->defines, token); - if (macro == NULL) { - printf ("%s", token); - } else { - list_t *replacement_list = macro->replacement_list; - - for (node = replacement_list->head ; node ; node = node->next) { - token = node->str; - if 
(strcmp (token, orig) == 0) - printf ("%s", token); - else - _print_expanded_macro_recursive (parser, - token, orig); - } - } -} - void _define_object_macro (glcpp_parser_t *parser, const char *identifier, @@ -386,6 +426,70 @@ _define_function_macro (glcpp_parser_t *parser, hash_table_insert (parser->defines, macro, identifier); } +static void +_print_expanded_macro_recursive (glcpp_parser_t *parser, + const char *token, + const char *orig, + list_t *parameters, + list_t *arguments); + +static void +_print_expanded_list_recursive (glcpp_parser_t *parser, + list_t *list, + const char *orig, + list_t *parameters, + list_t *arguments) +{ + const char *token; + node_t *node; + int index; + + for (node = list->head ; node ; node = node->next) { + token = node->str; + + if (strcmp (token, orig) == 0) { + printf ("%s", token); + continue; + } + + if (_list_contains (parameters, token, &index)) { + const char *argument; + + argument = _list_member_at (arguments, index); + _print_expanded_macro_recursive (parser, argument, + orig, parameters, + arguments); + } else { + _print_expanded_macro_recursive (parser, token, + orig, parameters, + arguments); + } + } +} + + +static void +_print_expanded_macro_recursive (glcpp_parser_t *parser, + const char *token, + const char *orig, + list_t *parameters, + list_t *arguments) +{ + macro_t *macro; + list_t *replacement_list; + + macro = hash_table_find (parser->defines, token); + if (macro == NULL) { + printf ("%s", token); + return; + } + + replacement_list = macro->replacement_list; + + _print_expanded_list_recursive (parser, replacement_list, + orig, parameters, arguments); +} + void _print_expanded_object_macro (glcpp_parser_t *parser, const char *identifier) { @@ -394,7 +498,8 @@ _print_expanded_object_macro (glcpp_parser_t *parser, const char *identifier) macro = hash_table_find (parser->defines, identifier); assert (! 
macro->is_function); - _print_expanded_macro_recursive (parser, identifier, identifier); + _print_expanded_macro_recursive (parser, identifier, identifier, + NULL, NULL); } void @@ -407,7 +512,15 @@ _print_expanded_function_macro (glcpp_parser_t *parser, macro = hash_table_find (parser->defines, identifier); assert (macro->is_function); - /* XXX: Need to use argument list here in the expansion. */ + if (_list_length (arguments) != _list_length (macro->parameter_list)) { + fprintf (stderr, + "Error: macro %s invoked with %d arguments (expected %d)\n", + identifier, + _list_length (arguments), + _list_length (macro->parameter_list)); + return; + } - _print_expanded_macro_recursive (parser, identifier, identifier); + _print_expanded_macro_recursive (parser, identifier, identifier, + macro->parameter_list, arguments); } From 30140733112b09d531d949a9bfbd9daf0cae4781 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 12:57:34 -0700 Subject: [PATCH 033/148] Add test where a macro formal parameter is the same as an existing macro. This is a well-defined condition, but something that currently trips up the implementation. Should be easy to fix. --- tests/018-define-func-macro-as-parameter.c | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tests/018-define-func-macro-as-parameter.c diff --git a/tests/018-define-func-macro-as-parameter.c b/tests/018-define-func-macro-as-parameter.c new file mode 100644 index 00000000000..668130b8f9b --- /dev/null +++ b/tests/018-define-func-macro-as-parameter.c @@ -0,0 +1,3 @@ +#define x 0 +#define foo(x) x +foo(1) From 7f9aa36bbcf457e1a221ab6447de3bec30908000 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 12:58:49 -0700 Subject: [PATCH 034/148] Fix case of a macro formal parameter matching a defined macro. Simply need to allow for a macro name to appear in the parameter list. This makes the recently-added test pass. 
--- glcpp-parse.y | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 4b4a754f82b..1b6c939a269 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -93,7 +93,7 @@ _list_length (list_t *list); %lex-param {void *scanner} %token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO SPACE TOKEN UNDEF -%type FUNC_MACRO IDENTIFIER OBJ_MACRO TOKEN word word_or_symbol +%type FUNC_MACRO IDENTIFIER identifier_perhaps_macro OBJ_MACRO TOKEN word word_or_symbol %type argument argument_list parameter_list replacement_list %% @@ -215,18 +215,24 @@ parameter_list: /* empty */ { $$ = _list_create (parser); } -| IDENTIFIER { +| identifier_perhaps_macro { $$ = _list_create (parser); _list_append_item ($$, $1); talloc_free ($1); } -| parameter_list ',' IDENTIFIER { +| parameter_list ',' identifier_perhaps_macro { _list_append_item ($1, $3); talloc_free ($3); $$ = $1; } ; +identifier_perhaps_macro: + IDENTIFIER { $$ = $1; } +| FUNC_MACRO { $$ = $1; } +| OBJ_MACRO { $$ = $1; } +; + word_or_symbol: word { $$ = $1; } | '(' { $$ = xtalloc_strdup (parser, "("); } From 610053b2c63fe6bc1d11347dc87e63d958b04dd8 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 10:05:11 -0700 Subject: [PATCH 035/148] Rename list_t and node_t to string_list_t and string_node_t. We'll soon be adding other types of lists, so it will be helpful to have a qualified name here. 
--- glcpp-parse.y | 128 +++++++++++++++++++++++++------------------------- glcpp.h | 14 +++--- 2 files changed, 71 insertions(+), 71 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 1b6c939a269..3b97743085a 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -33,8 +33,8 @@ typedef struct { int is_function; - list_t *parameter_list; - list_t *replacement_list; + string_list_t *parameter_list; + string_list_t *replacement_list; } macro_t; struct glcpp_parser { @@ -48,13 +48,13 @@ yyerror (void *scanner, const char *error); void _define_object_macro (glcpp_parser_t *parser, const char *macro, - list_t *replacement_list); + string_list_t *replacement_list); void _define_function_macro (glcpp_parser_t *parser, const char *macro, - list_t *parameter_list, - list_t *replacement_list); + string_list_t *parameter_list, + string_list_t *replacement_list); void _print_expanded_object_macro (glcpp_parser_t *parser, const char *macro); @@ -62,31 +62,31 @@ _print_expanded_object_macro (glcpp_parser_t *parser, const char *macro); void _print_expanded_function_macro (glcpp_parser_t *parser, const char *macro, - list_t *arguments); + string_list_t *arguments); -list_t * -_list_create (void *ctx); +string_list_t * +_string_list_create (void *ctx); void -_list_append_item (list_t *list, const char *str); +_string_list_append_item (string_list_t *list, const char *str); void -_list_append_list (list_t *list, list_t *tail); +_string_list_append_list (string_list_t *list, string_list_t *tail); int -_list_contains (list_t *list, const char *member, int *index); +_string_list_contains (string_list_t *list, const char *member, int *index); const char * -_list_member_at (list_t *list, int index); +_string_list_member_at (string_list_t *list, int index); int -_list_length (list_t *list); +_string_list_length (string_list_t *list); %} %union { char *str; - list_t *list; + string_list_t *list; } %parse-param {glcpp_parser_t *parser} @@ -133,21 +133,21 @@ macro: argument_list: 
argument { - $$ = _list_create (parser); - _list_append_list ($$, $1); + $$ = _string_list_create (parser); + _string_list_append_list ($$, $1); } | argument_list ',' argument { - _list_append_list ($1, $3); + _string_list_append_list ($1, $3); $$ = $1; } ; argument: /* empty */ { - $$ = _list_create (parser); + $$ = _string_list_create (parser); } | argument word { - _list_append_item ($1, $2); + _string_list_append_item ($1, $2); talloc_free ($2); } | argument '(' argument ')' @@ -161,21 +161,21 @@ directive_with_newline: directive: DEFINE IDENTIFIER { - list_t *list = _list_create (parser); + string_list_t *list = _string_list_create (parser); _define_object_macro (parser, $2, list); } | DEFINE IDENTIFIER SPACE replacement_list { _define_object_macro (parser, $2, $4); } | DEFINE IDENTIFIER '(' parameter_list ')' { - list_t *list = _list_create (parser); + string_list_t *list = _string_list_create (parser); _define_function_macro (parser, $2, $4, list); } | DEFINE IDENTIFIER '(' parameter_list ')' SPACE replacement_list { _define_function_macro (parser, $2, $4, $7); } | UNDEF FUNC_MACRO { - list_t *replacement = hash_table_find (parser->defines, $2); + string_list_t *replacement = hash_table_find (parser->defines, $2); if (replacement) { /* XXX: Need hash table to support a real way * to remove an element rather than prefixing @@ -186,7 +186,7 @@ directive: talloc_free ($2); } | UNDEF OBJ_MACRO { - list_t *replacement = hash_table_find (parser->defines, $2); + string_list_t *replacement = hash_table_find (parser->defines, $2); if (replacement) { /* XXX: Need hash table to support a real way * to remove an element rather than prefixing @@ -200,12 +200,12 @@ directive: replacement_list: word_or_symbol { - $$ = _list_create (parser); - _list_append_item ($$, $1); + $$ = _string_list_create (parser); + _string_list_append_item ($$, $1); talloc_free ($1); } | replacement_list word_or_symbol { - _list_append_item ($1, $2); + _string_list_append_item ($1, $2); 
talloc_free ($2); $$ = $1; } @@ -213,15 +213,15 @@ replacement_list: parameter_list: /* empty */ { - $$ = _list_create (parser); + $$ = _string_list_create (parser); } | identifier_perhaps_macro { - $$ = _list_create (parser); - _list_append_item ($$, $1); + $$ = _string_list_create (parser); + _string_list_append_item ($$, $1); talloc_free ($1); } | parameter_list ',' identifier_perhaps_macro { - _list_append_item ($1, $3); + _string_list_append_item ($1, $3); talloc_free ($3); $$ = $1; } @@ -250,12 +250,12 @@ word: %% -list_t * -_list_create (void *ctx) +string_list_t * +_string_list_create (void *ctx) { - list_t *list; + string_list_t *list; - list = xtalloc (ctx, list_t); + list = xtalloc (ctx, string_list_t); list->head = NULL; list->tail = NULL; @@ -263,7 +263,7 @@ _list_create (void *ctx) } void -_list_append_list (list_t *list, list_t *tail) +_string_list_append_list (string_list_t *list, string_list_t *tail) { if (list->head == NULL) { list->head = tail->head; @@ -275,11 +275,11 @@ _list_append_list (list_t *list, list_t *tail) } void -_list_append_item (list_t *list, const char *str) +_string_list_append_item (string_list_t *list, const char *str) { - node_t *node; + string_node_t *node; - node = xtalloc (list, node_t); + node = xtalloc (list, string_node_t); node->str = xtalloc_strdup (node, str); node->next = NULL; @@ -294,9 +294,9 @@ _list_append_item (list_t *list, const char *str) } int -_list_contains (list_t *list, const char *member, int *index) +_string_list_contains (string_list_t *list, const char *member, int *index) { - node_t *node; + string_node_t *node; int i; if (list == NULL) @@ -313,10 +313,10 @@ _list_contains (list_t *list, const char *member, int *index) } int -_list_length (list_t *list) +_string_list_length (string_list_t *list) { int length = 0; - node_t *node; + string_node_t *node; if (list == NULL) return 0; @@ -328,9 +328,9 @@ _list_length (list_t *list) } const char * -_list_member_at (list_t *list, int index) 
+_string_list_member_at (string_list_t *list, int index) { - node_t *node; + string_node_t *node; int i; if (list == NULL) @@ -402,7 +402,7 @@ glcpp_parser_macro_type (glcpp_parser_t *parser, const char *identifier) void _define_object_macro (glcpp_parser_t *parser, const char *identifier, - list_t *replacement_list) + string_list_t *replacement_list) { macro_t *macro; @@ -418,8 +418,8 @@ _define_object_macro (glcpp_parser_t *parser, void _define_function_macro (glcpp_parser_t *parser, const char *identifier, - list_t *parameter_list, - list_t *replacement_list) + string_list_t *parameter_list, + string_list_t *replacement_list) { macro_t *macro; @@ -436,18 +436,18 @@ static void _print_expanded_macro_recursive (glcpp_parser_t *parser, const char *token, const char *orig, - list_t *parameters, - list_t *arguments); + string_list_t *parameters, + string_list_t *arguments); static void -_print_expanded_list_recursive (glcpp_parser_t *parser, - list_t *list, +_print_expanded_string_list_recursive (glcpp_parser_t *parser, + string_list_t *list, const char *orig, - list_t *parameters, - list_t *arguments) + string_list_t *parameters, + string_list_t *arguments) { const char *token; - node_t *node; + string_node_t *node; int index; for (node = list->head ; node ; node = node->next) { @@ -458,10 +458,10 @@ _print_expanded_list_recursive (glcpp_parser_t *parser, continue; } - if (_list_contains (parameters, token, &index)) { + if (_string_list_contains (parameters, token, &index)) { const char *argument; - argument = _list_member_at (arguments, index); + argument = _string_list_member_at (arguments, index); _print_expanded_macro_recursive (parser, argument, orig, parameters, arguments); @@ -478,11 +478,11 @@ static void _print_expanded_macro_recursive (glcpp_parser_t *parser, const char *token, const char *orig, - list_t *parameters, - list_t *arguments) + string_list_t *parameters, + string_list_t *arguments) { macro_t *macro; - list_t *replacement_list; + string_list_t 
*replacement_list; macro = hash_table_find (parser->defines, token); if (macro == NULL) { @@ -492,7 +492,7 @@ _print_expanded_macro_recursive (glcpp_parser_t *parser, replacement_list = macro->replacement_list; - _print_expanded_list_recursive (parser, replacement_list, + _print_expanded_string_list_recursive (parser, replacement_list, orig, parameters, arguments); } @@ -511,19 +511,19 @@ _print_expanded_object_macro (glcpp_parser_t *parser, const char *identifier) void _print_expanded_function_macro (glcpp_parser_t *parser, const char *identifier, - list_t *arguments) + string_list_t *arguments) { macro_t *macro; macro = hash_table_find (parser->defines, identifier); assert (macro->is_function); - if (_list_length (arguments) != _list_length (macro->parameter_list)) { + if (_string_list_length (arguments) != _string_list_length (macro->parameter_list)) { fprintf (stderr, "Error: macro %s invoked with %d arguments (expected %d)\n", identifier, - _list_length (arguments), - _list_length (macro->parameter_list)); + _string_list_length (arguments), + _string_list_length (macro->parameter_list)); return; } diff --git a/glcpp.h b/glcpp.h index 69b3b840aed..cee08faa983 100644 --- a/glcpp.h +++ b/glcpp.h @@ -31,15 +31,15 @@ /* Some data types used for parser value. */ -typedef struct node { +typedef struct string_node { const char *str; - struct node *next; -} node_t; + struct string_node *next; +} string_node_t; -typedef struct list { - node_t *head; - node_t *tail; -} list_t; +typedef struct string_list { + string_node_t *head; + string_node_t *tail; +} string_list_t; typedef struct glcpp_parser glcpp_parser_t; From c5e9855f130b928b480c18c913135a411ee921e7 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 10:12:21 -0700 Subject: [PATCH 036/148] Remove _list suffix from several identifiers. Instead of "parameter_list" and "replacement_list" just use "parameters" and "replacements". 
This is consistent with the existing "arguments" and keeps the line length down in the face of the now-longer "string_list_t" rather than "list_t". --- glcpp-parse.y | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 3b97743085a..4e5de8254d8 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -33,8 +33,8 @@ typedef struct { int is_function; - string_list_t *parameter_list; - string_list_t *replacement_list; + string_list_t *parameters; + string_list_t *replacements; } macro_t; struct glcpp_parser { @@ -48,13 +48,13 @@ yyerror (void *scanner, const char *error); void _define_object_macro (glcpp_parser_t *parser, const char *macro, - string_list_t *replacement_list); + string_list_t *replacements); void _define_function_macro (glcpp_parser_t *parser, const char *macro, - string_list_t *parameter_list, - string_list_t *replacement_list); + string_list_t *parameters, + string_list_t *replacements); void _print_expanded_object_macro (glcpp_parser_t *parser, const char *macro); @@ -402,15 +402,15 @@ glcpp_parser_macro_type (glcpp_parser_t *parser, const char *identifier) void _define_object_macro (glcpp_parser_t *parser, const char *identifier, - string_list_t *replacement_list) + string_list_t *replacements) { macro_t *macro; macro = xtalloc (parser, macro_t); macro->is_function = 0; - macro->parameter_list = NULL; - macro->replacement_list = talloc_steal (macro, replacement_list); + macro->parameters = NULL; + macro->replacements = talloc_steal (macro, replacements); hash_table_insert (parser->defines, macro, identifier); } @@ -418,16 +418,16 @@ _define_object_macro (glcpp_parser_t *parser, void _define_function_macro (glcpp_parser_t *parser, const char *identifier, - string_list_t *parameter_list, - string_list_t *replacement_list) + string_list_t *parameters, + string_list_t *replacements) { macro_t *macro; macro = xtalloc (parser, macro_t); macro->is_function = 1; - 
macro->parameter_list = talloc_steal (macro, parameter_list); - macro->replacement_list = talloc_steal (macro, replacement_list); + macro->parameters = talloc_steal (macro, parameters); + macro->replacements = talloc_steal (macro, replacements); hash_table_insert (parser->defines, macro, identifier); } @@ -482,7 +482,7 @@ _print_expanded_macro_recursive (glcpp_parser_t *parser, string_list_t *arguments) { macro_t *macro; - string_list_t *replacement_list; + string_list_t *replacements; macro = hash_table_find (parser->defines, token); if (macro == NULL) { @@ -490,10 +490,10 @@ _print_expanded_macro_recursive (glcpp_parser_t *parser, return; } - replacement_list = macro->replacement_list; + replacements = macro->replacements; - _print_expanded_string_list_recursive (parser, replacement_list, - orig, parameters, arguments); + _print_expanded_string_list_recursive (parser, replacements, + orig, parameters, arguments); } void @@ -518,15 +518,15 @@ _print_expanded_function_macro (glcpp_parser_t *parser, macro = hash_table_find (parser->defines, identifier); assert (macro->is_function); - if (_string_list_length (arguments) != _string_list_length (macro->parameter_list)) { + if (_string_list_length (arguments) != _string_list_length (macro->parameters)) { fprintf (stderr, "Error: macro %s invoked with %d arguments (expected %d)\n", identifier, _string_list_length (arguments), - _string_list_length (macro->parameter_list)); + _string_list_length (macro->parameters)); return; } _print_expanded_macro_recursive (parser, identifier, identifier, - macro->parameter_list, arguments); + macro->parameters, arguments); } From 04af13539a7a4bc72b566c111914b103d9e851a6 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 10:17:38 -0700 Subject: [PATCH 037/148] Move most printing to the action in the content production. Previously, printing was occurring all over the place. 
Here we document that it should all be happening at the top-level content production, and we move the printing of directive newlines. The printing of expanded macros is still happening in lower-level productions, but we plan to fix that soon. --- glcpp-parse.y | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 4e5de8254d8..8dc78975114 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -103,6 +103,7 @@ input: | input content ; + /* We do all printing at the content level */ content: IDENTIFIER { printf ("%s", $1); @@ -113,7 +114,7 @@ content: talloc_free ($1); } | macro -| directive_with_newline +| directive_with_newline { printf ("\n"); } | NEWLINE { printf ("\n"); } | '(' { printf ("("); } | ')' { printf (")"); } @@ -154,9 +155,7 @@ argument: ; directive_with_newline: - directive NEWLINE { - printf ("\n"); - } + directive NEWLINE ; directive: From 2be8be0f742a7abf410be8176f6fd6fc49a6b361 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 10:31:43 -0700 Subject: [PATCH 038/148] Make macro-expansion productions create string-list values rather than printing Then we print the final string list up at the top-level content production along with all other printing. Additionally, having macro-expansion productions that create values will make it easier to solve problems like composed function-like macro invocations in the future. 
--- glcpp-parse.y | 128 +++++++++++++++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 48 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 8dc78975114..d0ee78e008e 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -56,13 +56,16 @@ _define_function_macro (glcpp_parser_t *parser, string_list_t *parameters, string_list_t *replacements); -void -_print_expanded_object_macro (glcpp_parser_t *parser, const char *macro); +string_list_t * +_expand_object_macro (glcpp_parser_t *parser, const char *identifier); + +string_list_t * +_expand_function_macro (glcpp_parser_t *parser, + const char *identifier, + string_list_t *arguments); void -_print_expanded_function_macro (glcpp_parser_t *parser, - const char *macro, - string_list_t *arguments); +_print_string_list (string_list_t *list); string_list_t * _string_list_create (void *ctx); @@ -94,7 +97,7 @@ _string_list_length (string_list_t *list); %token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO SPACE TOKEN UNDEF %type FUNC_MACRO IDENTIFIER identifier_perhaps_macro OBJ_MACRO TOKEN word word_or_symbol -%type argument argument_list parameter_list replacement_list +%type argument argument_list macro parameter_list replacement_list %% @@ -113,7 +116,9 @@ content: printf ("%s", $1); talloc_free ($1); } -| macro +| macro { + _print_string_list ($1); + } | directive_with_newline { printf ("\n"); } | NEWLINE { printf ("\n"); } | '(' { printf ("("); } @@ -124,10 +129,10 @@ content: macro: FUNC_MACRO '(' argument_list ')' { - _print_expanded_function_macro (parser, $1, $3); + $$ = _expand_function_macro (parser, $1, $3); } | OBJ_MACRO { - _print_expanded_object_macro (parser, $1); + $$ = _expand_object_macro (parser, $1); talloc_free ($1); } ; @@ -326,6 +331,18 @@ _string_list_length (string_list_t *list) return length; } +void +_print_string_list (string_list_t *list) +{ + string_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) + printf ("%s", 
node->str); +} + const char * _string_list_member_at (string_list_t *list, int index) { @@ -431,29 +448,33 @@ _define_function_macro (glcpp_parser_t *parser, hash_table_insert (parser->defines, macro, identifier); } -static void -_print_expanded_macro_recursive (glcpp_parser_t *parser, - const char *token, - const char *orig, - string_list_t *parameters, - string_list_t *arguments); +static string_list_t * +_expand_macro_recursive (glcpp_parser_t *parser, + const char *token, + const char *orig, + string_list_t *parameters, + string_list_t *arguments); -static void -_print_expanded_string_list_recursive (glcpp_parser_t *parser, - string_list_t *list, - const char *orig, - string_list_t *parameters, - string_list_t *arguments) +static string_list_t * +_expand_string_list_recursive (glcpp_parser_t *parser, + string_list_t *list, + const char *orig, + string_list_t *parameters, + string_list_t *arguments) { + string_list_t *result; + string_list_t *child; const char *token; string_node_t *node; int index; + result = _string_list_create (parser); + for (node = list->head ; node ; node = node->next) { token = node->str; if (strcmp (token, orig) == 0) { - printf ("%s", token); + _string_list_append_item (result, token); continue; } @@ -461,71 +482,82 @@ _print_expanded_string_list_recursive (glcpp_parser_t *parser, const char *argument; argument = _string_list_member_at (arguments, index); - _print_expanded_macro_recursive (parser, argument, - orig, parameters, - arguments); + child = _expand_macro_recursive (parser, argument, + orig, NULL, NULL); + _string_list_append_list (result, child); } else { - _print_expanded_macro_recursive (parser, token, + child = _expand_macro_recursive (parser, token, orig, parameters, arguments); + _string_list_append_list (result, child); } } + + return result; } -static void -_print_expanded_macro_recursive (glcpp_parser_t *parser, - const char *token, - const char *orig, - string_list_t *parameters, - string_list_t *arguments) +static 
string_list_t * +_expand_macro_recursive (glcpp_parser_t *parser, + const char *token, + const char *orig, + string_list_t *parameters, + string_list_t *arguments) { macro_t *macro; string_list_t *replacements; macro = hash_table_find (parser->defines, token); if (macro == NULL) { - printf ("%s", token); - return; + string_list_t *result; + + result = _string_list_create (parser); + _string_list_append_item (result, token); + return result; } replacements = macro->replacements; - _print_expanded_string_list_recursive (parser, replacements, - orig, parameters, arguments); + return _expand_string_list_recursive (parser, replacements, + orig, parameters, arguments); } -void -_print_expanded_object_macro (glcpp_parser_t *parser, const char *identifier) +string_list_t * +_expand_object_macro (glcpp_parser_t *parser, const char *identifier) { macro_t *macro; macro = hash_table_find (parser->defines, identifier); assert (! macro->is_function); - _print_expanded_macro_recursive (parser, identifier, identifier, - NULL, NULL); + return _expand_macro_recursive (parser, identifier, identifier, + NULL, NULL); } -void -_print_expanded_function_macro (glcpp_parser_t *parser, - const char *identifier, - string_list_t *arguments) +string_list_t * +_expand_function_macro (glcpp_parser_t *parser, + const char *identifier, + string_list_t *arguments) { + string_list_t *result; macro_t *macro; + result = _string_list_create (parser); + macro = hash_table_find (parser->defines, identifier); assert (macro->is_function); - if (_string_list_length (arguments) != _string_list_length (macro->parameters)) { + if (_string_list_length (arguments) != + _string_list_length (macro->parameters)) + { fprintf (stderr, "Error: macro %s invoked with %d arguments (expected %d)\n", identifier, _string_list_length (arguments), _string_list_length (macro->parameters)); - return; + return NULL; } - _print_expanded_macro_recursive (parser, identifier, identifier, - macro->parameters, arguments); + return 
_expand_macro_recursive (parser, identifier, identifier, + macro->parameters, arguments); } From db272e6e6fbfe349ea6d9877bb7715ecb2d9f0c1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 10:00:59 -0700 Subject: [PATCH 039/148] Add test for function-like macro invocations with multiple-token arguments. These are not yet parsed correctly, so these tests fail. --- tests/019-define-func-1-arg-multi.c | 2 ++ tests/020-define-func-2-arg-multi.c | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 tests/019-define-func-1-arg-multi.c create mode 100644 tests/020-define-func-2-arg-multi.c diff --git a/tests/019-define-func-1-arg-multi.c b/tests/019-define-func-1-arg-multi.c new file mode 100644 index 00000000000..c4e62b25508 --- /dev/null +++ b/tests/019-define-func-1-arg-multi.c @@ -0,0 +1,2 @@ +#define foo(x) (x) +foo(this is more than one word) diff --git a/tests/020-define-func-2-arg-multi.c b/tests/020-define-func-2-arg-multi.c new file mode 100644 index 00000000000..253421139d4 --- /dev/null +++ b/tests/020-define-func-2-arg-multi.c @@ -0,0 +1,2 @@ +#define foo(x,y) x, two fish, red fish, y +foo(one fish, blue fish) From 8f6a828e4a454e1bdce359c43e1108ff0315a89c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 10:44:19 -0700 Subject: [PATCH 040/148] Support macro invocations with multiple tokens for a single argument. We provide for this by changing the value of the argument-list production from a list of strings (string_list_t) to a new data-structure that holds a list of lists of strings (argument_list_t). 
--- glcpp-parse.y | 115 ++++++++++++++++++++++++++++++++++++++++---------- glcpp.h | 11 ++++- 2 files changed, 102 insertions(+), 24 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index d0ee78e008e..27b5514e928 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -62,7 +62,7 @@ _expand_object_macro (glcpp_parser_t *parser, const char *identifier); string_list_t * _expand_function_macro (glcpp_parser_t *parser, const char *identifier, - string_list_t *arguments); + argument_list_t *arguments); void _print_string_list (string_list_t *list); @@ -79,17 +79,27 @@ _string_list_append_list (string_list_t *list, string_list_t *tail); int _string_list_contains (string_list_t *list, const char *member, int *index); -const char * -_string_list_member_at (string_list_t *list, int index); - int _string_list_length (string_list_t *list); +argument_list_t * +_argument_list_create (void *ctx); + +void +_argument_list_append (argument_list_t *list, string_list_t *argument); + +int +_argument_list_length (argument_list_t *list); + +string_list_t * +_argument_list_member_at (argument_list_t *list, int index); + %} %union { char *str; - string_list_t *list; + string_list_t *string_list; + argument_list_t *argument_list; } %parse-param {glcpp_parser_t *parser} @@ -97,7 +107,8 @@ _string_list_length (string_list_t *list); %token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO SPACE TOKEN UNDEF %type FUNC_MACRO IDENTIFIER identifier_perhaps_macro OBJ_MACRO TOKEN word word_or_symbol -%type argument argument_list macro parameter_list replacement_list +%type argument macro parameter_list replacement_list +%type argument_list %% @@ -139,11 +150,15 @@ macro: argument_list: argument { - $$ = _string_list_create (parser); - _string_list_append_list ($$, $1); + $$ = _argument_list_create (parser); + _argument_list_append ($$, $1); + } +| argument_list ',' SPACE argument { + _argument_list_append ($1, $4); + $$ = $1; } | argument_list ',' argument { - _string_list_append_list ($1, $3); + 
_argument_list_append ($1, $3); $$ = $1; } ; @@ -156,6 +171,11 @@ argument: _string_list_append_item ($1, $2); talloc_free ($2); } +| argument SPACE word { + _string_list_append_item ($1, " "); + _string_list_append_item ($1, $3); + talloc_free ($3); + } | argument '(' argument ')' ; @@ -343,10 +363,59 @@ _print_string_list (string_list_t *list) printf ("%s", node->str); } -const char * -_string_list_member_at (string_list_t *list, int index) +argument_list_t * +_argument_list_create (void *ctx) { - string_node_t *node; + argument_list_t *list; + + list = xtalloc (ctx, argument_list_t); + list->head = NULL; + list->tail = NULL; + + return list; +} + +void +_argument_list_append (argument_list_t *list, string_list_t *argument) +{ + argument_node_t *node; + + if (argument == NULL || argument->head == NULL) + return; + + node = xtalloc (list, argument_node_t); + node->argument = argument; + + node->next = NULL; + + if (list->head == NULL) { + list->head = node; + } else { + list->tail->next = node; + } + + list->tail = node; +} + +int +_argument_list_length (argument_list_t *list) +{ + int length = 0; + argument_node_t *node; + + if (list == NULL) + return 0; + + for (node = list->head; node; node = node->next) + length++; + + return length; +} + +string_list_t * +_argument_list_member_at (argument_list_t *list, int index) +{ + argument_node_t *node; int i; if (list == NULL) @@ -360,7 +429,7 @@ _string_list_member_at (string_list_t *list, int index) } if (node) - return node->str; + return node->argument; return NULL; } @@ -453,14 +522,14 @@ _expand_macro_recursive (glcpp_parser_t *parser, const char *token, const char *orig, string_list_t *parameters, - string_list_t *arguments); + argument_list_t *arguments); static string_list_t * _expand_string_list_recursive (glcpp_parser_t *parser, string_list_t *list, const char *orig, string_list_t *parameters, - string_list_t *arguments) + argument_list_t *arguments) { string_list_t *result; string_list_t *child; @@ -479,11 
+548,11 @@ _expand_string_list_recursive (glcpp_parser_t *parser, } if (_string_list_contains (parameters, token, &index)) { - const char *argument; + string_list_t *argument; - argument = _string_list_member_at (arguments, index); - child = _expand_macro_recursive (parser, argument, - orig, NULL, NULL); + argument = _argument_list_member_at (arguments, index); + child = _expand_string_list_recursive (parser, argument, + orig, NULL, NULL); _string_list_append_list (result, child); } else { child = _expand_macro_recursive (parser, token, @@ -502,7 +571,7 @@ _expand_macro_recursive (glcpp_parser_t *parser, const char *token, const char *orig, string_list_t *parameters, - string_list_t *arguments) + argument_list_t *arguments) { macro_t *macro; string_list_t *replacements; @@ -537,7 +606,7 @@ _expand_object_macro (glcpp_parser_t *parser, const char *identifier) string_list_t * _expand_function_macro (glcpp_parser_t *parser, const char *identifier, - string_list_t *arguments) + argument_list_t *arguments) { string_list_t *result; macro_t *macro; @@ -547,13 +616,13 @@ _expand_function_macro (glcpp_parser_t *parser, macro = hash_table_find (parser->defines, identifier); assert (macro->is_function); - if (_string_list_length (arguments) != + if (_argument_list_length (arguments) != _string_list_length (macro->parameters)) { fprintf (stderr, "Error: macro %s invoked with %d arguments (expected %d)\n", identifier, - _string_list_length (arguments), + _argument_list_length (arguments), _string_list_length (macro->parameters)); return NULL; } diff --git a/glcpp.h b/glcpp.h index cee08faa983..7966a2a3d21 100644 --- a/glcpp.h +++ b/glcpp.h @@ -30,7 +30,6 @@ /* Some data types used for parser value. 
*/ - typedef struct string_node { const char *str; struct string_node *next; @@ -41,6 +40,16 @@ typedef struct string_list { string_node_t *tail; } string_list_t; +typedef struct argument_node { + string_list_t *argument; + struct argument_node *next; +} argument_node_t; + +typedef struct argument_list { + argument_node_t *head; + argument_node_t *tail; +} argument_list_t; + typedef struct glcpp_parser glcpp_parser_t; glcpp_parser_t * From ac070e8bf5005151dd702f2cd3fbfb2d1eaaf00d Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 11:33:00 -0700 Subject: [PATCH 041/148] Eliminate a shift/reduce conflict. By simply allowing for the argument_list production to be empty rather than the lower-level argument production to be empty. --- glcpp-parse.y | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 27b5514e928..e70b3298d8d 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -149,7 +149,10 @@ macro: ; argument_list: - argument { + /* empty */ { + $$ = _argument_list_create (parser); + } +| argument { $$ = _argument_list_create (parser); _argument_list_append ($$, $1); } @@ -164,8 +167,9 @@ argument_list: ; argument: - /* empty */ { + word { $$ = _string_list_create (parser); + _string_list_append_item ($$, $1); } | argument word { _string_list_append_item ($1, $2); From 92e7bf0f50ff673b7441b2f2be9ef99a4af8cae4 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 10:01:44 -0700 Subject: [PATCH 042/148] Add test for composed invocation of function-like macros. This is a case like "foo(bar(x))" where both foo and bar are defined function-like macros. This is not yet parsed correctly so this test fails. 
--- tests/021-define-func-compose.c | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tests/021-define-func-compose.c diff --git a/tests/021-define-func-compose.c b/tests/021-define-func-compose.c new file mode 100644 index 00000000000..21ddd0e65f9 --- /dev/null +++ b/tests/021-define-func-compose.c @@ -0,0 +1,3 @@ +#define bar(x) (1+(x)) +#define foo(y) (2*(y)) +foo(bar(3)) From 38bd27b444f610904320b5aa9d37e43be9164697 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 12:05:37 -0700 Subject: [PATCH 043/148] Fix expansion of composited macros. This is a case such as "foo(bar(x))". The recently added test for this now passes. --- glcpp-parse.y | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index e70b3298d8d..f972ec372b8 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -106,7 +106,7 @@ _argument_list_member_at (argument_list_t *list, int index); %lex-param {void *scanner} %token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO SPACE TOKEN UNDEF -%type FUNC_MACRO IDENTIFIER identifier_perhaps_macro OBJ_MACRO TOKEN word word_or_symbol +%type FUNC_MACRO IDENTIFIER identifier_perhaps_macro OBJ_MACRO replacement_word TOKEN word %type argument macro parameter_list replacement_list %type argument_list @@ -171,6 +171,9 @@ argument: $$ = _string_list_create (parser); _string_list_append_item ($$, $1); } +| macro { + $$ = $1; + } | argument word { _string_list_append_item ($1, $2); talloc_free ($2); @@ -227,18 +230,28 @@ directive: ; replacement_list: - word_or_symbol { + replacement_word { $$ = _string_list_create (parser); _string_list_append_item ($$, $1); talloc_free ($1); } -| replacement_list word_or_symbol { +| replacement_list replacement_word { _string_list_append_item ($1, $2); talloc_free ($2); $$ = $1; } ; +replacement_word: + word { $$ = $1; } +| FUNC_MACRO { $$ = $1; } +| OBJ_MACRO { $$ = $1; } +| '(' { $$ = xtalloc_strdup (parser, "("); } +| ')' { $$ = 
xtalloc_strdup (parser, ")"); } +| ',' { $$ = xtalloc_strdup (parser, ","); } +| SPACE { $$ = xtalloc_strdup (parser, " "); } +; + parameter_list: /* empty */ { $$ = _string_list_create (parser); @@ -261,18 +274,8 @@ identifier_perhaps_macro: | OBJ_MACRO { $$ = $1; } ; -word_or_symbol: - word { $$ = $1; } -| '(' { $$ = xtalloc_strdup (parser, "("); } -| ')' { $$ = xtalloc_strdup (parser, ")"); } -| ',' { $$ = xtalloc_strdup (parser, ","); } -| SPACE { $$ = xtalloc_strdup (parser, " "); } -; - word: IDENTIFIER { $$ = $1; } -| FUNC_MACRO { $$ = $1; } -| OBJ_MACRO { $$ = $1; } | TOKEN { $$ = $1; } ; From f6ae186cfd2c7006656ac55446247b569b92a721 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 16:51:54 -0700 Subject: [PATCH 044/148] Add test invoking a macro with an argument containing (non-macro) parentheses. The macro invocation is defined to consume all text between a set of matched parentheses. We previously tested for inner parentheses from a nested function-like macro invocation. Here we test for inner parentheses occurring on their own, (not part of another macro invocation). --- tests/022-define-func-arg-with-parens.c | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tests/022-define-func-arg-with-parens.c diff --git a/tests/022-define-func-arg-with-parens.c b/tests/022-define-func-arg-with-parens.c new file mode 100644 index 00000000000..c20d73a4a28 --- /dev/null +++ b/tests/022-define-func-arg-with-parens.c @@ -0,0 +1,2 @@ +#define foo(x) (x) +foo(argument(including parens)for the win) From 3596bb149e107ad12df4fee0723caf91819c0758 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 16:53:52 -0700 Subject: [PATCH 045/148] Provide implementation for macro arguments containing parentheses. We were correctly parsing this already, but simply not returning any value (for no good reason). Fortunately the fix is quite simple. This makes the test added in the previous commit now pass. 
--- Makefile | 2 +- glcpp-parse.y | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 550945abd30..3fa863c49b9 100644 --- a/Makefile +++ b/Makefile @@ -21,5 +21,5 @@ test: glcpp @(cd tests; ./glcpp-test) clean: - rm -f glcpp-lex.c glcpp-parse.c *.o *~ + rm -f glcpp glcpp-lex.c glcpp-parse.c *.o *~ rm -f tests/*.out tests/*.gcc tests/*.expected tests/*~ diff --git a/glcpp-parse.y b/glcpp-parse.y index f972ec372b8..58afd724b6a 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -177,13 +177,20 @@ argument: | argument word { _string_list_append_item ($1, $2); talloc_free ($2); + $$ = $1; } | argument SPACE word { _string_list_append_item ($1, " "); _string_list_append_item ($1, $3); talloc_free ($3); + $$ = $1; + } +| argument '(' argument ')' { + _string_list_append_item ($1, "("); + _string_list_append_list ($1, $3); + _string_list_append_item ($1, ")"); + $$ = $1; } -| argument '(' argument ')' ; directive_with_newline: From 4eb2ccf261f739ad9b91455f28c1dece573a30d6 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 16:58:00 -0700 Subject: [PATCH 046/148] Add test with extra whitespace in macro definitions and invocations. This whitespace is not dealt with in an elegant way yet so this test does not pass currently. 
--- tests/023-define-extra-whitespace.c | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 tests/023-define-extra-whitespace.c diff --git a/tests/023-define-extra-whitespace.c b/tests/023-define-extra-whitespace.c new file mode 100644 index 00000000000..375355a17d9 --- /dev/null +++ b/tests/023-define-extra-whitespace.c @@ -0,0 +1,8 @@ +#define noargs() 1 +# define onearg(foo) foo + # define twoargs( x , y ) x y + # define threeargs( a , b , c ) a b c +noargs ( ) + onearg ( 2 ) + twoargs ( 3 , 4 ) +threeargs ( 5 , 6 , 7 ) From 81f01432bd4aad8e8b87ae273eb05297e35eff07 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 17:08:45 -0700 Subject: [PATCH 047/148] Don't return SPACE tokens unless strictly needed. This reverts the unconditional return of SPACE tokens from the lexer from commit 48b94da0994b44e41324a2419117dcd81facce8b . That commit seemed useful because it kept the lexer simpler, but the presence of SPACE tokens is causing lots of extra complication for the parser itself, (redundant productions other than whitespace differences, several productions buggy in the case of extra whitespace, etc.) Of course, we'd prefer to never have any whitespace token, but that's not possible with the need to distinguish between "#define foo()" and "#define foo ()". So we'll accept a little bit of pain in the lexer, (enough state to support this special-case token), in exchange for keeping most of the parser blissfully ignorant of whether tokens are separated by whitespace or not. This change does mean that our output now differs from that of "gcc -E", but only in whitespace. So we test with "diff -w" now to ignore those differences. 
--- glcpp-lex.l | 29 ++++++++++++++++++++++++++--- glcpp-parse.y | 22 +++++++--------------- tests/glcpp-test | 2 +- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 21b9e3530aa..3c9dda46d47 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,6 +32,9 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" +%x ST_DEFINE +%x ST_DEFVAL + SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -52,9 +55,31 @@ TOKEN [^[:space:](),]+ * "#define foo()" from "#define foo ()". */ {HASH}define{HSPACE}* { + BEGIN ST_DEFINE; return DEFINE; } +{IDENTIFIER} { + BEGIN ST_DEFVAL; + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; +} + +\n { + BEGIN INITIAL; + return NEWLINE; +} + +{HSPACE}+ { + BEGIN INITIAL; + return SPACE; +} + +"(" { + BEGIN INITIAL; + return '('; +} + {IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); switch (glcpp_parser_macro_type (yyextra, yylval.str)) @@ -84,8 +109,6 @@ TOKEN [^[:space:](),]+ return NEWLINE; } -{HSPACE}+ { - return SPACE; -} +{SPACE}+ %% diff --git a/glcpp-parse.y b/glcpp-parse.y index 58afd724b6a..71ea3e53439 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -135,7 +135,6 @@ content: | '(' { printf ("("); } | ')' { printf (")"); } | ',' { printf (","); } -| SPACE { printf (" "); } ; macro: @@ -156,10 +155,6 @@ argument_list: $$ = _argument_list_create (parser); _argument_list_append ($$, $1); } -| argument_list ',' SPACE argument { - _argument_list_append ($1, $4); - $$ = $1; - } | argument_list ',' argument { _argument_list_append ($1, $3); $$ = $1; @@ -179,12 +174,6 @@ argument: talloc_free ($2); $$ = $1; } -| argument SPACE word { - _string_list_append_item ($1, " "); - _string_list_append_item ($1, $3); - talloc_free ($3); - $$ = $1; - } | argument '(' argument ')' { _string_list_append_item ($1, "("); _string_list_append_list ($1, $3); @@ -209,8 +198,8 @@ directive: string_list_t *list = _string_list_create (parser); _define_function_macro (parser, 
$2, $4, list); } -| DEFINE IDENTIFIER '(' parameter_list ')' SPACE replacement_list { - _define_function_macro (parser, $2, $4, $7); +| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list { + _define_function_macro (parser, $2, $4, $6); } | UNDEF FUNC_MACRO { string_list_t *replacement = hash_table_find (parser->defines, $2); @@ -256,7 +245,6 @@ replacement_word: | '(' { $$ = xtalloc_strdup (parser, "("); } | ')' { $$ = xtalloc_strdup (parser, ")"); } | ',' { $$ = xtalloc_strdup (parser, ","); } -| SPACE { $$ = xtalloc_strdup (parser, " "); } ; parameter_list: @@ -373,8 +361,11 @@ _print_string_list (string_list_t *list) if (list == NULL) return; - for (node = list->head; node; node = node->next) + for (node = list->head; node; node = node->next) { printf ("%s", node->str); + if (node->next) + printf (" "); + } } argument_list_t * @@ -623,6 +614,7 @@ _expand_function_macro (glcpp_parser_t *parser, argument_list_t *arguments) { string_list_t *result; + macro_t *macro; result = _string_list_create (parser); diff --git a/tests/glcpp-test b/tests/glcpp-test index 25685eeabe5..bd204de1e2f 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -5,5 +5,5 @@ for test in *.c; do ../glcpp < $test > $test.out gcc -E $test -o $test.gcc grep -v '^#' < $test.gcc > $test.expected - diff -u $test.expected $test.out + diff -w -u $test.expected $test.out done From e36a4d5be9a9fa3abc4fb5d0b6c3601934f7a343 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 17:29:24 -0700 Subject: [PATCH 048/148] Fix two whitespace bugs in the lexer. The first bug was not allowing whitespace between '#' and the directive name. The second bug was swallowing a terminating newline along with any trailing whitespace on a line. With these two fixes, and the previous commit to stop emitting SPACE tokens, the recently added extra-whitespace test now passes. 
--- glcpp-lex.l | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 3c9dda46d47..97ff1175f1b 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -39,7 +39,7 @@ SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] HSPACE [ \t] -HASH ^{HSPACE}*# +HASH ^{HSPACE}*#{HSPACE}* IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* TOKEN [^[:space:](),]+ @@ -109,6 +109,6 @@ TOKEN [^[:space:](),]+ return NEWLINE; } -{SPACE}+ +{HSPACE}+ %% From 420d05a15b90658680b87b4d83b092768590319a Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 17 May 2010 10:15:23 -0700 Subject: [PATCH 049/148] Add test and fix bug leading to infinite recursion. The test case here is simply "#define foo foo" and "#define bar foo" and then attempting to expand "bar". Previously, our termination condition for the recursion was overly simple---just looking for the single identifier that began the expansion. We now fix this to maintain a stack of identifiers and terminate when any one of them occurs in the replacement list. 
--- glcpp-parse.y | 87 ++++++++++++++++++---- tests/024-define-chain-to-self-recursion.c | 3 + 2 files changed, 75 insertions(+), 15 deletions(-) create mode 100644 tests/024-define-chain-to-self-recursion.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 71ea3e53439..16d2a28a007 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -76,6 +76,12 @@ _string_list_append_item (string_list_t *list, const char *str); void _string_list_append_list (string_list_t *list, string_list_t *tail); +void +_string_list_push (string_list_t *list, const char *str); + +void +_string_list_pop (string_list_t *list); + int _string_list_contains (string_list_t *list, const char *member, int *index); @@ -319,6 +325,45 @@ _string_list_append_item (string_list_t *list, const char *str) list->tail = node; } +void +_string_list_push (string_list_t *list, const char *str) +{ + string_node_t *node; + + node = xtalloc (list, string_node_t); + node->str = xtalloc_strdup (node, str); + + node->next = list->head; + + if (list->tail == NULL) { + list->tail = node; + } + + list->head = node; +} + +void +_string_list_pop (string_list_t *list) +{ + string_node_t *node; + + node = list->head; + + if (node == NULL) { + fprintf (stderr, "Internal error: _string_list_pop called on an empty list.\n"); + exit (1); + } + + list->head = node->next; + + if (list->tail == node) { + assert (node->next == NULL); + list->tail = NULL; + } + + talloc_free (node); +} + int _string_list_contains (string_list_t *list, const char *member, int *index) { @@ -330,7 +375,8 @@ _string_list_contains (string_list_t *list, const char *member, int *index) for (i = 0, node = list->head; node; i++, node = node->next) { if (strcmp (node->str, member) == 0) { - *index = i; + if (index) + *index = i; return 1; } } @@ -525,14 +571,14 @@ _define_function_macro (glcpp_parser_t *parser, static string_list_t * _expand_macro_recursive (glcpp_parser_t *parser, const char *token, - const char *orig, + string_list_t *active, string_list_t 
*parameters, argument_list_t *arguments); static string_list_t * _expand_string_list_recursive (glcpp_parser_t *parser, string_list_t *list, - const char *orig, + string_list_t *active, string_list_t *parameters, argument_list_t *arguments) { @@ -547,7 +593,10 @@ _expand_string_list_recursive (glcpp_parser_t *parser, for (node = list->head ; node ; node = node->next) { token = node->str; - if (strcmp (token, orig) == 0) { + /* Don't expand this macro if it's on the active + * stack, (meaning we're already in the process of + * expanding it). */ + if (_string_list_contains (active, token, NULL)) { _string_list_append_item (result, token); continue; } @@ -557,11 +606,11 @@ _expand_string_list_recursive (glcpp_parser_t *parser, argument = _argument_list_member_at (arguments, index); child = _expand_string_list_recursive (parser, argument, - orig, NULL, NULL); + active, NULL, NULL); _string_list_append_list (result, child); } else { child = _expand_macro_recursive (parser, token, - orig, parameters, + active, parameters, arguments); _string_list_append_list (result, child); } @@ -574,12 +623,18 @@ _expand_string_list_recursive (glcpp_parser_t *parser, static string_list_t * _expand_macro_recursive (glcpp_parser_t *parser, const char *token, - const char *orig, + string_list_t *active, string_list_t *parameters, argument_list_t *arguments) { macro_t *macro; string_list_t *replacements; + string_list_t *result; + + if (active == NULL) + active = _string_list_create (NULL); + + _string_list_push (active, token); macro = hash_table_find (parser->defines, token); if (macro == NULL) { @@ -592,8 +647,14 @@ _expand_macro_recursive (glcpp_parser_t *parser, replacements = macro->replacements; - return _expand_string_list_recursive (parser, replacements, - orig, parameters, arguments); + result = _expand_string_list_recursive (parser, replacements, + active, parameters, arguments); + + _string_list_pop (active); + if (_string_list_length (active) == 0) + talloc_free (active); + + 
return result; } string_list_t * @@ -604,7 +665,7 @@ _expand_object_macro (glcpp_parser_t *parser, const char *identifier) macro = hash_table_find (parser->defines, identifier); assert (! macro->is_function); - return _expand_macro_recursive (parser, identifier, identifier, + return _expand_macro_recursive (parser, identifier, NULL, NULL, NULL); } @@ -613,12 +674,8 @@ _expand_function_macro (glcpp_parser_t *parser, const char *identifier, argument_list_t *arguments) { - string_list_t *result; - macro_t *macro; - result = _string_list_create (parser); - macro = hash_table_find (parser->defines, identifier); assert (macro->is_function); @@ -633,6 +690,6 @@ _expand_function_macro (glcpp_parser_t *parser, return NULL; } - return _expand_macro_recursive (parser, identifier, identifier, + return _expand_macro_recursive (parser, identifier, NULL, macro->parameters, arguments); } diff --git a/tests/024-define-chain-to-self-recursion.c b/tests/024-define-chain-to-self-recursion.c new file mode 100644 index 00000000000..e788adce30c --- /dev/null +++ b/tests/024-define-chain-to-self-recursion.c @@ -0,0 +1,3 @@ +#define foo foo +#define bar foo +bar From acf87bc03411c4d9b818a346bc9dad858b0a2407 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 17 May 2010 10:34:29 -0700 Subject: [PATCH 050/148] Fix bug (and add test) for a function-like-macro appearing as a non-macro. That is, when a function-like macro appears in the content without parentheses it should be accepted and passed on through, (previously the parser was regarding this as a syntax error). 
--- glcpp-parse.y | 4 ++++ tests/025-func-macro-as-non-macro.c | 2 ++ 2 files changed, 6 insertions(+) create mode 100644 tests/025-func-macro-as-non-macro.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 16d2a28a007..6f158d91398 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -133,6 +133,10 @@ content: printf ("%s", $1); talloc_free ($1); } +| FUNC_MACRO { + printf ("%s", $1); + talloc_free ($1); + } | macro { _print_string_list ($1); } diff --git a/tests/025-func-macro-as-non-macro.c b/tests/025-func-macro-as-non-macro.c new file mode 100644 index 00000000000..3dbe026d9dd --- /dev/null +++ b/tests/025-func-macro-as-non-macro.c @@ -0,0 +1,2 @@ +#define foo(bar) bar +foo From 796e1f0eadcfbbc6e4d79778b2378975204bb97c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 17 May 2010 12:45:16 -0700 Subject: [PATCH 051/148] Expect 1 shift/reduce conflict. The most recent fix to the parser introduced a shift/reduce conflict. We document this conflict here, and tell bison that it need not report it (since I verified that it's being resolved in the direction desired). For the record, I did write additional lexer code to eliminate this conflict, but it was quite fragile, (would not accept a newline between a function-like macro name and the left parenthesis, for example). --- glcpp-parse.y | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/glcpp-parse.y b/glcpp-parse.y index 6f158d91398..959083578e7 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -116,6 +116,14 @@ _argument_list_member_at (argument_list_t *list, int index); %type argument macro parameter_list replacement_list %type argument_list +/* Hard to remove shift/reduce conflicts documented as follows: + * + * 1. '(' after FUNC_MACRO name which is correctly resolved to shift + * to form macro invocation rather than reducing directly to + * content. 
+ */ +%expect 1 + %% input: From 1a29500e72ac338c1fb243742aff1c167e1059db Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 17 May 2010 13:19:04 -0700 Subject: [PATCH 052/148] Fix (and add test for) function-like macro invocation with newlines. The test has a newline before the left parenthesis, and newlines to separate the parentheses from the argument. The fix involves more state in the lexer to only return a NEWLINE token when terminating a directive. This is very similar to our previous fix with extra lexer state to only return the SPACE token when it would be significant for the parser. With this change, the exact number and positioning of newlines in the output is now different compared to "gcc -E" so we add a -B option to diff when testing to ignore that. --- glcpp-lex.l | 90 +++++++++++++++++++------- glcpp-parse.y | 1 - tests/026-define-func-extra-newlines.c | 6 ++ tests/glcpp-test | 2 +- 4 files changed, 73 insertions(+), 26 deletions(-) create mode 100644 tests/026-define-func-extra-newlines.c diff --git a/glcpp-lex.l b/glcpp-lex.l index 97ff1175f1b..4cb73c5d715 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -27,13 +27,36 @@ #include "glcpp.h" #include "glcpp-parse.h" + +/* Yes, a macro with a return statement in it is evil. But surely no + * more evil than all the code generation happening with flex in the + * first place. 
*/ +#define LEXIFY_IDENTIFIER do { \ + yylval.str = xtalloc_strdup (yyextra, yytext); \ + switch (glcpp_parser_macro_type (yyextra, yylval.str)) \ + { \ + case MACRO_TYPE_UNDEFINED: \ + return IDENTIFIER; \ + break; \ + case MACRO_TYPE_OBJECT: \ + return OBJ_MACRO; \ + break; \ + case MACRO_TYPE_FUNCTION: \ + return FUNC_MACRO; \ + break; \ + } \ + } while (0) + %} %option reentrant noyywrap %option extra-type="glcpp_parser_t *" %x ST_DEFINE +%x ST_DEFVAL_START %x ST_DEFVAL +%x ST_UNDEF +%x ST_UNDEF_END SPACE [[:space:]] NONSPACE [^[:space:]] @@ -46,9 +69,20 @@ TOKEN [^[:space:](),]+ %% {HASH}undef{HSPACE}* { + BEGIN ST_UNDEF; return UNDEF; } +{IDENTIFIER} { + BEGIN ST_UNDEF_END; + LEXIFY_IDENTIFIER; +} + +\n { + BEGIN INITIAL; + return NEWLINE; +} + /* We use the ST_DEFINE and ST_DEFVAL states so that we can * pass a space token, (yes, a token for whitespace!), since * the preprocessor specification requires distinguishing @@ -60,40 +94,48 @@ TOKEN [^[:space:](),]+ } {IDENTIFIER} { - BEGIN ST_DEFVAL; + BEGIN ST_DEFVAL_START; yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } +\n { + BEGIN INITIAL; + return NEWLINE; +} + +{HSPACE}+ { + BEGIN ST_DEFVAL; + return SPACE; +} + +"(" { + BEGIN ST_DEFVAL; + return '('; +} + +{IDENTIFIER} { + LEXIFY_IDENTIFIER; +} + +[(),] { + return yytext[0]; +} + +{TOKEN} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return TOKEN; +} + \n { BEGIN INITIAL; return NEWLINE; } -{HSPACE}+ { - BEGIN INITIAL; - return SPACE; -} - -"(" { - BEGIN INITIAL; - return '('; -} +{HSPACE}+ {IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - switch (glcpp_parser_macro_type (yyextra, yylval.str)) - { - case MACRO_TYPE_UNDEFINED: - return IDENTIFIER; - break; - case MACRO_TYPE_OBJECT: - return OBJ_MACRO; - break; - case MACRO_TYPE_FUNCTION: - return FUNC_MACRO; - break; - } + LEXIFY_IDENTIFIER; } [(),] { @@ -106,7 +148,7 @@ TOKEN [^[:space:](),]+ } \n { - return NEWLINE; + printf ("\n"); } {HSPACE}+ diff --git 
a/glcpp-parse.y b/glcpp-parse.y index 959083578e7..b2eaa5ba696 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -149,7 +149,6 @@ content: _print_string_list ($1); } | directive_with_newline { printf ("\n"); } -| NEWLINE { printf ("\n"); } | '(' { printf ("("); } | ')' { printf (")"); } | ',' { printf (","); } diff --git a/tests/026-define-func-extra-newlines.c b/tests/026-define-func-extra-newlines.c new file mode 100644 index 00000000000..0d837405309 --- /dev/null +++ b/tests/026-define-func-extra-newlines.c @@ -0,0 +1,6 @@ +#define foo(a) bar + +foo +( +1 +) diff --git a/tests/glcpp-test b/tests/glcpp-test index bd204de1e2f..673a4f45e96 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -5,5 +5,5 @@ for test in *.c; do ../glcpp < $test > $test.out gcc -E $test -o $test.gcc grep -v '^#' < $test.gcc > $test.expected - diff -w -u $test.expected $test.out + diff -B -w -u $test.expected $test.out done From d476db38fe21f5e6061a7d93dbd5a9991b91bf59 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 17 May 2010 13:33:10 -0700 Subject: [PATCH 053/148] Add several tests where the defined value of a macro is (or looks like) a macro Many of these look quite similar to existing tests that are handled correctly, yet none of these work. For example, in test 30 we have a simple non-function macro "foo" that is defined as "bar(baz(success))" and obviously non-function macro expansion has been working for a long time. Similarly, if we had text of "bar(baz(success))" it would be expanded correctly as well. But when this otherwise functioning text appears as the body of a macro, things don't work at all. This is pointing out a fundamental problem with the current approach. The current code does a recursive expansion of a macro definition, but this doesn't involve the parsing machinery, so it can't actually handle things like an arbitrary nesting of parentheses. 
The fix will require the parser to stuff macro values back into the lexer to get at all of the existing machinery when expanding macros. --- tests/027-define-chain-obj-to-func.c | 3 +++ tests/028-define-chain-obj-to-non-func.c | 3 +++ tests/029-define-chain-obj-to-func-with-args.c | 3 +++ tests/030-define-chain-obj-to-func-compose.c | 4 ++++ tests/031-define-chain-func-to-func-compose.c | 4 ++++ 5 files changed, 17 insertions(+) create mode 100644 tests/027-define-chain-obj-to-func.c create mode 100644 tests/028-define-chain-obj-to-non-func.c create mode 100644 tests/029-define-chain-obj-to-func-with-args.c create mode 100644 tests/030-define-chain-obj-to-func-compose.c create mode 100644 tests/031-define-chain-func-to-func-compose.c diff --git a/tests/027-define-chain-obj-to-func.c b/tests/027-define-chain-obj-to-func.c new file mode 100644 index 00000000000..5ccb52caba5 --- /dev/null +++ b/tests/027-define-chain-obj-to-func.c @@ -0,0 +1,3 @@ +#define failure() success +#define foo failure() +foo diff --git a/tests/028-define-chain-obj-to-non-func.c b/tests/028-define-chain-obj-to-non-func.c new file mode 100644 index 00000000000..44962a71876 --- /dev/null +++ b/tests/028-define-chain-obj-to-non-func.c @@ -0,0 +1,3 @@ +#define success() failure +#define foo success +foo diff --git a/tests/029-define-chain-obj-to-func-with-args.c b/tests/029-define-chain-obj-to-func-with-args.c new file mode 100644 index 00000000000..261f7d28fc2 --- /dev/null +++ b/tests/029-define-chain-obj-to-func-with-args.c @@ -0,0 +1,3 @@ +#define bar(failure) failure +#define foo bar(success) +foo diff --git a/tests/030-define-chain-obj-to-func-compose.c b/tests/030-define-chain-obj-to-func-compose.c new file mode 100644 index 00000000000..e56fbefd62d --- /dev/null +++ b/tests/030-define-chain-obj-to-func-compose.c @@ -0,0 +1,4 @@ +#define baz(failure) failure +#define bar(failure) failure +#define foo bar(baz(success)) +foo diff --git a/tests/031-define-chain-func-to-func-compose.c 
b/tests/031-define-chain-func-to-func-compose.c new file mode 100644 index 00000000000..3f4c8744dff --- /dev/null +++ b/tests/031-define-chain-func-to-func-compose.c @@ -0,0 +1,4 @@ +#define baz(failure) failure +#define bar(failure) failure +#define foo() bar(baz(success)) +foo() From a807fb72c45888b5ff915aa08d8bd10069be4a2e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 18 May 2010 22:10:04 -0700 Subject: [PATCH 054/148] Rewrite macro handling to support function-like macro invocation in macro values The rewrite here discards the functions that did direct, recursive expansion of macro values. Instead, the parser now pushes the macro definition string over to a stack of buffers for the lexer. This way, macro expansion gets access to all parsing machinery. This isn't a small change, but the result is simpler than before (I think). It passes the entire test suite, including the four tests added with the previous commit that were failing before. --- glcpp-lex.l | 152 ++++++++++++++------- glcpp-parse.y | 371 +++++++++++++++++++++----------------------------- glcpp.h | 77 +++++++++-- xtalloc.c | 14 ++ 4 files changed, 343 insertions(+), 271 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 4cb73c5d715..52be1b1ea43 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -27,34 +27,15 @@ #include "glcpp.h" #include "glcpp-parse.h" - -/* Yes, a macro with a return statement in it is evil. But surely no - * more evil than all the code generation happening with flex in the - * first place. 
*/ -#define LEXIFY_IDENTIFIER do { \ - yylval.str = xtalloc_strdup (yyextra, yytext); \ - switch (glcpp_parser_macro_type (yyextra, yylval.str)) \ - { \ - case MACRO_TYPE_UNDEFINED: \ - return IDENTIFIER; \ - break; \ - case MACRO_TYPE_OBJECT: \ - return OBJ_MACRO; \ - break; \ - case MACRO_TYPE_FUNCTION: \ - return FUNC_MACRO; \ - break; \ - } \ - } while (0) - %} %option reentrant noyywrap %option extra-type="glcpp_parser_t *" %x ST_DEFINE -%x ST_DEFVAL_START -%x ST_DEFVAL +%x ST_DEFINE_OBJ_OR_FUNC +%x ST_DEFINE_PARAMETER +%x ST_DEFINE_VALUE %x ST_UNDEF %x ST_UNDEF_END @@ -75,12 +56,14 @@ TOKEN [^[:space:](),]+ {IDENTIFIER} { BEGIN ST_UNDEF_END; - LEXIFY_IDENTIFIER; + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; } +{HSPACE}* + \n { BEGIN INITIAL; - return NEWLINE; } /* We use the ST_DEFINE and ST_DEFVAL states so that we can @@ -94,48 +77,73 @@ TOKEN [^[:space:](),]+ } {IDENTIFIER} { - BEGIN ST_DEFVAL_START; + BEGIN ST_DEFINE_OBJ_OR_FUNC; yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } -\n { +\n { BEGIN INITIAL; - return NEWLINE; + yylval.str = xtalloc_strdup (yyextra, ""); + return REPLACEMENT; } -{HSPACE}+ { - BEGIN ST_DEFVAL; - return SPACE; +{HSPACE}+ { + BEGIN ST_DEFINE_VALUE; } -"(" { - BEGIN ST_DEFVAL; +"(" { + BEGIN ST_DEFINE_PARAMETER; return '('; } -{IDENTIFIER} { - LEXIFY_IDENTIFIER; -} - -[(),] { - return yytext[0]; -} - -{TOKEN} { +{IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); - return TOKEN; + return IDENTIFIER; } -\n { +"," { + return ','; +} + +")" { + BEGIN ST_DEFINE_VALUE; + return ')'; +} + +{HSPACE}+ + +.*\n { BEGIN INITIAL; - return NEWLINE; + yylval.str = xtalloc_strndup (yyextra, yytext, strlen (yytext) - 1); + return REPLACEMENT; } -{HSPACE}+ - {IDENTIFIER} { - LEXIFY_IDENTIFIER; + int parameter_index; + yylval.str = xtalloc_strdup (yyextra, yytext); + switch (glcpp_parser_classify_token (yyextra, yylval.str, + ¶meter_index)) + { + case TOKEN_CLASS_ARGUMENT: + talloc_free 
(yylval.str); + /* We don't return a value here since the + * current token will be replaced by new + * tokens. */ + glcpp_parser_push_expansion_argument (yyextra, + parameter_index); + break; + case TOKEN_CLASS_IDENTIFIER: + return IDENTIFIER; + break; + case TOKEN_CLASS_FUNC_MACRO: + return FUNC_MACRO; + break; + case TOKEN_CLASS_OBJ_MACRO: + return OBJ_MACRO; + break; + + } } [(),] { @@ -153,4 +161,54 @@ TOKEN [^[:space:](),]+ {HSPACE}+ +<> { + int done; + + done = glcpp_lex_stack_pop (yyextra->lex_stack); + + if (done) + yyterminate (); + + glcpp_parser_pop_expansion (yyextra); +} + %% + +void +glcpp_lex_stack_push (glcpp_lex_stack_t *stack, const char *string) +{ + struct yyguts_t *yyg = (struct yyguts_t*) stack->parser->scanner; + glcpp_lex_node_t *node; + + /* Save the current buffer on the top of the stack. */ + node = xtalloc (stack, glcpp_lex_node_t); + node->buffer = YY_CURRENT_BUFFER; + + node->next = stack->head; + stack->head = node; + + /* Then switch to a new scan buffer for string. 
*/ + yy_scan_string (string, stack->parser->scanner); +} + +int +glcpp_lex_stack_pop (glcpp_lex_stack_t *stack) +{ + struct yyguts_t *yyg = (struct yyguts_t*) stack->parser->scanner; + glcpp_lex_node_t *node; + + node = stack->head; + + if (node == NULL) + return 1; + + stack->head = node->next; + + yy_delete_buffer (YY_CURRENT_BUFFER, stack->parser->scanner); + yy_switch_to_buffer ((YY_BUFFER_STATE) node->buffer, + stack->parser->scanner); + + talloc_free (node); + + return 0; +} diff --git a/glcpp-parse.y b/glcpp-parse.y index b2eaa5ba696..9f1075aa50a 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -25,41 +25,29 @@ #include #include #include -#include #include "glcpp.h" #define YYLEX_PARAM parser->scanner -typedef struct { - int is_function; - string_list_t *parameters; - string_list_t *replacements; -} macro_t; - -struct glcpp_parser { - yyscan_t scanner; - struct hash_table *defines; -}; - void yyerror (void *scanner, const char *error); void _define_object_macro (glcpp_parser_t *parser, const char *macro, - string_list_t *replacements); + const char *replacement); void _define_function_macro (glcpp_parser_t *parser, const char *macro, string_list_t *parameters, - string_list_t *replacements); + const char *replacement); -string_list_t * +void _expand_object_macro (glcpp_parser_t *parser, const char *identifier); -string_list_t * +void _expand_function_macro (glcpp_parser_t *parser, const char *identifier, argument_list_t *arguments); @@ -76,12 +64,6 @@ _string_list_append_item (string_list_t *list, const char *str); void _string_list_append_list (string_list_t *list, string_list_t *tail); -void -_string_list_push (string_list_t *list, const char *str); - -void -_string_list_pop (string_list_t *list); - int _string_list_contains (string_list_t *list, const char *member, int *index); @@ -111,9 +93,9 @@ _argument_list_member_at (argument_list_t *list, int index); %parse-param {glcpp_parser_t *parser} %lex-param {void *scanner} -%token DEFINE FUNC_MACRO 
IDENTIFIER NEWLINE OBJ_MACRO SPACE TOKEN UNDEF -%type FUNC_MACRO IDENTIFIER identifier_perhaps_macro OBJ_MACRO replacement_word TOKEN word -%type argument macro parameter_list replacement_list +%token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO REPLACEMENT TOKEN UNDEF +%type FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN word +%type argument macro parameter_list %type argument_list /* Hard to remove shift/reduce conflicts documented as follows: @@ -145,21 +127,21 @@ content: printf ("%s", $1); talloc_free ($1); } -| macro { - _print_string_list ($1); +| directive { + printf ("\n"); } -| directive_with_newline { printf ("\n"); } | '(' { printf ("("); } | ')' { printf (")"); } | ',' { printf (","); } +| macro ; macro: FUNC_MACRO '(' argument_list ')' { - $$ = _expand_function_macro (parser, $1, $3); + _expand_function_macro (parser, $1, $3); } | OBJ_MACRO { - $$ = _expand_object_macro (parser, $1); + _expand_object_macro (parser, $1); talloc_free ($1); } ; @@ -184,7 +166,7 @@ argument: _string_list_append_item ($$, $1); } | macro { - $$ = $1; + $$ = _string_list_create (parser); } | argument word { _string_list_append_item ($1, $2); @@ -199,93 +181,42 @@ argument: } ; -directive_with_newline: - directive NEWLINE -; - directive: - DEFINE IDENTIFIER { - string_list_t *list = _string_list_create (parser); - _define_object_macro (parser, $2, list); + DEFINE IDENTIFIER REPLACEMENT { + _define_object_macro (parser, $2, $3); } -| DEFINE IDENTIFIER SPACE replacement_list { - _define_object_macro (parser, $2, $4); - } -| DEFINE IDENTIFIER '(' parameter_list ')' { - string_list_t *list = _string_list_create (parser); - _define_function_macro (parser, $2, $4, list); - } -| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list { +| DEFINE IDENTIFIER '(' parameter_list ')' REPLACEMENT { _define_function_macro (parser, $2, $4, $6); } -| UNDEF FUNC_MACRO { - string_list_t *replacement = hash_table_find (parser->defines, $2); - if (replacement) { +| UNDEF IDENTIFIER { + 
string_list_t *macro = hash_table_find (parser->defines, $2); + if (macro) { /* XXX: Need hash table to support a real way * to remove an element rather than prefixing * a new node with data of NULL like this. */ hash_table_insert (parser->defines, NULL, $2); - talloc_free (replacement); + talloc_free (macro); } talloc_free ($2); } -| UNDEF OBJ_MACRO { - string_list_t *replacement = hash_table_find (parser->defines, $2); - if (replacement) { - /* XXX: Need hash table to support a real way - * to remove an element rather than prefixing - * a new node with data of NULL like this. */ - hash_table_insert (parser->defines, NULL, $2); - talloc_free (replacement); - } - talloc_free ($2); - } -; - -replacement_list: - replacement_word { - $$ = _string_list_create (parser); - _string_list_append_item ($$, $1); - talloc_free ($1); - } -| replacement_list replacement_word { - _string_list_append_item ($1, $2); - talloc_free ($2); - $$ = $1; - } -; - -replacement_word: - word { $$ = $1; } -| FUNC_MACRO { $$ = $1; } -| OBJ_MACRO { $$ = $1; } -| '(' { $$ = xtalloc_strdup (parser, "("); } -| ')' { $$ = xtalloc_strdup (parser, ")"); } -| ',' { $$ = xtalloc_strdup (parser, ","); } ; parameter_list: /* empty */ { $$ = _string_list_create (parser); } -| identifier_perhaps_macro { +| IDENTIFIER { $$ = _string_list_create (parser); _string_list_append_item ($$, $1); talloc_free ($1); } -| parameter_list ',' identifier_perhaps_macro { +| parameter_list ',' IDENTIFIER { _string_list_append_item ($1, $3); talloc_free ($3); $$ = $1; } ; -identifier_perhaps_macro: - IDENTIFIER { $$ = $1; } -| FUNC_MACRO { $$ = $1; } -| OBJ_MACRO { $$ = $1; } -; - word: IDENTIFIER { $$ = $1; } | TOKEN { $$ = $1; } @@ -336,45 +267,6 @@ _string_list_append_item (string_list_t *list, const char *str) list->tail = node; } -void -_string_list_push (string_list_t *list, const char *str) -{ - string_node_t *node; - - node = xtalloc (list, string_node_t); - node->str = xtalloc_strdup (node, str); - - node->next = 
list->head; - - if (list->tail == NULL) { - list->tail = node; - } - - list->head = node; -} - -void -_string_list_pop (string_list_t *list) -{ - string_node_t *node; - - node = list->head; - - if (node == NULL) { - fprintf (stderr, "Internal error: _string_list_pop called on an empty list.\n"); - exit (1); - } - - list->head = node->next; - - if (list->tail == node) { - assert (node->next == NULL); - list->tail = NULL; - } - - talloc_free (node); -} - int _string_list_contains (string_list_t *list, const char *member, int *index) { @@ -512,6 +404,11 @@ glcpp_parser_create (void) yylex_init_extra (parser, &parser->scanner); parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); + parser->expansions = NULL; + + parser->lex_stack = xtalloc (parser, glcpp_lex_stack_t); + parser->lex_stack->parser = parser; + parser->lex_stack->head = NULL; return parser; } @@ -530,26 +427,46 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } -macro_type_t -glcpp_parser_macro_type (glcpp_parser_t *parser, const char *identifier) +token_class_t +glcpp_parser_classify_token (glcpp_parser_t *parser, + const char *identifier, + int *parameter_index) { macro_t *macro; + /* First we check if we are currently expanding a + * function-like macro, and if so, whether the parameter list + * contains a parameter matching this token name. */ + if (parser->expansions && + parser->expansions->macro && + parser->expansions->macro->parameters) + { + string_list_t *list; + + list = parser->expansions->macro->parameters; + + if (_string_list_contains (list, identifier, parameter_index)) + return TOKEN_CLASS_ARGUMENT; + } + + /* If not a function-like macro parameter, we next check if + * this token is a macro itself. 
*/ + macro = hash_table_find (parser->defines, identifier); if (macro == NULL) - return MACRO_TYPE_UNDEFINED; + return TOKEN_CLASS_IDENTIFIER; if (macro->is_function) - return MACRO_TYPE_FUNCTION; + return TOKEN_CLASS_FUNC_MACRO; else - return MACRO_TYPE_OBJECT; + return TOKEN_CLASS_OBJ_MACRO; } void _define_object_macro (glcpp_parser_t *parser, const char *identifier, - string_list_t *replacements) + const char *replacement) { macro_t *macro; @@ -557,7 +474,8 @@ _define_object_macro (glcpp_parser_t *parser, macro->is_function = 0; macro->parameters = NULL; - macro->replacements = talloc_steal (macro, replacements); + macro->identifier = talloc_strdup (macro, identifier); + macro->replacement = talloc_steal (macro, replacement); hash_table_insert (parser->defines, macro, identifier); } @@ -566,7 +484,7 @@ void _define_function_macro (glcpp_parser_t *parser, const char *identifier, string_list_t *parameters, - string_list_t *replacements) + const char *replacement) { macro_t *macro; @@ -574,101 +492,126 @@ _define_function_macro (glcpp_parser_t *parser, macro->is_function = 1; macro->parameters = talloc_steal (macro, parameters); - macro->replacements = talloc_steal (macro, replacements); + macro->identifier = talloc_strdup (macro, identifier); + macro->replacement = talloc_steal (macro, replacement); hash_table_insert (parser->defines, macro, identifier); } -static string_list_t * -_expand_macro_recursive (glcpp_parser_t *parser, - const char *token, - string_list_t *active, - string_list_t *parameters, - argument_list_t *arguments); - -static string_list_t * -_expand_string_list_recursive (glcpp_parser_t *parser, - string_list_t *list, - string_list_t *active, - string_list_t *parameters, - argument_list_t *arguments) +static void +_glcpp_parser_push_expansion_internal (glcpp_parser_t *parser, + macro_t *macro, + argument_list_t *arguments, + const char * replacement) { - string_list_t *result; - string_list_t *child; - const char *token; + expansion_node_t *node; 
+ + node = xtalloc (parser, expansion_node_t); + + node->macro = macro; + node->arguments = arguments; + + node->next = parser->expansions; + parser->expansions = node; + + glcpp_lex_stack_push (parser->lex_stack, replacement); +} + +void +glcpp_parser_push_expansion_macro (glcpp_parser_t *parser, + macro_t *macro, + argument_list_t *arguments) +{ + _glcpp_parser_push_expansion_internal (parser, macro, arguments, + macro->replacement); +} + +void +glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, + int argument_index) +{ + argument_list_t *arguments; + string_list_t *argument; string_node_t *node; - int index; + char *argument_str, *s; + int length; - result = _string_list_create (parser); + arguments = parser->expansions->arguments; - for (node = list->head ; node ; node = node->next) { - token = node->str; + argument = _argument_list_member_at (arguments, argument_index); - /* Don't expand this macro if it's on the active - * stack, (meaning we're already in the process of - * expanding it). 
*/ - if (_string_list_contains (active, token, NULL)) { - _string_list_append_item (result, token); - continue; - } + length = 0; + for (node = argument->head; node; node = node->next) + length += strlen (node->str) + 1; - if (_string_list_contains (parameters, token, &index)) { - string_list_t *argument; + argument_str = xtalloc_size (parser, length); - argument = _argument_list_member_at (arguments, index); - child = _expand_string_list_recursive (parser, argument, - active, NULL, NULL); - _string_list_append_list (result, child); - } else { - child = _expand_macro_recursive (parser, token, - active, parameters, - arguments); - _string_list_append_list (result, child); + *argument_str = '\0'; + s = argument_str; + for (node = argument->head; node; node = node->next) { + strcpy (s, node->str); + s += strlen (node->str); + if (node->next) { + *s = ' '; + s++; + *s = '\0'; } } - return result; + _glcpp_parser_push_expansion_internal (parser, NULL, NULL, + argument_str); } - -static string_list_t * -_expand_macro_recursive (glcpp_parser_t *parser, - const char *token, - string_list_t *active, - string_list_t *parameters, - argument_list_t *arguments) +/* The lexer calls this when it exhausts a string. 
*/ +void +glcpp_parser_pop_expansion (glcpp_parser_t *parser) { - macro_t *macro; - string_list_t *replacements; - string_list_t *result; + expansion_node_t *node; - if (active == NULL) - active = _string_list_create (NULL); + node = parser->expansions; - _string_list_push (active, token); - - macro = hash_table_find (parser->defines, token); - if (macro == NULL) { - string_list_t *result; - - result = _string_list_create (parser); - _string_list_append_item (result, token); - return result; + if (node == NULL) { + fprintf (stderr, "Internal error: _expansion_list_pop called on an empty list.\n"); + exit (1); } - replacements = macro->replacements; + parser->expansions = node->next; - result = _expand_string_list_recursive (parser, replacements, - active, parameters, arguments); - - _string_list_pop (active); - if (_string_list_length (active) == 0) - talloc_free (active); - - return result; + talloc_free (node); } -string_list_t * +int +glcpp_parser_is_expanding (glcpp_parser_t *parser, const char *member) +{ + expansion_node_t *node; + + for (node = parser->expansions; node; node = node->next) { + if (node->macro && + strcmp (node->macro->identifier, member) == 0) + { + return 1; + } + } + + return 0; +} + +static void +_expand_macro (glcpp_parser_t *parser, + const char *token, + macro_t *macro, + argument_list_t *arguments) +{ + /* Don't recurse if we're already actively expanding this token. */ + if (glcpp_parser_is_expanding (parser, token)) { + printf ("%s", token); + return; + } + + glcpp_parser_push_expansion_macro (parser, macro, arguments); +} + +void _expand_object_macro (glcpp_parser_t *parser, const char *identifier) { macro_t *macro; @@ -676,11 +619,10 @@ _expand_object_macro (glcpp_parser_t *parser, const char *identifier) macro = hash_table_find (parser->defines, identifier); assert (! 
macro->is_function); - return _expand_macro_recursive (parser, identifier, NULL, - NULL, NULL); + _expand_macro (parser, identifier, macro, NULL); } -string_list_t * +void _expand_function_macro (glcpp_parser_t *parser, const char *identifier, argument_list_t *arguments) @@ -698,9 +640,8 @@ _expand_function_macro (glcpp_parser_t *parser, identifier, _argument_list_length (arguments), _string_list_length (macro->parameters)); - return NULL; + return; } - return _expand_macro_recursive (parser, identifier, NULL, - macro->parameters, arguments); + _expand_macro (parser, identifier, macro, arguments); } diff --git a/glcpp.h b/glcpp.h index 7966a2a3d21..81f7d14c5ba 100644 --- a/glcpp.h +++ b/glcpp.h @@ -24,11 +24,13 @@ #ifndef GLCPP_H #define GLCPP_H +#include + #include "hash_table.h" #define yyscan_t void* -/* Some data types used for parser value. */ +/* Some data types used for parser values. */ typedef struct string_node { const char *str; @@ -52,6 +54,56 @@ typedef struct argument_list { typedef struct glcpp_parser glcpp_parser_t; +/* Support for temporarily lexing/parsing tokens from a string. 
*/ + +typedef struct glcpp_lex_node { + void *buffer; + struct glcpp_lex_node *next; +} glcpp_lex_node_t; + +typedef struct { + glcpp_parser_t *parser; + glcpp_lex_node_t *head; +} glcpp_lex_stack_t; + +void +glcpp_lex_stack_push (glcpp_lex_stack_t *stack, const char *string); + +int +glcpp_lex_stack_pop (glcpp_lex_stack_t *stack); + +typedef enum { + TOKEN_CLASS_ARGUMENT, + TOKEN_CLASS_IDENTIFIER, + TOKEN_CLASS_FUNC_MACRO, + TOKEN_CLASS_OBJ_MACRO +} token_class_t; + +token_class_t +glcpp_parser_classify_token (glcpp_parser_t *parser, + const char *identifier, + int *parameter_index); + +typedef struct { + int is_function; + string_list_t *parameters; + const char *identifier; + const char *replacement; +} macro_t; + +typedef struct expansion_node { + macro_t *macro; + argument_list_t *arguments; + struct expansion_node *next; +} expansion_node_t; + +struct glcpp_parser { + yyscan_t scanner; + struct hash_table *defines; + expansion_node_t *expansions; + glcpp_lex_stack_t *lex_stack; +}; + glcpp_parser_t * glcpp_parser_create (void); @@ -61,15 +113,17 @@ glcpp_parser_parse (glcpp_parser_t *parser); void glcpp_parser_destroy (glcpp_parser_t *parser); -typedef enum { - MACRO_TYPE_UNDEFINED, - MACRO_TYPE_OBJECT, - MACRO_TYPE_FUNCTION -} macro_type_t; +void +glcpp_parser_push_expansion_macro (glcpp_parser_t *parser, + macro_t *macro, + argument_list_t *arguments); -macro_type_t -glcpp_parser_macro_type (glcpp_parser_t *parser, - const char *identifier); +void +glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, + int argument_index); + +void +glcpp_parser_pop_expansion (glcpp_parser_t *parser); /* Generated by glcpp-lex.l to glcpp-lex.c */ @@ -91,10 +145,15 @@ yyparse (glcpp_parser_t *parser); #define xtalloc(ctx, type) (type *)xtalloc_named_const(ctx, sizeof(type), #type) +#define xtalloc_size(ctx, size) xtalloc_named_const(ctx, size, __location__) + void * xtalloc_named_const (const void *context, size_t size, const char *name); char * xtalloc_strdup (const 
void *t, const char *p); +char * +xtalloc_strndup (const void *t, const char *p, size_t n); + #endif diff --git a/xtalloc.c b/xtalloc.c index 849e12d3491..d9893ae8893 100644 --- a/xtalloc.c +++ b/xtalloc.c @@ -50,3 +50,17 @@ xtalloc_strdup (const void *t, const char *p) return ret; } + +char * +xtalloc_strndup (const void *t, const char *p, size_t n) +{ + char *ret; + + ret = talloc_strndup (t, p, n); + if (ret == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + + return ret; +} From be0e2e9b2ada51be66afb6b44330acb44e0261f2 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 07:29:22 -0700 Subject: [PATCH 055/148] Fix bug (and add tests) for a function-like macro defined as itself. This case worked previously, but broke in the recent rewrite of function- like macro expansion. The recursion was still terminated correctly, but any parenthesized expression after the macro name was still being swallowed even though the identifier was not being expanded as a macro. The fix is to notice earlier that the identifier is an already-expanding macro. We let the lexer know this through the classify_token function so that an already-expanding macro is lexed as an identifier, not a FUNC_MACRO. 
--- glcpp-parse.y | 59 ++++++++++++---------------- tests/032-define-func-self-recurse.c | 2 + tests/033-define-func-self-compose.c | 2 + 3 files changed, 30 insertions(+), 33 deletions(-) create mode 100644 tests/032-define-func-self-recurse.c create mode 100644 tests/033-define-func-self-compose.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 9f1075aa50a..8dc07483c18 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -427,6 +427,22 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } +static int +glcpp_parser_is_expanding (glcpp_parser_t *parser, const char *member) +{ + expansion_node_t *node; + + for (node = parser->expansions; node; node = node->next) { + if (node->macro && + strcmp (node->macro->identifier, member) == 0) + { + return 1; + } + } + + return 0; +} + token_class_t glcpp_parser_classify_token (glcpp_parser_t *parser, const char *identifier, @@ -457,6 +473,12 @@ glcpp_parser_classify_token (glcpp_parser_t *parser, if (macro == NULL) return TOKEN_CLASS_IDENTIFIER; + /* Don't consider this a macro if we are already actively + * expanding this macro. */ + if (glcpp_parser_is_expanding (parser, identifier)) + return TOKEN_CLASS_IDENTIFIER; + + /* Definitely a macro. Just need to check if it's function-like. */ if (macro->is_function) return TOKEN_CLASS_FUNC_MACRO; else @@ -580,37 +602,6 @@ glcpp_parser_pop_expansion (glcpp_parser_t *parser) talloc_free (node); } -int -glcpp_parser_is_expanding (glcpp_parser_t *parser, const char *member) -{ - expansion_node_t *node; - - for (node = parser->expansions; node; node = node->next) { - if (node->macro && - strcmp (node->macro->identifier, member) == 0) - { - return 1; - } - } - - return 0; -} - -static void -_expand_macro (glcpp_parser_t *parser, - const char *token, - macro_t *macro, - argument_list_t *arguments) -{ - /* Don't recurse if we're already actively expanding this token. 
*/ - if (glcpp_parser_is_expanding (parser, token)) { - printf ("%s", token); - return; - } - - glcpp_parser_push_expansion_macro (parser, macro, arguments); -} - void _expand_object_macro (glcpp_parser_t *parser, const char *identifier) { @@ -618,8 +609,9 @@ _expand_object_macro (glcpp_parser_t *parser, const char *identifier) macro = hash_table_find (parser->defines, identifier); assert (! macro->is_function); + assert (! glcpp_parser_is_expanding (parser, identifier)); - _expand_macro (parser, identifier, macro, NULL); + glcpp_parser_push_expansion_macro (parser, macro, NULL); } void @@ -631,6 +623,7 @@ _expand_function_macro (glcpp_parser_t *parser, macro = hash_table_find (parser->defines, identifier); assert (macro->is_function); + assert (! glcpp_parser_is_expanding (parser, identifier)); if (_argument_list_length (arguments) != _string_list_length (macro->parameters)) @@ -643,5 +636,5 @@ _expand_function_macro (glcpp_parser_t *parser, return; } - _expand_macro (parser, identifier, macro, arguments); + glcpp_parser_push_expansion_macro (parser, macro, arguments); } diff --git a/tests/032-define-func-self-recurse.c b/tests/032-define-func-self-recurse.c new file mode 100644 index 00000000000..60d8526c0aa --- /dev/null +++ b/tests/032-define-func-self-recurse.c @@ -0,0 +1,2 @@ +#define foo(a) foo(2 * (a)) +foo(3) diff --git a/tests/033-define-func-self-compose.c b/tests/033-define-func-self-compose.c new file mode 100644 index 00000000000..8abaaf6be95 --- /dev/null +++ b/tests/033-define-func-self-compose.c @@ -0,0 +1,2 @@ +#define foo(a) foo(2 * (a)) +foo(foo(3)) From 69f390d6096c597dbe63f20fd02b2312da211de8 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 07:42:42 -0700 Subject: [PATCH 056/148] Fix bug (and test) for an invocation using macro name as a non-macro argument This adds a second shift/reduce conflict to our grammar. 
It's basically the same conflict we had previously, (deciding to shift a '(' after a FUNC_MACRO) but this time in the "argument" context rather than the "content" context. It would be nice to not have these, but I think they are unavoidable (without a lot of pain at least) given the preprocessor specification. --- glcpp-parse.y | 10 +++++++++- tests/034-define-func-self-compose-non-func.c | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 tests/034-define-func-self-compose-non-func.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 8dc07483c18..ea27184c47c 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -103,8 +103,12 @@ _argument_list_member_at (argument_list_t *list, int index); * 1. '(' after FUNC_MACRO name which is correctly resolved to shift * to form macro invocation rather than reducing directly to * content. + * + * 2. Similarly, '(' after FUNC_MACRO which is correctly resolved to + * shift to form macro invocation rather than reducing directly to + * argument. */ -%expect 1 +%expect 2 %% @@ -168,6 +172,10 @@ argument: | macro { $$ = _string_list_create (parser); } +| FUNC_MACRO { + $$ = _string_list_create (parser); + _string_list_append_item ($$, $1); + } | argument word { _string_list_append_item ($1, $2); talloc_free ($2); diff --git a/tests/034-define-func-self-compose-non-func.c b/tests/034-define-func-self-compose-non-func.c new file mode 100644 index 00000000000..209a5f7e07c --- /dev/null +++ b/tests/034-define-func-self-compose-non-func.c @@ -0,0 +1,2 @@ +#define foo(bar) bar +foo(foo) From 59ca98990f814926d716a13b0201c94945133824 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 07:49:47 -0700 Subject: [PATCH 057/148] Fix bug as in previous fix, but with multi-token argument. The previous fix added FUNC_MACRO to a production one higher in the grammar than it should have. So it prevented a FUNC_MACRO from appearing as part of a multi-token argument rather than just alone as an argument. 
Fix this (and add a test). --- glcpp-parse.y | 22 +++++++++---------- ...lf-compose-non-func-multi-token-argument.c | 2 ++ 2 files changed, 12 insertions(+), 12 deletions(-) create mode 100644 tests/035-define-func-self-compose-non-func-multi-token-argument.c diff --git a/glcpp-parse.y b/glcpp-parse.y index ea27184c47c..400f138d17e 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -94,7 +94,7 @@ _argument_list_member_at (argument_list_t *list, int index); %lex-param {void *scanner} %token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO REPLACEMENT TOKEN UNDEF -%type FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN word +%type argument_word FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN %type argument macro parameter_list %type argument_list @@ -165,18 +165,14 @@ argument_list: ; argument: - word { + argument_word { $$ = _string_list_create (parser); _string_list_append_item ($$, $1); } | macro { $$ = _string_list_create (parser); } -| FUNC_MACRO { - $$ = _string_list_create (parser); - _string_list_append_item ($$, $1); - } -| argument word { +| argument argument_word { _string_list_append_item ($1, $2); talloc_free ($2); $$ = $1; @@ -189,6 +185,13 @@ argument: } ; +argument_word: + IDENTIFIER { $$ = $1; } +| TOKEN { $$ = $1; } +| FUNC_MACRO { $$ = $1; } +; + + directive: DEFINE IDENTIFIER REPLACEMENT { _define_object_macro (parser, $2, $3); @@ -225,11 +228,6 @@ parameter_list: } ; -word: - IDENTIFIER { $$ = $1; } -| TOKEN { $$ = $1; } -; - %% string_list_t * diff --git a/tests/035-define-func-self-compose-non-func-multi-token-argument.c b/tests/035-define-func-self-compose-non-func-multi-token-argument.c new file mode 100644 index 00000000000..9955219470c --- /dev/null +++ b/tests/035-define-func-self-compose-non-func-multi-token-argument.c @@ -0,0 +1,2 @@ +#define foo(bar) bar +foo(1 + foo) From 5d2114254592e03b6d554c5e2eea4ea442c3fa05 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 07:57:03 -0700 Subject: [PATCH 058/148] Like previous fix, 
but for object-like macros (and add a test). The support for an object-like macro within a macro-invocation argument was also implemented at one level too high in the grammar. Fortunately, this is a very simple fix. --- glcpp-parse.y | 4 +--- tests/036-define-func-non-macro-multi-token-argument.c | 3 +++ 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 tests/036-define-func-non-macro-multi-token-argument.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 400f138d17e..647532f209d 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -169,9 +169,6 @@ argument: $$ = _string_list_create (parser); _string_list_append_item ($$, $1); } -| macro { - $$ = _string_list_create (parser); - } | argument argument_word { _string_list_append_item ($1, $2); talloc_free ($2); @@ -189,6 +186,7 @@ argument_word: IDENTIFIER { $$ = $1; } | TOKEN { $$ = $1; } | FUNC_MACRO { $$ = $1; } +| macro { $$ = xtalloc_strdup (parser, ""); } ; diff --git a/tests/036-define-func-non-macro-multi-token-argument.c b/tests/036-define-func-non-macro-multi-token-argument.c new file mode 100644 index 00000000000..b21ff336738 --- /dev/null +++ b/tests/036-define-func-non-macro-multi-token-argument.c @@ -0,0 +1,3 @@ +#define bar success +#define foo(x) x +foo(more bar) From 8f38aff9b5dd42ef963532fe5fc618e8bafa218a Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 10:01:29 -0700 Subject: [PATCH 059/148] Add a wrapper function around the lexer. We rename the generated lexer from yylex to glcpp_lex. Then we implement our own yylex function in glcpp-parse.y that calls glcpp_lex. This doesn't change the behavior at all yet, but gives us a place where we can implement alternate lexing in the future. (We want this because instead of re-lexing from strings for macro expansion, we want to lex from pre-parsed token lists. We need this so that when we terminate recursion due to an already active macro expansion, we can ensure that that symbol never gets expanded again later.) 
--- Makefile | 2 +- glcpp-parse.y | 13 +++++++++++-- glcpp.h | 6 +++--- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 3fa863c49b9..88116128f85 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o xtalloc.o bison --debug --defines=$*.h --output=$*.c $^ %.c: %.l - flex --outfile=$@ $< + flex --prefix=glcpp_ --outfile=$@ $< glcpp-lex.c: glcpp-parse.h diff --git a/glcpp-parse.y b/glcpp-parse.y index 647532f209d..6ef1cae0eca 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -82,6 +82,9 @@ _argument_list_length (argument_list_t *list); string_list_t * _argument_list_member_at (argument_list_t *list, int index); +static int +yylex (yyscan_t scanner); + %} %union { @@ -405,7 +408,7 @@ glcpp_parser_create (void) parser = xtalloc (NULL, glcpp_parser_t); - yylex_init_extra (parser, &parser->scanner); + glcpp_lex_init_extra (parser, &parser->scanner); parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); parser->expansions = NULL; @@ -426,7 +429,7 @@ glcpp_parser_parse (glcpp_parser_t *parser) void glcpp_parser_destroy (glcpp_parser_t *parser) { - yylex_destroy (parser->scanner); + glcpp_lex_destroy (parser->scanner); hash_table_dtor (parser->defines); talloc_free (parser); } @@ -642,3 +645,9 @@ _expand_function_macro (glcpp_parser_t *parser, glcpp_parser_push_expansion_macro (parser, macro, arguments); } + +static int +yylex (yyscan_t scanner) +{ + return glcpp_lex (scanner); +} diff --git a/glcpp.h b/glcpp.h index 81f7d14c5ba..6aabf6f1823 100644 --- a/glcpp.h +++ b/glcpp.h @@ -128,13 +128,13 @@ glcpp_parser_pop_expansion (glcpp_parser_t *parser); /* Generated by glcpp-lex.l to glcpp-lex.c */ int -yylex_init_extra (glcpp_parser_t *parser, yyscan_t* scanner); +glcpp_lex_init_extra (glcpp_parser_t *parser, yyscan_t* scanner); int -yylex (yyscan_t scanner); +glcpp_lex (yyscan_t scanner); int -yylex_destroy (yyscan_t scanner); +glcpp_lex_destroy 
(yyscan_t scanner); /* Generated by glcpp-parse.y to glcpp-parse.c */ From 0293b2e2dd81fabd3ecb71e036a99621801e1c94 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 10:05:40 -0700 Subject: [PATCH 060/148] Rename yylex to glcpp_parser_lex and give it a glcpp_parser_t* argument. Much cleaner this way, (and now our custom lex function has access to all the parser state which it will need). --- glcpp-parse.y | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 6ef1cae0eca..04bac00e9f9 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -82,8 +82,10 @@ _argument_list_length (argument_list_t *list); string_list_t * _argument_list_member_at (argument_list_t *list, int index); +#define yylex glcpp_parser_lex + static int -yylex (yyscan_t scanner); +glcpp_parser_lex (glcpp_parser_t *parser); %} @@ -94,7 +96,7 @@ yylex (yyscan_t scanner); } %parse-param {glcpp_parser_t *parser} -%lex-param {void *scanner} +%lex-param {glcpp_parser_t *parser} %token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO REPLACEMENT TOKEN UNDEF %type argument_word FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN @@ -647,7 +649,7 @@ _expand_function_macro (glcpp_parser_t *parser, } static int -yylex (yyscan_t scanner) +glcpp_parser_lex (glcpp_parser_t *parser) { - return glcpp_lex (scanner); + return glcpp_lex (parser->scanner); } From 66df1c262a0c816b28b21457fc499fadfcc0dbee Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 10:06:56 -0700 Subject: [PATCH 061/148] Remove unneeded YYLEX_PARAM define. I'm not sure where this came from, but it's clearly not needed. 
--- glcpp-parse.y | 2 -- 1 file changed, 2 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 04bac00e9f9..bca22cec862 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -28,8 +28,6 @@ #include "glcpp.h" -#define YYLEX_PARAM parser->scanner - void yyerror (void *scanner, const char *error); From 71c59ec66bc258be6a641b26f793060f6d9522c8 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 10:07:31 -0700 Subject: [PATCH 062/148] Remove unused NEWLINE token. We fixed the lexer a while back to never return a NEWLINE token, but neglected to clean up this declaration. --- glcpp-parse.y | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index bca22cec862..bb57b300982 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -96,7 +96,7 @@ glcpp_parser_lex (glcpp_parser_t *parser); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO REPLACEMENT TOKEN UNDEF +%token DEFINE FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN UNDEF %type argument_word FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN %type argument macro parameter_list %type argument_list From aaa9acbf10b7a8e7dac061885ef95823ad27f80e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 13:28:24 -0700 Subject: [PATCH 063/148] Perform "re lexing" on string list values rather than on text. Previously, we would pass original strings back to the original lexer whenever we needed to re-lex something, (such as an expanded macro or a macro argument). Now, we instead parse the macro or argument originally to a string list, and then re-lex by simply returning each string from this list in turn. We do this in the recently added glcpp_parser_lex function that sits on top of the lower-level glcpp_lex that only deals with text. 
This doesn't change any behavior (at least according to the existing test suite which all still passes) but it brings us much closer to being able to "finalize" an unexpanded macro as required by the specification. --- glcpp-lex.l | 71 ++++++------------------ glcpp-parse.y | 148 ++++++++++++++++++++++++++++++++++---------------- glcpp.h | 38 ++----------- 3 files changed, 124 insertions(+), 133 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 52be1b1ea43..aec967964b0 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -84,12 +84,12 @@ TOKEN [^[:space:](),]+ \n { BEGIN INITIAL; - yylval.str = xtalloc_strdup (yyextra, ""); - return REPLACEMENT; + return NEWLINE; } {HSPACE}+ { BEGIN ST_DEFINE_VALUE; + return SPACE; } "(" { @@ -113,10 +113,21 @@ TOKEN [^[:space:](),]+ {HSPACE}+ -.*\n { +{TOKEN} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return TOKEN; +} + +[(),] { + yylval.str = xtalloc_strdup (yyextra, yytext); + return TOKEN; +} + +{HSPACE}+ + +\n { BEGIN INITIAL; - yylval.str = xtalloc_strndup (yyextra, yytext, strlen (yytext) - 1); - return REPLACEMENT; + return NEWLINE; } {IDENTIFIER} { @@ -161,54 +172,4 @@ TOKEN [^[:space:](),]+ {HSPACE}+ -<> { - int done; - - done = glcpp_lex_stack_pop (yyextra->lex_stack); - - if (done) - yyterminate (); - - glcpp_parser_pop_expansion (yyextra); -} - %% - -void -glcpp_lex_stack_push (glcpp_lex_stack_t *stack, const char *string) -{ - struct yyguts_t *yyg = (struct yyguts_t*) stack->parser->scanner; - glcpp_lex_node_t *node; - - /* Save the current buffer on the top of the stack. */ - node = xtalloc (stack, glcpp_lex_node_t); - node->buffer = YY_CURRENT_BUFFER; - - node->next = stack->head; - stack->head = node; - - /* Then switch to a new scan buffer for string. 
*/ - yy_scan_string (string, stack->parser->scanner); -} - -int -glcpp_lex_stack_pop (glcpp_lex_stack_t *stack) -{ - struct yyguts_t *yyg = (struct yyguts_t*) stack->parser->scanner; - glcpp_lex_node_t *node; - - node = stack->head; - - if (node == NULL) - return 1; - - stack->head = node->next; - - yy_delete_buffer (YY_CURRENT_BUFFER, stack->parser->scanner); - yy_switch_to_buffer ((YY_BUFFER_STATE) node->buffer, - stack->parser->scanner); - - talloc_free (node); - - return 0; -} diff --git a/glcpp-parse.y b/glcpp-parse.y index bb57b300982..2383c93117f 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -34,13 +34,13 @@ yyerror (void *scanner, const char *error); void _define_object_macro (glcpp_parser_t *parser, const char *macro, - const char *replacement); + string_list_t *replacements); void _define_function_macro (glcpp_parser_t *parser, const char *macro, string_list_t *parameters, - const char *replacement); + string_list_t *replacements); void _expand_object_macro (glcpp_parser_t *parser, const char *identifier); @@ -80,6 +80,14 @@ _argument_list_length (argument_list_t *list); string_list_t * _argument_list_member_at (argument_list_t *list, int index); +static void +glcpp_parser_push_expansion_macro (glcpp_parser_t *parser, + macro_t *macro, + argument_list_t *arguments); + +static void +glcpp_parser_pop_expansion (glcpp_parser_t *parser); + #define yylex glcpp_parser_lex static int @@ -96,9 +104,9 @@ glcpp_parser_lex (glcpp_parser_t *parser); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINE FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN UNDEF -%type argument_word FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN -%type argument macro parameter_list +%token DEFINE FUNC_MACRO IDENTIFIER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF +%type argument_word FUNC_MACRO IDENTIFIER OBJ_MACRO TOKEN +%type argument macro parameter_list replacement_list pp_tokens %type argument_list /* Hard to remove shift/reduce conflicts documented as 
follows: @@ -194,10 +202,14 @@ argument_word: directive: - DEFINE IDENTIFIER REPLACEMENT { - _define_object_macro (parser, $2, $3); + DEFINE IDENTIFIER NEWLINE { + string_list_t *list = _string_list_create (parser); + _define_object_macro (parser, $2, list); } -| DEFINE IDENTIFIER '(' parameter_list ')' REPLACEMENT { +| DEFINE IDENTIFIER SPACE replacement_list NEWLINE { + _define_object_macro (parser, $2, $4); + } +| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list NEWLINE { _define_function_macro (parser, $2, $4, $6); } | UNDEF IDENTIFIER { @@ -229,6 +241,27 @@ parameter_list: } ; +replacement_list: + /* empty */ { + $$ = _string_list_create (parser); + } +| pp_tokens { + $$ = $1; + } +; + + +pp_tokens: + TOKEN { + $$ = _string_list_create (parser); + _string_list_append_item ($$, $1); + } +| pp_tokens TOKEN { + _string_list_append_item ($1, $2); + $$ = $1; + } +; + %% string_list_t * @@ -413,10 +446,6 @@ glcpp_parser_create (void) hash_table_string_compare); parser->expansions = NULL; - parser->lex_stack = xtalloc (parser, glcpp_lex_stack_t); - parser->lex_stack->parser = parser; - parser->lex_stack->head = NULL; - return parser; } @@ -495,7 +524,7 @@ glcpp_parser_classify_token (glcpp_parser_t *parser, void _define_object_macro (glcpp_parser_t *parser, const char *identifier, - const char *replacement) + string_list_t *replacements) { macro_t *macro; @@ -504,7 +533,7 @@ _define_object_macro (glcpp_parser_t *parser, macro->is_function = 0; macro->parameters = NULL; macro->identifier = talloc_strdup (macro, identifier); - macro->replacement = talloc_steal (macro, replacement); + macro->replacements = talloc_steal (macro, replacements); hash_table_insert (parser->defines, macro, identifier); } @@ -513,7 +542,7 @@ void _define_function_macro (glcpp_parser_t *parser, const char *identifier, string_list_t *parameters, - const char *replacement) + string_list_t *replacements) { macro_t *macro; @@ -522,7 +551,7 @@ _define_function_macro (glcpp_parser_t *parser, 
macro->is_function = 1; macro->parameters = talloc_steal (macro, parameters); macro->identifier = talloc_strdup (macro, identifier); - macro->replacement = talloc_steal (macro, replacement); + macro->replacements = talloc_steal (macro, replacements); hash_table_insert (parser->defines, macro, identifier); } @@ -531,7 +560,7 @@ static void _glcpp_parser_push_expansion_internal (glcpp_parser_t *parser, macro_t *macro, argument_list_t *arguments, - const char * replacement) + string_node_t *replacements) { expansion_node_t *node; @@ -539,20 +568,19 @@ _glcpp_parser_push_expansion_internal (glcpp_parser_t *parser, node->macro = macro; node->arguments = arguments; + node->replacements = replacements; node->next = parser->expansions; parser->expansions = node; - - glcpp_lex_stack_push (parser->lex_stack, replacement); } -void +static void glcpp_parser_push_expansion_macro (glcpp_parser_t *parser, macro_t *macro, argument_list_t *arguments) { _glcpp_parser_push_expansion_internal (parser, macro, arguments, - macro->replacement); + macro->replacements->head); } void @@ -561,38 +589,16 @@ glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, { argument_list_t *arguments; string_list_t *argument; - string_node_t *node; - char *argument_str, *s; - int length; arguments = parser->expansions->arguments; argument = _argument_list_member_at (arguments, argument_index); - length = 0; - for (node = argument->head; node; node = node->next) - length += strlen (node->str) + 1; - - argument_str = xtalloc_size (parser, length); - - *argument_str = '\0'; - s = argument_str; - for (node = argument->head; node; node = node->next) { - strcpy (s, node->str); - s += strlen (node->str); - if (node->next) { - *s = ' '; - s++; - *s = '\0'; - } - } - _glcpp_parser_push_expansion_internal (parser, NULL, NULL, - argument_str); + argument->head); } -/* The lexer calls this when it exhausts a string. 
*/ -void +static void glcpp_parser_pop_expansion (glcpp_parser_t *parser) { expansion_node_t *node; @@ -649,5 +655,55 @@ _expand_function_macro (glcpp_parser_t *parser, static int glcpp_parser_lex (glcpp_parser_t *parser) { - return glcpp_lex (parser->scanner); + expansion_node_t *expansion; + string_node_t *replacements; + int parameter_index; + + /* Who says C can't do efficient tail recursion? */ + RECURSE: + + expansion = parser->expansions; + + if (expansion == NULL) + return glcpp_lex (parser->scanner); + + replacements = expansion->replacements; + + /* Pop expansion when replacements is exhausted. */ + if (replacements == NULL) { + glcpp_parser_pop_expansion (parser); + goto RECURSE; + } + + expansion->replacements = replacements->next; + + if (strcmp (replacements->str, "(") == 0) + return '('; + else if (strcmp (replacements->str, ")") == 0) + return ')'; + else if (strcmp (replacements->str, ",") == 0) + return ','; + + yylval.str = xtalloc_strdup (parser, replacements->str); + + switch (glcpp_parser_classify_token (parser, yylval.str, + &parameter_index)) + { + case TOKEN_CLASS_ARGUMENT: + talloc_free (yylval.str); + glcpp_parser_push_expansion_argument (parser, + parameter_index); + goto RECURSE; + break; + case TOKEN_CLASS_IDENTIFIER: + return IDENTIFIER; + break; + case TOKEN_CLASS_FUNC_MACRO: + return FUNC_MACRO; + break; + default: + case TOKEN_CLASS_OBJ_MACRO: + return OBJ_MACRO; + break; + } } diff --git a/glcpp.h b/glcpp.h index 6aabf6f1823..ef821a7637a 100644 --- a/glcpp.h +++ b/glcpp.h @@ -54,24 +54,6 @@ typedef struct argument_list { typedef struct glcpp_parser glcpp_parser_t; -/* Support for temporarily lexing/parsing tokens from a string. 
*/ - -typedef struct glcpp_lex_node { - void *buffer; - struct glcpp_lex_node *next; -} glcpp_lex_node_t; - -typedef struct { - glcpp_parser_t *parser; - glcpp_lex_node_t *head; -} glcpp_lex_stack_t; - -void -glcpp_lex_stack_push (glcpp_lex_stack_t *stack, const char *string); - -int -glcpp_lex_stack_pop (glcpp_lex_stack_t *stack); - typedef enum { TOKEN_CLASS_ARGUMENT, TOKEN_CLASS_IDENTIFIER, @@ -88,12 +70,13 @@ typedef struct { int is_function; string_list_t *parameters; const char *identifier; - const char *replacement; + string_list_t *replacements; } macro_t; typedef struct expansion_node { macro_t *macro; argument_list_t *arguments; + string_node_t *replacements; struct expansion_node *next; } expansion_node_t; @@ -101,9 +84,12 @@ struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; expansion_node_t *expansions; - glcpp_lex_stack_t *lex_stack; }; +void +glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, + int argument_index); + glcpp_parser_t * glcpp_parser_create (void); @@ -113,18 +99,6 @@ glcpp_parser_parse (glcpp_parser_t *parser); void glcpp_parser_destroy (glcpp_parser_t *parser); -void -glcpp_parser_push_expansion_macro (glcpp_parser_t *parser, - macro_t *macro, - argument_list_t *arguments); - -void -glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, - int argument_index); - -void -glcpp_parser_pop_expansion (glcpp_parser_t *parser); - /* Generated by glcpp-lex.l to glcpp-lex.c */ int From 472524413d004680dbdb89602617f32da8f42f56 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 13:54:37 -0700 Subject: [PATCH 064/148] Use new token_list_t rather than string_list_t for macro values. There's not yet any change in functionality here, (at least according to the test suite). But we now have the option of specifying a type for each string in the token list. This will allow us to finalize an unexpanded macro name so that it won't be subjected to excess expansion later. 
--- glcpp-parse.y | 111 +++++++++++++++++++++++++++++++++++++------------- glcpp.h | 17 ++++++-- 2 files changed, 97 insertions(+), 31 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 2383c93117f..c8d1919d9c5 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -34,13 +34,13 @@ yyerror (void *scanner, const char *error); void _define_object_macro (glcpp_parser_t *parser, const char *macro, - string_list_t *replacements); + token_list_t *replacements); void _define_function_macro (glcpp_parser_t *parser, const char *macro, string_list_t *parameters, - string_list_t *replacements); + token_list_t *replacements); void _expand_object_macro (glcpp_parser_t *parser, const char *identifier); @@ -72,14 +72,23 @@ argument_list_t * _argument_list_create (void *ctx); void -_argument_list_append (argument_list_t *list, string_list_t *argument); +_argument_list_append (argument_list_t *list, token_list_t *argument); int _argument_list_length (argument_list_t *list); -string_list_t * +token_list_t * _argument_list_member_at (argument_list_t *list, int index); +token_list_t * +_token_list_create (void *ctx); + +void +_token_list_append (token_list_t *list, int type, const char *value); + +void +_token_list_append_list (token_list_t *list, token_list_t *tail); + static void glcpp_parser_push_expansion_macro (glcpp_parser_t *parser, macro_t *macro, @@ -97,8 +106,9 @@ glcpp_parser_lex (glcpp_parser_t *parser); %union { char *str; - string_list_t *string_list; argument_list_t *argument_list; + string_list_t *string_list; + token_list_t *token_list; } %parse-param {glcpp_parser_t *parser} @@ -106,8 +116,9 @@ glcpp_parser_lex (glcpp_parser_t *parser); %token DEFINE FUNC_MACRO IDENTIFIER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF %type argument_word FUNC_MACRO IDENTIFIER OBJ_MACRO TOKEN -%type argument macro parameter_list replacement_list pp_tokens %type argument_list +%type macro parameter_list +%type argument replacement_list pp_tokens /* Hard to remove shift/reduce conflicts 
documented as follows: * @@ -177,18 +188,18 @@ argument_list: argument: argument_word { - $$ = _string_list_create (parser); - _string_list_append_item ($$, $1); + $$ = _token_list_create (parser); + _token_list_append ($$, IDENTIFIER, $1); } | argument argument_word { - _string_list_append_item ($1, $2); + _token_list_append ($1, IDENTIFIER, $2); talloc_free ($2); $$ = $1; } | argument '(' argument ')' { - _string_list_append_item ($1, "("); - _string_list_append_list ($1, $3); - _string_list_append_item ($1, ")"); + _token_list_append ($1, '(', "("); + _token_list_append_list ($1, $3); + _token_list_append ($1, ')', ")"); $$ = $1; } ; @@ -203,7 +214,7 @@ argument_word: directive: DEFINE IDENTIFIER NEWLINE { - string_list_t *list = _string_list_create (parser); + token_list_t *list = _token_list_create (parser); _define_object_macro (parser, $2, list); } | DEFINE IDENTIFIER SPACE replacement_list NEWLINE { @@ -243,7 +254,7 @@ parameter_list: replacement_list: /* empty */ { - $$ = _string_list_create (parser); + $$ = _token_list_create (parser); } | pp_tokens { $$ = $1; @@ -253,11 +264,11 @@ replacement_list: pp_tokens: TOKEN { - $$ = _string_list_create (parser); - _string_list_append_item ($$, $1); + $$ = _token_list_create (parser); + _token_list_append ($$, TOKEN, $1); } | pp_tokens TOKEN { - _string_list_append_item ($1, $2); + _token_list_append ($1, TOKEN, $2); $$ = $1; } ; @@ -370,7 +381,7 @@ _argument_list_create (void *ctx) } void -_argument_list_append (argument_list_t *list, string_list_t *argument) +_argument_list_append (argument_list_t *list, token_list_t *argument) { argument_node_t *node; @@ -406,7 +417,7 @@ _argument_list_length (argument_list_t *list) return length; } -string_list_t * +token_list_t * _argument_list_member_at (argument_list_t *list, int index) { argument_node_t *node; @@ -427,6 +438,50 @@ _argument_list_member_at (argument_list_t *list, int index) return NULL; } + +token_list_t * +_token_list_create (void *ctx) +{ + token_list_t 
*list; + + list = xtalloc (ctx, token_list_t); + list->head = NULL; + list->tail = NULL; + + return list; +} + +void +_token_list_append (token_list_t *list, int type, const char *value) +{ + token_node_t *node; + + node = xtalloc (list, token_node_t); + node->type = type; + node->value = xtalloc_strdup (list, value); + + node->next = NULL; + + if (list->head == NULL) { + list->head = node; + } else { + list->tail->next = node; + } + + list->tail = node; +} + +void +_token_list_append_list (token_list_t *list, token_list_t *tail) +{ + if (list->head == NULL) { + list->head = tail->head; + } else { + list->tail->next = tail->head; + } + + list->tail = tail->tail; +} void yyerror (void *scanner, const char *error) @@ -524,7 +579,7 @@ glcpp_parser_classify_token (glcpp_parser_t *parser, void _define_object_macro (glcpp_parser_t *parser, const char *identifier, - string_list_t *replacements) + token_list_t *replacements) { macro_t *macro; @@ -542,7 +597,7 @@ void _define_function_macro (glcpp_parser_t *parser, const char *identifier, string_list_t *parameters, - string_list_t *replacements) + token_list_t *replacements) { macro_t *macro; @@ -560,7 +615,7 @@ static void _glcpp_parser_push_expansion_internal (glcpp_parser_t *parser, macro_t *macro, argument_list_t *arguments, - string_node_t *replacements) + token_node_t *replacements) { expansion_node_t *node; @@ -588,7 +643,7 @@ glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, int argument_index) { argument_list_t *arguments; - string_list_t *argument; + token_list_t *argument; arguments = parser->expansions->arguments; @@ -656,7 +711,7 @@ static int glcpp_parser_lex (glcpp_parser_t *parser) { expansion_node_t *expansion; - string_node_t *replacements; + token_node_t *replacements; int parameter_index; /* Who says C can't do efficient tail recursion? 
*/ @@ -677,14 +732,14 @@ glcpp_parser_lex (glcpp_parser_t *parser) expansion->replacements = replacements->next; - if (strcmp (replacements->str, "(") == 0) + if (strcmp (replacements->value, "(") == 0) return '('; - else if (strcmp (replacements->str, ")") == 0) + else if (strcmp (replacements->value, ")") == 0) return ')'; - else if (strcmp (replacements->str, ",") == 0) + else if (strcmp (replacements->value, ",") == 0) return ','; - yylval.str = xtalloc_strdup (parser, replacements->str); + yylval.str = xtalloc_strdup (parser, replacements->value); switch (glcpp_parser_classify_token (parser, yylval.str, &parameter_index)) diff --git a/glcpp.h b/glcpp.h index ef821a7637a..c647e2a72e1 100644 --- a/glcpp.h +++ b/glcpp.h @@ -42,8 +42,19 @@ typedef struct string_list { string_node_t *tail; } string_list_t; +typedef struct token_node { + int type; + const char *value; + struct token_node *next; +} token_node_t; + +typedef struct token_list { + token_node_t *head; + token_node_t *tail; +} token_list_t; + typedef struct argument_node { - string_list_t *argument; + token_list_t *argument; struct argument_node *next; } argument_node_t; @@ -70,13 +81,13 @@ typedef struct { int is_function; string_list_t *parameters; const char *identifier; - string_list_t *replacements; + token_list_t *replacements; } macro_t; typedef struct expansion_node { macro_t *macro; argument_list_t *arguments; - string_node_t *replacements; + token_node_t *replacements; struct expansion_node *next; } expansion_node_t; From b569383bbdfa22ed591255f56fb93832633a95ae Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 08:01:44 -0700 Subject: [PATCH 065/148] Avoid re-expanding a macro name that has once been rejected from expansion.
The specification of the preprocessor in C99 says that when we see a macro name that we are already expanding that we refuse to expand it now, (which we've done for a while), but also that we refuse to ever expand it later if seen in other contexts at which it would be legitimate to expand. We add a test case for that here, and fix it to work. The fix takes advantage of a new token_t value for tokens and argument words along with the recently added IDENTIFIER_FINALIZED token type which instructs the parser to not even look for another expansion. --- glcpp-lex.l | 12 ++++++--- glcpp-parse.y | 38 ++++++++++++++++++--------- glcpp.h | 6 +++++ tests/037-finalize-unexpanded-macro.c | 3 +++ 4 files changed, 44 insertions(+), 15 deletions(-) create mode 100644 tests/037-finalize-unexpanded-macro.c diff --git a/glcpp-lex.l b/glcpp-lex.l index aec967964b0..8e3ab661e6f 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -114,12 +114,14 @@ TOKEN [^[:space:](),]+ {HSPACE}+ {TOKEN} { - yylval.str = xtalloc_strdup (yyextra, yytext); + yylval.token.type = TOKEN; + yylval.token.value = xtalloc_strdup (yyextra, yytext); return TOKEN; } [(),] { - yylval.str = xtalloc_strdup (yyextra, yytext); + yylval.token.type = TOKEN; + yylval.token.value = xtalloc_strdup (yyextra, yytext); return TOKEN; } @@ -147,6 +149,9 @@ TOKEN [^[:space:](),]+ case TOKEN_CLASS_IDENTIFIER: return IDENTIFIER; break; + case TOKEN_CLASS_IDENTIFIER_FINALIZED: + return IDENTIFIER_FINALIZED; + break; case TOKEN_CLASS_FUNC_MACRO: return FUNC_MACRO; break; @@ -162,7 +167,8 @@ TOKEN [^[:space:](),]+ } {TOKEN} { - yylval.str = xtalloc_strdup (yyextra, yytext); + yylval.token.type = TOKEN; + yylval.token.value = xtalloc_strdup (yyextra, yytext); return TOKEN; } diff --git a/glcpp-parse.y b/glcpp-parse.y index c8d1919d9c5..28e79ebf9f7 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -108,16 +108,18 @@ glcpp_parser_lex (glcpp_parser_t *parser); char *str; argument_list_t *argument_list; string_list_t *string_list; + token_t 
token; token_list_t *token_list; } %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINE FUNC_MACRO IDENTIFIER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF -%type argument_word FUNC_MACRO IDENTIFIER OBJ_MACRO TOKEN +%token DEFINE FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO NEWLINE SPACE TOKEN UNDEF +%type FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO %type argument_list %type macro parameter_list +%type TOKEN argument_word %type argument replacement_list pp_tokens /* Hard to remove shift/reduce conflicts documented as follows: @@ -145,10 +147,14 @@ content: printf ("%s", $1); talloc_free ($1); } -| TOKEN { +| IDENTIFIER_FINALIZED { printf ("%s", $1); talloc_free ($1); } +| TOKEN { + printf ("%s", $1.value); + talloc_free ($1.value); + } | FUNC_MACRO { printf ("%s", $1); talloc_free ($1); @@ -189,11 +195,11 @@ argument_list: argument: argument_word { $$ = _token_list_create (parser); - _token_list_append ($$, IDENTIFIER, $1); + _token_list_append ($$, $1.type, $1.value); } | argument argument_word { - _token_list_append ($1, IDENTIFIER, $2); - talloc_free ($2); + _token_list_append ($1, $2.type, $2.value); + talloc_free ($2.value); $$ = $1; } | argument '(' argument ')' { @@ -205,10 +211,11 @@ argument: ; argument_word: - IDENTIFIER { $$ = $1; } + IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; } +| IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; } | TOKEN { $$ = $1; } -| FUNC_MACRO { $$ = $1; } -| macro { $$ = xtalloc_strdup (parser, ""); } +| FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; } +| macro { $$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); } ; @@ -265,10 +272,10 @@ replacement_list: pp_tokens: TOKEN { $$ = _token_list_create (parser); - _token_list_append ($$, TOKEN, $1); + _token_list_append ($$, $1.type, $1.value); } | pp_tokens TOKEN { - _token_list_append ($1, TOKEN, $2); + _token_list_append ($1, $2.type, $2.value); $$ = $1; } ; @@ -567,7 +574,7 @@ 
glcpp_parser_classify_token (glcpp_parser_t *parser, /* Don't consider this a macro if we are already actively * expanding this macro. */ if (glcpp_parser_is_expanding (parser, identifier)) - return TOKEN_CLASS_IDENTIFIER; + return TOKEN_CLASS_IDENTIFIER_FINALIZED; /* Definitely a macro. Just need to check if it's function-like. */ if (macro->is_function) @@ -741,6 +748,10 @@ glcpp_parser_lex (glcpp_parser_t *parser) yylval.str = xtalloc_strdup (parser, replacements->value); + /* Carefully refuse to expand any finalized identifier. */ + if (replacements->type == IDENTIFIER_FINALIZED) + return IDENTIFIER_FINALIZED; + switch (glcpp_parser_classify_token (parser, yylval.str, &parameter_index)) { @@ -753,6 +764,9 @@ glcpp_parser_lex (glcpp_parser_t *parser) case TOKEN_CLASS_IDENTIFIER: return IDENTIFIER; break; + case TOKEN_CLASS_IDENTIFIER_FINALIZED: + return IDENTIFIER_FINALIZED; + break; case TOKEN_CLASS_FUNC_MACRO: return FUNC_MACRO; break; diff --git a/glcpp.h b/glcpp.h index c647e2a72e1..5432a318173 100644 --- a/glcpp.h +++ b/glcpp.h @@ -42,6 +42,11 @@ typedef struct string_list { string_node_t *tail; } string_list_t; +typedef struct token { + int type; + char *value; +} token_t; + typedef struct token_node { int type; const char *value; @@ -68,6 +73,7 @@ typedef struct glcpp_parser glcpp_parser_t; typedef enum { TOKEN_CLASS_ARGUMENT, TOKEN_CLASS_IDENTIFIER, + TOKEN_CLASS_IDENTIFIER_FINALIZED, TOKEN_CLASS_FUNC_MACRO, TOKEN_CLASS_OBJ_MACRO } token_class_t; diff --git a/tests/037-finalize-unexpanded-macro.c b/tests/037-finalize-unexpanded-macro.c new file mode 100644 index 00000000000..b3a2f37f1b9 --- /dev/null +++ b/tests/037-finalize-unexpanded-macro.c @@ -0,0 +1,3 @@ +#define expand(x) expand(x once) +#define foo(x) x +foo(expand(just)) From 9f3d2c4e3dff3eb4f5820a034426056bf66b3015 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 08:42:02 -0700 Subject: [PATCH 066/148] Add support for commas within parenthesized groups in function arguments.
The specification says that commas within a parenthesized group, (that's not a function-like macro invocation), are passed through literally and not considered argument separators in any outer macro invocation. Add support and a test for this case. This support makes a third occurrence of the same "FUNC_MACRO (" shift/reduce conflict appear, so expect that. This change does introduce a fairly large copy/paste block in the grammar which is unfortunate. Perhaps if I were more clever I'd find a way to share the common pieces between argument and argument_or_comma. --- glcpp-parse.y | 44 +++++++++++++++++++++++++++++--- tests/038-func-arg-with-commas.c | 2 ++ 2 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 tests/038-func-arg-with-commas.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 28e79ebf9f7..c9edc5c3040 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -119,8 +119,8 @@ glcpp_parser_lex (glcpp_parser_t *parser); %type FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO %type argument_list %type macro parameter_list -%type TOKEN argument_word -%type argument replacement_list pp_tokens +%type TOKEN argument_word argument_word_or_comma +%type argument argument_or_comma replacement_list pp_tokens /* Hard to remove shift/reduce conflicts documented as follows: * @@ -131,8 +131,10 @@ glcpp_parser_lex (glcpp_parser_t *parser); * 2. Similarly, '(' after FUNC_MACRO which is correctly resolved to * shift to form macro invocation rather than reducing directly to * argument. + * + * 3. Similarly again now that we added argument_or_comma as well. 
*/ -%expect 2 +%expect 3 %% @@ -202,7 +204,7 @@ argument: talloc_free ($2.value); $$ = $1; } -| argument '(' argument ')' { +| argument '(' argument_or_comma ')' { _token_list_append ($1, '(', "("); _token_list_append_list ($1, $3); _token_list_append ($1, ')', ")"); @@ -218,6 +220,40 @@ argument_word: | macro { $$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); } ; + /* XXX: The body of argument_or_comma is the same as the body + * of argument, but with "argument" and "argument_word" + * changed to "argument_or_comma" and + * "argument_word_or_comma". It would be nice to have less + * redundancy here, but I'm not sure how. + * + * It would also be nice to have a less ugly grammar to have + * to implement, but such is the C preprocessor. + */ +argument_or_comma: + argument_word_or_comma { + $$ = _token_list_create (parser); + _token_list_append ($$, $1.type, $1.value); + } +| argument_or_comma argument_word_or_comma { + _token_list_append ($1, $2.type, $2.value); + $$ = $1; + } +| argument_or_comma '(' argument_or_comma ')' { + _token_list_append ($1, '(', "("); + _token_list_append_list ($1, $3); + _token_list_append ($1, ')', ")"); + $$ = $1; + } +; + +argument_word_or_comma: + IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; } +| IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; } +| TOKEN { $$ = $1; } +| FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; } +| macro { $$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); } +| ',' { $$.type = ','; $$.value = xtalloc_strdup (parser, ","); } +; directive: DEFINE IDENTIFIER NEWLINE { diff --git a/tests/038-func-arg-with-commas.c b/tests/038-func-arg-with-commas.c new file mode 100644 index 00000000000..1407c7d6e3c --- /dev/null +++ b/tests/038-func-arg-with-commas.c @@ -0,0 +1,2 @@ +#define foo(x) success +foo(argument (with,embedded , commas) -- tricky) From 805ea6afe66f52476094256914b7319b29972a16 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 12:06:33 -0700 Subject: 
[PATCH 067/148] Add test (and fix) for a function argument of a macro that expands with a comma. The fix here is quite simple (and actually only deletes code). When expanding a macro, we don't return a ',' as a unique token type, but simply let it fall through to the generic case. --- glcpp-parse.y | 2 -- tests/039-func-arg-obj-macro-with-comma.c | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 tests/039-func-arg-obj-macro-with-comma.c diff --git a/glcpp-parse.y b/glcpp-parse.y index c9edc5c3040..773db93e54a 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -779,8 +779,6 @@ glcpp_parser_lex (glcpp_parser_t *parser) return '('; else if (strcmp (replacements->value, ")") == 0) return ')'; - else if (strcmp (replacements->value, ",") == 0) - return ','; yylval.str = xtalloc_strdup (parser, replacements->value); diff --git a/tests/039-func-arg-obj-macro-with-comma.c b/tests/039-func-arg-obj-macro-with-comma.c new file mode 100644 index 00000000000..0f7fe632b56 --- /dev/null +++ b/tests/039-func-arg-obj-macro-with-comma.c @@ -0,0 +1,3 @@ +#define foo(a) (a) +#define bar two,words +foo(bar) From 660bda057a0f9c83625e798c0f719080d11e9431 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 14:00:28 -0700 Subject: [PATCH 068/148] Stop ignoring whitespace while testing. Sometime back the output of glcpp started differing from the output of "gcc -E" in the amount of whitespace emitted. At the time, I switched the test suite to use "diff -w" to ignore this. This was a mistake since it ignores whitespace entirely. (I meant to use "diff -b" which ignores only changes in the amount of whitespace.) So bugs have since been introduced that the test suite doesn't notice. For example, glcpp is producing "twotokens" where it should be producing "two tokens". Let's stop ignoring whitespace in the test suite, which currently introduces lots of failures---some real and some spurious.
--- tests/glcpp-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/glcpp-test b/tests/glcpp-test index 673a4f45e96..25685eeabe5 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -5,5 +5,5 @@ for test in *.c; do ../glcpp < $test > $test.out gcc -E $test -o $test.gcc grep -v '^#' < $test.gcc > $test.expected - diff -B -w -u $test.expected $test.out + diff -u $test.expected $test.out done From 323421db6567f3402e0ff9dcf548269e6d7b5497 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 14:05:37 -0700 Subject: [PATCH 069/148] Remove "unnecessary" whitespace from some tests. This whitespace was not part of anything being tested, and it introduces differences (that we don't actually care about) between the output of "gcc -E" and glcpp. Just eliminate this extra whitespace to reduce spurious test-case failures. --- tests/015-define-object-with-parens.c | 6 +++--- tests/016-define-func-1-arg.c | 2 +- tests/020-define-func-2-arg-multi.c | 2 +- tests/023-define-extra-whitespace.c | 4 ++-- tests/032-define-func-self-recurse.c | 2 +- tests/033-define-func-self-compose.c | 2 +- ...define-func-self-compose-non-func-multi-token-argument.c | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/015-define-object-with-parens.c b/tests/015-define-object-with-parens.c index 10bf7e31a34..558da9c617b 100644 --- a/tests/015-define-object-with-parens.c +++ b/tests/015-define-object-with-parens.c @@ -1,4 +1,4 @@ -#define foo ( ) 1 +#define foo ()1 foo() -#define bar () 2 -bar( ) +#define bar ()2 +bar() diff --git a/tests/016-define-func-1-arg.c b/tests/016-define-func-1-arg.c index dea38d1fedd..a2e2404c7c1 100644 --- a/tests/016-define-func-1-arg.c +++ b/tests/016-define-func-1-arg.c @@ -1,2 +1,2 @@ -#define foo(x) ((x) + 1) +#define foo(x) ((x)+1) foo(bar) diff --git a/tests/020-define-func-2-arg-multi.c b/tests/020-define-func-2-arg-multi.c index 253421139d4..3049ad15465 100644 --- a/tests/020-define-func-2-arg-multi.c +++ 
b/tests/020-define-func-2-arg-multi.c @@ -1,2 +1,2 @@ -#define foo(x,y) x, two fish, red fish, y +#define foo(x,y) x,two fish,red fish,y foo(one fish, blue fish) diff --git a/tests/023-define-extra-whitespace.c b/tests/023-define-extra-whitespace.c index 375355a17d9..7ebfed6516c 100644 --- a/tests/023-define-extra-whitespace.c +++ b/tests/023-define-extra-whitespace.c @@ -3,6 +3,6 @@ # define twoargs( x , y ) x y # define threeargs( a , b , c ) a b c noargs ( ) - onearg ( 2 ) - twoargs ( 3 , 4 ) +onearg ( 2 ) +twoargs ( 3 , 4 ) threeargs ( 5 , 6 , 7 ) diff --git a/tests/032-define-func-self-recurse.c b/tests/032-define-func-self-recurse.c index 60d8526c0aa..b3ac70f499c 100644 --- a/tests/032-define-func-self-recurse.c +++ b/tests/032-define-func-self-recurse.c @@ -1,2 +1,2 @@ -#define foo(a) foo(2 * (a)) +#define foo(a) foo(2*(a)) foo(3) diff --git a/tests/033-define-func-self-compose.c b/tests/033-define-func-self-compose.c index 8abaaf6be95..f65e48286cf 100644 --- a/tests/033-define-func-self-compose.c +++ b/tests/033-define-func-self-compose.c @@ -1,2 +1,2 @@ -#define foo(a) foo(2 * (a)) +#define foo(a) foo(2*(a)) foo(foo(3)) diff --git a/tests/035-define-func-self-compose-non-func-multi-token-argument.c b/tests/035-define-func-self-compose-non-func-multi-token-argument.c index 9955219470c..c307fbe830f 100644 --- a/tests/035-define-func-self-compose-non-func-multi-token-argument.c +++ b/tests/035-define-func-self-compose-non-func-multi-token-argument.c @@ -1,2 +1,2 @@ #define foo(bar) bar -foo(1 + foo) +foo(1+foo) From ff13cfed81132eaaa8859f25f87ea5398d4864ba Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 14:08:19 -0700 Subject: [PATCH 070/148] Remove unused function _print_string_list The only good dead code is non-existing dead code. 
--- glcpp-parse.y | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 773db93e54a..79a8ec2cf2a 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -50,9 +50,6 @@ _expand_function_macro (glcpp_parser_t *parser, const char *identifier, argument_list_t *arguments); -void -_print_string_list (string_list_t *list); - string_list_t * _string_list_create (void *ctx); @@ -396,21 +393,6 @@ _string_list_length (string_list_t *list) return length; } -void -_print_string_list (string_list_t *list) -{ - string_node_t *node; - - if (list == NULL) - return; - - for (node = list->head; node; node = node->next) { - printf ("%s", node->str); - if (node->next) - printf (" "); - } -} - argument_list_t * _argument_list_create (void *ctx) { From 005b32061f77008530a290ed991980a579095002 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 14:19:57 -0700 Subject: [PATCH 071/148] Fix bug of consuming excess whitespace. We fix this by moving printing up to the top-level "input" action and tracking whether a space is needed between one token and the next. This fixes all actual bugs in test-suite output, but does leave some tests failing due to differences in the amount of whitespace produced, (which aren't actual bugs per se). 
--- glcpp-parse.y | 71 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 79a8ec2cf2a..c6d64176b2d 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -102,6 +102,7 @@ glcpp_parser_lex (glcpp_parser_t *parser); %} %union { + int ival; char *str; argument_list_t *argument_list; string_list_t *string_list; @@ -112,8 +113,9 @@ glcpp_parser_lex (glcpp_parser_t *parser); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINE FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO NEWLINE SPACE TOKEN UNDEF -%type FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO +%token DEFINE FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO NEWLINE SEPARATOR SPACE TOKEN UNDEF +%type input punctuator +%type content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO %type argument_list %type macro parameter_list %type TOKEN argument_word argument_word_or_comma @@ -135,38 +137,71 @@ glcpp_parser_lex (glcpp_parser_t *parser); %% + /* We do all printing at the input level. + * + * The value for "input" is simply TOKEN or SEPARATOR so we + * can decide whether it's necessary to print a space + * character between any two. 
*/ input: - /* empty */ -| input content + /* empty */ { + $$ = SEPARATOR; + } +| input content { + int is_token; + + if ($2 && strlen ($2)) { + int c = $2[0]; + int is_not_separator = ((c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + (c == '_')); + + if ($1 == TOKEN && is_not_separator) + printf (" "); + printf ("%s", $2); + if (is_not_separator) + $$ = TOKEN; + else + $$ = SEPARATOR; + } else { + $$ = $1; + } + if ($2) + talloc_free ($2); + } ; - /* We do all printing at the content level */ content: IDENTIFIER { - printf ("%s", $1); - talloc_free ($1); + $$ = $1; } | IDENTIFIER_FINALIZED { - printf ("%s", $1); - talloc_free ($1); + $$ = $1; } | TOKEN { - printf ("%s", $1.value); - talloc_free ($1.value); + $$ = $1.value; } | FUNC_MACRO { - printf ("%s", $1); - talloc_free ($1); + $$ = $1; } | directive { - printf ("\n"); + $$ = talloc_strdup (parser, "\n"); + } +| punctuator { + $$ = talloc_asprintf (parser, "%c", $1); + } +| macro { + $$ = NULL; } -| '(' { printf ("("); } -| ')' { printf (")"); } -| ',' { printf (","); } -| macro ; +punctuator: + '(' { $$ = '('; } +| ')' { $$ = ')'; } +| ',' { $$ = ','; } + ; + macro: FUNC_MACRO '(' argument_list ')' { _expand_function_macro (parser, $1, $3); From 5a6b9a27fdb2ac66aaadd90b15b1889fea8f08d0 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 14:29:43 -0700 Subject: [PATCH 072/148] Avoid printing a space at the beginning of lines in the output. This fixes more differences compared to "gcc -E" so removes several cases of erroneously failing test cases. The implementation isn't very elegant, but it is functional. 
--- glcpp-lex.l | 5 +++++ glcpp-parse.y | 18 +++++++++++------- glcpp.h | 1 + 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 8e3ab661e6f..13e4d6f0ef1 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -173,7 +173,12 @@ TOKEN [^[:space:](),]+ } \n { + /* XXX: Printing here (rather than in a parser production) + * *and* frobbing a bit of the parser state here are both ugly + * things. But all my attempts to avoid this by returning a + * NEWLINE token here have led to even more ugly things. */ printf ("\n"); + yyextra->just_printed_separator = 1; } {HSPACE}+ diff --git a/glcpp-parse.y b/glcpp-parse.y index c6d64176b2d..93713a3f0ca 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -114,7 +114,7 @@ glcpp_parser_lex (glcpp_parser_t *parser); %lex-param {glcpp_parser_t *parser} %token DEFINE FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO NEWLINE SEPARATOR SPACE TOKEN UNDEF -%type input punctuator +%type punctuator %type content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO %type argument_list %type macro parameter_list @@ -144,7 +144,7 @@ glcpp_parser_lex (glcpp_parser_t *parser); * character between any two. */ input: /* empty */ { - $$ = SEPARATOR; + parser->just_printed_separator = 1; } | input content { int is_token; @@ -157,16 +157,18 @@ input: (c >= '0' && c <= '9') || (c == '_')); - if ($1 == TOKEN && is_not_separator) + if (! 
parser->just_printed_separator && is_not_separator) + { printf (" "); + } printf ("%s", $2); + if (is_not_separator) - $$ = TOKEN; + parser->just_printed_separator = 0; else - $$ = SEPARATOR; - } else { - $$ = $1; + parser->just_printed_separator = 1; } + if ($2) talloc_free ($2); } @@ -561,6 +563,8 @@ glcpp_parser_create (void) hash_table_string_compare); parser->expansions = NULL; + parser->just_printed_separator = 1; + return parser; } diff --git a/glcpp.h b/glcpp.h index 5432a318173..c25e29c6883 100644 --- a/glcpp.h +++ b/glcpp.h @@ -101,6 +101,7 @@ struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; expansion_node_t *expansions; + int just_printed_separator; }; void From 876e510bdab96574c4ca5ee94c580fe6ad7f0106 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 14:38:06 -0700 Subject: [PATCH 073/148] Finish cleaning up whitespace differences. The last remaining thing here was that when a line ended with a macro, and the parser looked ahead to the newline token, the lexer was printing that newline before the parser printed the expansion of the macro. The fix is simple, just make the lexer tell the parser that a newline is needed, and the parser can wait until reducing a production to print that newline. With this, we now pass the entire test suite with simply "diff -u", so we no longer have any diff options hiding whitespace bugs from us. Hurrah! --- glcpp-lex.l | 7 +------ glcpp-parse.y | 9 +++++++++ glcpp.h | 1 + 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 13e4d6f0ef1..114b59f0456 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -173,12 +173,7 @@ TOKEN [^[:space:](),]+ } \n { - /* XXX: Printing here (rather than in a parser production) - * *and* frobbing a bit of the parser state here are both ugly - * things. But all my attempts to avoid this by returning a - * NEWLINE token here have led to even more ugly things. 
*/ - printf ("\n"); - yyextra->just_printed_separator = 1; + yyextra->need_newline = 1; } {HSPACE}+ diff --git a/glcpp-parse.y b/glcpp-parse.y index 93713a3f0ca..ddc2a258cd8 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -171,6 +171,12 @@ input: if ($2) talloc_free ($2); + + if (parser->need_newline) { + printf ("\n"); + parser->just_printed_separator = 1; + parser->need_newline = 0; + } } ; @@ -564,6 +570,7 @@ glcpp_parser_create (void) parser->expansions = NULL; parser->just_printed_separator = 1; + parser->need_newline = 0; return parser; } @@ -577,6 +584,8 @@ glcpp_parser_parse (glcpp_parser_t *parser) void glcpp_parser_destroy (glcpp_parser_t *parser) { + if (parser->need_newline) + printf ("\n"); glcpp_lex_destroy (parser->scanner); hash_table_dtor (parser->defines); talloc_free (parser); diff --git a/glcpp.h b/glcpp.h index c25e29c6883..2e93cb981d8 100644 --- a/glcpp.h +++ b/glcpp.h @@ -102,6 +102,7 @@ struct glcpp_parser { struct hash_table *defines; expansion_node_t *expansions; int just_printed_separator; + int need_newline; }; void From b894583fd0246060d908a0cc7b5f3ef72a5a2112 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 15:02:03 -0700 Subject: [PATCH 074/148] Add xtalloc_asprintf I expect this to be useful in the upcoming implementation of token pasting. --- glcpp.h | 3 +++ xtalloc.c | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/glcpp.h b/glcpp.h index 2e93cb981d8..048a9be76bb 100644 --- a/glcpp.h +++ b/glcpp.h @@ -149,4 +149,7 @@ xtalloc_strdup (const void *t, const char *p); char * xtalloc_strndup (const void *t, const char *p, size_t n); +char * +xtalloc_asprintf (const void *t, const char *fmt, ...); + #endif diff --git a/xtalloc.c b/xtalloc.c index d9893ae8893..e52d12ac6b2 100644 --- a/xtalloc.c +++ b/xtalloc.c @@ -64,3 +64,21 @@ xtalloc_strndup (const void *t, const char *p, size_t n) return ret; } + +char * +xtalloc_asprintf (const void *t, const char *fmt, ...) 
+{ + va_list ap; + char *ret; + + va_start(ap, fmt); + + ret = talloc_vasprintf(t, fmt, ap); + if (ret == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + + va_end(ap); + return ret; +} From c10a51ba13272dc48407b885d8684be99bba120d Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 15:15:26 -0700 Subject: [PATCH 075/148] Pre-expand macro arguments at time of invocation. Previously, we were using the same lexing stack as we use for macro expansion to also expand macro arguments. Instead, we now do this earlier by simply recursing over the macro invocation's replacement list and constructing a new expanded list, (and pushing only *that* onto the stack). This is simpler, and also allows us to more easily implement token pasting in the future. --- glcpp-lex.l | 8 ----- glcpp-parse.y | 88 ++++++++++++++++----------------------------------- glcpp.h | 2 -- 3 files changed, 28 insertions(+), 70 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 114b59f0456..6138a9de12e 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -138,14 +138,6 @@ TOKEN [^[:space:](),]+ switch (glcpp_parser_classify_token (yyextra, yylval.str, &parameter_index)) { - case TOKEN_CLASS_ARGUMENT: - talloc_free (yylval.str); - /* We don't return a value here since the - * current token will be replaced by new - * tokens.
*/ - glcpp_parser_push_expansion_argument (yyextra, - parameter_index); - break; case TOKEN_CLASS_IDENTIFIER: return IDENTIFIER; break; diff --git a/glcpp-parse.y b/glcpp-parse.y index ddc2a258cd8..0691619acf4 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -86,11 +86,6 @@ _token_list_append (token_list_t *list, int type, const char *value); void _token_list_append_list (token_list_t *list, token_list_t *tail); -static void -glcpp_parser_push_expansion_macro (glcpp_parser_t *parser, - macro_t *macro, - argument_list_t *arguments); - static void glcpp_parser_pop_expansion (glcpp_parser_t *parser); @@ -614,24 +609,7 @@ glcpp_parser_classify_token (glcpp_parser_t *parser, { macro_t *macro; - /* First we check if we are currently expanding a - * function-like macro, and if so, whether the parameter list - * contains a parameter matching this token name. */ - if (parser->expansions && - parser->expansions->macro && - parser->expansions->macro->parameters) - { - string_list_t *list; - - list = parser->expansions->macro->parameters; - - if (_string_list_contains (list, identifier, parameter_index)) - return TOKEN_CLASS_ARGUMENT; - } - - /* If not a function-like macro parameter, we next check if - * this token is a macro itself. */ - + /* Is this token a defined macro? 
*/ macro = hash_table_find (parser->defines, identifier); if (macro == NULL) @@ -685,47 +663,21 @@ _define_function_macro (glcpp_parser_t *parser, } static void -_glcpp_parser_push_expansion_internal (glcpp_parser_t *parser, - macro_t *macro, - argument_list_t *arguments, - token_node_t *replacements) +_glcpp_parser_push_expansion (glcpp_parser_t *parser, + macro_t *macro, + token_node_t *replacements) { expansion_node_t *node; node = xtalloc (parser, expansion_node_t); node->macro = macro; - node->arguments = arguments; node->replacements = replacements; node->next = parser->expansions; parser->expansions = node; } -static void -glcpp_parser_push_expansion_macro (glcpp_parser_t *parser, - macro_t *macro, - argument_list_t *arguments) -{ - _glcpp_parser_push_expansion_internal (parser, macro, arguments, - macro->replacements->head); -} - -void -glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, - int argument_index) -{ - argument_list_t *arguments; - token_list_t *argument; - - arguments = parser->expansions->arguments; - - argument = _argument_list_member_at (arguments, argument_index); - - _glcpp_parser_push_expansion_internal (parser, NULL, NULL, - argument->head); -} - static void glcpp_parser_pop_expansion (glcpp_parser_t *parser) { @@ -752,7 +704,7 @@ _expand_object_macro (glcpp_parser_t *parser, const char *identifier) assert (! macro->is_function); assert (! 
glcpp_parser_is_expanding (parser, identifier)); - glcpp_parser_push_expansion_macro (parser, macro, NULL); + _glcpp_parser_push_expansion (parser, macro, macro->replacements->head); } void @@ -761,6 +713,9 @@ _expand_function_macro (glcpp_parser_t *parser, argument_list_t *arguments) { macro_t *macro; + token_list_t *expanded; + token_node_t *i, *j; + int parameter_index; macro = hash_table_find (parser->defines, identifier); assert (macro->is_function); @@ -777,7 +732,26 @@ _expand_function_macro (glcpp_parser_t *parser, return; } - glcpp_parser_push_expansion_macro (parser, macro, arguments); + expanded = _token_list_create (macro); + + for (i = macro->replacements->head; i; i = i->next) { + if (_string_list_contains (macro->parameters, i->value, + &parameter_index)) + { + token_list_t *argument; + argument = _argument_list_member_at (arguments, + parameter_index); + for (j = argument->head; j; j = j->next) + { + _token_list_append (expanded, j->type, + j->value); + } + } else { + _token_list_append (expanded, i->type, i->value); + } + } + + _glcpp_parser_push_expansion (parser, macro, expanded->head); } static int @@ -819,12 +793,6 @@ glcpp_parser_lex (glcpp_parser_t *parser) switch (glcpp_parser_classify_token (parser, yylval.str, &parameter_index)) { - case TOKEN_CLASS_ARGUMENT: - talloc_free (yylval.str); - glcpp_parser_push_expansion_argument (parser, - parameter_index); - goto RECURSE; - break; case TOKEN_CLASS_IDENTIFIER: return IDENTIFIER; break; diff --git a/glcpp.h b/glcpp.h index 048a9be76bb..1537109ada6 100644 --- a/glcpp.h +++ b/glcpp.h @@ -71,7 +71,6 @@ typedef struct argument_list { typedef struct glcpp_parser glcpp_parser_t; typedef enum { - TOKEN_CLASS_ARGUMENT, TOKEN_CLASS_IDENTIFIER, TOKEN_CLASS_IDENTIFIER_FINALIZED, TOKEN_CLASS_FUNC_MACRO, @@ -92,7 +91,6 @@ typedef struct { typedef struct expansion_node { macro_t *macro; - argument_list_t *arguments; token_node_t *replacements; struct expansion_node *next; } expansion_node_t; From
d8327e575dd20fe696f3a44ada4bd4001b15db27 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 15:18:54 -0700 Subject: [PATCH 076/148] Implement (and add test for) token pasting. This is *very* easy to implement now that macro arguments are pre-expanded. --- glcpp-parse.y | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 0691619acf4..aa758f7e439 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -760,6 +760,8 @@ glcpp_parser_lex (glcpp_parser_t *parser) expansion_node_t *expansion; token_node_t *replacements; int parameter_index; + const char *token; + token_class_t class; /* Who says C can't do efficient tail recursion? */ RECURSE: @@ -779,12 +781,31 @@ glcpp_parser_lex (glcpp_parser_t *parser) expansion->replacements = replacements->next; - if (strcmp (replacements->value, "(") == 0) + token = replacements->value; + + /* Implement token pasting. */ + if (replacements->next && strcmp (replacements->next->value, "##") == 0) { + token_node_t *next_node; + + next_node = replacements->next->next; + + if (next_node == NULL) { + fprintf (stderr, "Error: '##' cannot appear at the end of a macro expansion.\n"); + exit (1); + } + + token = xtalloc_asprintf (parser, "%s%s", + token, next_node->value); + expansion->replacements = next_node->next; + } + + + if (strcmp (token, "(") == 0) return '('; - else if (strcmp (replacements->value, ")") == 0) + else if (strcmp (token, ")") == 0) return ')'; - yylval.str = xtalloc_strdup (parser, replacements->value); + yylval.str = xtalloc_strdup (parser, token); /* Carefully refuse to expand any finalized identifier. */ if (replacements->type == IDENTIFIER_FINALIZED) From b20d33c5c6fea8e392c26e9ab060efd14034f1f9 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 22:27:07 -0700 Subject: [PATCH 077/148] Implement #if, #else, #elif, and #endif with tests.
So far the only expression implemented is a single integer literal, but obviously that's easy to extend. Various things including nesting are tested here. --- glcpp-lex.l | 32 +++++++++++ glcpp-parse.y | 109 ++++++++++++++++++++++++++++++++++++-- glcpp.h | 12 +++++ tests/040-token-pasting.c | 2 + tests/041-if-0.c | 5 ++ tests/042-if-1.c | 5 ++ tests/043-if-0-else.c | 7 +++ tests/044-if-1-else.c | 7 +++ tests/045-if-0-elif.c | 11 ++++ tests/046-if-1-elsif.c | 11 ++++ tests/047-if-elif-else.c | 11 ++++ tests/048-if-nested.c | 11 ++++ tests/glcpp-test | 2 +- 13 files changed, 221 insertions(+), 4 deletions(-) create mode 100644 tests/040-token-pasting.c create mode 100644 tests/041-if-0.c create mode 100644 tests/042-if-1.c create mode 100644 tests/043-if-0-else.c create mode 100644 tests/044-if-1-else.c create mode 100644 tests/045-if-0-elif.c create mode 100644 tests/046-if-1-elsif.c create mode 100644 tests/047-if-elif-else.c create mode 100644 tests/048-if-nested.c diff --git a/glcpp-lex.l b/glcpp-lex.l index 6138a9de12e..825ce3d3709 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -36,6 +36,7 @@ %x ST_DEFINE_OBJ_OR_FUNC %x ST_DEFINE_PARAMETER %x ST_DEFINE_VALUE +%x ST_IF %x ST_UNDEF %x ST_UNDEF_END @@ -44,11 +45,42 @@ NONSPACE [^[:space:]] NEWLINE [\n] HSPACE [ \t] HASH ^{HSPACE}*#{HSPACE}* +INTEGER [0-9]+ IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* TOKEN [^[:space:](),]+ %% +{HASH}if{HSPACE}* { + BEGIN ST_IF; + return IF; +} + +{HASH}elif{HSPACE}* { + BEGIN ST_IF; + return ELIF; +} + +{INTEGER} { + yylval.ival = atoi (yytext); + return INTEGER; +} + +{HSPACE}+ + +\n { + BEGIN INITIAL; + return NEWLINE; +} + +{HASH}endif{HSPACE}* { + return ENDIF; +} + +{HASH}else{HSPACE}* { + return ELSE; +} + {HASH}undef{HSPACE}* { BEGIN ST_UNDEF; return UNDEF; diff --git a/glcpp-parse.y b/glcpp-parse.y index aa758f7e439..26432f20325 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -89,6 +89,16 @@ _token_list_append_list (token_list_t *list, token_list_t *tail); static void 
glcpp_parser_pop_expansion (glcpp_parser_t *parser); +static void +_glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition); + +static void +_glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, const char *type, + int condition); + +static void +_glcpp_parser_skip_stack_pop (glcpp_parser_t *parser); + #define yylex glcpp_parser_lex static int @@ -108,8 +118,8 @@ glcpp_parser_lex (glcpp_parser_t *parser); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINE FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO NEWLINE SEPARATOR SPACE TOKEN UNDEF -%type punctuator +%token DEFINE ELIF ELSE ENDIF FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED IF IFDEF IFNDEF INTEGER OBJ_MACRO NEWLINE SEPARATOR SPACE TOKEN UNDEF +%type expression INTEGER punctuator %type content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO %type argument_list %type macro parameter_list @@ -143,8 +153,12 @@ input: } | input content { int is_token; + int skipping = 0; - if ($2 && strlen ($2)) { + if (parser->skip_stack && parser->skip_stack->type != SKIP_NO_SKIP) + skipping = 1; + + if ($2 && strlen ($2) && ! 
skipping) { int c = $2[0]; int is_not_separator = ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || @@ -301,6 +315,28 @@ directive: | DEFINE IDENTIFIER '(' parameter_list ')' replacement_list NEWLINE { _define_function_macro (parser, $2, $4, $6); } +| IF expression NEWLINE { + _glcpp_parser_skip_stack_push_if (parser, $2); + } +| IFDEF IDENTIFIER NEWLINE { + string_list_t *macro = hash_table_find (parser->defines, $2); + talloc_free ($2); + _glcpp_parser_skip_stack_push_if (parser, macro != NULL); + } +| IFNDEF IDENTIFIER NEWLINE { + string_list_t *macro = hash_table_find (parser->defines, $2); + talloc_free ($2); + _glcpp_parser_skip_stack_push_if (parser, macro == NULL); + } +| ELIF expression NEWLINE { + _glcpp_parser_skip_stack_change_if (parser, "#elif", $2); + } +| ELSE { + _glcpp_parser_skip_stack_change_if (parser, "else", 1); + } +| ENDIF { + _glcpp_parser_skip_stack_pop (parser); + } | UNDEF IDENTIFIER { string_list_t *macro = hash_table_find (parser->defines, $2); if (macro) { @@ -314,6 +350,13 @@ directive: } ; +/* XXX: Need to fill out with all operators. 
*/ +expression: + INTEGER { + $$ = $1; + } +; + parameter_list: /* empty */ { $$ = _string_list_create (parser); @@ -567,6 +610,8 @@ glcpp_parser_create (void) parser->just_printed_separator = 1; parser->need_newline = 0; + parser->skip_stack = NULL; + return parser; } @@ -581,6 +626,8 @@ glcpp_parser_destroy (glcpp_parser_t *parser) { if (parser->need_newline) printf ("\n"); + if (parser->skip_stack) + fprintf (stderr, "Error: Unterminated #if\n"); glcpp_lex_destroy (parser->scanner); hash_table_dtor (parser->defines); talloc_free (parser); @@ -829,3 +876,59 @@ glcpp_parser_lex (glcpp_parser_t *parser) break; } } + +static void +_glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition) +{ + skip_type_t current = SKIP_NO_SKIP; + skip_node_t *node; + + if (parser->skip_stack) + current = parser->skip_stack->type; + + node = xtalloc (parser, skip_node_t); + + if (current == SKIP_NO_SKIP) { + if (condition) + node->type = SKIP_NO_SKIP; + else + node->type = SKIP_TO_ELSE; + } else { + node->type = SKIP_TO_ENDIF; + } + + node->next = parser->skip_stack; + parser->skip_stack = node; +} + +static void +_glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, const char *type, + int condition) +{ + if (parser->skip_stack == NULL) { + fprintf (stderr, "Error: %s without #if\n", type); + exit (1); + } + + if (parser->skip_stack->type == SKIP_TO_ELSE) { + if (condition) + parser->skip_stack->type = SKIP_NO_SKIP; + } else { + parser->skip_stack->type = SKIP_TO_ENDIF; + } +} + +static void +_glcpp_parser_skip_stack_pop (glcpp_parser_t *parser) +{ + skip_node_t *node; + + if (parser->skip_stack == NULL) { + fprintf (stderr, "Error: #endif without #if\n"); + exit (1); + } + + node = parser->skip_stack; + parser->skip_stack = node->next; + talloc_free (node); +} diff --git a/glcpp.h b/glcpp.h index 1537109ada6..33ece8f92b1 100644 --- a/glcpp.h +++ b/glcpp.h @@ -95,12 +95,24 @@ typedef struct expansion_node { struct expansion_node *next; } expansion_node_t; 
+typedef enum skip_type { + SKIP_NO_SKIP, + SKIP_TO_ELSE, + SKIP_TO_ENDIF +} skip_type_t; + +typedef struct skip_node { + skip_type_t type; + struct skip_node *next; +} skip_node_t; + struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; expansion_node_t *expansions; int just_printed_separator; int need_newline; + skip_node_t *skip_stack; }; void diff --git a/tests/040-token-pasting.c b/tests/040-token-pasting.c new file mode 100644 index 00000000000..caab3ba7368 --- /dev/null +++ b/tests/040-token-pasting.c @@ -0,0 +1,2 @@ +#define paste(a,b) a ## b +paste(one , token) diff --git a/tests/041-if-0.c b/tests/041-if-0.c new file mode 100644 index 00000000000..2cab677d3e8 --- /dev/null +++ b/tests/041-if-0.c @@ -0,0 +1,5 @@ +success_1 +#if 0 +failure +#endif +success_2 diff --git a/tests/042-if-1.c b/tests/042-if-1.c new file mode 100644 index 00000000000..874a25cf41b --- /dev/null +++ b/tests/042-if-1.c @@ -0,0 +1,5 @@ +success_1 +#if 1 +success_2 +#endif +success_3 diff --git a/tests/043-if-0-else.c b/tests/043-if-0-else.c new file mode 100644 index 00000000000..323351f9dbf --- /dev/null +++ b/tests/043-if-0-else.c @@ -0,0 +1,7 @@ +success_1 +#if 0 +failure +#else +success_2 +#endif +success_3 diff --git a/tests/044-if-1-else.c b/tests/044-if-1-else.c new file mode 100644 index 00000000000..28dfc25c6f0 --- /dev/null +++ b/tests/044-if-1-else.c @@ -0,0 +1,7 @@ +success_1 +#if 1 +success_2 +#else +failure +#endif +success_3 diff --git a/tests/045-if-0-elif.c b/tests/045-if-0-elif.c new file mode 100644 index 00000000000..e50f686d461 --- /dev/null +++ b/tests/045-if-0-elif.c @@ -0,0 +1,11 @@ +success_1 +#if 0 +failure_1 +#elif 0 +failure_2 +#elif 1 +success_3 +#elif 1 +failure_3 +#endif +success_4 diff --git a/tests/046-if-1-elsif.c b/tests/046-if-1-elsif.c new file mode 100644 index 00000000000..130515a01ea --- /dev/null +++ b/tests/046-if-1-elsif.c @@ -0,0 +1,11 @@ +success_1 +#if 1 +success_2 +#elif 0 +failure_1 +#elif 1 +failure_2 +#elif 0 +failure_3 
+#endif +success_3 diff --git a/tests/047-if-elif-else.c b/tests/047-if-elif-else.c new file mode 100644 index 00000000000..e8f0838a9ed --- /dev/null +++ b/tests/047-if-elif-else.c @@ -0,0 +1,11 @@ +success_1 +#if 0 +failure_1 +#elif 0 +failure_2 +#elif 0 +failure_3 +#else +success_2 +#endif +success_3 diff --git a/tests/048-if-nested.c b/tests/048-if-nested.c new file mode 100644 index 00000000000..fc4679c3be4 --- /dev/null +++ b/tests/048-if-nested.c @@ -0,0 +1,11 @@ +success_1 +#if 0 +failure_1 +#if 1 +failure_2 +#else +failure_3 +#endif +failure_4 +#endif +success_2 diff --git a/tests/glcpp-test b/tests/glcpp-test index 25685eeabe5..022a2367121 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -5,5 +5,5 @@ for test in *.c; do ../glcpp < $test > $test.out gcc -E $test -o $test.gcc grep -v '^#' < $test.gcc > $test.expected - diff -u $test.expected $test.out + diff -B -u $test.expected $test.out done From bcbd587b0f5312d85307785ee2df6e5906af4f7b Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 24 May 2010 10:37:38 -0700 Subject: [PATCH 078/148] Implement all operators specified for GLSL #if expressions (with tests). The operator coverage here is quite complete. The one big thing missing is that we are not yet doing macro expansion in #if lines. This makes the whole support fairly useless, so we plan to fix that shortcoming right away. 
--- glcpp-lex.l | 45 +++++++++++++ glcpp-parse.y | 97 ++++++++++++++++++++++++++-- tests/049-if-expression-precedence.c | 6 ++ tests/050-if-defined.c | 19 ++++++ tests/051-if-relational.c | 35 ++++++++++ 5 files changed, 195 insertions(+), 7 deletions(-) create mode 100644 tests/049-if-expression-precedence.c create mode 100644 tests/050-if-defined.c create mode 100644 tests/051-if-relational.c diff --git a/glcpp-lex.l b/glcpp-lex.l index 825ce3d3709..84166fb76fc 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -66,6 +66,51 @@ TOKEN [^[:space:](),]+ return INTEGER; } +"defined" { + return DEFINED; +} + +"<<" { + return LEFT_SHIFT; +} + +">>" { + return RIGHT_SHIFT; +} + +"<=" { + return LESS_OR_EQUAL; +} + +">=" { + return GREATER_OR_EQUAL; +} + +"==" { + return EQUAL; +} + +"!=" { + return NOT_EQUAL; +} + +"&&" { + return AND; +} + +"||" { + return OR; +} + +[-+*/%<>&^|()] { + return yytext[0]; +} + +{IDENTIFIER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; +} + {HSPACE}+ \n { diff --git a/glcpp-parse.y b/glcpp-parse.y index 26432f20325..0d3afa7af64 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -118,13 +118,24 @@ glcpp_parser_lex (glcpp_parser_t *parser); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINE ELIF ELSE ENDIF FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED IF IFDEF IFNDEF INTEGER OBJ_MACRO NEWLINE SEPARATOR SPACE TOKEN UNDEF +%token DEFINE DEFINED ELIF ELSE ENDIF FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED IF IFDEF IFNDEF INTEGER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF %type expression INTEGER punctuator %type content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO %type argument_list %type macro parameter_list %type TOKEN argument_word argument_word_or_comma %type argument argument_or_comma replacement_list pp_tokens +%left OR +%left AND +%left '|' +%left '^' +%left '&' +%left EQUAL NOT_EQUAL +%left '<' '>' LESS_OR_EQUAL GREATER_OR_EQUAL +%left LEFT_SHIFT RIGHT_SHIFT +%left '+' '-' +%left 
'*' '/' '%' +%right UNARY /* Hard to remove shift/reduce conflicts documented as follows: * @@ -142,11 +153,7 @@ glcpp_parser_lex (glcpp_parser_t *parser); %% - /* We do all printing at the input level. - * - * The value for "input" is simply TOKEN or SEPARATOR so we - * can decide whether it's necessary to print a space - * character between any two. */ + /* We do all printing at the input level. */ input: /* empty */ { parser->just_printed_separator = 1; @@ -350,11 +357,87 @@ directive: } ; -/* XXX: Need to fill out with all operators. */ expression: INTEGER { $$ = $1; } +| expression OR expression { + $$ = $1 || $3; + } +| expression AND expression { + $$ = $1 && $3; + } +| expression '|' expression { + $$ = $1 | $3; + } +| expression '^' expression { + $$ = $1 ^ $3; + } +| expression '&' expression { + $$ = $1 & $3; + } +| expression NOT_EQUAL expression { + $$ = $1 != $3; + } +| expression EQUAL expression { + $$ = $1 == $3; + } +| expression GREATER_OR_EQUAL expression { + $$ = $1 >= $3; + } +| expression LESS_OR_EQUAL expression { + $$ = $1 <= $3; + } +| expression '>' expression { + $$ = $1 > $3; + } +| expression '<' expression { + $$ = $1 < $3; + } +| expression RIGHT_SHIFT expression { + $$ = $1 >> $3; + } +| expression LEFT_SHIFT expression { + $$ = $1 << $3; + } +| expression '-' expression { + $$ = $1 - $3; + } +| expression '+' expression { + $$ = $1 + $3; + } +| expression '%' expression { + $$ = $1 % $3; + } +| expression '/' expression { + $$ = $1 / $3; + } +| expression '*' expression { + $$ = $1 * $3; + } +| '!' expression %prec UNARY { + $$ = ! 
$2; + } +| '~' expression %prec UNARY { + $$ = ~ $2; + } +| '-' expression %prec UNARY { + $$ = - $2; + } +| '+' expression %prec UNARY { + $$ = + $2; + } +| DEFINED IDENTIFIER %prec UNARY { + string_list_t *macro = hash_table_find (parser->defines, $2); + talloc_free ($2); + if (macro) + $$ = 1; + else + $$ = 0; + } +| '(' expression ')' { + $$ = $2; + } ; parameter_list: diff --git a/tests/049-if-expression-precedence.c b/tests/049-if-expression-precedence.c new file mode 100644 index 00000000000..cea935220fd --- /dev/null +++ b/tests/049-if-expression-precedence.c @@ -0,0 +1,6 @@ +#if 1 + 2 * 3 + - (25 % 17 - + 1) +failure with operator precedence +#else +success +#endif + diff --git a/tests/050-if-defined.c b/tests/050-if-defined.c new file mode 100644 index 00000000000..9838cc747d5 --- /dev/null +++ b/tests/050-if-defined.c @@ -0,0 +1,19 @@ +#if defined foo +failure_1 +#else +success_1 +#endif +#define foo +#if defined foo +success_2 +#else +failure_2 +#endif +#undef foo +#if defined foo +failure_3 +#else +success_3 +#endif + + diff --git a/tests/051-if-relational.c b/tests/051-if-relational.c new file mode 100644 index 00000000000..c3db488e0de --- /dev/null +++ b/tests/051-if-relational.c @@ -0,0 +1,35 @@ +#if 3 < 2 +failure_1 +#else +success_1 +#endif + +#if 3 >= 2 +success_2 +#else +failure_2 +#endif + +#if 2 + 3 <= 5 +success_3 +#else +failure_3 +#endif + +#if 3 - 2 == 1 +success_3 +#else +failure_3 +#endif + +#if 1 > 3 +failure_4 +#else +success_4 +#endif + +#if 1 != 5 +success_5 +#else +failure_5 +#endif From 89b933a24375a2ebed383290f24360a14edbac6b Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 24 May 2010 11:26:42 -0700 Subject: [PATCH 079/148] Add the '~' operator to the lexer. This was simply missing before, (and unnoticed since we had no test of the '~' operator). 
--- glcpp-lex.l | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 84166fb76fc..fe95508a321 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -102,7 +102,7 @@ TOKEN [^[:space:](),]+ return OR; } -[-+*/%<>&^|()] { +[-+*/%<>&^|()~] { return yytext[0]; } From 35419095f8d92f7dc5de472da3a0271d343cbcba Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 24 May 2010 11:27:23 -0700 Subject: [PATCH 080/148] Switch to intmax_t (rather than int) for #if expressions This is what the C99 specification demands. And the GLSL specification says that we should follow the "standard C++" rules for #if condition expressions rather than the GLSL rules, (which only support a 32-bit integer). --- glcpp-parse.y | 4 +++- glcpp.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 0d3afa7af64..2c0fe9a6af9 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -107,6 +107,7 @@ glcpp_parser_lex (glcpp_parser_t *parser); %} %union { + intmax_t imaxval; int ival; char *str; argument_list_t *argument_list; @@ -119,7 +120,8 @@ glcpp_parser_lex (glcpp_parser_t *parser); %lex-param {glcpp_parser_t *parser} %token DEFINE DEFINED ELIF ELSE ENDIF FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED IF IFDEF IFNDEF INTEGER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF -%type expression INTEGER punctuator +%type punctuator +%type expression INTEGER %type content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO %type argument_list %type macro parameter_list diff --git a/glcpp.h b/glcpp.h index 33ece8f92b1..503731b85b3 100644 --- a/glcpp.h +++ b/glcpp.h @@ -24,6 +24,8 @@ #ifndef GLCPP_H #define GLCPP_H +#include + #include #include "hash_table.h" From 03f6d5d2d4a6c42a197ee8eb4e26b87c87bbe43e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 24 May 2010 11:29:02 -0700 Subject: [PATCH 081/148] Add support for octal and hexadecimal integer literals. In addition to the decimal literals which we already support. 
Note that we use strtoll here to get the large-width integers demanded by the specification. --- glcpp-lex.l | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index fe95508a321..ee1f6e3aeea 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -45,10 +45,13 @@ NONSPACE [^[:space:]] NEWLINE [\n] HSPACE [ \t] HASH ^{HSPACE}*#{HSPACE}* -INTEGER [0-9]+ IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* TOKEN [^[:space:](),]+ +DECIMAL_INTEGER [1-9][0-9]*[uU]? +OCTAL_INTEGER 0[0-7]*[uU]? +HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? + %% {HASH}if{HSPACE}* { @@ -61,8 +64,18 @@ TOKEN [^[:space:](),]+ return ELIF; } -{INTEGER} { - yylval.ival = atoi (yytext); +{DECIMAL_INTEGER} { + yylval.ival = strtoll (yytext, NULL, 10); + return INTEGER; +} + +{OCTAL_INTEGER} { + yylval.ival = strtoll (yytext + 1, NULL, 8); + return INTEGER; +} + +{HEXADECIMAL_INTEGER} { + yylval.ival = strtoll (yytext + 2, NULL, 16); return INTEGER; } From bb9315f8047770585391c56973ef26c30f74d603 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 24 May 2010 11:30:06 -0700 Subject: [PATCH 082/148] Add test of bitwise operators and octal/hexadecimal literals. This new test covers several features from the last few commits. This test passes already. 
--- tests/052-if-bitwise.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 tests/052-if-bitwise.c diff --git a/tests/052-if-bitwise.c b/tests/052-if-bitwise.c new file mode 100644 index 00000000000..2d8e45eb61e --- /dev/null +++ b/tests/052-if-bitwise.c @@ -0,0 +1,20 @@ +#if (0xaaaaaaaa | 0x55555555) != 4294967295 +failure_1 +#else +success_1 +#endif +#if (0x12345678 ^ 0xfdecba98) == 4023971040 +success_2 +#else +failure_2 +#endif +#if (~ 0xdeadbeef) != -3735928560 +failure_3 +#else +success_3 +#endif +#if (0667 & 0733) == 403 +success_4 +#else +failure_4 +#endif From 00f1ec421edf73516fdcfbbdb651f13eeefe8f08 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 24 May 2010 11:33:28 -0700 Subject: [PATCH 083/148] Add test for '/', '<<', and '>>' in #if expressions. These operators have been supported already, but were not covered in existing tests yet. So this test passes already. --- tests/053-if-divide-and-shift.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tests/053-if-divide-and-shift.c diff --git a/tests/053-if-divide-and-shift.c b/tests/053-if-divide-and-shift.c new file mode 100644 index 00000000000..ddc1573ab26 --- /dev/null +++ b/tests/053-if-divide-and-shift.c @@ -0,0 +1,16 @@ +#if (15 / 2) != 7 +failure_1 +#else +success_1 +#endif +#if (1 << 12) == 4096 +success_2 +#else +failure_2 +#endif +#if (31762 >> 8) != 124 +failure_3 +#else +success_3 +#endif + From 3ff81670848abb29b92e78f45080ad36cc85001c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 13:09:03 -0700 Subject: [PATCH 084/148] Starting over with the C99 grammar for the preprocessor. This is a fresh start with a much simpler approach for the flex/bison portions of the preprocessor. This isn't functional yet, (produces no output), but can at least read all of our test cases without any parse errors. The grammar here is based on the grammar provided for the preprocessor in the C99 specification. 
--- glcpp-lex.l | 247 +++++++---------------------- glcpp-parse.y | 405 ++++++++--------------------------------------- tests/glcpp-test | 5 +- 3 files changed, 125 insertions(+), 532 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index ee1f6e3aeea..f1dd11ea9bd 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,21 +32,14 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" -%x ST_DEFINE -%x ST_DEFINE_OBJ_OR_FUNC -%x ST_DEFINE_PARAMETER -%x ST_DEFINE_VALUE -%x ST_IF -%x ST_UNDEF -%x ST_UNDEF_END - SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] HSPACE [ \t] HASH ^{HSPACE}*#{HSPACE}* IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* -TOKEN [^[:space:](),]+ +PUNCTUATION [][(){}.&*~!/%<>^|;,+-] +OTHER [^][(){}.&*~!/%<>^|;,=#[:space:]+-]+ DECIMAL_INTEGER [1-9][0-9]*[uU]? OCTAL_INTEGER 0[0-7]*[uU]? @@ -54,208 +47,74 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? %% -{HASH}if{HSPACE}* { - BEGIN ST_IF; - return IF; +{HASH}define{HSPACE}+/{IDENTIFIER}"(" { + return HASH_DEFINE_FUNC; } -{HASH}elif{HSPACE}* { - BEGIN ST_IF; - return ELIF; +{HASH}define { + return HASH_DEFINE_OBJ; } -{DECIMAL_INTEGER} { - yylval.ival = strtoll (yytext, NULL, 10); - return INTEGER; +{HASH}undef { + return HASH_UNDEF; } -{OCTAL_INTEGER} { - yylval.ival = strtoll (yytext + 1, NULL, 8); - return INTEGER; -} - -{HEXADECIMAL_INTEGER} { - yylval.ival = strtoll (yytext + 2, NULL, 16); - return INTEGER; -} - -"defined" { - return DEFINED; -} - -"<<" { - return LEFT_SHIFT; -} - -">>" { - return RIGHT_SHIFT; -} - -"<=" { - return LESS_OR_EQUAL; -} - -">=" { - return GREATER_OR_EQUAL; -} - -"==" { - return EQUAL; -} - -"!=" { - return NOT_EQUAL; -} - -"&&" { - return AND; -} - -"||" { - return OR; -} - -[-+*/%<>&^|()~] { - return yytext[0]; -} - -{IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -{HSPACE}+ - -\n { - BEGIN INITIAL; - return NEWLINE; -} - -{HASH}endif{HSPACE}* { - return ENDIF; -} - -{HASH}else{HSPACE}* { - return ELSE; -} - 
-{HASH}undef{HSPACE}* { - BEGIN ST_UNDEF; - return UNDEF; -} - -{IDENTIFIER} { - BEGIN ST_UNDEF_END; - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -{HSPACE}* - -\n { - BEGIN INITIAL; -} - - /* We use the ST_DEFINE and ST_DEFVAL states so that we can - * pass a space token, (yes, a token for whitespace!), since - * the preprocessor specification requires distinguishing - * "#define foo()" from "#define foo ()". - */ -{HASH}define{HSPACE}* { - BEGIN ST_DEFINE; - return DEFINE; -} - -{IDENTIFIER} { - BEGIN ST_DEFINE_OBJ_OR_FUNC; - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -\n { - BEGIN INITIAL; - return NEWLINE; -} - -{HSPACE}+ { - BEGIN ST_DEFINE_VALUE; - return SPACE; -} - -"(" { - BEGIN ST_DEFINE_PARAMETER; - return '('; -} - -{IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -"," { - return ','; -} - -")" { - BEGIN ST_DEFINE_VALUE; - return ')'; -} - -{HSPACE}+ - -{TOKEN} { - yylval.token.type = TOKEN; - yylval.token.value = xtalloc_strdup (yyextra, yytext); - return TOKEN; -} - -[(),] { - yylval.token.type = TOKEN; - yylval.token.value = xtalloc_strdup (yyextra, yytext); - return TOKEN; -} - -{HSPACE}+ - -\n { - BEGIN INITIAL; - return NEWLINE; +{HASH} { + return HASH; } {IDENTIFIER} { - int parameter_index; yylval.str = xtalloc_strdup (yyextra, yytext); - switch (glcpp_parser_classify_token (yyextra, yylval.str, - &parameter_index)) - { - case TOKEN_CLASS_IDENTIFIER: - return IDENTIFIER; - break; - case TOKEN_CLASS_IDENTIFIER_FINALIZED: - return IDENTIFIER_FINALIZED; - break; - case TOKEN_CLASS_FUNC_MACRO: - return FUNC_MACRO; - break; - case TOKEN_CLASS_OBJ_MACRO: - return OBJ_MACRO; - break; - - } + return IDENTIFIER; } -[(),] { +"<<" { + return LEFT_SHIFT; +} + +">>" { + return RIGHT_SHIFT; +} + +"<=" { + return LESS_OR_EQUAL; +} + +">=" { + return GREATER_OR_EQUAL; +} + +"==" { + return EQUAL; +} + +"!=" { + return NOT_EQUAL; +} + +"&&" { + return AND; +} + +"||" 
{ + return OR; +} + +"##" { + return PASTE; +} + +{PUNCTUATION} { return yytext[0]; } -{TOKEN} { - yylval.token.type = TOKEN; - yylval.token.value = xtalloc_strdup (yyextra, yytext); - return TOKEN; +\n { + return NEWLINE; } -\n { - yyextra->need_newline = 1; +{OTHER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return OTHER; } {HSPACE}+ diff --git a/glcpp-parse.y b/glcpp-parse.y index 2c0fe9a6af9..ebb28ed1965 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -119,366 +119,97 @@ glcpp_parser_lex (glcpp_parser_t *parser); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINE DEFINED ELIF ELSE ENDIF FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED IF IFDEF IFNDEF INTEGER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF -%type punctuator -%type expression INTEGER -%type content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO -%type argument_list -%type macro parameter_list -%type TOKEN argument_word argument_word_or_comma -%type argument argument_or_comma replacement_list pp_tokens -%left OR -%left AND -%left '|' -%left '^' -%left '&' -%left EQUAL NOT_EQUAL -%left '<' '>' LESS_OR_EQUAL GREATER_OR_EQUAL -%left LEFT_SHIFT RIGHT_SHIFT -%left '+' '-' -%left '*' '/' '%' -%right UNARY +%token HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH IDENTIFIER NEWLINE OTHER HASH_UNDEF +%token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE -/* Hard to remove shift/reduce conflicts documented as follows: - * - * 1. '(' after FUNC_MACRO name which is correctly resolved to shift - * to form macro invocation rather than reducing directly to - * content. - * - * 2. Similarly, '(' after FUNC_MACRO which is correctly resolved to - * shift to form macro invocation rather than reducing directly to - * argument. - * - * 3. Similarly again now that we added argument_or_comma as well. - */ -%expect 3 + /* Stale stuff just to allow code to compile. 
*/ +%token IDENTIFIER_FINALIZED FUNC_MACRO OBJ_MACRO %% - /* We do all printing at the input level. */ input: - /* empty */ { - parser->just_printed_separator = 1; - } -| input content { - int is_token; - int skipping = 0; - - if (parser->skip_stack && parser->skip_stack->type != SKIP_NO_SKIP) - skipping = 1; - - if ($2 && strlen ($2) && ! skipping) { - int c = $2[0]; - int is_not_separator = ((c >= 'a' && c <= 'z') || - (c >= 'A' && c <= 'Z') || - (c >= 'A' && c <= 'Z') || - (c >= '0' && c <= '9') || - (c == '_')); - - if (! parser->just_printed_separator && is_not_separator) - { - printf (" "); - } - printf ("%s", $2); - - if (is_not_separator) - parser->just_printed_separator = 0; - else - parser->just_printed_separator = 1; - } - - if ($2) - talloc_free ($2); - - if (parser->need_newline) { - printf ("\n"); - parser->just_printed_separator = 1; - parser->need_newline = 0; - } - } + /* empty */ +| input line ; -content: - IDENTIFIER { - $$ = $1; - } -| IDENTIFIER_FINALIZED { - $$ = $1; - } -| TOKEN { - $$ = $1.value; - } -| FUNC_MACRO { - $$ = $1; - } -| directive { - $$ = talloc_strdup (parser, "\n"); - } -| punctuator { - $$ = talloc_asprintf (parser, "%c", $1); - } -| macro { - $$ = NULL; - } +line: + control_line +| text_line +| HASH non_directive ; -punctuator: - '(' { $$ = '('; } -| ')' { $$ = ')'; } -| ',' { $$ = ','; } - ; - -macro: - FUNC_MACRO '(' argument_list ')' { - _expand_function_macro (parser, $1, $3); - } -| OBJ_MACRO { - _expand_object_macro (parser, $1); - talloc_free ($1); - } +control_line: + HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE +| HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE +| HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE +| HASH_UNDEF IDENTIFIER NEWLINE +| HASH NEWLINE ; -argument_list: - /* empty */ { - $$ = _argument_list_create (parser); - } -| argument { - $$ = _argument_list_create (parser); - _argument_list_append ($$, $1); - } -| argument_list ',' argument { - 
_argument_list_append ($1, $3); - $$ = $1; - } +identifier_list: + IDENTIFIER +| identifier_list ',' IDENTIFIER ; -argument: - argument_word { - $$ = _token_list_create (parser); - _token_list_append ($$, $1.type, $1.value); - } -| argument argument_word { - _token_list_append ($1, $2.type, $2.value); - talloc_free ($2.value); - $$ = $1; - } -| argument '(' argument_or_comma ')' { - _token_list_append ($1, '(', "("); - _token_list_append_list ($1, $3); - _token_list_append ($1, ')', ")"); - $$ = $1; - } +text_line: + NEWLINE +| pp_tokens NEWLINE ; -argument_word: - IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; } -| IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; } -| TOKEN { $$ = $1; } -| FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; } -| macro { $$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); } -; - - /* XXX: The body of argument_or_comma is the same as the body - * of argument, but with "argument" and "argument_word" - * changed to "argument_or_comma" and - * "argument_word_or_comma". It would be nice to have less - * redundancy here, but I'm not sure how. - * - * It would also be nice to have a less ugly grammar to have - * to implement, but such is the C preprocessor. 
- */ -argument_or_comma: - argument_word_or_comma { - $$ = _token_list_create (parser); - _token_list_append ($$, $1.type, $1.value); - } -| argument_or_comma argument_word_or_comma { - _token_list_append ($1, $2.type, $2.value); - $$ = $1; - } -| argument_or_comma '(' argument_or_comma ')' { - _token_list_append ($1, '(', "("); - _token_list_append_list ($1, $3); - _token_list_append ($1, ')', ")"); - $$ = $1; - } -; - -argument_word_or_comma: - IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; } -| IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; } -| TOKEN { $$ = $1; } -| FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; } -| macro { $$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); } -| ',' { $$.type = ','; $$.value = xtalloc_strdup (parser, ","); } -; - -directive: - DEFINE IDENTIFIER NEWLINE { - token_list_t *list = _token_list_create (parser); - _define_object_macro (parser, $2, list); - } -| DEFINE IDENTIFIER SPACE replacement_list NEWLINE { - _define_object_macro (parser, $2, $4); - } -| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list NEWLINE { - _define_function_macro (parser, $2, $4, $6); - } -| IF expression NEWLINE { - _glcpp_parser_skip_stack_push_if (parser, $2); - } -| IFDEF IDENTIFIER NEWLINE { - string_list_t *macro = hash_table_find (parser->defines, $2); - talloc_free ($2); - _glcpp_parser_skip_stack_push_if (parser, macro != NULL); - } -| IFNDEF IDENTIFIER NEWLINE { - string_list_t *macro = hash_table_find (parser->defines, $2); - talloc_free ($2); - _glcpp_parser_skip_stack_push_if (parser, macro == NULL); - } -| ELIF expression NEWLINE { - _glcpp_parser_skip_stack_change_if (parser, "#elif", $2); - } -| ELSE { - _glcpp_parser_skip_stack_change_if (parser, "else", 1); - } -| ENDIF { - _glcpp_parser_skip_stack_pop (parser); - } -| UNDEF IDENTIFIER { - string_list_t *macro = hash_table_find (parser->defines, $2); - if (macro) { - /* XXX: Need hash table to support a real way - * to remove an element rather 
than prefixing - * a new node with data of NULL like this. */ - hash_table_insert (parser->defines, NULL, $2); - talloc_free (macro); - } - talloc_free ($2); - } -; - -expression: - INTEGER { - $$ = $1; - } -| expression OR expression { - $$ = $1 || $3; - } -| expression AND expression { - $$ = $1 && $3; - } -| expression '|' expression { - $$ = $1 | $3; - } -| expression '^' expression { - $$ = $1 ^ $3; - } -| expression '&' expression { - $$ = $1 & $3; - } -| expression NOT_EQUAL expression { - $$ = $1 != $3; - } -| expression EQUAL expression { - $$ = $1 == $3; - } -| expression GREATER_OR_EQUAL expression { - $$ = $1 >= $3; - } -| expression LESS_OR_EQUAL expression { - $$ = $1 <= $3; - } -| expression '>' expression { - $$ = $1 > $3; - } -| expression '<' expression { - $$ = $1 < $3; - } -| expression RIGHT_SHIFT expression { - $$ = $1 >> $3; - } -| expression LEFT_SHIFT expression { - $$ = $1 << $3; - } -| expression '-' expression { - $$ = $1 - $3; - } -| expression '+' expression { - $$ = $1 + $3; - } -| expression '%' expression { - $$ = $1 % $3; - } -| expression '/' expression { - $$ = $1 / $3; - } -| expression '*' expression { - $$ = $1 * $3; - } -| '!' expression %prec UNARY { - $$ = ! 
$2; - } -| '~' expression %prec UNARY { - $$ = ~ $2; - } -| '-' expression %prec UNARY { - $$ = - $2; - } -| '+' expression %prec UNARY { - $$ = + $2; - } -| DEFINED IDENTIFIER %prec UNARY { - string_list_t *macro = hash_table_find (parser->defines, $2); - talloc_free ($2); - if (macro) - $$ = 1; - else - $$ = 0; - } -| '(' expression ')' { - $$ = $2; - } -; - -parameter_list: - /* empty */ { - $$ = _string_list_create (parser); - } -| IDENTIFIER { - $$ = _string_list_create (parser); - _string_list_append_item ($$, $1); - talloc_free ($1); - } -| parameter_list ',' IDENTIFIER { - _string_list_append_item ($1, $3); - talloc_free ($3); - $$ = $1; - } +non_directive: + pp_tokens NEWLINE ; replacement_list: - /* empty */ { - $$ = _token_list_create (parser); - } -| pp_tokens { - $$ = $1; - } + /* empty */ +| pp_tokens ; - pp_tokens: - TOKEN { - $$ = _token_list_create (parser); - _token_list_append ($$, $1.type, $1.value); - } -| pp_tokens TOKEN { - _token_list_append ($1, $2.type, $2.value); - $$ = $1; - } + preprocessing_token +| pp_tokens preprocessing_token ; +preprocessing_token: + IDENTIFIER +| punctuator +| OTHER +; + +punctuator: + '[' +| ']' +| '(' +| ')' +| '{' +| '}' +| '.' +| '&' +| '*' +| '+' +| '-' +| '~' +| '!' +| '/' +| '%' +| LEFT_SHIFT +| RIGHT_SHIFT +| '<' +| '>' +| LESS_OR_EQUAL +| GREATER_OR_EQUAL +| EQUAL +| NOT_EQUAL +| '^' +| '|' +| AND +| OR +| ';' +| ',' +| PASTE +; + + %% string_list_t * diff --git a/tests/glcpp-test b/tests/glcpp-test index 022a2367121..868b03cce83 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -1,9 +1,12 @@ #!/bin/sh +set -e + +echo "Caution: These results are just verifying parse-ability, not correctness!" 
for test in *.c; do echo "Testing $test" ../glcpp < $test > $test.out gcc -E $test -o $test.gcc grep -v '^#' < $test.gcc > $test.expected - diff -B -u $test.expected $test.out +# diff -B -u $test.expected $test.out done From 9bb796f33ac67abdf6c0bf55a06b0d8448caa3d3 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 14:40:47 -0700 Subject: [PATCH 085/148] Add xtalloc_reference. Yet another talloc wrapper that should come in handy. --- glcpp.h | 6 ++++++ xtalloc.c | 15 +++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/glcpp.h b/glcpp.h index 503731b85b3..6171ce8b4a0 100644 --- a/glcpp.h +++ b/glcpp.h @@ -164,4 +164,10 @@ xtalloc_strndup (const void *t, const char *p, size_t n); char * xtalloc_asprintf (const void *t, const char *fmt, ...); +void * +_xtalloc_reference_loc (const void *context, + const void *ptr, const char *location); + +#define xtalloc_reference(ctx, ptr) (_TALLOC_TYPEOF(ptr))_xtalloc_reference_loc((ctx),(ptr), __location__) + #endif diff --git a/xtalloc.c b/xtalloc.c index e52d12ac6b2..656ac2d6cb5 100644 --- a/xtalloc.c +++ b/xtalloc.c @@ -82,3 +82,18 @@ xtalloc_asprintf (const void *t, const char *fmt, ...) va_end(ap); return ret; } + +void * +_xtalloc_reference_loc (const void *context, + const void *ptr, const char *location) +{ + void *ret; + + ret = _talloc_reference_loc (context, ptr, location); + if (ret == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + + return ret; +} From 80dc60b9c3529cf438948d50b9619e8af2fad880 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 14:42:00 -0700 Subject: [PATCH 086/148] Delete some trailing whitespace. This pernicious stuff managed to sneak in on us. 
--- glcpp-parse.y | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index ebb28ed1965..c53370a89ad 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -95,7 +95,7 @@ _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition); static void _glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, const char *type, int condition); - + static void _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser); @@ -243,7 +243,7 @@ _string_list_append_item (string_list_t *list, const char *str) node = xtalloc (list, string_node_t); node->str = xtalloc_strdup (node, str); - + node->next = NULL; if (list->head == NULL) { @@ -404,7 +404,7 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) list->tail = tail->tail; } - + void yyerror (void *scanner, const char *error) { @@ -733,7 +733,7 @@ _glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, const char *type, parser->skip_stack->type = SKIP_TO_ENDIF; } } - + static void _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser) { From 808401fd79eea9fa2c965f9f235a753c0cb0d920 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 14:52:43 -0700 Subject: [PATCH 087/148] Store parsed tokens as token list and print all text lines. Still not doing any macro expansion just yet. But it should be fairly easy from here. --- glcpp-parse.y | 227 +++++++++++++++++++++++++++++++++++------------ glcpp.h | 27 ++++-- tests/glcpp-test | 5 +- 3 files changed, 195 insertions(+), 64 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index c53370a89ad..991b8a0b856 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -77,15 +77,29 @@ _argument_list_length (argument_list_t *list); token_list_t * _argument_list_member_at (argument_list_t *list, int index); +/* Note: This function talloc_steal()s the str pointer. 
*/ +token_t * +_token_create_str (void *ctx, int type, char *str); + +token_t * +_token_create_ival (void *ctx, int type, int ival); + token_list_t * _token_list_create (void *ctx); +/* Note: This function add a talloc_reference() to token. + * + * You may want to talloc_unlink any current reference if you no + * longer need it. */ void -_token_list_append (token_list_t *list, int type, const char *value); +_token_list_append (token_list_t *list, token_t *token); void _token_list_append_list (token_list_t *list, token_list_t *tail); +void +_token_list_print (token_list_t *list); + static void glcpp_parser_pop_expansion (glcpp_parser_t *parser); @@ -107,12 +121,9 @@ glcpp_parser_lex (glcpp_parser_t *parser); %} %union { - intmax_t imaxval; int ival; char *str; - argument_list_t *argument_list; - string_list_t *string_list; - token_t token; + token_t *token; token_list_t *token_list; } @@ -121,6 +132,10 @@ glcpp_parser_lex (glcpp_parser_t *parser); %token HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH IDENTIFIER NEWLINE OTHER HASH_UNDEF %token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE +%type <ival> punctuator +%type <str> IDENTIFIER OTHER +%type <token> preprocessing_token +%type <token_list> pp_tokens replacement_list text_line /* Stale stuff just to allow code to compile. 
*/ %token IDENTIFIER_FINALIZED FUNC_MACRO OBJ_MACRO @@ -134,7 +149,11 @@ input: line: control_line -| text_line +| text_line { + _token_list_print ($1); + printf ("\n"); + talloc_free ($1); + } | HASH non_directive ; @@ -152,7 +171,7 @@ identifier_list: ; text_line: - NEWLINE + NEWLINE { $$ = NULL; } | pp_tokens NEWLINE ; @@ -161,55 +180,68 @@ non_directive: ; replacement_list: - /* empty */ + /* empty */ { $$ = NULL; } | pp_tokens ; pp_tokens: - preprocessing_token -| pp_tokens preprocessing_token + preprocessing_token { + $$ = _token_list_create (parser); + _token_list_append ($$, $1); + talloc_unlink (parser, $1); + } +| pp_tokens preprocessing_token { + $$ = $1; + _token_list_append ($$, $2); + talloc_unlink (parser, $2); + } ; preprocessing_token: - IDENTIFIER -| punctuator -| OTHER + IDENTIFIER { + $$ = _token_create_str (parser, IDENTIFIER, $1); + } +| punctuator { + $$ = _token_create_ival (parser, $1, $1); + } +| OTHER { + $$ = _token_create_str (parser, OTHER, $1); + } ; punctuator: - '[' -| ']' -| '(' -| ')' -| '{' -| '}' -| '.' -| '&' -| '*' -| '+' -| '-' -| '~' -| '!' -| '/' -| '%' -| LEFT_SHIFT -| RIGHT_SHIFT -| '<' -| '>' -| LESS_OR_EQUAL -| GREATER_OR_EQUAL -| EQUAL -| NOT_EQUAL -| '^' -| '|' -| AND -| OR -| ';' -| ',' -| PASTE + '[' { $$ = '['; } +| ']' { $$ = ']'; } +| '(' { $$ = '('; } +| ')' { $$ = ')'; } +| '{' { $$ = '{'; } +| '}' { $$ = '}'; } +| '.' { $$ = '.'; } +| '&' { $$ = '&'; } +| '*' { $$ = '*'; } +| '+' { $$ = '+'; } +| '-' { $$ = '-'; } +| '~' { $$ = '~'; } +| '!' 
{ $$ = '!'; } +| '/' { $$ = '/'; } +| '%' { $$ = '%'; } +| LEFT_SHIFT { $$ = LEFT_SHIFT; } +| RIGHT_SHIFT { $$ = RIGHT_SHIFT; } +| '<' { $$ = '<'; } +| '>' { $$ = '>'; } +| LESS_OR_EQUAL { $$ = LESS_OR_EQUAL; } +| GREATER_OR_EQUAL { $$ = GREATER_OR_EQUAL; } +| EQUAL { $$ = EQUAL; } +| NOT_EQUAL { $$ = NOT_EQUAL; } +| '^' { $$ = '^'; } +| '|' { $$ = '|'; } +| AND { $$ = AND; } +| OR { $$ = OR; } +| ';' { $$ = ';'; } +| ',' { $$ = ','; } +| PASTE { $$ = PASTE; } ; - %% string_list_t * @@ -361,6 +393,77 @@ _argument_list_member_at (argument_list_t *list, int index) return NULL; } +/* Note: This function talloc_steal()s the str pointer. */ +token_t * +_token_create_str (void *ctx, int type, char *str) +{ + token_t *token; + + token = xtalloc (ctx, token_t); + token->type = type; + token->value.str = talloc_steal (token, str); + + return token; +} + +token_t * +_token_create_ival (void *ctx, int type, int ival) +{ + token_t *token; + + token = xtalloc (ctx, token_t); + token->type = type; + token->value.ival = ival; + + return token; +} + +void +_token_print (token_t *token) +{ + if (token->type < 256) { + printf ("%c", token->type); + return; + } + + switch (token->type) { + case IDENTIFIER: + case OTHER: + printf ("%s", token->value.str); + break; + case LEFT_SHIFT: + printf ("<<"); + break; + case RIGHT_SHIFT: + printf (">>"); + break; + case LESS_OR_EQUAL: + printf ("<="); + break; + case GREATER_OR_EQUAL: + printf (">="); + break; + case EQUAL: + printf ("=="); + break; + case NOT_EQUAL: + printf ("!="); + break; + case AND: + printf ("&&"); + break; + case OR: + printf ("||"); + break; + case PASTE: + printf ("##"); + break; + default: + fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type); + break; + } +} + token_list_t * _token_list_create (void *ctx) { @@ -374,13 +477,12 @@ _token_list_create (void *ctx) } void -_token_list_append (token_list_t *list, int type, const char *value) +_token_list_append (token_list_t *list, token_t 
*token) { token_node_t *node; node = xtalloc (list, token_node_t); - node->type = type; - node->value = xtalloc_strdup (list, value); + node->token = xtalloc_reference (list, token); node->next = NULL; @@ -405,6 +507,21 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) list->tail = tail->tail; } +void +_token_list_print (token_list_t *list) +{ + token_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) { + _token_print (node->token); + if (node->next) + printf (" "); + } +} + void yyerror (void *scanner, const char *error) { @@ -598,7 +715,8 @@ _expand_function_macro (glcpp_parser_t *parser, expanded = _token_list_create (macro); for (i = macro->replacements->head; i; i = i->next) { - if (_string_list_contains (macro->parameters, i->value, + if (_string_list_contains (macro->parameters, + i->token->value.str, &parameter_index)) { token_list_t *argument; @@ -606,11 +724,10 @@ _expand_function_macro (glcpp_parser_t *parser, parameter_index); for (j = argument->head; j; j = j->next) { - _token_list_append (expanded, j->type, - j->value); + _token_list_append (expanded, j->token); } } else { - _token_list_append (expanded, i->type, i->value); + _token_list_append (expanded, i->token); } } @@ -644,10 +761,10 @@ glcpp_parser_lex (glcpp_parser_t *parser) expansion->replacements = replacements->next; - token = replacements->value; + token = replacements->token->value.str; /* Implement token pasting. 
*/ - if (replacements->next && strcmp (replacements->next->value, "##") == 0) { + if (replacements->next && strcmp (replacements->next->token->value.str, "##") == 0) { token_node_t *next_node; next_node = replacements->next->next; @@ -658,7 +775,7 @@ glcpp_parser_lex (glcpp_parser_t *parser) } token = xtalloc_asprintf (parser, "%s%s", - token, next_node->value); + token, next_node->token->value.str); expansion->replacements = next_node->next; } @@ -671,7 +788,7 @@ glcpp_parser_lex (glcpp_parser_t *parser) yylval.str = xtalloc_strdup (parser, token); /* Carefully refuse to expand any finalized identifier. */ - if (replacements->type == IDENTIFIER_FINALIZED) + if (replacements->token->type == IDENTIFIER_FINALIZED) return IDENTIFIER_FINALIZED; switch (glcpp_parser_classify_token (parser, yylval.str, diff --git a/glcpp.h b/glcpp.h index 6171ce8b4a0..261254a17c4 100644 --- a/glcpp.h +++ b/glcpp.h @@ -44,21 +44,34 @@ typedef struct string_list { string_node_t *tail; } string_list_t; -typedef struct token { +typedef struct token token_t; +typedef struct token_list token_list_t; + +typedef union YYSTYPE +{ + int ival; + char *str; + token_t *token; + token_list_t *token_list; +} YYSTYPE; + +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 + +struct token { int type; - char *value; -} token_t; + YYSTYPE value; +}; typedef struct token_node { - int type; - const char *value; + token_t *token; struct token_node *next; } token_node_t; -typedef struct token_list { +struct token_list { token_node_t *head; token_node_t *tail; -} token_list_t; +}; typedef struct argument_node { token_list_t *argument; diff --git a/tests/glcpp-test b/tests/glcpp-test index 868b03cce83..34cca883301 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -7,6 +7,7 @@ for test in *.c; do echo "Testing $test" ../glcpp < $test > $test.out gcc -E $test -o $test.gcc - grep -v '^#' < $test.gcc > $test.expected -# diff -B -u $test.expected $test.out +# grep -v '^#' < $test.gcc > $test.expected 
+ grep -v '^[ ]*#' < $test > $test.expected + diff -w -u $test.expected $test.out done From 9fb8b7a495c9dc6f9a62cf82300fae5925af92fc Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 15:04:32 -0700 Subject: [PATCH 088/148] Make the lexer pass whitespace through (as OTHER tokens) for text lines. With this change, we can recreate the original text-line input exactly. Previously we were inserting a space between every pair of tokens so our output had a lot more whitespace than our input. With this change, we can drop the "-b" option to diff and match the input exactly. --- glcpp-lex.l | 122 ++++++++++++++++++++++++++++++++--------------- glcpp-parse.y | 2 - tests/glcpp-test | 2 +- 3 files changed, 84 insertions(+), 42 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index f1dd11ea9bd..7b5cdd57a0f 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,6 +32,21 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" + /* This lexer has two states: + * + * The CONTROL state is for control lines (directives) + * It lexes exactly as specified in the C99 specification. + * + * The INITIAL state is for input lines. In this state, we + * make the OTHER token much more broad in that it now + * includes tokens consisting entirely of whitespace. This + * allows us to pass text through verbatim. It avoids the + * "inadvertent token pasting" problem that would occur if we + * just printed tokens, while also avoiding excess whitespace + * insertion in the output.*/ + +%x CONTROL + SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -48,75 +63,104 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
%% {HASH}define{HSPACE}+/{IDENTIFIER}"(" { + BEGIN CONTROL; return HASH_DEFINE_FUNC; } {HASH}define { + BEGIN CONTROL; return HASH_DEFINE_OBJ; } {HASH}undef { + BEGIN CONTROL; return HASH_UNDEF; } {HASH} { + BEGIN CONTROL; return HASH; } +{IDENTIFIER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; +} + +"<<" { + return LEFT_SHIFT; +} + +">>" { + return RIGHT_SHIFT; +} + +"<=" { + return LESS_OR_EQUAL; +} + +">=" { + return GREATER_OR_EQUAL; +} + +"==" { + return EQUAL; +} + +"!=" { + return NOT_EQUAL; +} + +"&&" { + return AND; +} + +"||" { + return OR; +} + +"##" { + return PASTE; +} + +{PUNCTUATION} { + return yytext[0]; +} + +{OTHER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return OTHER; +} + +{HSPACE}+ + +\n { + BEGIN INITIAL; + return NEWLINE; +} + {IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } -"<<" { - return LEFT_SHIFT; +{OTHER}+ { + yylval.str = xtalloc_strdup (yyextra, yytext); + return OTHER; } -">>" { - return RIGHT_SHIFT; -} - -"<=" { - return LESS_OR_EQUAL; -} - -">=" { - return GREATER_OR_EQUAL; -} - -"==" { - return EQUAL; -} - -"!=" { - return NOT_EQUAL; -} - -"&&" { - return AND; -} - -"||" { - return OR; -} - -"##" { - return PASTE; -} - -{PUNCTUATION} { - return yytext[0]; +{HSPACE}+ { + yylval.str = xtalloc_strdup (yyextra, yytext); + return OTHER; } \n { return NEWLINE; } -{OTHER} { +. 
{ yylval.str = xtalloc_strdup (yyextra, yytext); return OTHER; } -{HSPACE}+ - %% diff --git a/glcpp-parse.y b/glcpp-parse.y index 991b8a0b856..957421b864e 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -517,8 +517,6 @@ _token_list_print (token_list_t *list) for (node = list->head; node; node = node->next) { _token_print (node->token); - if (node->next) - printf (" "); } } diff --git a/tests/glcpp-test b/tests/glcpp-test index 34cca883301..8074e471197 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -9,5 +9,5 @@ for test in *.c; do gcc -E $test -o $test.gcc # grep -v '^#' < $test.gcc > $test.expected grep -v '^[ ]*#' < $test > $test.expected - diff -w -u $test.expected $test.out + diff -u $test.expected $test.out done From ae6517f4a83981ae363bbbfe439ec23e8deb04b1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 15:24:59 -0700 Subject: [PATCH 089/148] Implement expansion of object-like macros. For this we add an "active" string_list_t to the parser. This makes the current expansion_list_t in the parser obsolete, but we don't remove that yet. With this change we can now start passing some actual tests, so we turn on real testing in the test suite again. I expect to implement things more or less in the same order as before, so the test suite now halts on first error. With this change the first 8 tests in the suite pass, (object-like macros with chaining and recursion). 
--- glcpp-parse.y | 128 ++++++++++++++++++++++++++++++++++++++++------- glcpp.h | 1 + tests/glcpp-test | 5 +- 3 files changed, 112 insertions(+), 22 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 957421b864e..b3ef177a6da 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -59,6 +59,12 @@ _string_list_append_item (string_list_t *list, const char *str); void _string_list_append_list (string_list_t *list, string_list_t *tail); +void +_string_list_push (string_list_t *list, const char *str); + +void +_string_list_pop (string_list_t *list); + int _string_list_contains (string_list_t *list, const char *member, int *index); @@ -98,7 +104,8 @@ void _token_list_append_list (token_list_t *list, token_list_t *tail); void -_token_list_print (token_list_t *list); +_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, + token_list_t *list); static void glcpp_parser_pop_expansion (glcpp_parser_t *parser); @@ -144,21 +151,24 @@ glcpp_parser_lex (glcpp_parser_t *parser); input: /* empty */ -| input line +| input line { + printf ("\n"); + } ; line: control_line | text_line { - _token_list_print ($1); - printf ("\n"); + _glcpp_parser_print_expanded_token_list (parser, $1); talloc_free ($1); } | HASH non_directive ; control_line: - HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE + HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { + _define_object_macro (parser, $2, $3); + } | HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE | HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE | HASH_UNDEF IDENTIFIER NEWLINE @@ -287,6 +297,42 @@ _string_list_append_item (string_list_t *list, const char *str) list->tail = node; } +void +_string_list_push (string_list_t *list, const char *str) +{ + string_node_t *node; + + node = xtalloc (list, string_node_t); + node->str = xtalloc_strdup (node, str); + node->next = list->head; + + if (list->tail == NULL) { + list->tail = node; + } + list->head = node; +} + +void +_string_list_pop 
(string_list_t *list) +{ + string_node_t *node; + + node = list->head; + + if (node == NULL) { + fprintf (stderr, "Internal error: _string_list_pop called on an empty list.\n"); + exit (1); + } + + list->head = node->next; + if (list->tail == node) { + assert (node->next == NULL); + list->tail = NULL; + } + + talloc_free (node); +} + int _string_list_contains (string_list_t *list, const char *member, int *index) { @@ -507,19 +553,6 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) list->tail = tail->tail; } -void -_token_list_print (token_list_t *list) -{ - token_node_t *node; - - if (list == NULL) - return; - - for (node = list->head; node; node = node->next) { - _token_print (node->token); - } -} - void yyerror (void *scanner, const char *error) { @@ -536,6 +569,7 @@ glcpp_parser_create (void) glcpp_lex_init_extra (parser, &parser->scanner); parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); + parser->active = _string_list_create (parser); parser->expansions = NULL; parser->just_printed_separator = 1; @@ -605,6 +639,64 @@ glcpp_parser_classify_token (glcpp_parser_t *parser, return TOKEN_CLASS_OBJ_MACRO; } +void +_glcpp_parser_print_expanded_token (glcpp_parser_t *parser, + token_t *token) +{ + const char *identifier; + macro_t *macro; + + /* We only expand identifiers */ + if (token->type != IDENTIFIER) { + _token_print (token); + return; + } + + /* Look up this identifier in the hash table. */ + identifier = token->value.str; + macro = hash_table_find (parser->defines, identifier); + + /* Not a macro, so just print directly. */ + if (macro == NULL) { + printf ("%s", identifier); + return; + } + + /* We're not (yet) supporting function-like macros. */ + if (macro->is_function) { + printf ("%s", identifier); + return; + } + + /* Finally, don't expand this macro if we're already actively + * expanding it, (to avoid infinite recursion). 
*/ + if (_string_list_contains (parser->active, identifier, NULL)) { + printf ("%s", identifier); + return; + } + + _string_list_push (parser->active, identifier); + _glcpp_parser_print_expanded_token_list (parser, + macro->replacements); + _string_list_pop (parser->active); +} + +void +_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, + token_list_t *list) +{ + token_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) { + _glcpp_parser_print_expanded_token (parser, node->token); + if (node->next) + printf (" "); + } +} + void _define_object_macro (glcpp_parser_t *parser, const char *identifier, diff --git a/glcpp.h b/glcpp.h index 261254a17c4..bd599d73011 100644 --- a/glcpp.h +++ b/glcpp.h @@ -124,6 +124,7 @@ typedef struct skip_node { struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; + string_list_t *active; expansion_node_t *expansions; int just_printed_separator; int need_newline; diff --git a/tests/glcpp-test b/tests/glcpp-test index 8074e471197..63041552104 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -1,13 +1,10 @@ #!/bin/sh set -e -echo "Caution: These results are just verifying parse-ability, not correctness!" - for test in *.c; do echo "Testing $test" ../glcpp < $test > $test.out gcc -E $test -o $test.gcc -# grep -v '^#' < $test.gcc > $test.expected - grep -v '^[ ]*#' < $test > $test.expected + grep -v '^#' < $test.gcc > $test.expected diff -u $test.expected $test.out done From e6fb7827c96451d4a09dfda31979a6b9cb27301e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 15:28:58 -0700 Subject: [PATCH 090/148] Implement #undef. Which is as simple as copying the former action back from the git history. Now all tests through test 11 pass. 
--- glcpp-parse.y | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index b3ef177a6da..830a6232d80 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -171,7 +171,17 @@ control_line: } | HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE | HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE -| HASH_UNDEF IDENTIFIER NEWLINE +| HASH_UNDEF IDENTIFIER NEWLINE { + string_list_t *macro = hash_table_find (parser->defines, $2); + if (macro) { + /* XXX: Need hash table to support a real way + * to remove an element rather than prefixing + * a new node with data of NULL like this. */ + hash_table_insert (parser->defines, NULL, $2); + talloc_free (macro); + } + talloc_free ($2); + } | HASH NEWLINE ; From b1854fdfb6b567fa61d544d8080e2acb4cc78dc1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 16:28:26 -0700 Subject: [PATCH 091/148] Implement simplified substitution for function-like macro invocation. This supports function-like macro invocation but without any argument substitution. This now makes test 11 through 14 pass. --- glcpp-lex.l | 14 +++- glcpp-parse.y | 174 ++++++++++++++++++++++++++++++++++++++++++++------ glcpp.h | 1 + 3 files changed, 168 insertions(+), 21 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 7b5cdd57a0f..b1980742d39 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -144,6 +144,18 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? return IDENTIFIER; } +"(" { + return '('; +} + +")" { + return ')'; +} + +"," { + return ','; +} + {OTHER}+ { yylval.str = xtalloc_strdup (yyextra, yytext); return OTHER; @@ -151,7 +163,7 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
{HSPACE}+ { yylval.str = xtalloc_strdup (yyextra, yytext); - return OTHER; + return SPACE; } \n { diff --git a/glcpp-parse.y b/glcpp-parse.y index 830a6232d80..60b414e43a7 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -127,20 +127,14 @@ glcpp_parser_lex (glcpp_parser_t *parser); %} -%union { - int ival; - char *str; - token_t *token; - token_list_t *token_list; -} - %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH IDENTIFIER NEWLINE OTHER HASH_UNDEF +%token HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_UNDEF IDENTIFIER NEWLINE OTHER SPACE %token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE %type punctuator -%type IDENTIFIER OTHER +%type IDENTIFIER OTHER SPACE +%type identifier_list %type preprocessing_token %type pp_tokens replacement_list text_line @@ -169,8 +163,12 @@ control_line: HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { _define_object_macro (parser, $2, $3); } -| HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE -| HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE +| HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE { + _define_function_macro (parser, $2, NULL, $5); + } +| HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE { + _define_function_macro (parser, $2, $4, $6); + } | HASH_UNDEF IDENTIFIER NEWLINE { string_list_t *macro = hash_table_find (parser->defines, $2); if (macro) { @@ -186,8 +184,16 @@ control_line: ; identifier_list: - IDENTIFIER -| identifier_list ',' IDENTIFIER + IDENTIFIER { + $$ = _string_list_create (parser); + _string_list_append_item ($$, $1); + talloc_steal ($$, $1); + } +| identifier_list ',' IDENTIFIER { + $$ = $1; + _string_list_append_item ($$, $3); + talloc_steal ($$, $3); + } ; text_line: @@ -227,6 +233,9 @@ preprocessing_token: | OTHER { $$ = _token_create_str (parser, OTHER, $1); } +| SPACE { + $$ = _token_create_str (parser, OTHER, 
$1); + } ; punctuator: @@ -649,7 +658,14 @@ glcpp_parser_classify_token (glcpp_parser_t *parser, return TOKEN_CLASS_OBJ_MACRO; } -void +/* Print a non-macro token, or the expansion of an object-like macro. + * + * Returns 0 if this token is completely printed. + * + * Returns 1 in the case that 'token' is a function-like macro that + * needs further expansion. + */ +static int _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, token_t *token) { @@ -659,7 +675,7 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, /* We only expand identifiers */ if (token->type != IDENTIFIER) { _token_print (token); - return; + return 0; } /* Look up this identifier in the hash table. */ @@ -669,20 +685,135 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, /* Not a macro, so just print directly. */ if (macro == NULL) { printf ("%s", identifier); - return; + return 0; } - /* We're not (yet) supporting function-like macros. */ + /* For function-like macros return 1 for further processing. */ if (macro->is_function) { - printf ("%s", identifier); - return; + return 1; } /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ if (_string_list_contains (parser->active, identifier, NULL)) { + printf ("%s", identifier); + return 0; + } + + _string_list_push (parser->active, identifier); + _glcpp_parser_print_expanded_token_list (parser, + macro->replacements); + _string_list_pop (parser->active); + + return 0; +} + +typedef enum function_status +{ + FUNCTION_STATUS_SUCCESS, + FUNCTION_NOT_A_FUNCTION, + FUNCTION_UNBALANCED_PARENTHESES +} function_status_t; + +/* Find a set of function-like macro arguments by looking for a + * balanced set of parentheses. Upon return *node will be the last + * consumed node, such that further processing can continue with + * node->next. + * + * Return values: + * + * FUNCTION_STATUS_SUCCESS: + * + * Successfully parsed a set of function arguments. 
+ * + * FUNCTION_NOT_A_FUNCTION: + * + * Macro name not followed by a '('. This is not an error, but + * simply that the macro name should be treated as a non-macro. + * + * FUNCTION_UNBLANCED_PARENTHESES + * + * Macro name is not followed by a balanced set of parentheses. + */ +static function_status_t +_find_arguments (token_node_t **node_ret, argument_list_t **arguments) +{ + token_node_t *node = *node_ret, *last; + int paren_count; + int arg_count; + + last = node; + node = node->next; + + /* Ignore whitespace before first parenthesis. */ + while (node && node->token->type == SPACE) + node = node->next; + + if (node == NULL || node->token->type != '(') + return FUNCTION_NOT_A_FUNCTION; + + paren_count = 0; + arg_count = 0; + do { + if (node->token->type == '(') + { + paren_count++; + } + else if (node->token->type == ')') + { + paren_count--; + } + else if (node->token->type == ',' && + paren_count == 1) + { + arg_count++; + } + + last = node; + node = node->next; + + } while (node && paren_count); + + if (node && paren_count) + return FUNCTION_UNBALANCED_PARENTHESES; + + *node_ret = last; + + return FUNCTION_STATUS_SUCCESS; +} + +/* Prints the expansion of *node (consuming further tokens from the + * list as necessary). Upon return *node will be the last consumed + * node, such that further processing can continue with node->next. 
*/ +static void +_glcpp_parser_print_expanded_function (glcpp_parser_t *parser, + token_node_t **node_ret) +{ + macro_t *macro; + token_node_t *node; + const char *identifier; + argument_list_t *arguments; + function_status_t status; + + node = *node_ret; + identifier = node->token->value.str; + + macro = hash_table_find (parser->defines, identifier); + + assert (macro->is_function); + + status = _find_arguments (node_ret, &arguments); + + switch (status) { + case FUNCTION_STATUS_SUCCESS: + break; + case FUNCTION_NOT_A_FUNCTION: printf ("%s", identifier); return; + case FUNCTION_UNBALANCED_PARENTHESES: + fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", + identifier); + exit (1); } _string_list_push (parser->active, identifier); @@ -696,12 +827,15 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list) { token_node_t *node; + function_status_t function_status; if (list == NULL) return; for (node = list->head; node; node = node->next) { - _glcpp_parser_print_expanded_token (parser, node->token); + if (_glcpp_parser_print_expanded_token (parser, node->token)) + _glcpp_parser_print_expanded_function (parser, &node); + if (node->next) printf (" "); } diff --git a/glcpp.h b/glcpp.h index bd599d73011..043098b1347 100644 --- a/glcpp.h +++ b/glcpp.h @@ -51,6 +51,7 @@ typedef union YYSTYPE { int ival; char *str; + string_list_t *string_list; token_t *token; token_list_t *token_list; } YYSTYPE; From f34a0009dd07dbca4de5491744bd3618eae9458e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 16:59:02 -0700 Subject: [PATCH 092/148] Pass through literal space values from replacement lists. This makes test 15 pass and also dramatically simplifies the lexer. We were previously using a CONTROL state in the lexer to only emit SPACE tokens when on text lines. But that's not actually what we want. We need SPACE tokens in the replacement lists as well. 
Instead of a lexer state for this, we now simply set a "space_tokens" flag whenever we start constructing a pp_tokens list and clear the flag whenever we see a '#' introducing a directive. Much cleaner this way. --- glcpp-lex.l | 131 ++++++++++++++++---------------------------------- glcpp-parse.y | 10 ++-- glcpp.h | 1 + 3 files changed, 48 insertions(+), 94 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index b1980742d39..f6d0c8b7d67 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,21 +32,6 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" - /* This lexer has two states: - * - * The CONTROL state is for control lines (directives) - * It lexes exactly as specified in the C99 specification. - * - * The INITIAL state is for input lines. In this state, we - * make the OTHER token much more broad in that it now - * includes tokens consisting entirely of whitespace. This - * allows us to pass text through verbatim. It avoids the - * "inadvertent token pasting" problem that would occur if we - * just printed tokens, while also avoiding excess whitespace - * insertion in the output.*/ - -%x CONTROL - SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -63,97 +48,68 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
%% {HASH}define{HSPACE}+/{IDENTIFIER}"(" { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH_DEFINE_FUNC; } {HASH}define { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH_DEFINE_OBJ; } {HASH}undef { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH_UNDEF; } {HASH} { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH; } -{IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -"<<" { - return LEFT_SHIFT; -} - -">>" { - return RIGHT_SHIFT; -} - -"<=" { - return LESS_OR_EQUAL; -} - -">=" { - return GREATER_OR_EQUAL; -} - -"==" { - return EQUAL; -} - -"!=" { - return NOT_EQUAL; -} - -"&&" { - return AND; -} - -"||" { - return OR; -} - -"##" { - return PASTE; -} - -{PUNCTUATION} { - return yytext[0]; -} - -{OTHER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return OTHER; -} - -{HSPACE}+ - -\n { - BEGIN INITIAL; - return NEWLINE; -} - {IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } -"(" { - return '('; +"<<" { + return LEFT_SHIFT; } -")" { - return ')'; +">>" { + return RIGHT_SHIFT; } -"," { - return ','; +"<=" { + return LESS_OR_EQUAL; +} + +">=" { + return GREATER_OR_EQUAL; +} + +"==" { + return EQUAL; +} + +"!=" { + return NOT_EQUAL; +} + +"&&" { + return AND; +} + +"||" { + return OR; +} + +"##" { + return PASTE; +} + +{PUNCTUATION} { + return yytext[0]; } {OTHER}+ { @@ -162,17 +118,14 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? } {HSPACE}+ { - yylval.str = xtalloc_strdup (yyextra, yytext); - return SPACE; + if (yyextra->space_tokens) { + yylval.str = xtalloc_strdup (yyextra, yytext); + return SPACE; + } } \n { return NEWLINE; } -. 
{ - yylval.str = xtalloc_strdup (yyextra, yytext); - return OTHER; -} - %% diff --git a/glcpp-parse.y b/glcpp-parse.y index 60b414e43a7..a1981995fd0 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -160,7 +160,7 @@ line: ; control_line: - HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { + HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { _define_object_macro (parser, $2, $3); } | HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE { @@ -212,6 +212,7 @@ replacement_list: pp_tokens: preprocessing_token { + parser->space_tokens = 1; $$ = _token_list_create (parser); _token_list_append ($$, $1); talloc_unlink (parser, $1); @@ -234,7 +235,7 @@ preprocessing_token: $$ = _token_create_str (parser, OTHER, $1); } | SPACE { - $$ = _token_create_str (parser, OTHER, $1); + $$ = _token_create_str (parser, SPACE, $1); } ; @@ -494,6 +495,7 @@ _token_print (token_t *token) switch (token->type) { case IDENTIFIER: case OTHER: + case SPACE: printf ("%s", token->value.str); break; case LEFT_SHIFT: @@ -589,6 +591,7 @@ glcpp_parser_create (void) parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); parser->active = _string_list_create (parser); + parser->space_tokens = 1; parser->expansions = NULL; parser->just_printed_separator = 1; @@ -835,9 +838,6 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, for (node = list->head; node; node = node->next) { if (_glcpp_parser_print_expanded_token (parser, node->token)) _glcpp_parser_print_expanded_function (parser, &node); - - if (node->next) - printf (" "); } } diff --git a/glcpp.h b/glcpp.h index 043098b1347..f3760fa7a41 100644 --- a/glcpp.h +++ b/glcpp.h @@ -126,6 +126,7 @@ struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; string_list_t *active; + int space_tokens; expansion_node_t *expansions; int just_printed_separator; int need_newline; From f8ec4e0be86eee05f5a661a01864247fcd1a6b30 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 
17:06:17 -0700 Subject: [PATCH 093/148] Add a test #0 to ensure that we don't do any inadvertent token pasting. This simply ensures that spaces in input line are preserved. --- tests/000-content-with-spaces.c | 1 + 1 file changed, 1 insertion(+) create mode 100644 tests/000-content-with-spaces.c diff --git a/tests/000-content-with-spaces.c b/tests/000-content-with-spaces.c new file mode 100644 index 00000000000..a7fc918c908 --- /dev/null +++ b/tests/000-content-with-spaces.c @@ -0,0 +1 @@ +this is four tokens From e9397867ddce20a4263949f4b3a488fa99af3041 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 17:08:07 -0700 Subject: [PATCH 094/148] Collapse multiple spaces in input down to a single space. This is what gcc does, and it's actually less work to do this. Previously we were having to save the contents of space tokens as a string, but we don't need to do that now. We extend test #0 to exercise this feature here. --- glcpp-lex.l | 1 - glcpp-parse.y | 10 ++++++---- tests/000-content-with-spaces.c | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index f6d0c8b7d67..516f42dee32 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -119,7 +119,6 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
{HSPACE}+ { if (yyextra->space_tokens) { - yylval.str = xtalloc_strdup (yyextra, yytext); return SPACE; } } diff --git a/glcpp-parse.y b/glcpp-parse.y index a1981995fd0..0460f71f746 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -132,8 +132,8 @@ glcpp_parser_lex (glcpp_parser_t *parser); %token HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_UNDEF IDENTIFIER NEWLINE OTHER SPACE %token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE -%type punctuator -%type IDENTIFIER OTHER SPACE +%type punctuator SPACE +%type IDENTIFIER OTHER %type identifier_list %type preprocessing_token %type pp_tokens replacement_list text_line @@ -235,7 +235,7 @@ preprocessing_token: $$ = _token_create_str (parser, OTHER, $1); } | SPACE { - $$ = _token_create_str (parser, SPACE, $1); + $$ = _token_create_ival (parser, SPACE, SPACE); } ; @@ -495,9 +495,11 @@ _token_print (token_t *token) switch (token->type) { case IDENTIFIER: case OTHER: - case SPACE: printf ("%s", token->value.str); break; + case SPACE: + printf (" "); + break; case LEFT_SHIFT: printf ("<<"); break; diff --git a/tests/000-content-with-spaces.c b/tests/000-content-with-spaces.c index a7fc918c908..696cb3a74fc 100644 --- a/tests/000-content-with-spaces.c +++ b/tests/000-content-with-spaces.c @@ -1 +1 @@ -this is four tokens +this is four tokens From 9ce18cf9837bee379dfd0f52a3df005c1797e544 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 17:32:21 -0700 Subject: [PATCH 095/148] Implement substitution of function parameters in macro calls. This makes tests 16 - 19 pass. 
--- glcpp-parse.y | 65 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 0460f71f746..eb93bad85d1 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -405,9 +405,6 @@ _argument_list_append (argument_list_t *list, token_list_t *argument) { argument_node_t *node; - if (argument == NULL || argument->head == NULL) - return; - node = xtalloc (list, argument_node_t); node->argument = argument; @@ -741,8 +738,9 @@ typedef enum function_status * Macro name is not followed by a balanced set of parentheses. */ static function_status_t -_find_arguments (token_node_t **node_ret, argument_list_t **arguments) +_arguments_parse (argument_list_t *arguments, token_node_t **node_ret) { + token_list_t *argument; token_node_t *node = *node_ret, *last; int paren_count; int arg_count; @@ -757,6 +755,8 @@ _find_arguments (token_node_t **node_ret, argument_list_t **arguments) if (node == NULL || node->token->type != '(') return FUNCTION_NOT_A_FUNCTION; + argument = NULL; + paren_count = 0; arg_count = 0; do { @@ -771,7 +771,14 @@ _find_arguments (token_node_t **node_ret, argument_list_t **arguments) else if (node->token->type == ',' && paren_count == 1) { - arg_count++; + argument = NULL; + } + else { + if (argument == NULL) { + argument = _token_list_create (arguments); + _argument_list_append (arguments, argument); + } + _token_list_append (argument, node->token); } last = node; @@ -799,6 +806,9 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, const char *identifier; argument_list_t *arguments; function_status_t status; + token_list_t *expanded; + token_node_t *i, *j; + int parameter_index; node = *node_ret; identifier = node->token->value.str; @@ -807,7 +817,8 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, assert (macro->is_function); - status = _find_arguments (node_ret, &arguments); + arguments = _argument_list_create (parser); + status = 
_arguments_parse (arguments, node_ret); switch (status) { case FUNCTION_STATUS_SUCCESS: @@ -821,10 +832,48 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, exit (1); } + if (macro->replacements == NULL) { + talloc_free (arguments); + return; + } + + + if (_argument_list_length (arguments) != + _string_list_length (macro->parameters)) + { + fprintf (stderr, + "Error: macro %s invoked with %d arguments (expected %d)\n", + identifier, + _argument_list_length (arguments), + _string_list_length (macro->parameters)); + return; + } + + expanded = _token_list_create (arguments); + + for (i = macro->replacements->head; i; i = i->next) { + if (i->token->type == IDENTIFIER && + _string_list_contains (macro->parameters, + i->token->value.str, + &parameter_index)) + { + token_list_t *argument; + argument = _argument_list_member_at (arguments, + parameter_index); + for (j = argument->head; j; j = j->next) + { + _token_list_append (expanded, j->token); + } + } else { + _token_list_append (expanded, i->token); + } + } + _string_list_push (parser->active, identifier); - _glcpp_parser_print_expanded_token_list (parser, - macro->replacements); + _glcpp_parser_print_expanded_token_list (parser, expanded); _string_list_pop (parser->active); + + talloc_free (arguments); } void From c7581c2e6e6897eddc55c537c92417b813a8b81e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 17:41:07 -0700 Subject: [PATCH 096/148] Ignore separating whitespace at the beginning of a macro argument. This causes test 16 to pass. Tests 17-20 are also passing now, (though they would probably have passed before this change and simply weren't being run yet). 
--- glcpp-parse.y | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index eb93bad85d1..ec966580fc4 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -743,7 +743,6 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) token_list_t *argument; token_node_t *node = *node_ret, *last; int paren_count; - int arg_count; last = node; node = node->next; @@ -757,9 +756,7 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) argument = NULL; - paren_count = 0; - arg_count = 0; - do { + for (paren_count = 0; node; last = node, node = node->next) { if (node->token->type == '(') { paren_count++; @@ -767,6 +764,11 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) else if (node->token->type == ')') { paren_count--; + if (paren_count == 0) { + last = node; + node = node->next; + break; + } } else if (node->token->type == ',' && paren_count == 1) @@ -775,16 +777,16 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) } else { if (argument == NULL) { + /* Don't treat initial whitespace as + * part of the arguement. */ + if (node->token->type == SPACE) + continue; argument = _token_list_create (arguments); _argument_list_append (arguments, argument); } _token_list_append (argument, node->token); } - - last = node; - node = node->next; - - } while (node && paren_count); + } if (node && paren_count) return FUNCTION_UNBALANCED_PARENTHESES; From 652fa272ea4bdb9bfe6cd7f8413b3a3b03972987 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 17:45:22 -0700 Subject: [PATCH 097/148] Avoid swallowing initial left parenthesis from nested macro invocation. We weren't including this left parenthesis in the argument's token list so the nested function invocation was not being recognized. With this fix, tests 21 and 22 now pass. 
--- glcpp-parse.y | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index ec966580fc4..131102fab95 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -754,9 +754,12 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) if (node == NULL || node->token->type != '(') return FUNCTION_NOT_A_FUNCTION; + last = node; + node = node->next; + argument = NULL; - for (paren_count = 0; node; last = node, node = node->next) { + for (paren_count = 1; node; last = node, node = node->next) { if (node->token->type == '(') { paren_count++; @@ -770,7 +773,8 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) break; } } - else if (node->token->type == ',' && + + if (node->token->type == ',' && paren_count == 1) { argument = NULL; From 5aa7ea08093f727761d424ad090f44b116c8f0bd Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 18:39:43 -0700 Subject: [PATCH 098/148] Remove a bunch of old code and give the static treatment to what's left. We're no longer using the expansion stack, so its functions can go along with most of the body of glcpp_parser_lex that was using it. 
--- glcpp-parse.y | 262 ++++---------------------------------------------- glcpp.h | 7 -- 2 files changed, 21 insertions(+), 248 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 131102fab95..02286cd8e09 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -28,88 +28,77 @@ #include "glcpp.h" -void +static void yyerror (void *scanner, const char *error); -void +static void _define_object_macro (glcpp_parser_t *parser, const char *macro, token_list_t *replacements); -void +static void _define_function_macro (glcpp_parser_t *parser, const char *macro, string_list_t *parameters, token_list_t *replacements); -void -_expand_object_macro (glcpp_parser_t *parser, const char *identifier); - -void -_expand_function_macro (glcpp_parser_t *parser, - const char *identifier, - argument_list_t *arguments); - -string_list_t * +static string_list_t * _string_list_create (void *ctx); -void +static void _string_list_append_item (string_list_t *list, const char *str); -void +static void _string_list_append_list (string_list_t *list, string_list_t *tail); -void +static void _string_list_push (string_list_t *list, const char *str); -void +static void _string_list_pop (string_list_t *list); -int +static int _string_list_contains (string_list_t *list, const char *member, int *index); -int +static int _string_list_length (string_list_t *list); -argument_list_t * +static argument_list_t * _argument_list_create (void *ctx); -void +static void _argument_list_append (argument_list_t *list, token_list_t *argument); -int +static int _argument_list_length (argument_list_t *list); -token_list_t * +static token_list_t * _argument_list_member_at (argument_list_t *list, int index); /* Note: This function talloc_steal()s the str pointer. 
*/ -token_t * +static token_t * _token_create_str (void *ctx, int type, char *str); -token_t * +static token_t * _token_create_ival (void *ctx, int type, int ival); -token_list_t * +static token_list_t * _token_list_create (void *ctx); /* Note: This function add a talloc_reference() to token. * * You may want to talloc_unlink any current reference if you no * longer need it. */ -void +static void _token_list_append (token_list_t *list, token_t *token); -void +static void _token_list_append_list (token_list_t *list, token_list_t *tail); -void +static void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list); -static void -glcpp_parser_pop_expansion (glcpp_parser_t *parser); - static void _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition); @@ -591,10 +580,6 @@ glcpp_parser_create (void) hash_table_string_compare); parser->active = _string_list_create (parser); parser->space_tokens = 1; - parser->expansions = NULL; - - parser->just_printed_separator = 1; - parser->need_newline = 0; parser->skip_stack = NULL; @@ -610,8 +595,6 @@ glcpp_parser_parse (glcpp_parser_t *parser) void glcpp_parser_destroy (glcpp_parser_t *parser) { - if (parser->need_newline) - printf ("\n"); if (parser->skip_stack) fprintf (stderr, "Error: Unterminated #if\n"); glcpp_lex_destroy (parser->scanner); @@ -619,47 +602,6 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } -static int -glcpp_parser_is_expanding (glcpp_parser_t *parser, const char *member) -{ - expansion_node_t *node; - - for (node = parser->expansions; node; node = node->next) { - if (node->macro && - strcmp (node->macro->identifier, member) == 0) - { - return 1; - } - } - - return 0; -} - -token_class_t -glcpp_parser_classify_token (glcpp_parser_t *parser, - const char *identifier, - int *parameter_index) -{ - macro_t *macro; - - /* Is this token a defined macro? 
*/ - macro = hash_table_find (parser->defines, identifier); - - if (macro == NULL) - return TOKEN_CLASS_IDENTIFIER; - - /* Don't consider this a macro if we are already actively - * expanding this macro. */ - if (glcpp_parser_is_expanding (parser, identifier)) - return TOKEN_CLASS_IDENTIFIER_FINALIZED; - - /* Definitely a macro. Just need to check if it's function-like. */ - if (macro->is_function) - return TOKEN_CLASS_FUNC_MACRO; - else - return TOKEN_CLASS_OBJ_MACRO; -} - /* Print a non-macro token, or the expansion of an object-like macro. * * Returns 0 if this token is completely printed. @@ -933,172 +875,10 @@ _define_function_macro (glcpp_parser_t *parser, hash_table_insert (parser->defines, macro, identifier); } -static void -_glcpp_parser_push_expansion (glcpp_parser_t *parser, - macro_t *macro, - token_node_t *replacements) -{ - expansion_node_t *node; - - node = xtalloc (parser, expansion_node_t); - - node->macro = macro; - node->replacements = replacements; - - node->next = parser->expansions; - parser->expansions = node; -} - -static void -glcpp_parser_pop_expansion (glcpp_parser_t *parser) -{ - expansion_node_t *node; - - node = parser->expansions; - - if (node == NULL) { - fprintf (stderr, "Internal error: _expansion_list_pop called on an empty list.\n"); - exit (1); - } - - parser->expansions = node->next; - - talloc_free (node); -} - -void -_expand_object_macro (glcpp_parser_t *parser, const char *identifier) -{ - macro_t *macro; - - macro = hash_table_find (parser->defines, identifier); - assert (! macro->is_function); - assert (! 
glcpp_parser_is_expanding (parser, identifier)); - - _glcpp_parser_push_expansion (parser, macro, macro->replacements->head); -} - -void -_expand_function_macro (glcpp_parser_t *parser, - const char *identifier, - argument_list_t *arguments) -{ - macro_t *macro; - token_list_t *expanded; - token_node_t *i, *j; - int parameter_index; - - macro = hash_table_find (parser->defines, identifier); - assert (macro->is_function); - assert (! glcpp_parser_is_expanding (parser, identifier)); - - if (_argument_list_length (arguments) != - _string_list_length (macro->parameters)) - { - fprintf (stderr, - "Error: macro %s invoked with %d arguments (expected %d)\n", - identifier, - _argument_list_length (arguments), - _string_list_length (macro->parameters)); - return; - } - - expanded = _token_list_create (macro); - - for (i = macro->replacements->head; i; i = i->next) { - if (_string_list_contains (macro->parameters, - i->token->value.str, - &parameter_index)) - { - token_list_t *argument; - argument = _argument_list_member_at (arguments, - parameter_index); - for (j = argument->head; j; j = j->next) - { - _token_list_append (expanded, j->token); - } - } else { - _token_list_append (expanded, i->token); - } - } - - _glcpp_parser_push_expansion (parser, macro, expanded->head); -} - static int glcpp_parser_lex (glcpp_parser_t *parser) { - expansion_node_t *expansion; - token_node_t *replacements; - int parameter_index; - const char *token; - token_class_t class; - - /* Who says C can't do efficient tail recursion? */ - RECURSE: - - expansion = parser->expansions; - - if (expansion == NULL) - return glcpp_lex (parser->scanner); - - replacements = expansion->replacements; - - /* Pop expansion when replacements is exhausted. */ - if (replacements == NULL) { - glcpp_parser_pop_expansion (parser); - goto RECURSE; - } - - expansion->replacements = replacements->next; - - token = replacements->token->value.str; - - /* Implement token pasting. 
*/ - if (replacements->next && strcmp (replacements->next->token->value.str, "##") == 0) { - token_node_t *next_node; - - next_node = replacements->next->next; - - if (next_node == NULL) { - fprintf (stderr, "Error: '##' cannot appear at the end of a macro expansion.\n"); - exit (1); - } - - token = xtalloc_asprintf (parser, "%s%s", - token, next_node->token->value.str); - expansion->replacements = next_node->next; - } - - - if (strcmp (token, "(") == 0) - return '('; - else if (strcmp (token, ")") == 0) - return ')'; - - yylval.str = xtalloc_strdup (parser, token); - - /* Carefully refuse to expand any finalized identifier. */ - if (replacements->token->type == IDENTIFIER_FINALIZED) - return IDENTIFIER_FINALIZED; - - switch (glcpp_parser_classify_token (parser, yylval.str, - &parameter_index)) - { - case TOKEN_CLASS_IDENTIFIER: - return IDENTIFIER; - break; - case TOKEN_CLASS_IDENTIFIER_FINALIZED: - return IDENTIFIER_FINALIZED; - break; - case TOKEN_CLASS_FUNC_MACRO: - return FUNC_MACRO; - break; - default: - case TOKEN_CLASS_OBJ_MACRO: - return OBJ_MACRO; - break; - } + return glcpp_lex (parser->scanner); } static void diff --git a/glcpp.h b/glcpp.h index f3760fa7a41..6bd6e66a7cc 100644 --- a/glcpp.h +++ b/glcpp.h @@ -127,16 +127,9 @@ struct glcpp_parser { struct hash_table *defines; string_list_t *active; int space_tokens; - expansion_node_t *expansions; - int just_printed_separator; - int need_newline; skip_node_t *skip_stack; }; -void -glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, - int argument_index); - glcpp_parser_t * glcpp_parser_create (void); From 10ae438399f14367dd9e03032594c1e16c428999 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 20:35:01 -0700 Subject: [PATCH 099/148] Avoid getting extra trailing whitespace from macros. This trailing whitespace was coming from macro definitions and from macro arguments. We fix this with a little extra state in the token_list. 
It now remembers the last non-space token added, so that these can be trimmed off just before printing the list. With this fix test 23 now passes. Tests 24 and 25 are also passing, but they probably would have before this fix---just that they weren't being run earlier. --- glcpp-parse.y | 30 ++++++++++++++++++++++++++++-- glcpp.h | 1 + 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 02286cd8e09..60eaf215b8b 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -471,7 +471,7 @@ _token_create_ival (void *ctx, int type, int ival) } void -_token_print (token_t *token) +_glcpp_parser_print_token (glcpp_parser_t *parser, token_t *token) { if (token->type < 256) { printf ("%c", token->type); @@ -527,6 +527,7 @@ _token_list_create (void *ctx) list = xtalloc (ctx, token_list_t); list->head = NULL; list->tail = NULL; + list->non_space_tail = NULL; return list; } @@ -548,6 +549,8 @@ _token_list_append (token_list_t *list, token_t *token) } list->tail = node; + if (token->type != SPACE) + list->non_space_tail = node; } void @@ -560,6 +563,25 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) } list->tail = tail->tail; + list->non_space_tail = tail->non_space_tail; +} + +void +_token_list_trim_trailing_space (token_list_t *list) +{ + token_node_t *tail, *next; + + if (list->non_space_tail) { + tail = list->non_space_tail->next; + list->non_space_tail->next = NULL; + list->tail = list->non_space_tail; + + while (tail) { + next = tail->next; + talloc_free (tail); + tail = next; + } + } } void @@ -618,7 +640,7 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, /* We only expand identifiers */ if (token->type != IDENTIFIER) { - _token_print (token); + _glcpp_parser_print_token (parser, token); return 0; } @@ -719,6 +741,8 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) if (node->token->type == ',' && paren_count == 1) { + if (argument) + _token_list_trim_trailing_space (argument); 
argument = NULL; } else { @@ -834,6 +858,8 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, if (list == NULL) return; + _token_list_trim_trailing_space (list); + for (node = list->head; node; node = node->next) { if (_glcpp_parser_print_expanded_token (parser, node->token)) _glcpp_parser_print_expanded_function (parser, &node); diff --git a/glcpp.h b/glcpp.h index 6bd6e66a7cc..21db918cdce 100644 --- a/glcpp.h +++ b/glcpp.h @@ -72,6 +72,7 @@ typedef struct token_node { struct token_list { token_node_t *head; token_node_t *tail; + token_node_t *non_space_tail; }; typedef struct argument_node { From 039739b2da0ce8496f6e8d38127c0b3793607afa Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 07:58:59 -0700 Subject: [PATCH 100/148] Defer test 26 until much later (to test 55). Supporting embedded newlines in a macro invocation is going to be tricky with our current approach to lexing and parsing. Since this isn't really an important feature for us, we can defer this until more important things are resolved. With this test out of the way, tests 27 through 31 are passing. --- ...ine-func-extra-newlines.c => 055-define-func-extra-newlines.c} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{026-define-func-extra-newlines.c => 055-define-func-extra-newlines.c} (100%) diff --git a/tests/026-define-func-extra-newlines.c b/tests/055-define-func-extra-newlines.c similarity index 100% rename from tests/026-define-func-extra-newlines.c rename to tests/055-define-func-extra-newlines.c From c0607d573e04846a23c3162901aabd7fc40ebc61 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:01:42 -0700 Subject: [PATCH 101/148] Check active expansions before expanding a function-like macro invocation. With this fix, test 32 no longer recurses infinitely, but now passes. 
--- glcpp-parse.y | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 60eaf215b8b..a2bff6e0ada 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -654,11 +654,6 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, return 0; } - /* For function-like macros return 1 for further processing. */ - if (macro->is_function) { - return 1; - } - /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ if (_string_list_contains (parser->active, identifier, NULL)) { @@ -666,6 +661,11 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, return 0; } + /* For function-like macros return 1 for further processing. */ + if (macro->is_function) { + return 1; + } + _string_list_push (parser->active, identifier); _glcpp_parser_print_expanded_token_list (parser, macro->replacements); From 0197e9b64f0e64a617537c5ad1465b4a8706fe1c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:05:19 -0700 Subject: [PATCH 102/148] Change macro expansion to append onto token lists rather than printing directly. This doesn't change any functionality here, but will allow us to make future changes that were not possible with direct printing. Specifically, we need to expand macros within macro arguments before performing argument substitution. And *that* expansion cannot result in immediate printing. 
--- glcpp-parse.y | 193 +++++++++++++++++++++++++++++++------------------- 1 file changed, 120 insertions(+), 73 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index a2bff6e0ada..e25cfa92142 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -99,6 +99,11 @@ static void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list); +static void +_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, + token_list_t *list, + token_list_t *result); + static void _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition); @@ -470,55 +475,6 @@ _token_create_ival (void *ctx, int type, int ival) return token; } -void -_glcpp_parser_print_token (glcpp_parser_t *parser, token_t *token) -{ - if (token->type < 256) { - printf ("%c", token->type); - return; - } - - switch (token->type) { - case IDENTIFIER: - case OTHER: - printf ("%s", token->value.str); - break; - case SPACE: - printf (" "); - break; - case LEFT_SHIFT: - printf ("<<"); - break; - case RIGHT_SHIFT: - printf (">>"); - break; - case LESS_OR_EQUAL: - printf ("<="); - break; - case GREATER_OR_EQUAL: - printf (">="); - break; - case EQUAL: - printf ("=="); - break; - case NOT_EQUAL: - printf ("!="); - break; - case AND: - printf ("&&"); - break; - case OR: - printf ("||"); - break; - case PASTE: - printf ("##"); - break; - default: - fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type); - break; - } -} - token_list_t * _token_list_create (void *ctx) { @@ -584,6 +540,67 @@ _token_list_trim_trailing_space (token_list_t *list) } } +static void +_token_print (token_t *token) +{ + if (token->type < 256) { + printf ("%c", token->type); + return; + } + + switch (token->type) { + case IDENTIFIER: + case OTHER: + printf ("%s", token->value.str); + break; + case SPACE: + printf (" "); + break; + case LEFT_SHIFT: + printf ("<<"); + break; + case RIGHT_SHIFT: + printf (">>"); + break; + case LESS_OR_EQUAL: + printf ("<="); + break; + case 
GREATER_OR_EQUAL: + printf (">="); + break; + case EQUAL: + printf ("=="); + break; + case NOT_EQUAL: + printf ("!="); + break; + case AND: + printf ("&&"); + break; + case OR: + printf ("||"); + break; + case PASTE: + printf ("##"); + break; + default: + fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type); + break; + } +} + +static void +_token_list_print (token_list_t *list) +{ + token_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) + _token_print (node->token); +} + void yyerror (void *scanner, const char *error) { @@ -624,23 +641,26 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } -/* Print a non-macro token, or the expansion of an object-like macro. +/* Appends onto 'expansion' a non-macro token or the expansion of an + * object-like macro. * - * Returns 0 if this token is completely printed. + * Returns 0 if this token is completely processed. * * Returns 1 in the case that 'token' is a function-like macro that * needs further expansion. */ static int -_glcpp_parser_print_expanded_token (glcpp_parser_t *parser, - token_t *token) +_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, + token_t *token, + token_list_t *result) { const char *identifier; macro_t *macro; + token_list_t *expansion; /* We only expand identifiers */ if (token->type != IDENTIFIER) { - _glcpp_parser_print_token (parser, token); + _token_list_append (result, token); return 0; } @@ -648,16 +668,16 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, identifier = token->value.str; macro = hash_table_find (parser->defines, identifier); - /* Not a macro, so just print directly. */ + /* Not a macro, so just append. */ if (macro == NULL) { - printf ("%s", identifier); + _token_list_append (result, token); return 0; } /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). 
*/ if (_string_list_contains (parser->active, identifier, NULL)) { - printf ("%s", identifier); + _token_list_append (result, token); return 0; } @@ -667,8 +687,9 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, } _string_list_push (parser->active, identifier); - _glcpp_parser_print_expanded_token_list (parser, - macro->replacements); + _glcpp_parser_expand_token_list_onto (parser, + macro->replacements, + result); _string_list_pop (parser->active); return 0; @@ -770,15 +791,16 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) * list as necessary). Upon return *node will be the last consumed * node, such that further processing can continue with node->next. */ static void -_glcpp_parser_print_expanded_function (glcpp_parser_t *parser, - token_node_t **node_ret) +_glcpp_parser_expand_function_onto (glcpp_parser_t *parser, + token_node_t **node_ret, + token_list_t *result) { macro_t *macro; token_node_t *node; const char *identifier; argument_list_t *arguments; function_status_t status; - token_list_t *expanded; + token_list_t *substituted; token_node_t *i, *j; int parameter_index; @@ -796,7 +818,7 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, case FUNCTION_STATUS_SUCCESS: break; case FUNCTION_NOT_A_FUNCTION: - printf ("%s", identifier); + _token_list_append (result, node->token); return; case FUNCTION_UNBALANCED_PARENTHESES: fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", @@ -809,7 +831,6 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, return; } - if (_argument_list_length (arguments) != _string_list_length (macro->parameters)) { @@ -821,7 +842,8 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, return; } - expanded = _token_list_create (arguments); + /* Perform argument substitution on the replacement list. 
*/ + substituted = _token_list_create (arguments); for (i = macro->replacements->head; i; i = i->next) { if (i->token->type == IDENTIFIER && @@ -834,36 +856,61 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, parameter_index); for (j = argument->head; j; j = j->next) { - _token_list_append (expanded, j->token); + _token_list_append (substituted, j->token); } } else { - _token_list_append (expanded, i->token); + _token_list_append (substituted, i->token); } } _string_list_push (parser->active, identifier); - _glcpp_parser_print_expanded_token_list (parser, expanded); + _glcpp_parser_expand_token_list_onto (parser, substituted, result); _string_list_pop (parser->active); talloc_free (arguments); } +static void +_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, + token_list_t *list, + token_list_t *result) +{ + token_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) + { + if (_glcpp_parser_expand_token_onto (parser, node->token, + result)) + { + _glcpp_parser_expand_function_onto (parser, &node, + result); + } + } +} + void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list) { + token_list_t *expanded; token_node_t *node; function_status_t function_status; if (list == NULL) return; - _token_list_trim_trailing_space (list); + expanded = _token_list_create (parser); - for (node = list->head; node; node = node->next) { - if (_glcpp_parser_print_expanded_token (parser, node->token)) - _glcpp_parser_print_expanded_function (parser, &node); - } + _glcpp_parser_expand_token_list_onto (parser, list, expanded); + + _token_list_trim_trailing_space (expanded); + + _token_list_print (expanded); + + talloc_free (expanded); } void From d5cd40343f4a83d3270cb87ef38e85dcb9682e8c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:09:29 -0700 Subject: [PATCH 103/148] Expand macro arguments before performing argument substitution. 
As required by the C99 specification of the preprocessor. With this fix, tests 33 through 36 now pass. --- glcpp-parse.y | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index e25cfa92142..3b736f8e64d 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -854,10 +854,11 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, token_list_t *argument; argument = _argument_list_member_at (arguments, parameter_index); - for (j = argument->head; j; j = j->next) - { - _token_list_append (substituted, j->token); - } + /* Before substituting, we expand the argument + * tokens. */ + _glcpp_parser_expand_token_list_onto (parser, + argument, + substituted); } else { _token_list_append (substituted, i->token); } From b1ae61a2ee1bf2ba733dca417b0268b1106d83cf Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:10:38 -0700 Subject: [PATCH 104/148] Fix a typo in a comment. Always better to use proper grammar in our grammar. --- glcpp-parse.y | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 3b736f8e64d..5b792a976e6 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -85,7 +85,7 @@ _token_create_ival (void *ctx, int type, int ival); static token_list_t * _token_list_create (void *ctx); -/* Note: This function add a talloc_reference() to token. +/* Note: This function adds a talloc_reference() to token. * * You may want to talloc_unlink any current reference if you no * longer need it. */ From c9dcc08d4512370b6fef6370afb8bcdb0ecd9292 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:11:08 -0700 Subject: [PATCH 105/148] README: Document some known limitations. None of these are fundamental---just a few things that haven't been implemented yet. --- README | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README b/README index ba833a49ffb..f0f64c2644a 100644 --- a/README +++ b/README @@ -12,3 +12,15 @@ preprocessors". 
To fill in these details, I've been using the C99 standard (for which I had a convenient copy) as available from: http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf + +Known limitations +----------------- +Macro invocations cannot include embedded newlines. + +The __LINE__, __FILE__, and __VERSION__ macros are not yet supported. + +The argument of the 'defined' operator cannot yet include enclosing +parentheses. + +The #error, #pragma, #extension, #version, and #line macros are not +yet supported. From ec4ada01c01338ae1deab634cf62f24344bdbd3a Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:15:49 -0700 Subject: [PATCH 106/148] Prevent unexpanded macros from being expanded again in the future. With this fix, tests 37 - 39 now pass. --- glcpp-parse.y | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 5b792a976e6..ec104330631 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -676,8 +676,17 @@ _glcpp_parser_expand_token_onto (glcpp_parser_t *parser, /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ - if (_string_list_contains (parser->active, identifier, NULL)) { - _token_list_append (result, token); + if (_string_list_contains (parser->active, identifier, NULL)) + { + /* We change the token type here from IDENTIFIER to + * OTHER to prevent any future expansion of this + * unexpanded token. */ + char *str; + token_t *new_token; + + str = xtalloc_strdup (result, token->value.str); + new_token = _token_create_str (result, OTHER, str); + _token_list_append (result, new_token); return 0; } From 63909fc19654ddb3ef339bcceed9cbf6e6a057bc Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:16:56 -0700 Subject: [PATCH 107/148] Remove some stale token types. All the code referencing these was removed some time ago. 
--- glcpp-parse.y | 3 --- 1 file changed, 3 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index ec104330631..04e78b1826f 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -132,9 +132,6 @@ glcpp_parser_lex (glcpp_parser_t *parser); %type preprocessing_token %type pp_tokens replacement_list text_line - /* Stale stuff just to allow code to compile. */ -%token IDENTIFIER_FINALIZED FUNC_MACRO OBJ_MACRO - %% input: From ce540f2571a449a3620bd3672bfb93b39cef71e1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:25:44 -0700 Subject: [PATCH 108/148] Rename identifier from 'i' to 'node'. Now that we no longer have nested for loops with 'i' and 'j' we can use the 'node' that we already have. --- glcpp-parse.y | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 04e78b1826f..5f59b5b006f 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -807,7 +807,6 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, argument_list_t *arguments; function_status_t status; token_list_t *substituted; - token_node_t *i, *j; int parameter_index; node = *node_ret; @@ -851,10 +850,11 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, /* Perform argument substitution on the replacement list. 
*/ substituted = _token_list_create (arguments); - for (i = macro->replacements->head; i; i = i->next) { - if (i->token->type == IDENTIFIER && + for (node = macro->replacements->head; node; node = node->next) + { + if (node->token->type == IDENTIFIER && _string_list_contains (macro->parameters, - i->token->value.str, + node->token->value.str, &parameter_index)) { token_list_t *argument; @@ -866,7 +866,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, argument, substituted); } else { - _token_list_append (substituted, i->token); + _token_list_append (substituted, node->token); } } From ad0dee6bb0f197b9addb45f38e8843d6a504723c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 09:04:50 -0700 Subject: [PATCH 109/148] Implement token pasting. Which makes test 40 now pass. --- glcpp-parse.y | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/glcpp-parse.y b/glcpp-parse.y index 5f59b5b006f..330d3ab3bc4 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -586,6 +586,86 @@ _token_print (token_t *token) } } +/* Change 'token' into a new token formed by pasting 'other'. */ +static void +_token_paste (token_t *token, token_t *other) +{ + /* A very few single-character punctuators can be combined + * with another to form a multi-character punctuator. 
*/ + switch (token->type) { + case '<': + if (other->type == '<') { + token->type = LEFT_SHIFT; + token->value.ival = LEFT_SHIFT; + return; + } else if (other->type == '=') { + token->type = LESS_OR_EQUAL; + token->value.ival = LESS_OR_EQUAL; + return; + } + break; + case '>': + if (other->type == '>') { + token->type = RIGHT_SHIFT; + token->value.ival = RIGHT_SHIFT; + return; + } else if (other->type == '=') { + token->type = GREATER_OR_EQUAL; + token->value.ival = GREATER_OR_EQUAL; + return; + } + break; + case '=': + if (other->type == '=') { + token->type = EQUAL; + token->value.ival = EQUAL; + return; + } + break; + case '!': + if (other->type == '=') { + token->type = NOT_EQUAL; + token->value.ival = NOT_EQUAL; + return; + } + break; + case '&': + if (other->type == '&') { + token->type = AND; + token->value.ival = AND; + return; + } + break; + case '|': + if (other->type == '|') { + token->type = OR; + token->value.ival = OR; + return; + } + break; + } + + /* Two string-valued tokens can usually just be mashed + * together. + * + * XXX: Since our 'OTHER' case is currently so loose, this may + * allow some things thruogh that should be treated as + * errors. */ + if ((token->type == IDENTIFIER || token->type == OTHER) && + (other->type == IDENTIFIER || other->type == OTHER)) + { + token->value.str = talloc_strdup_append (token->value.str, + other->value.str); + return; + } + + printf ("Error: Pasting \""); + _token_print (token); + printf ("\" and \""); + _token_print (other); + printf ("\" does not give a valid preprocessing token.\n"); +} + static void _token_list_print (token_list_t *list) { @@ -870,6 +950,43 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, } } + /* After argument substitution, and before further expansion + * below, implement token pasting. */ + + node = substituted->head; + while (node) + { + token_node_t *next_non_space; + + /* Look ahead for a PASTE token, skipping space. 
*/ + next_non_space = node->next; + while (next_non_space && next_non_space->token->type == SPACE) + next_non_space = next_non_space->next; + + if (next_non_space == NULL) + break; + + if (next_non_space->token->type != PASTE) { + node = next_non_space; + continue; + } + + /* Now find the next non-space token after the PASTE. */ + next_non_space = next_non_space->next; + while (next_non_space && next_non_space->token->type == SPACE) + next_non_space = next_non_space->next; + + if (next_non_space == NULL) { + fprintf (stderr, "Error: '##' cannot appear at either end of a macro expansion\n"); + exit (1); + } + + _token_paste (node->token, next_non_space->token); + node->next = next_non_space->next; + + node = node->next; + } + _string_list_push (parser->active, identifier); _glcpp_parser_expand_token_list_onto (parser, substituted, result); _string_list_pop (parser->active); From 8fed1cddae8b024972d0c08f120bfd0292cb9cca Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 09:32:12 -0700 Subject: [PATCH 110/148] stash --- glcpp-lex.l | 40 ++++++++++++++ glcpp-parse.y | 141 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 176 insertions(+), 5 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 516f42dee32..97f01d06368 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -62,11 +62,47 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
return HASH_UNDEF; } +{HASH}if { + yyextra->space_tokens = 0; + return HASH_IF; +} + +{HASH}elif { + yyextra->space_tokens = 0; + return HASH_ELIF; +} + +{HASH}else { + yyextra->space_tokens = 0; + return HASH_ELSE; +} + +{HASH}endif { + yyextra->space_tokens = 0; + return HASH_ENDIF; +} + {HASH} { yyextra->space_tokens = 0; return HASH; } +{DECIMAL_INTEGER} { + yylval.ival = strtoll (yytext, NULL, 10); + return INTEGER; +} + +{OCTAL_INTEGER} { + yylval.ival = strtoll (yytext + 1, NULL, 8); + return INTEGER; +} + +{HEXADECIMAL_INTEGER} { + yylval.ival = strtoll (yytext + 2, NULL, 16); + return INTEGER; +} + + {IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; @@ -108,6 +144,10 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? return PASTE; } +"defined" { + return DEFINED; +} + {PUNCTUATION} { return yytext[0]; } diff --git a/glcpp-parse.y b/glcpp-parse.y index 330d3ab3bc4..58e1e655fdb 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -25,6 +25,7 @@ #include #include #include +#include #include "glcpp.h" @@ -124,27 +125,46 @@ glcpp_parser_lex (glcpp_parser_t *parser); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_UNDEF IDENTIFIER NEWLINE OTHER SPACE -%token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE -%type punctuator SPACE +%token DEFINED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER INTEGER NEWLINE OTHER SPACE +%token PASTE +%type expression INTEGER punctuator SPACE %type IDENTIFIER OTHER %type identifier_list %type preprocessing_token %type pp_tokens replacement_list text_line +%left OR +%left AND +%left '|' +%left '^' +%left '&' +%left EQUAL NOT_EQUAL +%left '<' '>' LESS_OR_EQUAL GREATER_OR_EQUAL +%left LEFT_SHIFT RIGHT_SHIFT +%left '+' '-' +%left '*' '/' '%' +%right UNARY %% input: /* empty */ | input line { - printf ("\n"); + if 
(parser->skip_stack == NULL || + parser->skip_stack->type == SKIP_NO_SKIP) + { + printf ("\n"); + } } ; line: control_line | text_line { - _glcpp_parser_print_expanded_token_list (parser, $1); + if (parser->skip_stack == NULL || + parser->skip_stack->type == SKIP_NO_SKIP) + { + _glcpp_parser_print_expanded_token_list (parser, $1); + } talloc_free ($1); } | HASH non_directive @@ -171,9 +191,114 @@ control_line: } talloc_free ($2); } +| HASH_IF expression NEWLINE { + _glcpp_parser_skip_stack_push_if (parser, $2); + } +| HASH_IFDEF IDENTIFIER NEWLINE { + string_list_t *macro = hash_table_find (parser->defines, $2); + talloc_free ($2); + _glcpp_parser_skip_stack_push_if (parser, macro != NULL); + } +| HASH_IFNDEF IDENTIFIER NEWLINE { + string_list_t *macro = hash_table_find (parser->defines, $2); + talloc_free ($2); + _glcpp_parser_skip_stack_push_if (parser, macro == NULL); + } +| HASH_ELIF expression NEWLINE { + _glcpp_parser_skip_stack_change_if (parser, "#elif", $2); + } +| HASH_ELSE NEWLINE { + _glcpp_parser_skip_stack_change_if (parser, "else", 1); + } +| HASH_ENDIF NEWLINE { + _glcpp_parser_skip_stack_pop (parser); + } | HASH NEWLINE ; +expression: + INTEGER { + $$ = $1; + } +| expression OR expression { + $$ = $1 || $3; + } +| expression AND expression { + $$ = $1 && $3; + } +| expression '|' expression { + $$ = $1 | $3; + } +| expression '^' expression { + $$ = $1 ^ $3; + } +| expression '&' expression { + $$ = $1 & $3; + } +| expression NOT_EQUAL expression { + $$ = $1 != $3; + } +| expression EQUAL expression { + $$ = $1 == $3; + } +| expression GREATER_OR_EQUAL expression { + $$ = $1 >= $3; + } +| expression LESS_OR_EQUAL expression { + $$ = $1 <= $3; + } +| expression '>' expression { + $$ = $1 > $3; + } +| expression '<' expression { + $$ = $1 < $3; + } +| expression RIGHT_SHIFT expression { + $$ = $1 >> $3; + } +| expression LEFT_SHIFT expression { + $$ = $1 << $3; + } +| expression '-' expression { + $$ = $1 - $3; + } +| expression '+' expression { + $$ 
= $1 + $3; + } +| expression '%' expression { + $$ = $1 % $3; + } +| expression '/' expression { + $$ = $1 / $3; + } +| expression '*' expression { + $$ = $1 * $3; + } +| '!' expression %prec UNARY { + $$ = ! $2; + } +| '~' expression %prec UNARY { + $$ = ~ $2; + } +| '-' expression %prec UNARY { + $$ = - $2; + } +| '+' expression %prec UNARY { + $$ = + $2; + } +| DEFINED IDENTIFIER %prec UNARY { + string_list_t *macro = hash_table_find (parser->defines, $2); + talloc_free ($2); + if (macro) + $$ = 1; + else + $$ = 0; + } +| '(' expression ')' { + $$ = $2; + } +; + identifier_list: IDENTIFIER { $$ = _string_list_create (parser); @@ -219,6 +344,9 @@ preprocessing_token: IDENTIFIER { $$ = _token_create_str (parser, IDENTIFIER, $1); } +| INTEGER { + $$ = _token_create_ival (parser, INTEGER, $1); + } | punctuator { $$ = _token_create_ival (parser, $1, $1); } @@ -546,6 +674,9 @@ _token_print (token_t *token) } switch (token->type) { + case INTEGER: + printf ("%" PRIxMAX, token->value.ival); + break; case IDENTIFIER: case OTHER: printf ("%s", token->value.str); From f6914fd37b2b66d7be1ba0c31450d89d1785ccce Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 09:32:57 -0700 Subject: [PATCH 111/148] Implement #if and friends. With this change, tests 41 through 49 all pass. (The defined operator appears to be somehow broken so that test 50 doesn't pass yet.) 
--- glcpp.h | 2 +- tests/049-if-expression-precedence.c | 1 - tests/050-if-defined.c | 2 -- tests/glcpp-test | 2 +- 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/glcpp.h b/glcpp.h index 21db918cdce..36ab0e7ca5c 100644 --- a/glcpp.h +++ b/glcpp.h @@ -49,7 +49,7 @@ typedef struct token_list token_list_t; typedef union YYSTYPE { - int ival; + intmax_t ival; char *str; string_list_t *string_list; token_t *token; diff --git a/tests/049-if-expression-precedence.c b/tests/049-if-expression-precedence.c index cea935220fd..833ea03882a 100644 --- a/tests/049-if-expression-precedence.c +++ b/tests/049-if-expression-precedence.c @@ -3,4 +3,3 @@ failure with operator precedence #else success #endif - diff --git a/tests/050-if-defined.c b/tests/050-if-defined.c index 9838cc747d5..34f0f95140e 100644 --- a/tests/050-if-defined.c +++ b/tests/050-if-defined.c @@ -15,5 +15,3 @@ failure_3 #else success_3 #endif - - diff --git a/tests/glcpp-test b/tests/glcpp-test index 63041552104..bf88d4462e1 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -6,5 +6,5 @@ for test in *.c; do ../glcpp < $test > $test.out gcc -E $test -o $test.gcc grep -v '^#' < $test.gcc > $test.expected - diff -u $test.expected $test.out + diff -B -u $test.expected $test.out done From 16c1e980e2e3c8852ce9bea85afe094c24e420fa Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 09:35:34 -0700 Subject: [PATCH 112/148] Fix lexing of "defined" as an operator, not an identifier. Simply need to move the rule for IDENTIFIER to be after "defined" and everything is happy. With this change, tests 50 through 53 all pass now. --- glcpp-lex.l | 11 +++++------ tests/053-if-divide-and-shift.c | 1 - 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 97f01d06368..d6b7726d36d 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -102,12 +102,6 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
return INTEGER; } - -{IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - "<<" { return LEFT_SHIFT; } @@ -148,6 +142,11 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? return DEFINED; } +{IDENTIFIER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; +} + {PUNCTUATION} { return yytext[0]; } diff --git a/tests/053-if-divide-and-shift.c b/tests/053-if-divide-and-shift.c index ddc1573ab26..d24c54a88d1 100644 --- a/tests/053-if-divide-and-shift.c +++ b/tests/053-if-divide-and-shift.c @@ -13,4 +13,3 @@ failure_3 #else success_3 #endif - From 8e82fcb070d5fae0ec2c763cee4cea225b459664 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 11:15:21 -0700 Subject: [PATCH 113/148] Implement (and test) support for macro expansion within conditional expressions. To do this we have split the existing "HASH_IF expression" into two productions: First is HASH_IF pp_tokens which simply constructs a list of tokens. Then, with that resulting token list, we first evaluate all DEFINED operator tokens, then expand all macros, and finally start lexing from the resulting token list. This brings us to the second production, IF_EXPANDED expression This final production works just like our previous "HASH_IF expression", evaluating a constant integer expression. The new test (54) added for this case now passes. 
--- glcpp-parse.y | 155 +++++++++++++++++++++++++++++++------ glcpp.h | 2 + tests/054-if-with-macros.c | 34 ++++++++ 3 files changed, 169 insertions(+), 22 deletions(-) create mode 100644 tests/054-if-with-macros.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 58e1e655fdb..cce8a70156f 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -96,6 +96,10 @@ _token_list_append (token_list_t *list, token_t *token); static void _token_list_append_list (token_list_t *list, token_list_t *tail); +static void +_glcpp_parser_evaluate_defined (glcpp_parser_t *parser, + token_list_t *list); + static void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list); @@ -120,14 +124,17 @@ _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser); static int glcpp_parser_lex (glcpp_parser_t *parser); +static void +glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list); + %} %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER INTEGER NEWLINE OTHER SPACE +%token DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER SPACE %token PASTE -%type expression INTEGER punctuator SPACE +%type expression INTEGER operator SPACE %type IDENTIFIER OTHER %type identifier_list %type preprocessing_token @@ -148,28 +155,39 @@ glcpp_parser_lex (glcpp_parser_t *parser); input: /* empty */ -| input line { +| input line +; + +line: + control_line { if (parser->skip_stack == NULL || parser->skip_stack->type == SKIP_NO_SKIP) { printf ("\n"); } } -; - -line: - control_line | text_line { if (parser->skip_stack == NULL || parser->skip_stack->type == SKIP_NO_SKIP) { _glcpp_parser_print_expanded_token_list (parser, $1); + printf ("\n"); } talloc_free ($1); } +| expanded_line | HASH non_directive ; 
+expanded_line: + IF_EXPANDED expression NEWLINE { + _glcpp_parser_skip_stack_push_if (parser, $2); + } +| ELIF_EXPANDED expression NEWLINE { + _glcpp_parser_skip_stack_change_if (parser, "elif", $2); + } +; + control_line: HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { _define_object_macro (parser, $2, $3); @@ -191,8 +209,17 @@ control_line: } talloc_free ($2); } -| HASH_IF expression NEWLINE { - _glcpp_parser_skip_stack_push_if (parser, $2); +| HASH_IF pp_tokens NEWLINE { + token_list_t *expanded; + token_t *token; + + expanded = _token_list_create (parser); + token = _token_create_ival (parser, IF_EXPANDED, IF_EXPANDED); + _token_list_append (expanded, token); + talloc_unlink (parser, token); + _glcpp_parser_evaluate_defined (parser, $2); + _glcpp_parser_expand_token_list_onto (parser, $2, expanded); + glcpp_parser_lex_from (parser, expanded); } | HASH_IFDEF IDENTIFIER NEWLINE { string_list_t *macro = hash_table_find (parser->defines, $2); @@ -204,8 +231,17 @@ control_line: talloc_free ($2); _glcpp_parser_skip_stack_push_if (parser, macro == NULL); } -| HASH_ELIF expression NEWLINE { - _glcpp_parser_skip_stack_change_if (parser, "#elif", $2); +| HASH_ELIF pp_tokens NEWLINE { + token_list_t *expanded; + token_t *token; + + expanded = _token_list_create (parser); + token = _token_create_ival (parser, ELIF_EXPANDED, ELIF_EXPANDED); + _token_list_append (expanded, token); + talloc_unlink (parser, token); + _glcpp_parser_evaluate_defined (parser, $2); + _glcpp_parser_expand_token_list_onto (parser, $2, expanded); + glcpp_parser_lex_from (parser, expanded); } | HASH_ELSE NEWLINE { _glcpp_parser_skip_stack_change_if (parser, "else", 1); @@ -286,14 +322,6 @@ expression: | '+' expression %prec UNARY { $$ = + $2; } -| DEFINED IDENTIFIER %prec UNARY { - string_list_t *macro = hash_table_find (parser->defines, $2); - talloc_free ($2); - if (macro) - $$ = 1; - else - $$ = 0; - } | '(' expression ')' { $$ = $2; } @@ -347,7 +375,7 @@ preprocessing_token: | INTEGER { $$ 
= _token_create_ival (parser, INTEGER, $1); } -| punctuator { +| operator { $$ = _token_create_ival (parser, $1, $1); } | OTHER { @@ -358,7 +386,7 @@ preprocessing_token: } ; -punctuator: +operator: '[' { $$ = '['; } | ']' { $$ = ']'; } | '(' { $$ = '('; } @@ -389,6 +417,7 @@ punctuator: | ';' { $$ = ';'; } | ',' { $$ = ','; } | PASTE { $$ = PASTE; } +| DEFINED { $$ = DEFINED; } ; %% @@ -830,6 +859,9 @@ glcpp_parser_create (void) parser->skip_stack = NULL; + parser->lex_from_list = NULL; + parser->lex_from_node = NULL; + return parser; } @@ -849,6 +881,39 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } +/* Replace any occurences of DEFINED tokens in 'list' with either a + * '0' or '1' INTEGER token depending on whether the next token in the + * list is defined or not. */ +static void +_glcpp_parser_evaluate_defined (glcpp_parser_t *parser, + token_list_t *list) +{ + token_node_t *node, *next; + string_list_t *macro; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) { + if (node->token->type != DEFINED) + continue; + next = node->next; + while (next && next->token->type == SPACE) + next = next->next; + if (next == NULL || next->token->type != IDENTIFIER) { + fprintf (stderr, "Error: operator \"defined\" requires an identifier\n"); + exit (1); + } + macro = hash_table_find (parser->defines, + next->token->value.str); + + node->token->type = INTEGER; + node->token->value.ival = (macro != NULL); + node->next = next->next; + } +} + + /* Appends onto 'expansion' a non-macro token or the expansion of an * object-like macro. 
* @@ -1206,7 +1271,53 @@ _define_function_macro (glcpp_parser_t *parser, static int glcpp_parser_lex (glcpp_parser_t *parser) { - return glcpp_lex (parser->scanner); + token_node_t *node; + int ret; + + if (parser->lex_from_list == NULL) + return glcpp_lex (parser->scanner); + + node = parser->lex_from_node; + + if (node == NULL) { + talloc_free (parser->lex_from_list); + parser->lex_from_list = NULL; + return NEWLINE; + } + + yylval = node->token->value; + ret = node->token->type; + + parser->lex_from_node = node->next; + + return ret; +} + +static void +glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list) +{ + token_node_t *node; + + assert (parser->lex_from_list == NULL); + + /* Copy list, eliminating any space tokens. */ + parser->lex_from_list = _token_list_create (parser); + + for (node = list->head; node; node = node->next) { + if (node->token->type == SPACE) + continue; + _token_list_append (parser->lex_from_list, node->token); + } + + talloc_free (list); + + parser->lex_from_node = parser->lex_from_list->head; + + /* It's possible the list consisted of nothing but whitespace. 
*/ + if (parser->lex_from_node == NULL) { + talloc_free (parser->lex_from_list); + parser->lex_from_list = NULL; + } } static void diff --git a/glcpp.h b/glcpp.h index 36ab0e7ca5c..e5be1a6cd62 100644 --- a/glcpp.h +++ b/glcpp.h @@ -129,6 +129,8 @@ struct glcpp_parser { string_list_t *active; int space_tokens; skip_node_t *skip_stack; + token_list_t *lex_from_list; + token_node_t *lex_from_node; }; glcpp_parser_t * diff --git a/tests/054-if-with-macros.c b/tests/054-if-with-macros.c new file mode 100644 index 00000000000..3da79a0d96e --- /dev/null +++ b/tests/054-if-with-macros.c @@ -0,0 +1,34 @@ +#define one 1 +#define two 2 +#define three 3 +#define five 5 +#if five < two +failure_1 +#else +success_1 +#endif +#if three >= two +success_2 +#else +failure_2 +#endif +#if two + three <= five +success_3 +#else +failure_3 +#endif +#if five - two == three +success_4 +#else +failure_4 +#endif +#if one > three +failure_5 +#else +success_5 +#endif +#if one != five +success_6 +#else +failure_6 +#endif From 0324cad796b7a68634a729719f08fcbb5bbd04cc Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 15:53:05 -0700 Subject: [PATCH 114/148] All macro lookups should be of type macro_t, not string_list_t. This is what I get for using a non-type-safe hash-table implementation. 
--- glcpp-parse.y | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index cce8a70156f..a809ebf3af5 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -199,7 +199,7 @@ control_line: _define_function_macro (parser, $2, $4, $6); } | HASH_UNDEF IDENTIFIER NEWLINE { - string_list_t *macro = hash_table_find (parser->defines, $2); + macro_t *macro = hash_table_find (parser->defines, $2); if (macro) { /* XXX: Need hash table to support a real way * to remove an element rather than prefixing @@ -222,12 +222,12 @@ control_line: glcpp_parser_lex_from (parser, expanded); } | HASH_IFDEF IDENTIFIER NEWLINE { - string_list_t *macro = hash_table_find (parser->defines, $2); + macro_t *macro = hash_table_find (parser->defines, $2); talloc_free ($2); _glcpp_parser_skip_stack_push_if (parser, macro != NULL); } | HASH_IFNDEF IDENTIFIER NEWLINE { - string_list_t *macro = hash_table_find (parser->defines, $2); + macro_t *macro = hash_table_find (parser->defines, $2); talloc_free ($2); _glcpp_parser_skip_stack_push_if (parser, macro == NULL); } @@ -889,7 +889,7 @@ _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, token_list_t *list) { token_node_t *node, *next; - string_list_t *macro; + macro_t *macro; if (list == NULL) return; From 95951ea7bb8728cf54ae4136cb59d0af9e8a06bd Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 15:57:10 -0700 Subject: [PATCH 115/148] Treat newlines as space when invoking a function-like macro invocation. This adds three new pieces of state to the parser, (is_control_line, newline_as_space, and paren_count), and a large amount of messy code. I'd definitely like to see a cleaner solution for this. With this fix, the "define-func-extra-newlines" now passes so we put it back to test #26 where it was originally (lately it has been known as test #55). Also, we tweak test 25 slightly. 
Previously this test was ending a file with a function-like macro name that was not actually a macro (not followed by a left parenthesis). As is, this fix was making that test fail because the text_line production expects to see a terminating NEWLINE, but that NEWLINE is now getting turned into a SPACE here. This seems unlikely to be a problem in the wild, (function macros being used in a non-macro sense seems rare enough---but more than likely they won't happen at the end of a file). Still, we document this shortcoming in the README. --- README | 4 ++ glcpp-parse.y | 61 ++++++++++++++++++- glcpp.h | 3 + tests/025-func-macro-as-non-macro.c | 2 +- ...nes.c => 026-define-func-extra-newlines.c} | 0 5 files changed, 67 insertions(+), 3 deletions(-) rename tests/{055-define-func-extra-newlines.c => 026-define-func-extra-newlines.c} (100%) diff --git a/README b/README index f0f64c2644a..ab42a3ffe12 100644 --- a/README +++ b/README @@ -24,3 +24,7 @@ parentheses. The #error, #pragma, #extension, #version, and #line macros are not yet supported. + +A file that ends with a function-like macro name as the last +non-whitespace token will result in a parse error, (where it should be +passed through as is). 
\ No newline at end of file diff --git a/glcpp-parse.y b/glcpp-parse.y index a809ebf3af5..1346b65aff6 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -856,6 +856,9 @@ glcpp_parser_create (void) hash_table_string_compare); parser->active = _string_list_create (parser); parser->space_tokens = 1; + parser->newline_as_space = 0; + parser->in_control_line = 0; + parser->paren_count = 0; parser->skip_stack = NULL; @@ -1274,8 +1277,62 @@ glcpp_parser_lex (glcpp_parser_t *parser) token_node_t *node; int ret; - if (parser->lex_from_list == NULL) - return glcpp_lex (parser->scanner); + if (parser->lex_from_list == NULL) { + ret = glcpp_lex (parser->scanner); + + /* XXX: This ugly block of code exists for the sole + * purpose of converting a NEWLINE token into a SPACE + * token, but only in the case where we have seen a + * function-like macro name, but have not yet seen its + * closing parenthesis. + * + * There's perhaps a more compact way to do this with + * mid-rule actions in the grammar. + * + * I'm definitely not pleased with the complexity of + * this code here. 
+ */ + if (parser->newline_as_space) + { + if (ret == '(') { + parser->paren_count++; + } else if (ret == ')') { + parser->paren_count--; + if (parser->paren_count == 0) + parser->newline_as_space = 0; + } else if (ret == NEWLINE) { + ret = SPACE; + } else if (ret != SPACE) { + if (parser->paren_count == 0) + parser->newline_as_space = 0; + } + } + else if (parser->in_control_line) + { + if (ret == NEWLINE) + parser->in_control_line = 0; + } + else if (ret == HASH_DEFINE_OBJ || ret == HASH_DEFINE_FUNC || + ret == HASH_UNDEF || ret == HASH_IF || + ret == HASH_IFDEF || ret == HASH_IFNDEF || + ret == HASH_ELIF || ret == HASH_ELSE || + ret == HASH_ENDIF || ret == HASH) + { + parser->in_control_line = 1; + } + else if (ret == IDENTIFIER) + { + macro_t *macro; + macro = hash_table_find (parser->defines, + yylval.str); + if (macro && macro->is_function) { + parser->newline_as_space = 1; + parser->paren_count = 0; + } + } + + return ret; + } node = parser->lex_from_node; diff --git a/glcpp.h b/glcpp.h index e5be1a6cd62..5c8c304a9ca 100644 --- a/glcpp.h +++ b/glcpp.h @@ -128,6 +128,9 @@ struct glcpp_parser { struct hash_table *defines; string_list_t *active; int space_tokens; + int newline_as_space; + int in_control_line; + int paren_count; skip_node_t *skip_stack; token_list_t *lex_from_list; token_node_t *lex_from_node; diff --git a/tests/025-func-macro-as-non-macro.c b/tests/025-func-macro-as-non-macro.c index 3dbe026d9dd..b433671d1bf 100644 --- a/tests/025-func-macro-as-non-macro.c +++ b/tests/025-func-macro-as-non-macro.c @@ -1,2 +1,2 @@ #define foo(bar) bar -foo +foo bar diff --git a/tests/055-define-func-extra-newlines.c b/tests/026-define-func-extra-newlines.c similarity index 100% rename from tests/055-define-func-extra-newlines.c rename to tests/026-define-func-extra-newlines.c From a8ea26d7c94526518670e54f44336f433d0ac77c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 16:18:05 -0700 Subject: [PATCH 116/148] Add two tests developed on the take-2 
branch. The define-chain-obj-to-func-parens-in-text test passes here while the if-with-macros test fails. --- tests/054-if-with-macros.c | 34 +++++++++++++++++++ ...-define-chain-obj-to-func-parens-in-text.c | 3 ++ 2 files changed, 37 insertions(+) create mode 100644 tests/054-if-with-macros.c create mode 100644 tests/055-define-chain-obj-to-func-parens-in-text.c diff --git a/tests/054-if-with-macros.c b/tests/054-if-with-macros.c new file mode 100644 index 00000000000..3da79a0d96e --- /dev/null +++ b/tests/054-if-with-macros.c @@ -0,0 +1,34 @@ +#define one 1 +#define two 2 +#define three 3 +#define five 5 +#if five < two +failure_1 +#else +success_1 +#endif +#if three >= two +success_2 +#else +failure_2 +#endif +#if two + three <= five +success_3 +#else +failure_3 +#endif +#if five - two == three +success_4 +#else +failure_4 +#endif +#if one > three +failure_5 +#else +success_5 +#endif +#if one != five +success_6 +#else +failure_6 +#endif diff --git a/tests/055-define-chain-obj-to-func-parens-in-text.c b/tests/055-define-chain-obj-to-func-parens-in-text.c new file mode 100644 index 00000000000..00f2c2346d6 --- /dev/null +++ b/tests/055-define-chain-obj-to-func-parens-in-text.c @@ -0,0 +1,3 @@ +#define failure() success +#define foo failure +foo() From 7db2402a8009772a3f10d19cfc7f30be9ee79295 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 17:01:57 -0700 Subject: [PATCH 117/148] Add support (and test) for an object-to-function chain with the parens in the content. That is, the following case: #define foo(x) (x) #define bar bar(baz) which now works with this (ugly) commit. I definitely want to come up with something cleaner than this. 
--- glcpp-parse.y | 67 ++++++++++++++----- ...-define-chain-obj-to-func-parens-in-text.c | 3 + 2 files changed, 53 insertions(+), 17 deletions(-) create mode 100644 tests/055-define-chain-obj-to-func-parens-in-text.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 1346b65aff6..abdcd1ed5d8 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -926,9 +926,9 @@ _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, * needs further expansion. */ static int -_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, - token_t *token, - token_list_t *result) +_expand_token_onto (glcpp_parser_t *parser, + token_t *token, + token_list_t *result) { const char *identifier; macro_t *macro; @@ -1075,10 +1075,10 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) /* Prints the expansion of *node (consuming further tokens from the * list as necessary). Upon return *node will be the last consumed * node, such that further processing can continue with node->next. */ -static void -_glcpp_parser_expand_function_onto (glcpp_parser_t *parser, - token_node_t **node_ret, - token_list_t *result) +static function_status_t +_expand_function_onto (glcpp_parser_t *parser, + token_node_t **node_ret, + token_list_t *result) { macro_t *macro; token_node_t *node; @@ -1103,7 +1103,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, break; case FUNCTION_NOT_A_FUNCTION: _token_list_append (result, node->token); - return; + return FUNCTION_NOT_A_FUNCTION; case FUNCTION_UNBALANCED_PARENTHESES: fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", identifier); @@ -1112,7 +1112,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, if (macro->replacements == NULL) { talloc_free (arguments); - return; + return FUNCTION_STATUS_SUCCESS; } if (_argument_list_length (arguments) != @@ -1123,7 +1123,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, identifier, _argument_list_length (arguments), _string_list_length (macro->parameters)); - 
return; + exit (1); } /* Perform argument substitution on the replacement list. */ @@ -1191,6 +1191,8 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, _string_list_pop (parser->active); talloc_free (arguments); + + return FUNCTION_STATUS_SUCCESS; } static void @@ -1199,19 +1201,50 @@ _glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, token_list_t *result) { token_node_t *node; + token_list_t *intermediate, *list_orig = list; + int i, need_rescan = 0; if (list == NULL) return; - for (node = list->head; node; node = node->next) - { - if (_glcpp_parser_expand_token_onto (parser, node->token, - result)) - { - _glcpp_parser_expand_function_onto (parser, &node, - result); + intermediate = _token_list_create (parser); + + /* XXX: The two-pass expansion here is really ugly. The + * problem this is solving is that we can expand a macro into + * a function-like macro name, and then we need to recognize + * that as a function-like macro, but perhaps the parentheses + * and arguments aren't on the token list yet, (since they are + * in the actual content so they are part of what we are + * expanding. + * + * This ugly hack works, but is messy, fragile, and hard to + * maintain. I think a cleaner solution would separate the + * notions of expanding and appending and avoid this problem + * altogether. 
+ */ + + for (i = 0; i < 2; i++) { + if (i == 1) { + list = intermediate; + intermediate = _token_list_create (parser); } + for (node = list->head; node; node = node->next) + { + if (_expand_token_onto (parser, node->token, + intermediate)) + { + if (_expand_function_onto (parser, &node, + intermediate)) + { + need_rescan = 1; + } + } + } + if (list != list_orig) + talloc_free (list); } + + _token_list_append_list (result, intermediate); } void diff --git a/tests/055-define-chain-obj-to-func-parens-in-text.c b/tests/055-define-chain-obj-to-func-parens-in-text.c new file mode 100644 index 00000000000..00f2c2346d6 --- /dev/null +++ b/tests/055-define-chain-obj-to-func-parens-in-text.c @@ -0,0 +1,3 @@ +#define failure() success +#define foo failure +foo() From dd7490093d84ce74a99922c3544b51c3f5d43345 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 10:12:33 -0700 Subject: [PATCH 118/148] Avoid treating an expanded comma as an argument separator. That is, a function-like invocation foo(x) is valid as a single-argument invocation even if 'x' is a macro that expands into a value with a comma. Add a new COMMA_FINAL token type to handle this, and add a test for this case, (which passes). 
--- glcpp-parse.y | 18 ++++++++++++++++-- tests/056-macro-argument-with-comma.c | 4 ++++ 2 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 tests/056-macro-argument-with-comma.c diff --git a/glcpp-parse.y b/glcpp-parse.y index abdcd1ed5d8..b2684d06d98 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -132,7 +132,7 @@ glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER SPACE +%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER SPACE %token PASTE %type expression INTEGER operator SPACE %type IDENTIFIER OTHER @@ -740,6 +740,9 @@ _token_print (token_t *token) case PASTE: printf ("##"); break; + case COMMA_FINAL: + printf (","); + break; default: fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type); break; @@ -936,7 +939,18 @@ _expand_token_onto (glcpp_parser_t *parser, /* We only expand identifiers */ if (token->type != IDENTIFIER) { - _token_list_append (result, token); + /* We change any COMMA into a COMMA_FINAL to prevent + * it being mistaken for an argument separator + * later. 
*/ + if (token->type == ',') { + token_t *new_token; + + new_token = _token_create_ival (result, COMMA_FINAL, + COMMA_FINAL); + _token_list_append (result, new_token); + } else { + _token_list_append (result, token); + } return 0; } diff --git a/tests/056-macro-argument-with-comma.c b/tests/056-macro-argument-with-comma.c new file mode 100644 index 00000000000..58701d1f25b --- /dev/null +++ b/tests/056-macro-argument-with-comma.c @@ -0,0 +1,4 @@ +#define bar with,embedded,commas +#define function(x) success +#define foo function +foo(bar) From 602a34769a0850a98366c4011ce8b8c7d08c9276 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 10:14:38 -0700 Subject: [PATCH 119/148] Add test 56 for a comma within the expansion of an argument. This case was tricky on the take-2 branch. It happens to be passing already here. --- tests/056-macro-argument-with-comma.c | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 tests/056-macro-argument-with-comma.c diff --git a/tests/056-macro-argument-with-comma.c b/tests/056-macro-argument-with-comma.c new file mode 100644 index 00000000000..58701d1f25b --- /dev/null +++ b/tests/056-macro-argument-with-comma.c @@ -0,0 +1,4 @@ +#define bar with,embedded,commas +#define function(x) success +#define foo function +foo(bar) From a65cf7b1d29e98ef3bf31051df8a06cb394d131f Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 11:55:36 -0700 Subject: [PATCH 120/148] Make two list-processing functions do nothing with an empty list. This just makes these functions easier to understand all around. In the case of _token_list_append_list this is an actual bug fix, (where appending an empty list onto a non-empty list would previously scramble the tail pointer of the original list). 
--- glcpp-parse.y | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index b2684d06d98..ba79a611f6e 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -666,6 +666,9 @@ _token_list_append (token_list_t *list, token_t *token) void _token_list_append_list (token_list_t *list, token_list_t *tail) { + if (tail == NULL || tail->head == NULL) + return; + if (list->head == NULL) { list->head = tail->head; } else { @@ -1218,7 +1221,7 @@ _glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, token_list_t *intermediate, *list_orig = list; int i, need_rescan = 0; - if (list == NULL) + if (list == NULL || list->head == NULL) return; intermediate = _token_list_create (parser); From a19297b26e971e5a9dbe00b4254931505da4b5a9 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 13:29:19 -0700 Subject: [PATCH 121/148] Provide support for empty arguments in macro invocations. For this we always add a new argument to the argument list as soon as possible, without waiting until we see some argument token. This does mean we need to take some extra care when comparing the number of arguments with the number of expected arguments. In addition to matching numbers, we also support one (empty) argument when zero arguments are expected. Add a test case here for this, which does pass. 
--- glcpp-parse.y | 20 +++++++++++--------- tests/057-empty-arguments.c | 6 ++++++ 2 files changed, 17 insertions(+), 9 deletions(-) create mode 100644 tests/057-empty-arguments.c diff --git a/glcpp-parse.y b/glcpp-parse.y index ba79a611f6e..3e0a96528b4 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -1044,7 +1044,8 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) last = node; node = node->next; - argument = NULL; + argument = _token_list_create (arguments); + _argument_list_append (arguments, argument); for (paren_count = 1; node; last = node, node = node->next) { if (node->token->type == '(') @@ -1064,18 +1065,16 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) if (node->token->type == ',' && paren_count == 1) { - if (argument) - _token_list_trim_trailing_space (argument); - argument = NULL; + _token_list_trim_trailing_space (argument); + argument = _token_list_create (arguments); + _argument_list_append (arguments, argument); } else { - if (argument == NULL) { + if (argument->head == NULL) { /* Don't treat initial whitespace as * part of the arguement. */ if (node->token->type == SPACE) continue; - argument = _token_list_create (arguments); - _argument_list_append (arguments, argument); } _token_list_append (argument, node->token); } @@ -1132,8 +1131,11 @@ _expand_function_onto (glcpp_parser_t *parser, return FUNCTION_STATUS_SUCCESS; } - if (_argument_list_length (arguments) != - _string_list_length (macro->parameters)) + if (! 
((_argument_list_length (arguments) == + _string_list_length (macro->parameters)) || + (_string_list_length (macro->parameters) == 0 && + _argument_list_length (arguments) == 1 && + arguments->head->argument->head == NULL))) { fprintf (stderr, "Error: macro %s invoked with %d arguments (expected %d)\n", diff --git a/tests/057-empty-arguments.c b/tests/057-empty-arguments.c new file mode 100644 index 00000000000..6140232865d --- /dev/null +++ b/tests/057-empty-arguments.c @@ -0,0 +1,6 @@ +#define zero() success +zero() +#define one(x) success +one() +#define two(x,y) success +two(,) From fb48fcdf9b5a5b002469ed247809fb0294d6c7a8 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 13:44:13 -0700 Subject: [PATCH 122/148] Add test for macro invocations with empty arguments. This case was recently solved on the take-2 branch. --- tests/057-empty-arguments.c | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 tests/057-empty-arguments.c diff --git a/tests/057-empty-arguments.c b/tests/057-empty-arguments.c new file mode 100644 index 00000000000..6140232865d --- /dev/null +++ b/tests/057-empty-arguments.c @@ -0,0 +1,6 @@ +#define zero() success +zero() +#define one(x) success +one() +#define two(x,y) success +two(,) From 85b50e840d969c4d9ebcfcc3df1df7a95e07e34e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 14:01:18 -0700 Subject: [PATCH 123/148] Add placeholder tokens to support pasting with empty arguments. Along with a passing test to verify that this works. 
--- glcpp-parse.y | 36 +++++++++++++++++++---- tests/058-token-pasting-empty-arguments.c | 5 ++++ 2 files changed, 35 insertions(+), 6 deletions(-) create mode 100644 tests/058-token-pasting-empty-arguments.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 3e0a96528b4..d587a4bf338 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -132,7 +132,7 @@ glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER SPACE +%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER PLACEHOLDER SPACE %token PASTE %type expression INTEGER operator SPACE %type IDENTIFIER OTHER @@ -746,6 +746,9 @@ _token_print (token_t *token) case COMMA_FINAL: printf (","); break; + case PLACEHOLDER: + /* Nothing to print. */ + break; default: fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type); break; @@ -756,6 +759,17 @@ _token_print (token_t *token) static void _token_paste (token_t *token, token_t *other) { + /* Pasting a placeholder onto anything makes no change. */ + if (other->type == PLACEHOLDER) + return; + + /* When 'token' is a placeholder, just return contents of 'other'. */ + if (token->type == PLACEHOLDER) { + token->type = other->type; + token->value = other->value; + return; + } + /* A very few single-character punctuators can be combined * with another to form a multi-character punctuator. */ switch (token->type) { @@ -1159,10 +1173,20 @@ _expand_function_onto (glcpp_parser_t *parser, argument = _argument_list_member_at (arguments, parameter_index); /* Before substituting, we expand the argument - * tokens. 
*/ - _glcpp_parser_expand_token_list_onto (parser, - argument, - substituted); + * tokens, or append a placeholder token for + * an empty argument. */ + if (argument->head) { + _glcpp_parser_expand_token_list_onto (parser, + argument, + substituted); + } else { + token_t *new_token; + + new_token = _token_create_ival (substituted, + PLACEHOLDER, + PLACEHOLDER); + _token_list_append (substituted, new_token); + } } else { _token_list_append (substituted, node->token); } @@ -1196,7 +1220,7 @@ _expand_function_onto (glcpp_parser_t *parser, if (next_non_space == NULL) { fprintf (stderr, "Error: '##' cannot appear at either end of a macro expansion\n"); - exit (1); + return FUNCTION_STATUS_SUCCESS; } _token_paste (node->token, next_non_space->token); diff --git a/tests/058-token-pasting-empty-arguments.c b/tests/058-token-pasting-empty-arguments.c new file mode 100644 index 00000000000..8ac260c76b6 --- /dev/null +++ b/tests/058-token-pasting-empty-arguments.c @@ -0,0 +1,5 @@ +#define paste(x,y) x ## y +paste(a,b) +paste(a,) +paste(,b) +paste(,) From 050e3ded1ea05cfe336dd0cd20212d17d7960c9e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 14:36:29 -0700 Subject: [PATCH 124/148] Implement token pasting of integers. To do this correctly, we change the lexer to lex integers as string values, (new token type of INTEGER_STRING), and only convert to integer values when evaluating an expression value. Add a new test case for this, (which does pass now). 
--- Makefile | 2 +- glcpp-lex.l | 12 ++++++------ glcpp-parse.y | 32 +++++++++++++++++++++---------- tests/059-token-pasting-integer.c | 4 ++++ 4 files changed, 33 insertions(+), 17 deletions(-) create mode 100644 tests/059-token-pasting-integer.c diff --git a/Makefile b/Makefile index 88116128f85..0c06aa880fb 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ CFLAGS = -g override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o xtalloc.o - gcc -o $@ -ltalloc $^ + gcc -o $@ -ltalloc -lm $^ %.c %.h: %.y bison --debug --defines=$*.h --output=$*.c $^ diff --git a/glcpp-lex.l b/glcpp-lex.l index d6b7726d36d..70d47d24975 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -88,18 +88,18 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? } {DECIMAL_INTEGER} { - yylval.ival = strtoll (yytext, NULL, 10); - return INTEGER; + yylval.str = xtalloc_strdup (yyextra, yytext); + return INTEGER_STRING; } {OCTAL_INTEGER} { - yylval.ival = strtoll (yytext + 1, NULL, 8); - return INTEGER; + yylval.str = xtalloc_strdup (yyextra, yytext); + return INTEGER_STRING; } {HEXADECIMAL_INTEGER} { - yylval.ival = strtoll (yytext + 2, NULL, 16); - return INTEGER; + yylval.str = xtalloc_strdup (yyextra, yytext); + return INTEGER_STRING; } "<<" { diff --git a/glcpp-parse.y b/glcpp-parse.y index d587a4bf338..5b2d0d3927a 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -132,10 +132,10 @@ glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER PLACEHOLDER SPACE +%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER INTEGER_STRING 
NEWLINE OTHER PLACEHOLDER SPACE %token PASTE %type expression INTEGER operator SPACE -%type IDENTIFIER OTHER +%type IDENTIFIER INTEGER_STRING OTHER %type identifier_list %type preprocessing_token %type pp_tokens replacement_list text_line @@ -253,7 +253,16 @@ control_line: ; expression: - INTEGER { + INTEGER_STRING { + if (strlen ($1) >= 3 && strncmp ($1, "0x", 2) == 0) { + $$ = strtoll ($1 + 2, NULL, 16); + } else if ($1[0] == '0') { + $$ = strtoll ($1, NULL, 8); + } else { + $$ = strtoll ($1, NULL, 10); + } + } +| INTEGER { $$ = $1; } | expression OR expression { @@ -372,8 +381,8 @@ preprocessing_token: IDENTIFIER { $$ = _token_create_str (parser, IDENTIFIER, $1); } -| INTEGER { - $$ = _token_create_ival (parser, INTEGER, $1); +| INTEGER_STRING { + $$ = _token_create_str (parser, INTEGER_STRING, $1); } | operator { $$ = _token_create_ival (parser, $1, $1); @@ -710,6 +719,7 @@ _token_print (token_t *token) printf ("%" PRIxMAX, token->value.ival); break; case IDENTIFIER: + case INTEGER_STRING: case OTHER: printf ("%s", token->value.str); break; @@ -828,11 +838,13 @@ _token_paste (token_t *token, token_t *other) /* Two string-valued tokens can usually just be mashed * together. * - * XXX: Since our 'OTHER' case is currently so loose, this may - * allow some things thruogh that should be treated as - * errors. */ - if ((token->type == IDENTIFIER || token->type == OTHER) && - (other->type == IDENTIFIER || other->type == OTHER)) + * XXX: This isn't actually legitimate. Several things here + * should result in a diagnostic since the result cannot be a + * valid, single pre-processing token. For example, pasting + * "123" and "abc" is not legal, but we don't catch that + * here. 
*/ + if ((token->type == IDENTIFIER || token->type == OTHER || token->type == INTEGER_STRING) && + (other->type == IDENTIFIER || other->type == OTHER || other->type == INTEGER_STRING)) { token->value.str = talloc_strdup_append (token->value.str, other->value.str); diff --git a/tests/059-token-pasting-integer.c b/tests/059-token-pasting-integer.c new file mode 100644 index 00000000000..37b895a4237 --- /dev/null +++ b/tests/059-token-pasting-integer.c @@ -0,0 +1,4 @@ +#define paste(x,y) x ## y +paste(1,2) +paste(1,000) +paste(identifier,2) From 886e05a35a319cdace9afed93d0cc8df2c7f33e0 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 14:45:20 -0700 Subject: [PATCH 125/148] Add test for token-pasting of integers. This test was tricky to make pass in the take-2 branch. It ends up passing already here with no additional effort, (since we are lexing integers as string-valued token except when in the ST_IF state in the lexer anyway). --- tests/059-token-pasting-integer.c | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 tests/059-token-pasting-integer.c diff --git a/tests/059-token-pasting-integer.c b/tests/059-token-pasting-integer.c new file mode 100644 index 00000000000..37b895a4237 --- /dev/null +++ b/tests/059-token-pasting-integer.c @@ -0,0 +1,4 @@ +#define paste(x,y) x ## y +paste(1,2) +paste(1,000) +paste(identifier,2) From baa17c87485b5e776ec142844f5df38a3df9dccc Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 14:53:51 -0700 Subject: [PATCH 126/148] Remove blank lines from output files before comparing. Recently I'm seeing cases where "gcc -E" mysteriously omits blank lines, (even though it prints the blank lines in other very similar cases). Rather than trying to decipher and imitate this, just get rid of the blank lines. This approach with sed to kill the lines before the diff is better than "diff -B" since when there is an actual difference, the presence of blank lines won't make the diff harder to read. 
--- .gitignore | 1 + tests/glcpp-test | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index d67bd38c93c..b88f0cc75c7 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ glcpp-parse.h *~ tests/*.expected tests/*.gcc +tests/*.glcpp tests/*.out diff --git a/tests/glcpp-test b/tests/glcpp-test index bf88d4462e1..92c994979a9 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -3,8 +3,9 @@ set -e for test in *.c; do echo "Testing $test" - ../glcpp < $test > $test.out + ../glcpp < $test > $test.glcpp + grep -v '^$' < $test.glcpp > $test.out || true gcc -E $test -o $test.gcc - grep -v '^#' < $test.gcc > $test.expected - diff -B -u $test.expected $test.out + grep -v '^#' < $test.gcc | grep -v '^$' > $test.expected || true + diff -u $test.expected $test.out done From 95ec433d59be234cf2695ae091cee4ace3314d21 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 08:00:43 -0700 Subject: [PATCH 127/148] Revert "Add support for an object-to-function chain with the parens in the content." This reverts commit 7db2402a8009772a3f10d19cfc7f30be9ee79295 It doesn't revert the new test case from that commit, just the extremely ugly second-pass implementation. --- glcpp-parse.y | 65 +++++++++++++-------------------------------------- 1 file changed, 16 insertions(+), 49 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 5b2d0d3927a..f4cb72a133f 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -958,9 +958,9 @@ _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, * needs further expansion. 
*/ static int -_expand_token_onto (glcpp_parser_t *parser, - token_t *token, - token_list_t *result) +_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, + token_t *token, + token_list_t *result) { const char *identifier; macro_t *macro; @@ -1117,10 +1117,10 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) /* Prints the expansion of *node (consuming further tokens from the * list as necessary). Upon return *node will be the last consumed * node, such that further processing can continue with node->next. */ -static function_status_t -_expand_function_onto (glcpp_parser_t *parser, - token_node_t **node_ret, - token_list_t *result) +static void +_glcpp_parser_expand_function_onto (glcpp_parser_t *parser, + token_node_t **node_ret, + token_list_t *result) { macro_t *macro; token_node_t *node; @@ -1145,7 +1145,7 @@ _expand_function_onto (glcpp_parser_t *parser, break; case FUNCTION_NOT_A_FUNCTION: _token_list_append (result, node->token); - return FUNCTION_NOT_A_FUNCTION; + return; case FUNCTION_UNBALANCED_PARENTHESES: fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", identifier); @@ -1154,7 +1154,7 @@ _expand_function_onto (glcpp_parser_t *parser, if (macro->replacements == NULL) { talloc_free (arguments); - return FUNCTION_STATUS_SUCCESS; + return; } if (! ((_argument_list_length (arguments) == @@ -1168,7 +1168,7 @@ _expand_function_onto (glcpp_parser_t *parser, identifier, _argument_list_length (arguments), _string_list_length (macro->parameters)); - exit (1); + return; } /* Perform argument substitution on the replacement list. 
*/ @@ -1246,8 +1246,6 @@ _expand_function_onto (glcpp_parser_t *parser, _string_list_pop (parser->active); talloc_free (arguments); - - return FUNCTION_STATUS_SUCCESS; } static void @@ -1256,50 +1254,19 @@ _glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, token_list_t *result) { token_node_t *node; - token_list_t *intermediate, *list_orig = list; - int i, need_rescan = 0; if (list == NULL || list->head == NULL) return; - intermediate = _token_list_create (parser); - - /* XXX: The two-pass expansion here is really ugly. The - * problem this is solving is that we can expand a macro into - * a function-like macro name, and then we need to recognize - * that as a function-like macro, but perhaps the parentheses - * and arguments aren't on the token list yet, (since they are - * in the actual content so they are part of what we are - * expanding. - * - * This ugly hack works, but is messy, fragile, and hard to - * maintain. I think a cleaner solution would separate the - * notions of expanding and appending and avoid this problem - * altogether. - */ - - for (i = 0; i < 2; i++) { - if (i == 1) { - list = intermediate; - intermediate = _token_list_create (parser); - } - for (node = list->head; node; node = node->next) + for (node = list->head; node; node = node->next) + { + if (_glcpp_parser_expand_token_onto (parser, node->token, + result)) { - if (_expand_token_onto (parser, node->token, - intermediate)) - { - if (_expand_function_onto (parser, &node, - intermediate)) - { - need_rescan = 1; - } - } + _glcpp_parser_expand_function_onto (parser, &node, + result); } - if (list != list_orig) - talloc_free (list); } - - _token_list_append_list (result, intermediate); } void From 9b519f9c7997e0ec02c66d39edc12912aebb9eca Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 08:04:13 -0700 Subject: [PATCH 128/148] Stop interrupting the test suite at the first failure. 
This behavior was useful when starting the implementation over ("take-2") where the whole test suite was failing. This made it easy to focus on one test at a time and get each working. More recently, we got the whole suite working, so we don't need this feature anymore. And in the previous commit, we regressed a couple of tests, so it's nice to be able to see all the failures with a single run of the suite. --- tests/glcpp-test | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/glcpp-test b/tests/glcpp-test index 92c994979a9..ba398af0d54 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -1,5 +1,4 @@ #!/bin/sh -set -e for test in *.c; do echo "Testing $test" From 3c93d397050bbeccb7809e53a425c860df947c45 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 08:17:46 -0700 Subject: [PATCH 129/148] Simplify calling conventions of functions under expand_token_list_onto. We previously had a confusing thing where _expand_token_onto would return a non-zero value to indicate that the caller should then call _expand_function_onto. It's much cleaner for _expand_token_onto to just do what's needed and call the necessary function. --- glcpp-parse.y | 159 +++++++++++++++++++++++--------------------------- 1 file changed, 74 insertions(+), 85 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index f4cb72a133f..9f97b2a282a 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -948,81 +948,6 @@ _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, } } - -/* Appends onto 'expansion' a non-macro token or the expansion of an - * object-like macro. - * - * Returns 0 if this token is completely processed. - * - * Returns 1 in the case that 'token' is a function-like macro that - * needs further expansion. 
- */ -static int -_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, - token_t *token, - token_list_t *result) -{ - const char *identifier; - macro_t *macro; - token_list_t *expansion; - - /* We only expand identifiers */ - if (token->type != IDENTIFIER) { - /* We change any COMMA into a COMMA_FINAL to prevent - * it being mistaken for an argument separator - * later. */ - if (token->type == ',') { - token_t *new_token; - - new_token = _token_create_ival (result, COMMA_FINAL, - COMMA_FINAL); - _token_list_append (result, new_token); - } else { - _token_list_append (result, token); - } - return 0; - } - - /* Look up this identifier in the hash table. */ - identifier = token->value.str; - macro = hash_table_find (parser->defines, identifier); - - /* Not a macro, so just append. */ - if (macro == NULL) { - _token_list_append (result, token); - return 0; - } - - /* Finally, don't expand this macro if we're already actively - * expanding it, (to avoid infinite recursion). */ - if (_string_list_contains (parser->active, identifier, NULL)) - { - /* We change the token type here from IDENTIFIER to - * OTHER to prevent any future expansion of this - * unexpanded token. */ - char *str; - token_t *new_token; - - str = xtalloc_strdup (result, token->value.str); - new_token = _token_create_str (result, OTHER, str); - _token_list_append (result, new_token); - return 0; - } - - /* For function-like macros return 1 for further processing. */ - if (macro->is_function) { - return 1; - } - - _string_list_push (parser->active, identifier); - _glcpp_parser_expand_token_list_onto (parser, - macro->replacements, - result); - _string_list_pop (parser->active); - - return 0; -} - typedef enum function_status { FUNCTION_STATUS_SUCCESS, @@ -1114,9 +1039,10 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) return FUNCTION_STATUS_SUCCESS; } -/* Prints the expansion of *node (consuming further tokens from the - * list as necessary). 
Upon return *node will be the last consumed - * node, such that further processing can continue with node->next. */ +/* Appends expansion of *node (consuming further tokens from the list + * as necessary) onto result. Upon return *node will be the last + * consumed node, such that further processing can continue with + * node->next. */ static void _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, token_node_t **node_ret, @@ -1232,7 +1158,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, if (next_non_space == NULL) { fprintf (stderr, "Error: '##' cannot appear at either end of a macro expansion\n"); - return FUNCTION_STATUS_SUCCESS; + return; } _token_paste (node->token, next_non_space->token); @@ -1248,6 +1174,74 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, talloc_free (arguments); } + +/* Appends the expansion of the token in *node onto result. + * Upon return *node will be the last consumed node, such that further + * processing can continue with node->next. */ +static void +_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, + token_node_t **node, + token_list_t *result) +{ + token_t *token = (*node)->token; + const char *identifier; + macro_t *macro; + token_list_t *expansion; + + /* We only expand identifiers */ + if (token->type != IDENTIFIER) { + /* We change any COMMA into a COMMA_FINAL to prevent + * it being mistaken for an argument separator + * later. */ + if (token->type == ',') { + token_t *new_token; + + new_token = _token_create_ival (result, COMMA_FINAL, + COMMA_FINAL); + _token_list_append (result, new_token); + } else { + _token_list_append (result, token); + } + return; + } + + /* Look up this identifier in the hash table. */ + identifier = token->value.str; + macro = hash_table_find (parser->defines, identifier); + + /* Not a macro, so just append. 
*/ + if (macro == NULL) { + _token_list_append (result, token); + return; + } + + /* Finally, don't expand this macro if we're already actively + * expanding it, (to avoid infinite recursion). */ + if (_string_list_contains (parser->active, identifier, NULL)) + { + /* We change the token type here from IDENTIFIER to + * OTHER to prevent any future expansion of this + * unexpanded token. */ + char *str; + token_t *new_token; + + str = xtalloc_strdup (result, token->value.str); + new_token = _token_create_str (result, OTHER, str); + _token_list_append (result, new_token); + return; + } + + if (macro->is_function) { + _glcpp_parser_expand_function_onto (parser, node, result); + } else { + _string_list_push (parser->active, identifier); + _glcpp_parser_expand_token_list_onto (parser, + macro->replacements, + result); + _string_list_pop (parser->active); + } +} + static void _glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, token_list_t *list, @@ -1260,12 +1254,7 @@ _glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, for (node = list->head; node; node = node->next) { - if (_glcpp_parser_expand_token_onto (parser, node->token, - result)) - { - _glcpp_parser_expand_function_onto (parser, &node, - result); - } + _glcpp_parser_expand_token_onto (parser, &node, result); } } From 681afbc855c86df8c3521ccdfadb7f16b9729baa Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 15:06:02 -0700 Subject: [PATCH 130/148] Perform macro by replacing tokens in original list. We take the results of macro expansion and splice them into the original token list over which we are iterating. This makes it easy for function-like macro invocations to find their arguments since they are simply subsequent tokens on the list. This fixes the recently-introduced regressions (tests 55 and 56) and also passes new tests 60 and 61 introduced to stress this feature, (with macro-argument parentheses split between a macro value and the textual input).
--- glcpp-parse.y | 278 +++++++++++------- ...-left-paren-in-macro-right-paren-in-text.c | 3 + tests/061-define-chain-obj-to-func-multi.c | 5 + 3 files changed, 187 insertions(+), 99 deletions(-) create mode 100644 tests/060-left-paren-in-macro-right-paren-in-text.c create mode 100644 tests/061-define-chain-obj-to-func-multi.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 9f97b2a282a..c89d7bf159c 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -101,13 +101,12 @@ _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, token_list_t *list); static void -_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, - token_list_t *list); +_glcpp_parser_expand_token_list (glcpp_parser_t *parser, + token_list_t *list); static void -_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, - token_list_t *list, - token_list_t *result); +_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, + token_list_t *list); static void _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition); @@ -218,7 +217,8 @@ control_line: _token_list_append (expanded, token); talloc_unlink (parser, token); _glcpp_parser_evaluate_defined (parser, $2); - _glcpp_parser_expand_token_list_onto (parser, $2, expanded); + _glcpp_parser_expand_token_list (parser, $2); + _token_list_append_list (expanded, $2); glcpp_parser_lex_from (parser, expanded); } | HASH_IFDEF IDENTIFIER NEWLINE { @@ -240,7 +240,8 @@ control_line: _token_list_append (expanded, token); talloc_unlink (parser, token); _glcpp_parser_evaluate_defined (parser, $2); - _glcpp_parser_expand_token_list_onto (parser, $2, expanded); + _glcpp_parser_expand_token_list (parser, $2); + _token_list_append_list (expanded, $2); glcpp_parser_lex_from (parser, expanded); } | HASH_ELSE NEWLINE { @@ -688,6 +689,22 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) list->non_space_tail = tail->non_space_tail; } +token_list_t * +_token_list_copy (void *ctx, token_list_t *other) +{ + token_list_t *copy; 
+ token_node_t *node; + + if (other == NULL) + return NULL; + + copy = _token_list_create (ctx); + for (node = other->head; node; node = node->next) + _token_list_append (copy, node->token); + + return copy; +} + void _token_list_trim_trailing_space (token_list_t *list) { @@ -956,9 +973,12 @@ typedef enum function_status } function_status_t; /* Find a set of function-like macro arguments by looking for a - * balanced set of parentheses. Upon return *node will be the last - * consumed node, such that further processing can continue with - * node->next. + * balanced set of parentheses. + * + * When called, 'node' should be the opening-parenthesis token, (or + * perhaps preceeding SPACE tokens). Upon successful return *last will + * be the last consumed node, (corresponding to the closing right + * parenthesis). * * Return values: * @@ -976,13 +996,13 @@ typedef enum function_status * Macro name is not followed by a balanced set of parentheses. */ static function_status_t -_arguments_parse (argument_list_t *arguments, token_node_t **node_ret) +_arguments_parse (argument_list_t *arguments, + token_node_t *node, + token_node_t **last) { token_list_t *argument; - token_node_t *node = *node_ret, *last; int paren_count; - last = node; node = node->next; /* Ignore whitespace before first parenthesis. 
*/ @@ -992,13 +1012,12 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) if (node == NULL || node->token->type != '(') return FUNCTION_NOT_A_FUNCTION; - last = node; node = node->next; argument = _token_list_create (arguments); _argument_list_append (arguments, argument); - for (paren_count = 1; node; last = node, node = node->next) { + for (paren_count = 1; node; node = node->next) { if (node->token->type == '(') { paren_count++; @@ -1006,11 +1025,8 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) else if (node->token->type == ')') { paren_count--; - if (paren_count == 0) { - last = node; - node = node->next; + if (paren_count == 0) break; - } } if (node->token->type == ',' && @@ -1031,32 +1047,44 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) } } - if (node && paren_count) + if (paren_count) return FUNCTION_UNBALANCED_PARENTHESES; - *node_ret = last; + *last = node; return FUNCTION_STATUS_SUCCESS; } -/* Appends expansion of *node (consuming further tokens from the list - * as necessary) onto result. Upon return *node will be the last - * consumed node, such that further processing can continue with - * node->next. */ -static void -_glcpp_parser_expand_function_onto (glcpp_parser_t *parser, - token_node_t **node_ret, - token_list_t *result) +/* This is a helper function that's essentially part of the + * implementation of _glcpp_parser_expand_node. It shouldn't be called + * except for by that function. + * + * Returns NULL if node is a simple token with no expansion, (that is, + * although 'node' corresponds to an identifier defined as a + * function-like macro, it is not followed with a parenthesized + * argument list). + * + * Compute the complete expansion of node (which is a function-like + * macro) and subsequent nodes which are arguments. 
+ * + * Returns the token list that results from the expansion and sets + * *last to the last node in the list that was consumed by the + * expansion. Specificallty, *last will be set as follows: as the + * token of the closing right parenthesis. + */ +static token_list_t * +_glcpp_parser_expand_function (glcpp_parser_t *parser, + token_node_t *node, + token_node_t **last) + { macro_t *macro; - token_node_t *node; const char *identifier; argument_list_t *arguments; function_status_t status; token_list_t *substituted; int parameter_index; - node = *node_ret; identifier = node->token->value.str; macro = hash_table_find (parser->defines, identifier); @@ -1064,23 +1092,20 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, assert (macro->is_function); arguments = _argument_list_create (parser); - status = _arguments_parse (arguments, node_ret); + status = _arguments_parse (arguments, node, last); switch (status) { case FUNCTION_STATUS_SUCCESS: break; case FUNCTION_NOT_A_FUNCTION: - _token_list_append (result, node->token); - return; + return NULL; case FUNCTION_UNBALANCED_PARENTHESES: - fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", - identifier); - exit (1); + return NULL; } if (macro->replacements == NULL) { talloc_free (arguments); - return; + return _token_list_create (parser); } if (! ((_argument_list_length (arguments) == @@ -1094,7 +1119,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, identifier, _argument_list_length (arguments), _string_list_length (macro->parameters)); - return; + return NULL; } /* Perform argument substitution on the replacement list. */ @@ -1114,9 +1139,9 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, * tokens, or append a placeholder token for * an empty argument. 
*/ if (argument->head) { - _glcpp_parser_expand_token_list_onto (parser, - argument, - substituted); + _glcpp_parser_expand_token_list (parser, + argument); + _token_list_append_list (substituted, argument); } else { token_t *new_token; @@ -1158,7 +1183,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, if (next_non_space == NULL) { fprintf (stderr, "Error: '##' cannot appear at either end of a macro expansion\n"); - return; + return NULL; } _token_paste (node->token, next_non_space->token); @@ -1168,22 +1193,33 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, } _string_list_push (parser->active, identifier); - _glcpp_parser_expand_token_list_onto (parser, substituted, result); + _glcpp_parser_expand_token_list (parser, substituted); _string_list_pop (parser->active); - talloc_free (arguments); + return substituted; } - -/* Appends the expansion of the token in *node onto result. - * Upon return *node will be the last consumed node, such that further - * processing can continue with node->next. */ -static void -_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, - token_node_t **node, - token_list_t *result) +/* Compute the complete expansion of node, (and subsequent nodes after + * 'node' in the case that 'node' is a function-like macro and + * subsequent nodes are arguments). + * + * Returns NULL if node is a simple token with no expansion. + * + * Otherwise, returns the token list that results from the expansion + * and sets *last to the last node in the list that was consumed by + * the expansion. Specificallty, *last will be set as follows: + * + * As 'node' in the case of object-like macro expansion. + * + * As the token of the closing right parenthesis in the case of + * function-like macro expansion. 
+ */ +static token_list_t * +_glcpp_parser_expand_node (glcpp_parser_t *parser, + token_node_t *node, + token_node_t **last) { - token_t *token = (*node)->token; + token_t *token = node->token; const char *identifier; macro_t *macro; token_list_t *expansion; @@ -1194,52 +1230,110 @@ _glcpp_parser_expand_token_onto (glcpp_parser_t *parser, * it being mistaken for an argument separator * later. */ if (token->type == ',') { - token_t *new_token; - - new_token = _token_create_ival (result, COMMA_FINAL, - COMMA_FINAL); - _token_list_append (result, new_token); - } else { - _token_list_append (result, token); + token->type = COMMA_FINAL; + token->value.ival = COMMA_FINAL; } - return; + + return NULL; } /* Look up this identifier in the hash table. */ identifier = token->value.str; macro = hash_table_find (parser->defines, identifier); - /* Not a macro, so just append. */ - if (macro == NULL) { - _token_list_append (result, token); - return; - } + /* Not a macro, so no expansion needed. */ + if (macro == NULL) + return NULL; /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ - if (_string_list_contains (parser->active, identifier, NULL)) - { + if (_string_list_contains (parser->active, identifier, NULL)) { /* We change the token type here from IDENTIFIER to * OTHER to prevent any future expansion of this * unexpanded token. */ char *str; - token_t *new_token; + token_list_t *expansion; + token_t *final; - str = xtalloc_strdup (result, token->value.str); - new_token = _token_create_str (result, OTHER, str); - _token_list_append (result, new_token); - return; + str = xtalloc_strdup (parser, token->value.str); + final = _token_create_str (parser, OTHER, str); + expansion = _token_list_create (parser); + _token_list_append (expansion, final); + *last = node; + return expansion; } - if (macro->is_function) { - _glcpp_parser_expand_function_onto (parser, node, result); - } else { + if (! 
macro->is_function) + { + *last = node; + + if (macro->replacements == NULL) + return _token_list_create (parser); + + expansion = _token_list_copy (parser, macro->replacements); + _string_list_push (parser->active, identifier); - _glcpp_parser_expand_token_list_onto (parser, - macro->replacements, - result); + _glcpp_parser_expand_token_list (parser, expansion); _string_list_pop (parser->active); + + return expansion; } + + return _glcpp_parser_expand_function (parser, node, last); +} + +/* Walk over the token list replacing nodes with their expansion. + * Whenever nodes are expanded the walking will walk over the new + * nodes, continuing to expand as necessary. The results are placed in + * 'list' itself; + */ +static void +_glcpp_parser_expand_token_list (glcpp_parser_t *parser, + token_list_t *list) +{ + token_node_t *node_prev; + token_node_t *node, *last; + token_list_t *expansion; + + if (list == NULL) + return; + + _token_list_trim_trailing_space (list); + + node_prev = NULL; + node = list->head; + + while (node) { + /* Find the expansion for node, which will replace all + * nodes from node to last, inclusive. */ + expansion = _glcpp_parser_expand_node (parser, node, &last); + if (expansion) { + /* Splice expansion into list, supporting a + * simple deletion if the expansion is + * empty. */ + if (expansion->head) { + if (node_prev) + node_prev->next = expansion->head; + else + list->head = expansion->head; + expansion->tail->next = last->next; + if (last == list->tail) + list->tail = expansion->tail; + } else { + if (node_prev) + node_prev->next = last->next; + else + list->head = last->next; + if (last == list->tail) + list->tail = node_prev; + } + } else { + node_prev = node; + } + node = node_prev ?
node_prev->next : list->head; + } + + list->non_space_tail = list->tail; } static void @@ -1247,37 +1341,23 @@ _glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, token_list_t *list, token_list_t *result) { - token_node_t *node; + _glcpp_parser_expand_token_list (parser, list); - if (list == NULL || list->head == NULL) - return; - - for (node = list->head; node; node = node->next) - { - _glcpp_parser_expand_token_onto (parser, &node, result); - } + _token_list_append_list (result, list); } void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list) { - token_list_t *expanded; - token_node_t *node; - function_status_t function_status; - if (list == NULL) return; - expanded = _token_list_create (parser); + _glcpp_parser_expand_token_list (parser, list); - _glcpp_parser_expand_token_list_onto (parser, list, expanded); + _token_list_trim_trailing_space (list); - _token_list_trim_trailing_space (expanded); - - _token_list_print (expanded); - - talloc_free (expanded); + _token_list_print (list); } void diff --git a/tests/060-left-paren-in-macro-right-paren-in-text.c b/tests/060-left-paren-in-macro-right-paren-in-text.c new file mode 100644 index 00000000000..ed80ea879ce --- /dev/null +++ b/tests/060-left-paren-in-macro-right-paren-in-text.c @@ -0,0 +1,3 @@ +#define double(a) a*2 +#define foo double( +foo 5) diff --git a/tests/061-define-chain-obj-to-func-multi.c b/tests/061-define-chain-obj-to-func-multi.c new file mode 100644 index 00000000000..6dbfd1f62d1 --- /dev/null +++ b/tests/061-define-chain-obj-to-func-multi.c @@ -0,0 +1,5 @@ +#define foo(x) success +#define bar foo +#define baz bar +#define joe baz +joe (failure) From c7144dc2e0175a8f4922f261d75437b984039a8c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 15:12:36 -0700 Subject: [PATCH 131/148] Remove some blank lines from the end of some test cases. To match what we have done on the take-2 branch to these test cases. 
--- tests/049-if-expression-precedence.c | 1 - tests/050-if-defined.c | 2 -- tests/053-if-divide-and-shift.c | 1 - 3 files changed, 4 deletions(-) diff --git a/tests/049-if-expression-precedence.c b/tests/049-if-expression-precedence.c index cea935220fd..833ea03882a 100644 --- a/tests/049-if-expression-precedence.c +++ b/tests/049-if-expression-precedence.c @@ -3,4 +3,3 @@ failure with operator precedence #else success #endif - diff --git a/tests/050-if-defined.c b/tests/050-if-defined.c index 9838cc747d5..34f0f95140e 100644 --- a/tests/050-if-defined.c +++ b/tests/050-if-defined.c @@ -15,5 +15,3 @@ failure_3 #else success_3 #endif - - diff --git a/tests/053-if-divide-and-shift.c b/tests/053-if-divide-and-shift.c index ddc1573ab26..d24c54a88d1 100644 --- a/tests/053-if-divide-and-shift.c +++ b/tests/053-if-divide-and-shift.c @@ -13,4 +13,3 @@ failure_3 #else success_3 #endif - From 792bdcbeee770b14dc833261e7ef3c1d400e5e3f Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 15:13:11 -0700 Subject: [PATCH 132/148] Tweak test 25 slightly, (so the non-macro doesn't end the file). This isn't a problem here, but on the take-2 branch, it was trickier at one point to make a non-macro work when it was the last token of the file. So we use the simpler test case here and defer the other case until later. --- tests/025-func-macro-as-non-macro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/025-func-macro-as-non-macro.c b/tests/025-func-macro-as-non-macro.c index 3dbe026d9dd..b433671d1bf 100644 --- a/tests/025-func-macro-as-non-macro.c +++ b/tests/025-func-macro-as-non-macro.c @@ -1,2 +1,2 @@ #define foo(bar) bar -foo +foo bar From b1249f69fd687441632c2d2e63618627ae9be442 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 15:15:00 -0700 Subject: [PATCH 133/148] Add two (passing) tests from the take-2 branch. These two tests were tricky to make work on take-2, but happen to already be working here. 
--- tests/000-content-with-spaces.c | 1 + tests/061-define-chain-obj-to-func-multi.c | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 tests/000-content-with-spaces.c create mode 100644 tests/061-define-chain-obj-to-func-multi.c diff --git a/tests/000-content-with-spaces.c b/tests/000-content-with-spaces.c new file mode 100644 index 00000000000..696cb3a74fc --- /dev/null +++ b/tests/000-content-with-spaces.c @@ -0,0 +1 @@ +this is four tokens diff --git a/tests/061-define-chain-obj-to-func-multi.c b/tests/061-define-chain-obj-to-func-multi.c new file mode 100644 index 00000000000..6dbfd1f62d1 --- /dev/null +++ b/tests/061-define-chain-obj-to-func-multi.c @@ -0,0 +1,5 @@ +#define foo(x) success +#define bar foo +#define baz bar +#define joe baz +joe (failure) From 614a9aece0888e7c8221ad2e8a231762442db794 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 15:15:59 -0700 Subject: [PATCH 134/148] Add two more (failing) tests from the take-2 branch. These tests were recently fixed on the take-2 branch, but will require additional work before they will pass here. 
--- tests/058-token-pasting-empty-arguments.c | 5 +++++ tests/060-left-paren-in-macro-right-paren-in-text.c | 3 +++ 2 files changed, 8 insertions(+) create mode 100644 tests/058-token-pasting-empty-arguments.c create mode 100644 tests/060-left-paren-in-macro-right-paren-in-text.c diff --git a/tests/058-token-pasting-empty-arguments.c b/tests/058-token-pasting-empty-arguments.c new file mode 100644 index 00000000000..8ac260c76b6 --- /dev/null +++ b/tests/058-token-pasting-empty-arguments.c @@ -0,0 +1,5 @@ +#define paste(x,y) x ## y +paste(a,b) +paste(a,) +paste(,b) +paste(,) diff --git a/tests/060-left-paren-in-macro-right-paren-in-text.c b/tests/060-left-paren-in-macro-right-paren-in-text.c new file mode 100644 index 00000000000..ed80ea879ce --- /dev/null +++ b/tests/060-left-paren-in-macro-right-paren-in-text.c @@ -0,0 +1,3 @@ +#define double(a) a*2 +#define foo double( +foo 5) From 631016946ca8134244c4e58bef6863d204b1119b Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 29 May 2010 05:07:24 -0700 Subject: [PATCH 135/148] Fix pass-through of '=' and add a test for it. Previously '=' was not included in our PUNCTUATION regular expression, but it *was* excluded from our OTHER regular expression, so we were getting the default (and harmful) lex action of just printing it. The test we add here is named "punctuator" with the idea that we can extend it as needed for other punctuator testing. --- glcpp-lex.l | 2 +- glcpp-parse.y | 1 + tests/071-punctuator.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 tests/071-punctuator.c diff --git a/glcpp-lex.l b/glcpp-lex.l index 70d47d24975..52269c6b306 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -38,7 +38,7 @@ NEWLINE [\n] HSPACE [ \t] HASH ^{HSPACE}*#{HSPACE}* IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* -PUNCTUATION [][(){}.&*~!/%<>^|;,+-] +PUNCTUATION [][(){}.&*~!/%<>^|;,=+-] OTHER [^][(){}.&*~!/%<>^|;,=#[:space:]+-]+ DECIMAL_INTEGER [1-9][0-9]*[uU]? 
diff --git a/glcpp-parse.y b/glcpp-parse.y index c89d7bf159c..01ca08ec740 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -426,6 +426,7 @@ operator: | OR { $$ = OR; } | ';' { $$ = ';'; } | ',' { $$ = ','; } +| '=' { $$ = '='; } | PASTE { $$ = PASTE; } | DEFINED { $$ = DEFINED; } ; diff --git a/tests/071-punctuator.c b/tests/071-punctuator.c new file mode 100644 index 00000000000..959d6825988 --- /dev/null +++ b/tests/071-punctuator.c @@ -0,0 +1 @@ +a = b From b06096e86eda1257769156523b5738044c6a2b10 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 29 May 2010 05:54:19 -0700 Subject: [PATCH 136/148] Add test and fix bugs with multiple token-pasting on the same line. The list replacement when token pasting was broken, (failing to properly update the list's tail pointer). Also, memory management when pasting was broken, (modifying the original token's string which would cause problems with multiple calls to a macro which pasted a literal string). We didn't catch this with previous tests because they only pasted argument values. --- glcpp-parse.y | 92 +++++++++++++---------------- tests/072-token-pasting-same-line.c | 2 + 2 files changed, 43 insertions(+), 51 deletions(-) create mode 100644 tests/072-token-pasting-same-line.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 01ca08ec740..f4c834e038f 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -783,73 +783,53 @@ _token_print (token_t *token) } } -/* Change 'token' into a new token formed by pasting 'other'. */ -static void +/* Return a new token (talloc()ed off of 'token') formed by pasting + * 'token' and 'other'. Note that this function may return 'token' or + * 'other' directly rather than allocating anything new. + * + * Caution: Only very cursory error-checking is performed to see if + * the final result is a valid single token. */ +static token_t * _token_paste (token_t *token, token_t *other) { /* Pasting a placeholder onto anything makes no change. 
*/ if (other->type == PLACEHOLDER) - return; + return token; - /* When 'token' is a placeholder, just return contents of 'other'. */ - if (token->type == PLACEHOLDER) { - token->type = other->type; - token->value = other->value; - return; - } + /* When 'token' is a placeholder, just return 'other'. */ + if (token->type == PLACEHOLDER) + return other; /* A very few single-character punctuators can be combined * with another to form a multi-character punctuator. */ switch (token->type) { case '<': - if (other->type == '<') { - token->type = LEFT_SHIFT; - token->value.ival = LEFT_SHIFT; - return; - } else if (other->type == '=') { - token->type = LESS_OR_EQUAL; - token->value.ival = LESS_OR_EQUAL; - return; - } + if (other->type == '<') + return _token_create_ival (token, LEFT_SHIFT, LEFT_SHIFT); + else if (other->type == '=') + return _token_create_ival (token, LESS_OR_EQUAL, LESS_OR_EQUAL); break; case '>': - if (other->type == '>') { - token->type = RIGHT_SHIFT; - token->value.ival = RIGHT_SHIFT; - return; - } else if (other->type == '=') { - token->type = GREATER_OR_EQUAL; - token->value.ival = GREATER_OR_EQUAL; - return; - } + if (other->type == '>') + return _token_create_ival (token, RIGHT_SHIFT, RIGHT_SHIFT); + else if (other->type == '=') + return _token_create_ival (token, GREATER_OR_EQUAL, GREATER_OR_EQUAL); break; case '=': - if (other->type == '=') { - token->type = EQUAL; - token->value.ival = EQUAL; - return; - } + if (other->type == '=') + return _token_create_ival (token, EQUAL, EQUAL); break; case '!': - if (other->type == '=') { - token->type = NOT_EQUAL; - token->value.ival = NOT_EQUAL; - return; - } + if (other->type == '=') + return _token_create_ival (token, NOT_EQUAL, NOT_EQUAL); break; case '&': - if (other->type == '&') { - token->type = AND; - token->value.ival = AND; - return; - } + if (other->type == '&') + return _token_create_ival (token, AND, AND); break; case '|': - if (other->type == '|') { - token->type = OR; - token->value.ival = 
OR; - return; - } + if (other->type == '|') + return _token_create_ival (token, OR, OR); break; } @@ -864,9 +844,11 @@ _token_paste (token_t *token, token_t *other) if ((token->type == IDENTIFIER || token->type == OTHER || token->type == INTEGER_STRING) && (other->type == IDENTIFIER || other->type == OTHER || other->type == INTEGER_STRING)) { - token->value.str = talloc_strdup_append (token->value.str, - other->value.str); - return; + char *str; + + str = xtalloc_asprintf (token, "%s%s", + token->value.str, other->value.str); + return _token_create_str (token, token->type, str); } printf ("Error: Pasting \""); @@ -874,6 +856,8 @@ _token_paste (token_t *token, token_t *other) printf ("\" and \""); _token_print (other); printf ("\" does not give a valid preprocessing token.\n"); + + return token; } static void @@ -1159,6 +1143,8 @@ _glcpp_parser_expand_function (glcpp_parser_t *parser, /* After argument substitution, and before further expansion * below, implement token pasting. */ + _token_list_trim_trailing_space (substituted); + node = substituted->head; while (node) { @@ -1187,12 +1173,16 @@ _glcpp_parser_expand_function (glcpp_parser_t *parser, return NULL; } - _token_paste (node->token, next_non_space->token); + node->token = _token_paste (node->token, next_non_space->token); node->next = next_non_space->next; + if (next_non_space == substituted->tail) + substituted->tail = node; node = node->next; } + substituted->non_space_tail = substituted->tail; + _string_list_push (parser->active, identifier); _glcpp_parser_expand_token_list (parser, substituted); _string_list_pop (parser->active); diff --git a/tests/072-token-pasting-same-line.c b/tests/072-token-pasting-same-line.c new file mode 100644 index 00000000000..e421e9d5e29 --- /dev/null +++ b/tests/072-token-pasting-same-line.c @@ -0,0 +1,2 @@ +#define paste(x) success_ ## x +paste(1) paste(2) paste(3) From 75ef1c75dd47a0b4054a767fd94f7c3cf68d2331 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 29 May 
2010 05:57:22 -0700 Subject: [PATCH 137/148] Add killer test case from the C99 specification. Happily, this passes now, (since many of the previously added test cases were extracted from this one). --- tests/099-c99-example.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/099-c99-example.c diff --git a/tests/099-c99-example.c b/tests/099-c99-example.c new file mode 100644 index 00000000000..d1976b1f265 --- /dev/null +++ b/tests/099-c99-example.c @@ -0,0 +1,17 @@ +#define x 3 +#define f(a) f(x * (a)) +#undef x +#define x 2 +#define g f +#define z z[0] +#define h g(~ +#define m(a) a(w) +#define w 0,1 +#define t(a) a +#define p() int +#define q(x) x +#define r(x,y) x ## y +f(y+1) + f(f(z)) % t(t(g)(0) + t)(1); +g(x +(3,4)-w) | h 5) & m + (f)^m(m); +p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,)}; From ae3fb09cd20fc189d68f0c2a63cc74dd584d7ee1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 29 May 2010 06:01:32 -0700 Subject: [PATCH 138/148] Add three more tests cases recently added to the take-2 branch. The 071-punctuator test is failing only trivially (whitespace change only). And the 072-token-pasting-same-line.c test passes just fine here, (more evidence perhaps that the approach in take-2 is more trouble than it's worth?). The 099-c99-example test case is the inspiration for much of the rest of the test suite. It amazingly passes on the take-2 branch, but doesn't pass here yet. 
--- tests/071-punctuator.c | 1 + tests/072-token-pasting-same-line.c | 2 ++ tests/099-c99-example.c | 17 +++++++++++++++++ 3 files changed, 20 insertions(+) create mode 100644 tests/071-punctuator.c create mode 100644 tests/072-token-pasting-same-line.c create mode 100644 tests/099-c99-example.c diff --git a/tests/071-punctuator.c b/tests/071-punctuator.c new file mode 100644 index 00000000000..959d6825988 --- /dev/null +++ b/tests/071-punctuator.c @@ -0,0 +1 @@ +a = b diff --git a/tests/072-token-pasting-same-line.c b/tests/072-token-pasting-same-line.c new file mode 100644 index 00000000000..e421e9d5e29 --- /dev/null +++ b/tests/072-token-pasting-same-line.c @@ -0,0 +1,2 @@ +#define paste(x) success_ ## x +paste(1) paste(2) paste(3) diff --git a/tests/099-c99-example.c b/tests/099-c99-example.c new file mode 100644 index 00000000000..d1976b1f265 --- /dev/null +++ b/tests/099-c99-example.c @@ -0,0 +1,17 @@ +#define x 3 +#define f(a) f(x * (a)) +#undef x +#define x 2 +#define g f +#define z z[0] +#define h g(~ +#define m(a) a(w) +#define w 0,1 +#define t(a) a +#define p() int +#define q(x) x +#define r(x,y) x ## y +f(y+1) + f(f(z)) % t(t(g)(0) + t)(1); +g(x +(3,4)-w) | h 5) & m + (f)^m(m); +p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,)}; From a771a40e2257657cbdae0eb97a7bb8733db76b91 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 1 Jun 2010 11:20:18 -0700 Subject: [PATCH 139/148] Fix #if-skipping to *really* skip the skipped group. Previously we were avoiding printing within a skipped group, but we were still evaluating directives such as #define and #undef and still emitting diagnostics for things such as macro calls with the wrong number of arguments. Add a test for this and fix it with a high-priority rule in the lexer that consumes the skipped content. 
--- glcpp-lex.l | 60 ++++++++++++++++++++++------------ glcpp-parse.y | 15 +++------ glcpp.h | 1 + tests/062-if-0-skips-garbage.c | 5 +++ 4 files changed, 50 insertions(+), 31 deletions(-) create mode 100644 tests/062-if-0-skips-garbage.c diff --git a/glcpp-lex.l b/glcpp-lex.l index 52269c6b306..a51d9e185fc 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -47,6 +47,45 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? %% +{HASH}if/.*\n { + yyextra->lexing_if = 1; + yyextra->space_tokens = 0; + return HASH_IF; +} + +{HASH}elif/.*\n { + yyextra->lexing_if = 1; + yyextra->space_tokens = 0; + return HASH_ELIF; +} + +{HASH}else/.*\n { + yyextra->space_tokens = 0; + return HASH_ELSE; +} + +{HASH}endif/.*\n { + yyextra->space_tokens = 0; + return HASH_ENDIF; +} + + /* When skipping (due to an #if 0 or similar) consume anything + * up to a newline. We do this less priroty than any + * #if-related directive (#if, #elif, #else, #endif), but with + * more priority than any other directive or token to avoid + * any side-effects from skipped content. + * + * We use the lexing_if flag to avoid skipping any part of an + * if conditional expression. */ +[^\n]+/\n { + if (yyextra->lexing_if || + yyextra->skip_stack == NULL || + yyextra->skip_stack->type == SKIP_NO_SKIP) + { + REJECT; + } +} + {HASH}define{HSPACE}+/{IDENTIFIER}"(" { yyextra->space_tokens = 0; return HASH_DEFINE_FUNC; @@ -62,26 +101,6 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? return HASH_UNDEF; } -{HASH}if { - yyextra->space_tokens = 0; - return HASH_IF; -} - -{HASH}elif { - yyextra->space_tokens = 0; - return HASH_ELIF; -} - -{HASH}else { - yyextra->space_tokens = 0; - return HASH_ELSE; -} - -{HASH}endif { - yyextra->space_tokens = 0; - return HASH_ENDIF; -} - {HASH} { yyextra->space_tokens = 0; return HASH; @@ -163,6 +182,7 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
} \n { + yyextra->lexing_if = 0; return NEWLINE; } diff --git a/glcpp-parse.y b/glcpp-parse.y index f4c834e038f..dd8e133f550 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -159,19 +159,11 @@ input: line: control_line { - if (parser->skip_stack == NULL || - parser->skip_stack->type == SKIP_NO_SKIP) - { - printf ("\n"); - } + printf ("\n"); } | text_line { - if (parser->skip_stack == NULL || - parser->skip_stack->type == SKIP_NO_SKIP) - { - _glcpp_parser_print_expanded_token_list (parser, $1); - printf ("\n"); - } + _glcpp_parser_print_expanded_token_list (parser, $1); + printf ("\n"); talloc_free ($1); } | expanded_line @@ -889,6 +881,7 @@ glcpp_parser_create (void) parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); parser->active = _string_list_create (parser); + parser->lexing_if = 0; parser->space_tokens = 1; parser->newline_as_space = 0; parser->in_control_line = 0; diff --git a/glcpp.h b/glcpp.h index 5c8c304a9ca..41fc2043d13 100644 --- a/glcpp.h +++ b/glcpp.h @@ -127,6 +127,7 @@ struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; string_list_t *active; + int lexing_if; int space_tokens; int newline_as_space; int in_control_line; diff --git a/tests/062-if-0-skips-garbage.c b/tests/062-if-0-skips-garbage.c new file mode 100644 index 00000000000..d9e439bb890 --- /dev/null +++ b/tests/062-if-0-skips-garbage.c @@ -0,0 +1,5 @@ +#define foo(a,b) +#if 0 +foo(bar) +foo( +#endif From 2571415d1a7eec72db33cd521ca48fe755c43f9c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 1 Jun 2010 12:18:43 -0700 Subject: [PATCH 140/148] Implement comment handling in the lexer (with test). We support both single-line (//) and multi-line (/* ... */) comments and add a test for this, (trying to stress the rules just a bit by embedding one comment delimiter into a comment delimited with the other style, etc.). 
To keep the test suite passing we do now discard any output lines from glcpp that consist only of spacing, (in addition to blank lines as previously). We also discard any initial whitespace from gcc output. In neither case should the absence or presence of this whitespace affect correctness. --- glcpp-lex.l | 11 +++++++++++ tests/063-comments.c | 15 +++++++++++++++ tests/glcpp-test | 4 ++-- 3 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 tests/063-comments.c diff --git a/glcpp-lex.l b/glcpp-lex.l index a51d9e185fc..0954ab7e83d 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -47,6 +47,17 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? %% + /* Single-line comments */ +"//"[^\n]+\n { + return NEWLINE; +} + + /* Multi-line comments */ +[/][*]([^*]*[*]+[^/])*[^*]*[*]*[/] { + if (yyextra->space_tokens) + return SPACE; +} + {HASH}if/.*\n { yyextra->lexing_if = 1; yyextra->space_tokens = 0; diff --git a/tests/063-comments.c b/tests/063-comments.c new file mode 100644 index 00000000000..4cda52236e0 --- /dev/null +++ b/tests/063-comments.c @@ -0,0 +1,15 @@ +/* this is a comment */ +// so is this +// */ +f = g/**//h; +/*//*/l(); +m = n//**/o ++ p; +/* this +comment spans +multiple lines and +contains *** stars +and slashes / *** / +and other stuff. 
+****/ +more code here diff --git a/tests/glcpp-test b/tests/glcpp-test index ba398af0d54..24110333a5e 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -3,8 +3,8 @@ for test in *.c; do echo "Testing $test" ../glcpp < $test > $test.glcpp - grep -v '^$' < $test.glcpp > $test.out || true + grep -v '^ *$' < $test.glcpp > $test.out || true gcc -E $test -o $test.gcc - grep -v '^#' < $test.gcc | grep -v '^$' > $test.expected || true + grep -v '^#' < $test.gcc | grep -v '^$' | sed -r -e 's/^ +/ /' > $test.expected || true diff -u $test.expected $test.out done From 4c22f4dba7a87de4736e01010e361b073a7501c8 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 2 Jun 2010 10:48:47 -0700 Subject: [PATCH 141/148] Fix multi-line comment regular expression to handle (non) nested comments. Ken reminded me of a couple cases that I should be testing. These are the non-nestedness of things that look like nested comments as well as potentially tricky things like "/*/" and "/*/*/". The (non) nested comment case was not working in the case of the comment terminator with multiple '*' characters. We fix this by not considering a '*' as the "non-slash" to terminate a sequence of '*' characters within the comment. We also fix the final match of the terminator to use '+' rather than '*' to require the presence of a final '*' character in the comment terminator. --- glcpp-lex.l | 2 +- tests/063-comments.c | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 0954ab7e83d..7bc5fab76da 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -53,7 +53,7 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? } /* Multi-line comments */ -[/][*]([^*]*[*]+[^/])*[^*]*[*]*[/] { +[/][*]([^*]*[*]+[^*/])*[^*]*[*]+[/] { if (yyextra->space_tokens) return SPACE; } diff --git a/tests/063-comments.c b/tests/063-comments.c index 4cda52236e0..e641d2f0f9e 100644 --- a/tests/063-comments.c +++ b/tests/063-comments.c @@ -13,3 +13,8 @@ and slashes / *** / and other stuff. 
****/ more code here +/* Test that /* nested + comments */ +are not treated like comments. +/*/ this is a comment */ +/*/*/ From e4b2731a25c071407d90c6c593a226574e9c36f9 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 2 Jun 2010 10:59:08 -0700 Subject: [PATCH 142/148] Make the multi-line comment regular expression a bit easier to read. Use quoted strings for literal portions rather than a sequence of single-character character classes. --- glcpp-lex.l | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 7bc5fab76da..2aec46a2ed1 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -53,7 +53,7 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? } /* Multi-line comments */ -[/][*]([^*]*[*]+[^*/])*[^*]*[*]+[/] { +"/*"([^*]*[*]+[^*/])*[^*]*[*]+"/" { if (yyextra->space_tokens) return SPACE; } From 111e25bd84fb923bbab5b0ca76bbbb5d9a537a26 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 2 Jun 2010 12:54:15 -0700 Subject: [PATCH 143/148] Factor out common sub-expression from multi-line-comment regular expression. In two places we look for an (optional) sequence of characters other than "*" followed by a sequence of one or more "*". Using a name for this (NON_STARS_THEN_STARS) seems to make it a bit easier to understand. --- glcpp-lex.l | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 2aec46a2ed1..0d9a75415a3 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -45,6 +45,8 @@ DECIMAL_INTEGER [1-9][0-9]*[uU]? OCTAL_INTEGER 0[0-7]*[uU]? HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? +NON_STARS_THEN_STARS [^*]*[*]+ + %% /* Single-line comments */ @@ -53,7 +55,7 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
} /* Multi-line comments */ -"/*"([^*]*[*]+[^*/])*[^*]*[*]+"/" { +"/*"({NON_STARS_THEN_STARS}[^*/])*{NON_STARS_THEN_STARS}"/" { if (yyextra->space_tokens) return SPACE; } From c7c95fe51f0ff83d4d3e07a926f96336248f9509 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 2 Jun 2010 14:43:03 -0700 Subject: [PATCH 144/148] Remove dead code: _glcpp_parser_expand_token_list_onto This function simply isn't being called anymore. --- glcpp-parse.y | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index dd8e133f550..a4e6559282c 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -1320,16 +1320,6 @@ _glcpp_parser_expand_token_list (glcpp_parser_t *parser, list->non_space_tail = list->tail; } -static void -_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, - token_list_t *list, - token_list_t *result) -{ - _glcpp_parser_expand_token_list (parser, list); - - _token_list_append_list (result, list); -} - void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list) From 22b3aced03c1a243ba03fbcba5aa51f97e4f0abb Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 2 Jun 2010 15:32:03 -0700 Subject: [PATCH 145/148] Eliminate some recursion from children of _expand_token_list Previously, both _expand_node and _expand_function would always make mutually recursive calls into _expand_token_list. This was unnecessary since these functions can simply return unexpanded results, after which the outer iteration will next attempt expansion of the results. The only trick in doing this is to arrange so that the active list is popped at the appropriate time. To do this, we add a new token_node_t marker to the active stack. When pushing onto the active list, we set marker to last->next, and when the marker is seen by the token list iteration, we pop from the active stack. 
--- glcpp-parse.y | 159 ++++++++++++++++++++++++++++++++------------------ glcpp.h | 8 ++- 2 files changed, 110 insertions(+), 57 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index a4e6559282c..1c7c84dac7a 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -52,12 +52,6 @@ _string_list_append_item (string_list_t *list, const char *str); static void _string_list_append_list (string_list_t *list, string_list_t *tail); -static void -_string_list_push (string_list_t *list, const char *str); - -static void -_string_list_pop (string_list_t *list); - static int _string_list_contains (string_list_t *list, const char *member, int *index); @@ -96,6 +90,20 @@ _token_list_append (token_list_t *list, token_t *token); static void _token_list_append_list (token_list_t *list, token_list_t *tail); +static int +_token_list_length (token_list_t *list); + +static active_list_t * +_active_list_push (active_list_t *list, + const char *identifier, + token_node_t *marker); + +static active_list_t * +_active_list_pop (active_list_t *list); + +int +_active_list_contains (active_list_t *list, const char *identifier); + static void _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, token_list_t *list); @@ -468,42 +476,6 @@ _string_list_append_item (string_list_t *list, const char *str) list->tail = node; } -void -_string_list_push (string_list_t *list, const char *str) -{ - string_node_t *node; - - node = xtalloc (list, string_node_t); - node->str = xtalloc_strdup (node, str); - node->next = list->head; - - if (list->tail == NULL) { - list->tail = node; - } - list->head = node; -} - -void -_string_list_pop (string_list_t *list) -{ - string_node_t *node; - - node = list->head; - - if (node == NULL) { - fprintf (stderr, "Internal error: _string_list_pop called on an empty list.\n"); - exit (1); - } - - list->head = node->next; - if (list->tail == node) { - assert (node->next == NULL); - list->tail = NULL; - } - - talloc_free (node); -} - int _string_list_contains (string_list_t 
*list, const char *member, int *index) { @@ -716,6 +688,21 @@ _token_list_trim_trailing_space (token_list_t *list) } } +static int +_token_list_length (token_list_t *list) +{ + int length = 0; + token_node_t *node; + + if (list == NULL) + return 0; + + for (node = list->head; node; node = node->next) + length++; + + return length; +} + static void _token_print (token_t *token) { @@ -880,7 +867,7 @@ glcpp_parser_create (void) glcpp_lex_init_extra (parser, &parser->scanner); parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); - parser->active = _string_list_create (parser); + parser->active = NULL; parser->lexing_if = 0; parser->space_tokens = 1; parser->newline_as_space = 0; @@ -1176,10 +1163,6 @@ _glcpp_parser_expand_function (glcpp_parser_t *parser, substituted->non_space_tail = substituted->tail; - _string_list_push (parser->active, identifier); - _glcpp_parser_expand_token_list (parser, substituted); - _string_list_pop (parser->active); - return substituted; } @@ -1206,7 +1189,6 @@ _glcpp_parser_expand_node (glcpp_parser_t *parser, token_t *token = node->token; const char *identifier; macro_t *macro; - token_list_t *expansion; /* We only expand identifiers */ if (token->type != IDENTIFIER) { @@ -1231,7 +1213,7 @@ _glcpp_parser_expand_node (glcpp_parser_t *parser, /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ - if (_string_list_contains (parser->active, identifier, NULL)) { + if (_active_list_contains (parser->active, identifier)) { /* We change the token type here from IDENTIFIER to * OTHER to prevent any future expansion of this * unexpanded token. 
*/ @@ -1254,18 +1236,63 @@ _glcpp_parser_expand_node (glcpp_parser_t *parser, if (macro->replacements == NULL) return _token_list_create (parser); - expansion = _token_list_copy (parser, macro->replacements); - - _string_list_push (parser->active, identifier); - _glcpp_parser_expand_token_list (parser, expansion); - _string_list_pop (parser->active); - - return expansion; + return _token_list_copy (parser, macro->replacements); } return _glcpp_parser_expand_function (parser, node, last); } +/* Push a new identifier onto the active list, returning the new list. + * + * Here, 'marker' is the token node that appears in the list after the + * expansion of 'identifier'. That is, when the list iterator begins + * examinging 'marker', then it is time to pop this node from the + * active stack. + */ +active_list_t * +_active_list_push (active_list_t *list, + const char *identifier, + token_node_t *marker) +{ + active_list_t *node; + + node = xtalloc (list, active_list_t); + node->identifier = xtalloc_strdup (node, identifier); + node->marker = marker; + node->next = list; + + return node; +} + +active_list_t * +_active_list_pop (active_list_t *list) +{ + active_list_t *node = list; + + if (node == NULL) + return NULL; + + node = list->next; + talloc_free (list); + + return node; +} + +int +_active_list_contains (active_list_t *list, const char *identifier) +{ + active_list_t *node; + + if (list == NULL) + return 0; + + for (node = list; node; node = node->next) + if (strcmp (node->identifier, identifier) == 0) + return 1; + + return 0; +} + /* Walk over the token list replacing nodes with their expansion. * Whenever nodes are expanded the walking will walk over the new * nodes, continuing to expand as necessary. 
The results are placed in @@ -1288,10 +1315,27 @@ _glcpp_parser_expand_token_list (glcpp_parser_t *parser, node = list->head; while (node) { + + while (parser->active && parser->active->marker == node) + parser->active = _active_list_pop (parser->active); + /* Find the expansion for node, which will replace all * nodes from node to last, inclusive. */ expansion = _glcpp_parser_expand_node (parser, node, &last); if (expansion) { + token_node_t *n; + + for (n = node; n != last->next; n = n->next) + while (parser->active && + parser->active->marker == n) + { + parser->active = _active_list_pop (parser->active); + } + + parser->active = _active_list_push (parser->active, + node->token->value.str, + last->next); + /* Splice expansion into list, supporting a * simple deletion if the expansion is * empty. */ @@ -1317,6 +1361,9 @@ _glcpp_parser_expand_token_list (glcpp_parser_t *parser, node = node_prev ? node_prev->next : list->head; } + while (parser->active) + parser->active = _active_list_pop (parser->active); + list->non_space_tail = list->tail; } diff --git a/glcpp.h b/glcpp.h index 41fc2043d13..4459daa4f32 100644 --- a/glcpp.h +++ b/glcpp.h @@ -123,10 +123,16 @@ typedef struct skip_node { struct skip_node *next; } skip_node_t; +typedef struct active_list { + const char *identifier; + token_node_t *marker; + struct active_list *next; +} active_list_t; + struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; - string_list_t *active; + active_list_t *active; int lexing_if; int space_tokens; int newline_as_space; From 14c98a56442a076a831aee85e9b3e54d934ec360 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 2 Jun 2010 15:49:54 -0700 Subject: [PATCH 146/148] Restore error message for a macro with unbalanced parentheses. We had to remove this earlier because our recursive function calls caused the same nodes to be examined for expansion more than once. 
And in the test suite, one node would be examined before it had its closing parenthesis and then again later after the parenthesis was added. So we removed this error message to allow the test case to pass. Now that we've removed the unnecessary recursive function call we can catch this error case and report it as desired. --- glcpp-parse.y | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 1c7c84dac7a..b07714eebd2 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -956,7 +956,7 @@ typedef enum function_status * Macro name not followed by a '('. This is not an error, but * simply that the macro name should be treated as a non-macro. * - * FUNCTION_UNBLANCED_PARENTHESES + * FUNCTION_UNBALANCED_PARENTHESES * * Macro name is not followed by a balanced set of parentheses. */ @@ -1065,6 +1065,9 @@ _glcpp_parser_expand_function (glcpp_parser_t *parser, case FUNCTION_NOT_A_FUNCTION: return NULL; case FUNCTION_UNBALANCED_PARENTHESES: + fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", + identifier); + exit (1); return NULL; } From 5ae88af9886b4b7bf486cbc0d10a9bab6456165f Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 2 Jun 2010 15:59:45 -0700 Subject: [PATCH 147/148] test suite: Add expected output for every test. Rather than using the (munged) output of "gcc -E" we now capture precisely the output we expect from every test case. This allows us to stay immune from strange output from gcc (unpredictable whitespace output---particularly with different gcc versions). This will also allow us to write tests that capture expected error messages from the preprocessor as well.
--- .gitignore | 3 -- Makefile | 2 +- tests/000-content-with-spaces.c.expected | 1 + tests/001-define.c.expected | 2 ++ tests/002-define-chain.c.expected | 3 ++ tests/003-define-chain-reverse.c.expected | 3 ++ tests/004-define-recursive.c.expected | 6 ++++ tests/005-define-composite-chain.c.expected | 3 ++ ...-define-composite-chain-reverse.c.expected | 3 ++ .../007-define-composite-recursive.c.expected | 6 ++++ tests/008-define-empty.c.expected | 2 ++ tests/009-undef.c.expected | 4 +++ tests/010-undef-re-define.c.expected | 6 ++++ tests/011-define-func-empty.c.expected | 2 ++ tests/012-define-func-no-args.c.expected | 2 ++ tests/013-define-func-1-arg-unused.c.expected | 2 ++ tests/014-define-func-2-arg-unused.c.expected | 2 ++ .../015-define-object-with-parens.c.expected | 4 +++ tests/016-define-func-1-arg.c.expected | 2 ++ tests/017-define-func-2-args.c.expected | 2 ++ ...-define-func-macro-as-parameter.c.expected | 3 ++ tests/019-define-func-1-arg-multi.c.expected | 2 ++ tests/020-define-func-2-arg-multi.c.expected | 2 ++ tests/021-define-func-compose.c.expected | 3 ++ ...022-define-func-arg-with-parens.c.expected | 2 ++ tests/023-define-extra-whitespace.c.expected | 8 +++++ ...-define-chain-to-self-recursion.c.expected | 3 ++ tests/025-func-macro-as-non-macro.c.expected | 2 ++ .../026-define-func-extra-newlines.c.expected | 3 ++ tests/027-define-chain-obj-to-func.c.expected | 3 ++ ...28-define-chain-obj-to-non-func.c.expected | 3 ++ ...ine-chain-obj-to-func-with-args.c.expected | 3 ++ ...efine-chain-obj-to-func-compose.c.expected | 4 +++ ...fine-chain-func-to-func-compose.c.expected | 4 +++ tests/032-define-func-self-recurse.c.expected | 2 ++ tests/033-define-func-self-compose.c.expected | 2 ++ ...fine-func-self-compose-non-func.c.expected | 2 ++ ...e-non-func-multi-token-argument.c.expected | 2 ++ ...-non-macro-multi-token-argument.c.expected | 3 ++ .../037-finalize-unexpanded-macro.c.expected | 3 ++ tests/038-func-arg-with-commas.c.expected | 2 ++ 
...9-func-arg-obj-macro-with-comma.c.expected | 3 ++ tests/040-token-pasting.c.expected | 2 ++ tests/041-if-0.c.expected | 5 +++ tests/042-if-1.c.expected | 5 +++ tests/043-if-0-else.c.expected | 7 ++++ tests/044-if-1-else.c.expected | 7 ++++ tests/045-if-0-elif.c.expected | 11 ++++++ tests/046-if-1-elsif.c.expected | 11 ++++++ tests/047-if-elif-else.c.expected | 11 ++++++ tests/048-if-nested.c.expected | 11 ++++++ tests/049-if-expression-precedence.c.expected | 5 +++ tests/050-if-defined.c.expected | 17 +++++++++ tests/051-if-relational.c.expected | 35 +++++++++++++++++++ tests/052-if-bitwise.c.expected | 20 +++++++++++ tests/053-if-divide-and-shift.c.expected | 15 ++++++++ tests/054-if-with-macros.c.expected | 34 ++++++++++++++++++ ...hain-obj-to-func-parens-in-text.c.expected | 3 ++ .../056-macro-argument-with-comma.c.expected | 4 +++ tests/057-empty-arguments.c.expected | 6 ++++ ...8-token-pasting-empty-arguments.c.expected | 5 +++ tests/059-token-pasting-integer.c.expected | 4 +++ ...en-in-macro-right-paren-in-text.c.expected | 3 ++ ...-define-chain-obj-to-func-multi.c.expected | 5 +++ tests/062-if-0-skips-garbage.c.expected | 5 +++ tests/063-comments.c.expected | 13 +++++++ tests/071-punctuator.c.expected | 1 + tests/072-token-pasting-same-line.c.expected | 2 ++ tests/099-c99-example.c.expected | 16 +++++++++ tests/glcpp-test | 5 +-- 70 files changed, 384 insertions(+), 8 deletions(-) create mode 100644 tests/000-content-with-spaces.c.expected create mode 100644 tests/001-define.c.expected create mode 100644 tests/002-define-chain.c.expected create mode 100644 tests/003-define-chain-reverse.c.expected create mode 100644 tests/004-define-recursive.c.expected create mode 100644 tests/005-define-composite-chain.c.expected create mode 100644 tests/006-define-composite-chain-reverse.c.expected create mode 100644 tests/007-define-composite-recursive.c.expected create mode 100644 tests/008-define-empty.c.expected create mode 100644 tests/009-undef.c.expected create 
mode 100644 tests/010-undef-re-define.c.expected create mode 100644 tests/011-define-func-empty.c.expected create mode 100644 tests/012-define-func-no-args.c.expected create mode 100644 tests/013-define-func-1-arg-unused.c.expected create mode 100644 tests/014-define-func-2-arg-unused.c.expected create mode 100644 tests/015-define-object-with-parens.c.expected create mode 100644 tests/016-define-func-1-arg.c.expected create mode 100644 tests/017-define-func-2-args.c.expected create mode 100644 tests/018-define-func-macro-as-parameter.c.expected create mode 100644 tests/019-define-func-1-arg-multi.c.expected create mode 100644 tests/020-define-func-2-arg-multi.c.expected create mode 100644 tests/021-define-func-compose.c.expected create mode 100644 tests/022-define-func-arg-with-parens.c.expected create mode 100644 tests/023-define-extra-whitespace.c.expected create mode 100644 tests/024-define-chain-to-self-recursion.c.expected create mode 100644 tests/025-func-macro-as-non-macro.c.expected create mode 100644 tests/026-define-func-extra-newlines.c.expected create mode 100644 tests/027-define-chain-obj-to-func.c.expected create mode 100644 tests/028-define-chain-obj-to-non-func.c.expected create mode 100644 tests/029-define-chain-obj-to-func-with-args.c.expected create mode 100644 tests/030-define-chain-obj-to-func-compose.c.expected create mode 100644 tests/031-define-chain-func-to-func-compose.c.expected create mode 100644 tests/032-define-func-self-recurse.c.expected create mode 100644 tests/033-define-func-self-compose.c.expected create mode 100644 tests/034-define-func-self-compose-non-func.c.expected create mode 100644 tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected create mode 100644 tests/036-define-func-non-macro-multi-token-argument.c.expected create mode 100644 tests/037-finalize-unexpanded-macro.c.expected create mode 100644 tests/038-func-arg-with-commas.c.expected create mode 100644 
tests/039-func-arg-obj-macro-with-comma.c.expected create mode 100644 tests/040-token-pasting.c.expected create mode 100644 tests/041-if-0.c.expected create mode 100644 tests/042-if-1.c.expected create mode 100644 tests/043-if-0-else.c.expected create mode 100644 tests/044-if-1-else.c.expected create mode 100644 tests/045-if-0-elif.c.expected create mode 100644 tests/046-if-1-elsif.c.expected create mode 100644 tests/047-if-elif-else.c.expected create mode 100644 tests/048-if-nested.c.expected create mode 100644 tests/049-if-expression-precedence.c.expected create mode 100644 tests/050-if-defined.c.expected create mode 100644 tests/051-if-relational.c.expected create mode 100644 tests/052-if-bitwise.c.expected create mode 100644 tests/053-if-divide-and-shift.c.expected create mode 100644 tests/054-if-with-macros.c.expected create mode 100644 tests/055-define-chain-obj-to-func-parens-in-text.c.expected create mode 100644 tests/056-macro-argument-with-comma.c.expected create mode 100644 tests/057-empty-arguments.c.expected create mode 100644 tests/058-token-pasting-empty-arguments.c.expected create mode 100644 tests/059-token-pasting-integer.c.expected create mode 100644 tests/060-left-paren-in-macro-right-paren-in-text.c.expected create mode 100644 tests/061-define-chain-obj-to-func-multi.c.expected create mode 100644 tests/062-if-0-skips-garbage.c.expected create mode 100644 tests/063-comments.c.expected create mode 100644 tests/071-punctuator.c.expected create mode 100644 tests/072-token-pasting-same-line.c.expected create mode 100644 tests/099-c99-example.c.expected diff --git a/.gitignore b/.gitignore index b88f0cc75c7..077db8d8e14 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,4 @@ glcpp-parse.c glcpp-parse.h *.o *~ -tests/*.expected -tests/*.gcc -tests/*.glcpp tests/*.out diff --git a/Makefile b/Makefile index 0c06aa880fb..3fb44ac3b2e 100644 --- a/Makefile +++ b/Makefile @@ -22,4 +22,4 @@ test: glcpp clean: rm -f glcpp glcpp-lex.c glcpp-parse.c *.o *~ - 
rm -f tests/*.out tests/*.gcc tests/*.expected tests/*~ + rm -f tests/*.out tests/*~ diff --git a/tests/000-content-with-spaces.c.expected b/tests/000-content-with-spaces.c.expected new file mode 100644 index 00000000000..a7fc918c908 --- /dev/null +++ b/tests/000-content-with-spaces.c.expected @@ -0,0 +1 @@ +this is four tokens diff --git a/tests/001-define.c.expected b/tests/001-define.c.expected new file mode 100644 index 00000000000..a464d9da742 --- /dev/null +++ b/tests/001-define.c.expected @@ -0,0 +1,2 @@ + +1 diff --git a/tests/002-define-chain.c.expected b/tests/002-define-chain.c.expected new file mode 100644 index 00000000000..c6c9ee38a9e --- /dev/null +++ b/tests/002-define-chain.c.expected @@ -0,0 +1,3 @@ + + +1 diff --git a/tests/003-define-chain-reverse.c.expected b/tests/003-define-chain-reverse.c.expected new file mode 100644 index 00000000000..c6c9ee38a9e --- /dev/null +++ b/tests/003-define-chain-reverse.c.expected @@ -0,0 +1,3 @@ + + +1 diff --git a/tests/004-define-recursive.c.expected b/tests/004-define-recursive.c.expected new file mode 100644 index 00000000000..2d07687f8ca --- /dev/null +++ b/tests/004-define-recursive.c.expected @@ -0,0 +1,6 @@ + + + +foo +bar +baz diff --git a/tests/005-define-composite-chain.c.expected b/tests/005-define-composite-chain.c.expected new file mode 100644 index 00000000000..892975c268c --- /dev/null +++ b/tests/005-define-composite-chain.c.expected @@ -0,0 +1,3 @@ + + +a 1 diff --git a/tests/006-define-composite-chain-reverse.c.expected b/tests/006-define-composite-chain-reverse.c.expected new file mode 100644 index 00000000000..892975c268c --- /dev/null +++ b/tests/006-define-composite-chain-reverse.c.expected @@ -0,0 +1,3 @@ + + +a 1 diff --git a/tests/007-define-composite-recursive.c.expected b/tests/007-define-composite-recursive.c.expected new file mode 100644 index 00000000000..0b0b477d9df --- /dev/null +++ b/tests/007-define-composite-recursive.c.expected @@ -0,0 +1,6 @@ + + + +a b c foo +b c a bar +c a 
b baz diff --git a/tests/008-define-empty.c.expected b/tests/008-define-empty.c.expected new file mode 100644 index 00000000000..139597f9cb0 --- /dev/null +++ b/tests/008-define-empty.c.expected @@ -0,0 +1,2 @@ + + diff --git a/tests/009-undef.c.expected b/tests/009-undef.c.expected new file mode 100644 index 00000000000..9c0b35a4518 --- /dev/null +++ b/tests/009-undef.c.expected @@ -0,0 +1,4 @@ + +1 + +foo diff --git a/tests/010-undef-re-define.c.expected b/tests/010-undef-re-define.c.expected new file mode 100644 index 00000000000..5970f49028e --- /dev/null +++ b/tests/010-undef-re-define.c.expected @@ -0,0 +1,6 @@ + +1 + +foo + +2 diff --git a/tests/011-define-func-empty.c.expected b/tests/011-define-func-empty.c.expected new file mode 100644 index 00000000000..139597f9cb0 --- /dev/null +++ b/tests/011-define-func-empty.c.expected @@ -0,0 +1,2 @@ + + diff --git a/tests/012-define-func-no-args.c.expected b/tests/012-define-func-no-args.c.expected new file mode 100644 index 00000000000..9f075f26004 --- /dev/null +++ b/tests/012-define-func-no-args.c.expected @@ -0,0 +1,2 @@ + +bar diff --git a/tests/013-define-func-1-arg-unused.c.expected b/tests/013-define-func-1-arg-unused.c.expected new file mode 100644 index 00000000000..a464d9da742 --- /dev/null +++ b/tests/013-define-func-1-arg-unused.c.expected @@ -0,0 +1,2 @@ + +1 diff --git a/tests/014-define-func-2-arg-unused.c.expected b/tests/014-define-func-2-arg-unused.c.expected new file mode 100644 index 00000000000..a464d9da742 --- /dev/null +++ b/tests/014-define-func-2-arg-unused.c.expected @@ -0,0 +1,2 @@ + +1 diff --git a/tests/015-define-object-with-parens.c.expected b/tests/015-define-object-with-parens.c.expected new file mode 100644 index 00000000000..a70321a4c51 --- /dev/null +++ b/tests/015-define-object-with-parens.c.expected @@ -0,0 +1,4 @@ + +()1() + +()2() diff --git a/tests/016-define-func-1-arg.c.expected b/tests/016-define-func-1-arg.c.expected new file mode 100644 index 00000000000..6bfe04f7381 
--- /dev/null +++ b/tests/016-define-func-1-arg.c.expected @@ -0,0 +1,2 @@ + +((bar)+1) diff --git a/tests/017-define-func-2-args.c.expected b/tests/017-define-func-2-args.c.expected new file mode 100644 index 00000000000..f7a2b8c26cb --- /dev/null +++ b/tests/017-define-func-2-args.c.expected @@ -0,0 +1,2 @@ + +((bar)*(baz)) diff --git a/tests/018-define-func-macro-as-parameter.c.expected b/tests/018-define-func-macro-as-parameter.c.expected new file mode 100644 index 00000000000..c6c9ee38a9e --- /dev/null +++ b/tests/018-define-func-macro-as-parameter.c.expected @@ -0,0 +1,3 @@ + + +1 diff --git a/tests/019-define-func-1-arg-multi.c.expected b/tests/019-define-func-1-arg-multi.c.expected new file mode 100644 index 00000000000..1e89b8cfd0c --- /dev/null +++ b/tests/019-define-func-1-arg-multi.c.expected @@ -0,0 +1,2 @@ + +(this is more than one word) diff --git a/tests/020-define-func-2-arg-multi.c.expected b/tests/020-define-func-2-arg-multi.c.expected new file mode 100644 index 00000000000..19f59f5ecb7 --- /dev/null +++ b/tests/020-define-func-2-arg-multi.c.expected @@ -0,0 +1,2 @@ + +one fish,two fish,red fish,blue fish diff --git a/tests/021-define-func-compose.c.expected b/tests/021-define-func-compose.c.expected new file mode 100644 index 00000000000..87f51f0baca --- /dev/null +++ b/tests/021-define-func-compose.c.expected @@ -0,0 +1,3 @@ + + +(2*((1+(3)))) diff --git a/tests/022-define-func-arg-with-parens.c.expected b/tests/022-define-func-arg-with-parens.c.expected new file mode 100644 index 00000000000..1dfc6698bb7 --- /dev/null +++ b/tests/022-define-func-arg-with-parens.c.expected @@ -0,0 +1,2 @@ + +(argument(including parens)for the win) diff --git a/tests/023-define-extra-whitespace.c.expected b/tests/023-define-extra-whitespace.c.expected new file mode 100644 index 00000000000..9c58275d0f9 --- /dev/null +++ b/tests/023-define-extra-whitespace.c.expected @@ -0,0 +1,8 @@ + + + + +1 +2 +3 4 +5 6 7 diff --git 
a/tests/024-define-chain-to-self-recursion.c.expected b/tests/024-define-chain-to-self-recursion.c.expected new file mode 100644 index 00000000000..15600af546b --- /dev/null +++ b/tests/024-define-chain-to-self-recursion.c.expected @@ -0,0 +1,3 @@ + + +foo diff --git a/tests/025-func-macro-as-non-macro.c.expected b/tests/025-func-macro-as-non-macro.c.expected new file mode 100644 index 00000000000..4a59f0520e3 --- /dev/null +++ b/tests/025-func-macro-as-non-macro.c.expected @@ -0,0 +1,2 @@ + +foo bar diff --git a/tests/026-define-func-extra-newlines.c.expected b/tests/026-define-func-extra-newlines.c.expected new file mode 100644 index 00000000000..5e3c70f2cc5 --- /dev/null +++ b/tests/026-define-func-extra-newlines.c.expected @@ -0,0 +1,3 @@ + + +bar diff --git a/tests/027-define-chain-obj-to-func.c.expected b/tests/027-define-chain-obj-to-func.c.expected new file mode 100644 index 00000000000..94c15f95059 --- /dev/null +++ b/tests/027-define-chain-obj-to-func.c.expected @@ -0,0 +1,3 @@ + + +success diff --git a/tests/028-define-chain-obj-to-non-func.c.expected b/tests/028-define-chain-obj-to-non-func.c.expected new file mode 100644 index 00000000000..94c15f95059 --- /dev/null +++ b/tests/028-define-chain-obj-to-non-func.c.expected @@ -0,0 +1,3 @@ + + +success diff --git a/tests/029-define-chain-obj-to-func-with-args.c.expected b/tests/029-define-chain-obj-to-func-with-args.c.expected new file mode 100644 index 00000000000..94c15f95059 --- /dev/null +++ b/tests/029-define-chain-obj-to-func-with-args.c.expected @@ -0,0 +1,3 @@ + + +success diff --git a/tests/030-define-chain-obj-to-func-compose.c.expected b/tests/030-define-chain-obj-to-func-compose.c.expected new file mode 100644 index 00000000000..bed826e7831 --- /dev/null +++ b/tests/030-define-chain-obj-to-func-compose.c.expected @@ -0,0 +1,4 @@ + + + +success diff --git a/tests/031-define-chain-func-to-func-compose.c.expected b/tests/031-define-chain-func-to-func-compose.c.expected new file mode 100644 index 
00000000000..bed826e7831 --- /dev/null +++ b/tests/031-define-chain-func-to-func-compose.c.expected @@ -0,0 +1,4 @@ + + + +success diff --git a/tests/032-define-func-self-recurse.c.expected b/tests/032-define-func-self-recurse.c.expected new file mode 100644 index 00000000000..983f9417401 --- /dev/null +++ b/tests/032-define-func-self-recurse.c.expected @@ -0,0 +1,2 @@ + +foo(2*(3)) diff --git a/tests/033-define-func-self-compose.c.expected b/tests/033-define-func-self-compose.c.expected new file mode 100644 index 00000000000..08183623643 --- /dev/null +++ b/tests/033-define-func-self-compose.c.expected @@ -0,0 +1,2 @@ + +foo(2*(foo(2*(3)))) diff --git a/tests/034-define-func-self-compose-non-func.c.expected b/tests/034-define-func-self-compose-non-func.c.expected new file mode 100644 index 00000000000..3f808fe665d --- /dev/null +++ b/tests/034-define-func-self-compose-non-func.c.expected @@ -0,0 +1,2 @@ + +foo diff --git a/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected b/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected new file mode 100644 index 00000000000..09dfdd64e9b --- /dev/null +++ b/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected @@ -0,0 +1,2 @@ + +1+foo diff --git a/tests/036-define-func-non-macro-multi-token-argument.c.expected b/tests/036-define-func-non-macro-multi-token-argument.c.expected new file mode 100644 index 00000000000..580ed9599c5 --- /dev/null +++ b/tests/036-define-func-non-macro-multi-token-argument.c.expected @@ -0,0 +1,3 @@ + + +more success diff --git a/tests/037-finalize-unexpanded-macro.c.expected b/tests/037-finalize-unexpanded-macro.c.expected new file mode 100644 index 00000000000..e804d7e4f9f --- /dev/null +++ b/tests/037-finalize-unexpanded-macro.c.expected @@ -0,0 +1,3 @@ + + +expand(just once) diff --git a/tests/038-func-arg-with-commas.c.expected b/tests/038-func-arg-with-commas.c.expected new file mode 100644 index 00000000000..6544adb3a25 --- 
/dev/null +++ b/tests/038-func-arg-with-commas.c.expected @@ -0,0 +1,2 @@ + +success diff --git a/tests/039-func-arg-obj-macro-with-comma.c.expected b/tests/039-func-arg-obj-macro-with-comma.c.expected new file mode 100644 index 00000000000..8a15397a033 --- /dev/null +++ b/tests/039-func-arg-obj-macro-with-comma.c.expected @@ -0,0 +1,3 @@ + + +(two,words) diff --git a/tests/040-token-pasting.c.expected b/tests/040-token-pasting.c.expected new file mode 100644 index 00000000000..48e836ec3fa --- /dev/null +++ b/tests/040-token-pasting.c.expected @@ -0,0 +1,2 @@ + +onetoken diff --git a/tests/041-if-0.c.expected b/tests/041-if-0.c.expected new file mode 100644 index 00000000000..8b506b32d55 --- /dev/null +++ b/tests/041-if-0.c.expected @@ -0,0 +1,5 @@ +success_1 + + + +success_2 diff --git a/tests/042-if-1.c.expected b/tests/042-if-1.c.expected new file mode 100644 index 00000000000..a6ae9465a97 --- /dev/null +++ b/tests/042-if-1.c.expected @@ -0,0 +1,5 @@ +success_1 + +success_2 + +success_3 diff --git a/tests/043-if-0-else.c.expected b/tests/043-if-0-else.c.expected new file mode 100644 index 00000000000..3d7e6be96c8 --- /dev/null +++ b/tests/043-if-0-else.c.expected @@ -0,0 +1,7 @@ +success_1 + + + +success_2 + +success_3 diff --git a/tests/044-if-1-else.c.expected b/tests/044-if-1-else.c.expected new file mode 100644 index 00000000000..4a31e1cfa9e --- /dev/null +++ b/tests/044-if-1-else.c.expected @@ -0,0 +1,7 @@ +success_1 + +success_2 + + + +success_3 diff --git a/tests/045-if-0-elif.c.expected b/tests/045-if-0-elif.c.expected new file mode 100644 index 00000000000..a9bb1588e4f --- /dev/null +++ b/tests/045-if-0-elif.c.expected @@ -0,0 +1,11 @@ +success_1 + + + + + +success_3 + + + +success_4 diff --git a/tests/046-if-1-elsif.c.expected b/tests/046-if-1-elsif.c.expected new file mode 100644 index 00000000000..a4995713ca5 --- /dev/null +++ b/tests/046-if-1-elsif.c.expected @@ -0,0 +1,11 @@ +success_1 + +success_2 + + + + + + + +success_3 diff --git 
a/tests/047-if-elif-else.c.expected b/tests/047-if-elif-else.c.expected new file mode 100644 index 00000000000..54d30861197 --- /dev/null +++ b/tests/047-if-elif-else.c.expected @@ -0,0 +1,11 @@ +success_1 + + + + + + + +success_2 + +success_3 diff --git a/tests/048-if-nested.c.expected b/tests/048-if-nested.c.expected new file mode 100644 index 00000000000..8beb9c32c37 --- /dev/null +++ b/tests/048-if-nested.c.expected @@ -0,0 +1,11 @@ +success_1 + + + + + + + + + +success_2 diff --git a/tests/049-if-expression-precedence.c.expected b/tests/049-if-expression-precedence.c.expected new file mode 100644 index 00000000000..729bdd15f80 --- /dev/null +++ b/tests/049-if-expression-precedence.c.expected @@ -0,0 +1,5 @@ + + + +success + diff --git a/tests/050-if-defined.c.expected b/tests/050-if-defined.c.expected new file mode 100644 index 00000000000..737eb8d9403 --- /dev/null +++ b/tests/050-if-defined.c.expected @@ -0,0 +1,17 @@ + + + +success_1 + + + +success_2 + + + + + + + +success_3 + diff --git a/tests/051-if-relational.c.expected b/tests/051-if-relational.c.expected new file mode 100644 index 00000000000..652fefdd43b --- /dev/null +++ b/tests/051-if-relational.c.expected @@ -0,0 +1,35 @@ + + + +success_1 + + + +success_2 + + + + + +success_3 + + + + + +success_3 + + + + + + + +success_4 + + + +success_5 + + + diff --git a/tests/052-if-bitwise.c.expected b/tests/052-if-bitwise.c.expected new file mode 100644 index 00000000000..44e52b206e5 --- /dev/null +++ b/tests/052-if-bitwise.c.expected @@ -0,0 +1,20 @@ + + + +success_1 + + +success_2 + + + + + + +success_3 + + +success_4 + + + diff --git a/tests/053-if-divide-and-shift.c.expected b/tests/053-if-divide-and-shift.c.expected new file mode 100644 index 00000000000..7e78e0454e0 --- /dev/null +++ b/tests/053-if-divide-and-shift.c.expected @@ -0,0 +1,15 @@ + + + +success_1 + + +success_2 + + + + + + +success_3 + diff --git a/tests/054-if-with-macros.c.expected b/tests/054-if-with-macros.c.expected new file mode 
100644 index 00000000000..70f737c90a9 --- /dev/null +++ b/tests/054-if-with-macros.c.expected @@ -0,0 +1,34 @@ + + + + + + + +success_1 + + +success_2 + + + + +success_3 + + + + +success_4 + + + + + + +success_5 + + +success_6 + + + diff --git a/tests/055-define-chain-obj-to-func-parens-in-text.c.expected b/tests/055-define-chain-obj-to-func-parens-in-text.c.expected new file mode 100644 index 00000000000..94c15f95059 --- /dev/null +++ b/tests/055-define-chain-obj-to-func-parens-in-text.c.expected @@ -0,0 +1,3 @@ + + +success diff --git a/tests/056-macro-argument-with-comma.c.expected b/tests/056-macro-argument-with-comma.c.expected new file mode 100644 index 00000000000..bed826e7831 --- /dev/null +++ b/tests/056-macro-argument-with-comma.c.expected @@ -0,0 +1,4 @@ + + + +success diff --git a/tests/057-empty-arguments.c.expected b/tests/057-empty-arguments.c.expected new file mode 100644 index 00000000000..7d97e15e29d --- /dev/null +++ b/tests/057-empty-arguments.c.expected @@ -0,0 +1,6 @@ + +success + +success + +success diff --git a/tests/058-token-pasting-empty-arguments.c.expected b/tests/058-token-pasting-empty-arguments.c.expected new file mode 100644 index 00000000000..e0967a1b951 --- /dev/null +++ b/tests/058-token-pasting-empty-arguments.c.expected @@ -0,0 +1,5 @@ + +ab +a +b + diff --git a/tests/059-token-pasting-integer.c.expected b/tests/059-token-pasting-integer.c.expected new file mode 100644 index 00000000000..f1288aa7cb7 --- /dev/null +++ b/tests/059-token-pasting-integer.c.expected @@ -0,0 +1,4 @@ + +12 +1000 +identifier2 diff --git a/tests/060-left-paren-in-macro-right-paren-in-text.c.expected b/tests/060-left-paren-in-macro-right-paren-in-text.c.expected new file mode 100644 index 00000000000..3e5501aa6e8 --- /dev/null +++ b/tests/060-left-paren-in-macro-right-paren-in-text.c.expected @@ -0,0 +1,3 @@ + + +5*2 diff --git a/tests/061-define-chain-obj-to-func-multi.c.expected b/tests/061-define-chain-obj-to-func-multi.c.expected new file mode 100644 
index 00000000000..15eb64b97f1 --- /dev/null +++ b/tests/061-define-chain-obj-to-func-multi.c.expected @@ -0,0 +1,5 @@ + + + + +success diff --git a/tests/062-if-0-skips-garbage.c.expected b/tests/062-if-0-skips-garbage.c.expected new file mode 100644 index 00000000000..3f2ff2d6cc8 --- /dev/null +++ b/tests/062-if-0-skips-garbage.c.expected @@ -0,0 +1,5 @@ + + + + + diff --git a/tests/063-comments.c.expected b/tests/063-comments.c.expected new file mode 100644 index 00000000000..4998d76cc22 --- /dev/null +++ b/tests/063-comments.c.expected @@ -0,0 +1,13 @@ + + + +f = g /h; + l(); +m = n ++ p; + +more code here + +are not treated like comments. + + diff --git a/tests/071-punctuator.c.expected b/tests/071-punctuator.c.expected new file mode 100644 index 00000000000..959d6825988 --- /dev/null +++ b/tests/071-punctuator.c.expected @@ -0,0 +1 @@ +a = b diff --git a/tests/072-token-pasting-same-line.c.expected b/tests/072-token-pasting-same-line.c.expected new file mode 100644 index 00000000000..7b80af7e465 --- /dev/null +++ b/tests/072-token-pasting-same-line.c.expected @@ -0,0 +1,2 @@ + +success_1 success_2 success_3 diff --git a/tests/099-c99-example.c.expected b/tests/099-c99-example.c.expected new file mode 100644 index 00000000000..352bbff48f5 --- /dev/null +++ b/tests/099-c99-example.c.expected @@ -0,0 +1,16 @@ + + + + + + + + + + + + + +f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1); +f(2 * (2 +(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1); +int i[] = { 1, 23, 4, 5, }; diff --git a/tests/glcpp-test b/tests/glcpp-test index 24110333a5e..396f6e175e8 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -2,9 +2,6 @@ for test in *.c; do echo "Testing $test" - ../glcpp < $test > $test.glcpp - grep -v '^ *$' < $test.glcpp > $test.out || true - gcc -E $test -o $test.gcc - grep -v '^#' < $test.gcc | grep -v '^$' | sed -r -e 's/^ +/ /' > $test.expected || true + ../glcpp < $test > $test.out diff -u $test.expected $test.out done From 
2ab0b13dd9b281b9c68b3d3e2fb01d19564d115e Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 4 Jun 2010 14:53:58 -0700 Subject: [PATCH 148/148] Disallow defining macros whose names start with "__" or "GL_". The GLSL specification reserves these for future use. --- glcpp-parse.y | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/glcpp-parse.y b/glcpp-parse.y index dd8e133f550..5072c48ee8d 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -1344,6 +1344,22 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, _token_list_print (list); } +void +_check_for_reserved_macro_name (const char *identifier) +{ + /* According to the GLSL specification, macro names starting with "__" + * or "GL_" are reserved for future use. So, don't allow them. + */ + if (strncmp(identifier, "__", 2) == 0) { + fprintf (stderr, "Error: Macro names starting with \"__\" are reserved.\n"); + exit(1); + } + if (strncmp(identifier, "GL_", 3) == 0) { + fprintf (stderr, "Error: Macro names starting with \"GL_\" are reserved.\n"); + exit(1); + } +} + void _define_object_macro (glcpp_parser_t *parser, const char *identifier, @@ -1351,6 +1367,8 @@ _define_object_macro (glcpp_parser_t *parser, { macro_t *macro; + _check_for_reserved_macro_name(identifier); + macro = xtalloc (parser, macro_t); macro->is_function = 0; @@ -1369,6 +1387,8 @@ _define_function_macro (glcpp_parser_t *parser, { macro_t *macro; + _check_for_reserved_macro_name(identifier); + macro = xtalloc (parser, macro_t); macro->is_function = 1;