2
0
mirror of https://github.com/boostorg/spirit.git synced 2026-01-19 04:42:11 +00:00

First commit from BOOST_REVIEW branch spirit.sf.net

[SVN r17109]
This commit is contained in:
Joel de Guzman
2003-01-31 11:11:39 +00:00
parent cab9f1597a
commit 1f98134e30
130 changed files with 43246 additions and 0 deletions

View File

@@ -0,0 +1,17 @@
The C grammar parser is a full working example of using the Spirit
library and is able to parse the full ANSI C language.
The C grammar is adapted from
http://www.lysator.liu.se/c/ANSI-C-grammar-y.html
http://www.lysator.liu.se/c/ANSI-C-grammar-l.html
Not implemented is the analysis of typedef's because it requires semantic
analysis of the parsed code, which is beyond the scope of this sample.
The test files in this directory are adapted from
http://www.bagley.org/~doug/shootout
and were modified slightly to avoid preprocessing (#include's and
#define's are commented out, where possible).
The test files are parsed correctly.

View File

@@ -0,0 +1,20 @@
/* -*- mode: c -*-
* $Id$
* http://www.bagley.org/~doug/shootout/
*/
//#include <stdio.h>
//#include <stdlib.h>
//#include <unistd.h>
/* Ackermann function A(M, N), computed by direct recursion. */
int Ack(int M, int N) {
    if (M == 0) {
        return N + 1;
    }
    if (N == 0) {
        return Ack(M - 1, 1);
    }
    return Ack(M - 1, Ack(M, N - 1));
}
/* Prints Ack(3, n); n is the single command-line argument, or 1 if absent. */
int main(int argc, char *argv[]) {
    int n = 1;
    if (argc == 2) {
        n = atoi(argv[1]);
    }
    printf("Ack(3,%d): %d\n", n, Ack(3, n));
    /* sleep long enough so we can measure memory usage */
    sleep(1);
    return 0;
}

View File

@@ -0,0 +1,40 @@
/* -*- mode: c -*-
* $Id$
* http://www.bagley.org/~doug/shootout/
*
* this program is modified from:
* http://cm.bell-labs.com/cm/cs/who/bwk/interps/pap.html
* "Timing Trials, or, the Trials of Timing: Experiments with Scripting
* and User-Interface Languages" by Brian W. Kernighan and
* Christopher J. Van Wyk.
*
* I added free() to deallocate memory.
*/
//#include <stdio.h>
//#include <stdlib.h>
/*
 * Array-access benchmark: fills x with 1..n, adds x into y 1000 times and
 * prints the first and last element of y.
 * n is the single command-line argument (default 1) and must be >= 1.
 * Returns 0 on success, 1 on a bad size or allocation failure.
 */
int
main(int argc, char *argv[]) {
    int n = ((argc == 2) ? atoi(argv[1]) : 1);
    int i, k, *x, *y;

    /* y[0] and y[n-1] are read below, so at least one element is needed */
    if (n < 1) {
        fprintf(stderr, "array size must be at least 1\n");
        return(1);
    }

    x = (int *) calloc(n, sizeof(int));
    y = (int *) calloc(n, sizeof(int));
    if (!x || !y) {
        perror("calloc");
        free(x);
        free(y);
        return(1);
    }

    for (i = 0; i < n; i++) {
        x[i] = i + 1;
    }
    for (k=0; k<1000; k++) {
        for (i = n-1; i >= 0; i--) {
            y[i] += x[i];
        }
    }

    fprintf(stdout, "%d %d\n", y[0], y[n-1]);

    free(x);
    free(y);

    return(0);
}

View File

@@ -0,0 +1,52 @@
/* -*- mode: c -*-
* $Id$
* http://www.bagley.org/~doug/shootout/
* Author: Waldemar Hebisch (hebisch@math.uni.wroc.pl)
* Optimizations: Michael Herf (mike@herfconsulting.com)
*/
//#include <stdio.h>
//#include <unistd.h>
//#define CHAR int
//#define BSIZ 4096
unsigned long ws[256];
char buff[4096];
/*
 * Counts lines, words and bytes read from file descriptor 0 and prints
 * "<lines> <words> <bytes>".
 *
 * ws[c] has bit 0 set for the whitespace characters ' ', '\t' and '\n';
 * '\n' additionally carries 65536 so that one addition per byte updates
 * both the line count (upper 16 bits of tcnt) and the number of
 * space/non-space transitions (lower 16 bits of tcnt).
 */
int main() {
    int w_cnt=0,l_cnt=0,b_cnt=0,cnt;
    unsigned long was_sp = 1;
    unsigned char *pp, *pe;

    /* Fill tables */
    for (cnt = 0; cnt < 256; cnt++) ws[cnt] = 0;
    /* also: ws['\r']=ws['\v']=ws['\f']= */
    ws[' ']=ws['\t']=ws['\n']=1;
    ws['\n']=65536 + 1;

    /* Main loop; stop on end of input (0) and on read errors (-1) */
    while((cnt=read(0,buff,sizeof(buff))) > 0) {
        unsigned long tcnt = 0;

        b_cnt += cnt;
        /* scan as unsigned bytes so that ws[] is never indexed negatively */
        pp = (unsigned char *) buff;
        pe = pp + cnt;

        while(pp < pe) {
            tcnt += ws[*pp] ^ was_sp;
            was_sp = ws[*pp] & 0xFFFF;
            pp ++;
        }
        w_cnt += tcnt & 0xFFFF;
        l_cnt += tcnt >> 16;
    }

    /* was_sp still describes the last byte seen: if the input ended inside
       a word, add the missing closing transition before halving */
    w_cnt += (was_sp^1)&1;
    w_cnt>>=1;

    printf("%d %d %d\n", l_cnt, w_cnt, b_cnt);
    return 0;
}

View File

@@ -0,0 +1,21 @@
/* -*- mode: c -*-
* $Id$
* http://www.bagley.org/~doug/shootout/
*/
/*
#include <stdio.h>
#include <stdlib.h>
*/
/* Naive doubly-recursive Fibonacci; this benchmark uses fib(0) == fib(1) == 1. */
unsigned long
fib(unsigned long n) {
    if (n < 2) {
        return 1;
    }
    return fib(n - 2) + fib(n - 1);
}
/* Prints fib(N); N is the single command-line argument, or 1 if absent. */
int
main(int argc, char *argv[]) {
    int N = ((argc == 2) ? atoi(argv[1]) : 1);
    /* fib() returns unsigned long, so the conversion must be %lu */
    printf("%lu\n", fib(N));
    return(0);
}

View File

@@ -0,0 +1,33 @@
/* -*- mode: c -*-
* $Id$
* http://www.bagley.org/~doug/shootout/
*/
/*
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "../../Include/simple_hash.h"
*/
/*
 * Hash-access benchmark: stores the hexadecimal spellings of 1..n as keys,
 * then counts for how many decimal spellings of n..1 ht_find() returns a
 * non-null result, and prints that count.
 */
int main(int argc, char *argv[]) {
    int n = 1;
    int i;
    int c = 0;
    char buf[32];
    struct ht_ht *ht;

    if (argc == 2) {
        n = atoi(argv[1]);
    }
    ht = ht_create(n);

    i = 1;
    while (i <= n) {
        sprintf(buf, "%x", i);
        (ht_find_new(ht, buf))->val = i;
        i++;
    }

    i = n;
    while (i > 0) {
        sprintf(buf, "%d", i);
        if (ht_find(ht, buf)) {
            c++;
        }
        i--;
    }

    ht_destroy(ht);
    printf("%d\n", c);
    return 0;
}

View File

@@ -0,0 +1,40 @@
/* -*- mode: c -*-
* $Id$
* http://www.bagley.org/~doug/shootout/
*/
/*
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "../../Include/simple_hash.h"
*/
/*
 * Second hash benchmark: fills ht1 with foo_0..foo_9999, then n times adds
 * every ht1 value onto the entry with the same key in ht2, and finally
 * prints two sample values from each table.
 */
int main(int argc, char *argv[]) {
    int n = 1;
    int i;
    char buf[32];
    struct ht_ht *ht1;
    struct ht_ht *ht2;
    struct ht_node *node;

    if (argc == 2) {
        n = atoi(argv[1]);
    }
    ht1 = ht_create(10000);
    ht2 = ht_create(10000);

    i = 0;
    while (i <= 9999) {
        sprintf(buf, "foo_%d", i);
        ht_find_new(ht1, buf)->val = i;
        ++i;
    }

    i = 0;
    while (i < n) {
        node = ht_first(ht1);
        while (node) {
            ht_find_new(ht2, node->key)->val += node->val;
            node = ht_next(ht1);
        }
        ++i;
    }

    printf("%d %d %d %d\n",
        (ht_find(ht1, "foo_1"))->val,
        (ht_find(ht1, "foo_9999"))->val,
        (ht_find(ht2, "foo_1"))->val,
        (ht_find(ht2, "foo_9999"))->val);

    ht_destroy(ht1);
    ht_destroy(ht2);
    return 0;
}

View File

@@ -0,0 +1,74 @@
/* -*- mode: c -*-
* $Id$
* http://www.bagley.org/~doug/shootout/
*/
/*
#include <stdlib.h>
#include <math.h>
#include <stdio.h>
#define IM 139968
#define IA 3877
#define IC 29573
*/
/*
 * Linear congruential generator (constants IA, IC, IM): advances the
 * static state and returns it scaled as max * last / IM.
 */
double
gen_random(double max) {
    static long last = 42;

    last = (last * IA + IC) % IM;
    return( max * last / IM );
}
/*
 * In-place heapsort of ra[1..n] into ascending order.
 * The array is 1-based: ra[0] is never touched, so the caller must supply
 * n + 1 elements.
 * Arrays with fewer than two elements are already sorted and are returned
 * unchanged; without this guard the loop's only exit test (--ir == 1) is
 * never met for n < 2 and the code walks off the front of the array.
 */
void
heapsort(int n, double *ra) {
    int i, j;
    int ir = n;
    int l = (n >> 1) + 1;
    double rra;

    if (n < 2) {
        return;
    }

    for (;;) {
        if (l > 1) {
            /* heap construction phase: take the next parent to sift down */
            rra = ra[--l];
        } else {
            /* selection phase: move the current maximum to the end */
            rra = ra[ir];
            ra[ir] = ra[1];
            if (--ir == 1) {
                ra[1] = rra;
                return;
            }
        }
        /* sift rra down from position l */
        i = l;
        j = l << 1;
        while (j <= ir) {
            if (j < ir && ra[j] < ra[j+1]) { ++j; }
            if (rra < ra[j]) {
                ra[i] = ra[j];
                j += (i = j);
            } else {
                j = ir + 1;
            }
        }
        ra[i] = rra;
    }
}
/*
 * Sorts N pseudo-random doubles with heapsort() and prints the largest.
 * N is the single command-line argument (default 1) and must be >= 1.
 * Returns 0 on success, 1 on a bad size or allocation failure.
 */
int
main(int argc, char *argv[]) {
    int N = ((argc == 2) ? atoi(argv[1]) : 1);
    double *ary;
    int i;

    /* ary[N] is printed below, so at least one value is required */
    if (N < 1) {
        fprintf(stderr, "element count must be at least 1\n");
        return(1);
    }

    /* create an array of N random doubles (1-based, hence N+1 slots) */
    ary = (double *)malloc((N+1) * sizeof(double));
    if (!ary) {
        perror("malloc ary");
        return(1);
    }
    for (i=1; i<=N; i++) {
        ary[i] = gen_random(1);
    }

    heapsort(N, ary);

    printf("%.10f\n", ary[N]);

    free(ary);
    return(0);
}

View File

@@ -0,0 +1,13 @@
/* -*- mode: c -*-
* $Id$
* http://www.bagley.org/~doug/shootout/
*/
/*
#include <stdio.h>
*/
/* Writes the greeting to standard output and reports success. */
int main() {
    fputs("hello world\n", stdout);

    return 0;
}

View File

@@ -0,0 +1,122 @@
/* -*- mode: c -*-
* $Id$
* http://www.bagley.org/~doug/shootout/
* from Waldek Hebisch
*/
/*
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define MAXLINELEN 128
*/
/* kmedian permutes elements of a to get
a[i]<=a[k] for i<k
a[i]>=a[k] for i>k
See. N. Wirth, Algorithms+data structures = Programs
*/
/*
 * Selection by repeated partitioning: rearranges a[0..n-1] in place and
 * returns once the partition boundaries enclose index k.
 * Each pass narrows the working range by adjusting a, n and k.
 */
void kmedian(double *a, int n, int k)
{
while (1){
/* pivot value b is taken from a randomly chosen element of the range */
int j=random()%n;
double b = a[j];
int i=0;
j = n-1;
/* partition: i scans up past elements < b, j scans down past elements > b */
while(1) {
while( a[i]<b ) i++;
while( a[j]>b ) j--;
if(i<j) {
/* both elements are on the wrong side: exchange them and move on */
double tmp=a[i];
a[i]=a[j];
a[j]=tmp;
i++;
j--;
} else {
/* scans have met or crossed: adjust the two boundaries and stop */
if(a[j]<b) j++;
if(a[i]>b) i--;
break;
}
}
if(i<k) {
/* k lies above i: continue with the sub-array starting at a[i+1] */
k-=i+1;
n-=i+1;
a+=i+1;
} else if (j>k) {
/* k lies below j: continue with the first j elements */
n=j;
} else return;
}
}
/* Returns the largest of a[0..n-1]; a[0] is always read, even for n < 1. */
double max(double *a, int n)
{
    double best = a[0];
    int idx = 1;

    while (idx < n) {
        if (a[idx] > best) {
            best = a[idx];
        }
        idx++;
    }
    return best;
}
/*
 * Reads one number per line from stdin and prints count, median, mean,
 * average deviation, standard deviation, variance, skew and kurtosis.
 * Returns 0 on success, 1 when there is no input or memory runs out.
 */
int
main() {
    char line[MAXLINELEN];
    int i, n = 0, mid = 0;
    double sum = 0.0;
    double mean = 0.0;
    double average_deviation = 0.0;
    double standard_deviation = 0.0;
    double variance = 0.0;
    double skew = 0.0;
    double kurtosis = 0.0;
    double median = 0.0;
    int array_size = 4096;
    double *grown;
    double *nums = (double *)malloc(array_size * sizeof(double));

    if (!nums) {
        perror("malloc nums");
        return(1);
    }

    while (fgets(line, MAXLINELEN, stdin)) {
        sum += (nums[n++] = atof(line));
        if (n == array_size) {
            /* grow through a temporary so nums stays valid on failure */
            array_size *= 2;
            grown = (double *)realloc(nums, array_size * sizeof(double));
            if (!grown) {
                perror("realloc nums");
                free(nums);
                return(1);
            }
            nums = grown;
        }
    }

    /* kmedian() computes random() % n, which must not see n == 0 */
    if (n == 0) {
        fprintf(stderr, "no input values\n");
        free(nums);
        return(1);
    }

    mean = sum/n;
    for (i=0; i<n; i++) {
        double dev = nums[i] - mean;
        double dev2=dev*dev;
        double dev3=dev2*dev;
        double dev4=dev3*dev;
        average_deviation += fabs(dev);
        variance += dev2;
        skew += dev3;
        kurtosis += dev4;
    }
    average_deviation /= n;
    variance /= (n - 1);
    standard_deviation = sqrt(variance);
    if (variance) {
        skew /= (n * variance * standard_deviation);
        kurtosis = (kurtosis/(n * variance * variance)) - 3.0;
    }

    /* for even n the median is the mean of nums[mid] and the largest
       element below it */
    mid = (n/2);
    kmedian(nums, n, mid);
    median = n % 2 ? nums[mid] : (nums[mid] + max(nums,mid))/2;

    free(nums);

    printf("n: %d\n", n);
    printf("median: %f\n", median);
    printf("mean: %f\n", mean);
    printf("average_deviation: %f\n", average_deviation);
    printf("standard_deviation: %f\n", standard_deviation);
    printf("variance: %f\n", variance);
    printf("skew: %f\n", skew);
    printf("kurtosis: %f\n", kurtosis);
    return(0);
}

View File

@@ -0,0 +1,68 @@
/* -*- mode: c -*-
* $Id$
* http://www.bagley.org/~doug/shootout/
*/
/*
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#define SIZE 30
*/
/*
 * Allocates a rows x cols matrix as an array of row pointers and fills it
 * row by row with 1, 2, 3, ...  Release it with freematrix().
 */
int **mkmatrix(int rows, int cols) {
    int r, c;
    int next = 1;
    int *row;
    int **m = (int **) malloc(rows * sizeof(int *));

    for (r = 0; r < rows; r++) {
        row = (int *) malloc(cols * sizeof(int));
        m[r] = row;
        for (c = 0; c < cols; c++) {
            row[c] = next++;
        }
    }
    return m;
}
/* Sets every element of the rows x cols matrix m to zero. */
void zeromatrix(int rows, int cols, int **m) {
    int r, c;
    int *row;

    for (r = 0; r < rows; r++) {
        row = m[r];
        for (c = 0; c < cols; c++) {
            row[c] = 0;
        }
    }
}
/* Frees the rows of m from last to first, then the row-pointer array. */
void freematrix(int rows, int **m) {
    int r;

    for (r = rows - 1; r >= 0; r--) {
        free(m[r]);
    }
    free(m);
}
/*
 * Stores the product m1 * m2 in m3 and returns m3.
 * Note that cols is also used as the inner dimension of the product.
 */
int **mmult(int rows, int cols, int **m1, int **m2, int **m3) {
    int r, c, t;
    int acc;
    int *left;

    for (r = 0; r < rows; r++) {
        left = m1[r];
        for (c = 0; c < cols; c++) {
            acc = 0;
            for (t = 0; t < cols; t++) {
                acc += left[t] * m2[t][c];
            }
            m3[r][c] = acc;
        }
    }
    return m3;
}
/*
 * Multiplies two SIZE x SIZE matrices n times (n from the command line,
 * default 1) and prints four sample elements of the product.
 */
int main(int argc, char *argv[]) {
    int n = 1;
    int i;
    int **m1;
    int **m2;
    int **mm;

    if (argc == 2) {
        n = atoi(argv[1]);
    }
    m1 = mkmatrix(SIZE, SIZE);
    m2 = mkmatrix(SIZE, SIZE);
    mm = mkmatrix(SIZE, SIZE);

    for (i = n; i > 0; i--) {
        mm = mmult(SIZE, SIZE, m1, m2, mm);
    }
    printf("%d %d %d %d\n", mm[0][0], mm[2][3], mm[3][2], mm[4][4]);

    freematrix(SIZE, m1);
    freematrix(SIZE, m2);
    freematrix(SIZE, mm);
    return 0;
}

View File

@@ -0,0 +1,27 @@
/* -*- mode: c -*-
* $Id$
* http://www.bagley.org/~doug/shootout/
*/
/*
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
*/
/*
 * Runs six nested loops of n iterations each (n from the command line,
 * default 1), counts the innermost iterations and prints the count.
 */
int
main(int argc, char *argv[]) {
    int n = 1;
    int i1, i2, i3, i4, i5, i6;
    int x = 0;

    if (argc == 2) {
        n = atoi(argv[1]);
    }

    for (i1 = 0; i1 < n; i1++) {
        for (i2 = 0; i2 < n; i2++) {
            for (i3 = 0; i3 < n; i3++) {
                for (i4 = 0; i4 < n; i4++) {
                    for (i5 = 0; i5 < n; i5++) {
                        for (i6 = 0; i6 < n; i6++) {
                            x += 1;
                        }
                    }
                }
            }
        }
    }

    printf("%d\n", x);
    return 0;
}

View File

@@ -0,0 +1,31 @@
/* -*- mode: c -*-
* $Id$
* http://www.bagley.org/~doug/shootout/
*/
/*
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#define IM 139968
#define IA 3877
#define IC 29573
*/
/*
 * Linear congruential generator (constants IA, IC, IM): advances the
 * static state and returns it scaled as max * last / IM.
 */
/*inline*/ double gen_random(double max) {
    static long last = 42;

    return( max * (last = (last * IA + IC) % IM) / IM );
}
/*
 * Draws N pseudo-random numbers (N from the command line, default 1) and
 * prints the last one with nine decimals.
 */
int main(int argc, char *argv[]) {
    int N = ((argc == 2) ? atoi(argv[1]) : 1) - 1;

    /* "> 0" keeps an argument below 1 from counting down through the whole
       negative range */
    while (N-- > 0) {
        gen_random(100.0);
    }
    printf("%.9f\n", gen_random(100.0));
    return(0);
}

View File

@@ -0,0 +1,40 @@
/* -*- mode: c -*-
* $Id$
* http://www.bagley.org/~doug/shootout/
* from Brad Knotwell
*/
/*
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define MAXREAD 4096
*/
/*
 * Reads all of file descriptor 0 into a growing buffer and writes the
 * lines to stdout in reverse order.
 * Returns EXIT_SUCCESS, or EXIT_FAILURE on allocation or read errors.
 */
int main(int argc, char *argv[]) {
    int nread, len = 0, size = (4 * MAXREAD);
    char *cp, *buf = malloc(size + 1);

    if (buf == NULL)
        return(fprintf(stderr,"malloc failed\n"),EXIT_FAILURE);

    while((nread = read(0,(buf+len),MAXREAD)) > 0) {
        len += nread;
        /* keep room for one more full read */
        if(MAXREAD > (size - len)) {
            size <<= 1;
            if((buf = realloc(buf,size+1)) == NULL)
                return(fprintf(stderr,"realloc failed\n"),EXIT_FAILURE);
        }
    }

    if(nread == -1) return(fprintf(stderr,"read\n"),EXIT_FAILURE);

    /* empty input: the backward scan below would start before buf */
    if (len == 0) {
        free(buf);
        return(EXIT_SUCCESS);
    }

    /* walk backwards; nread counts the bytes seen since the last newline */
    for (cp = buf+len-1; cp != buf; --cp,nread++)
        if ('\n' == *cp) {
            fwrite(cp+1,nread,1,stdout);
            nread = 0;
        }

    /* the first line of the input has no newline in front of it */
    fwrite(cp,nread+1,1,stdout);
    free(buf);
    return(EXIT_SUCCESS);
}

View File

@@ -0,0 +1,36 @@
/* -*- mode: c -*-
* $Id$
* http://www.bagley.org/~doug/shootout/
*/
/*
#include <stdio.h>
#include <stdlib.h>
*/
/*
 * Sieve of Eratosthenes over 2..8192, repeated NUM times (NUM from the
 * command line, default 1); prints the prime count of the last pass.
 */
int
main(int argc, char *argv[]) {
    int rounds = 1;
    static char flags[8192 + 1];
    long cand, multiple;
    int count = 0;

    if (argc == 2) {
        rounds = atoi(argv[1]);
    }

    for (; rounds != 0; rounds--) {
        count = 0;
        cand = 2;
        while (cand <= 8192) {
            flags[cand] = 1;
            cand++;
        }
        for (cand = 2; cand <= 8192; cand++) {
            if (!flags[cand]) {
                continue;
            }
            /* cand is prime: strike out all of its multiples */
            multiple = cand + cand;
            while (multiple <= 8192) {
                flags[multiple] = 0;
                multiple += cand;
            }
            count++;
        }
    }
    printf("Count: %d\n", count);
    return 0;
}

View File

@@ -0,0 +1,40 @@
/* -*- mode: c -*-
* $Id$
* http://www.bagley.org/~doug/shootout/
*/
/*
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define STUFF "hello\n"
*/
/*
 * String-concatenation benchmark: appends STUFF n times (n from the command
 * line, default 1) to a buffer that doubles when full, then prints the
 * resulting length.
 */
int
main(int argc, char *argv[]) {
    int n = ((argc == 2) ? atoi(argv[1]) : 1);
    int i, buflen = 32;
    char *strbuf = calloc(sizeof(char), buflen);
    char *strend = strbuf;
    int stufflen = strlen(STUFF);

    if (!strbuf) { perror("calloc strbuf"); exit(1); }
    for (i=0; i<n; i++) {
        /* grow when STUFF plus its terminator no longer fits */
        if (((strbuf+buflen)-strend) < (stufflen+1)) {
            buflen = 2*buflen;
            strbuf = realloc(strbuf, buflen);
            if (!strbuf) { perror("realloc strbuf"); exit(1); }
            /* the buffer may have moved: find its end again */
            strend = strbuf + strlen(strbuf);
        }
        /* much faster to strcat to strend than to strbuf */
        strcat(strend, STUFF);
        strend += stufflen;
    }
    /* strlen() does not return int; convert explicitly and print with %lu */
    fprintf(stdout, "%lu\n", (unsigned long) strlen(strbuf));
    free(strbuf);

    sleep(1);
    return(0);
}

View File

@@ -0,0 +1,24 @@
/* -*- mode: c -*-
* $Id$
* http://www.bagley.org/~doug/shootout/
*/
/*
#include <stdio.h>
#include <stdlib.h>
#define MAXLINELEN 128
*/
/* Sums the integers read one per line from stdin and prints the total. */
int
main() {
    int sum = 0;
    char line[128];

    /* bound the read by the buffer itself rather than by a separate
       constant that could drift from the array size */
    while (fgets(line, sizeof(line), stdin)) {
        sum += atoi(line);
    }
    printf("%d\n", sum);
    return(0);
}

View File

@@ -0,0 +1,92 @@
Thu Jan 29 21:13:32 2003
- Fixed exception handling to report the correct error position
- Fixed another bug in the stream position calculation scheme
- Added a more elaborate sample 'list_includes' which lists the dependency
information for a given source file (see test/list_includes/readme.txt).
Thu Jan 18 22:01:03 2003
- Fixed a bug in the stream position calculation scheme
- Made cpp::exceptions more standard conformant (added 'throw()' at appropriate
places)
- Overall housekeeping :-)
Thu Jan 15 21:54:20 2003
Changes since project start (still 0.5.0)
- Added #include <...> and #include "..." functionality
- pp directives are now generally recognized
- Decoupled the C++ lexers and the pp grammar to separate compilation
units (optionally) to speed up compilation (a lot!)
Thu Jan 2 12:39:30 2003
A completely new version 0.5.0 of the C preprocessor was started. It's a
complete rewrite of the existing code base. The main differences are:
- The preprocessor is now implemented as an iterator, which returns the
current preprocessed token from the input stream.
- The preprocessing of include files isn't implemented through recursion
anymore. This follows directly from the first change. As a result of this
change the internal error handling is simplified.
- The C preprocessor iterator itself is fed by a new unified C++ lexer
iterator. BTW, this C++ lexer iterator could be used standalone and is not
tied to the C preprocessor. There are two different C++ lexers implemented
now, which are functionally completely identical. These expose a similar
interface, so the C preprocessor could be used with both of them.
- The C++ lexers integrated into the C preprocessor by now are:
Slex: A spirit based table driven regular expression lexer (the slex
engine originally was written by Dan Nuffer and is available as a
separate Spirit sample).
Re2c: A C++ lexer generated with the help of the re2c tool. This C++
lexer was written as a sample by Dan Nuffer too.
It isn't hard to plug in additional different C++ lexers. There are plans to
integrate a third one written by Juan Carlos Arevalo-Baeza, which is
available as a Spirit sample.
-------------------------------------------------------------------------------
Tue Feb 12 22:29:50 2002
Changes from 0.2.3 to 0.2.4:
- Moved XML dumping functions to the main Spirit directory
- Fixed operator '##', it was not correctly implemented somehow :-(
Sun Feb 10 21:07:19 2002
Changes from 0.2.2 to 0.2.3:
- Implemented concatenation operator '##' (cpp.concat)
- Removed defined() functionality for Intel compiler (it ICE's) until this
issue is resolved
- Separated code for dumping a parsetree to XML for inclusion in the main
Spirit headers
Thu Jan 17 23:51:21 2002
Changes from 0.2.1 to 0.2.2:
- Fixes to compile with gcc 2.95.2 and gcc 3.0.2 (thanks Dan Nuffer)
- Reformatted the grammars to conform to a single formatting guideline
- Assigned explicit rule_id's to the rules of cpp_grammar, so that the
access code to the embedded definition class is not needed anymore
- Fixed a remaining const problem
Tue Jan 15 23:40:40 2002
Changes from 0.2.0 to 0.2.1:
- Corrected handling of defined() operator
- In preprocessing conditionals undefined identifiers now correctly
replaced by '0'
- Fixed several const problems
- Added parse_node_iterator for traversing one node in a parse_tree
without going deeper down the hierarchy than one level (this is useful
if all inspected tokens are arranged along a single node in the parse tree).
The main difference to the parse_tree_iterator is that the underlying
iterator generally can be adjusted correctly after advancing the attached
parse_node_iterator
- Fixed a problem with gcc 2.95.2, which doesn't have a <sstream> header
- Prepared usage of slex for lexer states
Sun Jan 13 10:21:16 2002
Changes from 0.1.0 to 0.2.0:
- Added operator 'defined()'
- Added directive '#warning'
- Corrected error reporting
- Added command line option -I- for finer control of the searched include
directories (-I and -I- should now work as in gcc, see readme.html for
more info)
- Corrected conditional preprocessing (should be fully functional now)
- Fixed existing codebase for changes made in parse tree support
- Moved parse tree utility functions to a separate header (prepared for
inclusion to the Spirit main library)

View File

@@ -0,0 +1,19 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
Acknowledgements
- to Juan Carlos Arevalo-Baeza, who wrote the Spirit cpp_lexer sample,
from which are taken some ideas
- to Dan Nuffer, who wrote the initial Re2C based C++ lexer and the
Slex (Spirit Lex) scanner generator sample
=============================================================================*/

View File

@@ -0,0 +1,16 @@
subproject libs/spirit/example/application/cpp ;
exe cpp :
cpp.hpp
instantiate_cpp_grammar.cpp
instantiate_slex_lexer.cpp
instantiate_re2c_lexer.cpp
./cpp_lexer/re2clex/aq.cpp
./cpp_lexer/re2clex/cpp.re.cpp
: <include>../..
<sysinclude>$(BOOST_ROOT)
<lib>program_options
<lib>fs
;

View File

@@ -0,0 +1,19 @@
A Standard compliant C preprocessor
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
See Copyright.txt for full copyright notices and acknowledgements.
General notes:
- The directory structure of this sample resembles the namespace structure
of the different classes/templates out of which this sample consists.
To make this clear, throughout the sample all #include's are given with
relation to the root folder of this sample (see next point!).
- The build process is straightforward. The only thing you should do
is to include the root directory of this sample into the include search
path of your compiler (usually through a -I. command line argument).
- The different test files and samples contained in this library require
the program_options library from Vladimir Prus, which is currently under
Boost review (http://zigzag.cs.msu.su:7813/program_options).
Please look at the file doc/readme.html.

View File

@@ -0,0 +1,150 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#include "cpp.hpp" // global configuration
#include "cpplexer/cpp_token_ids.hpp"
#include "cpplexer/cpp_lex_iterator.hpp"
#include "cpp/cpp_context.hpp"
///////////////////////////////////////////////////////////////////////////////
// include lexer specifics, import lexer names
#if defined(USE_SLEX_CPP_LEXER)
// use the slex based C++ lexer
#include "cpplexer/slex/cpp_slex_token.hpp"
#if !defined(CPP_SEPARATE_LEXER_INSTANTIATION)
#include "cpplexer/slex/cpp_slex_lexer.hpp"
#endif // !defined(CPP_SEPARATE_LEXER_INSTANTIATION)
using namespace cpplexer::slex;
#elif defined(USE_RE2C_CPP_LEXER)
// use the re2c based C++ lexer
#include "cpplexer/re2clex/cpp_re2c_token.hpp"
#if !defined(CPP_SEPARATE_LEXER_INSTANTIATION)
#include "cpplexer/re2clex/cpp_re2c_lexer.hpp"
#endif // !defined(CPP_SEPARATE_LEXER_INSTANTIATION)
using namespace cpplexer::re2clex;
#endif
///////////////////////////////////////////////////////////////////////////////
// import required names
using namespace boost::spirit;
using std::string;
using std::getline;
using std::ifstream;
using std::cout;
using std::cerr;
using std::endl;
using std::ostream;
///////////////////////////////////////////////////////////////////////////////
// main program
// Test driver: preprocesses the given input and prints every token that the
// preprocessing iterator returns.
//
// argv[1] names the file to preprocess; if it cannot be opened, argv[1]
// itself is used as the text to preprocess.
//
// Returns 0 on success, 1 on wrong usage or an aborted preprocessing run,
// 2 on a preprocessing error, 3 on a std::exception and 4 on any other
// exception.
int
main(int argc, char *argv[])
{
    if (2 != argc) {
        cout << "Usage: <cpp file>" << endl;
        return 1;
    }

    ifstream infile(argv[1]);
    string teststr;
    if (infile.is_open()) {
        string line;
        // Test the result of getline itself: checking infile.good() after
        // the read would discard a final line without a trailing newline,
        // because reading such a line sets eofbit.
        while (getline(infile, line)) {
            teststr += line;
            teststr += '\n';
        }
    }
    else {
        teststr = argv[1];
    }

// The template lex_functor<> is defined in both namespaces: cpplexer::slex and
// cpplexer::re2clex. The 'using namespace' directive above tells the compiler,
// which of them to use.
    typedef cpp::context<lex_token<std::string::iterator> > context_t;

// The C preprocessor iterator shouldn't be constructed directly. It is to be
// generated through a cpp::context<> object. This cpp:context<> object is
// additionally to be used to initialize and define different parameters of
// the actual preprocessing.
// The preprocessing of the input stream is done on the fly behind the scenes
// during iteration over the context_t::iterator_t stream.
    context_t ctx (teststr.begin(), teststr.end(), argv[1]);
    context_t::iterator_t first = ctx.begin();
    context_t::iterator_t last = ctx.end();
    context_t::token_t current_token;

    try {
        while (first != last) {
        // retrieve next token
            current_token = *first;

        // find token name
            string tokenname (cpplexer::get_token_name(cpplexer::token_id(current_token)));

        // output token info
            cout << "matched token "
                << tokenname
                << "(#" << cpplexer::token_id(ID_FROM_TOKEN(current_token))
                << ") at " << current_token.get_position().file << " ("
                << current_token.get_position().line << "/"
                << current_token.get_position().column
                << "): >" << current_token.get_value() << "<"
                << endl;
            ++first;
        }
    }
    catch (cpp::abort_preprocess_exception &e) {
    // abort the preprocessing: simply abort compilation
        cerr
            << e.file_name() << "(" << e.line_no() << "): "
            << "aborting preprocessing."
            << endl;
        return 1;
    }
    catch (cpp::cpp_exception &e) {
    // some preprocessing error
        cerr
            << e.file_name() << "(" << e.line_no() << "): "
            << e.description() << endl;
        return 2;
    }
    catch (std::exception &e) {
    // use last recognized token to retrieve the error position
        cerr
            << current_token.get_position().file
            << "(" << current_token.get_position().line << "): "
            << "unexpected exception: " << e.what()
            << endl;
        return 3;
    }
    catch (...) {
    // use last recognized token to retrieve the error position
        cerr
            << current_token.get_position().file
            << "(" << current_token.get_position().line << "): "
            << "unexpected exception." << endl;
        return 4;
    }
    return 0;
}

View File

@@ -0,0 +1,43 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_CPP_HPP__920D0370_741F_44AF_BF86_F6104BDACF75__INCLUDED_)
#define _CPP_HPP__920D0370_741F_44AF_BF86_F6104BDACF75__INCLUDED_
///////////////////////////////////////////////////////////////////////////////
// This file may be used as a precompiled header (if applicable)
///////////////////////////////////////////////////////////////////////////////
// build version
#include "cpp_version.hpp"
///////////////////////////////////////////////////////////////////////////////
// configure this app here (global configuration constants)
#include "cpp_config.hpp"
///////////////////////////////////////////////////////////////////////////////
// specific exceptions, thrown by the cpp::pp_iterator functions
#include "cpp/cpp_exceptions.hpp"
///////////////////////////////////////////////////////////////////////////////
// include often used files from the stdlib
#include <iostream>
#include <fstream>
#include <string>
///////////////////////////////////////////////////////////////////////////////
// include required boost libraries
#include <boost/assert.hpp>
#endif // !defined(_CPP_HPP__920D0370_741F_44AF_BF86_F6104BDACF75__INCLUDED_)

View File

@@ -0,0 +1,117 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Definition of the preprocessor context
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_CPP_CONTEXT_HPP__907485E2_6649_4A87_911B_7F7225F3E5B8__INCLUDED_)
#define _CPP_CONTEXT_HPP__907485E2_6649_4A87_911B_7F7225F3E5B8__INCLUDED_
#include <string>
#include <stack>
#include "cpp/cpp_ifblock.hpp"
#include "cpp/cpp_include_pathes.hpp"
#include "cpp/cpp_iterator.hpp"
#include "cpp/cpp_iteration_context.hpp"
///////////////////////////////////////////////////////////////////////////////
namespace cpp {
///////////////////////////////////////////////////////////////////////////////
//
// The C preprocessor context template class
//
// The cpp::context template is the main interface class to
// control the behaviour of the preprocessing engine.
//
// The following template parameter has to be supplied:
//
// TokenT The token type to return from the preprocessing
//
///////////////////////////////////////////////////////////////////////////////
template <
typename TokenT,
typename InputPolicyT = iteration_context_policies::
load_file_to_string<cpplexer::lex_iterator<TokenT> >
>
class context
: public InputPolicyT
{
public:
typedef typename TokenT::iterator_t target_iterator_t;
typedef cpplexer::lex_iterator<TokenT> lex_t;
typedef context<TokenT> self_t;
typedef pp_iterator<self_t> iterator_t;
typedef TokenT token_t;
typedef InputPolicyT input_policy_t;
private:
// stack of shared_ptr's to the pending iteration contexts
typedef boost::shared_ptr<base_iteration_context<lex_t> > iteration_ptr_t;
typedef std::stack<iteration_ptr_t> iteration_context_stack_t;
public:
context(target_iterator_t const &first_, target_iterator_t const &last_,
char const *fname = "<Unknown>")
: first(first_), last(last_), filename(fname)
{}
// iterator interface
iterator_t begin() { return iterator_t(*this, first, last, filename); }
iterator_t end() { return iterator_t(); }
// conditional compilation contexts
bool get_if_block_status() const { return ifblocks.get_status(); }
void enter_if_block(bool new_status)
{ ifblocks.enter_if_block(new_status); }
bool enter_elif_block(bool new_status)
{ return ifblocks.enter_elif_block(new_status); }
bool enter_else_block() { return ifblocks.enter_else_block(); }
bool exit_if_block() { return ifblocks.exit_if_block(); }
// maintain include pathes
bool add_include_path(char const *path_)
{ return includes.add_include_path(path_);}
void set_sys_include_delimiter() { includes.set_sys_include_delimiter(); }
bool find_include_file (std::string &s, bool is_system) const
{ return includes.find_include_file(s, is_system); }
void set_current_directory(char const *path_)
{ includes.set_current_directory(path_); }
// stack of iteration contexts
bool has_pending_iteration_context() const { return iter_ctxs.size() > 0; }
iteration_ptr_t pop_iteration_context()
{ iteration_ptr_t top = iter_ctxs.top(); iter_ctxs.pop(); return top; }
void push_iteration_context(iteration_ptr_t iter_ctx)
{ iter_ctxs.push(iter_ctx); }
int get_iteration_depth() const { return iter_ctxs.size(); }
private:
// the main input stream
target_iterator_t const &first; // underlying input stream
target_iterator_t const &last;
std::string filename; // associated main filename
if_block_stack ifblocks; // conditional compilation contexts
include_pathes includes; // lists of include directories to search
iteration_context_stack_t iter_ctxs; // iteration contexts
};
///////////////////////////////////////////////////////////////////////////////
} // namespace cpp
#endif // !defined(_CPP_CONTEXT_HPP__907485E2_6649_4A87_911B_7F7225F3E5B8__INCLUDED_)

View File

@@ -0,0 +1,285 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_EXCEPTIONS_HPP__5190E447_A781_4521_A275_5134FF9917D7__INCLUDED_)
#define _EXCEPTIONS_HPP__5190E447_A781_4521_A275_5134FF9917D7__INCLUDED_
#include <exception>
#include <string>
#include <boost/assert.hpp>
#include <boost/config.hpp>
///////////////////////////////////////////////////////////////////////////////
// helper macro for throwing exceptions
//
// CPP_THROW(cls, code, msg, act_tok) expands to a braced block which
//  - formats "<severity text>: <error text>: <msg>" using
//    cls::severity_text(cls::code) and cls::error_text(cls::code); msg may
//    be any expression chain that can be streamed with operator<<
//  - throws cls, constructed from that text, cls::code and the line, column
//    and file of act_tok.get_position()
//
// Two variants are provided: a std::strstream based one, used when
// BOOST_NO_STRINGSTREAM is defined, and a std::stringstream based one.
#ifdef BOOST_NO_STRINGSTREAM
#include <strstream>
#define CPP_THROW(cls, code, msg, act_tok) \
{ \
using namespace cpp; \
std::strstream stream; \
stream << cls::severity_text(cls::code) << ": " \
<< cls::error_text(cls::code) << ": " << msg << std::ends; \
std::string throwmsg = stream.str(); stream.freeze(false); \
throw cls(throwmsg.c_str(), cls::code, act_tok.get_position().line, \
act_tok.get_position().column, act_tok.get_position().file.c_str()); \
} \
/**/
#else
#include <sstream>
#define CPP_THROW(cls, code, msg, act_tok) \
{ \
using namespace cpp; \
std::stringstream stream; \
stream << cls::severity_text(cls::code) << ": " \
<< cls::error_text(cls::code) << ": " << msg << std::ends; \
throw cls(stream.str().c_str(), cls::code, act_tok.get_position().line, \
act_tok.get_position().column, act_tok.get_position().file.c_str()); \
} \
/**/
#endif // BOOST_NO_STRINGSTREAM
///////////////////////////////////////////////////////////////////////////////
namespace cpp {
///////////////////////////////////////////////////////////////////////////////
// generic file related exception
// severity levels of the preprocessor diagnostics and their display names
namespace severity_util {

    enum severity {
        severity_remark = 0,
        severity_warning,
        severity_error,
        severity_fatal
    };

    // Returns the display name of the given severity level.
    // The level must lie in [severity_remark, severity_fatal].
    inline char const *
    get_severity(severity level)
    {
        // one entry per enumerator, in declaration order
        static char const *names[] =
        {
            "remark",           // severity_remark
            "warning",          // severity_warning
            "error",            // severity_error
            "fatal error"       // severity_fatal
        };

        BOOST_ASSERT(level >= severity_remark && level <= severity_fatal);
        return names[level];
    }
}
///////////////////////////////////////////////////////////////////////////////
// cpp_exception, the base class for all specific C preprocessor exceptions
class cpp_exception
:   public std::exception
{
public:
    // Remember the position the error refers to.
    //
    //  line_, column_  position inside the file
    //  filename_       zero terminated name of the file; it is copied into a
    //                  fixed size member buffer, names which do not fit are
    //                  truncated, a null pointer is stored as an empty name
    //
    // The copy is done by hand into a member array, no memory is allocated.
    cpp_exception(int line_, int column_, char const *filename_) throw()
    :   line(line_), column(column_)
    {
        unsigned int off = 0;

        if (0 != filename_) {
            // the last element is reserved for the terminating zero, so the
            // terminator below can never be written behind the buffer
            while (off < sizeof(filename) - 1 && *filename_)
                filename[off++] = *filename_++;
        }
        filename[off] = 0;
    }
    ~cpp_exception() throw() {}

    // name of the concrete exception class, to be overridden
    virtual char const *what() const throw() = 0;
    // text describing the error, to be overridden
    virtual char const *description() const throw() = 0;

    int line_no() const throw() { return line; }
    int column_no() const throw() { return column; }
    char const *file_name() const throw() { return filename; }

protected:
    char filename[512];     // zero terminated, possibly truncated
    int line;
    int column;
};
///////////////////////////////////////////////////////////////////////////////
//
class file_exception :
    public cpp_exception
{
public:
    enum error_code {
        invalid_argument = 0,
        missing_unc
    };

    // Construct the exception from a ready made message text.
    //
    //  what_       zero terminated message; it is copied into a fixed size
    //              member buffer, longer texts are truncated, a null pointer
    //              is stored as an empty text
    //  (unnamed)   error code; it is not stored by this class
    //  line_, column_, filename_
    //              error position, forwarded to cpp_exception
    file_exception(char const *what_, error_code, int line_, int column_,
        char const *filename_) throw()
    :   cpp_exception(line_, column_, filename_)
    {
        unsigned int off = 0;

        if (0 != what_) {
            // keep one element free for the terminating zero
            while (off < sizeof(buffer) - 1 && *what_)
                buffer[off++] = *what_++;
        }
        buffer[off] = 0;
    }
    ~file_exception() throw() {}

    virtual char const *what() const throw()
    {
        return "cpp::file_exception";
    }
    // returns the message text given to the constructor
    virtual char const *description() const throw()
    {
        return buffer;
    }

    // Return the general error text belonging to the given error_code value.
    // The code is used as an array index without any range check.
    static char const *error_text(int code)
    {
    // error texts in this array must appear in the same order as the items in
    // the error enum above
        static char const *file_exception_errors[] =
        {
            "Invalid argument",         // invalid_argument
            "The object must have an UNC (Universal Naming Convention) component",
                                        // missing_unc
        };
        return file_exception_errors[code];
    }

    // Return the printable severity belonging to the given error_code value.
    // The code is used as an array index without any range check.
    static char const *severity_text(int code)
    {
    // the entries must appear in the same order as the items in the error
    // enum above
        static severity_util::severity file_exception_severity[] =
        {
            severity_util::severity_error,  // invalid_argument
            severity_util::severity_error   // missing_unc
        };
        return severity_util::get_severity(file_exception_severity[code]);
    }

private:
    char buffer[512];       // zero terminated, possibly truncated
};
///////////////////////////////////////////////////////////////////////////////
// preprocessor error
class preprocess_exception :
    public cpp_exception
{
public:
    enum error_code {
        unexpected_error = 0,
        macro_redefinition,
        macro_insertion_error,
        bad_include_file,
        bad_include_statement,
        ill_formed_directive,
        error_directive,
        warning_directive,
        ill_formed_expression,
        missing_matching_if,
        ill_formed_operator
    };

    // Construct the exception from a ready made message text.
    //
    //  what_       zero terminated message; it is copied into a fixed size
    //              member buffer, longer texts are truncated, a null pointer
    //              is stored as an empty text
    //  code        error code, determines the stored severity level
    //  line_, column_, filename_
    //              error position, forwarded to cpp_exception
    preprocess_exception(char const *what_, error_code code, int line_,
        int column_, char const *filename_) throw()
    :   cpp_exception(line_, column_, filename_), level(severity_level(code))
    {
        unsigned int off = 0;

        if (0 != what_) {
            // keep one element free for the terminating zero; writing the
            // terminator behind the buffer would overwrite the 'level' member
            while (off < sizeof(buffer) - 1 && *what_)
                buffer[off++] = *what_++;
        }
        buffer[off] = 0;
    }
    ~preprocess_exception() throw() {}

    virtual char const *what() const throw()
    {
        return "cpp::preprocess_exception";
    }
    // returns the message text given to the constructor
    virtual char const *description() const throw()
    {
        return buffer;
    }
    // returns the severity derived from the error code at construction time
    severity_util::severity get_severity() const
    {
        return level;
    }

    // Return the general error text belonging to the given error_code value.
    // The code is used as an array index without any range check.
    static char const *error_text(int code)
    {
    // error texts in this array must appear in the same order as the items in
    // the error enum above
        static char const *preprocess_exception_errors[] = {
            "unexpected error (should not happen)",     // unexpected_error
            "illegal macro redefinition",               // macro_redefinition
            "macro definition failed (out of memory?)", // macro_insertion_error
            "could not find include file",              // bad_include_file
            "ill formed include statement",             // bad_include_statement
            "unknown preprocessor directive (ignored)", // ill_formed_directive
            "encountered #error directive",             // error_directive
            "encountered #warning directive",           // warning_directive
            "ill formed preprocessor expression",       // ill_formed_expression
            "the #if for this directive is missing",    // missing_matching_if
            "ill formed preprocessing operator"         // ill_formed_operator
        };
        return preprocess_exception_errors[code];
    }

    // Return the severity level belonging to the given error_code value.
    // The code is used as an array index without any range check.
    static severity_util::severity severity_level(int code)
    {
    // the entries must appear in the same order as the items in the error
    // enum above
        static severity_util::severity preprocess_exception_severity[] = {
            severity_util::severity_fatal,      // unexpected_error
            severity_util::severity_warning,    // macro_redefinition
            severity_util::severity_fatal,      // macro_insertion_error
            severity_util::severity_error,      // bad_include_file
            severity_util::severity_error,      // bad_include_statement
            severity_util::severity_error,      // ill_formed_directive
            severity_util::severity_fatal,      // error_directive
            severity_util::severity_warning,    // warning_directive
            severity_util::severity_error,      // ill_formed_expression
            severity_util::severity_error,      // missing_matching_if
            severity_util::severity_error       // ill_formed_operator
        };
        return preprocess_exception_severity[code];
    }

    // Return the printable severity belonging to the given error_code value.
    static char const *severity_text(int code)
    {
        return severity_util::get_severity(severity_level(code));
    }

private:
    char buffer[512];                   // zero terminated, possibly truncated
    severity_util::severity level;
};
///////////////////////////////////////////////////////////////////////////////
// abort compilation
struct abort_preprocess_exception
:   public cpp_exception
{
    // Only the position is kept (it is forwarded to cpp_exception); the
    // error count and the level are accepted but not stored.
    abort_preprocess_exception(int /*error_count_*/, int /*level_*/,
            int line_, int column_, char const *filename_) throw()
    :   cpp_exception(line_, column_, filename_)
    {}

    ~abort_preprocess_exception() throw() {}

    // fixed name of this exception class
    virtual char const *what() const throw()
    { return "cpp::abort_preprocess_exception"; }

    // fixed description, independent of the constructor arguments
    virtual char const *description() const throw()
    { return "fatal error"; }
};
///////////////////////////////////////////////////////////////////////////////
} // namespace cpp
#endif // !defined(_EXCEPTIONS_HPP__5190E447_A781_4521_A275_5134FF9917D7__INCLUDED_)

View File

@@ -0,0 +1,419 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_CPP_GRAMMAR_HPP__FEAEBC2E_2734_428B_A7CA_85E5A415E23E__INCLUDED_)
#define _CPP_GRAMMAR_HPP__FEAEBC2E_2734_428B_A7CA_85E5A415E23E__INCLUDED_
#include <boost/spirit/core.hpp>
#include <boost/spirit/tree/parse_tree.hpp>
#include <boost/spirit/utility/confix.hpp>
#include <boost/spirit/utility/lists.hpp>
#include <boost/spirit/utility/functor_parser.hpp>
#include "cpp/cpp_grammar_gen.hpp"
#include "cpplexer/cpp_token_ids.hpp"
#include "cpplexer/cpp_lex_iterator.hpp"
///////////////////////////////////////////////////////////////////////////////
namespace cpp {
namespace {
///////////////////////////////////////////////////////////////////////////////
//
// store_position
//
// The store_position functor extracts the actual file position from the
// supplied token.
//
///////////////////////////////////////////////////////////////////////////////
template <typename PositionT>
struct store_position {

    // bind the functor to the variable which receives the position
    store_position(PositionT &pos_)
    :   pos(pos_)
    {}

    // copy the result of tok.get_position() into the bound variable
    template <typename TokT>
    void operator()(TokT const &tok) const
    {
        pos = tok.get_position();
    }

    PositionT &pos;     // target of the assignment, not owned
};
} // anonymous namespace
///////////////////////////////////////////////////////////////////////////////
// defines whether the rules should generate debug output
#define TRACE_CPP_GRAMMAR \
(BOOST_SPIRIT_DEBUG_FLAGS_CPP & BOOST_SPIRIT_DEBUG_FLAGS_CPP_GRAMMAR) \
/**/
///////////////////////////////////////////////////////////////////////////////
// Encapsulation of the C++ preprocessor grammar.
template <typename PositionT>
struct cpp_grammar :
    public boost::spirit::grammar<cpp_grammar<PositionT> >
{
    typedef cpp_grammar<PositionT>      grammar_t;
    typedef store_position<PositionT>   store_pos_t;

    // The rules of the grammar for one concrete scanner type.
    template <typename ScannerT>
    struct definition
    {
    // non-parse_tree generating rule type
        typedef typename ScannerT::iteration_policy_t iteration_policy_t;
        typedef boost::spirit::match_policy match_policy_t;
        typedef typename ScannerT::action_policy_t action_policy_t;
        typedef
            boost::spirit::scanner_policies<
                iteration_policy_t, match_policy_t, action_policy_t>
            policies_t;
        typedef
            boost::spirit::scanner<typename ScannerT::iterator_t, policies_t>
            non_tree_scanner_t;
        typedef boost::spirit::rule<non_tree_scanner_t> no_tree_rule_t;

    // 'normal' (parse_tree generating) rule type
        typedef boost::spirit::rule<ScannerT> rule_t;

        rule_t cpp_line;
        rule_t include_file, system_include_file, macro_include_file;
        rule_t plain_define, macro_definition, macro_parameters;
        rule_t undefine;
        rule_t ppifdef, ppifndef, ppif, ppelse, ppelif, ppendif;
        rule_t ppline, line_file;
        rule_t pperror, ppwarning;
        rule_t ppnull;
        rule_t pppragma;
        rule_t illformed;
        no_tree_rule_t ppspace;

        // Publishes the ids of all rules through self.rule_ids and defines
        // the rules themselves.
        definition(cpp_grammar const &self)
        {
        // import the spirit and cpplexer namespaces here
            using namespace boost::spirit;
            using namespace cpplexer;

        // save the rule id's for later use; every entry has to be taken from
        // the rule it is named after
            self.rule_ids.cpp_line_id = cpp_line.id().to_long();
            self.rule_ids.include_file_id = include_file.id().to_long();
            self.rule_ids.sysinclude_file_id = system_include_file.id().to_long();
            self.rule_ids.macroinclude_file_id = macro_include_file.id().to_long();
            self.rule_ids.plain_define_id = plain_define.id().to_long();
            self.rule_ids.macro_parameters_id = macro_parameters.id().to_long();
            self.rule_ids.macro_definition_id = macro_definition.id().to_long();
            self.rule_ids.undefine_id = undefine.id().to_long();
            self.rule_ids.ifdef_id = ppifdef.id().to_long();
            self.rule_ids.ifndef_id = ppifndef.id().to_long();
            self.rule_ids.if_id = ppif.id().to_long();
            self.rule_ids.elif_id = ppelif.id().to_long();
            self.rule_ids.else_id = ppelse.id().to_long();
            self.rule_ids.endif_id = ppendif.id().to_long();
            self.rule_ids.line_id = ppline.id().to_long();
            self.rule_ids.line_file_id = line_file.id().to_long();
            self.rule_ids.error_id = pperror.id().to_long();
            self.rule_ids.warning_id = ppwarning.id().to_long();
            self.rule_ids.null_id = ppnull.id().to_long();
            self.rule_ids.pragma_id = pppragma.id().to_long();
            self.rule_ids.illformed_id = illformed.id().to_long();
            self.rule_ids.ppspace_id = ppspace.id().to_long();

        // recognizes preprocessor directives only; a directive line ends with
        // an optional C++ comment and a newline token, whose position is
        // stored into self.pos_of_newline
            cpp_line
                =   no_node_d[*ppspace]
                    >>  (
                            (   ppnull
                            |   include_file
                            |   system_include_file
                            |   macro_include_file
                            |   plain_define
                            |   undefine
                            |   ppifdef
                            |   ppifndef
                            |   ppif
                            |   ppelse
                            |   ppelif
                            |   ppendif
                            |   ppline
                            |   pperror
                            |   ppwarning
                            |   pppragma
                            |   illformed
                            )
                            >> no_node_d
                               [
                                    *ppspace
                                    >> !ch_p(T_CPPCOMMENT)
                                    >>  ch_p(T_NEWLINE)
                                        [
                                            store_pos_t(self.pos_of_newline)
                                        ]
                               ]
                        )
                ;

        // #include ...
            include_file            // include "..."
                =   ch_p(T_PP_QHEADER)
                ;

            system_include_file     // include <...>
                =   ch_p(T_PP_HHEADER)
                ;

            macro_include_file      // include ...anything else...
                =   no_node_d[ch_p(T_PP_INCLUDE)]
                    >> *( anychar_p - (ch_p(T_NEWLINE) | ch_p(T_CPPCOMMENT)) )
                ;

        // #define FOO foo (with optional parameters)
            plain_define
                =   no_node_d[ch_p(T_PP_DEFINE) >> +ppspace]
                    >>  ch_p(T_IDENTIFIER)
                    >> !macro_parameters
                    >> !macro_definition
                ;

        // parameter list
            macro_parameters
                =   confix_p(
                        no_node_d[ch_p(T_LEFTPAREN) >> *ppspace],
                        list_p(
                            ch_p(T_IDENTIFIER),
                            no_node_d
                            [
                                *ppspace >> ch_p(T_COMMA) >> *ppspace
                            ]
                        ),
                        no_node_d[*ppspace >> ch_p(T_RIGHTPAREN)]
                    )
                ;

        // macro body
            macro_definition
                =   no_node_d[+ppspace]
                    >> *( anychar_p - (ch_p(T_NEWLINE) | ch_p(T_CPPCOMMENT)) )
                ;

        // #undef FOO
            undefine
                =   no_node_d[ch_p(T_PP_UNDEF) >> +ppspace]
                    >>  ch_p(T_IDENTIFIER)
                ;

        // #ifdef et.al.
            ppifdef
                =   no_node_d[ch_p(T_PP_IFDEF) >> +ppspace]
                    >>  ch_p(T_IDENTIFIER)
                ;

            ppifndef
                =   no_node_d[ch_p(T_PP_IFNDEF) >> +ppspace]
                    >>  ch_p(T_IDENTIFIER)
                ;

            ppif
                =   no_node_d[ch_p(T_PP_IF) >> +ppspace]
                    >> +( anychar_p - (ch_p(T_NEWLINE) | ch_p(T_CPPCOMMENT)) )
                ;

        // NOTE(review): every other directive is matched through a T_PP_*
        // token; T_ELSE looks like the token of the 'else' keyword. Confirm
        // against cpplexer/cpp_token_ids.hpp whether T_PP_ELSE was meant.
            ppelse
                =   no_node_d[ch_p(T_ELSE)]
                ;

            ppelif
                =   no_node_d[ch_p(T_PP_ELIF) >> +ppspace]
                    >> +( anychar_p - (ch_p(T_NEWLINE) | ch_p(T_CPPCOMMENT)) )
                ;

            ppendif
                =   no_node_d[ch_p(T_PP_ENDIF)]
                ;

        // #line ...
            ppline
                =   no_node_d[ch_p(T_PP_LINE) >> +ppspace]
                    >>  (   ch_p(T_DECIMALINT) >> !line_file
                        |   *( anychar_p - (ch_p(T_NEWLINE) | ch_p(T_CPPCOMMENT)) )
                        )
                ;

            line_file
                =   no_node_d[+ppspace] >> ch_p(T_STRINGLIT)
                ;

        // # (empty preprocessor directive)
            ppnull
                =   no_node_d[ch_p(T_POUND)]    // real null directive
                ;

        // # something else (ill formed preprocessor directive)
            illformed               // for error reporting
                =   (anychar_p - (ch_p(T_NEWLINE) | ch_p(T_CPPCOMMENT)))
                    >> no_node_d
                       [
                            *( anychar_p - (ch_p(T_NEWLINE) | ch_p(T_CPPCOMMENT)) )
                       ]
                ;

        // #error
            pperror
                =   no_node_d[ch_p(T_PP_ERROR) >> +ppspace]
                    >> *( anychar_p - (ch_p(T_NEWLINE) | ch_p(T_CPPCOMMENT)) )
                ;

        // #warning
            ppwarning
                =   no_node_d[ch_p(T_PP_WARNING) >> +ppspace]
                    >> *( anychar_p - (ch_p(T_NEWLINE) | ch_p(T_CPPCOMMENT)) )
                ;

        // #pragma ...
            pppragma
                =   no_node_d[ch_p(T_PP_PRAGMA) >> +ppspace]
                    >> *( anychar_p - (ch_p(T_NEWLINE) | ch_p(T_CPPCOMMENT)) )
                ;

        // auxiliary helper rules
            ppspace     // valid space in a line with a preprocessor directive
                =   ch_p(T_SPACE) | ch_p(T_CCOMMENT)
                ;

        // register all rules for debug tracing
            BOOST_SPIRIT_TRACE_RULE(cpp_line, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(include_file, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(system_include_file, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(macro_include_file, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(plain_define, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(macro_definition, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(macro_parameters, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(undefine, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(ppifdef, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(ppifndef, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(ppif, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(ppelse, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(ppelif, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(ppendif, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(ppline, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(line_file, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(pperror, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(ppwarning, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(ppnull, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(pppragma, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(illformed, TRACE_CPP_GRAMMAR);
            BOOST_SPIRIT_TRACE_RULE(ppspace, TRACE_CPP_GRAMMAR);
        }

    // start rule of this grammar
        rule_t const& start() const
        { return cpp_line; }
    };

    // both members refer to objects owned by the creator of the grammar
    cpp_grammar_rule_ids &rule_ids;     // receives the ids of the rules
    PositionT &pos_of_newline;          // receives the position of T_NEWLINE

#if !defined(_DUMP_PARSE_TREE)
    cpp_grammar(cpp_grammar_rule_ids &rule_ids_, PositionT &pos_of_newline_)
    :   rule_ids(rule_ids_), pos_of_newline(pos_of_newline_)
    { BOOST_SPIRIT_TRACE_RULE_NAME(*this, "cpp_grammar", TRACE_CPP_GRAMMAR); }

#else
    // the initializers are listed in declaration order; the map has to be
    // constructed from the grammar object itself, not from a pointer to it
    cpp_grammar(cpp_grammar_rule_ids &rule_ids_, PositionT &pos_of_newline_)
    :   rule_ids(rule_ids_), pos_of_newline(pos_of_newline_),
        map_rule_id_to_name(*this)
    {
        BOOST_SPIRIT_TRACE_RULE_NAME(*this, "cpp_grammar", TRACE_CPP_GRAMMAR);
    }

// helper function and data to get readable names of the rules known to us
// NOTE(review): this needs <map> and <string>, which are not included by this
// header itself - confirm that the including file provides them.
    struct map_ruleid_to_name :
        public std::map<
            boost::spirit::parser_id, std::string,
            std::less<boost::spirit::parser_id> >
    {
        typedef std::map<
                boost::spirit::parser_id, std::string,
                std::less<boost::spirit::parser_id> >
            base_t;

        // NOTE(review): the table below is a function local static, so it is
        // filled only once, from the ids found in self.rule_ids at that time.
        // Those ids are written by definition's constructor; if that has not
        // run yet the table may hold unset values - verify before relying on
        // the names.
        map_ruleid_to_name(cpp_grammar const &self)
        {
            static struct {
                long parser_id;     // same type as the cpp_grammar_rule_ids members
                char const *rule_name;
            }
            init_ruleid_name_map[] = {
                { self.rule_ids.cpp_line_id, "cpp_line" },
                { self.rule_ids.include_file_id, "include_file" },
                { self.rule_ids.sysinclude_file_id, "system_include_file" },
                { self.rule_ids.macroinclude_file_id, "macro_include_file" },
                { self.rule_ids.plain_define_id, "plain_define" },
                { self.rule_ids.macro_parameters_id, "macro_parameters" },
                { self.rule_ids.macro_definition_id, "macro_definition" },
                { self.rule_ids.undefine_id, "undefine" },
                { self.rule_ids.ifdef_id, "ppifdef" },
                { self.rule_ids.ifndef_id, "ppifndef" },
                { self.rule_ids.if_id, "ppif" },
                { self.rule_ids.elif_id, "ppelif" },
                { self.rule_ids.else_id, "ppelse" },
                { self.rule_ids.endif_id, "ppendif" },
                { self.rule_ids.line_id, "ppline" },
                { self.rule_ids.line_file_id, "line_file" },
                { self.rule_ids.error_id, "pperror" },
                { self.rule_ids.warning_id, "ppwarning" },
                { self.rule_ids.null_id, "ppnull" },
                { self.rule_ids.pragma_id, "pppragma" },
                { self.rule_ids.illformed_id, "illformed" },
                { self.rule_ids.ppspace_id, "ppspace" },
                { 0 }
            };

        // initialize parser_id to rule_name map; the first entry with an id
        // of zero ends the table
            for (int i = 0; 0 != init_ruleid_name_map[i].parser_id; ++i)
                base_t::insert(base_t::value_type(
                    boost::spirit::parser_id(init_ruleid_name_map[i].parser_id),
                    std::string(init_ruleid_name_map[i].rule_name))
                );
        }
    };
    map_ruleid_to_name map_rule_id_to_name;
#endif // defined(_DUMP_PARSE_TREE)
};
///////////////////////////////////////////////////////////////////////////////
#undef TRACE_CPP_GRAMMAR
///////////////////////////////////////////////////////////////////////////////
//
// The following parse function is defined here, to allow the separation of
// the compilation of the cpp_grammar from the function using it.
//
///////////////////////////////////////////////////////////////////////////////
#if defined(CPP_SEPARATE_GRAMMAR_INSTANTIATION)
#define CPP_GRAMMAR_GEN_INLINE
#else
#define CPP_GRAMMAR_GEN_INLINE inline
#endif
template <typename TokenT>
CPP_GRAMMAR_GEN_INLINE
boost::spirit::tree_parse_info<cpplexer::lex_iterator<TokenT> >
cpp_grammar_gen<TokenT>::parse_cpp_grammar (
    cpplexer::lex_iterator<TokenT> const &first,
    cpplexer::lex_iterator<TokenT> const &last)
{
    typedef cpp_grammar<typename TokenT::position_t> grammar_type;

    // A single grammar object per TokenT is created on the first call and
    // reused afterwards; it is bound to the static members rule_ids and
    // pos_of_newline of this class.
    static grammar_type the_grammar (rule_ids, pos_of_newline);

    // parse the token range [first, last) and return the parse tree info
    return boost::spirit::pt_parse (first, last, the_grammar);
}
#undef CPP_GRAMMAR_GEN_INLINE
///////////////////////////////////////////////////////////////////////////////
} // namespace cpp
#endif // !defined(_CPP_GRAMMAR_HPP__FEAEBC2E_2734_428B_A7CA_85E5A415E23E__INCLUDED_)

View File

@@ -0,0 +1,94 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_CPP_GRAMMAR_GEN_HPP__80CB8A59_5411_4E45_B406_62531A12FB99__INCLUDED_)
#define _CPP_GRAMMAR_GEN_HPP__80CB8A59_5411_4E45_B406_62531A12FB99__INCLUDED_
#include <boost/spirit/tree/parse_tree.hpp>
#include "cpplexer/cpp_lex_iterator.hpp"
///////////////////////////////////////////////////////////////////////////////
namespace cpp {
///////////////////////////////////////////////////////////////////////////////
//
// store parser_id's of all rules of the cpp_grammar here for later access
//
///////////////////////////////////////////////////////////////////////////////
// Plain collection of rule ids, one 'long' per rule. The struct has no
// constructor, so the members hold meaningful values only after they have
// been assigned.
struct cpp_grammar_rule_ids {
long cpp_line_id; // a complete preprocessor directive line
long include_file_id; // #include "..."
long sysinclude_file_id; // #include <...>
long macroinclude_file_id; // #include ...
long plain_define_id; // #define
long macro_parameters_id; // parameter list of a #define
long macro_definition_id; // macro body of a #define
long undefine_id; // #undef
long ifdef_id; // #ifdef
long ifndef_id; // #ifndef
long if_id; // #if
long elif_id; // #elif
long else_id; // #else
long endif_id; // #endif
long line_id; // #line
long line_file_id; // file name part of #line
long error_id; // #error
long warning_id; // #warning
long null_id; // #
long pragma_id; // #pragma
long illformed_id; // ill formed directive
long ppspace_id; // space inside a directive line
};
///////////////////////////////////////////////////////////////////////////////
//
// cpp_grammar_gen template class
//
// This template helps separating the compilation of the cpp_grammar
// class from the compilation of the main pp_iterator. This is done to
// save compilation time.
//
///////////////////////////////////////////////////////////////////////////////
// All members are static: there is one set of rule ids, one newline position
// and one parse function per token type TokenT.
template <typename TokenT>
struct cpp_grammar_gen
{
// iterator type of the token sequence to parse
typedef cpplexer::lex_iterator<TokenT> iterator_t;
// the parser_id's of all rules of the cpp_grammar are stored here
// note: these are valid only after the first call to parse_cpp_grammar
static cpp_grammar_rule_ids rule_ids;
// the actual position of the last matched T_NEWLINE is stored here into the
// member 'pos_of_newline'
static typename TokenT::position_t pos_of_newline;
// parse the cpp_grammar and return the resulting parse tree
// (only declared here, the definition has to be supplied separately)
static boost::spirit::tree_parse_info<iterator_t>
parse_cpp_grammar (iterator_t const &first, iterator_t const &last);
};
///////////////////////////////////////////////////////////////////////////////
// definitions of the static members
// rule id storage, one object per TokenT
template <typename TokenT>
cpp_grammar_rule_ids cpp_grammar_gen<TokenT>::rule_ids;
// position of the last matched newline token, one object per TokenT
template <typename TokenT>
typename TokenT::position_t cpp_grammar_gen<TokenT>::pos_of_newline;
///////////////////////////////////////////////////////////////////////////////
} // namespace cpp
#endif // !defined(_CPP_GRAMMAR_GEN_HPP__80CB8A59_5411_4E45_B406_62531A12FB99__INCLUDED_)

View File

@@ -0,0 +1,143 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_CPP_IFBLOCK_HPP__D4676B36_00C5_41F4_BC9F_9CBBAE3B8006__INCLUDED_)
#define _CPP_IFBLOCK_HPP__D4676B36_00C5_41F4_BC9F_9CBBAE3B8006__INCLUDED_
#include <stack>
///////////////////////////////////////////////////////////////////////////////
namespace cpp {
///////////////////////////////////////////////////////////////////////////////
// the class if_blocks handles recursive conditional compilation contexts
class if_block
{
public:
    // a block which is active and whose enclosing block is active as well
    if_block()
    :   status(true),
        some_part_status(true),
        enclosing_status(true),
        is_in_else(false)
    {}

    // a block with the given status inside an enclosing block with the given
    // status; the #else part has not been entered yet
    if_block(bool status_, bool enclosing_status_)
    :   status(status_),
        some_part_status(status_),
        enclosing_status(enclosing_status_),
        is_in_else(false)
    {}

    // Change the status of the current part. Once any part has been active
    // this is remembered and never reset.
    void set_status(bool status_)
    {
        status = status_;
        some_part_status = some_part_status || status_;
    }

    bool get_status() const
    { return status; }

    bool get_some_part_status() const
    { return some_part_status; }

    bool get_enclosing_status() const
    { return enclosing_status; }

    bool get_in_else() const
    { return is_in_else; }

    // mark the block as being inside of its #else part (cannot be undone)
    void set_in_else()
    { is_in_else = true; }

private:
    bool status;            // the current part of the block is active
    bool some_part_status;  // the current or one of the preceding parts
                            // (#if/#elif) was active
    bool enclosing_status;  // the enclosing #if block is active
    bool is_in_else;        // the #else part was entered
};
///////////////////////////////////////////////////////////////////////////////
// stack of conditional compilation contexts
class if_block_stack :
private std::stack<if_block>
{
public:
void enter_if_block(bool new_status)
{
// If enclosing block is false, then this block is also false
bool enclosing_status = get_status();
this->push (value_type (new_status && enclosing_status, enclosing_status));
}
bool enter_elif_block(bool new_status)
{
if (!is_inside_ifpart())
return false; // #elif without matching #if
if (get_enclosing_status()) {
if (get_status()) {
// entered a (false) #elif block from a true block
this->top().set_status(false);
}
else if (new_status && !this->top().get_some_part_status()) {
// Entered true #elif block and no previous block was true
this->top().set_status(new_status);
}
}
return true;
}
bool enter_else_block()
{
if (!is_inside_ifpart())
return false; // #else without matching #if
if (get_enclosing_status()) {
if (!this->top().get_some_part_status()) {
// Entered (true) #else block and no previous block was true
this->top().set_status(true);
}
else if (get_status()) {
// Entered (false) else block from true block
this->top().set_status(false);
}
// Set else flag
this->top().set_in_else();
}
return true;
}
bool exit_if_block()
{
if (0 == this->size())
return false; // #endif without matching #if
this->pop();
return true;
}
// return, wether the top (innermost) condition is true or false
bool get_status() const
{
return 0 == this->size() || this->top().get_status();
}
protected:
bool get_enclosing_status() const
{
return 0 == this->size() || this->top().get_enclosing_status();
}
bool is_inside_ifpart() const
{
return 0 != this->size() && !this->top().get_in_else();
}
bool is_inside_elsepart() const
{
return 0 != this->size() && this->top().get_in_else();
}
};
///////////////////////////////////////////////////////////////////////////////
} // namespace cpp
#endif // !defined(_CPP_IFBLOCK_HPP__D4676B36_00C5_41F4_BC9F_9CBBAE3B8006__INCLUDED_)

View File

@@ -0,0 +1,184 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Definition of the preprocessor context
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_CPP_INCLUDE_PATHES_HPP__AF620DA4_B3D2_4221_AD91_8A1ABFFB6944__INCLUDED_)
#define _CPP_INCLUDE_PATHES_HPP__AF620DA4_B3D2_4221_AD91_8A1ABFFB6944__INCLUDED_
#include <string>
#include <list>
#include <boost/filesystem/path.hpp>
#include <boost/filesystem/operations.hpp>
///////////////////////////////////////////////////////////////////////////////
namespace cpp {
///////////////////////////////////////////////////////////////////////////////
//
// include_pathes - controlling the include path search order
//
// General notes:
//
// Any directories specified with the 'add_include_path()' function before
// the function 'set_sys_include_delimiter()' is called are searched only
// for the case of '#include "file"' directives, they are not searched for
// '#include <file>' directives. If additional directories are specified
// with the 'add_include_path()' function after a call to the function
// 'set_sys_include_delimiter()', these directories are searched for all
// '#include' directives.
//
// In addition, a call to the function 'set_sys_include_delimiter()'
// inhibits the use of the current directory as the first search directory
// for '#include "file"' directives. Therefore, the current directory is
// searched only if it is requested explicitly with a call to the function
// 'add_include_path(".")'.
//
// Calling both functions, 'set_sys_include_delimiter()' and
// 'add_include_path(".")' allows you to control precisely which
// directories are searched before the current one and which are searched
// after.
//
///////////////////////////////////////////////////////////////////////////////
class include_pathes
{
    typedef std::list<boost::filesystem::path> include_list_t;

public:
    // Start with empty search lists; the current directory is taken from
    // boost::filesystem::initial_path().
    include_pathes()
    :   was_sys_include_path(false),
        current_dir(boost::filesystem::initial_path())
    {}

    // Append a directory to one of the search lists. The system list is used
    // if is_system is set or if set_sys_include_delimiter() was called
    // before, the user list otherwise.
    bool add_include_path(char const *path_, bool is_system = false)
    {
        if (is_system || was_sys_include_path)
            return add_include_path(path_, system_include_pathes);
        return add_include_path(path_, user_include_pathes);
    }

    // from now on all added directories go to the system list
    void set_sys_include_delimiter()
    { was_sys_include_path = true; }

    bool find_include_file (std::string &s, bool is_system) const;
    void set_current_directory(char const *path_);

protected:
    bool find_include_file (std::string &s, include_list_t const &pathes) const;
    bool add_include_path(char const *path_, include_list_t &pathes_);

private:
    include_list_t user_include_pathes;
    include_list_t system_include_pathes;
    bool was_sys_include_path;          // saw a set_sys_include_delimiter()
    boost::filesystem::path current_dir;
};
///////////////////////////////////////////////////////////////////////////////
// Add an include path to one of the search lists (user include path or system
// include path). The path is completed relative to the current directory.
// Returns false for a null pointer and for a path which is not a directory.
inline
bool include_pathes::add_include_path (
    char const *path_, include_list_t &pathes_)
{
    namespace fs = boost::filesystem;

    if (!path_)
        return false;

    fs::path newpath = fs::complete(fs::path(path_, fs::native), current_dir);
    if (fs::is_directory(newpath)) {
        pathes_.push_back (newpath);
        return true;
    }
    return false;
}
///////////////////////////////////////////////////////////////////////////////
// Find an include file by traversing the list of include directories
inline
bool include_pathes::find_include_file (std::string &s,
include_list_t const &pathes) const
{
namespace fs = boost::filesystem;
typedef include_list_t::const_iterator const_include_list_iter_t;
const_include_list_iter_t include_pathes_end = pathes.end();
for (const_include_list_iter_t it = pathes.begin();
it != include_pathes_end; ++it)
{
fs::path currpath (*it);
currpath /= s; // append filename
if (fs::exists(currpath)) {
// found the required file
s = currpath.string();
return true;
}
}
return false;
}
///////////////////////////////////////////////////////////////////////////////
// Find an include file by searching the user and system includes in the
// correct sequence (as it was configured by the user of the C preprocessor
inline
bool include_pathes::find_include_file (std::string &s, bool is_system) const
{
namespace fs = boost::filesystem;
// if not system include (<...>), then search actual directory first
if (!is_system) {
if (!was_sys_include_path) { // set_sys_include_delimiter() not called
// first look in the current directory
fs::path currpath = current_dir;
currpath /= s;
if (fs::exists(currpath)) {
s = currpath.string(); // found in local directory
return true;
}
// iterate all user include file directories to find the file
return find_include_file(s, user_include_pathes);
}
// iterate all user include file directories to find the file
if (find_include_file(s, user_include_pathes))
return true;
// if nothing found, fall through
// ...
}
// iterate all system include file directories to find the file
return find_include_file (s, system_include_pathes);
}
///////////////////////////////////////////////////////////////////////////////
// Set current directory from a given file name: a directory is taken as it
// is, for anything else the branch path (the name without its last element)
// is used.
inline
void include_pathes::set_current_directory(char const *path_)
{
    namespace fs = boost::filesystem;

    fs::path filename(path_, fs::native);

    current_dir = fs::is_directory(filename) ?
        filename : filename.branch_path();
}
///////////////////////////////////////////////////////////////////////////////
} // namespace cpp
#endif // !defined(_CPP_INCLUDE_PATHES_HPP__AF620DA4_B3D2_4221_AD91_8A1ABFFB6944__INCLUDED_)

View File

@@ -0,0 +1,160 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Definition of the preprocessor context
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_CPP_ITERATION_CONTEXT_HPP__00312288_9DDB_4668_AFE5_25D3994FD095__INCLUDED_)
#define _CPP_ITERATION_CONTEXT_HPP__00312288_9DDB_4668_AFE5_25D3994FD095__INCLUDED_
#include <iterator>
///////////////////////////////////////////////////////////////////////////////
namespace cpp {
namespace iteration_context_policies {
///////////////////////////////////////////////////////////////////////////////
//
// The iteration_context_policies templates are policies for the
// cpp::iteration_context which allows to control, how a given input file
// is to be represented by a pair of iterators pointing to the begin and
// the end of the resulting input sequence.
//
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
//
// load_file_to_string
//
// Loads a file into a string and returns the iterators pointing to
// the beginning and the end of the loaded string.
//
///////////////////////////////////////////////////////////////////////////
template <typename IteratorT>
struct load_file_to_string {

    // The inner template is mixed into the iteration context (IterContextT),
    // which is expected to provide the members 'filename', 'first' and 'last'.
    template <typename IterContextT>
    class inner {
    public:
        // Read the whole file named by iter_ctx.filename into the string
        // buffer held by this policy and let iter_ctx.first/iter_ctx.last
        // span this buffer.
        // If the file can't be opened, a bad_include_file error is raised
        // through CPP_THROW (act_tok is handed to the error for reporting).
        template <typename TokenT>
        static
        void init_iterators(IterContextT &iter_ctx, TokenT const &act_tok)
        {
            typedef std::istreambuf_iterator<char> stream_iter_t;

            std::ifstream instream(iter_ctx.filename.c_str());
            if (!instream.is_open()) {
                CPP_THROW(preprocess_exception, bad_include_file,
                    iter_ctx.filename, act_tok);
            }

            // slurp the complete file content
            stream_iter_t stream_begin(instream.rdbuf());
            stream_iter_t stream_end;
            iter_ctx.instring = std::string(stream_begin, stream_end);

            // the iterators refer to the buffer, which lives as long as the
            // iteration context itself
            iter_ctx.first = IteratorT(iter_ctx.instring.begin(),
                iter_ctx.instring.end(), iter_ctx.filename);
            iter_ctx.last = IteratorT();
        }

    private:
        std::string instring;       // holds the content of the loaded file
    };
};
///////////////////////////////////////////////////////////////////////////////
//
// load_file
//
// The load_file policy opens a given file and returns the wrapped
// istreambuf_iterators.
//
///////////////////////////////////////////////////////////////////////////////
template <typename IteratorT>
struct load_file {

    // The inner template is mixed into the iteration context (IterContextT),
    // which is expected to provide the members 'filename', 'first' and 'last'.
    template <typename IterContextT>
    class inner {
    public:
        // Open the file named by iter_ctx.filename and let
        // iter_ctx.first/iter_ctx.last iterate over the stream content
        // (wrapped into multi_pass iterators).
        // If the file can't be opened, a bad_include_file error is raised
        // through CPP_THROW (act_tok is handed to the error for reporting).
        template <typename TokenT>
        static
        void init_iterators(IterContextT &iter_ctx, TokenT const &act_tok)
        {
            // the stream is a member (it has to outlive the iterators), so it
            // is already constructed and must be opened explicitly
            iter_ctx.instream.open(iter_ctx.filename.c_str());
            if (!iter_ctx.instream.is_open()) {
                CPP_THROW(preprocess_exception, bad_include_file,
                    iter_ctx.filename, act_tok);
            }

            using boost::spirit::make_multi_pass;

            // the file name is passed along as the third argument, as done
            // by the load_file_to_string policy
            iter_ctx.first = IteratorT(
                make_multi_pass(std::istreambuf_iterator<char>(
                    iter_ctx.instream.rdbuf())),
                make_multi_pass(std::istreambuf_iterator<char>()),
                iter_ctx.filename);
            iter_ctx.last = IteratorT();
        }

    private:
        std::ifstream instream;     // the opened input file
    };
};
} // namespace iteration_context_policies
///////////////////////////////////////////////////////////////////////////////
//
template <typename IteratorT>
struct base_iteration_context {
public:
    // Create a context for the given file name only, the iterators are left
    // default initialized.
    base_iteration_context(std::string const &file_name)
    :   filename(file_name)
    {}

    // Create a context from an already existing iterator pair and the name
    // of the corresponding file.
    base_iteration_context(IteratorT const &begin_, IteratorT const &end_,
            std::string const &file_name)
    :   first(begin_), last(end_), filename(file_name)
    {}

    // the actual input stream
    IteratorT first;            // actual input stream position
    IteratorT last;             // end of input stream
    std::string filename;       // actual processed file
};
///////////////////////////////////////////////////////////////////////////////
//
// The iteration_context combines the plain iterator pair/file name storage
// (base_iteration_context) with an input policy, which decides how the
// iterators are set up for the given file.
template <
    typename IteratorT,
    typename InputPolicyT =
        iteration_context_policies::load_file_to_string<IteratorT>
>
struct iteration_context
:   public base_iteration_context<IteratorT>,
    public InputPolicyT::template
        inner<iteration_context<IteratorT, InputPolicyT> >
{
    typedef iteration_context<IteratorT, InputPolicyT> self_t;
    typedef typename IteratorT::token_t token_t;

    // Store the file name and let the input policy initialize the iterators;
    // act_tok is forwarded to the policy's init_iterators function.
    iteration_context(std::string const &fname, token_t const &act_tok)
    :   base_iteration_context<IteratorT>(fname)
    {
        typedef typename InputPolicyT::template inner<self_t> policy_t;

        policy_t::init_iterators(*this, act_tok);
    }
};
///////////////////////////////////////////////////////////////////////////////
} // namespace cpp
#endif // !defined(_CPP_ITERATION_CONTEXT_HPP__00312288_9DDB_4668_AFE5_25D3994FD095__INCLUDED_)

View File

@@ -0,0 +1,442 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Definition of the preprocessor iterator
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_CPP_ITERATOR_HPP__175CA88F_7273_43FA_9039_BCF7459E1F29__INCLUDED_)
#define _CPP_ITERATOR_HPP__175CA88F_7273_43FA_9039_BCF7459E1F29__INCLUDED_
#include <string>
#include <boost/shared_ptr.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/spirit/iterator/multi_pass.hpp>
#include <boost/spirit/tree/parse_tree_utils.hpp>
#include "cpplexer/cpp_lex_iterator.hpp"
#include "cpp/cpp_exceptions.hpp"
#include "cpp/cpp_grammar_gen.hpp"
#include "cpp/cpp_iteration_context.hpp"
///////////////////////////////////////////////////////////////////////////////
namespace cpp {
namespace impl {
///////////////////////////////////////////////////////////////////////////////
//
// pp_iterator_functor
//
///////////////////////////////////////////////////////////////////////////////
template <typename ContextT>
class pp_iterator_functor {

    // types derived from the preprocessing context
    typedef typename ContextT::lex_t                lex_t;
    typedef typename lex_t::token_t                 result_type;
    typedef typename result_type::string_t          string_t;
    typedef cpp::cpp_grammar_gen<result_type>       cpp_grammar_t;

    // iteration contexts: the base type is what gets stored, the derived type
    // is created for every newly opened include file
    typedef base_iteration_context<lex_t>           base_iteration_context_t;
    typedef
        iteration_context<lex_t, typename ContextT::input_policy_t>
        iteration_context_t;

public:
    // Start preprocessing the input sequence [first_, last_), which is lexed
    // by a lex_t constructed from the given iterators and file name.
    template <typename IteratorT>
    pp_iterator_functor(ContextT &ctx_, IteratorT const &first_,
            IteratorT const &last_, std::string const &filename_)
    :   ctx(ctx_),
        iter_ctx(new base_iteration_context_t(
            lex_t(first_, last_, filename_), lex_t(), filename_)),
        seen_newline(true)
    {}

    // get the next preprocessed token
    result_type get();

    // get the last recognized token (for error processing etc.)
    result_type const &current_token() const
    {
        return act_token;
    }

protected:
    result_type pp_token();
    bool pp_directive();
    void dispatch_directive(boost::spirit::tree_parse_info<lex_t> const &hit);
    void on_include (result_type const &t, bool is_system);

private:
    ContextT &ctx;              // context, this iterator is associated with
    boost::shared_ptr<base_iteration_context_t> iter_ctx;
    result_type act_token;      // current token
    bool seen_newline;          // needed for recognizing begin of line
};
///////////////////////////////////////////////////////////////////////////////
//
// get(): get the next preprocessed token
//
// throws a pp_exception, if appropriate
//
///////////////////////////////////////////////////////////////////////////////
template <typename ContextT>
inline typename pp_iterator_functor<ContextT>::result_type
pp_iterator_functor<ContextT>::get()
{
    using namespace cpplexer;

    for (;;) {
        // test for EOI: as long as the current input is exhausted, pop back
        // the pending input contexts (if any) and continue parsing there.
        // This is done in a loop, because the popped context may be
        // exhausted as well, and inside the outer loop, because the current
        // input may run out while tokens are being skipped below.
        while (iter_ctx->first == iter_ctx->last) {
            if (!ctx.has_pending_iteration_context())
                return result_type();   // nothing left: return eof token

            iter_ctx = ctx.pop_iteration_context();
            seen_newline = true;    // fake a newline to trigger pp_directive
        }

        // try to generate the next token
        bool was_seen_newline = seen_newline;

        act_token = *iter_ctx->first;
        if (act_token == T_NEWLINE) {   // a newline is to be returned asap
            seen_newline = true;
            ++iter_ctx->first;
            return act_token;
        }
        seen_newline = false;

        if (was_seen_newline && pp_directive()) {
            // a pp directive was found, return the corresponding eol only
            seen_newline = true;
            return result_type(T_NEWLINE,
                typename result_type::string_t("\n"),
                cpp_grammar_t::pos_of_newline);
        }

        if (ctx.get_if_block_status()) {
            // preprocess this token, eat up more, if appropriate, return the
            // next preprocessed token
            return pp_token();
        }

        // compilation condition is false: discard the actual token and try
        // the next one. The iterator has to be advanced here, otherwise the
        // same token would be examined over and over again (newlines never
        // reach this point, these are returned above).
        ++iter_ctx->first;
    }
}
///////////////////////////////////////////////////////////////////////////////
//
// pp_token(): return the next preprocessed token
//
///////////////////////////////////////////////////////////////////////////////
template <typename ContextT>
inline typename pp_iterator_functor<ContextT>::result_type
pp_iterator_functor<ContextT>::pp_token()
{
    // TODO: take the next preprocessed token from the unput queue or
    // call the lexer, preprocess the required number of tokens, put them
    // into the unput queue and return the next preprocessed token

    // for now the current token is handed through unchanged and the input
    // position is moved to the next token
    result_type const current = *iter_ctx->first++;
    return current;
}
///////////////////////////////////////////////////////////////////////////////
//
// pp_directive(): recognize a preprocessor directive
//
///////////////////////////////////////////////////////////////////////////////
template <typename ContextT>
inline bool
pp_iterator_functor<ContextT>::pp_directive()
{
    typedef boost::spirit::tree_parse_info<lex_t> parse_info_t;

    // try to match a preprocessor directive at the current input position
    parse_info_t hit =
        cpp_grammar_t::parse_cpp_grammar(iter_ctx->first, iter_ctx->last);

    if (!hit.match)
        return false;       // no pp directive here, nothing was consumed

    // position the iterator past the matched sequence to allow
    // resynchronisation, if an error occurs
    iter_ctx->first = hit.stop;

    // found a valid pp directive, dispatch to the correct function to handle
    // the found pp directive
    dispatch_directive (hit);
    return true;
}
///////////////////////////////////////////////////////////////////////////////
//
// dispatch_directive(): dispatch a recognized preprocessor directive
//
///////////////////////////////////////////////////////////////////////////////
template <typename ContextT>
inline void
pp_iterator_functor<ContextT>::dispatch_directive(
    boost::spirit::tree_parse_info<lex_t> const &hit)
{
    using namespace boost::spirit;

    typedef node_val_data_factory<nil_t> node_factory_t;
    typedef tree_match<lex_t, node_factory_t> parse_tree_match_t;
    typedef typename parse_tree_match_t::container_t parse_tree_t;
    typedef typename parse_tree_match_t::parse_node_t parse_node_t;

    // parse_tree_t depends on the template parameter, so the nested iterator
    // type has to be marked with 'typename'
    typename parse_tree_t::const_iterator begin = hit.trees.begin();

    // decide, which preprocessor directive was found: the rule id of the
    // first leaf below the root node identifies the matched directive
    parse_tree_t const &root = (*begin).children;
    parse_node_t const &node = boost::spirit::get_first_leaf(*root.begin()).value;
    long node_id = node.id().to_long();

    if (node_id == cpp_grammar_t::rule_ids.include_file_id) {
    // #include "..."
        on_include (*node.begin(), false);
    }
    else if (node_id == cpp_grammar_t::rule_ids.sysinclude_file_id) {
    // #include <...>
    // NOTE(review): system includes are recognized, but the call
    //      on_include (*node.begin(), true);
    // is left disabled as in the original code -- TODO confirm, whether it
    // may be enabled
    }
    else if (node_id == cpp_grammar_t::rule_ids.macroinclude_file_id) {
    // #include ...
    }
    else if (node_id == cpp_grammar_t::rule_ids.plain_define_id) {
    // #define
    }
    else if (node_id == cpp_grammar_t::rule_ids.undefine_id) {
    // #undef
    }
    else if (node_id == cpp_grammar_t::rule_ids.ifdef_id) {
    // #ifdef
    }
    else if (node_id == cpp_grammar_t::rule_ids.ifndef_id) {
    // #ifndef
    }
    else if (node_id == cpp_grammar_t::rule_ids.if_id) {
    // #if
    }
    else if (node_id == cpp_grammar_t::rule_ids.elif_id) {
    // #elif
    }
    else if (node_id == cpp_grammar_t::rule_ids.else_id) {
    // #else
    }
    else if (node_id == cpp_grammar_t::rule_ids.endif_id) {
    // #endif
    }
    else if (node_id == cpp_grammar_t::rule_ids.line_id) {
    // #line
    }
    else if (node_id == cpp_grammar_t::rule_ids.error_id) {
    // #error
    }
    else if (node_id == cpp_grammar_t::rule_ids.warning_id) {
    // #warning
    }
    else if (node_id == cpp_grammar_t::rule_ids.null_id) {
    // #
    }
    else if (node_id == cpp_grammar_t::rule_ids.pragma_id) {
    // #pragma
    }
    else if (node_id == cpp_grammar_t::rule_ids.illformed_id) {
    // #something else
    }
}
///////////////////////////////////////////////////////////////////////////////
//
// on_include: handle #include <...> or #include "..." directives
//
///////////////////////////////////////////////////////////////////////////////
template <typename ContextT>
inline void
pp_iterator_functor<ContextT>::on_include (result_type const &t, bool is_system)
{
    namespace fs = boost::filesystem;

    // string_t depends on the template parameter, so nested types have to be
    // marked with 'typename'
    typedef typename string_t::size_type size_type;

    // skip this include, if conditional compilation is off
    if (!ctx.get_if_block_status())
        return;

    // strip quotes first, extract filename
    string_t const &s = t.get_value();

    // the closing delimiter must exist and must not be the very first
    // character (otherwise 'pos_end-1' below would wrap around)
    size_type pos_end = s.find_last_of(is_system ? '>' : '\"');
    if (string_t::npos == pos_end || 0 == pos_end) {
        CPP_THROW(preprocess_exception, bad_include_statement, s, act_token);
    }

    size_type pos_begin =
        s.find_last_of(is_system ? '<' : '\"', pos_end-1);
    if (string_t::npos == pos_begin) {
        CPP_THROW(preprocess_exception, bad_include_statement, s, act_token);
    }

    string_t file_path(s.substr(pos_begin+1, pos_end-pos_begin-1));

    // try to locate the given file, searching through the include path lists
    if (!ctx.find_include_file (file_path, is_system)) {
        CPP_THROW(preprocess_exception, bad_include_file, file_path, act_token);
    }

    fs::path native_path(file_path, fs::native);
    if (!fs::exists(native_path)) {
        CPP_THROW(preprocess_exception, bad_include_file, file_path, act_token);
    }

    // the new include file determines the actual current directory
    ctx.set_current_directory(file_path.c_str());

    // preprocess the opened file (the file itself is opened by the input
    // policy of the iteration context)
    boost::shared_ptr<base_iteration_context_t> new_iter_ctx (
        new iteration_context_t(native_path.native_file_string(), act_token));

    // push the old iteration context onto the stack and continue with the new
    ctx.push_iteration_context(iter_ctx);
    iter_ctx = new_iter_ctx;
    seen_newline = true;        // fake a newline to trigger pp_directive
}
///////////////////////////////////////////////////////////////////////////////
//
// pp_iterator_functor_shim
//
///////////////////////////////////////////////////////////////////////////////
// The shim holds the actual functor by shared pointer and forwards the
// function call operator to pp_iterator_functor::get().
template <typename ContextT>
class pp_iterator_functor_shim
{
    typedef pp_iterator_functor<ContextT> functor_t;
    typedef typename ContextT::lex_t lex_t;

public:
    // interface to the boost::spirit::multi_pass_policies::functor_input policy
    typedef typename lex_t::token_t result_type;

    template <typename IteratorT>
    pp_iterator_functor_shim(ContextT &ctx, IteratorT const &first,
            IteratorT const &last, std::string const &filename)
    :   functor_ptr(new functor_t(ctx, first, last, filename))
    {}

    // VC7.1 gives a linker error, if the following is defined static
    /*static*/ result_type const eof;

    // return the next preprocessed token
    result_type operator()()
    {
        BOOST_ASSERT(functor_ptr.get() != 0);
        return functor_ptr->get();
    }

private:
    boost::shared_ptr<functor_t> functor_ptr;
};
///////////////////////////////////////////////////////////////////////////////
// eof token
// VC7.1 gives a linker error, if the following is defined static
//template <typename LexT>
//typename pp_iterator_functor_shim<LexT>::result_type const
// pp_iterator_functor_shim<LexT>::eof;
///////////////////////////////////////////////////////////////////////////////
//
// Store a stream position inside a given file to maintain an #include point
// stack.
//
///////////////////////////////////////////////////////////////////////////////
//template <typename LexT>
//struct stream_position {
//
// stream_position()
// {}
// stream_position(stream_position const &rhs)
// : act(rhs.act), end(rhs.end), filename(rhs.filename)
// {}
// stream_position &operator= (stream_position const &rhs)
// {
// if (&rhs != this) {
// stream_position pos (rhs); // may throw
//
// this->swap(pos);
// }
// return *this;
// }
//
// void swap(stream_position &rhs)
// {
// std::swap(act, rhs.act);
// std::swap(end, rhs.end);
// std::swap(filename, rhs.filename);
// }
//
// LexT act;
// LexT end;
// std::string filename;
//};
///////////////////////////////////////////////////////////////////////////////
} // namespace impl
///////////////////////////////////////////////////////////////////////////////
//
// pp_iterator
//
// The cpp::pp_iterator template is the iterator, through which
// the resulting preprocessed input stream is accessible.
//
///////////////////////////////////////////////////////////////////////////////
template <typename ContextT>
class pp_iterator
:   public boost::spirit::multi_pass<
        impl::pp_iterator_functor_shim<ContextT>,
        boost::spirit::multi_pass_policies::functor_input
    >
{
    typedef impl::pp_iterator_functor_shim<ContextT> input_policy_t;
    typedef
        boost::spirit::multi_pass<input_policy_t,
            boost::spirit::multi_pass_policies::functor_input>
        base_t;
    typedef pp_iterator<ContextT> self_t;

public:
    // default constructed iterator (the underlying multi_pass is default
    // constructed as well)
    pp_iterator()
    {}

    // iterator over the preprocessed tokens generated from the input
    // sequence [first, last) of the file 'filename'
    template <typename IteratorT>
    pp_iterator(ContextT &ctx, IteratorT const &first, IteratorT const &last,
            std::string const &filename)
    :   base_t(input_policy_t(ctx, first, last, filename))
    {}
};
///////////////////////////////////////////////////////////////////////////////
} // namespace cpp
#endif // !defined(_CPP_ITERATOR_HPP__175CA88F_7273_43FA_9039_BCF7459E1F29__INCLUDED_)

View File

@@ -0,0 +1,88 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Global application configuration
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_CPP_CONFIG_HPP__F143F90A_A63F_4B27_AC41_9CA4F14F538D__INCLUDED_)
// the guard macro has to be defined here, otherwise the include guard has no
// effect and the file is processed on every inclusion
#define _CPP_CONFIG_HPP__F143F90A_A63F_4B27_AC41_9CA4F14F538D__INCLUDED_
///////////////////////////////////////////////////////////////////////////////
// global configuration information
#include <boost/config.hpp>
///////////////////////////////////////////////////////////////////////////////
// uncomment the following define, to enable some MS specific language
// extensions
//#define SUPPORT_MS_EXTENSIONS
///////////////////////////////////////////////////////////////////////////////
// decide, which C++ lexer to use (choose one!)
#define USE_SLEX_CPP_LEXER // use the SLex based C++ lexer
//#define USE_RE2C_CPP_LEXER // use the Re2C based C++ lexer
///////////////////////////////////////////////////////////////////////////////
// uncomment the following define, if you need debug output, the
// BOOST_SPIRIT_DEBUG_FLAGS constants below help to fine control the amount of
// the generated debug output
//#define BOOST_SPIRIT_DEBUG
///////////////////////////////////////////////////////////////////////////////
// debug rules, subrules and grammars only, for possible flags see
// spirit/debug.hpp
#define BOOST_SPIRIT_DEBUG_FLAGS \
BOOST_SPIRIT_DEBUG_FLAGS_NODES
/**/
///////////////////////////////////////////////////////////////////////////////
// debug flags for CPP library, possible flags:
#define BOOST_SPIRIT_DEBUG_FLAGS_CPP_GRAMMAR 0x0001
#define BOOST_SPIRIT_DEBUG_FLAGS_TIME_CONVERSION 0x0002
#define BOOST_SPIRIT_DEBUG_FLAGS_CPP \
BOOST_SPIRIT_DEBUG_FLAGS_CPP_GRAMMAR \
/**/
///////////////////////////////////////////////////////////////////////////////
// Decide, whether to use the separate compilation model for the instantiation
// of the C++ lexer objects.
//
// If this is defined, you should explicitly instantiate the C++ lexer
// template with the correct parameters in a separate compilation unit of
// your program (see the files instantiate_slex_lexer.cpp and
// instantiate_re2c_lexer.cpp).
//
// To use the lexer inclusion model, undefine the following
//
#define CPP_SEPARATE_LEXER_INSTANTIATION
///////////////////////////////////////////////////////////////////////////////
// Decide, whether to use the separate compilation model for the instantiation
// of the grammar objects.
//
// If this is defined, you should explicitly instantiate the grammar
// templates with the correct parameters in a separate compilation unit of
// your program (see the files instantiate_cpp_grammar.cpp).
//
// To use the grammar inclusion model, undefine the following
//
#define CPP_SEPARATE_GRAMMAR_INSTANTIATION
///////////////////////////////////////////////////////////////////////////////
// You shouldn't have to change anything below
#if defined(BOOST_MSVC) && !defined(__COMO__)
#pragma warning (disable: 4355) // 'this' used in base member initializer list
#pragma inline_depth(255)
#pragma inline_recursion(on)
#endif // defined(BOOST_MSVC) && !defined(__COMO__)
#endif // !defined(_CPP_CONFIG_HPP__F143F90A_A63F_4B27_AC41_9CA4F14F538D__INCLUDED_)

View File

@@ -0,0 +1,22 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#ifndef _CPP_VERSION_HPP__CE4FE67F_63F9_468D_8364_C855F89D3C5D__INCLUDED_
#define _CPP_VERSION_HPP__CE4FE67F_63F9_468D_8364_C855F89D3C5D__INCLUDED_

// the version number consists of three parts: major.minor.subminor
#define VERSION_MAJOR       0
#define VERSION_MINOR       5
#define VERSION_SUBMINOR    0

#endif // !defined(_CPP_VERSION_HPP__CE4FE67F_63F9_468D_8364_C855F89D3C5D__INCLUDED_)

View File

@@ -0,0 +1,49 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Definition of the abstract lexer interface
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_CPP_LEX_INTERFACE_HPP__E83F52A4_90AC_4FBE_A9A7_B65F7F94C497__INCLUDED_)
#define _CPP_LEX_INTERFACE_HPP__E83F52A4_90AC_4FBE_A9A7_B65F7F94C497__INCLUDED_
///////////////////////////////////////////////////////////////////////////////
namespace cpplexer {
///////////////////////////////////////////////////////////////////////////////
//
// The lex_input_interface decouples the lex_iterator_shim from the actual
// lexer. This is done to allow compile time reduction.
// Thanks to JCAB for having this idea.
//
///////////////////////////////////////////////////////////////////////////////
template <typename TokenT>
struct lex_input_interface
{
    // Lexer objects are created through new_lexer() and handed around as
    // pointers to this interface, so they are destroyed through it, too.
    // The destructor has to be virtual to make this well defined.
    virtual ~lex_input_interface() {}

    // return the next token from the input
    virtual TokenT get() = 0;
    //virtual typename TokenT::position_t get_position() = 0;

    // The new_lexer function allows the opaque generation of a new lexer
    // object. It is coupled to the token type to allow to distinguish
    // different lexer/token configurations at compile time.
    static lex_input_interface *
    new_lexer(typename TokenT::iterator_t const &first,
        typename TokenT::iterator_t const &last, std::string const &fname)
    {
        return TokenT::lexer_gen_t::new_lexer (first, last, fname);
    }
};
///////////////////////////////////////////////////////////////////////////////
} // namespace cpplexer
#endif // !defined(_CPP_LEX_INTERFACE_HPP__E83F52A4_90AC_4FBE_A9A7_B65F7F94C497__INCLUDED_)

View File

@@ -0,0 +1,132 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Definition of the lexer iterator
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_CPP_LEX_ITERATOR_HPP__AF0C37E3_CBD8_4F33_A225_51CF576FA61F__INCLUDED_)
#define _CPP_LEX_ITERATOR_HPP__AF0C37E3_CBD8_4F33_A225_51CF576FA61F__INCLUDED_
#include <string>
#include <iostream>
#include <boost/shared_ptr.hpp>
#include <boost/spirit/iterator/multi_pass.hpp>
#include "cpplexer/cpp_lex_interface.hpp"
///////////////////////////////////////////////////////////////////////////////
namespace cpplexer {
namespace impl {
///////////////////////////////////////////////////////////////////////////////
//
// lex_iterator_functor_shim
//
///////////////////////////////////////////////////////////////////////////////
template <typename TokenT>
class lex_iterator_functor_shim
{
public:
template <typename IteratorT>
lex_iterator_functor_shim(IteratorT const &first, IteratorT const &last,
std::string const &fname)
: functor_ptr(lex_input_interface<TokenT>::new_lexer(first, last, fname))
{}
// interface to the boost::spirit::multi_pass_policies::functor_input policy
typedef TokenT result_type;
// VC7.1 gives a linker error, if the following is defined static
/*static*/ result_type const eof;
result_type operator()()
{
BOOST_ASSERT(0 != functor_ptr.get());
return functor_ptr->get();
}
//typename TokenT::position_t get_position()
//{
// BOOST_ASSERT(0 != functor_ptr.get());
// return functor_ptr->get_position();
//}
private:
boost::shared_ptr<lex_input_interface<TokenT> > functor_ptr;
};
///////////////////////////////////////////////////////////////////////////////
// eof token
// VC7.1 gives a linker error, if the following is defined static
//template <typename LexT>
//typename lex_iterator_functor_shim<LexT>::result_type const
// lex_iterator_functor_shim<LexT>::eof;
///////////////////////////////////////////////////////////////////////////////
} // namespace impl
///////////////////////////////////////////////////////////////////////////////
//
// lex_iterator
//
// A generic C++ lexer interface class, which allows to plug in different
// lexer implementations (template parameter LexT). The following
// requirements apply:
//
// - the lexer type should have a function implemented, which returns
// the next lexed token from the input stream:
// typename LexT::token_t get();
// - at the end of the input stream this function should return the
// eof token equivalent
// - the lexer should implement a constructor taking two iterators
// pointing to the beginning and the end of the input stream and
// a third parameter containing the name of the parsed input file
//
///////////////////////////////////////////////////////////////////////////////
template <typename TokenT>
class lex_iterator
:   public boost::spirit::multi_pass<
        impl::lex_iterator_functor_shim<TokenT>,
        boost::spirit::multi_pass_policies::functor_input
    >
{
    typedef impl::lex_iterator_functor_shim<TokenT> input_policy_t;
    typedef
        boost::spirit::multi_pass<input_policy_t,
            boost::spirit::multi_pass_policies::functor_input>
        base_t;
    typedef lex_iterator<TokenT> self_t;

public:
    typedef typename input_policy_t::result_type token_t;

    // default constructed iterator (the underlying multi_pass is default
    // constructed as well)
    lex_iterator()
    {}

    // iterator over the tokens lexed from the input sequence [first, last)
    // of the file 'fname'
    template <typename IteratorT>
    lex_iterator(IteratorT const &first, IteratorT const &last,
            std::string const &fname)
    :   base_t(input_policy_t(first, last, fname))
    {}

    // Forwards to get_position() of whatever get_input() returns.
    // get_input() is inherited from a base class depending on the template
    // parameter, so the call has to be qualified with 'this->' to be found
    // during the second phase of the name lookup.
    typename TokenT::position_t get_position()
    {
        return this->get_input().get_position();
    }
};
///////////////////////////////////////////////////////////////////////////////
} // namespace cpplexer
#endif // !defined(_CPP_LEX_ITERATOR_HPP__AF0C37E3_CBD8_4F33_A225_51CF576FA61F__INCLUDED_)

View File

@@ -0,0 +1,407 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Copyright (c) 2001-2002 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_CPP_TOKEN_IDS_HPP__414E9A58_F079_4789_8AFF_513815CE475B__INCLUDED_)
#define _CPP_TOKEN_IDS_HPP__414E9A58_F079_4789_8AFF_513815CE475B__INCLUDED_
#include <string>
#include <boost/assert.hpp>
namespace cpplexer {
///////////////////////////////////////////////////////////////////////////////
// assemble tokenid's
// combine a plain id with a token category
#define TOKEN_FROM_ID(id, cat)      ((id) | (cat))
// strip the category bits from a token id
#define ID_FROM_TOKEN(tok)          ((tok) & ~::cpplexer::TokenTypeMask)
// extract the category bits from a token id
#define CATEGORY_FROM_TOKEN(tok)    ((tok) & ::cpplexer::TokenTypeMask)
// test, whether a token id belongs to the given category
#define IS_CATEGORY(tok, cat)       (CATEGORY_FROM_TOKEN(tok) == (cat))
///////////////////////////////////////////////////////////////////////////////
// the token_category helps to classify the different tokentypes
// The category occupies the bits selected by TokenTypeMask; the remaining
// bits of a token id hold the plain id (see the TOKEN_FROM_ID macro).
enum token_category {
    IdentifierTokenType         = 0x10000000,
    KeywordTokenType            = 0x20000000,
    OperatorTokenType           = 0x30000000,
    LiteralTokenType            = 0x40000000,
    IntegerLiteralTokenType     = 0x41000000,
    FloatingLiteralTokenType    = 0x42000000,
    StringLiteralTokenType      = 0x43000000,
    CharacterLiteralTokenType   = 0x44000000,
    BoolLiteralTokenType        = 0x45000000,
    PPTokenType                 = 0x50000000,
    UnknownTokenType            = 0xB0000000,
    EOLTokenType                = 0xC0000000,
    EOFTokenType                = 0xD0000000,
    WhiteSpaceTokenType         = 0xE0000000,
    // no comma after the last enumerator: a trailing comma is not allowed
    // by C++98/03
    TokenTypeMask               = 0xFF000000
};
///////////////////////////////////////////////////////////////////////////////
// The token_id assigns unique numbers to the different C++ lexemes.
//
// Every id is a running number (starting at T_FIRST_TOKEN) combined with its
// token_category through TOKEN_FROM_ID.  The running numbers are used by
// get_token_name() as (number - T_FIRST_TOKEN) to index its name table, so
// the order of the enumerators and the order of that table must be kept in
// sync.
//
// Note: no comma after the last enumerator -- a trailing comma in an
// enumerator list is only valid from C++11 on.
enum token_id {
    T_FIRST_TOKEN = 256,

    // operators and punctuators
    T_AND = TOKEN_FROM_ID(T_FIRST_TOKEN, OperatorTokenType),
    T_ANDAND = TOKEN_FROM_ID(257, OperatorTokenType),
    T_ASSIGN = TOKEN_FROM_ID(258, OperatorTokenType),
    T_ANDASSIGN = TOKEN_FROM_ID(259, OperatorTokenType),
    T_OR = TOKEN_FROM_ID(260, OperatorTokenType),
    T_ORASSIGN = TOKEN_FROM_ID(261, OperatorTokenType),
    T_XOR = TOKEN_FROM_ID(262, OperatorTokenType),
    T_XORASSIGN = TOKEN_FROM_ID(263, OperatorTokenType),
    T_COMMA = TOKEN_FROM_ID(264, OperatorTokenType),
    T_COLON = TOKEN_FROM_ID(265, OperatorTokenType),
    T_DIVIDE = TOKEN_FROM_ID(266, OperatorTokenType),
    T_DIVIDEASSIGN = TOKEN_FROM_ID(267, OperatorTokenType),
    T_DOT = TOKEN_FROM_ID(268, OperatorTokenType),
    T_DOTSTAR = TOKEN_FROM_ID(269, OperatorTokenType),
    T_ELLIPSIS = TOKEN_FROM_ID(270, OperatorTokenType),
    T_EQUAL = TOKEN_FROM_ID(271, OperatorTokenType),
    T_GREATER = TOKEN_FROM_ID(272, OperatorTokenType),
    T_GREATEREQUAL = TOKEN_FROM_ID(273, OperatorTokenType),
    T_LEFTBRACE = TOKEN_FROM_ID(274, OperatorTokenType),
    T_LESS = TOKEN_FROM_ID(275, OperatorTokenType),
    T_LESSEQUAL = TOKEN_FROM_ID(276, OperatorTokenType),
    T_LEFTPAREN = TOKEN_FROM_ID(277, OperatorTokenType),
    T_LEFTBRACKET = TOKEN_FROM_ID(278, OperatorTokenType),
    T_MINUS = TOKEN_FROM_ID(279, OperatorTokenType),
    T_MINUSASSIGN = TOKEN_FROM_ID(280, OperatorTokenType),
    T_MINUSMINUS = TOKEN_FROM_ID(281, OperatorTokenType),
    T_PERCENT = TOKEN_FROM_ID(282, OperatorTokenType),
    T_PERCENTASSIGN = TOKEN_FROM_ID(283, OperatorTokenType),
    T_NOT = TOKEN_FROM_ID(284, OperatorTokenType),
    T_NOTEQUAL = TOKEN_FROM_ID(285, OperatorTokenType),
    T_OROR = TOKEN_FROM_ID(286, OperatorTokenType),
    T_PLUS = TOKEN_FROM_ID(287, OperatorTokenType),
    T_PLUSASSIGN = TOKEN_FROM_ID(288, OperatorTokenType),
    T_PLUSPLUS = TOKEN_FROM_ID(289, OperatorTokenType),
    T_ARROW = TOKEN_FROM_ID(290, OperatorTokenType),
    T_ARROWSTAR = TOKEN_FROM_ID(291, OperatorTokenType),
    T_QUESTION_MARK = TOKEN_FROM_ID(292, OperatorTokenType),
    T_RIGHTBRACE = TOKEN_FROM_ID(293, OperatorTokenType),
    T_RIGHTPAREN = TOKEN_FROM_ID(294, OperatorTokenType),
    T_RIGHTBRACKET = TOKEN_FROM_ID(295, OperatorTokenType),
    T_COLON_COLON = TOKEN_FROM_ID(296, OperatorTokenType),
    T_SEMICOLON = TOKEN_FROM_ID(297, OperatorTokenType),
    T_SHIFTLEFT = TOKEN_FROM_ID(298, OperatorTokenType),
    T_SHIFTLEFTASSIGN = TOKEN_FROM_ID(299, OperatorTokenType),
    T_SHIFTRIGHT = TOKEN_FROM_ID(300, OperatorTokenType),
    T_SHIFTRIGHTASSIGN = TOKEN_FROM_ID(301, OperatorTokenType),
    T_STAR = TOKEN_FROM_ID(302, OperatorTokenType),
    T_COMPL = TOKEN_FROM_ID(303, OperatorTokenType),
    T_STARASSIGN = TOKEN_FROM_ID(304, OperatorTokenType),

    // keywords (false/true are categorized as bool literals)
    T_ASM = TOKEN_FROM_ID(305, KeywordTokenType),
    T_AUTO = TOKEN_FROM_ID(306, KeywordTokenType),
    T_BOOL = TOKEN_FROM_ID(307, KeywordTokenType),
    T_FALSE = TOKEN_FROM_ID(308, BoolLiteralTokenType),
    T_TRUE = TOKEN_FROM_ID(309, BoolLiteralTokenType),
    T_BREAK = TOKEN_FROM_ID(310, KeywordTokenType),
    T_CASE = TOKEN_FROM_ID(311, KeywordTokenType),
    T_CATCH = TOKEN_FROM_ID(312, KeywordTokenType),
    T_CHAR = TOKEN_FROM_ID(313, KeywordTokenType),
    T_CLASS = TOKEN_FROM_ID(314, KeywordTokenType),
    T_CONST = TOKEN_FROM_ID(315, KeywordTokenType),
    T_CONSTCAST = TOKEN_FROM_ID(316, KeywordTokenType),
    T_CONTINUE = TOKEN_FROM_ID(317, KeywordTokenType),
    T_DEFAULT = TOKEN_FROM_ID(318, KeywordTokenType),
    T_DEFINED = TOKEN_FROM_ID(319, KeywordTokenType),
    T_DELETE = TOKEN_FROM_ID(320, KeywordTokenType),
    T_DO = TOKEN_FROM_ID(321, KeywordTokenType),
    T_DOUBLE = TOKEN_FROM_ID(322, KeywordTokenType),
    T_DYNAMICCAST = TOKEN_FROM_ID(323, KeywordTokenType),
    T_ELSE = TOKEN_FROM_ID(324, KeywordTokenType),
    T_ENUM = TOKEN_FROM_ID(325, KeywordTokenType),
    T_EXPLICIT = TOKEN_FROM_ID(326, KeywordTokenType),
    T_EXPORT = TOKEN_FROM_ID(327, KeywordTokenType),
    T_EXTERN = TOKEN_FROM_ID(328, KeywordTokenType),
    T_FLOAT = TOKEN_FROM_ID(329, KeywordTokenType),
    T_FOR = TOKEN_FROM_ID(330, KeywordTokenType),
    T_FRIEND = TOKEN_FROM_ID(331, KeywordTokenType),
    T_GOTO = TOKEN_FROM_ID(332, KeywordTokenType),
    T_IF = TOKEN_FROM_ID(333, KeywordTokenType),
    T_INLINE = TOKEN_FROM_ID(334, KeywordTokenType),
    T_INT = TOKEN_FROM_ID(335, KeywordTokenType),
    T_LONG = TOKEN_FROM_ID(336, KeywordTokenType),
    T_MUTABLE = TOKEN_FROM_ID(337, KeywordTokenType),
    T_NAMESPACE = TOKEN_FROM_ID(338, KeywordTokenType),
    T_NEW = TOKEN_FROM_ID(339, KeywordTokenType),
    T_OPERATOR = TOKEN_FROM_ID(340, KeywordTokenType),
    T_PRIVATE = TOKEN_FROM_ID(341, KeywordTokenType),
    T_PROTECTED = TOKEN_FROM_ID(342, KeywordTokenType),
    T_PUBLIC = TOKEN_FROM_ID(343, KeywordTokenType),
    T_REGISTER = TOKEN_FROM_ID(344, KeywordTokenType),
    T_REINTERPRETCAST = TOKEN_FROM_ID(345, KeywordTokenType),
    T_RETURN = TOKEN_FROM_ID(346, KeywordTokenType),
    T_SHORT = TOKEN_FROM_ID(347, KeywordTokenType),
    T_SIGNED = TOKEN_FROM_ID(348, KeywordTokenType),
    T_SIZEOF = TOKEN_FROM_ID(349, KeywordTokenType),
    T_STATIC = TOKEN_FROM_ID(350, KeywordTokenType),
    T_STATICCAST = TOKEN_FROM_ID(351, KeywordTokenType),
    T_STRUCT = TOKEN_FROM_ID(352, KeywordTokenType),
    T_SWITCH = TOKEN_FROM_ID(353, KeywordTokenType),
    T_TEMPLATE = TOKEN_FROM_ID(354, KeywordTokenType),
    T_THIS = TOKEN_FROM_ID(355, KeywordTokenType),
    T_THROW = TOKEN_FROM_ID(356, KeywordTokenType),
    T_TRY = TOKEN_FROM_ID(357, KeywordTokenType),
    T_TYPEDEF = TOKEN_FROM_ID(358, KeywordTokenType),
    T_TYPEID = TOKEN_FROM_ID(359, KeywordTokenType),
    T_TYPENAME = TOKEN_FROM_ID(360, KeywordTokenType),
    T_UNION = TOKEN_FROM_ID(361, KeywordTokenType),
    T_UNSIGNED = TOKEN_FROM_ID(362, KeywordTokenType),
    T_USING = TOKEN_FROM_ID(363, KeywordTokenType),
    T_VIRTUAL = TOKEN_FROM_ID(364, KeywordTokenType),
    T_VOID = TOKEN_FROM_ID(365, KeywordTokenType),
    T_VOLATILE = TOKEN_FROM_ID(366, KeywordTokenType),
    T_WCHART = TOKEN_FROM_ID(367, KeywordTokenType),
    T_WHILE = TOKEN_FROM_ID(368, KeywordTokenType),

    // preprocessor directives
    T_PP_DEFINE = TOKEN_FROM_ID(369, PPTokenType),
    T_PP_IF = TOKEN_FROM_ID(370, PPTokenType),
    T_PP_IFDEF = TOKEN_FROM_ID(371, PPTokenType),
    T_PP_IFNDEF = TOKEN_FROM_ID(372, PPTokenType),
    T_PP_ELIF = TOKEN_FROM_ID(373, PPTokenType),
    T_PP_ENDIF = TOKEN_FROM_ID(374, PPTokenType),
    T_PP_ERROR = TOKEN_FROM_ID(375, PPTokenType),
    T_PP_LINE = TOKEN_FROM_ID(376, PPTokenType),
    T_PP_PRAGMA = TOKEN_FROM_ID(377, PPTokenType),
    T_PP_UNDEF = TOKEN_FROM_ID(378, PPTokenType),
    T_PP_WARNING = TOKEN_FROM_ID(379, PPTokenType),

    // identifiers, literals, comments, whitespace and line ends
    T_IDENTIFIER = TOKEN_FROM_ID(380, IdentifierTokenType),
    T_OCTALINT = TOKEN_FROM_ID(381, IntegerLiteralTokenType),
    T_DECIMALINT = TOKEN_FROM_ID(382, IntegerLiteralTokenType),
    T_HEXAINT = TOKEN_FROM_ID(383, IntegerLiteralTokenType),
    T_INTLIT = TOKEN_FROM_ID(384, IntegerLiteralTokenType),
    T_FLOATLIT = TOKEN_FROM_ID(385, FloatingLiteralTokenType),
    T_CCOMMENT = TOKEN_FROM_ID(386, WhiteSpaceTokenType),
    T_CPPCOMMENT = TOKEN_FROM_ID(387, WhiteSpaceTokenType),
    T_CHARLIT = TOKEN_FROM_ID(388, CharacterLiteralTokenType),
    T_STRINGLIT = TOKEN_FROM_ID(389, StringLiteralTokenType),
    T_CONTLINE = TOKEN_FROM_ID(390, EOLTokenType),
    T_SPACE = TOKEN_FROM_ID(391, WhiteSpaceTokenType),
    T_SPACE2 = TOKEN_FROM_ID(392, WhiteSpaceTokenType),
    T_NEWLINE = TOKEN_FROM_ID(393, EOLTokenType),
    T_POUND_POUND = TOKEN_FROM_ID(394, OperatorTokenType),
    T_POUND = TOKEN_FROM_ID(395, OperatorTokenType),
    T_ANY = TOKEN_FROM_ID(396, UnknownTokenType),

    // #include in its different forms
    T_PP_INCLUDE = TOKEN_FROM_ID(397, PPTokenType),
    T_PP_QHEADER = TOKEN_FROM_ID(398, PPTokenType),
    T_PP_HHEADER = TOKEN_FROM_ID(399, PPTokenType),

    T_EOF = TOKEN_FROM_ID(400, EOFTokenType), // end of input reached

#if defined(SUPPORT_MS_EXTENSIONS)
    // Microsoft specific keywords
    T_MSEXT_INT8 = TOKEN_FROM_ID(401, KeywordTokenType),
    T_MSEXT_INT16 = TOKEN_FROM_ID(402, KeywordTokenType),
    T_MSEXT_INT32 = TOKEN_FROM_ID(403, KeywordTokenType),
    T_MSEXT_INT64 = TOKEN_FROM_ID(404, KeywordTokenType),
    T_MSEXT_BASED = TOKEN_FROM_ID(405, KeywordTokenType),
    T_MSEXT_DECLSPEC = TOKEN_FROM_ID(406, KeywordTokenType),
    T_MSEXT_CDECL = TOKEN_FROM_ID(407, KeywordTokenType),
    T_MSEXT_FASTCALL = TOKEN_FROM_ID(408, KeywordTokenType),
    T_MSEXT_STDCALL = TOKEN_FROM_ID(409, KeywordTokenType),
    T_MSEXT_TRY = TOKEN_FROM_ID(410, KeywordTokenType),
    T_MSEXT_EXCEPT = TOKEN_FROM_ID(411, KeywordTokenType),
    T_MSEXT_FINALLY = TOKEN_FROM_ID(412, KeywordTokenType),
    T_MSEXT_LEAVE = TOKEN_FROM_ID(413, KeywordTokenType),
    T_MSEXT_INLINE = TOKEN_FROM_ID(414, KeywordTokenType),
    T_MSEXT_ASM = TOKEN_FROM_ID(415, KeywordTokenType),
#endif // defined(SUPPORT_MS_EXTENSIONS)

    // T_LAST_TOKEN_ID is one past the last token defined above (category bits
    // included); T_LAST_TOKEN is the same value with the category stripped.
    T_LAST_TOKEN_ID,
    T_LAST_TOKEN = ID_FROM_TOKEN(T_LAST_TOKEN_ID)
};
///////////////////////////////////////////////////////////////////////////////
// Return the name of a token.
//
// tokid    the token id to look up (category bits are ignored)
//
// Returns the name from the table below, i.e. the enumerator name without
// its "T_" prefix.  An id outside [T_FIRST_TOKEN, T_LAST_TOKEN) triggers
// BOOST_ASSERT; when assertions are compiled out "<UnknownToken>" is returned
// instead of reading past the end of the table.
inline std::string
get_token_name(token_id tokid)
{
    // Table of token names
    //
    // Please note that the sequence of token names must match the sequence of
    // token id's defined in the enum token_id above.
    static char const *const tok_names[] = {
        // operators and punctuators
        "AND",
        "ANDAND",
        "ASSIGN",
        "ANDASSIGN",
        "OR",
        "ORASSIGN",
        "XOR",
        "XORASSIGN",
        "COMMA",
        "COLON",
        "DIVIDE",
        "DIVIDEASSIGN",
        "DOT",
        "DOTSTAR",
        "ELLIPSIS",
        "EQUAL",
        "GREATER",
        "GREATEREQUAL",
        "LEFTBRACE",
        "LESS",
        "LESSEQUAL",
        "LEFTPAREN",
        "LEFTBRACKET",
        "MINUS",
        "MINUSASSIGN",
        "MINUSMINUS",
        "PERCENT",
        "PERCENTASSIGN",
        "NOT",
        "NOTEQUAL",
        "OROR",
        "PLUS",
        "PLUSASSIGN",
        "PLUSPLUS",
        "ARROW",
        "ARROWSTAR",
        "QUESTION_MARK",
        "RIGHTBRACE",
        "RIGHTPAREN",
        "RIGHTBRACKET",
        "COLON_COLON",
        "SEMICOLON",
        "SHIFTLEFT",
        "SHIFTLEFTASSIGN",
        "SHIFTRIGHT",
        "SHIFTRIGHTASSIGN",
        "STAR",
        "COMPL",
        "STARASSIGN",
        // keywords
        "ASM",
        "AUTO",
        "BOOL",
        "FALSE",
        "TRUE",
        "BREAK",
        "CASE",
        "CATCH",
        "CHAR",
        "CLASS",
        "CONST",
        "CONSTCAST",
        "CONTINUE",
        "DEFAULT",
        "DEFINED",
        "DELETE",
        "DO",
        "DOUBLE",
        "DYNAMICCAST",
        "ELSE",
        "ENUM",
        "EXPLICIT",
        "EXPORT",
        "EXTERN",
        "FLOAT",
        "FOR",
        "FRIEND",
        "GOTO",
        "IF",
        "INLINE",
        "INT",
        "LONG",
        "MUTABLE",
        "NAMESPACE",
        "NEW",
        "OPERATOR",
        "PRIVATE",
        "PROTECTED",
        "PUBLIC",
        "REGISTER",
        "REINTERPRETCAST",
        "RETURN",
        "SHORT",
        "SIGNED",
        "SIZEOF",
        "STATIC",
        "STATICCAST",
        "STRUCT",
        "SWITCH",
        "TEMPLATE",
        "THIS",
        "THROW",
        "TRY",
        "TYPEDEF",
        "TYPEID",
        "TYPENAME",
        "UNION",
        "UNSIGNED",
        "USING",
        "VIRTUAL",
        "VOID",
        "VOLATILE",
        "WCHART",
        "WHILE",
        // preprocessor directives
        "PP_DEFINE",
        "PP_IF",
        "PP_IFDEF",
        "PP_IFNDEF",
        "PP_ELIF",
        "PP_ENDIF",
        "PP_ERROR",
        "PP_LINE",
        "PP_PRAGMA",
        "PP_UNDEF",
        "PP_WARNING",
        // identifiers, literals, comments, whitespace and line ends
        "IDENTIFIER",
        "OCTALINT",
        "DECIMALINT",
        "HEXAINT",
        "INTLIT",
        "FLOATLIT",
        "CCOMMENT",
        "CPPCOMMENT",
        "CHARLIT",
        "STRINGLIT",
        "CONTLINE",
        "SPACE",
        "SPACE2",
        "NEWLINE",
        "POUND_POUND",
        "POUND",
        "ANY",
        "PP_INCLUDE",
        "PP_QHEADER",
        "PP_HHEADER",
        "EOF",
#if defined(SUPPORT_MS_EXTENSIONS)
        "MSEXT_INT8",
        "MSEXT_INT16",
        "MSEXT_INT32",
        "MSEXT_INT64",
        "MSEXT_BASED",
        "MSEXT_DECLSPEC",
        "MSEXT_CDECL",
        "MSEXT_FASTCALL",
        "MSEXT_STDCALL",
        "MSEXT_TRY",
        "MSEXT_EXCEPT",
        "MSEXT_FINALLY",
        "MSEXT_LEAVE",
        "MSEXT_INLINE",
        "MSEXT_ASM",
#endif // defined(SUPPORT_MS_EXTENSIONS)
    };

    // strip the category bits and rebase onto the first table entry
    int id = ID_FROM_TOKEN(tokid)-T_FIRST_TOKEN;
    bool const known = (id >= 0 && id < T_LAST_TOKEN-T_FIRST_TOKEN);

    BOOST_ASSERT(known);
    // the assertion vanishes in release builds, so never index unchecked
    return known ? tok_names[id] : "<UnknownToken>";
}
///////////////////////////////////////////////////////////////////////////////
} // namespace cpplexer
#endif // !defined(_CPP_TOKEN_IDS_HPP__414E9A58_F079_4789_8AFF_513815CE475B__INCLUDED_)

View File

@@ -0,0 +1,195 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#include "cpplexer/cpp_token_ids.hpp"
namespace cpplexer {
///////////////////////////////////////////////////////////////////////////////
//
// Table of token names
//
// Please note that the sequence of token names must match the sequence of
// token id's defined in the enum token_id (see cpplexer/cpp_token_ids.hpp):
// entry N names the token whose id number is T_FIRST_TOKEN + N.
//
// NOTE(review): get_token_name() in cpp_token_ids.hpp carries its own copy of
// this table -- keep both in sync when adding tokens.
//
///////////////////////////////////////////////////////////////////////////////
char const *tok_names[] =
{
// operators and punctuators
"AND",
"ANDAND",
"ASSIGN",
"ANDASSIGN",
"OR",
"ORASSIGN",
"XOR",
"XORASSIGN",
"COMMA",
"COLON",
"DIVIDE",
"DIVIDEASSIGN",
"DOT",
"DOTSTAR",
"ELLIPSIS",
"EQUAL",
"GREATER",
"GREATEREQUAL",
"LEFTBRACE",
"LESS",
"LESSEQUAL",
"LEFTPAREN",
"LEFTBRACKET",
"MINUS",
"MINUSASSIGN",
"MINUSMINUS",
"PERCENT",
"PERCENTASSIGN",
"NOT",
"NOTEQUAL",
"OROR",
"PLUS",
"PLUSASSIGN",
"PLUSPLUS",
"ARROW",
"ARROWSTAR",
"QUESTION_MARK",
"RIGHTBRACE",
"RIGHTPAREN",
"RIGHTBRACKET",
"COLON_COLON",
"SEMICOLON",
"SHIFTLEFT",
"SHIFTLEFTASSIGN",
"SHIFTRIGHT",
"SHIFTRIGHTASSIGN",
"STAR",
"COMPL",
"STARASSIGN",
// keywords
"ASM",
"AUTO",
"BOOL",
"FALSE",
"TRUE",
"BREAK",
"CASE",
"CATCH",
"CHAR",
"CLASS",
"CONST",
"CONSTCAST",
"CONTINUE",
"DEFAULT",
"DEFINED",
"DELETE",
"DO",
"DOUBLE",
"DYNAMICCAST",
"ELSE",
"ENUM",
"EXPLICIT",
"EXPORT",
"EXTERN",
"FLOAT",
"FOR",
"FRIEND",
"GOTO",
"IF",
"INLINE",
"INT",
"LONG",
"MUTABLE",
"NAMESPACE",
"NEW",
"OPERATOR",
"PRIVATE",
"PROTECTED",
"PUBLIC",
"REGISTER",
"REINTERPRETCAST",
"RETURN",
"SHORT",
"SIGNED",
"SIZEOF",
"STATIC",
"STATICCAST",
"STRUCT",
"SWITCH",
"TEMPLATE",
"THIS",
"THROW",
"TRY",
"TYPEDEF",
"TYPEID",
"TYPENAME",
"UNION",
"UNSIGNED",
"USING",
"VIRTUAL",
"VOID",
"VOLATILE",
"WCHART",
"WHILE",
// preprocessor directives
"PP_DEFINE",
"PP_IF",
"PP_IFDEF",
"PP_IFNDEF",
"PP_ELIF",
"PP_ENDIF",
"PP_ERROR",
"PP_LINE",
"PP_PRAGMA",
"PP_UNDEF",
"PP_WARNING",
// identifiers, literals, comments, whitespace and line ends
"IDENTIFIER",
"OCTALINT",
"DECIMALINT",
"HEXAINT",
"INTLIT",
"FLOATLIT",
"CCOMMENT",
"CPPCOMMENT",
"CHARLIT",
"STRINGLIT",
"CONTLINE",
"SPACE",
"SPACE2",
"NEWLINE",
"POUND_POUND",
"POUND",
"ANY",
"PP_INCLUDE",
"PP_QHEADER",
"PP_HHEADER",
"EOF",
#if defined(SUPPORT_MS_EXTENSIONS)
// Microsoft specific keywords
"MSEXT_INT8",
"MSEXT_INT16",
"MSEXT_INT32",
"MSEXT_INT64",
"MSEXT_BASED",
"MSEXT_DECLSPEC",
"MSEXT_CDECL",
"MSEXT_FASTCALL",
"MSEXT_STDCALL",
"MSEXT_TRY",
"MSEXT_EXCEPT",
"MSEXT_FINALLY",
"MSEXT_LEAVE",
"MSEXT_INLINE",
"MSEXT_ASM",
#endif // defined(SUPPORT_MS_EXTENSIONS)
};
///////////////////////////////////////////////////////////////////////////////
} // namespace cpplexer

View File

@@ -0,0 +1,49 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Re2C based C++ lexer
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_RE2C_FUNCTOR_HPP__80E4E0B2_7C71_44FE_9F65_901BBCD5B840__INCLUDED_)
#define _RE2C_FUNCTOR_HPP__80E4E0B2_7C71_44FE_9F65_901BBCD5B840__INCLUDED_
#include "cpplexer/re2clex/cpp_re2c_lexer.hpp"
///////////////////////////////////////////////////////////////////////////////
namespace cpplexer {
///////////////////////////////////////////////////////////////////////////////
//
//  re2c_functor
//
//  Convenience name which makes the unified re2clex::lex_functor lexer
//  interface template available directly inside the cpplexer namespace.
//
//  Alternatively, lex_functor may be referred to with an explicit namespace
//  qualification (or be imported by a using directive).  Switching the
//  namespace is then all that is needed to select a different lexer
//  implementation.
//
///////////////////////////////////////////////////////////////////////////////
template <typename IteratorT>
struct re2c_functor : re2clex::lex_functor<IteratorT>   // struct: public base
{
};
///////////////////////////////////////////////////////////////////////////////
} // namespace cpplexer
#endif // !defined(_RE2C_FUNCTOR_HPP__80E4E0B2_7C71_44FE_9F65_901BBCD5B840__INCLUDED_)

View File

@@ -0,0 +1,215 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Copyright (c) 2001 Daniel C. Nuffer
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#include <memory.h>
#include <boost/assert.hpp>
#include "cpplexer/re2clex/aq.h"
///////////////////////////////////////////////////////////////////////////////
namespace cpplexer {
namespace re2clex {
// Double the capacity of the queue's ring buffer.
//
// The visible callers (aq_enqueue, aq_enqueue_front) invoke this only when
// the queue is full; the relocation logic below is written for that case.
//
// Returns 1 on success.  Returns 0 if the larger buffer could not be
// allocated; the queue is then left unchanged at its old capacity.
int aq_grow(aq_queue q)
{
    // Check the queue before touching any of its members (these checks used
    // to run only after q had already been dereferenced and reallocated).
    BOOST_ASSERT(q);
    BOOST_ASSERT(q->max_size < 100000);
    BOOST_ASSERT(q->size <= q->max_size);

// Ring buffer invariant: the distance from head to tail (inclusive, modulo
// the capacity) equals the number of stored elements.  This macro is used by
// every aq_* function that follows in this file.
#define ASSERT_SIZE BOOST_ASSERT( \
    ((q->tail + q->max_size + 1) - q->head) % q->max_size == \
    q->size % q->max_size)

    ASSERT_SIZE;
    BOOST_ASSERT(q->head <= q->max_size);
    BOOST_ASSERT(q->tail <= q->max_size);

    size_t new_size = q->max_size << 1;
    aq_stdelement* new_queue = (aq_stdelement*)realloc(q->queue,
        new_size * sizeof(aq_stdelement));

    if (!new_queue)
    {
        // realloc() leaves the old block intact on failure
        BOOST_ASSERT(0);
        return 0;
    }

    q->queue = new_queue;
    if (q->tail <= q->head) /* tail has wrapped around */
    {
        /* move the tail from the beginning to the end */
        memcpy(q->queue + q->max_size, q->queue,
            (q->tail + 1) * sizeof(aq_stdelement));
        q->tail += q->max_size;
    }
    q->max_size = new_size;

    BOOST_ASSERT(q->size <= q->max_size);
    ASSERT_SIZE;
    BOOST_ASSERT(q->head <= q->max_size);
    BOOST_ASSERT(q->tail <= q->max_size);

    return 1;
}
// Append element e behind the current tail of the queue.
// Grows the buffer first if the queue is full.
// Returns 1 on success, 0 if the queue was full and could not be grown.
int aq_enqueue(aq_queue q, aq_stdelement e)
{
    BOOST_ASSERT(q);
    BOOST_ASSERT(q->size <= q->max_size);
    ASSERT_SIZE;
    BOOST_ASSERT(q->head <= q->max_size);
    BOOST_ASSERT(q->tail <= q->max_size);

    if (AQ_FULL(q) && !aq_grow(q))
        return 0;

    // step the tail forward, wrapping to slot 0 at the end of the buffer
    q->tail = (q->tail + 1 == q->max_size) ? 0 : q->tail + 1;
    q->queue[q->tail] = e;
    q->size += 1;

    BOOST_ASSERT(q->size <= q->max_size);
    ASSERT_SIZE;
    BOOST_ASSERT(q->head <= q->max_size);
    BOOST_ASSERT(q->tail <= q->max_size);
    return 1;
}
// Insert element e in front of the current head of the queue.
// Grows the buffer first if the queue is full.
// Returns 1 on success, 0 if the queue was full and could not be grown.
int aq_enqueue_front(aq_queue q, aq_stdelement e)
{
    BOOST_ASSERT(q);
    BOOST_ASSERT(q->size <= q->max_size);
    ASSERT_SIZE;
    BOOST_ASSERT(q->head <= q->max_size);
    BOOST_ASSERT(q->tail <= q->max_size);

    if (AQ_FULL(q) && !aq_grow(q))
        return 0;

    // step the head backwards, wrapping to the last slot from slot 0
    q->head = ((0 == q->head) ? q->max_size : q->head) - 1;
    q->queue[q->head] = e;
    q->size += 1;

    BOOST_ASSERT(q->size <= q->max_size);
    ASSERT_SIZE;
    BOOST_ASSERT(q->head <= q->max_size);
    BOOST_ASSERT(q->tail <= q->max_size);
    return 1;
}
// Copy the head element into *e and remove it from the queue.
// Returns 0 (leaving *e untouched) if the queue is empty, otherwise the
// result of aq_pop().
int aq_serve(aq_queue q, aq_stdelement *e)
{
    BOOST_ASSERT(q);
    BOOST_ASSERT(q->size <= q->max_size);
    ASSERT_SIZE;
    BOOST_ASSERT(q->head <= q->max_size);
    BOOST_ASSERT(q->tail <= q->max_size);

    if (!AQ_EMPTY(q))
    {
        *e = q->queue[q->head];
        return aq_pop(q);
    }
    return 0;
}
// Discard the head element of the queue.
// Returns 1 if an element was removed, 0 if the queue was empty.
int aq_pop(aq_queue q)
{
    BOOST_ASSERT(q);
    BOOST_ASSERT(q->size <= q->max_size);
    ASSERT_SIZE;
    BOOST_ASSERT(q->head <= q->max_size);
    BOOST_ASSERT(q->tail <= q->max_size);

    if (AQ_EMPTY(q))
        return 0;

    // step the head forward, wrapping to slot 0 at the end of the buffer
    q->head = (q->head + 1 == q->max_size) ? 0 : q->head + 1;
    q->size -= 1;

    BOOST_ASSERT(q->size <= q->max_size);
    ASSERT_SIZE;
    BOOST_ASSERT(q->head <= q->max_size);
    BOOST_ASSERT(q->tail <= q->max_size);
    return 1;
}
// Allocate a new, empty queue with room for 8 elements.
// Returns the queue, or 0 if either allocation fails.  The result has to be
// released with aq_terminate().
aq_queue aq_create(void)
{
    size_t const initial_capacity = 8; /* initial size */

    aq_queue q = (aq_queue)malloc(sizeof(aq_queuetype));
    if (0 == q)
        return 0;

    q->queue = (aq_stdelement*)malloc(
        sizeof(aq_stdelement) * initial_capacity);
    if (0 == q->queue)
    {
        free(q);
        return 0;
    }

    // empty state: the tail sits one slot "before" the head
    q->max_size = initial_capacity;
    q->size = 0;
    q->head = 0;
    q->tail = initial_capacity - 1;

    BOOST_ASSERT(q->size <= q->max_size);
    ASSERT_SIZE;
    BOOST_ASSERT(q->head <= q->max_size);
    BOOST_ASSERT(q->tail <= q->max_size);
    return q;
}
// Release a queue created by aq_create(): first the element buffer, then the
// queue record itself.  q must not be used afterwards.
void aq_terminate(aq_queue q)
{
    BOOST_ASSERT(q);
    BOOST_ASSERT(q->size <= q->max_size);
    ASSERT_SIZE;
    BOOST_ASSERT(q->head <= q->max_size);
    BOOST_ASSERT(q->tail <= q->max_size);

    aq_stdelement* const elements = q->queue;
    free(elements);
    free(q);
}
///////////////////////////////////////////////////////////////////////////////
} // namespace re2clex
} // namespace cpplexer

View File

@@ -0,0 +1,52 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Copyright (c) 2001 Daniel C. Nuffer
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#ifndef AQ_H
#define AQ_H
#include <stdlib.h>
///////////////////////////////////////////////////////////////////////////////
namespace cpplexer {
namespace re2clex {
// Type of the elements stored in the queue.
typedef unsigned int aq_stdelement;

// A queue of aq_stdelement values kept in a heap allocated ring buffer.
typedef struct
{
    size_t head;            // index of the front element
    size_t tail;            // index of the most recently appended element
    size_t size;            // number of elements currently stored
    size_t max_size;        // number of slots in 'queue'
    aq_stdelement* queue;   // the element buffer
} aq_queuetype;

typedef aq_queuetype* aq_queue;

// Create an empty queue; returns 0 if the allocation fails.
aq_queue aq_create(void);
// Free a queue obtained from aq_create().
void aq_terminate(aq_queue q);
// Double the capacity; returns 1 on success, 0 on allocation failure.
int aq_grow(aq_queue q);

// Append e at the tail / insert e at the head.  Both grow the queue when it
// is full and return 1 on success, 0 on failure.
int aq_enqueue(aq_queue q, aq_stdelement e);
int aq_enqueue_front(aq_queue q, aq_stdelement e);
// Copy the head element into *e and remove it; returns 0 if the queue is empty.
int aq_serve(aq_queue q, aq_stdelement *e);
// Remove the head element; returns 0 if the queue is empty.
int aq_pop(aq_queue q);

// Queue state predicates.  The argument is parenthesized so that any pointer
// expression (e.g. &obj) can be passed, not just a plain identifier.
#define AQ_EMPTY(q) ((q)->size == 0)
#define AQ_FULL(q) ((q)->size == (q)->max_size)
///////////////////////////////////////////////////////////////////////////////
} // namespace re2clex
} // namespace cpplexer
#endif // AQ_H

View File

@@ -0,0 +1,638 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Copyright (c) 2001 Daniel C. Nuffer
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
TODO:
It also may be necessary to add $ to identifiers, for asm.
handle errors better.
have some easier way to parse strings instead of files (done)
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <boost/assert.hpp>
#if !defined(_MSC_VER)
#include <unistd.h>
#else
#include <memory.h>
#include <io.h>
#endif
#include "cpplexer/cpp_token_ids.hpp"
#include "cpplexer/re2clex/aq.h"
#include "cpplexer/re2clex/scanner.h"
#if defined(_MSC_VER)
#pragma warning (disable: 4101) // 'bla' : unreferenced local variable
#pragma warning (disable: 4102) // 'bla' : unreferenced label
#endif
// Number of bytes fill() requests from the input on every refill; the scanner
// buffer always keeps at least this much room behind s->lim.
#define BSIZE 196608
// Glue between the code generated from the /*!re2c ... */ blocks in scan()
// and the Scanner state (macro names as expected by re2c -- see its manual).
// character type of the input
#define YYCTYPE uchar
// current read position: the local variable 'cursor' of scan()
#define YYCURSOR cursor
// end of the valid data in the buffer
#define YYLIMIT s->lim
// backtracking position
#define YYMARKER s->ptr
// called when more input is needed; the requested count n is ignored and
// fill() decides itself how much to read
#define YYFILL(n) {cursor = fill(s, cursor);}
// leave scan(): remember how far the input was consumed and return token i
#define RET(i) {s->cur = cursor; return i;}
///////////////////////////////////////////////////////////////////////////////
namespace cpplexer {
namespace re2clex {
// Read a single character directly from the underlying input, bypassing the
// scanner buffer.
//
// The input is either the file descriptor s->fd or, if s->fd is -1, the
// memory range [s->first, s->last) with s->act as the read position.
//
// Returns the character (as a non-negative value), or -1 if no character
// could be read (end of input, read error, or no input attached).
int get_one_char(Scanner *s)
{
    if (s->fd != -1) {
        uchar val;

        // read() returns 0 at end of file and -1 on error; only a positive
        // result means that val has actually been filled in
        if (read(s->fd, &val, sizeof(val)) > 0)
            return val;
    }
    else if (0 != s->act) {
        BOOST_ASSERT(s->first != 0 && s->last != 0);
        BOOST_ASSERT(s->first <= s->act && s->act <= s->last);
        if (s->act < s->last)
            return *(s->act)++;
    }
    return -1;
}
// Move the read position of the underlying input by cnt characters
// (a negative cnt moves backwards).
//
// For file input the result of lseek() is returned, for memory input the new
// offset of s->act from s->first; 0 if no input is attached.
int rewind_stream (Scanner *s, int cnt)
{
    if (-1 != s->fd)
        return lseek(s->fd, cnt, SEEK_CUR);

    if (0 == s->act)
        return 0;

    BOOST_ASSERT(s->first != 0 && s->last != 0);
    s->act += cnt;
    BOOST_ASSERT(s->first <= s->act && s->act <= s->last);
    return s->act - s->first;
}
// Return the oldest recorded backslash-newline offset without removing it
// from the queue, or (unsigned int)-1 if there is none.
//
// adjust_eol_offsets() creates s->eol_offsets lazily, so a missing queue is
// treated like an empty one here instead of being dereferenced.
unsigned int get_first_eol_offset(Scanner* s)
{
    aq_queue q = s->eol_offsets;

    if (0 == q || AQ_EMPTY(q))
        return (unsigned int)-1;
    return q->queue[q->head];
}
// Subtract 'adjustment' from every recorded backslash-newline offset,
// clamping at 0.  fill() calls this after it has moved the buffer contents
// towards the start of the buffer by that many bytes.
//
// The offset queue is created on first use; if it cannot be allocated the
// call is a no-op.
void adjust_eol_offsets(Scanner* s, unsigned int adjustment)
{
    if (!s->eol_offsets)
        s->eol_offsets = aq_create();

    aq_queue q = s->eol_offsets;

    // aq_create() returns 0 when out of memory
    if (!q || AQ_EMPTY(q))
        return;

    // walk the ring buffer from head to tail (inclusive)
    size_t i = q->head;
    for (;;)
    {
        if (adjustment > q->queue[i])
            q->queue[i] = 0;
        else
            q->queue[i] -= adjustment;

        if (i == q->tail)
            break;
        if (++i == q->max_size)
            i = 0;
    }
}
// Refill the scanner buffer (this is what YYFILL expands to).
//
// Steps, all skipped once s->eof is set:
//   1. discard the bytes in front of the current token (s->bot .. s->tok),
//   2. enlarge the buffer if fewer than BSIZE bytes are free behind s->lim,
//   3. read up to BSIZE bytes from s->fd or from the memory range s->act ..
//      s->last; a short read marks the end of input (s->eof, '\0' sentinel),
//   4. erase backslash-newline sequences from the new data and record their
//      buffer offsets in s->eol_offsets, including sequences which straddle
//      the end of the chunk just read.
//
// s->tok, s->ptr and s->lim are updated in place whenever the buffer
// contents move.
//
// cursor   the caller's read position inside the buffer
// Returns the cursor adjusted for any buffer movement.  If the buffer cannot
// be enlarged, the error is reported and a 0 is stored at the cursor to stop
// the scanner.
uchar *fill(Scanner *s, uchar *cursor)
{
    if(!s->eof)
    {
        uchar* p;
        unsigned int cnt = s->tok - s->bot;

        if(cnt)
        {
            /* source and destination overlap as soon as the kept part is
               longer than the discarded prefix, so memmove() is required */
            memmove(s->bot, s->tok, s->lim - s->tok);
            s->tok = s->bot;
            s->ptr -= cnt;
            cursor -= cnt;
            s->lim -= cnt;
            adjust_eol_offsets(s, cnt);
        }

        if((s->top - s->lim) < BSIZE)
        {
            uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
            if (buf == 0)
            {
                if (0 != s->error_proc)
                    (*s->error_proc)("Out of memory!");
                else
                    printf("Out of memory!\n");

                /* get the scanner to stop */
                *cursor = 0;
                return cursor;
            }

            memcpy(buf, s->tok, s->lim - s->tok);
            s->tok = buf;
            s->ptr = &buf[s->ptr - s->bot];
            cursor = &buf[cursor - s->bot];
            s->lim = &buf[s->lim - s->bot];
            s->top = &s->lim[BSIZE];
            free(s->bot);
            s->bot = buf;
        }

        if (s->fd != -1) {
            int got = (int) read(s->fd, (char*) s->lim, BSIZE);

            if (got < 0)
            {
                /* read() failed; storing -1 in the unsigned cnt would place
                   the eof sentinel far outside of the buffer.  Report the
                   problem and treat it like the end of the input. */
                if (0 != s->error_proc)
                    (*s->error_proc)("Read error!");
                else
                    printf("Read error!\n");
                got = 0;
            }
            cnt = (unsigned int) got;
            if (cnt != BSIZE)
            {
                s->eof = &s->lim[cnt]; *(s->eof)++ = '\0';
            }
        }
        else if (s->act != 0) {
            cnt = s->last - s->act;
            if (cnt > BSIZE)
                cnt = BSIZE;
            memcpy(s->lim, s->act, cnt);
            s->act += cnt;
            if (cnt != BSIZE)
            {
                s->eof = &s->lim[cnt]; *(s->eof)++ = '\0';
            }
        }

        /* backslash-newline erasing time */

        /* first scan for backslash-newline and erase them */
        for (p = s->lim; p < s->lim + cnt - 2; ++p)
        {
            if (*p == '\\')
            {
                if (*(p+1) == '\n')
                {
                    memmove(p, p + 2, s->lim + cnt - p - 2);
                    cnt -= 2;
                    --p;
                    aq_enqueue(s->eol_offsets, p - s->bot);
                }
                else if (*(p+1) == '\r')
                {
                    if (*(p+2) == '\n')
                    {
                        memmove(p, p + 3, s->lim + cnt - p - 3);
                        cnt -= 3;
                        --p;
                    }
                    else
                    {
                        memmove(p, p + 2, s->lim + cnt - p - 2);
                        cnt -= 2;
                        --p;
                    }
                    aq_enqueue(s->eol_offsets, p - s->bot);
                }
            }
        }

        /* check to see if what we just read ends in a backslash */
        if (cnt >= 2)
        {
            uchar last = s->lim[cnt-1];
            uchar last2 = s->lim[cnt-2];

            /* check \ EOB */
            if (last == '\\')
            {
                int next = get_one_char(s);

                /* check for \ \n or \ \r or \ \r \n straddling the border */
                if (next == '\n')
                {
                    --cnt; /* chop the final \, we've already read the \n. */
                    aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot));
                }
                else if (next == '\r')
                {
                    int next2 = get_one_char(s);

                    /* anything but \n belongs to the following data and is
                       put back; -1 means nothing was read, so there is
                       nothing to put back either */
                    if (next2 != '\n' && next2 != -1)
                        rewind_stream(s, -1);
                    --cnt; /* skip the backslash */
                    aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot));
                }
                else if (next != -1) /* -1 means end of file */
                {
                    /* next was something else, so rewind the stream; go
                       through rewind_stream() so that memory based input
                       (s->fd == -1) is handled as well */
                    rewind_stream(s, -1);
                }
            }
            /* check \ \r EOB */
            else if (last == '\r' && last2 == '\\')
            {
                int next = get_one_char(s);

                /* put back a look-ahead character that is not the \n of a
                   \r \n pair (nothing to put back if nothing was read) */
                if (next != '\n' && next != -1)
                    rewind_stream(s, -1);
                cnt -= 2; /* skip the \ \r */
                aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot));
            }
            /* check \ \n EOB */
            else if (last == '\n' && last2 == '\\')
            {
                cnt -= 2;
                aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot));
            }
        }

        s->lim += cnt;
        if (s->eof) /* eof needs adjusting if we erased backslash-newlines */
        {
            s->eof = s->lim;
            *(s->eof)++ = '\0';
        }
    }
    return cursor;
}
// Recognize the next token of the input and return its token id.
//
// This function is a re2c specification: the rules inside the
// "/*!re2c ... */" blocks are turned into the matching code by re2c, which
// uses the YY* macros defined at the top of this file.
//
// The token starts at s->cur (stored into s->tok); every RET() stores the
// end of the matched text back into s->cur before returning.
//
//  - The first rule block matches keywords, operators (including their
//    trigraph, digraph and alternative spellings), identifiers, literals,
//    preprocessor directives, whitespace and newlines.  Any other character
//    matched by 'any' is currently returned as T_SPACE.
//  - "/*" continues at the label 'ccomment', which consumes input up to the
//    closing "*/" and returns T_CCOMMENT.
//  - "//" continues at the label 'cppcomment', which consumes input up to
//    and including the end of the line and returns T_CPPCOMMENT.
//
// Every matched Newline increments s->line; additionally one line is counted
// (and the entry popped) for each backslash-newline offset recorded in
// s->eol_offsets that lies at or before the current position, because fill()
// has erased those sequences from the buffer.
//
// A '\0' byte returns T_EOF.  Unless it is the end-of-input sentinel
// (cursor == s->eof) it is reported as an error through s->error_proc, or
// printf() if no error_proc is set; reaching the sentinel inside a C comment
// is reported as an unterminated comment.
int scan(Scanner *s){
uchar *cursor = s->tok = s->cur;
/*!re2c
any = [\t\v\f\r\n\040-\377];
OctalDigit = [0-7];
Digit = [0-9];
HexDigit = [a-fA-F0-9];
ExponentPart = [Ee] [+-]? Digit+;
FractionalConstant = (Digit* "." Digit+) | (Digit+ ".");
FloatingSuffix = [fF][lL]?|[lL][fF]?;
IntegerSuffix = [uU][lL]?|[lL][uU]?;
Backslash = [\\]|"??/";
EscapeSequence = Backslash ([abfnrtv?'"] | Backslash | "x" HexDigit+ | OctalDigit OctalDigit? OctalDigit?);
HexQuad = HexDigit HexDigit HexDigit HexDigit;
UniversalChar = Backslash ("u" HexQuad | "U" HexQuad HexQuad);
PPSpace = ([ \t]|("/*"((any\[*\n\r])*|"*"+(any\[*/\n\r])*|"\n"|"\r"|"\r\n")*"*/"))*;
Pound = "#" | "??=" | "%:";
Newline = "\r\n" | "\n" | "\r";
*/
/*!re2c
"/*" { goto ccomment; }
"//" { goto cppcomment; }
"asm" { RET(T_ASM); }
"auto" { RET(T_AUTO); }
"bool" { RET(T_BOOL); }
"break" { RET(T_BREAK); }
"case" { RET(T_CASE); }
"catch" { RET(T_CATCH); }
"char" { RET(T_CHAR); }
"class" { RET(T_CLASS); }
"const" { RET(T_CONST); }
"const_cast" { RET(T_CONSTCAST); }
"continue" { RET(T_CONTINUE); }
"default" { RET(T_DEFAULT); }
"delete" { RET(T_DELETE); }
"do" { RET(T_DO); }
"double" { RET(T_DOUBLE); }
"dynamic_cast" { RET(T_DYNAMICCAST); }
"else" { RET(T_ELSE); }
"enum" { RET(T_ENUM); }
"explicit" { RET(T_EXPLICIT); }
"export" { RET(T_EXPORT); }
"extern" { RET(T_EXTERN); }
"false" { RET(T_FALSE); }
"float" { RET(T_FLOAT); }
"for" { RET(T_FOR); }
"friend" { RET(T_FRIEND); }
"goto" { RET(T_GOTO); }
"if" { RET(T_IF); }
"inline" { RET(T_INLINE); }
"int" { RET(T_INT); }
"long" { RET(T_LONG); }
"mutable" { RET(T_MUTABLE); }
"namespace" { RET(T_NAMESPACE); }
"new" { RET(T_NEW); }
"operator" { RET(T_OPERATOR); }
"private" { RET(T_PRIVATE); }
"protected" { RET(T_PROTECTED); }
"public" { RET(T_PUBLIC); }
"register" { RET(T_REGISTER); }
"reinterpret_cast" { RET(T_REINTERPRETCAST); }
"return" { RET(T_RETURN); }
"short" { RET(T_SHORT); }
"signed" { RET(T_SIGNED); }
"sizeof" { RET(T_SIZEOF); }
"static" { RET(T_STATIC); }
"static_cast" { RET(T_STATICCAST); }
"struct" { RET(T_STRUCT); }
"switch" { RET(T_SWITCH); }
"template" { RET(T_TEMPLATE); }
"this" { RET(T_THIS); }
"throw" { RET(T_THROW); }
"true" { RET(T_TRUE); }
"try" { RET(T_TRY); }
"typedef" { RET(T_TYPEDEF); }
"typeid" { RET(T_TYPEID); }
"typename" { RET(T_TYPENAME); }
"union" { RET(T_UNION); }
"unsigned" { RET(T_UNSIGNED); }
"using" { RET(T_USING); }
"virtual" { RET(T_VIRTUAL); }
"void" { RET(T_VOID); }
"volatile" { RET(T_VOLATILE); }
"wchar_t" { RET(T_WCHART); }
"while" { RET(T_WHILE); }
"{" { RET(T_LEFTBRACE); }
"??<" { RET(T_LEFTBRACE); }
"<%" { RET(T_LEFTBRACE); }
"}" { RET(T_RIGHTBRACE); }
"??>" { RET(T_RIGHTBRACE); }
"%>" { RET(T_RIGHTBRACE); }
"[" { RET(T_LEFTBRACKET); }
"??(" { RET(T_LEFTBRACKET); }
"<:" { RET(T_LEFTBRACKET); }
"]" { RET(T_RIGHTBRACKET); }
"??)" { RET(T_RIGHTBRACKET); }
":>" { RET(T_RIGHTBRACKET); }
Pound { RET(T_POUND); }
"##" { RET(T_POUND_POUND); }
"#??=" { RET(T_POUND_POUND); }
"??=#" { RET(T_POUND_POUND); }
"??=??=" { RET(T_POUND_POUND); }
"%:%:" { RET(T_POUND_POUND); }
"(" { RET(T_LEFTPAREN); }
")" { RET(T_RIGHTPAREN); }
";" { RET(T_SEMICOLON); }
":" { RET(T_COLON); }
"..." { RET(T_ELLIPSIS); }
"?" { RET(T_QUESTION_MARK); }
"::" { RET(T_COLON_COLON); }
"." { RET(T_DOT); }
".*" { RET(T_DOTSTAR); }
"+" { RET(T_PLUS); }
"-" { RET(T_MINUS); }
"*" { RET(T_STAR); }
"/" { RET(T_DIVIDE); }
"%" { RET(T_PERCENT); }
"^" { RET(T_XOR); }
"??'" { RET(T_XOR); }
"xor" { RET(T_XOR); }
"&" { RET(T_AND); }
"bitand" { RET(T_AND); }
"|" { RET(T_OR); }
"bitor" { RET(T_OR); }
"??!" { RET(T_OR); }
"~" { RET(T_COMPL); }
"??-" { RET(T_COMPL); }
"compl" { RET(T_COMPL); }
"!" { RET(T_NOT); }
"not" { RET(T_NOT); }
"=" { RET(T_ASSIGN); }
"<" { RET(T_LESS); }
">" { RET(T_GREATER); }
"+=" { RET(T_PLUSASSIGN); }
"-=" { RET(T_MINUSASSIGN); }
"*=" { RET(T_STARASSIGN); }
"/=" { RET(T_DIVIDEASSIGN); }
"%=" { RET(T_PERCENTASSIGN); }
"^=" { RET(T_XORASSIGN); }
"xor_eq" { RET(T_XORASSIGN); }
"??'=" { RET(T_XORASSIGN); }
"&=" { RET(T_ANDASSIGN); }
"and_eq" { RET(T_ANDASSIGN); }
"|=" { RET(T_ORASSIGN); }
"or_eq" { RET(T_ORASSIGN); }
"??!=" { RET(T_ORASSIGN); }
"<<" { RET(T_SHIFTLEFT); }
">>" { RET(T_SHIFTRIGHT); }
">>=" { RET(T_SHIFTRIGHTASSIGN); }
"<<=" { RET(T_SHIFTLEFTASSIGN); }
"==" { RET(T_EQUAL); }
"!=" { RET(T_NOTEQUAL); }
"not_eq" { RET(T_NOTEQUAL); }
"<=" { RET(T_LESSEQUAL); }
">=" { RET(T_GREATEREQUAL); }
"&&" { RET(T_ANDAND); }
"and" { RET(T_ANDAND); }
"||" { RET(T_OROR); }
"??!|" { RET(T_OROR); }
"|??!" { RET(T_OROR); }
"or" { RET(T_OROR); }
"??!??!" { RET(T_OROR); }
"++" { RET(T_PLUSPLUS); }
"--" { RET(T_MINUSMINUS); }
"," { RET(T_COMMA); }
"->*" { RET(T_ARROWSTAR); }
"->" { RET(T_ARROW); }
[a-zA-Z_] ([a-zA-Z_0-9])*
{ RET(T_IDENTIFIER); }
(("0" [xX] HexDigit+) | ("0" OctalDigit*) | ([1-9] Digit*)) IntegerSuffix?
{ RET(T_INTLIT); }
((FractionalConstant ExponentPart?) | (Digit+ ExponentPart)) FloatingSuffix?
{ RET(T_FLOATLIT); }
"L"? (['] (EscapeSequence|any\[\n\r\\']|UniversalChar)+ ['])
{ RET(T_CHARLIT); }
"L"? (["] (EscapeSequence|any\[\n\r\\"]|UniversalChar)* ["])
{ RET(T_STRINGLIT); }
Pound PPSpace "include" PPSpace "<" (any\[\n\r>])+ ">"
{ RET(T_PP_HHEADER); }
Pound PPSpace "include" PPSpace "\"" (any\[\n\r"])+ "\""
{ RET(T_PP_QHEADER); }
Pound PPSpace "include" PPSpace
{ RET(T_PP_INCLUDE); }
Pound PPSpace "if" { RET(T_PP_IF); }
Pound PPSpace "ifdef" { RET(T_PP_IFDEF); }
Pound PPSpace "ifndef" { RET(T_PP_IFNDEF); }
Pound PPSpace "elif" { RET(T_PP_ELIF); }
Pound PPSpace "endif" { RET(T_PP_ENDIF); }
Pound PPSpace "define" { RET(T_PP_DEFINE); }
Pound PPSpace "undef" { RET(T_PP_UNDEF); }
Pound PPSpace "line" { RET(T_PP_LINE); }
Pound PPSpace "error" (any\[\n\r])* { RET(T_PP_ERROR); }
Pound PPSpace "pragma" { RET(T_PP_PRAGMA); }
[ \t\v\f]+
{ RET(T_SPACE); }
Newline
{
size_t diff, offset;
s->line++;
/* figure out how many backslash-newlines skipped over unknowingly. */
diff = cursor - s->bot;
offset = get_first_eol_offset(s);
while (offset <= diff && offset != (unsigned int)-1)
{
s->line++;
aq_pop(s->eol_offsets);
offset = get_first_eol_offset(s);
}
RET(T_NEWLINE);
}
"\000"
{
if(cursor != s->eof)
{
if (0 != s->error_proc)
(*s->error_proc)("'\\000' in input stream");
else
printf("Error: 0 in file\n");
}
RET(T_EOF);
}
any
{
/* handle this error
if (0 != s->error_proc)
(*s->error_proc)("Unexpected character: %c", *s->tok);
else
printf("unexpected character: %c\n", *s->tok);
*/
RET(T_SPACE);
}
*/
// Inside a C comment: skip everything up to the closing "*/".
ccomment:
/*!re2c
"*/" { RET(T_CCOMMENT); }
Newline
{
size_t diff, offset;
/*if(cursor == s->eof) RET(T_EOF);*/
/*s->tok = cursor; */
s->line++;
/* figure out how many backslash-newlines skipped over unknowingly. */
diff = cursor - s->bot;
offset = get_first_eol_offset(s);
while (offset <= diff && offset != (unsigned int)-1)
{
s->line++;
aq_pop(s->eol_offsets);
offset = get_first_eol_offset(s);
}
goto ccomment;
}
any { goto ccomment; }
"\000"
{
if(cursor == s->eof)
{
if (s->error_proc)
(*s->error_proc)("Unterminated comment");
else
printf("Error: Unterminated comment\n");
}
else
{
if (s->error_proc)
(*s->error_proc)("'\\000' in input stream");
else
printf("Error: 0 in file");
}
RET(T_EOF);
}
*/
// Inside a C++ comment: skip everything up to and including the line end.
cppcomment:
/*!re2c
Newline
{
size_t diff, offset;
/*if(cursor == s->eof) RET(T_EOF); */
/*s->tok = cursor; */
s->line++;
/* figure out how many backslash-newlines skipped over unknowingly. */
diff = cursor - s->bot;
offset = get_first_eol_offset(s);
while (offset <= diff && offset != (unsigned int)-1)
{
s->line++;
aq_pop(s->eol_offsets);
offset = get_first_eol_offset(s);
}
RET(T_CPPCOMMENT);
}
any { goto cppcomment; }
"\000"
{
if(cursor != s->eof)
{
if (s->error_proc)
(*s->error_proc)("'\\000' in input stream");
else
printf("Error: 0 in file");
}
RET(T_EOF);
}
*/
} /* end of scan */
///////////////////////////////////////////////////////////////////////////////
} // namespace re2clex
} // namespace cpplexer

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,178 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Re2C based C++ lexer
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_CPP_RE2C_LEXER_HPP__B81A2629_D5B1_4944_A97D_60254182B9A8__INCLUDED_)
#define _CPP_RE2C_LEXER_HPP__B81A2629_D5B1_4944_A97D_60254182B9A8__INCLUDED_
#include <string>
#if defined(BOOST_SPIRIT_DEBUG)
#include <iostream>
#endif // defined(BOOST_SPIRIT_DEBUG)
#include <boost/spirit/core.hpp>
#include <boost/spirit/iterator/position_iterator.hpp>
#include "cpplexer/cpp_token_ids.hpp"
#include "cpplexer/cpp_lex_interface.hpp"
#include "cpplexer/re2clex/scanner.h"
#include "cpplexer/re2clex/cpp_re2c_token.hpp"
///////////////////////////////////////////////////////////////////////////////
namespace cpplexer {
namespace re2clex {
///////////////////////////////////////////////////////////////////////////////
//
// encapsulation of the re2c based cpp lexer
//
///////////////////////////////////////////////////////////////////////////////
template <typename IteratorT, typename PositionT = boost::spirit::file_position>
class lexer
{
public:
    typedef char                            char_t;
    typedef Scanner                         base_t;
    typedef lex_token<IteratorT, PositionT> token_t;

    // Prepare the scanner for the input given by 'first' and 'last'; 'fname'
    // is stored and used as the file name of every token position.
    lexer(IteratorT const &first, IteratorT const &last,
        std::string const &fname);

    // Releases the resources held by the embedded scanner.
    ~lexer();

    // Run the scanner once and return the recognized token.
    lex_token<IteratorT, PositionT> scan();

private:
    // The destructor releases resources referenced from 'scanner'. A
    // memberwise copy would make two objects release the very same resources,
    // so copying is disabled (declared private, intentionally not defined).
    lexer(lexer const &);
    lexer &operator=(lexer const &);

    static char const *tok_names[];
    Scanner scanner;            // state of the re2c generated scanner
    std::string filename;       // file name reported in token positions
};
///////////////////////////////////////////////////////////////////////////////
// initialize cpp lexer
template <typename IteratorT, typename PositionT>
inline
lexer<IteratorT, PositionT>::lexer(IteratorT const &first,
        IteratorT const &last, std::string const &fname)
:   filename(fname)
{
    // start from an all-zero scanner state
    memset(&scanner, 0, sizeof scanner);

    // no file descriptor is used here; the input is described by the
    // first/act/last pointers set below
    scanner.fd = -1;
    scanner.eol_offsets = aq_create();

    // the scanner works on raw bytes: take the addresses behind the iterators
    uchar *const begin = (uchar *)&(*first);
    scanner.first = begin;
    scanner.act = begin;
    scanner.last = (uchar *)&(*last);
}
template <typename IteratorT, typename PositionT>
inline
lexer<IteratorT, PositionT>::~lexer()
{
    // give back the queue stored in eol_offsets ...
    aq_terminate(this->scanner.eol_offsets);

    // ... and the buffer the scanner's 'bot' pointer refers to
    free(this->scanner.bot);
}
///////////////////////////////////////////////////////////////////////////////
// get the next token from the input stream
template <typename IteratorT, typename PositionT>
inline lex_token<IteratorT, PositionT>
lexer<IteratorT, PositionT>::scan()
{
    typedef lex_token<IteratorT, PositionT> result_t;

    // run the re2c generated scanner once
    token_id const id = token_id(re2clex::scan(&scanner));

    // the token text is the byte sequence from scanner.tok up to scanner.cur
    char const *const text = (char const *)scanner.tok;
    std::string const value(text, scanner.cur - scanner.tok);

    return result_t(id, value, PositionT(filename, scanner.line));
}
///////////////////////////////////////////////////////////////////////////////
//
// lex_functor
//
///////////////////////////////////////////////////////////////////////////////
template <typename IteratorT, typename PositionT = boost::spirit::file_position>
class lex_functor
:   public lex_input_interface<typename lexer<IteratorT, PositionT>::token_t>
{
    // the concrete lexer type wrapped by this functor
    typedef lexer<IteratorT, PositionT> lexer_t;

public:
    typedef typename lexer_t::token_t token_t;

    // Construct the wrapped lexer from the given input and file name.
    lex_functor(IteratorT const &first, IteratorT const &last,
            std::string const &fname)
    :   lexer_(first, last, fname)
    {}

    // get the next token from the input stream
    token_t get()
    { return lexer_.scan(); }

private:
    // Deliberately NOT named 'lexer': a member with the name of the class
    // template used above would change the meaning of that name inside this
    // class, which is ill-formed.
    lexer_t lexer_;
};
///////////////////////////////////////////////////////////////////////////////
//
// The new_lexer_gen<>::new_lexer function (declared in cpp_re2c_token.hpp)
// should be defined inline, if the lex_functor shouldn't be instantiated
// separately from the lex_iterator.
//
// Separate (explicit) instantiation helps to reduce compilation time.
//
///////////////////////////////////////////////////////////////////////////////
#if defined(CPP_SEPARATE_LEXER_INSTANTIATION)
#define RE2C_NEW_LEXER_INLINE
#else
#define RE2C_NEW_LEXER_INLINE inline
#endif
///////////////////////////////////////////////////////////////////////////////
//
// The 'new_lexer' function allows the opaque generation of a new lexer object.
// It is coupled to the token type to allow to decouple the lexer/token
// configurations at compile time.
//
// This function is declared inside the cpp_re2c_token.hpp file, which is
// referenced by the source file calling the lexer and the source file, which
// instantiates the lex_functor. But it is defined here, so it will be
// instantiated only while compiling the sourcefile, which instantiates the
// lex_functor. While the cpp_re2c_token.hpp file may be included everywhere,
// this file (cpp_re2c_lexer.hpp) should be included only once. This allows
// to decouple the lexer interface from the lexer implementation and reduces
// compilation time.
//
///////////////////////////////////////////////////////////////////////////////
template <typename IteratorT, typename PositionT>
RE2C_NEW_LEXER_INLINE
lex_input_interface<lex_token<IteratorT, PositionT> > *
new_lexer_gen<IteratorT, PositionT>::new_lexer(IteratorT const &first,
    IteratorT const &last, std::string const &fname)
{
    typedef lex_functor<IteratorT, PositionT> functor_t;
    typedef lex_input_interface<lex_token<IteratorT, PositionT> > interface_t;

    // The functor is allocated with 'new' and handed out through its
    // interface type; nothing in this function deletes it.
    interface_t *result = new functor_t(first, last, fname);
    return result;
}
#undef RE2C_NEW_LEXER_INLINE
///////////////////////////////////////////////////////////////////////////////
} // namespace re2clex
} // namespace cpplexer
#endif // !defined(_CPP_RE2C_LEXER_HPP__B81A2629_D5B1_4944_A97D_60254182B9A8__INCLUDED_)

View File

@@ -0,0 +1,118 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Re2C based C++ lexer token definition
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_CPP_RE2C_TOKEN_HPP__E35EB5F6_68BD_424A_B7EE_D36D344A2FE4__INCLUDED_)
#define _CPP_RE2C_TOKEN_HPP__E35EB5F6_68BD_424A_B7EE_D36D344A2FE4__INCLUDED_
#include <boost/spirit/iterator/position_iterator.hpp>
///////////////////////////////////////////////////////////////////////////////
namespace cpplexer {
namespace re2clex {
///////////////////////////////////////////////////////////////////////////////
//
// new_lexer_gen
//
///////////////////////////////////////////////////////////////////////////////
template <
typename IteratorT = char const *,
typename PositionT = boost::spirit::file_position
>
class lex_token;
// Factory for lexer objects, parameterized like the token type lex_token:
// IteratorT defaults to 'char const *', PositionT defaults to
// boost::spirit::file_position.
template <
typename IteratorT = char const *,
typename PositionT = boost::spirit::file_position
>
struct new_lexer_gen
{
// The new_lexer function allows the opaque generation of a new lexer object.
// It is coupled to the token type to allow decoupling the lexer/token
// configurations at compile time.
//
// Takes the input as the iterator pair first/last plus a file name, and
// returns a pointer to the lex_input_interface for
// lex_token<IteratorT, PositionT>. Only the declaration lives here.
static lex_input_interface<lex_token<IteratorT, PositionT> > *
new_lexer(IteratorT const &first, IteratorT const &last,
std::string const &fname);
};
///////////////////////////////////////////////////////////////////////////////
//
// lex_token
//
///////////////////////////////////////////////////////////////////////////////
template <typename IteratorT, typename PositionT>
class lex_token
{
public:
    typedef IteratorT                           iterator_t;
    typedef std::basic_string<char>             string_t;
    typedef PositionT                           position_t;
    typedef new_lexer_gen<IteratorT, PositionT> lexer_gen_t;

    // a default constructed token carries the id T_EOF, an empty text and a
    // default constructed position
    lex_token()
    :   id(T_EOF)
    {}

    // build a token from its id, its text and the position it was found at
    lex_token(token_id id_, string_t const &value_, PositionT const &pos_)
    :   id(id_), value(value_), pos(pos_)
    {}

    // accessors
    operator token_id() const { return id; }
    string_t const &get_value() const { return value; }
    position_t const &get_position() const { return pos; }

#if defined(BOOST_SPIRIT_DEBUG)
    // debug support: writes "<token name>(<text>)", where carriage return
    // and line feed characters of the text are shown as \r and \n
    void print (std::ostream &stream) const
    {
        stream << cpplexer::get_token_name(id) << "(";
        for (string_t::const_iterator it = value.begin();
             it != value.end(); ++it)
        {
            char const ch = *it;

            if ('\r' == ch)
                stream << "\\r";
            else if ('\n' == ch)
                stream << "\\n";
            else
                stream << ch;
        }
        stream << ")";
    }
#endif // defined(BOOST_SPIRIT_DEBUG)

private:
    token_id id;        // the token id
    string_t value;     // the text, which was parsed into this token
    PositionT pos;      // the original file position
};
#if defined(BOOST_SPIRIT_DEBUG)
template <typename IteratorT, typename PositionT>
inline std::ostream &
operator<< (std::ostream &stream, lex_token<IteratorT, PositionT> const &object)
{
    // let the token format itself, then hand the stream back so that
    // insertions can be chained
    return object.print(stream), stream;
}
#endif // defined(BOOST_SPIRIT_DEBUG)
///////////////////////////////////////////////////////////////////////////////
} // namespace re2clex
} // namespace cpplexer
#endif // !defined(_CPP_RE2C_TOKEN_HPP__E35EB5F6_68BD_424A_B7EE_D36D344A2FE4__INCLUDED_)

View File

@@ -0,0 +1,56 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Copyright (c) 2001 Daniel C. Nuffer
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#ifndef SCANNER_H
#define SCANNER_H
#include "cpplexer/re2clex/aq.h"
///////////////////////////////////////////////////////////////////////////////
namespace cpplexer {
namespace re2clex {
typedef unsigned char uchar;
typedef int (* ReportErrorProc)(char *, ...);
/* Complete state of the re2c based scanner; scan() receives a pointer to it.
   The users visible in this code base zero the whole structure first and
   then set fd and eol_offsets (plus first/act/last when fd stays -1). */
typedef struct Scanner {
int fd; /* file descriptor; -1 when the input is the memory range below */
uchar* first; /* start of input buffer (if fd == -1) */
uchar* act; /* actual position within the input buffer (if fd == -1) */
uchar* last; /* end (one past last char) of input buffer (if fd == -1) */
uchar* bot; /* beginning of the current buffer (free()'d by the owner of
the Scanner when it is done) */
uchar* top; /* top of the current buffer */
uchar* eof; /* when we read in the last buffer, will point 1 past the
end of the file, otherwise 0 */
uchar* tok; /* points to the beginning of the current token */
uchar* ptr; /* used for YYMARKER - saves backtracking info */
uchar* cur; /* saves the cursor (maybe is redundant with tok?); the token
text is taken as the bytes from tok up to cur */
uchar* lim; /* used for YYLIMIT - points to the end of the buffer */
/* (lim == top) except for the last buffer, it points to
the end of the input (lim == eof - 1) */
unsigned int line; /* current line being lexed (the test driver prints
line + 1) */
ReportErrorProc error_proc; /* if != 0 this function is called to
report an error, otherwise the scanner
prints the message itself */
aq_queue eol_offsets; /* queue created with aq_create(); the newline rules
of the scanner consult it to account for
backslash-newlines that were skipped over */
} Scanner;
int scan(Scanner *s);
///////////////////////////////////////////////////////////////////////////////
} // namespace re2clex
} // namespace cpplexer
#endif // SCANNER_H

View File

@@ -0,0 +1,23 @@
# Test driver for the re2c based lexer; built and run by 'make check'.
check_PROGRAMS = test_lexer

# NOTE: the last entry must not end in a backslash, otherwise the line
# following the list is swallowed into test_lexer_SOURCES.
test_lexer_SOURCES = \
	aq.c \
	aq.h \
	cpp.re \
	cpp.c \
	scanner.h \
	test_lexer.c \
	../tokennames.cpp

# cpp.c is generated from cpp.re by re2c (see the rule below).
BUILT_SOURCES = cpp.c

$(srcdir)/cpp.c: $(srcdir)/cpp.re
	re2c -s $< > $@

TESTS = run_tests.sh

# Inputs and expected outputs used by run_tests.sh.
EXTRA_DIST = \
	lextest.in \
	lextest.output \
	test_lexer.in \
	test_lexer.output

View File

@@ -0,0 +1,98 @@
/* this is a c comment */
/**/
/***/
/* now a multi-line c comment
this is the second line */
// a c++ comment
// now for all the identifiers
asm auto asm auto bool break case catch char class const const_cast
continue default delete do double dynamic_cast else enum explicit export
extern false float for friend goto if inline int long mutable namespace new
operator private protected public register reinterpret_cast return short
signed sizeof static static_cast struct switch template this throw true
try typedef typeid typename union unsigned using virtual void volatile
wchar_t while
// now here's the punctuation
{ ??< <% } ??> %> [
??( <: ] ??) :> #
??= %: ## #??= ??=# ??=??= %:%:
( ) ; : ...
? :: . .* +
- * / % ^
??' xor & bitand | bitor
??! ~ ??- compl ! not
= < > += -=
*= /= %= ^= xor_eq
??'= &= and_eq |= or_eq ??!=
<< >> >>= <<= == !=
// here's some identifiers
ident
ident1
ident1a
_ident
IdEnT_A
// hex ints
0x123a
0X123B
// octal ints
0
012375423
// decimal ints
1
1234999
// ints with suffixes
123l 123L 123u 123U
123ul 123uL 123Ul 123UL
123lu 123Lu 123lU 123LU
// floats
.123
1.1
1.
.123e345
1.1E+2
1.e-5
22e3
33E+4
43e-3
.123f 1.1F 1.l .123e345L
1.1E+2fl 1.e-5fL 22e3Fl 33E+4FL
43e-3lf .123Lf 1.1lF 1.LF
// chars
'a' '0' '"'
L'a' L'0' L'"'
'\a' '??/b' '\f' '??/n' '\r' '\t' '\v' '\?' '\'' '\"' '\\'
'\x1' '\xaBcD123' '\1' '\12' '\123'
'\u1234' '\UABCD1234'
// Strings
"" "a" "01234" "'"
L"" L"a" L"01234" L"'"
"\a??/b\f??/n\r\t\v\?\'\"\\"
"\x1\xaBcD123\1\12\123"
"\u1234\UABCD1234"
"/* a faked c comment */"
// pre-processor directives
#include <io.h>
# include"r.h"
#include SOME_HEADER
#if
#ifdef
#ifndef
#elif
#endif
#define
#undef
#line
#error
#pragma
/* another c comment */

View File

@@ -0,0 +1,594 @@
130 CCOMMENT Line: 1 >/* this is a c comment */<
137 NEWLINE Line: 2 >
<
130 CCOMMENT Line: 2 >/**/<
137 NEWLINE Line: 3 >
<
130 CCOMMENT Line: 3 >/***/<
137 NEWLINE Line: 4 >
<
130 CCOMMENT Line: 5 >/* now a multi-line c comment
this is the second line */<
137 NEWLINE Line: 6 >
<
131 CPPCOMMENT Line: 7 >// a c++ comment
<
131 CPPCOMMENT Line: 8 >// now for all the identifiers
<
49 ASM Line: 8 >asm<
135 SPACE Line: 8 > <
50 AUTO Line: 8 >auto<
135 SPACE Line: 8 > <
49 ASM Line: 8 >asm<
135 SPACE Line: 8 > <
50 AUTO Line: 8 >auto<
135 SPACE Line: 8 > <
51 BOOL Line: 8 >bool<
135 SPACE Line: 8 > <
54 BREAK Line: 8 >break<
135 SPACE Line: 8 > <
55 CASE Line: 8 >case<
135 SPACE Line: 8 > <
56 CATCH Line: 8 >catch<
135 SPACE Line: 8 > <
57 CHAR Line: 8 >char<
135 SPACE Line: 8 > <
58 CLASS Line: 8 >class<
135 SPACE Line: 8 > <
59 CONST Line: 8 >const<
135 SPACE Line: 8 > <
60 CONSTCAST Line: 8 >const_cast<
137 NEWLINE Line: 9 >
<
61 CONTINUE Line: 9 >continue<
135 SPACE Line: 9 > <
62 DEFAULT Line: 9 >default<
135 SPACE Line: 9 > <
64 DELETE Line: 9 >delete<
135 SPACE Line: 9 > <
65 DO Line: 9 >do<
135 SPACE Line: 9 > <
66 DOUBLE Line: 9 >double<
135 SPACE Line: 9 > <
67 DYNAMICCAST Line: 9 >dynamic_cast<
135 SPACE Line: 9 > <
68 ELSE Line: 9 >else<
135 SPACE Line: 9 > <
69 ENUM Line: 9 >enum<
135 SPACE Line: 9 > <
70 EXPLICIT Line: 9 >explicit<
135 SPACE Line: 9 > <
71 EXPORT Line: 9 >export<
137 NEWLINE Line: 10 >
<
72 EXTERN Line: 10 >extern<
135 SPACE Line: 10 > <
52 FALSE Line: 10 >false<
135 SPACE Line: 10 > <
73 FLOAT Line: 10 >float<
135 SPACE Line: 10 > <
74 FOR Line: 10 >for<
135 SPACE Line: 10 > <
75 FRIEND Line: 10 >friend<
135 SPACE Line: 10 > <
76 GOTO Line: 10 >goto<
135 SPACE Line: 10 > <
77 IF Line: 10 >if<
135 SPACE Line: 10 > <
78 INLINE Line: 10 >inline<
135 SPACE Line: 10 > <
79 INT Line: 10 >int<
135 SPACE Line: 10 > <
80 LONG Line: 10 >long<
135 SPACE Line: 10 > <
81 MUTABLE Line: 10 >mutable<
135 SPACE Line: 10 > <
82 NAMESPACE Line: 10 >namespace<
135 SPACE Line: 10 > <
83 NEW Line: 10 >new<
137 NEWLINE Line: 11 >
<
84 OPERATOR Line: 11 >operator<
135 SPACE Line: 11 > <
85 PRIVATE Line: 11 >private<
135 SPACE Line: 11 > <
86 PROTECTED Line: 11 >protected<
135 SPACE Line: 11 > <
87 PUBLIC Line: 11 >public<
135 SPACE Line: 11 > <
88 REGISTER Line: 11 >register<
135 SPACE Line: 11 > <
89 REINTERPRETCAST Line: 11 >reinterpret_cast<
135 SPACE Line: 11 > <
90 RETURN Line: 11 >return<
135 SPACE Line: 11 > <
91 SHORT Line: 11 >short<
137 NEWLINE Line: 12 >
<
92 SIGNED Line: 12 >signed<
135 SPACE Line: 12 > <
93 SIZEOF Line: 12 >sizeof<
135 SPACE Line: 12 > <
94 STATIC Line: 12 >static<
135 SPACE Line: 12 > <
95 STATICCAST Line: 12 >static_cast<
135 SPACE Line: 12 > <
96 STRUCT Line: 12 >struct<
135 SPACE Line: 12 > <
97 SWITCH Line: 12 >switch<
135 SPACE Line: 12 > <
98 TEMPLATE Line: 12 >template<
135 SPACE Line: 12 > <
99 THIS Line: 12 >this<
135 SPACE Line: 12 > <
100 THROW Line: 12 >throw<
135 SPACE Line: 12 > <
53 TRUE Line: 12 >true<
137 NEWLINE Line: 13 >
<
101 TRY Line: 13 >try<
135 SPACE Line: 13 > <
102 TYPEDEF Line: 13 >typedef<
135 SPACE Line: 13 > <
103 TYPEID Line: 13 >typeid<
135 SPACE Line: 13 > <
104 TYPENAME Line: 13 >typename<
135 SPACE Line: 13 > <
105 UNION Line: 13 >union<
135 SPACE Line: 13 > <
106 UNSIGNED Line: 13 >unsigned<
135 SPACE Line: 13 > <
107 USING Line: 13 >using<
135 SPACE Line: 13 > <
108 VIRTUAL Line: 13 >virtual<
135 SPACE Line: 13 > <
109 VOID Line: 13 >void<
135 SPACE Line: 13 > <
110 VOLATILE Line: 13 >volatile<
137 NEWLINE Line: 14 >
<
111 WCHART Line: 14 >wchar_t<
135 SPACE Line: 14 > <
112 WHILE Line: 14 >while<
137 NEWLINE Line: 15 >
<
137 NEWLINE Line: 16 >
<
131 CPPCOMMENT Line: 17 >// now here's the punctuation
<
18 LEFTBRACE Line: 17 >{<
135 SPACE Line: 17 > <
18 LEFTBRACE Line: 17 >??<<
135 SPACE Line: 17 > <
18 LEFTBRACE Line: 17 ><%<
135 SPACE Line: 17 > <
37 RIGHTBRACE Line: 17 >}<
135 SPACE Line: 17 > <
37 RIGHTBRACE Line: 17 >??><
135 SPACE Line: 17 > <
37 RIGHTBRACE Line: 17 >%><
135 SPACE Line: 17 > <
22 LEFTBRACKET Line: 17 >[<
135 SPACE Line: 17 > <
137 NEWLINE Line: 18 >
<
22 LEFTBRACKET Line: 18 >??(<
135 SPACE Line: 18 > <
22 LEFTBRACKET Line: 18 ><:<
135 SPACE Line: 18 > <
39 RIGHTBRACKET Line: 18 >]<
135 SPACE Line: 18 > <
39 RIGHTBRACKET Line: 18 >??)<
135 SPACE Line: 18 > <
39 RIGHTBRACKET Line: 18 >:><
135 SPACE Line: 18 > <
139 POUND Line: 18 >#<
137 NEWLINE Line: 19 >
<
139 POUND Line: 19 >??=<
135 SPACE Line: 19 > <
139 POUND Line: 19 >%:<
135 SPACE Line: 19 > <
138 POUND_POUND Line: 19 >##<
135 SPACE Line: 19 > <
138 POUND_POUND Line: 19 >#??=<
135 SPACE Line: 19 > <
138 POUND_POUND Line: 19 >??=#<
135 SPACE Line: 19 > <
138 POUND_POUND Line: 19 >??=??=<
135 SPACE Line: 19 > <
138 POUND_POUND Line: 19 >%:%:<
135 SPACE Line: 19 > <
137 NEWLINE Line: 20 >
<
21 LEFTPAREN Line: 20 >(<
135 SPACE Line: 20 > <
38 RIGHTPAREN Line: 20 >)<
135 SPACE Line: 20 > <
41 SEMICOLON Line: 20 >;<
135 SPACE Line: 20 > <
9 COLON Line: 20 >:<
135 SPACE Line: 20 > <
14 ELLIPSIS Line: 20 >...<
135 SPACE Line: 20 > <
137 NEWLINE Line: 21 >
<
36 QUESTION_MARK Line: 21 >?<
135 SPACE Line: 21 > <
40 COLON_COLON Line: 21 >::<
135 SPACE Line: 21 > <
12 DOT Line: 21 >.<
135 SPACE Line: 21 > <
13 DOTSTAR Line: 21 >.*<
135 SPACE Line: 21 > <
31 PLUS Line: 21 >+<
135 SPACE Line: 21 > <
137 NEWLINE Line: 22 >
<
23 MINUS Line: 22 >-<
135 SPACE Line: 22 > <
46 STAR Line: 22 >*<
135 SPACE Line: 22 > <
10 DIVIDE Line: 22 >/<
135 SPACE Line: 22 > <
26 PERCENT Line: 22 >%<
135 SPACE Line: 22 > <
6 XOR Line: 22 >^<
135 SPACE Line: 22 > <
137 NEWLINE Line: 23 >
<
6 XOR Line: 23 >??'<
135 SPACE Line: 23 > <
6 XOR Line: 23 >xor<
135 SPACE Line: 23 > <
0 AND Line: 23 >&<
135 SPACE Line: 23 > <
0 AND Line: 23 >bitand<
135 SPACE Line: 23 > <
4 OR Line: 23 >|<
135 SPACE Line: 23 > <
4 OR Line: 23 >bitor<
137 NEWLINE Line: 24 >
<
4 OR Line: 24 >??!<
135 SPACE Line: 24 > <
47 COMPL Line: 24 >~<
135 SPACE Line: 24 > <
47 COMPL Line: 24 >??-<
135 SPACE Line: 24 > <
47 COMPL Line: 24 >compl<
135 SPACE Line: 24 > <
28 NOT Line: 24 >!<
135 SPACE Line: 24 > <
28 NOT Line: 24 >not<
137 NEWLINE Line: 25 >
<
2 ASSIGN Line: 25 >=<
135 SPACE Line: 25 > <
19 LESS Line: 25 ><<
135 SPACE Line: 25 > <
16 GREATER Line: 25 >><
135 SPACE Line: 25 > <
32 PLUSASSIGN Line: 25 >+=<
135 SPACE Line: 25 > <
24 MINUSASSIGN Line: 25 >-=<
135 SPACE Line: 25 > <
137 NEWLINE Line: 26 >
<
48 STARASSIGN Line: 26 >*=<
135 SPACE Line: 26 > <
11 DIVIDEASSIGN Line: 26 >/=<
135 SPACE Line: 26 > <
27 PERCENTASSIGN Line: 26 >%=<
135 SPACE Line: 26 > <
7 XORASSIGN Line: 26 >^=<
135 SPACE Line: 26 > <
7 XORASSIGN Line: 26 >xor_eq<
137 NEWLINE Line: 27 >
<
7 XORASSIGN Line: 27 >??'=<
135 SPACE Line: 27 > <
3 ANDASSIGN Line: 27 >&=<
135 SPACE Line: 27 > <
3 ANDASSIGN Line: 27 >and_eq<
135 SPACE Line: 27 > <
5 ORASSIGN Line: 27 >|=<
135 SPACE Line: 27 > <
5 ORASSIGN Line: 27 >or_eq<
135 SPACE Line: 27 > <
5 ORASSIGN Line: 27 >??!=<
135 SPACE Line: 27 > <
137 NEWLINE Line: 28 >
<
42 SHIFTLEFT Line: 28 ><<<
135 SPACE Line: 28 > <
44 SHIFTRIGHT Line: 28 >>><
135 SPACE Line: 28 > <
45 SHIFTRIGHTASSIGN Line: 28 >>>=<
135 SPACE Line: 28 > <
43 SHIFTLEFTASSIGN Line: 28 ><<=<
135 SPACE Line: 28 > <
15 EQUAL Line: 28 >==<
135 SPACE Line: 28 > <
29 NOTEQUAL Line: 28 >!=<
137 NEWLINE Line: 29 >
<
137 NEWLINE Line: 30 >
<
131 CPPCOMMENT Line: 31 >// here's some identifiers
<
124 IDENTIFIER Line: 31 >ident<
137 NEWLINE Line: 32 >
<
124 IDENTIFIER Line: 32 >ident1<
137 NEWLINE Line: 33 >
<
124 IDENTIFIER Line: 33 >ident1a<
137 NEWLINE Line: 34 >
<
124 IDENTIFIER Line: 34 >_ident<
137 NEWLINE Line: 35 >
<
124 IDENTIFIER Line: 35 >IdEnT_A<
137 NEWLINE Line: 36 >
<
137 NEWLINE Line: 37 >
<
131 CPPCOMMENT Line: 38 >// hex ints
<
128 INTLIT Line: 38 >0x123a<
137 NEWLINE Line: 39 >
<
128 INTLIT Line: 39 >0X123B<
137 NEWLINE Line: 40 >
<
137 NEWLINE Line: 41 >
<
131 CPPCOMMENT Line: 42 >// octal ints
<
128 INTLIT Line: 42 >0<
137 NEWLINE Line: 43 >
<
128 INTLIT Line: 43 >012375423<
137 NEWLINE Line: 44 >
<
137 NEWLINE Line: 45 >
<
131 CPPCOMMENT Line: 46 >// decimal ints
<
128 INTLIT Line: 46 >1<
137 NEWLINE Line: 47 >
<
128 INTLIT Line: 47 >1234999<
137 NEWLINE Line: 48 >
<
137 NEWLINE Line: 49 >
<
131 CPPCOMMENT Line: 50 >// ints with suffixes
<
128 INTLIT Line: 50 >123l<
135 SPACE Line: 50 > <
128 INTLIT Line: 50 >123L<
135 SPACE Line: 50 > <
128 INTLIT Line: 50 >123u<
135 SPACE Line: 50 > <
128 INTLIT Line: 50 >123U<
137 NEWLINE Line: 51 >
<
128 INTLIT Line: 51 >123ul<
135 SPACE Line: 51 > <
128 INTLIT Line: 51 >123uL<
135 SPACE Line: 51 > <
128 INTLIT Line: 51 >123Ul<
135 SPACE Line: 51 > <
128 INTLIT Line: 51 >123UL<
137 NEWLINE Line: 52 >
<
128 INTLIT Line: 52 >123lu<
135 SPACE Line: 52 > <
128 INTLIT Line: 52 >123Lu<
135 SPACE Line: 52 > <
128 INTLIT Line: 52 >123lU<
135 SPACE Line: 52 > <
128 INTLIT Line: 52 >123LU<
137 NEWLINE Line: 53 >
<
137 NEWLINE Line: 54 >
<
131 CPPCOMMENT Line: 55 >// floats
<
129 FLOATLIT Line: 55 >.123<
137 NEWLINE Line: 56 >
<
129 FLOATLIT Line: 56 >1.1<
137 NEWLINE Line: 57 >
<
129 FLOATLIT Line: 57 >1.<
137 NEWLINE Line: 58 >
<
129 FLOATLIT Line: 58 >.123e345<
137 NEWLINE Line: 59 >
<
129 FLOATLIT Line: 59 >1.1E+2<
137 NEWLINE Line: 60 >
<
129 FLOATLIT Line: 60 >1.e-5<
137 NEWLINE Line: 61 >
<
129 FLOATLIT Line: 61 >22e3<
137 NEWLINE Line: 62 >
<
129 FLOATLIT Line: 62 >33E+4<
137 NEWLINE Line: 63 >
<
129 FLOATLIT Line: 63 >43e-3<
137 NEWLINE Line: 64 >
<
129 FLOATLIT Line: 64 >.123f<
135 SPACE Line: 64 > <
129 FLOATLIT Line: 64 >1.1F<
135 SPACE Line: 64 > <
129 FLOATLIT Line: 64 >1.l<
135 SPACE Line: 64 > <
129 FLOATLIT Line: 64 >.123e345L<
137 NEWLINE Line: 65 >
<
129 FLOATLIT Line: 65 >1.1E+2fl<
135 SPACE Line: 65 > <
129 FLOATLIT Line: 65 >1.e-5fL<
135 SPACE Line: 65 > <
129 FLOATLIT Line: 65 >22e3Fl<
135 SPACE Line: 65 > <
129 FLOATLIT Line: 65 >33E+4FL<
137 NEWLINE Line: 66 >
<
129 FLOATLIT Line: 66 >43e-3lf<
135 SPACE Line: 66 > <
129 FLOATLIT Line: 66 >.123Lf<
135 SPACE Line: 66 > <
129 FLOATLIT Line: 66 >1.1lF<
135 SPACE Line: 66 > <
129 FLOATLIT Line: 66 >1.LF<
137 NEWLINE Line: 67 >
<
137 NEWLINE Line: 68 >
<
131 CPPCOMMENT Line: 69 >// chars
<
132 CHARLIT Line: 69 >'a'<
135 SPACE Line: 69 > <
132 CHARLIT Line: 69 >'0'<
135 SPACE Line: 69 > <
132 CHARLIT Line: 69 >'"'<
137 NEWLINE Line: 70 >
<
132 CHARLIT Line: 70 >L'a'<
135 SPACE Line: 70 > <
132 CHARLIT Line: 70 >L'0'<
135 SPACE Line: 70 > <
132 CHARLIT Line: 70 >L'"'<
137 NEWLINE Line: 71 >
<
132 CHARLIT Line: 71 >'\a'<
135 SPACE Line: 71 > <
132 CHARLIT Line: 71 >'??/b'<
135 SPACE Line: 71 > <
132 CHARLIT Line: 71 >'\f'<
135 SPACE Line: 71 > <
132 CHARLIT Line: 71 >'??/n'<
135 SPACE Line: 71 > <
132 CHARLIT Line: 71 >'\r'<
135 SPACE Line: 71 > <
132 CHARLIT Line: 71 >'\t'<
135 SPACE Line: 71 > <
132 CHARLIT Line: 71 >'\v'<
135 SPACE Line: 71 > <
132 CHARLIT Line: 71 >'\?'<
135 SPACE Line: 71 > <
132 CHARLIT Line: 71 >'\''<
135 SPACE Line: 71 > <
132 CHARLIT Line: 71 >'\"'<
135 SPACE Line: 71 > <
132 CHARLIT Line: 71 >'\\'<
137 NEWLINE Line: 72 >
<
132 CHARLIT Line: 72 >'\x1'<
135 SPACE Line: 72 > <
132 CHARLIT Line: 72 >'\xaBcD123'<
135 SPACE Line: 72 > <
132 CHARLIT Line: 72 >'\1'<
135 SPACE Line: 72 > <
132 CHARLIT Line: 72 >'\12'<
135 SPACE Line: 72 > <
132 CHARLIT Line: 72 >'\123'<
137 NEWLINE Line: 73 >
<
132 CHARLIT Line: 73 >'\u1234'<
135 SPACE Line: 73 > <
132 CHARLIT Line: 73 >'\UABCD1234'<
137 NEWLINE Line: 74 >
<
137 NEWLINE Line: 75 >
<
131 CPPCOMMENT Line: 76 >// Strings
<
133 STRINGLIT Line: 76 >""<
135 SPACE Line: 76 > <
133 STRINGLIT Line: 76 >"a"<
135 SPACE Line: 76 > <
133 STRINGLIT Line: 76 >"01234"<
135 SPACE Line: 76 > <
133 STRINGLIT Line: 76 >"'"<
137 NEWLINE Line: 77 >
<
133 STRINGLIT Line: 77 >L""<
135 SPACE Line: 77 > <
133 STRINGLIT Line: 77 >L"a"<
135 SPACE Line: 77 > <
133 STRINGLIT Line: 77 >L"01234"<
135 SPACE Line: 77 > <
133 STRINGLIT Line: 77 >L"'"<
137 NEWLINE Line: 78 >
<
133 STRINGLIT Line: 78 >"\a??/b\f??/n\r\t\v\?\'\"\\"<
137 NEWLINE Line: 79 >
<
133 STRINGLIT Line: 79 >"\x1\xaBcD123\1\12\123"<
137 NEWLINE Line: 80 >
<
133 STRINGLIT Line: 80 >"\u1234\UABCD1234"<
137 NEWLINE Line: 81 >
<
133 STRINGLIT Line: 81 >"/* a faked c comment */"<
137 NEWLINE Line: 82 >
<
137 NEWLINE Line: 83 >
<
131 CPPCOMMENT Line: 84 >// pre-processor directives
<
143 PP_HHEADER Line: 84 >#include <io.h><
137 NEWLINE Line: 85 >
<
135 SPACE Line: 85 > <
142 PP_QHEADER Line: 85 ># include"r.h"<
137 NEWLINE Line: 86 >
<
141 PP_INCLUDE Line: 86 >#include <
124 IDENTIFIER Line: 86 >SOME_HEADER<
137 NEWLINE Line: 87 >
<
114 PP_IF Line: 87 >#if<
137 NEWLINE Line: 88 >
<
115 PP_IFDEF Line: 88 >#ifdef<
137 NEWLINE Line: 89 >
<
116 PP_IFNDEF Line: 89 >#ifndef<
137 NEWLINE Line: 90 >
<
117 PP_ELIF Line: 90 >#elif<
137 NEWLINE Line: 91 >
<
118 PP_ENDIF Line: 91 >#endif<
137 NEWLINE Line: 92 >
<
113 PP_DEFINE Line: 92 >#define<
137 NEWLINE Line: 93 >
<
122 PP_UNDEF Line: 93 >#undef<
137 NEWLINE Line: 94 >
<
120 PP_LINE Line: 94 >#line<
137 NEWLINE Line: 95 >
<
119 PP_ERROR Line: 95 >#error<
137 NEWLINE Line: 96 >
<
121 PP_PRAGMA Line: 96 >#pragma<
137 NEWLINE Line: 97 >
<
137 NEWLINE Line: 98 >
<
130 CCOMMENT Line: 98 >/* another c comment */<
137 NEWLINE Line: 99 >
<

View File

@@ -0,0 +1,4 @@
#!/bin/sh
# Run test_lexer on every sample input and compare what it prints with the
# recorded expected output; exit with status 1 at the first difference.
# All paths are quoted so a source directory containing whitespace works.
SRCDIR=`dirname "$0"`
./test_lexer "$SRCDIR/lextest.in" | diff - "$SRCDIR/lextest.output" || exit 1
./test_lexer "$SRCDIR/test_lexer.in" | diff - "$SRCDIR/test_lexer.output" || exit 1

View File

@@ -0,0 +1,42 @@
#include <fcntl.h>
#if !defined(_MSC_VER)
#include <unistd.h>
#else
#include <memory.h>
#include <io.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../tokens.h"
#include "../tokennames.cpp"
#include "scanner.h"
/*
 * Test driver for the scanner.
 *
 * Expects exactly one argument, the name of the file to tokenize. Prints one
 * line per token (token id, token name, line number, token text) until
 * scan() returns T_EOF. Prints a usage message and exits with status 1 when
 * the argument is missing or the file cannot be opened.
 */
int main(int argc, char** argv) {
    Scanner in;
    int t;

    /* start from an all-zero scanner state */
    memset((char*) &in, 0, sizeof(in));
    in.fd = -1;
    if (argc == 2)
    {
        in.fd = open(argv[1], O_RDONLY);
    }
    if (-1 == in.fd)
    {
        printf ("Usage: test_lexer <input>\n");
        exit(1);
    }
    in.eol_offsets = aq_create();
    while((t = scan(&in)) != T_EOF)
    {
        /* The '*' precision of "%.*s" must be an int, but cur - tok is a
           pointer difference; the line counter is unsigned, hence %u; the
           token text is stored as unsigned char, %s wants a char pointer. */
        printf("%4d %16s Line: %5u >%.*s<\n", ID_FROM_TOKEN(t-T_FIRST_TOKEN),
            tok_names[ID_FROM_TOKEN(t-T_FIRST_TOKEN)], in.line + 1,
            (int)(in.cur - in.tok), (const char *)in.tok);
    }
    aq_terminate(in.eol_offsets);
    free(in.bot);
    close(in.fd);
    return 0;
}

View File

@@ -0,0 +1,42 @@
#include <fcntl.h>
#if !defined(_MSC_VER)
#include <unistd.h>
#else
#include <memory.h>
#include <io.h>
#endif
#include <stdio.h>
#include "../tokens.h"
#include "../tokennames.cpp"
#include "scanner.h"
int main(int argc, char** argv) {
Scanner in;
int t;
memset((char*) &in, 0, sizeof(in));
in.fd = -1;
if (argc == 2)
{
in.fd = open(argv[1], O_RDONLY);
}
if (-1 == in.fd)
{
printf ("Usage: test_lexer <input>\n");
exit(1);
}
in.eol_offsets = aq_create();
while((t = scan(&in)) != T_EOF)
{
printf("%4d %16s Line: %5d >%.*s<\n", ID_FROM_TOKEN(t-T_FIRST_TOKEN),
tok_names[ID_FROM_TOKEN(t-T_FIRST_TOKEN)], in.line + 1,
in.cur - in.tok, in.tok);
}
aq_terminate(in.eol_offsets);
free(in.bot);
close(in.fd);
return 0;
}

View File

@@ -0,0 +1,340 @@
143 PP_HHEADER Line: 1 >#include <fcntl.h><
137 NEWLINE Line: 2 >
<
114 PP_IF Line: 2 >#if<
135 SPACE Line: 2 > <
28 NOT Line: 2 >!<
124 IDENTIFIER Line: 2 >defined<
21 LEFTPAREN Line: 2 >(<
124 IDENTIFIER Line: 2 >_MSC_VER<
38 RIGHTPAREN Line: 2 >)<
137 NEWLINE Line: 3 >
<
143 PP_HHEADER Line: 3 >#include <unistd.h><
137 NEWLINE Line: 4 >
<
139 POUND Line: 4 >#<
68 ELSE Line: 4 >else<
137 NEWLINE Line: 5 >
<
143 PP_HHEADER Line: 5 >#include <memory.h><
137 NEWLINE Line: 6 >
<
143 PP_HHEADER Line: 6 >#include <io.h><
137 NEWLINE Line: 7 >
<
118 PP_ENDIF Line: 7 >#endif<
135 SPACE Line: 7 > <
137 NEWLINE Line: 8 >
<
143 PP_HHEADER Line: 8 >#include <stdio.h><
137 NEWLINE Line: 9 >
<
137 NEWLINE Line: 10 >
<
142 PP_QHEADER Line: 10 >#include "../tokens.h"<
137 NEWLINE Line: 11 >
<
142 PP_QHEADER Line: 11 >#include "../tokennames.cpp"<
137 NEWLINE Line: 12 >
<
137 NEWLINE Line: 13 >
<
142 PP_QHEADER Line: 13 >#include "scanner.h"<
137 NEWLINE Line: 14 >
<
137 NEWLINE Line: 15 >
<
79 INT Line: 15 >int<
135 SPACE Line: 15 > <
124 IDENTIFIER Line: 15 >main<
21 LEFTPAREN Line: 15 >(<
79 INT Line: 15 >int<
135 SPACE Line: 15 > <
124 IDENTIFIER Line: 15 >argc<
8 COMMA Line: 15 >,<
135 SPACE Line: 15 > <
57 CHAR Line: 15 >char<
46 STAR Line: 15 >*<
46 STAR Line: 15 >*<
135 SPACE Line: 15 > <
124 IDENTIFIER Line: 15 >argv<
38 RIGHTPAREN Line: 15 >)<
135 SPACE Line: 15 > <
18 LEFTBRACE Line: 15 >{<
137 NEWLINE Line: 16 >
<
135 SPACE Line: 16 > <
124 IDENTIFIER Line: 16 >Scanner<
135 SPACE Line: 16 > <
124 IDENTIFIER Line: 16 >in<
41 SEMICOLON Line: 16 >;<
137 NEWLINE Line: 17 >
<
135 SPACE Line: 17 > <
79 INT Line: 17 >int<
135 SPACE Line: 17 > <
124 IDENTIFIER Line: 17 >t<
41 SEMICOLON Line: 17 >;<
137 NEWLINE Line: 18 >
<
135 SPACE Line: 18 > <
124 IDENTIFIER Line: 18 >memset<
21 LEFTPAREN Line: 18 >(<
21 LEFTPAREN Line: 18 >(<
57 CHAR Line: 18 >char<
46 STAR Line: 18 >*<
38 RIGHTPAREN Line: 18 >)<
135 SPACE Line: 18 > <
0 AND Line: 18 >&<
124 IDENTIFIER Line: 18 >in<
8 COMMA Line: 18 >,<
135 SPACE Line: 18 > <
128 INTLIT Line: 18 >0<
8 COMMA Line: 18 >,<
135 SPACE Line: 18 > <
93 SIZEOF Line: 18 >sizeof<
21 LEFTPAREN Line: 18 >(<
124 IDENTIFIER Line: 18 >in<
38 RIGHTPAREN Line: 18 >)<
38 RIGHTPAREN Line: 18 >)<
41 SEMICOLON Line: 18 >;<
137 NEWLINE Line: 19 >
<
135 SPACE Line: 19 > <
124 IDENTIFIER Line: 19 >in<
12 DOT Line: 19 >.<
124 IDENTIFIER Line: 19 >fd<
135 SPACE Line: 19 > <
2 ASSIGN Line: 19 >=<
135 SPACE Line: 19 > <
23 MINUS Line: 19 >-<
128 INTLIT Line: 19 >1<
41 SEMICOLON Line: 19 >;<
137 NEWLINE Line: 20 >
<
135 SPACE Line: 20 > <
77 IF Line: 20 >if<
135 SPACE Line: 20 > <
21 LEFTPAREN Line: 20 >(<
124 IDENTIFIER Line: 20 >argc<
135 SPACE Line: 20 > <
15 EQUAL Line: 20 >==<
135 SPACE Line: 20 > <
128 INTLIT Line: 20 >2<
38 RIGHTPAREN Line: 20 >)<
137 NEWLINE Line: 21 >
<
135 SPACE Line: 21 > <
18 LEFTBRACE Line: 21 >{<
137 NEWLINE Line: 22 >
<
135 SPACE Line: 22 > <
124 IDENTIFIER Line: 22 >in<
12 DOT Line: 22 >.<
124 IDENTIFIER Line: 22 >fd<
135 SPACE Line: 22 > <
2 ASSIGN Line: 22 >=<
135 SPACE Line: 22 > <
124 IDENTIFIER Line: 22 >open<
21 LEFTPAREN Line: 22 >(<
124 IDENTIFIER Line: 22 >argv<
22 LEFTBRACKET Line: 22 >[<
128 INTLIT Line: 22 >1<
39 RIGHTBRACKET Line: 22 >]<
8 COMMA Line: 22 >,<
135 SPACE Line: 22 > <
124 IDENTIFIER Line: 22 >O_RDONLY<
38 RIGHTPAREN Line: 22 >)<
41 SEMICOLON Line: 22 >;<
137 NEWLINE Line: 23 >
<
135 SPACE Line: 23 > <
37 RIGHTBRACE Line: 23 >}<
137 NEWLINE Line: 24 >
<
135 SPACE Line: 24 > <
137 NEWLINE Line: 25 >
<
135 SPACE Line: 25 > <
77 IF Line: 25 >if<
135 SPACE Line: 25 > <
21 LEFTPAREN Line: 25 >(<
23 MINUS Line: 25 >-<
128 INTLIT Line: 25 >1<
135 SPACE Line: 25 > <
15 EQUAL Line: 25 >==<
135 SPACE Line: 25 > <
124 IDENTIFIER Line: 25 >in<
12 DOT Line: 25 >.<
124 IDENTIFIER Line: 25 >fd<
38 RIGHTPAREN Line: 25 >)<
137 NEWLINE Line: 26 >
<
135 SPACE Line: 26 > <
18 LEFTBRACE Line: 26 >{<
137 NEWLINE Line: 27 >
<
135 SPACE Line: 27 > <
124 IDENTIFIER Line: 27 >printf<
135 SPACE Line: 27 > <
21 LEFTPAREN Line: 27 >(<
133 STRINGLIT Line: 27 >"Usage: test_lexer <input>\n"<
38 RIGHTPAREN Line: 27 >)<
41 SEMICOLON Line: 27 >;<
137 NEWLINE Line: 28 >
<
135 SPACE Line: 28 > <
124 IDENTIFIER Line: 28 >exit<
21 LEFTPAREN Line: 28 >(<
128 INTLIT Line: 28 >1<
38 RIGHTPAREN Line: 28 >)<
41 SEMICOLON Line: 28 >;<
137 NEWLINE Line: 29 >
<
135 SPACE Line: 29 > <
37 RIGHTBRACE Line: 29 >}<
137 NEWLINE Line: 30 >
<
135 SPACE Line: 30 > <
124 IDENTIFIER Line: 30 >in<
12 DOT Line: 30 >.<
124 IDENTIFIER Line: 30 >eol_offsets<
135 SPACE Line: 30 > <
2 ASSIGN Line: 30 >=<
135 SPACE Line: 30 > <
124 IDENTIFIER Line: 30 >aq_create<
21 LEFTPAREN Line: 30 >(<
38 RIGHTPAREN Line: 30 >)<
41 SEMICOLON Line: 30 >;<
137 NEWLINE Line: 31 >
<
135 SPACE Line: 31 > <
112 WHILE Line: 31 >while<
21 LEFTPAREN Line: 31 >(<
21 LEFTPAREN Line: 31 >(<
124 IDENTIFIER Line: 31 >t<
135 SPACE Line: 31 > <
2 ASSIGN Line: 31 >=<
135 SPACE Line: 31 > <
124 IDENTIFIER Line: 31 >scan<
21 LEFTPAREN Line: 31 >(<
0 AND Line: 31 >&<
124 IDENTIFIER Line: 31 >in<
38 RIGHTPAREN Line: 31 >)<
38 RIGHTPAREN Line: 31 >)<
135 SPACE Line: 31 > <
29 NOTEQUAL Line: 31 >!=<
135 SPACE Line: 31 > <
124 IDENTIFIER Line: 31 >T_EOF<
38 RIGHTPAREN Line: 31 >)<
137 NEWLINE Line: 32 >
<
135 SPACE Line: 32 > <
18 LEFTBRACE Line: 32 >{<
137 NEWLINE Line: 33 >
<
135 SPACE Line: 33 > <
124 IDENTIFIER Line: 33 >printf<
21 LEFTPAREN Line: 33 >(<
133 STRINGLIT Line: 33 >"%4d %16s Line: %5d >%.*s<\n"<
8 COMMA Line: 33 >,<
135 SPACE Line: 33 > <
124 IDENTIFIER Line: 33 >ID_FROM_TOKEN<
21 LEFTPAREN Line: 33 >(<
124 IDENTIFIER Line: 33 >t<
23 MINUS Line: 33 >-<
124 IDENTIFIER Line: 33 >T_FIRST_TOKEN<
38 RIGHTPAREN Line: 33 >)<
8 COMMA Line: 33 >,<
135 SPACE Line: 33 > <
137 NEWLINE Line: 34 >
<
135 SPACE Line: 34 > <
124 IDENTIFIER Line: 34 >tok_names<
22 LEFTBRACKET Line: 34 >[<
124 IDENTIFIER Line: 34 >ID_FROM_TOKEN<
21 LEFTPAREN Line: 34 >(<
124 IDENTIFIER Line: 34 >t<
23 MINUS Line: 34 >-<
124 IDENTIFIER Line: 34 >T_FIRST_TOKEN<
38 RIGHTPAREN Line: 34 >)<
39 RIGHTBRACKET Line: 34 >]<
8 COMMA Line: 34 >,<
135 SPACE Line: 34 > <
124 IDENTIFIER Line: 34 >in<
12 DOT Line: 34 >.<
124 IDENTIFIER Line: 34 >line<
135 SPACE Line: 34 > <
31 PLUS Line: 34 >+<
135 SPACE Line: 34 > <
128 INTLIT Line: 34 >1<
8 COMMA Line: 34 >,<
135 SPACE Line: 34 > <
137 NEWLINE Line: 35 >
<
135 SPACE Line: 35 > <
124 IDENTIFIER Line: 35 >in<
12 DOT Line: 35 >.<
124 IDENTIFIER Line: 35 >cur<
135 SPACE Line: 35 > <
23 MINUS Line: 35 >-<
135 SPACE Line: 35 > <
124 IDENTIFIER Line: 35 >in<
12 DOT Line: 35 >.<
124 IDENTIFIER Line: 35 >tok<
8 COMMA Line: 35 >,<
135 SPACE Line: 35 > <
124 IDENTIFIER Line: 35 >in<
12 DOT Line: 35 >.<
124 IDENTIFIER Line: 35 >tok<
38 RIGHTPAREN Line: 35 >)<
41 SEMICOLON Line: 35 >;<
137 NEWLINE Line: 36 >
<
135 SPACE Line: 36 > <
37 RIGHTBRACE Line: 36 >}<
137 NEWLINE Line: 37 >
<
135 SPACE Line: 37 > <
124 IDENTIFIER Line: 37 >aq_terminate<
21 LEFTPAREN Line: 37 >(<
124 IDENTIFIER Line: 37 >in<
12 DOT Line: 37 >.<
124 IDENTIFIER Line: 37 >eol_offsets<
38 RIGHTPAREN Line: 37 >)<
41 SEMICOLON Line: 37 >;<
137 NEWLINE Line: 38 >
<
135 SPACE Line: 38 > <
124 IDENTIFIER Line: 38 >free<
21 LEFTPAREN Line: 38 >(<
124 IDENTIFIER Line: 38 >in<
12 DOT Line: 38 >.<
124 IDENTIFIER Line: 38 >bot<
38 RIGHTPAREN Line: 38 >)<
41 SEMICOLON Line: 38 >;<
137 NEWLINE Line: 39 >
<
135 SPACE Line: 39 > <
124 IDENTIFIER Line: 39 >close<
21 LEFTPAREN Line: 39 >(<
124 IDENTIFIER Line: 39 >in<
12 DOT Line: 39 >.<
124 IDENTIFIER Line: 39 >fd<
38 RIGHTPAREN Line: 39 >)<
41 SEMICOLON Line: 39 >;<
137 NEWLINE Line: 40 >
<
137 NEWLINE Line: 41 >
<
135 SPACE Line: 41 > <
90 RETURN Line: 41 >return<
135 SPACE Line: 41 > <
128 INTLIT Line: 41 >0<
41 SEMICOLON Line: 41 >;<
137 NEWLINE Line: 42 >
<
37 RIGHTBRACE Line: 42 >}<
137 NEWLINE Line: 43 >
<

View File

@@ -0,0 +1,492 @@
/*=============================================================================
A Standard compliant C++ preprocessor
SLex (Spirit Lex) based C++ lexer
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_CPP_SLEX_LEXER_HPP__5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6__INCLUDED_)
#define _CPP_SLEX_LEXER_HPP__5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6__INCLUDED_
#include <ctime>        // std::time_t
#include <fstream>      // std::ifstream, std::ofstream (DFA cache file)
#include <iostream>     // std::cerr (progress output of init_lexer)
#include <iterator>     // std::iterator_traits
#include <string>
#if defined(BOOST_SPIRIT_DEBUG)
#include <iostream>
#endif // defined(BOOST_SPIRIT_DEBUG)
#include <boost/spirit/core.hpp>
#include <boost/spirit/iterator/position_iterator.hpp>
#include "../slex/lexer.hpp" // "spirit/lexer.hpp"
#include "cpplexer/cpp_token_ids.hpp"
#include "cpplexer/cpp_lex_interface.hpp"
#include "cpplexer/slex/cpp_slex_token.hpp"
#include "cpplexer/slex/util/time_conversion_helper.hpp"
///////////////////////////////////////////////////////////////////////////////
namespace cpplexer {
namespace slex {
///////////////////////////////////////////////////////////////////////////////
//
// encapsulation of the boost::spirit::slex based cpp lexer
//
///////////////////////////////////////////////////////////////////////////////
// C++ lexer built on boost::spirit::lexer over a position_iterator.
// The constructor registers every regular expression listed in init_data
// with the base class; compilation_time holds the build time stamp used when
// loading or saving the DFA tables.
template <typename IteratorT, typename PositionT>
class lexer
: public boost::spirit::lexer<boost::spirit::position_iterator<IteratorT> >
{
public:
typedef boost::spirit::position_iterator<IteratorT> iterator_t;
typedef typename std::iterator_traits<IteratorT>::value_type char_t;
typedef boost::spirit::lexer<iterator_t> base_t;
typedef lex_token<IteratorT, PositionT> token_t;
lexer();
// get the time of the last compilation (derived from __DATE__/__TIME__)
std::time_t get_compilation_time() { return compilation_time.get_time(); }
private:
// initialization data (regular expressions for the token definitions);
// the member order must match the aggregate initializers in init_data
struct lexer_data {
token_id tokenid; // id reported when the regex matches
char_t const *tokenregex; // regular expression describing the token
typename base_t::callback_t tokencb; // callback invoked on a match (0: none)
unsigned int lexerstate; // lexer state in which the regex is active
};
// table of all token definitions, terminated by an entry with token id 0
static lexer_data const init_data[];
// helper for calculation of the time of last compilation
static util::time_conversion_helper compilation_time;
};
///////////////////////////////////////////////////////////////////////////////
// several callback functions for the lexing process
// Lexer callback which drops the token that has just been matched: it only
// asks the lexer control object to ignore the current token; every other
// argument is unused.
template <typename IteratorT>
void
ignore_callback(
    IteratorT const &,
    IteratorT &,
    IteratorT const &,
    int const &,
    boost::spirit::lexer_control<int> &ctl)
{
    ctl.ignore_current_token();
}
///////////////////////////////////////////////////////////////////////////////
// data required for initialization of the lexer (token definitions)
// Building blocks for the regular expressions below. All of them expand to
// string literals which are glued together by literal concatenation.
// OR     - regex alternation
// Q(c)   - prefixes c with a regex backslash, i.e. quotes the first character
// TRI(c) - the quoted "??" prefix of a trigraph followed by c
#define OR "|"
#define Q(c) "\\" c
#define TRI(c) Q("?") Q("?") c
// definition of some subtoken regexps to simplify the regex definitions
#define BLANK "[ \\t]"
// a complete /* ... */ comment
#define CCOMMENT \
Q("/") Q("*") "[^*]*" Q("*") "+" "(" "[^/*][^*]*" Q("*") "+" ")*" Q("/")
// optional run of blanks and C comments, as allowed inside a pp directive
#define PPSPACE "(" BLANK OR CCOMMENT ")*"
#define OCTALDIGIT "[0-7]"
#define DIGIT "[0-9]"
#define HEXDIGIT "[0-9a-fA-F]"
#define SIGN "[-+]?"
#define EXPONENT "(" "[eE]" SIGN "[0-9]+" ")"
#define INTEGER_SUFFIX "(" "[uU][lL]?|[lL][uU]?" ")"
#define FLOAT_SUFFIX "(" "[fF][lL]?|[lL][fF]?" ")"
// optional wide character/string prefix
#define CHAR_SPEC "L?"
// a backslash, written either directly or as the trigraph ??/
#define BACKSLASH "(" Q("\\") OR TRI(Q("/")) ")"
// simple, hexadecimal and octal escape sequences
#define ESCAPESEQ BACKSLASH "(" \
"[abfnrtv?'\"]" OR \
BACKSLASH OR \
"x" HEXDIGIT "+" OR \
OCTALDIGIT OCTALDIGIT "?" OCTALDIGIT "?" \
")"
#define HEXQUAD HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
// universal character names: \uXXXX and \UXXXXXXXX
#define UNIVERSALCHAR BACKSLASH "(" \
"u" HEXQUAD OR \
"U" HEXQUAD HEXQUAD \
")"
// '#', its trigraph ??= and its digraph %:
#define POUNDDEF "(" "#" OR TRI("=") OR Q("%:") ")"
#define NEWLINEDEF "(" "\\n" OR "\\r" OR "\\r\\n" ")"
///////////////////////////////////////////////////////////////////////////////
// lexer state constants
// NOTE(review): LEXER_STATE_PP is defined as 1 while NUM_LEXER_STATES is also
// 1; only LEXER_STATE_NORMAL is used by the token table in this file --
// confirm the intended number of states before adding PP-state regexes.
#define LEXER_STATE_NORMAL 0
#define LEXER_STATE_PP 1
#define NUM_LEXER_STATES 1
// helper for initializing token data: both produce one lexer_data initializer
// for token T_<id> in the normal lexer state; TOKEN_DATA passes no callback (0)
#define TOKEN_DATA(id, regex) \
{ T_##id, regex, 0, LEXER_STATE_NORMAL }
#define TOKEN_DATA_EX(id, regex, callback) \
{ T_##id, regex, callback, LEXER_STATE_NORMAL }
///////////////////////////////////////////////////////////////////////////////
// C++ token definitions
// Table of all token definitions registered by the lexer constructor.
// Every entry maps a token id (T_<name>) to the regular expression that
// recognizes it; operators also list their trigraph, digraph and alternative
// keyword spellings. The table is terminated by an entry with token id 0.
template <typename IteratorT, typename PositionT>
typename lexer<IteratorT, PositionT>::lexer_data const
lexer<IteratorT, PositionT>::init_data[] =
{
    // operators and punctuators
    TOKEN_DATA(AND, "&" OR "bitand"),
    TOKEN_DATA(ANDAND, "&&"),
    TOKEN_DATA(ASSIGN, "="),
    TOKEN_DATA(ANDASSIGN, "&=" OR "and_eq"),
    TOKEN_DATA(OR, Q("|") OR TRI("!") OR "bitor"),
    TOKEN_DATA(ORASSIGN, Q("|=") OR TRI("!=") OR "or_eq"),
    TOKEN_DATA(XOR, Q("^") OR TRI("'") OR "xor"),
    TOKEN_DATA(XORASSIGN, Q("^=") OR TRI("'=") OR "xor_eq"),
    TOKEN_DATA(COMMA, ","),
    TOKEN_DATA(COLON, ":"),
    TOKEN_DATA(DIVIDE, Q("/")),
    TOKEN_DATA(DIVIDEASSIGN, Q("/=")),
    TOKEN_DATA(DOT, Q(".")),
    TOKEN_DATA(DOTSTAR, Q(".") Q("*")),
    TOKEN_DATA(ELLIPSIS, Q(".") Q(".") Q(".")),
    TOKEN_DATA(EQUAL, "=="),
    TOKEN_DATA(GREATER, ">"),
    TOKEN_DATA(GREATEREQUAL, ">="),
    TOKEN_DATA(LEFTBRACE, Q("{") OR TRI("<") OR "<" Q("%")),
    TOKEN_DATA(LESS, "<"),
    TOKEN_DATA(LESSEQUAL, "<="),
    TOKEN_DATA(LEFTPAREN, Q("(")),
    TOKEN_DATA(LEFTBRACKET, Q("[") OR TRI(Q("(")) OR "<:"),
    TOKEN_DATA(MINUS, Q("-")),
    TOKEN_DATA(MINUSASSIGN, Q("-=")),
    TOKEN_DATA(MINUSMINUS, Q("-") Q("-")),
    TOKEN_DATA(PERCENT, Q("%")),
    TOKEN_DATA(PERCENTASSIGN, Q("%=")),
    TOKEN_DATA(NOT, "!" OR "not"),
    // the alternative token for "!=" is spelled not_eq
    TOKEN_DATA(NOTEQUAL, "!=" OR "not_eq"),
    TOKEN_DATA(OROR, Q("|") Q("|") OR TRI("!") Q("|") OR Q("|") TRI("!") OR
        TRI("!") TRI("!")),
    TOKEN_DATA(PLUS, Q("+")),
    TOKEN_DATA(PLUSASSIGN, Q("+=")),
    TOKEN_DATA(PLUSPLUS, Q("+") Q("+")),
    TOKEN_DATA(ARROW, Q("->")),
    TOKEN_DATA(ARROWSTAR, Q("->") Q("*")),
    TOKEN_DATA(QUESTION_MARK, Q("?")),
    TOKEN_DATA(RIGHTBRACE, Q("}") OR TRI(">") OR Q("%>")),
    TOKEN_DATA(RIGHTPAREN, Q(")")),
    TOKEN_DATA(RIGHTBRACKET, Q("]") OR TRI(Q(")")) OR ":>"),
    TOKEN_DATA(COLON_COLON, "::"),
    TOKEN_DATA(SEMICOLON, ";"),
    TOKEN_DATA(SHIFTLEFT, "<<"),
    TOKEN_DATA(SHIFTLEFTASSIGN, "<<="),
    TOKEN_DATA(SHIFTRIGHT, ">>"),
    TOKEN_DATA(SHIFTRIGHTASSIGN, ">>="),
    TOKEN_DATA(STAR, Q("*")),
    TOKEN_DATA(COMPL, Q("~") OR TRI("-") OR "compl"),
    TOKEN_DATA(STARASSIGN, Q("*=")),

    // keywords
    TOKEN_DATA(ASM, "asm"),
    TOKEN_DATA(AUTO, "auto"),
    TOKEN_DATA(BOOL, "bool"),
    TOKEN_DATA(FALSE, "false"),
    TOKEN_DATA(TRUE, "true"),
    TOKEN_DATA(BREAK, "break"),
    TOKEN_DATA(CASE, "case"),
    TOKEN_DATA(CATCH, "catch"),
    TOKEN_DATA(CHAR, "char"),
    TOKEN_DATA(CLASS, "class"),
    TOKEN_DATA(CONST, "const"),
    TOKEN_DATA(CONSTCAST, "const_cast"),
    TOKEN_DATA(CONTINUE, "continue"),
    TOKEN_DATA(DEFAULT, "default"),
    TOKEN_DATA(DEFINED, "defined"),
    TOKEN_DATA(DELETE, "delete"),
    TOKEN_DATA(DO, "do"),
    TOKEN_DATA(DOUBLE, "double"),
    TOKEN_DATA(DYNAMICCAST, "dynamic_cast"),
    TOKEN_DATA(ELSE, "else"),
    TOKEN_DATA(ENUM, "enum"),
    TOKEN_DATA(EXPLICIT, "explicit"),
    TOKEN_DATA(EXPORT, "export"),
    TOKEN_DATA(EXTERN, "extern"),
    TOKEN_DATA(FLOAT, "float"),
    TOKEN_DATA(FOR, "for"),
    TOKEN_DATA(FRIEND, "friend"),
    TOKEN_DATA(GOTO, "goto"),
    TOKEN_DATA(IF, "if"),
    TOKEN_DATA(INLINE, "inline"),
    TOKEN_DATA(INT, "int"),
    TOKEN_DATA(LONG, "long"),
    TOKEN_DATA(MUTABLE, "mutable"),
    TOKEN_DATA(NAMESPACE, "namespace"),
    TOKEN_DATA(NEW, "new"),
    TOKEN_DATA(OPERATOR, "operator"),
    TOKEN_DATA(PRIVATE, "private"),
    TOKEN_DATA(PROTECTED, "protected"),
    TOKEN_DATA(PUBLIC, "public"),
    TOKEN_DATA(REGISTER, "register"),
    TOKEN_DATA(REINTERPRETCAST, "reinterpret_cast"),
    TOKEN_DATA(RETURN, "return"),
    TOKEN_DATA(SHORT, "short"),
    TOKEN_DATA(SIGNED, "signed"),
    TOKEN_DATA(SIZEOF, "sizeof"),
    TOKEN_DATA(STATIC, "static"),
    TOKEN_DATA(STATICCAST, "static_cast"),
    TOKEN_DATA(STRUCT, "struct"),
    TOKEN_DATA(SWITCH, "switch"),
    TOKEN_DATA(TEMPLATE, "template"),
    TOKEN_DATA(THIS, "this"),
    TOKEN_DATA(THROW, "throw"),
    TOKEN_DATA(TRY, "try"),
    TOKEN_DATA(TYPEDEF, "typedef"),
    TOKEN_DATA(TYPEID, "typeid"),
    TOKEN_DATA(TYPENAME, "typename"),
    TOKEN_DATA(UNION, "union"),
    TOKEN_DATA(UNSIGNED, "unsigned"),
    TOKEN_DATA(USING, "using"),
    TOKEN_DATA(VIRTUAL, "virtual"),
    TOKEN_DATA(VOID, "void"),
    TOKEN_DATA(VOLATILE, "volatile"),
    TOKEN_DATA(WCHART, "wchar_t"),
    TOKEN_DATA(WHILE, "while"),

    // preprocessor directives
    TOKEN_DATA(PP_DEFINE, POUNDDEF PPSPACE "define"),
    TOKEN_DATA(PP_IF, POUNDDEF PPSPACE "if"),
    TOKEN_DATA(PP_IFDEF, POUNDDEF PPSPACE "ifdef"),
    TOKEN_DATA(PP_IFNDEF, POUNDDEF PPSPACE "ifndef"),
    TOKEN_DATA(PP_ELIF, POUNDDEF PPSPACE "elif"),
    TOKEN_DATA(PP_ENDIF, POUNDDEF PPSPACE "endif"),
    TOKEN_DATA(PP_ERROR, POUNDDEF PPSPACE "error"),
    TOKEN_DATA(PP_QHEADER, POUNDDEF PPSPACE "include" PPSPACE Q("\"") "[^\\n\\r\"]+" Q("\"")),
    TOKEN_DATA(PP_HHEADER, POUNDDEF PPSPACE "include" PPSPACE "<" "[^\\n\\r>]+" ">"),
    TOKEN_DATA(PP_INCLUDE, POUNDDEF PPSPACE "include" PPSPACE),
    TOKEN_DATA(PP_LINE, POUNDDEF PPSPACE "line"),
    TOKEN_DATA(PP_PRAGMA, POUNDDEF PPSPACE "pragma"),
    TOKEN_DATA(PP_UNDEF, POUNDDEF PPSPACE "undef"),
    TOKEN_DATA(PP_WARNING, POUNDDEF PPSPACE "warning"),

    // identifiers, literals, comments and white space
    TOKEN_DATA(IDENTIFIER, "[a-zA-Z_][a-zA-Z0-9_]*"),
//  TOKEN_DATA(OCTALINT, "0" OCTALDIGIT "*" INTEGER_SUFFIX "?"),
//  TOKEN_DATA(DECIMALINT, "[1-9]" DIGIT "*" INTEGER_SUFFIX "?"),
//  TOKEN_DATA(HEXAINT, "(0x|0X)" HEXDIGIT "+" INTEGER_SUFFIX "?"),
    TOKEN_DATA(INTLIT, "(" "(0x|0X)" HEXDIGIT "+" OR "0" OCTALDIGIT "*" OR \
        "[1-9]" DIGIT "*" ")" INTEGER_SUFFIX "?"),
    TOKEN_DATA(FLOATLIT,
        "(" DIGIT "*" Q(".") DIGIT "+" OR DIGIT "+" Q(".") ")"
        EXPONENT "?" FLOAT_SUFFIX "?" OR
        DIGIT "+" EXPONENT FLOAT_SUFFIX "?"),
    TOKEN_DATA(CCOMMENT, CCOMMENT),
    TOKEN_DATA(CPPCOMMENT, Q("/") Q("/[^\\n\\r]*") NEWLINEDEF ),
    TOKEN_DATA(CHARLIT, CHAR_SPEC "'"
        "(" ESCAPESEQ OR "[^\\n\\r']" OR UNIVERSALCHAR ")+" "'"),
    TOKEN_DATA(STRINGLIT, CHAR_SPEC Q("\"")
        "(" ESCAPESEQ OR "[^\\n\\r\"]" OR UNIVERSALCHAR ")*" Q("\"")),
    TOKEN_DATA(SPACE, BLANK "+"),
    TOKEN_DATA(SPACE2, "[\\v\\f]+"),
    // a backslash followed by a newline is dropped from the token stream
    TOKEN_DATA_EX(CONTLINE, Q("\\") "\\n", ignore_callback<iterator_t>),
    TOKEN_DATA(NEWLINE, NEWLINEDEF),
    TOKEN_DATA(POUND_POUND, "##" OR "#" TRI("=") OR TRI("=#") OR
        TRI("=") TRI("=") OR Q("%:") Q("%:")),
    TOKEN_DATA(POUND, POUNDDEF),
    TOKEN_DATA(ANY, "."),

    // NOTE(review): SUPPORMS_EXTENSIONS looks like a mangled spelling of
    // SUPPORT_MS_EXTENSIONS; it is kept as is because the name has to agree
    // with the macro guarding the token ids -- confirm against the build setup.
#if defined(SUPPORMS_EXTENSIONS)
    TOKEN_DATA(MSEXINT8, "__int8"),
    TOKEN_DATA(MSEXINT16, "__int16"),
    TOKEN_DATA(MSEXINT32, "__int32"),
    TOKEN_DATA(MSEXINT64, "__int64"),
    TOKEN_DATA(MSEXBASED, "_?" "_based"),
    TOKEN_DATA(MSEXDECLSPEC, "_?" "_declspec"),
    TOKEN_DATA(MSEXCDECL, "_?" "_cdecl"),
    TOKEN_DATA(MSEXFASTCALL, "_?" "_fastcall"),
    TOKEN_DATA(MSEXSTDCALL, "_?" "_stdcall"),
    TOKEN_DATA(MSEXTRY , "__try"),
    TOKEN_DATA(MSEXEXCEPT, "__except"),
    TOKEN_DATA(MSEXFINALLY, "__finally"),
    TOKEN_DATA(MSEXLEAVE, "__leave"),
    TOKEN_DATA(MSEXINLINE, "_?" "_inline"),
    TOKEN_DATA(MSEXASM, "_?" "_asm"),
#endif // defined(SUPPORMS_EXTENSIONS)
    { token_id(0) } // this should be the last entry
};
///////////////////////////////////////////////////////////////////////////////
// undefine macros, required for regular expression definitions
// Remove every helper macro that was introduced only for the token table, so
// that short names such as OR, Q, BLANK or DIGIT do not leak into the files
// including this header. (The LEXER_STATE_* / NUM_LEXER_STATES constants stay
// defined because the lexer constructor below still uses them.)
#undef POUNDDEF
#undef NEWLINEDEF
#undef BLANK
#undef CCOMMENT
#undef PPSPACE
#undef DIGIT
#undef OCTALDIGIT
#undef HEXDIGIT
#undef SIGN
#undef EXPONENT
#undef INTEGER_SUFFIX
#undef FLOAT_SUFFIX
#undef CHAR_SPEC
#undef BACKSLASH
#undef ESCAPESEQ
#undef HEXQUAD
#undef UNIVERSALCHAR
#undef Q
#undef TRI
#undef OR
#undef TOKEN_DATA
#undef TOKEN_DATA_EX
///////////////////////////////////////////////////////////////////////////////
// initialize cpp lexer with token data
// Construct the lexer: set up the base class with NUM_LEXER_STATES states and
// register every regular expression of the init_data table with it.
template <typename IteratorT, typename PositionT>
inline
lexer<IteratorT, PositionT>::lexer()
:   base_t(NUM_LEXER_STATES)
{
    // the table ends with a sentinel entry carrying the token id 0
    for (lexer_data const *entry = init_data; 0 != entry->tokenid; ++entry) {
        this->register_regex(entry->tokenregex, entry->tokenid,
            entry->tokencb, entry->lexerstate);
    }
}
///////////////////////////////////////////////////////////////////////////////
// get time of last compilation of this file
// Definition of the static time stamp helper. It is initialized from the
// compiler provided __DATE__ and __TIME__ strings (joined by a single blank),
// i.e. from the point in time this translation unit was compiled.
template <typename IteratorT, typename PositionT>
util::time_conversion_helper
lexer<IteratorT, PositionT>::compilation_time(__DATE__ " " __TIME__);
///////////////////////////////////////////////////////////////////////////////
//
// Make sure the given lexer has usable DFA tables.
//
// Nothing happens if the lexer already holds a compiled DFA. Otherwise the
// tables are loaded from the cache file "cpp_slex_lexer.dfa" in the current
// directory. If force_reinit is set, the file cannot be opened, or load()
// rejects it for the current compilation time stamp, the regular expressions
// are compiled from scratch and the cache file is rewritten (best effort: a
// cache file which cannot be opened for writing is silently skipped).
//
// NOTE(review): force_reinit has no effect once has_compiled_dfa() is true,
// because of the early return -- confirm that this is intended.
template <typename IteratorT, typename PositionT>
inline void
init_lexer (lexer<IteratorT, PositionT> &lexer, bool force_reinit = false)
{
    if (lexer.has_compiled_dfa())
        return;     // nothing to do

    // single definition of the cache file name used for reading and writing
    char const *const dfa_file = "cpp_slex_lexer.dfa";

    std::ifstream dfa_in(dfa_file, std::ios::in | std::ios::binary);

    if (force_reinit || !dfa_in.is_open() ||
        !lexer.load (dfa_in, lexer.get_compilation_time()))
    {
        std::cerr << "Compiling regular expressions for slex ...";
        dfa_in.close();
        lexer.create_dfa();

        std::ofstream dfa_out(dfa_file,
            std::ios::out | std::ios::binary | std::ios::trunc);

        if (dfa_out.is_open())
            lexer.save (dfa_out, lexer.get_compilation_time());
        std::cerr << " Done." << std::endl;
    }
}
///////////////////////////////////////////////////////////////////////////////
//
// lex_functor
//
///////////////////////////////////////////////////////////////////////////////
// Lexer functor: implements the lex_input_interface for the slex based lexer.
// Every instance scans its own input range, while the lexer object (and with
// it the DFA tables) is shared by all instances of one template
// instantiation.
template <typename IteratorT, typename PositionT = boost::spirit::file_position>
class lex_functor
:   public lex_input_interface<typename lexer<IteratorT, PositionT>::token_t>
{
public:
    typedef boost::spirit::position_iterator<IteratorT> iterator_t;
    typedef typename std::iterator_traits<IteratorT>::value_type char_t;
    typedef std::basic_string<char_t> string_t;
    typedef typename lexer<IteratorT, PositionT>::token_t token_t;

    // Wrap the range [first, last) of the file 'fname' into a position
    // iterator and make sure the shared lexer has its DFA tables.
    // The member 'last' is left default constructed -- presumably the default
    // state of position_iterator denotes the end of input; TODO confirm.
    lex_functor(IteratorT const &first, IteratorT const &last,
            std::string const &fname)
    :   first(first, last, fname.c_str())
    {
        init_lexer(dfa_lexer);  // initialize lexer dfa tables
    }

    // get the next token from the input stream; a default constructed token
    // (id T_EOF) is returned when next_token() reports -1
    token_t get()
    {
        token_t token;
        iterator_t prev = first;
        token_id id = token_id(dfa_lexer.next_token(first, last));

        if (-1 != id)
            token = token_t(id, string_t(prev, first), prev.get_position());
        return token;
    }

    // get the position of the actual token
    //PositionT get_position()
    //{ return first.get_position(); }

private:
    iterator_t first;
    iterator_t last;

    // Shared lexer instance, needed only once. It must not be named 'lexer':
    // that would re-declare the class template name which has already been
    // used inside this class, which is ill-formed and rejected by some
    // compilers ("declaration changes meaning of 'lexer'").
    static lexer<IteratorT, PositionT> dfa_lexer;
};

template <typename IteratorT, typename PositionT>
lexer<IteratorT, PositionT> lex_functor<IteratorT, PositionT>::dfa_lexer;
///////////////////////////////////////////////////////////////////////////////
//
// The new_lexer_gen<>::new_lexer function (declared in cpp_slex_token.hpp)
// should be defined inline, if the lex_functor shouldn't be instantiated
// separately from the lex_iterator.
//
// Separate (explicit) instantiation helps to reduce compilation time.
//
///////////////////////////////////////////////////////////////////////////////
// SLEX_NEW_LEXER_INLINE expands to nothing when the lexer is instantiated
// separately (CPP_SEPARATE_LEXER_INSTANTIATION is defined) and to 'inline'
// otherwise.
#if defined(CPP_SEPARATE_LEXER_INSTANTIATION)
#define SLEX_NEW_LEXER_INLINE
#else
#define SLEX_NEW_LEXER_INLINE inline
#endif
///////////////////////////////////////////////////////////////////////////////
//
// The 'new_lexer' function allows the opaque generation of a new lexer object.
// It is coupled to the token type to allow decoupling the lexer/token
// configurations at compile time.
//
// This function is declared inside the cpp_slex_token.hpp file, which is
// referenced by the source file calling the lexer and by the source file which
// instantiates the lex_functor. But it is defined here, so it will be
// instantiated only while compiling the source file which instantiates the
// lex_functor. While the cpp_slex_token.hpp file may be included everywhere,
// this file (cpp_slex_lexer.hpp) should be included only once. This allows
// decoupling the lexer interface from the lexer implementation and reduces
// compilation time.
//
// The returned object is allocated with 'new'.
//
///////////////////////////////////////////////////////////////////////////////
template <typename IteratorT, typename PositionT>
SLEX_NEW_LEXER_INLINE
lex_input_interface<lex_token<IteratorT, PositionT> > *
new_lexer_gen<IteratorT, PositionT>::new_lexer(IteratorT const &first,
IteratorT const &last, std::string const &fname)
{
return new lex_functor<IteratorT, PositionT>(first, last, fname);
}
#undef SLEX_NEW_LEXER_INLINE
///////////////////////////////////////////////////////////////////////////////
} // namespace slex
} // namespace cpplexer
#endif // !defined(_CPP_SLEX_LEXER_HPP__5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6__INCLUDED_)

View File

@@ -0,0 +1,112 @@
/*=============================================================================
A Standard compliant C++ preprocessor
SLex (Spirit Lex) based C++ lexer token definition
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_CPP_SLEX_TOKEN_HPP__53A13BD2_FBAA_444B_9B8B_FCB225C2BBA8__INCLUDED_)
#define _CPP_SLEX_TOKEN_HPP__53A13BD2_FBAA_444B_9B8B_FCB225C2BBA8__INCLUDED_
#include <boost/spirit/iterator/position_iterator.hpp>
///////////////////////////////////////////////////////////////////////////////
namespace cpplexer {
namespace slex {
///////////////////////////////////////////////////////////////////////////////
//
// new_lexer_gen
//
///////////////////////////////////////////////////////////////////////////////
// forward declaration of the token type; this is also the place where the
// default position type (boost::spirit::file_position) is fixed
template <typename IteratorT, typename PositionT = boost::spirit::file_position>
class lex_token;
// Factory for lexer objects producing lex_token<IteratorT, PositionT> tokens.
// Only the declaration of new_lexer is given here.
template <typename IteratorT, typename PositionT = boost::spirit::file_position>
struct new_lexer_gen
{
// The new_lexer function allows the opaque generation of a new lexer object.
// It is coupled to the token type to allow decoupling the lexer/token
// configurations at compile time.
// It takes the input range [first, last) and the name of the file the input
// belongs to, and returns a pointer to the lexer interface.
static lex_input_interface<lex_token<IteratorT, PositionT> > *
new_lexer(IteratorT const &first, IteratorT const &last,
std::string const &fname);
};
///////////////////////////////////////////////////////////////////////////////
//
// lex_token
//
///////////////////////////////////////////////////////////////////////////////
// A single token as produced by the slex based lexer: the token id, the
// matched text and the position at which the text was found.
template <typename IteratorT, typename PositionT>
class lex_token
{
public:
    typedef IteratorT iterator_t;
    typedef PositionT position_t;
    typedef typename std::iterator_traits<IteratorT>::value_type char_t;
    typedef std::basic_string<char_t> string_t;
    typedef new_lexer_gen<IteratorT, PositionT> lexer_gen_t;

    // a default constructed token represents the end of the input (T_EOF)
    lex_token() : id(T_EOF) {}

    lex_token(token_id id_, string_t const &value_, PositionT const &pos_)
    :   id(id_), value(value_), pos(pos_)
    {}

    // accessors
    operator token_id() const { return id; }
    string_t const &get_value() const { return value; }
    position_t const &get_position() const { return pos; }

#if defined(BOOST_SPIRIT_DEBUG)
    // debug support: writes "<token name>(<text>)", where carriage returns
    // and newlines inside the text are shown as \r and \n
    void print (std::ostream &stream) const
    {
        stream << cpplexer::get_token_name(id) << "(";

        typename string_t::size_type const len = value.size();
        for (typename string_t::size_type pos_in_text = 0;
             pos_in_text != len; ++pos_in_text)
        {
            char_t const ch = value[pos_in_text];

            if ('\r' == ch)
                stream << "\\r";
            else if ('\n' == ch)
                stream << "\\n";
            else
                stream << ch;
        }
        stream << ")";
    }
#endif // defined(BOOST_SPIRIT_DEBUG)

private:
    token_id id;        // the token id
    string_t value;     // the text, which was parsed into this token
    PositionT pos;      // the original file position
};
#if defined(BOOST_SPIRIT_DEBUG)
// Stream inserter for tokens (debug builds only): forwards to
// lex_token::print and hands the stream back to allow chaining.
template <typename IteratorT, typename PositionT>
inline std::ostream &
operator<< (std::ostream &stream, lex_token<IteratorT, PositionT> const &object)
{
    return object.print(stream), stream;
}
#endif // defined(BOOST_SPIRIT_DEBUG)
///////////////////////////////////////////////////////////////////////////////
} // namespace slex
} // namespace cpplexer
#endif // !defined(_CPP_SLEX_TOKEN_HPP__53A13BD2_FBAA_444B_9B8B_FCB225C2BBA8__INCLUDED_)

View File

@@ -0,0 +1,125 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_TIME_CONVERSION_HELPER_HPP__DA97E389_1797_43BA_82AE_B071064B3EF4__INCLUDED_)
#define _TIME_CONVERSION_HELPER_HPP__DA97E389_1797_43BA_82AE_B071064B3EF4__INCLUDED_
#include <ctime>
#include <boost/spirit/core.hpp>
#include <boost/spirit/symbols.hpp>
namespace cpplexer {
namespace slex {
namespace util {
///////////////////////////////////////////////////////////////////////////////
// define, whether the rule's should generate some debug output
#define TRACE_CPP_TIME_CONVERSION \
(BOOST_SPIRIT_DEBUG_FLAGS_CPP & BOOST_SPIRIT_DEBUG_FLAGS_TIME_CONVERSION) \
/**/
///////////////////////////////////////////////////////////////////////////////
// Grammar for parsing a date/time string generated by the C++ compiler from
// __DATE__ and __TIME__
//
// The parsed fields are stored into the time_stamp member; the year is
// stored as parsed and has to be adjusted by a call to correct_year()
// afterwards.
class time_conversion_grammar :
public boost::spirit::grammar<time_conversion_grammar>
{
public:
// start with an all-zero time stamp and a not yet corrected year
time_conversion_grammar() : fYearIsCorrected(false)
{
memset (&time_stamp, 0, sizeof(std::tm));
BOOST_SPIRIT_TRACE_RULE_NAME(*this, "time_conversion_grammar",
TRACE_CPP_TIME_CONVERSION);
}
template <typename ScannerT>
struct definition {
definition(time_conversion_grammar const &self)
{
using boost::spirit::int_p;
using boost::spirit::add;
// month abbreviations; the array index (0..11) is the value stored for
// the matched month
char const *m[] = {
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
for (int i = 0; i < 12; ++i)
add (month, m[i], i);
// the fields are assigned in the order month, day, year, hour, minute,
// second; the separators between them are not part of this rule
time_rule // expected format is 'Dec 29 2001 11:23:59'
= month[boost::spirit::assign(self.time_stamp.tm_mon)]
>> int_p[boost::spirit::assign(self.time_stamp.tm_mday)]
>> int_p[boost::spirit::assign(self.time_stamp.tm_year)]
>> int_p[boost::spirit::assign(self.time_stamp.tm_hour)]
>> int_p[boost::spirit::assign(self.time_stamp.tm_min)]
>> int_p[boost::spirit::assign(self.time_stamp.tm_sec)]
;
BOOST_SPIRIT_TRACE_RULE(time_rule, TRACE_CPP_TIME_CONVERSION);
}
boost::spirit::rule<ScannerT> time_rule;
boost::spirit::symbols<> month;
boost::spirit::rule<ScannerT> const&
start() const { return time_rule; }
};
// convert the parsed calendar year into the 'years since 1900' form used by
// std::tm; the flag makes sure the subtraction is applied only once
void correct_year()
{
if (!fYearIsCorrected) {
time_stamp.tm_year -= 1900;
fYearIsCorrected = true;
}
}
// mutable: the semantic actions write to it through the const reference the
// definition constructor receives
mutable std::tm time_stamp;
bool fYearIsCorrected;
};
///////////////////////////////////////////////////////////////////////////////
// calculate the time of the compilation as a std::time_t to ensure correctness
// of the saved dfa table
// Converts a "<__DATE__> <__TIME__>" string into a std::time_t.
//
// The string is parsed with the time_conversion_grammar, skipping white space
// and ':' characters. If the string cannot be parsed completely, or if the
// parsed calendar time cannot be represented (std::mktime reports this by
// returning -1), the stored time stays 0 and the assertion fires.
class time_conversion_helper
{
public:
    time_conversion_helper(char const *act_time) : compile_time(0)
    {
        using namespace boost::spirit;

        time_conversion_grammar g;

        if (parse (act_time, g, space_p | ch_p(':')).full) {
            g.correct_year();

            // mktime signals failure with -1, not with 0; do not take such
            // a value for a valid time stamp
            std::time_t const converted = std::mktime(&g.time_stamp);
            if (static_cast<std::time_t>(-1) != converted)
                compile_time = converted;
        }
        BOOST_ASSERT(0 != compile_time);
    }

    // the calculated time stamp (0 if the conversion failed)
    std::time_t get_time() const { return compile_time; }

private:
    std::time_t compile_time;
};
///////////////////////////////////////////////////////////////////////////////
#undef TRACE_CPP_TIME_CONVERSION
} // namespace util
} // namespace slex
} // namespace cpplexer
#endif // !defined(_TIME_CONVERSION_HELPER_HPP__DA97E389_1797_43BA_82AE_B071064B3EF4__INCLUDED_)

View File

@@ -0,0 +1,49 @@
/*=============================================================================
A Standard compliant C++ preprocessor
SLex (Spirit Lex) based C++ lexer
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_SLEX_FUNCTOR_HPP__557F0176_1340_4DC0_8E5A_1297F6AF9D46__INCLUDED_)
#define _SLEX_FUNCTOR_HPP__557F0176_1340_4DC0_8E5A_1297F6AF9D46__INCLUDED_
#include "cpplexer/slex/cpp_slex_lexer.hpp"
///////////////////////////////////////////////////////////////////////////////
namespace cpplexer {
///////////////////////////////////////////////////////////////////////////////
//
// slex_functor class
//
// The slex_functor template class is provided for simplicity reasons. It
// imports the unified slex::lex_functor lexer interface template under
// a new name into the cpplexer namespace.
//
// As an alternative, the lex_functor can be explicitly qualified with
// its namespace (or imported by a using directive). This way it is
// possible to get different lexer implementations by simply switching
// the namespace used.
//
///////////////////////////////////////////////////////////////////////////////
// Re-exports slex::lex_functor<IteratorT> under the name
// cpplexer::slex_functor; adds no members of its own.
// (struct inheritance is public by default)
template <typename IteratorT>
struct slex_functor : slex::lex_functor<IteratorT> {};
///////////////////////////////////////////////////////////////////////////////
} // namespace cpplexer
#endif // !defined(_SLEX_FUNCTOR_HPP__557F0176_1340_4DC0_8E5A_1297F6AF9D46__INCLUDED_)

View File

@@ -0,0 +1,33 @@
# Automake input for the cpplexer tests.

# Test programs; check_PROGRAMS are built only when running 'make check'.
check_PROGRAMS = test_re2c_lexer test_slex_lexer
# Sources and headers of the re2c based lexer test.
test_re2c_lexer_SOURCES = \
test_re2c_lexer.cpp \
test_re2c_lexer.hpp \
instantiate_re2c_lexer.cpp \
../re2clex/aq.cpp \
../re2clex/aq.h \
../re2clex/cpp.re \
../re2clex/cpp.re.cpp \
../re2clex/scanner.h \
../re2clex/cpp_re2c_lexer.hpp \
../re2clex/cpp_re2c_token.hpp \
../cpp_token_ids.hpp \
../cpp_lex_interface.hpp \
../cpp_lex_iterator.hpp
# Sources and headers of the SLex (Spirit Lex) based lexer test.
test_slex_lexer_SOURCES = \
test_slex_lexer.cpp \
test_slex_lexer.hpp \
instantiate_slex_lexer.cpp \
../slex/cpp_slex_lexer.hpp \
../slex/cpp_slex_token.hpp \
../cpp_token_ids.hpp \
../cpp_lex_interface.hpp \
../cpp_lex_iterator.hpp
# Script executed by 'make check'.
TESTS = run_tests.sh
# Additional files shipped with the distribution; presumably the lexer test
# input and the expected output of each lexer (verify against run_tests.sh).
EXTRA_DIST = \
lextest.in \
lextest.re2c.output \
lextest.slex.output

View File

@@ -0,0 +1,50 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Test the Re2C based C++ lexer
Explicit instantiation of the lex_functor generation function
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#include <string>
#include "test_re2c_lexer.hpp" // config data of the re2c lexer test
#if defined(CPP_SEPARATE_LEXER_INSTANTIATION)
#include "cpplexer/cpp_token_ids.hpp"
#include "cpplexer/cpp_lex_iterator.hpp"
#include "cpplexer/re2clex/cpp_re2c_token.hpp"
///////////////////////////////////////////////////////////////////////////////
// The following file needs to be included only once throughout the whole
// program.
#include "cpplexer/re2clex/cpp_re2c_lexer.hpp"
///////////////////////////////////////////////////////////////////////////////
//
// This instantiates the correct 'new_lexer' function, which generates the
// C++ lexer used in this sample.
//
// This is moved into a separate compilation unit to decouple the compilation
// of the C++ lexer from the compilation of the other modules, which helps to
// reduce compilation time.
//
// The template parameter(s) supplied should be identical to the parameters
// supplied while instantiating the lex_iterator<> template (see the file
// test_re2c_lexer.cpp).
//
// Note: an explicit instantiation of a class template requires the class-key
// ('struct' or 'class') in standard C++; without it conforming compilers
// reject the declaration. new_lexer_gen is assumed to be a class template
// here -- verify against cpp_re2c_lexer.hpp.
//
///////////////////////////////////////////////////////////////////////////////
template struct cpplexer::re2clex::new_lexer_gen<>;
#endif // defined(CPP_SEPARATE_LEXER_INSTANTIATION)

View File

@@ -0,0 +1,50 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Test the SLex (Spirit Lex) based C++ lexer
Explicit instantiation of the lex_functor generation function
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#include <string>
#include "test_slex_lexer.hpp" // config data
#if defined(CPP_SEPARATE_LEXER_INSTANTIATION)
#include "cpplexer/cpp_token_ids.hpp"
#include "cpplexer/cpp_lex_iterator.hpp"
#include "cpplexer/slex/cpp_slex_token.hpp"
///////////////////////////////////////////////////////////////////////////////
// The following file needs to be included only once throughout the whole
// program.
#include "cpplexer/slex/cpp_slex_lexer.hpp"
///////////////////////////////////////////////////////////////////////////////
//
// This instantiates the correct 'new_lexer' function, which generates the
// C++ lexer used in this sample.
//
// This is moved into a separate compilation unit to decouple the compilation
// of the C++ lexer from the compilation of the other modules, which helps to
// reduce compilation time.
//
// The template parameter(s) supplied should be identical to the parameters
// supplied while instantiating the lex_iterator<> template (see the file
// test_slex_lexer.cpp).
//
// Note: an explicit instantiation of a class template requires the class-key
// ('struct' or 'class') in standard C++; without it conforming compilers
// reject the declaration. new_lexer_gen is assumed to be a class template
// here -- verify against cpp_slex_lexer.hpp.
//
///////////////////////////////////////////////////////////////////////////////
template struct cpplexer::slex::new_lexer_gen<std::string::iterator>;
#endif // defined(CPP_SEPARATE_LEXER_INSTANTIATION)

View File

@@ -0,0 +1,98 @@
/* this is a c comment */
/**/
/***/
/* now a multi-line c comment
this is the second line */
// a c++ comment
// now for all the identifiers
asm auto asm auto bool break case catch char class const const_cast
continue default delete do double dynamic_cast else enum explicit export
extern false float for friend goto if inline int long mutable namespace new
operator private protected public register reinterpret_cast return short
signed sizeof static static_cast struct switch template this throw true
try typedef typeid typename union unsigned using virtual void volatile
wchar_t while
// now here's the punctuation
{ ??< <% } ??> %> [
??( <: ] ??) :> #
??= %: ## #??= ??=# ??=??= %:%:
( ) ; : ...
? :: . .* +
- * / % ^
??' xor & bitand | bitor
??! ~ ??- compl ! not
= < > += -=
*= /= %= ^= xor_eq
??'= &= and_eq |= or_eq ??!=
<< >> >>= <<= == !=
// here's some identifiers
ident
ident1
ident1a
_ident
IdEnT_A
// hex ints
0x123a
0X123B
// octal ints
0
012375423
// decimal ints
1
1234999
// ints with suffixes
123l 123L 123u 123U
123ul 123uL 123Ul 123UL
123lu 123Lu 123lU 123LU
// floats
.123
1.1
1.
.123e345
1.1E+2
1.e-5
22e3
33E+4
43e-3
.123f 1.1F 1.l .123e345L
1.1E+2fl 1.e-5fL 22e3Fl 33E+4FL
43e-3lf .123Lf 1.1lF 1.LF
// chars
'a' '0' '"'
L'a' L'0' L'"'
'\a' '??/b' '\f' '??/n' '\r' '\t' '\v' '\?' '\'' '\"' '\\'
'\x1' '\xaBcD123' '\1' '\12' '\123'
'\u1234' '\UABCD1234'
// Strings
"" "a" "01234" "'"
L"" L"a" L"01234" L"'"
"\a??/b\f??/n\r\t\v\?\'\"\\"
"\x1\xaBcD123\1\12\123"
"\u1234\UABCD1234"
"/* a faked c comment */"
// pre-processor directives
#include <io.h>
# include"r.h"
#include SOME_HEADER
#if
#ifdef
#ifndef
#elif
#endif
#define
#undef
#line
#error
#pragma
/* another c comment */

View File

@@ -0,0 +1,594 @@
matched token CCOMMENT(#386): >/* this is a c comment */<
matched token NEWLINE(#393): >
<
matched token CCOMMENT(#386): >/**/<
matched token NEWLINE(#393): >
<
matched token CCOMMENT(#386): >/***/<
matched token NEWLINE(#393): >
<
matched token CCOMMENT(#386): >/* now a multi-line c comment
this is the second line */<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// a c++ comment
<
matched token CPPCOMMENT(#387): >// now for all the identifiers
<
matched token ASM(#305): >asm<
matched token SPACE(#391): > <
matched token AUTO(#306): >auto<
matched token SPACE(#391): > <
matched token ASM(#305): >asm<
matched token SPACE(#391): > <
matched token AUTO(#306): >auto<
matched token SPACE(#391): > <
matched token BOOL(#307): >bool<
matched token SPACE(#391): > <
matched token BREAK(#310): >break<
matched token SPACE(#391): > <
matched token CASE(#311): >case<
matched token SPACE(#391): > <
matched token CATCH(#312): >catch<
matched token SPACE(#391): > <
matched token CHAR(#313): >char<
matched token SPACE(#391): > <
matched token CLASS(#314): >class<
matched token SPACE(#391): > <
matched token CONST(#315): >const<
matched token SPACE(#391): > <
matched token CONSTCAST(#316): >const_cast<
matched token NEWLINE(#393): >
<
matched token CONTINUE(#317): >continue<
matched token SPACE(#391): > <
matched token DEFAULT(#318): >default<
matched token SPACE(#391): > <
matched token DELETE(#320): >delete<
matched token SPACE(#391): > <
matched token DO(#321): >do<
matched token SPACE(#391): > <
matched token DOUBLE(#322): >double<
matched token SPACE(#391): > <
matched token DYNAMICCAST(#323): >dynamic_cast<
matched token SPACE(#391): > <
matched token ELSE(#324): >else<
matched token SPACE(#391): > <
matched token ENUM(#325): >enum<
matched token SPACE(#391): > <
matched token EXPLICIT(#326): >explicit<
matched token SPACE(#391): > <
matched token EXPORT(#327): >export<
matched token NEWLINE(#393): >
<
matched token EXTERN(#328): >extern<
matched token SPACE(#391): > <
matched token FALSE(#308): >false<
matched token SPACE(#391): > <
matched token FLOAT(#329): >float<
matched token SPACE(#391): > <
matched token FOR(#330): >for<
matched token SPACE(#391): > <
matched token FRIEND(#331): >friend<
matched token SPACE(#391): > <
matched token GOTO(#332): >goto<
matched token SPACE(#391): > <
matched token IF(#333): >if<
matched token SPACE(#391): > <
matched token INLINE(#334): >inline<
matched token SPACE(#391): > <
matched token INT(#335): >int<
matched token SPACE(#391): > <
matched token LONG(#336): >long<
matched token SPACE(#391): > <
matched token MUTABLE(#337): >mutable<
matched token SPACE(#391): > <
matched token NAMESPACE(#338): >namespace<
matched token SPACE(#391): > <
matched token NEW(#339): >new<
matched token NEWLINE(#393): >
<
matched token OPERATOR(#340): >operator<
matched token SPACE(#391): > <
matched token PRIVATE(#341): >private<
matched token SPACE(#391): > <
matched token PROTECTED(#342): >protected<
matched token SPACE(#391): > <
matched token PUBLIC(#343): >public<
matched token SPACE(#391): > <
matched token REGISTER(#344): >register<
matched token SPACE(#391): > <
matched token REINTERPRETCAST(#345): >reinterpret_cast<
matched token SPACE(#391): > <
matched token RETURN(#346): >return<
matched token SPACE(#391): > <
matched token SHORT(#347): >short<
matched token NEWLINE(#393): >
<
matched token SIGNED(#348): >signed<
matched token SPACE(#391): > <
matched token SIZEOF(#349): >sizeof<
matched token SPACE(#391): > <
matched token STATIC(#350): >static<
matched token SPACE(#391): > <
matched token STATICCAST(#351): >static_cast<
matched token SPACE(#391): > <
matched token STRUCT(#352): >struct<
matched token SPACE(#391): > <
matched token SWITCH(#353): >switch<
matched token SPACE(#391): > <
matched token TEMPLATE(#354): >template<
matched token SPACE(#391): > <
matched token THIS(#355): >this<
matched token SPACE(#391): > <
matched token THROW(#356): >throw<
matched token SPACE(#391): > <
matched token TRUE(#309): >true<
matched token NEWLINE(#393): >
<
matched token TRY(#357): >try<
matched token SPACE(#391): > <
matched token TYPEDEF(#358): >typedef<
matched token SPACE(#391): > <
matched token TYPEID(#359): >typeid<
matched token SPACE(#391): > <
matched token TYPENAME(#360): >typename<
matched token SPACE(#391): > <
matched token UNION(#361): >union<
matched token SPACE(#391): > <
matched token UNSIGNED(#362): >unsigned<
matched token SPACE(#391): > <
matched token USING(#363): >using<
matched token SPACE(#391): > <
matched token VIRTUAL(#364): >virtual<
matched token SPACE(#391): > <
matched token VOID(#365): >void<
matched token SPACE(#391): > <
matched token VOLATILE(#366): >volatile<
matched token NEWLINE(#393): >
<
matched token WCHART(#367): >wchar_t<
matched token SPACE(#391): > <
matched token WHILE(#368): >while<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// now here's the punctuation
<
matched token LEFTBRACE(#274): >{<
matched token SPACE(#391): > <
matched token LEFTBRACE(#274): >??<<
matched token SPACE(#391): > <
matched token LEFTBRACE(#274): ><%<
matched token SPACE(#391): > <
matched token RIGHTBRACE(#293): >}<
matched token SPACE(#391): > <
matched token RIGHTBRACE(#293): >??><
matched token SPACE(#391): > <
matched token RIGHTBRACE(#293): >%><
matched token SPACE(#391): > <
matched token LEFTBRACKET(#278): >[<
matched token SPACE(#391): > <
matched token NEWLINE(#393): >
<
matched token LEFTBRACKET(#278): >??(<
matched token SPACE(#391): > <
matched token LEFTBRACKET(#278): ><:<
matched token SPACE(#391): > <
matched token RIGHTBRACKET(#295): >]<
matched token SPACE(#391): > <
matched token RIGHTBRACKET(#295): >??)<
matched token SPACE(#391): > <
matched token RIGHTBRACKET(#295): >:><
matched token SPACE(#391): > <
matched token POUND(#395): >#<
matched token NEWLINE(#393): >
<
matched token POUND(#395): >??=<
matched token SPACE(#391): > <
matched token POUND(#395): >%:<
matched token SPACE(#391): > <
matched token POUND_POUND(#394): >##<
matched token SPACE(#391): > <
matched token POUND_POUND(#394): >#??=<
matched token SPACE(#391): > <
matched token POUND_POUND(#394): >??=#<
matched token SPACE(#391): > <
matched token POUND_POUND(#394): >??=??=<
matched token SPACE(#391): > <
matched token POUND_POUND(#394): >%:%:<
matched token SPACE(#391): > <
matched token NEWLINE(#393): >
<
matched token LEFTPAREN(#277): >(<
matched token SPACE(#391): > <
matched token RIGHTPAREN(#294): >)<
matched token SPACE(#391): > <
matched token SEMICOLON(#297): >;<
matched token SPACE(#391): > <
matched token COLON(#265): >:<
matched token SPACE(#391): > <
matched token ELLIPSIS(#270): >...<
matched token SPACE(#391): > <
matched token NEWLINE(#393): >
<
matched token QUESTION_MARK(#292): >?<
matched token SPACE(#391): > <
matched token COLON_COLON(#296): >::<
matched token SPACE(#391): > <
matched token DOT(#268): >.<
matched token SPACE(#391): > <
matched token DOTSTAR(#269): >.*<
matched token SPACE(#391): > <
matched token PLUS(#287): >+<
matched token SPACE(#391): > <
matched token NEWLINE(#393): >
<
matched token MINUS(#279): >-<
matched token SPACE(#391): > <
matched token STAR(#302): >*<
matched token SPACE(#391): > <
matched token DIVIDE(#266): >/<
matched token SPACE(#391): > <
matched token PERCENT(#282): >%<
matched token SPACE(#391): > <
matched token XOR(#262): >^<
matched token SPACE(#391): > <
matched token NEWLINE(#393): >
<
matched token XOR(#262): >??'<
matched token SPACE(#391): > <
matched token XOR(#262): >xor<
matched token SPACE(#391): > <
matched token AND(#256): >&<
matched token SPACE(#391): > <
matched token AND(#256): >bitand<
matched token SPACE(#391): > <
matched token OR(#260): >|<
matched token SPACE(#391): > <
matched token OR(#260): >bitor<
matched token NEWLINE(#393): >
<
matched token OR(#260): >??!<
matched token SPACE(#391): > <
matched token COMPL(#303): >~<
matched token SPACE(#391): > <
matched token COMPL(#303): >??-<
matched token SPACE(#391): > <
matched token COMPL(#303): >compl<
matched token SPACE(#391): > <
matched token NOT(#284): >!<
matched token SPACE(#391): > <
matched token NOT(#284): >not<
matched token NEWLINE(#393): >
<
matched token ASSIGN(#258): >=<
matched token SPACE(#391): > <
matched token LESS(#275): ><<
matched token SPACE(#391): > <
matched token GREATER(#272): >><
matched token SPACE(#391): > <
matched token PLUSASSIGN(#288): >+=<
matched token SPACE(#391): > <
matched token MINUSASSIGN(#280): >-=<
matched token SPACE(#391): > <
matched token NEWLINE(#393): >
<
matched token STARASSIGN(#304): >*=<
matched token SPACE(#391): > <
matched token DIVIDEASSIGN(#267): >/=<
matched token SPACE(#391): > <
matched token PERCENTASSIGN(#283): >%=<
matched token SPACE(#391): > <
matched token XORASSIGN(#263): >^=<
matched token SPACE(#391): > <
matched token XORASSIGN(#263): >xor_eq<
matched token NEWLINE(#393): >
<
matched token XORASSIGN(#263): >??'=<
matched token SPACE(#391): > <
matched token ANDASSIGN(#259): >&=<
matched token SPACE(#391): > <
matched token ANDASSIGN(#259): >and_eq<
matched token SPACE(#391): > <
matched token ORASSIGN(#261): >|=<
matched token SPACE(#391): > <
matched token ORASSIGN(#261): >or_eq<
matched token SPACE(#391): > <
matched token ORASSIGN(#261): >??!=<
matched token SPACE(#391): > <
matched token NEWLINE(#393): >
<
matched token SHIFTLEFT(#298): ><<<
matched token SPACE(#391): > <
matched token SHIFTRIGHT(#300): >>><
matched token SPACE(#391): > <
matched token SHIFTRIGHTASSIGN(#301): >>>=<
matched token SPACE(#391): > <
matched token SHIFTLEFTASSIGN(#299): ><<=<
matched token SPACE(#391): > <
matched token EQUAL(#271): >==<
matched token SPACE(#391): > <
matched token NOTEQUAL(#285): >!=<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// here's some identifiers
<
matched token IDENTIFIER(#380): >ident<
matched token NEWLINE(#393): >
<
matched token IDENTIFIER(#380): >ident1<
matched token NEWLINE(#393): >
<
matched token IDENTIFIER(#380): >ident1a<
matched token NEWLINE(#393): >
<
matched token IDENTIFIER(#380): >_ident<
matched token NEWLINE(#393): >
<
matched token IDENTIFIER(#380): >IdEnT_A<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// hex ints
<
matched token INTLIT(#384): >0x123a<
matched token NEWLINE(#393): >
<
matched token INTLIT(#384): >0X123B<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// octal ints
<
matched token INTLIT(#384): >0<
matched token NEWLINE(#393): >
<
matched token INTLIT(#384): >012375423<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// decimal ints
<
matched token INTLIT(#384): >1<
matched token NEWLINE(#393): >
<
matched token INTLIT(#384): >1234999<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// ints with suffixes
<
matched token INTLIT(#384): >123l<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123L<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123u<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123U<
matched token NEWLINE(#393): >
<
matched token INTLIT(#384): >123ul<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123uL<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123Ul<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123UL<
matched token NEWLINE(#393): >
<
matched token INTLIT(#384): >123lu<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123Lu<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123lU<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123LU<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// floats
<
matched token FLOATLIT(#385): >.123<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >1.1<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >1.<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >.123e345<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >1.1E+2<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >1.e-5<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >22e3<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >33E+4<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >43e-3<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >.123f<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >1.1F<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >1.l<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >.123e345L<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >1.1E+2fl<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >1.e-5fL<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >22e3Fl<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >33E+4FL<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >43e-3lf<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >.123Lf<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >1.1lF<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >1.LF<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// chars
<
matched token CHARLIT(#388): >'a'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'0'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'"'<
matched token NEWLINE(#393): >
<
matched token CHARLIT(#388): >L'a'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >L'0'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >L'"'<
matched token NEWLINE(#393): >
<
matched token CHARLIT(#388): >'\a'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'??/b'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\f'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'??/n'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\r'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\t'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\v'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\?'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\''<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\"'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\\'<
matched token NEWLINE(#393): >
<
matched token CHARLIT(#388): >'\x1'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\xaBcD123'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\1'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\12'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\123'<
matched token NEWLINE(#393): >
<
matched token CHARLIT(#388): >'\u1234'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\UABCD1234'<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// Strings
<
matched token STRINGLIT(#389): >""<
matched token SPACE(#391): > <
matched token STRINGLIT(#389): >"a"<
matched token SPACE(#391): > <
matched token STRINGLIT(#389): >"01234"<
matched token SPACE(#391): > <
matched token STRINGLIT(#389): >"'"<
matched token NEWLINE(#393): >
<
matched token STRINGLIT(#389): >L""<
matched token SPACE(#391): > <
matched token STRINGLIT(#389): >L"a"<
matched token SPACE(#391): > <
matched token STRINGLIT(#389): >L"01234"<
matched token SPACE(#391): > <
matched token STRINGLIT(#389): >L"'"<
matched token NEWLINE(#393): >
<
matched token STRINGLIT(#389): >"\a??/b\f??/n\r\t\v\?\'\"\\"<
matched token NEWLINE(#393): >
<
matched token STRINGLIT(#389): >"\x1\xaBcD123\1\12\123"<
matched token NEWLINE(#393): >
<
matched token STRINGLIT(#389): >"\u1234\UABCD1234"<
matched token NEWLINE(#393): >
<
matched token STRINGLIT(#389): >"/* a faked c comment */"<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// pre-processor directives
<
matched token PP_HHEADER(#399): >#include <io.h><
matched token NEWLINE(#393): >
<
matched token SPACE(#391): > <
matched token PP_QHEADER(#398): ># include"r.h"<
matched token NEWLINE(#393): >
<
matched token PP_INCLUDE(#397): >#include <
matched token IDENTIFIER(#380): >SOME_HEADER<
matched token NEWLINE(#393): >
<
matched token PP_IF(#370): >#if<
matched token NEWLINE(#393): >
<
matched token PP_IFDEF(#371): >#ifdef<
matched token NEWLINE(#393): >
<
matched token PP_IFNDEF(#372): >#ifndef<
matched token NEWLINE(#393): >
<
matched token PP_ELIF(#373): >#elif<
matched token NEWLINE(#393): >
<
matched token PP_ENDIF(#374): >#endif<
matched token NEWLINE(#393): >
<
matched token PP_DEFINE(#369): >#define<
matched token NEWLINE(#393): >
<
matched token PP_UNDEF(#378): >#undef<
matched token NEWLINE(#393): >
<
matched token PP_LINE(#376): >#line<
matched token NEWLINE(#393): >
<
matched token PP_ERROR(#375): >#error<
matched token NEWLINE(#393): >
<
matched token PP_PRAGMA(#377): >#pragma<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CCOMMENT(#386): >/* another c comment */<
matched token NEWLINE(#393): >
<

View File

@@ -0,0 +1,594 @@
matched token CCOMMENT(#386): >/* this is a c comment */<
matched token NEWLINE(#393): >
<
matched token CCOMMENT(#386): >/**/<
matched token NEWLINE(#393): >
<
matched token CCOMMENT(#386): >/***/<
matched token NEWLINE(#393): >
<
matched token CCOMMENT(#386): >/* now a multi-line c comment
this is the second line */<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// a c++ comment
<
matched token CPPCOMMENT(#387): >// now for all the identifiers
<
matched token ASM(#305): >asm<
matched token SPACE(#391): > <
matched token AUTO(#306): >auto<
matched token SPACE(#391): > <
matched token ASM(#305): >asm<
matched token SPACE(#391): > <
matched token AUTO(#306): >auto<
matched token SPACE(#391): > <
matched token BOOL(#307): >bool<
matched token SPACE(#391): > <
matched token BREAK(#310): >break<
matched token SPACE(#391): > <
matched token CASE(#311): >case<
matched token SPACE(#391): > <
matched token CATCH(#312): >catch<
matched token SPACE(#391): > <
matched token CHAR(#313): >char<
matched token SPACE(#391): > <
matched token CLASS(#314): >class<
matched token SPACE(#391): > <
matched token CONST(#315): >const<
matched token SPACE(#391): > <
matched token CONSTCAST(#316): >const_cast<
matched token NEWLINE(#393): >
<
matched token CONTINUE(#317): >continue<
matched token SPACE(#391): > <
matched token DEFAULT(#318): >default<
matched token SPACE(#391): > <
matched token DELETE(#320): >delete<
matched token SPACE(#391): > <
matched token DO(#321): >do<
matched token SPACE(#391): > <
matched token DOUBLE(#322): >double<
matched token SPACE(#391): > <
matched token DYNAMICCAST(#323): >dynamic_cast<
matched token SPACE(#391): > <
matched token ELSE(#324): >else<
matched token SPACE(#391): > <
matched token ENUM(#325): >enum<
matched token SPACE(#391): > <
matched token EXPLICIT(#326): >explicit<
matched token SPACE(#391): > <
matched token EXPORT(#327): >export<
matched token NEWLINE(#393): >
<
matched token EXTERN(#328): >extern<
matched token SPACE(#391): > <
matched token FALSE(#308): >false<
matched token SPACE(#391): > <
matched token FLOAT(#329): >float<
matched token SPACE(#391): > <
matched token FOR(#330): >for<
matched token SPACE(#391): > <
matched token FRIEND(#331): >friend<
matched token SPACE(#391): > <
matched token GOTO(#332): >goto<
matched token SPACE(#391): > <
matched token IF(#333): >if<
matched token SPACE(#391): > <
matched token INLINE(#334): >inline<
matched token SPACE(#391): > <
matched token INT(#335): >int<
matched token SPACE(#391): > <
matched token LONG(#336): >long<
matched token SPACE(#391): > <
matched token MUTABLE(#337): >mutable<
matched token SPACE(#391): > <
matched token NAMESPACE(#338): >namespace<
matched token SPACE(#391): > <
matched token NEW(#339): >new<
matched token NEWLINE(#393): >
<
matched token OPERATOR(#340): >operator<
matched token SPACE(#391): > <
matched token PRIVATE(#341): >private<
matched token SPACE(#391): > <
matched token PROTECTED(#342): >protected<
matched token SPACE(#391): > <
matched token PUBLIC(#343): >public<
matched token SPACE(#391): > <
matched token REGISTER(#344): >register<
matched token SPACE(#391): > <
matched token REINTERPRETCAST(#345): >reinterpret_cast<
matched token SPACE(#391): > <
matched token RETURN(#346): >return<
matched token SPACE(#391): > <
matched token SHORT(#347): >short<
matched token NEWLINE(#393): >
<
matched token SIGNED(#348): >signed<
matched token SPACE(#391): > <
matched token SIZEOF(#349): >sizeof<
matched token SPACE(#391): > <
matched token STATIC(#350): >static<
matched token SPACE(#391): > <
matched token STATICCAST(#351): >static_cast<
matched token SPACE(#391): > <
matched token STRUCT(#352): >struct<
matched token SPACE(#391): > <
matched token SWITCH(#353): >switch<
matched token SPACE(#391): > <
matched token TEMPLATE(#354): >template<
matched token SPACE(#391): > <
matched token THIS(#355): >this<
matched token SPACE(#391): > <
matched token THROW(#356): >throw<
matched token SPACE(#391): > <
matched token TRUE(#309): >true<
matched token NEWLINE(#393): >
<
matched token TRY(#357): >try<
matched token SPACE(#391): > <
matched token TYPEDEF(#358): >typedef<
matched token SPACE(#391): > <
matched token TYPEID(#359): >typeid<
matched token SPACE(#391): > <
matched token TYPENAME(#360): >typename<
matched token SPACE(#391): > <
matched token UNION(#361): >union<
matched token SPACE(#391): > <
matched token UNSIGNED(#362): >unsigned<
matched token SPACE(#391): > <
matched token USING(#363): >using<
matched token SPACE(#391): > <
matched token VIRTUAL(#364): >virtual<
matched token SPACE(#391): > <
matched token VOID(#365): >void<
matched token SPACE(#391): > <
matched token VOLATILE(#366): >volatile<
matched token NEWLINE(#393): >
<
matched token WCHART(#367): >wchar_t<
matched token SPACE(#391): > <
matched token WHILE(#368): >while<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// now here's the punctuation
<
matched token LEFTBRACE(#274): >{<
matched token SPACE(#391): > <
matched token LEFTBRACE(#274): >??<<
matched token SPACE(#391): > <
matched token LEFTBRACE(#274): ><%<
matched token SPACE(#391): > <
matched token RIGHTBRACE(#293): >}<
matched token SPACE(#391): > <
matched token RIGHTBRACE(#293): >??><
matched token SPACE(#391): > <
matched token RIGHTBRACE(#293): >%><
matched token SPACE(#391): > <
matched token LEFTBRACKET(#278): >[<
matched token SPACE(#391): > <
matched token NEWLINE(#393): >
<
matched token LEFTBRACKET(#278): >??(<
matched token SPACE(#391): > <
matched token LEFTBRACKET(#278): ><:<
matched token SPACE(#391): > <
matched token RIGHTBRACKET(#295): >]<
matched token SPACE(#391): > <
matched token RIGHTBRACKET(#295): >??)<
matched token SPACE(#391): > <
matched token RIGHTBRACKET(#295): >:><
matched token SPACE(#391): > <
matched token POUND(#395): >#<
matched token NEWLINE(#393): >
<
matched token POUND(#395): >??=<
matched token SPACE(#391): > <
matched token POUND(#395): >%:<
matched token SPACE(#391): > <
matched token POUND_POUND(#394): >##<
matched token SPACE(#391): > <
matched token POUND_POUND(#394): >#??=<
matched token SPACE(#391): > <
matched token POUND_POUND(#394): >??=#<
matched token SPACE(#391): > <
matched token POUND_POUND(#394): >??=??=<
matched token SPACE(#391): > <
matched token POUND_POUND(#394): >%:%:<
matched token SPACE(#391): > <
matched token NEWLINE(#393): >
<
matched token LEFTPAREN(#277): >(<
matched token SPACE(#391): > <
matched token RIGHTPAREN(#294): >)<
matched token SPACE(#391): > <
matched token SEMICOLON(#297): >;<
matched token SPACE(#391): > <
matched token COLON(#265): >:<
matched token SPACE(#391): > <
matched token ELLIPSIS(#270): >...<
matched token SPACE(#391): > <
matched token NEWLINE(#393): >
<
matched token QUESTION_MARK(#292): >?<
matched token SPACE(#391): > <
matched token COLON_COLON(#296): >::<
matched token SPACE(#391): > <
matched token DOT(#268): >.<
matched token SPACE(#391): > <
matched token DOTSTAR(#269): >.*<
matched token SPACE(#391): > <
matched token PLUS(#287): >+<
matched token SPACE(#391): > <
matched token NEWLINE(#393): >
<
matched token MINUS(#279): >-<
matched token SPACE(#391): > <
matched token STAR(#302): >*<
matched token SPACE(#391): > <
matched token DIVIDE(#266): >/<
matched token SPACE(#391): > <
matched token PERCENT(#282): >%<
matched token SPACE(#391): > <
matched token XOR(#262): >^<
matched token SPACE(#391): > <
matched token NEWLINE(#393): >
<
matched token XOR(#262): >??'<
matched token SPACE(#391): > <
matched token XOR(#262): >xor<
matched token SPACE(#391): > <
matched token AND(#256): >&<
matched token SPACE(#391): > <
matched token AND(#256): >bitand<
matched token SPACE(#391): > <
matched token OR(#260): >|<
matched token SPACE(#391): > <
matched token OR(#260): >bitor<
matched token NEWLINE(#393): >
<
matched token OR(#260): >??!<
matched token SPACE(#391): > <
matched token COMPL(#303): >~<
matched token SPACE(#391): > <
matched token COMPL(#303): >??-<
matched token SPACE(#391): > <
matched token COMPL(#303): >compl<
matched token SPACE(#391): > <
matched token NOT(#284): >!<
matched token SPACE(#391): > <
matched token NOT(#284): >not<
matched token NEWLINE(#393): >
<
matched token ASSIGN(#258): >=<
matched token SPACE(#391): > <
matched token LESS(#275): ><<
matched token SPACE(#391): > <
matched token GREATER(#272): >><
matched token SPACE(#391): > <
matched token PLUSASSIGN(#288): >+=<
matched token SPACE(#391): > <
matched token MINUSASSIGN(#280): >-=<
matched token SPACE(#391): > <
matched token NEWLINE(#393): >
<
matched token STARASSIGN(#304): >*=<
matched token SPACE(#391): > <
matched token DIVIDEASSIGN(#267): >/=<
matched token SPACE(#391): > <
matched token PERCENTASSIGN(#283): >%=<
matched token SPACE(#391): > <
matched token XORASSIGN(#263): >^=<
matched token SPACE(#391): > <
matched token XORASSIGN(#263): >xor_eq<
matched token NEWLINE(#393): >
<
matched token XORASSIGN(#263): >??'=<
matched token SPACE(#391): > <
matched token ANDASSIGN(#259): >&=<
matched token SPACE(#391): > <
matched token ANDASSIGN(#259): >and_eq<
matched token SPACE(#391): > <
matched token ORASSIGN(#261): >|=<
matched token SPACE(#391): > <
matched token ORASSIGN(#261): >or_eq<
matched token SPACE(#391): > <
matched token ORASSIGN(#261): >??!=<
matched token SPACE(#391): > <
matched token NEWLINE(#393): >
<
matched token SHIFTLEFT(#298): ><<<
matched token SPACE(#391): > <
matched token SHIFTRIGHT(#300): >>><
matched token SPACE(#391): > <
matched token SHIFTRIGHTASSIGN(#301): >>>=<
matched token SPACE(#391): > <
matched token SHIFTLEFTASSIGN(#299): ><<=<
matched token SPACE(#391): > <
matched token EQUAL(#271): >==<
matched token SPACE(#391): > <
matched token NOTEQUAL(#285): >!=<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// here's some identifiers
<
matched token IDENTIFIER(#380): >ident<
matched token NEWLINE(#393): >
<
matched token IDENTIFIER(#380): >ident1<
matched token NEWLINE(#393): >
<
matched token IDENTIFIER(#380): >ident1a<
matched token NEWLINE(#393): >
<
matched token IDENTIFIER(#380): >_ident<
matched token NEWLINE(#393): >
<
matched token IDENTIFIER(#380): >IdEnT_A<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// hex ints
<
matched token INTLIT(#384): >0x123a<
matched token NEWLINE(#393): >
<
matched token INTLIT(#384): >0X123B<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// octal ints
<
matched token INTLIT(#384): >0<
matched token NEWLINE(#393): >
<
matched token INTLIT(#384): >012375423<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// decimal ints
<
matched token INTLIT(#384): >1<
matched token NEWLINE(#393): >
<
matched token INTLIT(#384): >1234999<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// ints with suffixes
<
matched token INTLIT(#384): >123l<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123L<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123u<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123U<
matched token NEWLINE(#393): >
<
matched token INTLIT(#384): >123ul<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123uL<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123Ul<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123UL<
matched token NEWLINE(#393): >
<
matched token INTLIT(#384): >123lu<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123Lu<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123lU<
matched token SPACE(#391): > <
matched token INTLIT(#384): >123LU<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// floats
<
matched token FLOATLIT(#385): >.123<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >1.1<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >1.<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >.123e345<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >1.1E+2<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >1.e-5<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >22e3<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >33E+4<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >43e-3<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >.123f<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >1.1F<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >1.l<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >.123e345L<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >1.1E+2fl<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >1.e-5fL<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >22e3Fl<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >33E+4FL<
matched token NEWLINE(#393): >
<
matched token FLOATLIT(#385): >43e-3lf<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >.123Lf<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >1.1lF<
matched token SPACE(#391): > <
matched token FLOATLIT(#385): >1.LF<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// chars
<
matched token CHARLIT(#388): >'a'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'0'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'"'<
matched token NEWLINE(#393): >
<
matched token CHARLIT(#388): >L'a'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >L'0'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >L'"'<
matched token NEWLINE(#393): >
<
matched token CHARLIT(#388): >'\a'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'??/b'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\f'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'??/n'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\r'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\t'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\v'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\?'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\''<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\"'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\\'<
matched token NEWLINE(#393): >
<
matched token CHARLIT(#388): >'\x1'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\xaBcD123'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\1'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\12'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\123'<
matched token NEWLINE(#393): >
<
matched token CHARLIT(#388): >'\u1234'<
matched token SPACE(#391): > <
matched token CHARLIT(#388): >'\UABCD1234'<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// Strings
<
matched token STRINGLIT(#389): >""<
matched token SPACE(#391): > <
matched token STRINGLIT(#389): >"a"<
matched token SPACE(#391): > <
matched token STRINGLIT(#389): >"01234"<
matched token SPACE(#391): > <
matched token STRINGLIT(#389): >"'"<
matched token NEWLINE(#393): >
<
matched token STRINGLIT(#389): >L""<
matched token SPACE(#391): > <
matched token STRINGLIT(#389): >L"a"<
matched token SPACE(#391): > <
matched token STRINGLIT(#389): >L"01234"<
matched token SPACE(#391): > <
matched token STRINGLIT(#389): >L"'"<
matched token NEWLINE(#393): >
<
matched token STRINGLIT(#389): >"\a??/b\f??/n\r\t\v\?\'\"\\"<
matched token NEWLINE(#393): >
<
matched token STRINGLIT(#389): >"\x1\xaBcD123\1\12\123"<
matched token NEWLINE(#393): >
<
matched token STRINGLIT(#389): >"\u1234\UABCD1234"<
matched token NEWLINE(#393): >
<
matched token STRINGLIT(#389): >"/* a faked c comment */"<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CPPCOMMENT(#387): >// pre-processor directives
<
matched token PP_HHEADER(#399): >#include <io.h><
matched token NEWLINE(#393): >
<
matched token SPACE(#391): > <
matched token PP_QHEADER(#398): ># include"r.h"<
matched token NEWLINE(#393): >
<
matched token PP_INCLUDE(#397): >#include <
matched token IDENTIFIER(#380): >SOME_HEADER<
matched token NEWLINE(#393): >
<
matched token PP_IF(#370): >#if<
matched token NEWLINE(#393): >
<
matched token PP_IFDEF(#371): >#ifdef<
matched token NEWLINE(#393): >
<
matched token PP_IFNDEF(#372): >#ifndef<
matched token NEWLINE(#393): >
<
matched token PP_ELIF(#373): >#elif<
matched token NEWLINE(#393): >
<
matched token PP_ENDIF(#374): >#endif<
matched token NEWLINE(#393): >
<
matched token PP_DEFINE(#369): >#define<
matched token NEWLINE(#393): >
<
matched token PP_UNDEF(#378): >#undef<
matched token NEWLINE(#393): >
<
matched token PP_LINE(#376): >#line<
matched token NEWLINE(#393): >
<
matched token PP_ERROR(#375): >#error<
matched token NEWLINE(#393): >
<
matched token PP_PRAGMA(#377): >#pragma<
matched token NEWLINE(#393): >
<
matched token NEWLINE(#393): >
<
matched token CCOMMENT(#386): >/* another c comment */<
matched token NEWLINE(#393): >
<

View File

@@ -0,0 +1,4 @@
#!/bin/sh
# Regression driver for the C++ lexer tests.
#
# Runs each lexer test program on lextest.in and compares its standard output
# with the recorded expected output using diff. The script exits with status 1
# as soon as one of the comparisons reports a difference.

# Directory containing this script; the input file and the expected-output
# files are looked up relative to it. All expansions are quoted so that paths
# containing whitespace or glob characters are passed through unchanged.
SRCDIR=`dirname "$0"`

./test_re2c_lexer "$SRCDIR/lextest.in" | diff - "$SRCDIR/lextest.re2c.output" || exit 1
./test_slex_lexer "$SRCDIR/lextest.in" | diff - "$SRCDIR/lextest.slex.output" || exit 1

View File

@@ -0,0 +1,114 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Test the Re2C based C++ lexer
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgments.
=============================================================================*/
#include <iostream>
#include <fstream>
#include <string>
///////////////////////////////////////////////////////////////////////////////
// include required boost libraries
#include <boost/assert.hpp>
#include <boost/spirit/iterator/position_iterator.hpp>
#include "test_re2c_lexer.hpp" // config data
#include "cpplexer/cpp_token_ids.hpp"
#include "cpplexer/cpp_lex_iterator.hpp"
#include "cpplexer/re2clex/cpp_re2c_token.hpp"
#if !defined(CPP_SEPARATE_LEXER_INSTANTIATION)
#include "cpplexer/re2clex/cpp_re2c_lexer.hpp"
#endif
using namespace boost::spirit;
using namespace cpplexer;
using std::string;
using std::getline;
using std::ifstream;
using std::cout;
using std::cerr;
using std::endl;
using std::ostream;
///////////////////////////////////////////////////////////////////////////////
// main entry point
// Tokenizes the input named on the command line with the re2c based C++
// lexer and prints one "matched token NAME(#id): >value<" line per token.
//
// argv[1] is first tried as a file name. If that file cannot be opened, the
// argument text itself is used as the input to tokenize.
//
// Returns 0 on success, 1 on a usage error or when a std::exception is caught
// while lexing, and 2 when any other exception is caught.
int main(int argc, char** argv)
{
    if (2 != argc) {
        cout << "Usage: <cpp file>" << endl;
        return 1;
    }

    ifstream infile(argv[1]);
    string teststr;
    if (infile) {
        // Read the whole file, terminating every line with '\n'.
        // The loop tests the stream returned by getline() instead of calling
        // good() afterwards: for a final line without a trailing newline
        // getline() sets eofbit but still delivers the text, so a good()
        // test would silently drop that line.
        string line;
        while (getline(infile, line)) {
            teststr += line;
            teststr += '\n';
        }
    }
    else {
        teststr = argv[1];
    }

    // The lexer-specific lex_functor object, which is hidden behind the
    // lex_iterator, isn't mentioned explicitly here. This allows separating
    // the compilation of the lex_functor and reduces compilation time.
    using namespace cpplexer::re2clex;

    // The token iterator is constructed from a [begin, end) character range
    // plus argv[1]; a default constructed iterator serves as the end marker.
    char const *begin = teststr.c_str();
    char const *end = begin + teststr.size();
    lex_iterator<lex_token<> > first (begin, end, argv[1]);
    lex_iterator<lex_token<> > last;

    // Declared outside the try block so the handlers can report the position
    // of the last token that was retrieved.
    lex_token<> current_token;

    try {
        while (first != last) {
            // retrieve next token
            current_token = *first;

            // find token name
            string tokenname (get_token_name(token_id(current_token)));

            // output token info
            cout << "matched token "
                 << tokenname
                 << "(#" << token_id(ID_FROM_TOKEN(current_token)) << "): "
                 << ">" << current_token.get_value() << "<"
                 << endl;
            ++first;
        }
    }
    catch (std::exception const &e) {
        // use last recognized token to retrieve the error position
        cerr
            << current_token.get_position().file
            << "(" << current_token.get_position().line << "): "
            << "unexpected exception: " << e.what()
            << endl;
        return 1;
    }
    catch (...) {
        // use last recognized token to retrieve the error position
        cerr
            << current_token.get_position().file
            << "(" << current_token.get_position().line << "): "
            << "unexpected exception." << endl;
        return 2;
    }
    return 0;
}

View File

@@ -0,0 +1,35 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Test the Re2C based C++ lexer
Configuration data
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
// Include guard. The macro name deliberately avoids a leading underscore
// followed by an uppercase letter and avoids double underscores, because such
// identifiers are reserved for the implementation.
#if !defined(TEST_RE2C_LEXER_HPP_D17C10F4_25B5_4952_9B81_BB92A1B96B18_INCLUDED)
#define TEST_RE2C_LEXER_HPP_D17C10F4_25B5_4952_9B81_BB92A1B96B18_INCLUDED

///////////////////////////////////////////////////////////////////////////////
//
// Use the separation model for the instantiation of the lex_functor object.
//
// If this is defined, you should explicitly instantiate the lex_functor
// template with the correct parameters in a separate compilation unit of
// your program (see the file instantiate_slex_lexer.cpp).
// NOTE(review): this header configures the re2c lexer test, so the file
// meant here is presumably the re2c counterpart of that unit -- confirm.
//
// To use the lexer inclusion model, undefine the following
//
///////////////////////////////////////////////////////////////////////////////
#define CPP_SEPARATE_LEXER_INSTANTIATION

#endif // !defined(TEST_RE2C_LEXER_HPP_D17C10F4_25B5_4952_9B81_BB92A1B96B18_INCLUDED)

View File

@@ -0,0 +1,114 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Test the SLex (Spirit Lex) based C++ lexer
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#include <iostream>
#include <fstream>
#include <string>
///////////////////////////////////////////////////////////////////////////////
// include required boost libraries
#include <boost/assert.hpp>
#include <boost/spirit/iterator/position_iterator.hpp>
#include "test_slex_lexer.hpp" // config data
#include "cpplexer/cpp_token_ids.hpp"
#include "cpplexer/cpp_lex_iterator.hpp"
#include "cpplexer/slex/cpp_slex_token.hpp"
#if !defined(CPP_SEPARATE_LEXER_INSTANTIATION)
#include "cpplexer/slex/cpp_slex_lexer.hpp"
#endif
using namespace boost::spirit;
using namespace cpplexer;
using std::string;
using std::getline;
using std::ifstream;
using std::cout;
using std::cerr;
using std::endl;
using std::ostream;
///////////////////////////////////////////////////////////////////////////////
// main entry point
// Tokenizes the input named on the command line with the SLex (Spirit Lex)
// based C++ lexer and prints one "matched token NAME(#id): >value<" line per
// token.
//
// argv[1] is first tried as a file name. If that file cannot be opened, the
// argument text itself is used as the input to tokenize.
//
// Returns 0 on success, 1 on a usage error or when a std::exception is caught
// while lexing, and 2 when any other exception is caught.
int
main(int argc, char *argv[])
{
    if (2 != argc) {
        cout << "Usage: <cpp file>" << endl;
        return 1;
    }

    ifstream infile(argv[1]);
    string teststr;
    if (infile) {
        // Read the whole file, terminating every line with '\n'.
        // The loop tests the stream returned by getline() instead of calling
        // good() afterwards: for a final line without a trailing newline
        // getline() sets eofbit but still delivers the text, so a good()
        // test would silently drop that line.
        string line;
        while (getline(infile, line)) {
            teststr += line;
            teststr += '\n';
        }
    }
    else {
        teststr = argv[1];
    }

    // As you can see, the slex::lex_functor object, which is hidden behind
    // the lex_iterator, isn't mentioned explicitly here. This allows
    // separating the compilation of the slex::lex_functor and reduces
    // compilation time.
    using namespace cpplexer::slex;

    // The token iterator is constructed from the string's iterator range plus
    // argv[1]; a default constructed iterator serves as the end marker.
    lex_iterator<lex_token<string::iterator> > first (teststr.begin(), teststr.end(), argv[1]);
    lex_iterator<lex_token<string::iterator> > last;

    // Declared outside the try block so the handlers can report the position
    // of the last token that was retrieved.
    lex_token<string::iterator> current_token;

    try {
        while (first != last) {
            // retrieve next token
            current_token = *first;

            // find token name
            string tokenname (get_token_name(token_id(current_token)));

            // output token info
            cout << "matched token "
                 << tokenname
                 << "(#" << token_id(ID_FROM_TOKEN(current_token)) << "): "
                 << ">" << current_token.get_value() << "<"
                 << endl;
            ++first;
        }
    }
    catch (std::exception const &e) {
        // use last recognized token to retrieve the error position
        cerr
            << current_token.get_position().file
            << "(" << current_token.get_position().line << "): "
            << "unexpected exception: " << e.what()
            << endl;
        return 1;
    }
    catch (...) {
        // use last recognized token to retrieve the error position
        cerr
            << current_token.get_position().file
            << "(" << current_token.get_position().line << "): "
            << "unexpected exception." << endl;
        return 2;
    }
    return 0;
}

View File

@@ -0,0 +1,35 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Test the SLex (Spirit Lex) based C++ lexer
Configuration data
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
// Include guard. The macro name deliberately avoids a leading underscore
// followed by an uppercase letter and avoids double underscores, because such
// identifiers are reserved for the implementation.
#if !defined(TEST_SLEX_LEXER_HPP_18D7FB53_7B33_48BC_A797_ACDA4DC356D6_INCLUDED)
#define TEST_SLEX_LEXER_HPP_18D7FB53_7B33_48BC_A797_ACDA4DC356D6_INCLUDED

///////////////////////////////////////////////////////////////////////////////
//
// Use the separation model for the instantiation of the lex_functor object.
//
// If this is defined, you should explicitly instantiate the lex_functor
// template with the correct parameters in a separate compilation unit of
// your program (see the file instantiate_slex_lexer.cpp).
//
// To use the lexer inclusion model, undefine the following
//
///////////////////////////////////////////////////////////////////////////////
#define CPP_SEPARATE_LEXER_INSTANTIATION

#endif // !defined(TEST_SLEX_LEXER_HPP_18D7FB53_7B33_48BC_A797_ACDA4DC356D6_INCLUDED)

View File

@@ -0,0 +1,47 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#include "cpp.hpp"                  // config data

// This translation unit is only populated when the separation model for the
// grammar is selected; otherwise it compiles to nothing.
#if defined(CPP_SEPARATE_GRAMMAR_INSTANTIATION)

#include <string>

#include "cpp/cpp_grammar.hpp"

///////////////////////////////////////////////////////////////////////////////
// decide, which C++ lexer to use
#if defined(USE_SLEX_CPP_LEXER)
// use the slex based C++ lexer
#include "cpplexer/slex/cpp_slex_token.hpp"
using namespace cpplexer::slex;

#elif defined(USE_RE2C_CPP_LEXER)
// use the re2c based C++ lexer
#include "cpplexer/re2clex/cpp_re2c_token.hpp"
using namespace cpplexer::re2clex;
#endif

///////////////////////////////////////////////////////////////////////////////
//
// Explicit instantiation of the cpp_grammar_gen template with the correct
// token type. This instantiates the corresponding pt_parse function, which
// in turn instantiates the cpp_grammar object (see cpp/cpp_grammar.hpp)
//
// An explicit instantiation of a class template requires a class-key
// ('template struct X<...>;'); leaving it out is accepted by some compilers
// only as an extension.
// NOTE(review): assumes cpp_grammar_gen is a class template (struct or
// class) -- confirm against cpp/cpp_grammar.hpp.
//
///////////////////////////////////////////////////////////////////////////////
template struct cpp::cpp_grammar_gen<lex_token<std::string::iterator> >;

#endif // #if defined(CPP_SEPARATE_GRAMMAR_INSTANTIATION)

View File

@@ -0,0 +1,50 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Test the C preprocessor
Explicit instantiation of the lex_functor generation function
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#include "cpp.hpp"                  // config data

// This translation unit is only populated when the separation model for the
// lexer is selected and the re2c based lexer is in use.
#if defined(CPP_SEPARATE_LEXER_INSTANTIATION) && defined(USE_RE2C_CPP_LEXER)

#include <string>

#include "cpplexer/cpp_token_ids.hpp"
#include "cpplexer/cpp_lex_iterator.hpp"
#include "cpplexer/re2clex/cpp_re2c_token.hpp"

///////////////////////////////////////////////////////////////////////////////
// The following file needs to be included only once throughout the whole
// program.
#include "cpplexer/re2clex/cpp_re2c_lexer.hpp"

///////////////////////////////////////////////////////////////////////////////
//
// This instantiates the correct 'new_lexer' function, which generates the
// C++ lexer used in this sample.
//
// This is moved into a separate compilation unit to decouple the compilation
// of the C++ lexer from the compilation of the other modules, which helps to
// reduce compilation time.
//
// The template parameter(s) supplied should be identical to the parameters
// supplied while instantiating the lex_iterator<> template in the code that
// uses the lexer.
//
// An explicit instantiation of a class template requires a class-key
// ('template struct X<...>;'); leaving it out is accepted by some compilers
// only as an extension.
// NOTE(review): assumes new_lexer_gen is a class template (struct or class)
// -- confirm against cpplexer/re2clex/cpp_re2c_lexer.hpp.
//
///////////////////////////////////////////////////////////////////////////////
template struct cpplexer::re2clex::new_lexer_gen<std::string::iterator>;

#endif // defined(CPP_SEPARATE_LEXER_INSTANTIATION) && defined(USE_RE2C_CPP_LEXER)

View File

@@ -0,0 +1,50 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Test the C preprocessor
Explicit instantiation of the lex_functor generation function
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#include "cpp.hpp"                  // config data

// This translation unit is only populated when the separation model for the
// lexer is selected and the slex based lexer is in use.
#if defined(CPP_SEPARATE_LEXER_INSTANTIATION) && defined(USE_SLEX_CPP_LEXER)

#include <string>

#include "cpplexer/cpp_token_ids.hpp"
#include "cpplexer/cpp_lex_iterator.hpp"
#include "cpplexer/slex/cpp_slex_token.hpp"

///////////////////////////////////////////////////////////////////////////////
// The following file needs to be included only once throughout the whole
// program.
#include "cpplexer/slex/cpp_slex_lexer.hpp"

///////////////////////////////////////////////////////////////////////////////
//
// This instantiates the correct 'new_lexer' function, which generates the
// C++ lexer used in this sample.
//
// This is moved into a separate compilation unit to decouple the compilation
// of the C++ lexer from the compilation of the other modules, which helps to
// reduce compilation time.
//
// The template parameter(s) supplied should be identical to the parameters
// supplied while instantiating the lex_iterator<> template in the code that
// uses the lexer.
//
// An explicit instantiation of a class template requires a class-key
// ('template struct X<...>;'); leaving it out is accepted by some compilers
// only as an extension.
// NOTE(review): assumes new_lexer_gen is a class template (struct or class)
// -- confirm against cpplexer/slex/cpp_slex_lexer.hpp.
//
///////////////////////////////////////////////////////////////////////////////
template struct cpplexer::slex::new_lexer_gen<std::string::iterator>;

#endif // defined(CPP_SEPARATE_LEXER_INSTANTIATION) && defined(USE_SLEX_CPP_LEXER)

View File

@@ -0,0 +1,12 @@
# Build description for the 'list_includes' sample.
subproject libs/spirit/example/application/cpp/test/list_includes ;
# The executable is built from the sample's main source file plus the three
# separate template instantiation units. The requirements after the second
# ':' add include paths (<include>, <sysinclude>) and libraries (<lib>).
exe list_includes :
list_includes.cpp
instantiate_cpp_grammar.cpp
instantiate_slex_lexer.cpp
instantiate_re2c_lexer.cpp
: <include>../..
<sysinclude>$(BOOST_ROOT)
<lib>program_options
<lib>fs
;

View File

@@ -0,0 +1,18 @@
A Standard compliant C preprocessor
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
See Copyright.txt for full copyright notices and acknowledgements.
The 'list_includes' sample shows a simple way to use the C preprocessor
iterator to extract a list of included files from a given source file. To
see which command line options are supported, call it with the --help
option.
Note: This sample requires the program_options library written by Vladimir Prus,
which is currently under Boost review.
It is available here: http://zigzag.cs.msu.su:7813/program_options.
The build process is straightforward. The only thing you should do
is to include the root directory of this sample into the include search
path of your compiler (usually through a -I. command line argument).

View File

@@ -0,0 +1,50 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Sample: List include dependencies of a given source file
Explicit instantiation of the cpp_grammar parsing function
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#include "list_includes.hpp"            // config data

// This translation unit is only populated when the separation model for the
// grammar is selected; otherwise it compiles to nothing.
#if defined(CPP_SEPARATE_GRAMMAR_INSTANTIATION)

#include <string>

#include "cpp/cpp_grammar.hpp"

///////////////////////////////////////////////////////////////////////////////
// decide, which C++ lexer to use
#if defined(USE_SLEX_CPP_LEXER)
// use the slex based C++ lexer
#include "cpplexer/slex/cpp_slex_token.hpp"
using namespace cpplexer::slex;

#elif defined(USE_RE2C_CPP_LEXER)
// use the re2c based C++ lexer
#include "cpplexer/re2clex/cpp_re2c_token.hpp"
using namespace cpplexer::re2clex;
#endif

///////////////////////////////////////////////////////////////////////////////
//
// Explicit instantiation of the cpp_grammar_gen template with the correct
// token type. This instantiates the corresponding pt_parse function, which
// in turn instantiates the cpp_grammar object (see cpp/cpp_grammar.hpp)
//
// An explicit instantiation of a class template requires a class-key
// ('template struct X<...>;'); leaving it out is accepted by some compilers
// only as an extension.
// NOTE(review): assumes cpp_grammar_gen is a class template (struct or
// class) -- confirm against cpp/cpp_grammar.hpp.
//
///////////////////////////////////////////////////////////////////////////////
template struct cpp::cpp_grammar_gen<lex_token<std::string::iterator> >;

#endif // #if defined(CPP_SEPARATE_GRAMMAR_INSTANTIATION)

View File

@@ -0,0 +1,50 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Sample: List include dependencies of a given source file
Explicit instantiation of the lex_functor generation function
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#include "list_includes.hpp"            // config data

// This translation unit is only populated when the separation model for the
// lexer is selected and the re2c based lexer is in use.
#if defined(CPP_SEPARATE_LEXER_INSTANTIATION) && defined(USE_RE2C_CPP_LEXER)

#include <string>

#include "cpplexer/cpp_token_ids.hpp"
#include "cpplexer/cpp_lex_iterator.hpp"
#include "cpplexer/re2clex/cpp_re2c_token.hpp"

///////////////////////////////////////////////////////////////////////////////
// The following file needs to be included only once throughout the whole
// program.
#include "cpplexer/re2clex/cpp_re2c_lexer.hpp"

///////////////////////////////////////////////////////////////////////////////
//
// This instantiates the correct 'new_lexer' function, which generates the
// C++ lexer used in this sample.
//
// This is moved into a separate compilation unit to decouple the compilation
// of the C++ lexer from the compilation of the other modules, which helps to
// reduce compilation time.
//
// The template parameter(s) supplied should be identical to the parameters
// supplied while instantiating the lex_iterator<> template in the code that
// uses the lexer.
//
// An explicit instantiation of a class template requires a class-key
// ('template struct X<...>;'); leaving it out is accepted by some compilers
// only as an extension.
// NOTE(review): assumes new_lexer_gen is a class template (struct or class)
// -- confirm against cpplexer/re2clex/cpp_re2c_lexer.hpp.
//
///////////////////////////////////////////////////////////////////////////////
template struct cpplexer::re2clex::new_lexer_gen<std::string::iterator>;

#endif // defined(CPP_SEPARATE_LEXER_INSTANTIATION) && defined(USE_RE2C_CPP_LEXER)

View File

@@ -0,0 +1,50 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Sample: List include dependencies of a given source file
Explicit instantiation of the lex_functor generation function
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#include "list_includes.hpp"            // config data

// This translation unit is only populated when the separation model for the
// lexer is selected and the slex based lexer is in use.
#if defined(CPP_SEPARATE_LEXER_INSTANTIATION) && defined(USE_SLEX_CPP_LEXER)

#include <string>

#include "cpplexer/cpp_token_ids.hpp"
#include "cpplexer/cpp_lex_iterator.hpp"
#include "cpplexer/slex/cpp_slex_token.hpp"

///////////////////////////////////////////////////////////////////////////////
// The following file needs to be included only once throughout the whole
// program.
#include "cpplexer/slex/cpp_slex_lexer.hpp"

///////////////////////////////////////////////////////////////////////////////
//
// This instantiates the correct 'new_lexer' function, which generates the
// C++ lexer used in this sample.
//
// This is moved into a separate compilation unit to decouple the compilation
// of the C++ lexer from the compilation of the other modules, which helps to
// reduce compilation time.
//
// The template parameter(s) supplied should be identical to the parameters
// supplied while instantiating the lex_iterator<> template in the code that
// uses the lexer.
//
// An explicit instantiation of a class template requires a class-key
// ('template struct X<...>;'); leaving it out is accepted by some compilers
// only as an extension.
// NOTE(review): assumes new_lexer_gen is a class template (struct or class)
// -- confirm against cpplexer/slex/cpp_slex_lexer.hpp.
//
///////////////////////////////////////////////////////////////////////////////
template struct cpplexer::slex::new_lexer_gen<std::string::iterator>;

#endif // defined(CPP_SEPARATE_LEXER_INSTANTIATION) && defined(USE_SLEX_CPP_LEXER)

View File

@@ -0,0 +1,286 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Sample: List include dependencies of a given source file
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
///////////////////////////////////////////////////////////////////////////////
// include required boost libraries
#include <boost/assert.hpp>
#include <boost/spirit/iterator/position_iterator.hpp>
///////////////////////////////////////////////////////////////////////////////
// This sample requires the program_options library written by Vladimir Prus,
// which is currently under Boost review.
// It is available here: http://zigzag.cs.msu.su:7813/program_options
//
#include <boost/program_options.hpp>
#include "list_includes.hpp" // config data
#include "list_includes_version.hpp"
#include "cpplexer/cpp_token_ids.hpp"
#include "cpplexer/cpp_lex_iterator.hpp"
#include "cpplexer/slex/util/time_conversion_helper.hpp"
#include "cpp/cpp_context.hpp"
///////////////////////////////////////////////////////////////////////////////
// include lexer specifics, import lexer names
#if defined(USE_SLEX_CPP_LEXER)
// use the slex based C++ lexer
#include "cpplexer/slex/cpp_slex_token.hpp"
#if !defined(CPP_SEPARATE_LEXER_INSTANTIATION)
#include "cpplexer/slex/cpp_slex_lexer.hpp"
#endif // !defined(CPP_SEPARATE_LEXER_INSTANTIATION)
using namespace cpplexer::slex;
#elif defined(USE_RE2C_CPP_LEXER)
// use the re2c based C++ lexer
#include "cpplexer/re2clex/cpp_re2c_token.hpp"
#if !defined(CPP_SEPARATE_LEXER_INSTANTIATION)
#include "cpplexer/re2clex/cpp_re2c_lexer.hpp"
#endif // !defined(CPP_SEPARATE_LEXER_INSTANTIATION)
using namespace cpplexer::re2clex;
#endif
///////////////////////////////////////////////////////////////////////////////
// import required names
using namespace boost::spirit;
using std::string;
using std::vector;
using std::cout;
using std::cerr;
using std::endl;
using std::ifstream;
using std::ostream;
using std::istreambuf_iterator;
namespace po = boost::program_options;
namespace fs = boost::filesystem;
///////////////////////////////////////////////////////////////////////////////
// print the current version
int print_version()
{
// get time of last compilation of this file
util::time_conversion_helper compilation_time(__DATE__ " " __TIME__);
// calculate the number of days since Jan 29 2003
// (the day the list_includes project was started)
std::tm first_day;
std::memset (&first_day, 0, sizeof(std::tm));
first_day.tm_mon = 0; // Jan
first_day.tm_mday = 29; // 29
first_day.tm_year = 103; // 2003
long seconds = long(std::difftime(compilation_time.get_time(),
std::mktime(&first_day)));
cout
<< LIST_INCLUDES_VERSION_MAJOR << '.'
<< LIST_INCLUDES_VERSION_MINOR << '.'
<< LIST_INCLUDES_VERSION_SUBMINOR << '.'
<< seconds/(3600*24); // get number of days from seconds
return 1; // exit app
}
///////////////////////////////////////////////////////////////////////////////
//
// Print the include dependencies of every file named on the command line.
//
// Parameters:
//   opts    parsed command line; opts.arguments() yields the input file names
//           (taken by const reference so the options object is not copied)
//   vm      variables map; only queried for the presence of "path"
//   pathes  include directories to add to each preprocessing context
//
// Returns:
//   0  all files were processed (files that cannot be opened are reported
//      on cerr and skipped)
//   1  cpp::abort_preprocess_exception was thrown
//   2  cpp::cpp_exception was thrown
//   3  any other std::exception was thrown
//   4  an unknown exception was thrown
// Any exception stops the processing of the remaining files.
int do_actual_work (po::options_and_arguments const &opts,
    po::variables_map const &vm, vector<string> const &pathes)
{
    // current file position is saved for exception handling
    file_position current_position;

    try {
        vector<string> const &arguments = opts.arguments();
        vector<string>::const_iterator lastfile = arguments.end();

        for (vector<string>::const_iterator file_it = arguments.begin();
             file_it != lastfile; ++file_it)
        {
            ifstream instream((*file_it).c_str());
            string instring;

            if (!instream.is_open()) {
                cerr << "Could not open input file: " << *file_it << endl;
                continue;
            }
            // read the whole file into memory
            instring = string(istreambuf_iterator<char>(instream.rdbuf()),
                              istreambuf_iterator<char>());

            // The template lex_functor<> is defined in both namespaces:
            // cpplexer::slex and cpplexer::re2clex. The 'using namespace'
            // directive above tells the compiler, which of them to use.
            typedef cpp::context<lex_token<std::string::iterator> > context_t;

            // The C preprocessor iterator shouldn't be constructed directly. It is
            // to be generated through a cpp::context<> object. This cpp:context<>
            // object is additionally to be used to initialize and define different
            // parameters of the actual preprocessing.
            // The preprocessing of the input stream is done on the fly behind the
            // scenes during iteration over the context_t::iterator_t stream.
            context_t ctx (instring.begin(), instring.end(), (*file_it).c_str());

            // add include directories to the include path
            if (vm.count("path")) {
                vector<string>::const_iterator end = pathes.end();
                for (vector<string>::const_iterator cit = pathes.begin();
                     cit != end; ++cit)
                {
                    ctx.add_include_path((*cit).c_str());
                }
            }

            // analyze the actual file
            context_t::iterator_t first = ctx.begin();
            context_t::iterator_t last = ctx.end();
            int iter_depth = 0;

            cout << "Printing dependency information for: "
                 << *file_it << endl;

            while (first != last) {
                // retrieve position of the next token
                current_position = (*first).get_position();

                if (iter_depth != ctx.get_iteration_depth()) {
                    if (iter_depth < ctx.get_iteration_depth()) {
                        // the get_iteration_depth() reflects the include level of
                        // the _next_ to be returned token
                        if (++first == last)
                            break;
                        current_position = (*first).get_position();

                        // extract the filename of the current token and print out
                        // the required dependency information
                        for (int i = 0; i <= iter_depth; ++i)
                            cout << " ";        // indent
                        cout << current_position.file << endl;
                    }
                    else {
                        ++first;                // advance token stream
                    }

                    // save new iteration depth for later comparison
                    iter_depth = ctx.get_iteration_depth();
                }
                else {
                    ++first;                    // advance token stream
                }
            }

            // prepend endl before next file
            cout << endl;
        }
    }
    catch (cpp::abort_preprocess_exception &e) {
        // abort the preprocessing: simply abort compilation
        cerr
            << e.file_name() << "(" << e.line_no() << "): "
            << "aborting preprocessing."
            << endl;
        return 1;
    }
    catch (cpp::cpp_exception &e) {
        // some preprocessing error
        cerr
            << e.file_name() << "(" << e.line_no() << "): "
            << e.description() << endl;
        return 2;
    }
    catch (std::exception &e) {
        // use last recognized token to retrieve the error position
        cerr
            << current_position.file
            << "(" << current_position.line << "): "
            << "exception caught: " << e.what()
            << endl;
        return 3;
    }
    catch (...) {
        // use last recognized token to retrieve the error position
        cerr
            << current_position.file
            << "(" << current_position.line << "): "
            << "unexpected exception caught." << endl;
        return 4;
    }
    return 0;
}
///////////////////////////////////////////////////////////////////////////////
// here we go!
// Program entry point: parse the command line and dispatch.
//
// Exit codes:
//   1      --help or --version was handled
//   5      no input file was given
//   6      a std::exception escaped command line handling / do_actual_work
//   7      an unknown exception escaped
//   other  the value returned by do_actual_work()
int
main (int argc, char const *argv[])
{
    try {
        // analyze the command line options and arguments
        vector<string> pathes;
        po::options_description desc("Usage: list_includes [options] file ...");

        desc.add_options()
            ("help,h", "", "print out program usage")
            ("version,v", "", "print the version number")
            ("path,I", po::parameter<vector<string> >("dir", &pathes),
                "specify additional include directory")
        ;

        po::options_and_arguments opts = po::parse_command_line(argc, argv, desc);
        po::variables_map vm;

        po::store(opts, vm, desc);
        if (vm.count("help")) {
            cout << desc << "\n";
            return 1;
        }

        if (vm.count("version")) {
            return print_version();
        }

        // if there is no input file given, then exit
        if (0 == opts.arguments().size()) {
            cerr << "list_includes: No input file given. "
                 << "Use --help to get a hint." << endl;
            return 5;
        }

        // iterate over all given input files
        return do_actual_work(opts, vm, pathes);
    }
    catch (std::exception &e) {
        // diagnostics belong on cerr, like every other error message here
        cerr << "list_includes: exception caught: " << e.what() << endl;
        return 6;
    }
    catch (...) {
        cerr << "list_includes: unexpected exception caught." << endl;
        return 7;
    }
}

View File

@@ -0,0 +1,59 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Sample: List include dependencies of a given source file
Configuration data
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_LIST_INCLUDES_HPP__843DB412_3AA8_4BCF_8081_AA4A5FDE0BE7__INCLUDED_)
#define _LIST_INCLUDES_HPP__843DB412_3AA8_4BCF_8081_AA4A5FDE0BE7__INCLUDED_
///////////////////////////////////////////////////////////////////////////////
// decide, which C++ lexer to use (choose one!)
#define USE_SLEX_CPP_LEXER // use the SLex based C++ lexer
//#define USE_RE2C_CPP_LEXER // use the Re2C based C++ lexer
///////////////////////////////////////////////////////////////////////////////
//
// Use the separation model for the instantiation of the lex_functor object.
//
// If this is defined, you should explicitly instantiate the lex_functor
// template with the correct parameters in a separate compilation unit of
// your program (see the file instantiate_slex_lexer.cpp).
//
// To use the lexer inclusion model, undefine the following
//
#define CPP_SEPARATE_LEXER_INSTANTIATION
///////////////////////////////////////////////////////////////////////////////
// Decide, whether to use the separate compilation model for the instantiation
// of the grammar objects.
//
// If this is defined, you should explicitly instantiate the grammar
// templates with the correct parameters in a separate compilation unit of
// your program (see the files instantiate_cpp_grammar.cpp).
//
// To use the grammar inclusion model, undefine the following
//
#define CPP_SEPARATE_GRAMMAR_INSTANTIATION
///////////////////////////////////////////////////////////////////////////////
// You shouldn't have to change anything below
// Compiler tuning that is applied only when BOOST_MSVC is defined and the
// Comeau front end (__COMO__) is not in use.
// NOTE(review): BOOST_MSVC is not defined in this header; presumably a Boost
// config header has to be included before this file for the block to take
// effect -- confirm.
#if defined(BOOST_MSVC) && !defined(__COMO__)
#pragma warning (disable: 4355) // 'this' used in base member initializer list
#pragma inline_depth(255)
#pragma inline_recursion(on)
#endif // defined(BOOST_MSVC) && !defined(__COMO__)
#endif // !defined(_LIST_INCLUDES_HPP__843DB412_3AA8_4BCF_8081_AA4A5FDE0BE7__INCLUDED_)

View File

@@ -0,0 +1,25 @@
/*=============================================================================
A Standard compliant C++ preprocessor
Sample: List include dependencies of a given source file
Version number
Copyright (c) 2001-2003 Hartmut Kaiser
http://spirit.sourceforge.net/
Permission to copy, use, modify, sell and distribute this software
is granted provided this copyright notice appears in all copies.
This software is provided "as is" without express or implied
warranty, and with no claim as to its suitability for any purpose.
See Copyright.txt for full copyright notices and acknowledgements.
=============================================================================*/
#if !defined(_LIST_INCLUDES_VERSION_HPP__FF662D6C_C3E6_4BEC_A062_5D9BD7415EBF__INCLUDED_)
#define _LIST_INCLUDES_VERSION_HPP__FF662D6C_C3E6_4BEC_A062_5D9BD7415EBF__INCLUDED_
// Version number components of the list_includes sample
// (major.minor.subminor).
#define LIST_INCLUDES_VERSION_MAJOR 0
#define LIST_INCLUDES_VERSION_MINOR 1
#define LIST_INCLUDES_VERSION_SUBMINOR 0
#endif // !defined(_LIST_INCLUDES_VERSION_HPP__FF662D6C_C3E6_4BEC_A062_5D9BD7415EBF__INCLUDED_)

View File

@@ -0,0 +1,451 @@
//
// C++ Lexer implemented with Spirit (http://spirit.sourceforge.net/)
//
// Copyright© 2002 Juan Carlos Arevalo-Baeza, All rights reserved
// email: jcab@JCABs-Rumblings.com
// Created: 8-Nov-2002
//
#include "cpp_lexer.hpp"
#include <iostream>
#include <fstream>
#include <stdio.h>
#include <boost/spirit/core.hpp>
#include <boost/spirit/utility/functor_parser.hpp>
#include <boost/spirit/attribute.hpp>
#include <boost/spirit/symbols.hpp>
#include <boost/phoenix/primitives.hpp>
#include <boost/phoenix/casts.hpp>
#include <boost/phoenix/binders.hpp>
///////////////////////////////////////////////////////////////////////////////
// used namespaces
using namespace boost::spirit;
//using namespace phoenix;
//using namespace std;
using std::stringstream;
using std::string;
using std::cout;
using std::cerr;
using std::endl;
using phoenix::var;
using phoenix::val;
using phoenix::value;
using phoenix::actor;
using phoenix::arg1;
using phoenix::arg2;
using phoenix::construct_;
using phoenix::function_ptr;
///////////////////////////////////////////////////////////////////////////////
// Utility parsers for debugging and error handling.
namespace {
template <typename ErrorDescrT>
class trace_ {
public:
typedef nil_t result_t;
trace_(ErrorDescrT const& what): info(what) {}
template <typename ScannerT>
int
operator()(ScannerT const& scan, result_t& result) const {
file_position lc = (*scan).filePos;
cout << lc << "Trace: " << info() << "\n";
return 0;
}
private:
ErrorDescrT info;
};
template < class ActorT >
static
functor_parser<trace_<ActorT> >
trace_p(ActorT const& str) {
return trace_<ActorT>(str);
}
static
functor_parser<trace_<actor<value<std::string> > > >
trace_p(std::string const& str) {
return trace_<actor<value<std::string> > >(val(str));
}
static
functor_parser<trace_<actor<value<char const*> > > >
trace_p(char const* str) {
return trace_<actor<value<char const*> > >(val(str));
}
}
///////////////////////////////////////////////////////////////////////////////
// The C++ lexer grammars.
namespace {
// Closure with a single member, 'result_', of type ResultT. The grammars
// below use result_closure<T>::context_t as their context and assign their
// result to self.result_.
template < typename ResultT >
struct result_closure: closure<result_closure<ResultT>, ResultT> {
member1 result_;
};
// Specialization for nil_t: there is no result to carry, so only the plain
// parser_context is exposed as context_t.
template <>
struct result_closure<nil_t> {
typedef parser_context context_t;
};
// Grammar recognizing an identifier. The matched text is stored as a
// std::string in the closure member result_.
// Note that the trailing declarator also defines a global object named
// IDENTIFIER, which is what token_lexer uses.
struct IDENTIFIER:
grammar<IDENTIFIER, result_closure<std::string>::context_t>
{
template < typename ScannerT >
struct definition {
typedef rule<ScannerT> rule_t;
rule_t main;
// Start rule of the grammar.
rule_t const& start() const {
return main;
}
definition(IDENTIFIER const& self) {
main = (
lexeme_d[
// first character: a letter, '_' or '$';
// following characters: letters, digits, '_' or '$'
((alpha_p | '_' | '$') >> *(alnum_p | '_' | '$'))
// arg1/arg2 are the begin/end iterators of the match
[self.result_ = construct_<std::string>(arg1, arg2)]
]
);
}
};
} IDENTIFIER;
// Grammar recognizing a string literal, optionally prefixed with 'L'/'l' and
// optionally followed by further adjacent string literals.
//
// result_ receives the text of the first literal from its start (including
// the optional prefix and the opening quote) up to, but not including, the
// closing quote. For every following adjacent literal only the characters
// between its quotes are appended.
//
// Note that the trailing declarator also defines a global object named
// STRING_LITERAL, which is what token_lexer uses.
struct STRING_LITERAL:
    grammar<STRING_LITERAL, result_closure<std::string>::context_t>
{
    template < typename ScannerT >
    struct definition {
        typedef rule<ScannerT> rule_t;

        rule_t main;

        // Set to true by the semantic action attached to the 'L' prefix.
        // This has to be a member rather than a local of the constructor:
        // var(is_wchar) refers to the variable, and the action stored in
        // 'main' runs at parse time, long after the constructor has returned.
        // (The flag is not read anywhere in this grammar.)
        bool is_wchar;

        // Start rule of the grammar.
        rule_t const& start() const {
            return main;
        }

        definition(STRING_LITERAL const& self)
            : is_wchar(false)
        {
            main = (
                lexeme_d[
                    (
                        !(nocase_d[chlit<>('L')] [var(is_wchar) = true])
                        >> '\"'
                        // body: escaped backslash, escaped quote, or any
                        // character other than the closing quote
                        >> *(str_p("\\\\") | "\\\"" | anychar_p - '\"' )
                    )
                    [self.result_ = construct_<std::string>(arg1, arg2)]
                    >> chlit<>('\"')
                ] >> *lexeme_d[
                    // adjacent literal: concatenate its body to the result
                    !nocase_d[chlit<>('L')] >> '\"'
                    >> ( *( str_p("\\\\") | "\\\"" | anychar_p - '\"' ) )
                    [self.result_ += construct_<std::string>(arg1, arg2)]
                    >> chlit<>('\"')
                ]
            );
        }
    };
} STRING_LITERAL;
// Grammar recognizing a character literal, optionally prefixed with 'L'/'l'.
//
// result_ receives the text from the start of the literal (including the
// optional prefix and the opening quote) up to, but not including, the
// closing quote.
//
// Note that the trailing declarator also defines a global object named
// CHARACTER_LITERAL, which is used by token_lexer and INT_CONSTANT.
struct CHARACTER_LITERAL:
    grammar<CHARACTER_LITERAL, result_closure<std::string>::context_t>
{
    template < typename ScannerT >
    struct definition {
        typedef rule<ScannerT> rule_t;

        rule_t main;

        // Set to true by the semantic action attached to the 'L' prefix.
        // This has to be a member rather than a local of the constructor:
        // var(is_wchar) refers to the variable, and the action stored in
        // 'main' runs at parse time, long after the constructor has returned.
        // (The flag is not read anywhere in this grammar.)
        bool is_wchar;

        // Start rule of the grammar.
        rule_t const& start() const {
            return main;
        }

        definition(CHARACTER_LITERAL const& self)
            : is_wchar(false)
        {
            main = (
                lexeme_d[
                    (
                        !(nocase_d[chlit<>('L')] [var(is_wchar) = true])
                        >> '\''
                        // body (at least one item): escaped backslash,
                        // escaped quote, or any character other than the
                        // closing quote
                        >> +(str_p("\\\\") | "\\\'" | anychar_p - '\'' )
                    )
                    [self.result_ = construct_<std::string>(arg1, arg2)]
                    >> chlit<>('\'')
                ]
            );
        }
    };
} CHARACTER_LITERAL;
// Grammar recognizing an integer constant: hexadecimal, octal, decimal, or a
// character literal. The matched text is stored as a std::string in the
// closure member result_.
// Note that the trailing declarator also defines a global object named
// INT_CONSTANT, which is what token_lexer uses.
struct INT_CONSTANT:
grammar<INT_CONSTANT, result_closure<std::string>::context_t>
{
template < typename ScannerT >
struct definition {
typedef rule<ScannerT> rule_t;
rule_t main;
// Start rule of the grammar.
rule_t const& start() const {
return main;
}
definition(INT_CONSTANT const& self) {
subrule<0> submain;
subrule<1> hex_int;
subrule<2> oct_int;
subrule<3> dec_int;
subrule<4> char_int;
subrule<5> suffix_part;
main = (
(
// alternatives are tried in the order written: hex, octal, decimal, char
submain =
(hex_int | oct_int | dec_int | char_int) [
self.result_ =
construct_<std::string>(arg1, arg2)
],
hex_int =
lexeme_d[
'0' >> nocase_d[chlit<>('x')] // prefix
>> +xdigit_p // the number
>> suffix_part // suffix
],
// requires at least one octal digit after the leading '0', so a lone "0"
// is matched by dec_int instead
oct_int =
lexeme_d[
'0' // prefix
>> +range<>('0', '7') // the number
>> suffix_part // suffix
],
dec_int =
lexeme_d[
+digit_p // the number
>> suffix_part // suffix
],
// a character literal is accepted as an integer constant
char_int = CHARACTER_LITERAL,
// NOTE(review): accepts at most ONE suffix character ('l' or 'u', any case);
// combined suffixes such as "ul" are not consumed here -- confirm intended.
suffix_part = !nocase_d[chlit<>('l') | chlit<>('u')]
)
);
}
};
} INT_CONSTANT;
// Grammar recognizing a floating point constant. The matched text is stored
// as a std::string in the closure member result_.
// Note that the trailing declarator also defines a global object named
// FLOAT_CONSTANT, which is what token_lexer uses.
struct FLOAT_CONSTANT:
grammar<FLOAT_CONSTANT, result_closure<std::string>::context_t>
{
template < typename ScannerT >
struct definition {
typedef rule<ScannerT> rule_t;
rule_t main;
// Start rule of the grammar.
rule_t const& start() const {
return main;
}
definition(FLOAT_CONSTANT const& self) {
subrule<0> submain;
subrule<1> exponent_part;
main = (
(
submain =
lexeme_d[
(
// form 1: ".digits" with an optional exponent
chlit<>('.') >> +digit_p >> !exponent_part
// form 2: "digits" followed by a fraction part (".digits*"), an exponent,
// or both (Spirit's sequential-or operator '||')
| +digit_p >> (
(chlit<>('.') >> *digit_p)
|| exponent_part
)
// optional single suffix character 'l' or 'f' (any case)
) >> !nocase_d[chlit<>('l') | chlit<>('f')]
]
[
self.result_ =
construct_<std::string>(arg1, arg2)
],
// 'e' or 'E', optional sign, at least one digit
exponent_part =
nocase_d[chlit<>('e')]
>> !(chlit<>('+') | chlit<>('-')) >> +digit_p
)
);
}
};
} FLOAT_CONSTANT;
}
///////////////////////////////////////////////////////////////////////////////
// TokenID type tag.
// Internal transfer. Defined in cpp_lexer_tokens.cpp.
extern boost::spirit::parser<boost::spirit::symbols<TokenID> > const& cpp_operator_p;
namespace {
struct get_file_position_parser {
file_position& filePos;
get_file_position_parser(file_position& filePos_):
filePos(filePos_)
{}
typedef nil_t result_t;
template < typename ScannerT >
int operator()(ScannerT const& scan, result_t& result) const {
filePos = scan.first.get_position();
return 0;
}
};
functor_parser<get_file_position_parser>
get_file_position_p(file_position& filePos)
{
return get_file_position_parser(filePos);
}
// Top level lexer grammar: each successful parse recognizes exactly one token
// and delivers it through the closure member result_ (of type Token).
// Note that the trailing declarator also defines a global object named
// token_lexer, which lex_input_interface_iterator::get() uses.
struct token_lexer:
grammar<token_lexer, result_closure<Token>::context_t>
{
template < typename ScannerT >
struct definition {
typedef rule<ScannerT> rule_t;
rule_t main;
// Start rule of the grammar.
rule_t const& start() const {
return main;
}
// Scratch state written by the semantic actions in the rule below. These are
// members so that they live as long as the rule that refers to them.
Token token;
file_position filePos;
definition(token_lexer const& self);
};
} token_lexer;
// Builds the token rule. The Set*Token actions are not defined in this file
// (presumably they come from cpp_lexer_token.hpp -- confirm); each is
// constructed from 'token' and 'filePos'.
template < typename ScannerT >
token_lexer::definition<ScannerT>::definition(token_lexer const& self)
{
subrule<0> submain;
subrule<1> skip_until_eol;
subrule<2> singleline_comment;
subrule<3> multiline_comment;
subrule<4> directive;
main = (
submain =
// skip blanks and backslash-newline line continuations
*(blank_p | ('\\' >> eol_p))
// remember where the token starts
>> get_file_position_p(filePos)
>> (
// an end of line, optionally followed by a whole preprocessor directive
// line, yields an EOL token
// NOTE(review): a directive is only recognized directly after an end of
// line, so one on the very first line of the input takes a different path
// -- confirm this is handled.
eol_p >> (
// TODO: Don't ignore directives like this.
directive [SetEOLToken (token, filePos)]
| epsilon_p [SetEOLToken (token, filePos)]
)
| singleline_comment [SetCommentToken (token, filePos)]
| multiline_comment [SetCommentToken (token, filePos)]
| CHARACTER_LITERAL [SetStringToken (token, filePos)]
| STRING_LITERAL [SetStringToken (token, filePos)]
// FLOAT_CONSTANT is tried before INT_CONSTANT
| FLOAT_CONSTANT [SetFloatingToken (token, filePos)]
| INT_CONSTANT [SetIntegerToken (token, filePos)]
| IDENTIFIER [SetIdentifierToken(token, filePos)]
| cpp_operator_p [SetOperatorToken (token, filePos)]
)
// publish the assembled token as the grammar's result
>> epsilon_p [self.result_ = var(token)],
// everything up to (not including) the end of line; backslash-newline
// continues the line
skip_until_eol = *(('\\' >> eol_p) | (anychar_p - eol_p)),
singleline_comment = "//" >> skip_until_eol,
multiline_comment = "/*" >> *(anychar_p - "*/") >> "*/",
directive = *blank_p >> '#' >> skip_until_eol
);
}
template < typename IteratorT >
struct lex_input_interface_iterator: lex_input_interface {
public:
typedef Token result_type;
unsigned refCount;
IteratorT first;
scanner<IteratorT> scan;
lex_input_interface_iterator(IteratorT const& first_,
IteratorT const& last_):
refCount(1),
first(first_),
scan (first, last_)
{}
virtual void add_ref() {
++refCount;
}
virtual void dec_ref() {
--refCount;
if (refCount == 0) {
delete this;
}
}
virtual Token get() {
Token result;
if (token_lexer[assign(result)].parse(scan)) {
return result;
} else {
return eof();
}
}
virtual boost::spirit::file_position get_position() {
return scan.first.get_position();
}
};
template < typename IteratorT >
lex_input_interface*
NewLexerImpl(IteratorT const& first,
IteratorT const& last,
char const* fname = "<filename>")
{
typedef position_iterator<IteratorT> Iterator;
Iterator pfirst(first, last, fname);
Iterator plast;
return new lex_input_interface_iterator<Iterator>(pfirst, plast);
}
}
// Return the shared end-of-input token: default file position, empty text,
// id EOF_token. The object is created on first use.
Token const& lex_input_interface::eof()
{
    static Token const eof_token = Token(file_position(), "", EOF_token);
    return eof_token;
}
// Public factory declared in cpp_lexer.hpp: creates a lexer for the character
// range [first, last); 'fname' is the file name passed on for the position
// information. Forwards to the NewLexerImpl template above, which allocates
// the object with 'new'.
lex_input_interface*
NewLexer(char const* first, char const* last, char const* fname)
{
return NewLexerImpl(first, last, fname);
}
// Definition of the static end-of-input token of lex_input_policy:
// default file position, empty text, id EOF_token.
Token const lex_input_policy::eof
= Token(boost::spirit::file_position(), "", EOF_token);

View File

@@ -0,0 +1,114 @@
//
// C++ Lexer implemented with Spirit (http://spirit.sourceforge.net/)
//
// Copyright© 2002 Juan Carlos Arevalo-Baeza, All rights reserved
// email: jcab@JCABs-Rumblings.com
// Created: 8-Nov-2002
//
// The basics:
// The lexer is an iterator that iterates over tokens generated on the fly
// from a text input.
// In order to use it, you create a "lex_iterator" object, and initialize it
// with the beginning and end iterators of the text input, and a string that
// represents the name of the input file (for inclusion into the tokens).
// This purposefully doesn't export any Spirit parser definitions, so access
// to the input sequence must be done through a generic run-time interface
// (virtual functions). The multi_pass iterator adapter helps handle this
// very well.
//
#ifndef cpp_lexer_hpp_included
#define cpp_lexer_hpp_included
#include "cpp_lexer_token.hpp"
#include <boost/spirit/iterator/multi_pass.hpp>
///////////////////////////////////////////////////////////////////////////////
// Lexer proper.
// Abstract run-time interface to a token source. Instances are reference
// counted through add_ref()/dec_ref(); the destructor is protected, so
// client code cannot delete an instance directly.
struct lex_input_interface {
protected:
virtual ~lex_input_interface() {}
public:
// Increment / decrement the reference count.
virtual void add_ref() = 0;
virtual void dec_ref() = 0;
// Return the next token of the input.
virtual Token get() = 0;
// Return the current position within the input.
virtual boost::spirit::file_position get_position() = 0;
// Return the token that marks the end of the input.
Token const& eof();
};
// Create a lexer for the character range [first, last). 'fname' names the
// input file for the position information of the tokens.
// NOTE(review): the result is presumably handed out with one reference held
// for the caller and released through dec_ref() -- confirm against the
// implementation.
lex_input_interface*
NewLexer(char const* first,
char const* last,
char const* fname = "<filename>");
// Functor used as the input of the multi_pass iterator below: every call
// yields the next token of the underlying lexer. Copies share one lexer
// object, whose lifetime is managed through add_ref()/dec_ref().
struct lex_input_policy {
public:
    typedef Token result_type;

    lex_input_interface* interf;    // shared, reference counted lexer

    // Create a new lexer for [first, last); this object takes over the
    // reference returned by NewLexer().
    template < typename IteratorT >
    lex_input_policy(IteratorT const& first,
                     IteratorT const& last,
                     char const* fname = "<filename>")
        : interf(NewLexer(first, last, fname))
    {
    }

    // Share the lexer of 'other'.
    lex_input_policy(lex_input_policy const& other)
        : interf(other.interf)
    {
        interf->add_ref();
    }

    ~lex_input_policy()
    {
        interf->dec_ref();
    }

    lex_input_policy& operator=(lex_input_policy const& other)
    {
        // nothing to do when both already share the same lexer
        if (other.interf == interf) {
            return *this;
        }

        // acquire the new reference before dropping the old one
        other.interf->add_ref();
        interf->dec_ref();
        interf = other.interf;
        return *this;
    }

    // Fetch the next token.
    result_type operator()()
    {
        return interf->get();
    }

    // Token signalling the end of the input.
    static Token const eof;
};
// The token iterator handed to client code: a multi_pass iterator that uses
// lex_input_policy as a functor input. A default constructed lex_iterator is
// what the test driver uses as the end of the token sequence.
struct lex_iterator:
boost::spirit::multi_pass<
lex_input_policy,
boost::spirit::multi_pass_policies::functor_input
// the remaining multi_pass policies are left at their defaults
/* ,
multi_pass_policies::first_owner,
multi_pass_policies::no_check,
multi_pass_policies::std_deque
*/
>
{
typedef boost::spirit::multi_pass<
lex_input_policy,
boost::spirit::multi_pass_policies::functor_input
> base_t;
typedef lex_iterator self_t;
// End-of-sequence iterator.
lex_iterator() {}
// Iterator over the tokens of the text [first, last); 'fname' names the
// input file for the position information of the tokens.
template < typename IteratorT >
lex_iterator(IteratorT const& first,
IteratorT const& last,
char const* fname = "<filename>"):
base_t(lex_input_policy(first, last, fname))
{}
};
#endif

View File

@@ -0,0 +1,146 @@
//
// C++ Lexer implemented with Spirit (http://spirit.sourceforge.net/)
//
// Copyright© 2002 Juan Carlos Arevalo-Baeza, All rights reserved
// email: jcab@JCABs-Rumblings.com
// Created: 8-Nov-2002
//
#include "cpp_lexer.hpp"
#include <boost/spirit/core.hpp>
#include <boost/spirit/utility/functor_parser.hpp>
#include <boost/phoenix/primitives.hpp>
#include <boost/phoenix/composite.hpp>
#include <boost/phoenix/functions.hpp>
#include <boost/phoenix/operators.hpp>
#include <boost/phoenix/special_ops.hpp>
#include <boost/phoenix/binders.hpp>
#include <boost/phoenix/statements.hpp>
///////////////////////////////////////////////////////////////////////////////
// used namespaces
using namespace boost::spirit;
using phoenix::var;
///////////////////////////////////////////////////////////////////////////////
// Functor for functor_parser<> working on a token stream: it copies the file
// position of the current token into the referenced variable without
// consuming the token. At the end of the input it returns -1 and leaves the
// variable untouched.
struct get_file_position_parser {
    typedef nil_t result_t;

    boost::spirit::file_position& filePos;  // destination, owned by the caller

    get_file_position_parser(boost::spirit::file_position& target)
        : filePos(target)
    {}

    template < typename ScannerT >
    int operator()(ScannerT const& scan, result_t& result) const
    {
        if (!scan.at_end()) {
            filePos = scan.first->filePos;
            return 0;
        }
        return -1;
    }
};

// Create a parser that stores the position of the current token into
// 'filePos' whenever it is invoked.
boost::spirit::functor_parser<get_file_position_parser>
get_file_position_p(boost::spirit::file_position& filePos)
{
    get_file_position_parser const recorder(filePos);
    return recorder;
}
///////////////////////////////////////////////////////////////////////////////
// Test grammar working on the token stream: it echoes the tokens to
// std::cout, one source line per output line.
struct test_grammar: grammar<test_grammar> {
template < typename ScannerT >
struct definition {
typedef rule<ScannerT> rule_t;
rule_t main;
// Start rule of the grammar.
rule_t const& start() const {
return main;
}
// Written by get_file_position_p and printed by the action that follows it;
// a member so that it lives as long as the rule referring to it.
boost::spirit::file_position filePos;
definition(test_grammar const& self) {
main =
*(
// the keyword 'using' is printed as " using "
ch_p(Kwd_using)[var(std::cout) << " using "]
// an end-of-line token starts a new output line, prefixed with the position
// of the token that follows it
| ch_p(EOL_token) >> get_file_position_p(filePos)[var(std::cout) << "\n" << var(filePos)]
// every other token is printed via OutToken, followed by a blank
| anychar_p[OutToken][var(std::cout) << " "]
);
}
};
};
///////////////////////////////////////////////////////////////////////////////
// main entrypoint
// Test driver: lexes and parses one input file and echoes its tokens.
//
// Usage: <program> [file [rule]]
//   file   file to parse; defaults to "cpp_lexer.cpp"
//   rule   only echoed to cerr, otherwise unused
//
// Returns 1 if the input file cannot be opened, sized or read completely;
// 0 otherwise (independent of whether the parse succeeded).
int
main(int argc, char* argv[])
{
    // Start grammar definition
    std::cerr << "C++ Lexer implemented with Spirit ..." << std::endl;

    char const* fname = "";

    // main driver code
    if (3 == argc) {
        std::cerr << "Using rule " << argv[2] << std::endl;
        fname = argv[1];
    } else if (2 == argc) {
        fname = argv[1];
    } else {
        std::cerr << "No filename given" << std::endl;
        fname = "cpp_lexer.cpp";
        std::cerr << "Parsing " << fname << std::endl;
    }

    FILE* f = fopen(fname, "rb");
    if (!f) {
        std::cerr << "Cannot open input file: " << fname << std::endl;
        return 1;   // nothing to read from
    }

    // determine the file size by seeking to the end
    long size = -1;
    if (0 == fseek(f, 0, SEEK_END)) {
        size = ftell(f);
    }
    if (size < 0 || 0 != fseek(f, 0, SEEK_SET)) {
        std::cerr << "Cannot determine size of input file: " << fname
                  << std::endl;
        fclose(f);
        return 1;
    }

    // read the whole file into memory
    char *buf = new char[size];
    size_t const bytes_read = fread(buf, 1, size, f);
    fclose(f);
    if (bytes_read != size_t(size)) {
        std::cerr << "Cannot read input file: " << fname << std::endl;
        delete[] buf;
        return 1;
    }

    std::cout << "File size: " << size << " bytes\n";

    {
        // The iterators refer into 'buf'; this scope makes sure they are
        // gone before the buffer is released.
        Token token;
        test_grammar grammar;
        lex_iterator first(buf, buf+size, fname);
        lex_iterator last;

        // comment tokens are skipped
        parse_info<lex_iterator> result =
            parse(
                first, last,
                grammar,
                ch_p(Comment_token)
            )
        ;

        if (result.full) {
            std::cerr << "\n" << fname << " Parses OK" << std::endl;
        } else {
            std::cerr << "\n" << fname << " Fails Parsing" << std::endl;
            std::cerr << "These tokens follow:\n";
            // show up to 10 tokens following the point of failure
            for (int i = 0; i < 10; i++)
            {
                if (result.stop == last) {
                    break;
                }
                PrintToken(*result.stop++);
            }
        }
    }

    delete[] buf;
    return 0;
}

View File

@@ -0,0 +1,370 @@
//
// C++ Lexer token definitions
//
// Copyright© 2002 Juan Carlos Arevalo-Baeza, All rights reserved
// email: jcab@JCABs-Rumblings.com
// Created: 8-Nov-2002
//
#include "cpp_lexer_token.hpp"
#include <boost/spirit/symbols.hpp>
#include <map>
#include <iostream>
///////////////////////////////////////////////////////////////////////////////
// File position tools.
// Stream output for a file position, formatted as "file:line:column: "
// (note the trailing colon and blank).
// NOTE(review): the operator is declared inside namespace std although
// file_position lives in boost::spirit; the C++ standard does not sanction
// adding such overloads to std. Moving it would have to be coordinated with
// its declaration elsewhere -- confirm before changing.
namespace std {
std::ostream& operator<<(std::ostream& out, boost::spirit::file_position const& lc)
{
return out <<
lc.file << ":" <<
lc.line << ":" <<
lc.column << ": ";
}
}
///////////////////////////////////////////////////////////////////////////////
// TokenID database definition.
// Two-way mapping between token texts and TokenIDs.
//   text -> id : the boost::spirit::symbols<TokenID> base class
//   id -> text : the map 'list', which keeps the FIRST text registered for
//                an id
// 'next' is the id handed out to the next text that is added without an
// explicit id.
class TokenDB: public boost::spirit::symbols<TokenID> {
public:
    typedef boost::spirit::symbols<TokenID> super_t;

    TokenID next;
    std::map<TokenID, std::string> list;

    TokenDB(TokenID next_)
        : next(next_)
    {}

    // Register 'text' under the given id and return that id.
    TokenID add(char const* text, TokenID id)
    {
        // insert() leaves an already existing entry alone, so the first
        // spelling registered for an id is the one that is kept
        list.insert(
            std::map<TokenID, std::string>::value_type(id, std::string(text)));
        super_t::add(text, id);
        return id;
    }

    // Register 'text' under the next free id and return that id.
    TokenID add(char const* text)
    {
        TokenID const fresh = next;
        next = TokenID(next + 1);
        return add(text, fresh);
    }

    TokenID add(std::string const& text, TokenID id)
    {
        char const* const raw = text.c_str();
        return add(raw, id);
    }

    TokenID add(std::string const& text)
    {
        char const* const raw = text.c_str();
        return add(raw);
    }

    // Look up the id stored for 'text'; the result is whatever
    // boost::spirit::find() returns for the symbol table.
    TokenID* find(char const* text) const
    {
        super_t const& table = *static_cast<super_t const*>(this);
        return boost::spirit::find(table, text);
    }

    TokenID* find(std::string const& text) const
    {
        return find(text.c_str());
    }

    // Look up the text registered for 'id'; an empty string is returned for
    // an unknown id.
    std::string const& find(TokenID id) const
    {
        std::map<TokenID, std::string>::const_iterator pos = list.find(id);
        if (pos != list.end()) {
            return pos->second;
        }
        static std::string const dummy;
        return dummy;
    }
};
namespace {
// Identifier database preloaded with the C keywords, the alternative operator
// spellings (and, or, not, ...) and the identifier "_". Ids for further
// identifiers are handed out starting at Ident_next.
struct c_keywords_db: TokenDB {
c_keywords_db(): TokenDB(Ident_next) {
// C keywords
add("auto" , Kwd_auto );
add("break" , Kwd_break );
add("case" , Kwd_case );
add("char" , Kwd_char );
add("const" , Kwd_const );
add("continue" , Kwd_continue );
add("default" , Kwd_default );
add("do" , Kwd_do );
add("double" , Kwd_double );
add("else" , Kwd_else );
add("enum" , Kwd_enum );
add("extern" , Kwd_extern );
add("float" , Kwd_float );
add("for" , Kwd_for );
add("goto" , Kwd_goto );
add("if" , Kwd_if );
add("int" , Kwd_int );
add("long" , Kwd_long );
add("register" , Kwd_register );
add("return" , Kwd_return );
add("short" , Kwd_short );
add("signed" , Kwd_signed );
add("sizeof" , Kwd_sizeof );
add("static" , Kwd_static );
add("struct" , Kwd_struct );
add("switch" , Kwd_switch );
add("typedef" , Kwd_typedef );
add("union" , Kwd_union );
add("unsigned" , Kwd_unsigned );
add("void" , Kwd_void );
add("volatile" , Kwd_volatile );
add("while" , Kwd_while );
// alternative spellings of operators; they are mapped to operator ids
add("and_eq" , Op_BitAnd_Assign );
add("xor_eq" , Op_BitXor_Assign );
add("or_eq" , Op_BitOr_Assign );
add("and" , Op_And );
add("or" , Op_Or );
add("not_eq" , Op_NE );
add("bitand" , Op_Ampersand );
add("not" , Op_Bang );
// NOTE(review): "compl" maps to Op_Compl whereas "~" is registered as
// Op_Tilde in c_operators_db -- confirm the two ids are meant to differ.
add("compl" , Op_Compl );
add("xor" , Op_BitXor );
add("bitor" , Op_BitOr );
// the identifier "_" has a fixed id of its own
add("_" , Ident__ );
}
};
// Extends the C keyword database with the keywords that are specific to C++.
struct cpp_keywords_db: c_keywords_db {
cpp_keywords_db() {
add("asm" , Kwd_asm );
add("bool" , Kwd_bool );
add("catch" , Kwd_catch );
add("class" , Kwd_class );
add("const_cast" , Kwd_const_cast );
add("delete" , Kwd_delete );
add("dynamic_cast" , Kwd_dynamic_cast );
add("explicit" , Kwd_explicit );
add("export" , Kwd_export );
add("false" , Kwd_false );
add("friend" , Kwd_friend );
add("inline" , Kwd_inline );
add("mutable" , Kwd_mutable );
add("namespace" , Kwd_namespace );
add("new" , Kwd_new );
add("operator" , Kwd_operator );
add("private" , Kwd_private );
add("protected" , Kwd_protected );
add("public" , Kwd_public );
add("reinterpret_cast" , Kwd_reinterpret_cast);
add("static_cast" , Kwd_static_cast );
add("template" , Kwd_template );
add("this" , Kwd_this );
add("throw" , Kwd_throw );
add("true" , Kwd_true );
add("try" , Kwd_try );
add("typeid" , Kwd_typeid );
add("typename" , Kwd_typename );
add("using" , Kwd_using );
add("virtual" , Kwd_virtual );
add("wchar_t" , Kwd_wchar_t );
}
};
// Operator database preloaded with the C operators and punctuators. Ids for
// further entries are handed out starting at Op_next.
struct c_operators_db: TokenDB {
c_operators_db(): TokenDB(Op_next) {
add("...", Op_Ellipsis );
add(">>=", Op_Right_Assign );
add("<<=", Op_Left_Assign );
add("+=" , Op_Add_Assign );
add("-=" , Op_Sub_Assign );
add("*=" , Op_Mul_Assign );
add("/=" , Op_Div_Assign );
add("%=" , Op_Mod_Assign );
add("&=" , Op_BitAnd_Assign );
add("^=" , Op_BitXor_Assign );
add("|=" , Op_BitOr_Assign );
add(">>" , Op_Right );
add("<<" , Op_Left );
add("++" , Op_Inc );
add("--" , Op_Dec );
add("->" , Op_Ptr );
add("&&" , Op_And );
add("||" , Op_Or );
add("<=" , Op_LE );
add(">=" , Op_GE );
add("==" , Op_EQ );
add("!=" , Op_NE );
add(";" , Op_Semicolon );
add("{" , Op_Left_Brace );
add("}" , Op_Right_Brace );
// digraphs share the id of the brace they stand for; because TokenDB keeps
// only the first text per id, the id maps back to "{" / "}"
add("<%" , Op_Left_Brace );
add("%>" , Op_Right_Brace );
add("," , Op_Comma );
add(":" , Op_Colon );
add("=" , Op_Assign );
add("(" , Op_Left_Paren );
add(")" , Op_Right_Paren );
add("[" , Op_Left_Bracket );
add("]" , Op_Right_Bracket );
// digraphs share the id of the bracket they stand for (see above)
add("<:" , Op_Left_Bracket );
add(":>" , Op_Right_Bracket );
add("." , Op_Dot );
add("&" , Op_Ampersand );
add("!" , Op_Bang );
add("~" , Op_Tilde );
add("-" , Op_Minus );
add("+" , Op_Plus );
add("*" , Op_Star );
add("/" , Op_Slash );
add("%" , Op_Percent );
add("<" , Op_LT );
add(">" , Op_GT );
add("^" , Op_BitXor );
add("|" , Op_BitOr );
add("?" , Op_Question );
// not C operators, but registered so that these characters yield a token
add("@" , Op_At );
add("$" , Op_Dollar );
}
};
// Extends the C operator database with the operators that are specific to
// C++.
struct cpp_operators_db: c_operators_db {
cpp_operators_db() {
add("::" , Op_Scope );
add(".*" , Op_Member_Ref);
add("->*", Op_Member_Ptr);
}
};
struct integers_db: TokenDB {
integers_db(): TokenDB(Integer_next) {
add("0", Integer_zero);
}
};
// The token databases used by the functions in this file.
//
// Identifiers and keywords; searched and extended by MakeIdentifierTokenID().
cpp_keywords_db identifierDB;
// Operators and punctuators; searched by SetOperatorToken and exposed
// through cpp_operator_p.
cpp_operators_db operatorDB;
// Integer literals; handed to SetLiteralToken by SetIntegerToken().
integers_db integerDB;
// Floating-point literals; handed to SetLiteralToken by SetFloatingToken().
TokenDB floatingDB (Floating_next);
// String and character literals; handed to SetLiteralToken by
// SetStringToken().
TokenDB stringDB (String_next);
}
// Internal transfer. Used in cpp_lexer.cpp.
//
// A reference to operatorDB typed as a Spirit parser over
// symbols<TokenID>, so that another translation unit can use the operator
// table as a parser without seeing the database classes defined here.
// NOTE(review): presumably parser<symbols<TokenID> > is a base class of
// TokenDB -- the class definition is not visible here; confirm.
extern
boost::spirit::parser<boost::spirit::symbols<TokenID> > const&
cpp_operator_p = operatorDB;
// Map an identifier's spelling to its TokenID.
//
// A spelling that identifierDB already knows yields the ID stored for it;
// otherwise the spelling is added to identifierDB and the value returned by
// that add() call is passed back.
TokenID MakeIdentifierTokenID(std::string const& text)
{
    TokenID const* const known = identifierDB.find(text);
    if (known != 0) {
        return *known;
    }
    return identifierDB.add(text);
}
// Print a one-line description of a token to std::cout (testing aid).
//
// The file position is written first, then the token category and, for
// identifiers, keywords, operators and literals, the quoted token text.
// EOL and comment tokens print only their category.  For any other token
// type nothing is written after the position.
void PrintToken_class::operator()(Token const& token) const {
    struct { // Local function.
        void operator()(char const* label, Token const& tok) {
            std::cout << label << "'" << tok.text << "'\n";
        }
    } OutText;

    std::cout << token.filePos;

    // Switch on an unsigned value.  Several TokenType enumerators (for
    // example EOLTokenType = 0xD0000000) do not fit in a 32-bit int, so
    // using them as case labels of an int switch is a narrowing conversion,
    // which C++11 and later reject.
    unsigned const type =
        static_cast<unsigned>(token.id) & static_cast<unsigned>(TokenTypeMask);
    switch (type) {
    case IdentifierTokenType:
        // IDs below Kwd_last are the predefined keywords.
        if (token.id >= Kwd_last) {
            OutText("Identifier: ", token);
        } else {
            OutText("Keyword: ", token);
        }
        break;
    case OperatorTokenType: OutText("Operator: ", token); break;
    case IntegerTokenType : OutText("Integer: ", token); break;
    case FloatingTokenType: OutText("Floating: ", token); break;
    case StringTokenType  : OutText("String: ", token); break;
    case EOLTokenType     : std::cout << "EOL\n"; break;
    case CommentTokenType : std::cout << "Comment\n"; break;
    default:
        // Unknown and EOF tokens: nothing more to print.
        break;
    }
}
// Echo a token's text to std::cout (testing aid).
//
// Identifier, operator, integer and floating tokens are written verbatim.
// String/character tokens are written followed by a closing quote that
// matches their opening quote (presumably the lexer stores such literals
// without the closing delimiter -- confirm against cpp_lexer.cpp).  EOL
// tokens produce a newline.  Comment tokens and every other token type
// produce no output.
void OutToken_class::operator()(Token const& token) const {
    // Switch on an unsigned value: TokenType enumerators such as
    // EOLTokenType (0xD0000000) do not fit in a 32-bit int, and using them
    // as case labels of an int switch is a narrowing conversion that C++11
    // and later reject.
    unsigned const type =
        static_cast<unsigned>(token.id) & static_cast<unsigned>(TokenTypeMask);
    switch (type) {
    case IdentifierTokenType:
    case OperatorTokenType:
    case IntegerTokenType:
    case FloatingTokenType:
        std::cout << token.text;
        break;
    case StringTokenType: {
        std::string const& text = token.text;
        // The opening quote is the first character, or the second one when
        // the literal carries a one-character prefix (the 'L' of wide
        // literals).
        std::string::size_type quotePos = 0;
        if (!text.empty() && text[0] != '"' && text[0] != '\'') {
            quotePos = 1;
        }
        // Bounds check: an empty or prefix-only text has no quote to match,
        // and must not be indexed past its end.
        if (quotePos < text.size()) {
            if (text[quotePos] == '"') {
                std::cout << text << "\"";
            } else if (text[quotePos] == '\'') {
                std::cout << text << "'";
            }
        }
        break;
    }
    case EOLTokenType:
        std::cout << "\n";
        break;
    default:
        // Comment, EOF and unknown tokens are not echoed.
        break;
    }
}
// Store an operator token built from its spelling.
//
// The spelling is looked up in operatorDB; when it is not found the token is
// tagged Op_unknown.  The token keeps the given text and this action's file
// position either way.
void SetOperatorToken::operator()(std::string const& text) const
{
    TokenID const* const found = operatorDB.find(text);
    TokenID const id = found ? *found : Op_unknown;
    dest = Token(filePos, text, id);
}
// Store an operator token built from its ID.
//
// The spelling for `id` is looked up in operatorDB.  An empty result means
// the ID is not registered: the token then gets the text "<UNKNOWN>" and the
// ID Op_unknown.
void SetOperatorToken::operator()(TokenID id) const
{
    std::string const& spelling = operatorDB.find(id);
    if (spelling.empty()) {
        dest = Token(filePos, "<UNKNOWN>", Op_unknown);
        return;
    }
    dest = Token(filePos, spelling, id);
}
// Create a literal integer token.
//
// Returns the action that writes into `dest`, stamps tokens with `filePos`
// and resolves spellings through integerDB.
SetLiteralToken
SetIntegerToken(Token& dest, boost::spirit::file_position const& filePos)
{
    SetLiteralToken const action(dest, filePos, integerDB);
    return action;
}
// Create a literal float token.
//
// Returns the action that writes into `dest`, stamps tokens with `filePos`
// and resolves spellings through floatingDB.
SetLiteralToken
SetFloatingToken(Token& dest, boost::spirit::file_position const& filePos)
{
    SetLiteralToken const action(dest, filePos, floatingDB);
    return action;
}
// Create a literal string token.
//
// Returns the action that writes into `dest`, stamps tokens with `filePos`
// and resolves spellings through stringDB.
SetLiteralToken
SetStringToken(Token& dest, boost::spirit::file_position const& filePos)
{
    SetLiteralToken const action(dest, filePos, stringDB);
    return action;
}
// Store a literal token built from its spelling.
//
// A spelling already present in `db` reuses the stored ID; a new spelling is
// added to `db` first and takes the ID that add() returns.
void SetLiteralToken::operator()(std::string const& text) const
{
    TokenID const* const known = db.find(text);
    TokenID const id = known ? *known : db.add(text);
    dest = Token(filePos, text, id);
}

View File

@@ -0,0 +1,388 @@
//
// C++ Lexer token definitions
//
// Copyright© 2002 Juan Carlos Arevalo-Baeza, All rights reserved
// email: jcab@JCABs-Rumblings.com
// Created: 8-Nov-2002
//
// The basics:
// Tokens are constructed from a string, and contain a TokenID, which is
// unique for each token, a file position and the string representation of
// the token.
//
#ifndef cpp_lexer_token_hpp_included
#define cpp_lexer_token_hpp_included
#include <boost/spirit/iterator/position_iterator.hpp>
#include <string>
#include <iosfwd>
///////////////////////////////////////////////////////////////////////////////
// File position tools.
//
// Stream insertion for boost::spirit::file_position.  Only the declaration
// lives in this header; the definition is elsewhere.
//
// NOTE(review): the C++ standard does not allow user code to add a
// declaration like this to namespace std.  Declaring the operator in
// namespace boost::spirit (where argument-dependent lookup would also find
// it) would avoid that -- which is what the TODO below suggests.
//
// TODO: Move this to the definition within Spirit?
namespace std {
std::ostream& operator<<(std::ostream& out, boost::spirit::file_position const& lc);
}
///////////////////////////////////////////////////////////////////////////////
// TokenID database definition.
//
// The implementation of this class is private.
// It holds the relationship between unique IDs and their text representation.
class TokenDB;
///////////////////////////////////////////////////////////////////////////////
// TokenID type definitions.
//
// Token types are categories of tokens (identifiers, integers,
// strings, etc..). Newlines, comments and the end of file are
// valid tokens, too, which can (or not) be bypassed in a skipper.
// Token categories.  Each category is a distinct value of the top hex digit
// of a 32-bit number; the remaining digits are zero.
enum TokenType {
    IdentifierTokenType = 0x00000000,
    OperatorTokenType   = 0x10000000,
    IntegerTokenType    = 0x20000000,
    FloatingTokenType   = 0x30000000,
    StringTokenType     = 0x40000000,
    UnknownTokenType    = 0xC0000000,
    EOLTokenType        = 0xD0000000,
    EOFTokenType        = 0xE0000000,
    CommentTokenType    = 0xF0000000   // no trailing comma: not valid C++98
};
///////////////////////////////////////////////////////////////////////////////
// Token ID definitions.
//
// Token IDs are enough to define a unique token within the database.
// For example, identifier "x" will always have the same ID.
// So, comparing IDs for equality is enough to correctly parse
// a sequence of tokens.
// Token IDs.
//
// An ID combines a TokenType category (the bits selected by TokenTypeMask)
// with an index (the bits selected by TokenIndexMask).  Within each category
// the *_unknown enumerator equals the bare category value and the predefined
// tokens follow it.
// NOTE(review): the *_next enumerators are presumably the first IDs handed
// out to tokens added at run time -- confirm against the TokenDB
// implementation.
enum TokenID {
    Token_Force_DWORD = 0x7FFFFFFF,
    TokenTypeMask     = 0xF0000000,
    TokenIndexMask    = 0x0FFFFFFF,

    // Predefined tokens

    // Identifiers and keywords.
    Ident_unknown = IdentifierTokenType,
    Ident_first,

    // Keywords shared by C and C++.
    Kwd_auto = Ident_first,
    Kwd_break,
    Kwd_case,
    Kwd_char,
    Kwd_const,
    Kwd_continue,
    Kwd_default,
    Kwd_do,
    Kwd_double,
    Kwd_else,
    Kwd_enum,
    Kwd_extern,
    Kwd_float,
    Kwd_for,
    Kwd_goto,
    Kwd_if,
    Kwd_int,
    Kwd_long,
    Kwd_register,
    Kwd_return,
    Kwd_short,
    Kwd_signed,
    Kwd_sizeof,
    Kwd_static,
    Kwd_struct,
    Kwd_switch,
    Kwd_typedef,
    Kwd_union,
    Kwd_unsigned,
    Kwd_void,
    Kwd_volatile,
    Kwd_while,

    // C++ keywords.
    Kwd_asm,
    Kwd_bool,
    Kwd_catch,
    Kwd_class,
    Kwd_const_cast,
    Kwd_delete,
    Kwd_dynamic_cast,
    Kwd_explicit,
    Kwd_export,
    Kwd_false,
    Kwd_friend,
    Kwd_inline,
    Kwd_mutable,
    Kwd_namespace,
    Kwd_new,
    Kwd_operator,
    Kwd_private,
    Kwd_protected,
    Kwd_public,
    Kwd_reinterpret_cast,
    Kwd_static_cast,
    Kwd_template,
    Kwd_this,
    Kwd_throw,
    Kwd_true,
    Kwd_try,
    Kwd_typeid,
    Kwd_typename,
    Kwd_using,
    Kwd_virtual,
    Kwd_wchar_t,
    Kwd_last,

    Ident__ = Kwd_last, // This is the single underscore identifier.
    Ident_next,
    Ident_last = OperatorTokenType-1,

    // Operators and punctuators.
    Op_unknown = OperatorTokenType,
    Op_first,
    Op_Ellipsis = Op_first, // ...
    Op_Right_Assign,        // >>=
    Op_Left_Assign,         // <<=
    Op_Add_Assign,          // +=
    Op_Sub_Assign,          // -=
    Op_Mul_Assign,          // *=
    Op_Div_Assign,          // /=
    Op_Mod_Assign,          // %=
    Op_BitAnd_Assign,       // &=
    Op_BitXor_Assign,       // ^=
    Op_BitOr_Assign,        // |=
    Op_Right,               // >>
    Op_Left,                // <<
    Op_Inc,                 // ++
    Op_Dec,                 // --
    Op_Ptr,                 // ->
    Op_And,                 // &&
    Op_Or,                  // ||
    Op_LE,                  // <=
    Op_GE,                  // >=
    Op_EQ,                  // ==
    Op_NE,                  // !=
    Op_Semicolon,           // ;
    Op_Left_Brace,          // {
    Op_Right_Brace,         // }
    Op_Comma,               // ,
    Op_Colon,               // :
    Op_Assign,              // =
    Op_Left_Paren,          // (
    Op_Right_Paren,         // )
    Op_Left_Bracket,        // [
    Op_Right_Bracket,       // ]
    Op_Dot,                 // .
    Op_Ampersand,           // &
    Op_Bang,                // !
    Op_Compl,               // ~ (for complement)
    Op_Tilde,               // ~ (for destructors)
    Op_Minus,               // -
    Op_Plus,                // +
    Op_Star,                // *
    Op_Slash,               // /
    Op_Percent,             // %
    Op_LT,                  // <
    Op_GT,                  // >
    Op_BitXor,              // ^
    Op_BitOr,               // |
    Op_Question,            // ?
    Op_Scope,               // ::
    Op_Member_Ref,          // .*
    Op_Member_Ptr,          // ->*
    Op_At,                  // @
    Op_Dollar,              // $
    Op_next,

    // Integer literals.
    Integer_unknown = IntegerTokenType,
    Integer_first,
    Integer_zero = Integer_first,
    Integer_next,

    // Floating-point literals.
    Floating_unknown = FloatingTokenType,
    Floating_first,
    Floating_next = Floating_first,

    // String and character literals.
    String_unknown = StringTokenType,
    String_first,
    String_next = String_first,

    // Special tokens: one ID per category.
    Unknown_token = UnknownTokenType,
    EOL_token     = EOLTokenType,
    EOF_token     = EOFTokenType,
    Comment_token = CommentTokenType   // no trailing comma: not valid C++98
};
// Identifier construction function.
// TODO: Add others (integer, float...).
// Used to initialize the database with extra specific tokens
// that are handled differently by the parser.
TokenID MakeIdentifierTokenID(std::string const& text);
///////////////////////////////////////////////////////////////////////////////
// Token structure definitions.
//
// A token structure holds a token ID (see above) and also information about
// where and how it was found in the source, which aids in doing better
// error reporting.
struct Token {
boost::spirit::file_position filePos; // Where in which file the token was found.
std::string text; // The actual text in the file.
TokenID id; // The ID of this token.
Token() {}
Token(boost::spirit::file_position const& filePos_,
std::string const& text_,
TokenID id_):
filePos(filePos_),
text (text_),
id (id_)
{}
// Conversion to ID.
operator TokenID() const {
return id;
}
};
// Token comparison functions.
//
// Two tokens are equal when both their IDs and their texts match; a token
// equals a bare TokenID when its ID matches.  Every != is the negation of
// the corresponding ==.
inline bool operator==(Token const& t1, Token const& t2)
{
    return t1.id == t2.id && t1.text == t2.text;
}

inline bool operator!=(Token const& t1, Token const& t2)
{
    return !(t1 == t2);
}

inline bool operator==(Token const& t1, TokenID t2)
{
    return t1.id == t2;
}

inline bool operator!=(Token const& t1, TokenID t2)
{
    return !(t1 == t2);
}

inline bool operator==(TokenID t1, Token const& t2)
{
    return t1 == t2.id;
}

inline bool operator!=(TokenID t1, Token const& t2)
{
    return !(t1 == t2);
}
// Simple actions that can be used in Spirit parsers.
// For examples of usage, look into cpp_lexer.cpp
// Used for testing.
//
// Functor taking a Token.  operator() is only declared here; its definition
// is in the implementation file.
struct PrintToken_class {
void operator()(Token const& token) const;
};
// Ready-made instance, usable directly as a semantic action.
PrintToken_class const PrintToken = PrintToken_class();
// Used for testing.
//
// Functor taking a Token.  operator() is only declared here; its definition
// is in the implementation file.
struct OutToken_class {
void operator()(Token const& token) const;
};
// Ready-made instance, usable directly as a semantic action.
OutToken_class const OutToken = OutToken_class();
// Create an identifier token.
struct SetIdentifierToken {
Token& dest;
boost::spirit::file_position const& filePos;
SetIdentifierToken(Token& dest_, boost::spirit::file_position const& filePos_):
dest (dest_),
filePos(filePos_)
{}
void operator()(std::string const& text) const {
dest = Token(filePos, text, MakeIdentifierTokenID(text));
}
};
// Create an operator (symbol) token.
//
// Semantic action that writes its result into `dest`, stamped with
// `filePos`.  Both members are references, so the objects they refer to
// must outlive this functor.  The two call operators are defined in the
// implementation file.
struct SetOperatorToken {
Token& dest;
boost::spirit::file_position const& filePos;
SetOperatorToken(Token& dest_, boost::spirit::file_position const& filePos_):
dest (dest_),
filePos(filePos_)
{}
// Build the token from the operator's spelling.
void operator()(std::string const& text) const;
// Build the token from the operator's ID.
void operator()(TokenID id) const;
};
// Create a literal token.
//
// Semantic action that writes its result into `dest`, stamped with
// `filePos`, using `db` as the database for literal spellings.  All three
// members are references, so the objects they refer to must outlive this
// functor.  The call operator is defined in the implementation file.
struct SetLiteralToken {
Token& dest;
boost::spirit::file_position const& filePos;
TokenDB& db;
SetLiteralToken(Token& dest_,
boost::spirit::file_position const& filePos_,
TokenDB& db_):
dest (dest_),
filePos(filePos_),
db (db_)
{}
// Build the token from the literal's spelling.
void operator()(std::string const& text) const;
};
// Create a literal integer token.
SetLiteralToken
SetIntegerToken(Token& dest, boost::spirit::file_position const& filePos);
// Create a literal float token.
SetLiteralToken
SetFloatingToken(Token& dest, boost::spirit::file_position const& filePos);
// Create a literal string token.
SetLiteralToken
SetStringToken(Token& dest, boost::spirit::file_position const& filePos);
// Create a special token.
//
// Semantic action for tokens whose ID is fixed in advance: whatever text is
// matched, the token stored in `dest` carries `filePos`, that text, and
// `id`.  `dest` and `filePos` are references, so the objects they refer to
// must outlive this functor.
struct SetSpecialToken {
    Token& dest;
    boost::spirit::file_position const& filePos;
    TokenID id;

    SetSpecialToken(Token& dest_,
                    boost::spirit::file_position const& filePos_,
                    TokenID id_)
        : dest(dest_), filePos(filePos_), id(id_)
    {}

    // Called with the matched text.
    void operator()(std::string const& text) const {
        dest = Token(filePos, text, id);
    }

    // Called with the matched iterator range; the text is the characters in
    // [first, last).
    template < typename IteratorT >
    void operator()(IteratorT const& first, IteratorT const& last) const {
        std::string const matched(first, last);
        (*this)(matched);
    }
};
// Create a special newline token.
//
// Returns a SetSpecialToken bound to `dest` and `filePos` that tags every
// token it creates with EOL_token.
inline
SetSpecialToken
SetEOLToken(Token& dest, boost::spirit::file_position const& filePos)
{
    SetSpecialToken const action(dest, filePos, EOL_token);
    return action;
}
// Create a special end-of-file token.
//
// Returns a SetSpecialToken bound to `dest` and `filePos` that tags every
// token it creates with EOF_token.
inline
SetSpecialToken
SetEOFToken(Token& dest, boost::spirit::file_position const& filePos)
{
    SetSpecialToken const action(dest, filePos, EOF_token);
    return action;
}
// Create a special comment token.
//
// Returns a SetSpecialToken bound to `dest` and `filePos` that tags every
// token it creates with Comment_token.
inline
SetSpecialToken
SetCommentToken(Token& dest, boost::spirit::file_position const& filePos)
{
    SetSpecialToken const action(dest, filePos, Comment_token);
    return action;
}
#endif

View File

@@ -0,0 +1,19 @@
# CC = C:/cygwin/usr/local/bin/g++ -O2 -ftemplate-depth-500
# LD = C:/cygwin/usr/local/bin/g++ -O2 -ftemplate-depth-500
CC = g++ -O2 -ftemplate-depth-500
LD = g++ -O2 -ftemplate-depth-500
default: cpp_lexer_test.exe
cpp_lexer_test.exe: cpp_lexer_test.o cpp_lexer_token.o cpp_lexer.o
$(LD) -o $@ $^
cpp_lexer.o : cpp_lexer.cpp cpp_lexer.hpp
cpp_lexer_token.o : cpp_lexer_token.cpp cpp_lexer_token.hpp
cpp_lexer_test.o : cpp_lexer_test.cpp cpp_lexer.hpp cpp_lexer_token.hpp
%.o: %.cpp
$(CC) -o $@ -c $<

View File

@@ -0,0 +1,11 @@
EXTRA_PROGRAMS = cpp_to_html
all: $(EXTRA_PROGRAMS)
cpp_to_html_SOURCES = cpp_to_html.cpp
INCLUDES = -I$(top_srcdir)
CLEANFILES = $(EXTRA_PROGRAMS)
EXTRA_DIST = readme.txt style.css

View File

@@ -0,0 +1,252 @@
///////////////////////////////////////////////////////////////////////////////
//
// C++ Source to HTML converter
//
// [ JDG 9/01/2002 ]
//
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/core.hpp>
#include <boost/spirit/symbols/symbols.hpp>
#include <boost/spirit/utility/chset.hpp>
#include <boost/spirit/utility/escape_char.hpp>
#include <boost/spirit/utility/confix.hpp>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
///////////////////////////////////////////////////////////////////////////////
using namespace std;
using namespace boost::spirit;
///////////////////////////////////////////////////////////////////////////////
//
// Semantic actions
//
///////////////////////////////////////////////////////////////////////////////
// Write one character of source text to an HTML stream.
//
// '<', '>', '&' and '"' are replaced by the entities &lt;, &gt;, &amp; and
// &quot;; every other character is streamed unchanged.
template <typename CharT>
void print_char(CharT ch, ostream& out)
{
    if (ch == '<')
        out << "&lt;";
    else if (ch == '>')
        out << "&gt;";
    else if (ch == '&')
        out << "&amp;";
    else if (ch == '"')
        out << "&quot;";
    else
        out << ch;
}
///////////////////////////////////////
// Semantic action that echoes a matched range as an HTML <span>.
//
// The characters in [first, last) are written to `out` through print_char()
// between "<span class=NAME>" and "</span>", where NAME is the string given
// at construction.
struct process
{
    process(char const* name_, ostream& out_)
        : name(name_), out(out_) {}

    template <typename IteratorT>
    void operator()(IteratorT first, IteratorT last) const
    {
        out << "<span class=" << name << ">";
        for (; first != last; ++first)
            print_char(*first, out);
        out << "</span>";
    }

    char const* name;   // CSS class written into the span
    ostream& out;       // destination stream
};
///////////////////////////////////////
// Semantic action for a character no other rule accepted: the character
// itself is ignored and a '#' is written to `out` in its place.
struct unexpected_char
{
    unexpected_char(ostream& out_)
        : out(out_) {}

    template <typename CharT>
    void operator()(CharT) const
    {
        char const marker = '#';    // stands in for the unexpected character
        out << marker;
    }

    ostream& out;
};
///////////////////////////////////////////////////////////////////////////////
//
// Our C++ to HTML grammar
//
///////////////////////////////////////////////////////////////////////////////
// Spirit grammar that splits C++ source into lexical elements and echoes
// each one to `out` through the semantic actions attached in `program`.
struct cpp_to_html : public grammar<cpp_to_html>
{
    cpp_to_html(ostream& out_)
        : out(out_) {}

    template <typename ScannerT>
    struct definition
    {
        definition(cpp_to_html const& self)
        {
            // Alternatives are tried in the order written and the first one
            // that matches wins, so `keyword` must precede `identifier` and
            // `anychar_p` must come last.
            program
                =
                *(  preprocessor    [process("preprocessor", self.out)]
                |   comment         [process("comment", self.out)]
                |   keyword         [process("keyword", self.out)]
                |   identifier      [process("identifier", self.out)]
                |   special         [process("special", self.out)]
                |   string_         [process("string", self.out)]
                |   literal         [process("literal", self.out)]
                |   number          [process("number", self.out)]
                |   anychar_p       [unexpected_char(self.out)]
                )
                ;

            preprocessor
                =   lexeme_d['#' >> ((alpha_p | '_') >> *(alnum_p | '_'))]
                ;

            comment
                =   +lexeme_d[comment_p("//") | comment_p("/*", "*/")]
                ;

            // make sure we recognize whole words only
            keyword
                =   lexeme_d[keyword_ >> (eps_p - (alnum_p | '_'))]
                ;

            keyword_
                =   "and_eq", "and", "asm", "auto", "bitand", "bitor",
                    "bool", "break", "case", "catch", "char", "class",
                    "compl", "const_cast", "const", "continue", "default",
                    "delete", "do", "double", "dynamic_cast", "else",
                    "enum", "explicit", "export", "extern", "false",
                    "float", "for", "friend", "goto", "if", "inline",
                    "int", "long", "mutable", "namespace", "new", "not_eq",
                    "not", "operator", "or_eq", "or", "private",
                    "protected", "public", "register", "reinterpret_cast",
                    "return", "short", "signed", "sizeof", "static",
                    "static_cast", "struct", "switch", "template", "this",
                    "throw", "true", "try", "typedef", "typeid",
                    "typename", "union", "unsigned", "using", "virtual",
                    "void", "volatile", "wchar_t", "while", "xor_eq", "xor"
                ;

            special
                =   lexeme_d[(+chset_p("~!%^&*()+={[}]:;,<.>?/|\\-"))]
                ;

            string_
                =   !nocase_d['l'] >> confix_p('"', *c_escape_ch_p, '"')
                ;

            literal
                =   !nocase_d['l'] >> confix_p('\'', *c_escape_ch_p, '\'')
                ;

            // The hexadecimal form has to be tried before real_p: real_p
            // accepts the leading "0" of "0x1F" on its own, which ended the
            // number there and left "x1F" to be reported as an identifier.
            number
                =   (   nocase_d["0x"] >> hex_p
                    |   real_p
                    |   '0' >> oct_p
                    )
                    >>  *nocase_d[chset_p("ldfu")]
                ;

            identifier
                =   lexeme_d[((alpha_p | '_') >> *(alnum_p | '_'))]
                ;
        }

        rule<ScannerT>  program, preprocessor, comment, special,
                        string_, literal, number, identifier, keyword;
        symbols<>       keyword_;

        rule<ScannerT> const&
        start() const { return program; }
    };

    ostream& out;   // stream the semantic actions write to
};
///////////////////////////////////////////////////////////////////////////////
//
// Parse a file
//
///////////////////////////////////////////////////////////////////////////////
static void
parse(char const* filename)
{
ifstream in(filename);
if (!in)
{
cerr << "Could not open input file: " << filename << endl;
return;
}
string out_name(filename);
out_name += ".html";
ofstream out(out_name.c_str());
if (!out)
{
cerr << "Could not open output file: " << out_name << endl;
return;
}
in.unsetf(ios::skipws); // Turn of white space skipping on the stream
vector<char> vec;
std::copy(
istream_iterator<char>(in),
istream_iterator<char>(),
std::back_inserter(vec));
out << "<html>\n" << "<head>\n";
out << "<!-- Generated by the Spirit (http://spirit.sf.net) ";
out << "C++ to HTML Converter -->\n";
out << "<title>\n";
out << out_name << "</title>\n";
out << "<link rel=\"stylesheet\" href=\"style.css\" type=\"text/css\">";
out << "</head>\n";
out << "<body>\n" << "<pre>\n";
vector<char>::const_iterator first = vec.begin();
vector<char>::const_iterator last = vec.end();
cpp_to_html p(out);
parse_info<vector<char>::const_iterator> info =
parse(first, last, p, space_p);
if (!info.full)
{
cerr << "---PARSING FAILURE---\n";
cerr << string(info.stop, last);
}
out << "</pre>\n" << "</body>\n" << "</html>\n";
}
///////////////////////////////////////////////////////////////////////////////
//
// Main program
//
///////////////////////////////////////////////////////////////////////////////
// Convert every file named on the command line, echoing each name to cout
// before it is processed.  With no arguments a message is printed to cerr.
// The exit status is 0 in both cases.
int
main(int argc, char* argv[])
{
    if (argc <= 1)
    {
        cerr << "---NO FILENAME GIVEN---" << endl;
        return 0;
    }

    for (int i = 1; i < argc; ++i)
    {
        char const* const filename = argv[i];
        cout << filename << endl;
        parse(filename);
    }
    return 0;
}

View File

@@ -0,0 +1,260 @@
///////////////////////////////////////////////////////////////////////////////
//
// C++ Source to HTML converter (VC6/7 version)
//
// [ JDG 10/31/2002 ]
//
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/core.hpp>
#include <boost/spirit/symbols/symbols.hpp>
#include <boost/spirit/utility/escape_char.hpp>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
///////////////////////////////////////////////////////////////////////////////
using namespace std;
using namespace boost::spirit;
///////////////////////////////////////////////////////////////////////////////
//
// Semantic actions
//
///////////////////////////////////////////////////////////////////////////////
// Write one character of source text to an HTML stream.
//
// '<', '>', '&' and '"' are replaced by the entities &lt;, &gt;, &amp; and
// &quot;; every other character is streamed unchanged.
template <typename CharT>
void print_char(CharT ch, ostream& out)
{
    if (ch == '<')
        out << "&lt;";
    else if (ch == '>')
        out << "&gt;";
    else if (ch == '&')
        out << "&amp;";
    else if (ch == '"')
        out << "&quot;";
    else
        out << ch;
}
///////////////////////////////////////
// Semantic action that echoes a matched range as an HTML <span>.
//
// The characters in [first, last) are written to `out` through print_char()
// between "<span class=NAME>" and "</span>", where NAME is the string given
// at construction.
struct process
{
    process(char const* name_, ostream& out_)
        : name(name_), out(out_) {}

    template <typename IteratorT>
    void operator()(IteratorT first, IteratorT last) const
    {
        out << "<span class=" << name << ">";
        for (; first != last; ++first)
            print_char(*first, out);
        out << "</span>";
    }

    char const* name;   // CSS class written into the span
    ostream& out;       // destination stream
};
///////////////////////////////////////
// Semantic action for a character no other rule accepted: the character
// itself is ignored and a '#' is written to `out` in its place.
struct unexpected_char
{
    unexpected_char(ostream& out_)
        : out(out_) {}

    template <typename CharT>
    void operator()(CharT) const
    {
        char const marker = '#';    // stands in for the unexpected character
        out << marker;
    }

    ostream& out;
};
///////////////////////////////////////////////////////////////////////////////
//
// Our C++ to HTML grammar
//
///////////////////////////////////////////////////////////////////////////////
// Spirit grammar that splits C++ source into lexical elements and echoes
// each one to `out` through the semantic actions attached in `program`.
// This variant spells its rules out with primitive parsers only.
struct cpp_to_html : public grammar<cpp_to_html>
{
    cpp_to_html(ostream& out_)
        : out(out_) {}

    template <typename ScannerT>
    struct definition
    {
        definition(cpp_to_html const& self)
        {
            // Alternatives are tried in the order written and the first one
            // that matches wins, so `keyword` must precede `identifier` and
            // `anychar_p` must come last.
            program
                =
                *(  preprocessor    [process("preprocessor", self.out)]
                |   comment         [process("comment", self.out)]
                |   keyword         [process("keyword", self.out)]
                |   identifier      [process("identifier", self.out)]
                |   special         [process("special", self.out)]
                |   string_         [process("string", self.out)]
                |   literal         [process("literal", self.out)]
                |   number          [process("number", self.out)]
                |   anychar_p       [unexpected_char(self.out)]
                )
                ;

            preprocessor
                =   lexeme_d['#' >> ((alpha_p | '_') >> *(alnum_p | '_'))]
                ;

            comment
                =   +lexeme_d
                    [
                        ("//" >> *(anychar_p - eol_p) >> eol_p)
                    |   ("/*" >> *(anychar_p - "*/") >> "*/")
                    ]
                ;

            // make sure we recognize whole words only
            keyword
                =   lexeme_d[keyword_ >> (eps_p - (alnum_p | '_'))]
                ;

            keyword_
                =   "and_eq", "and", "asm", "auto", "bitand", "bitor",
                    "bool", "break", "case", "catch", "char", "class",
                    "compl", "const_cast", "const", "continue", "default",
                    "delete", "do", "double", "dynamic_cast", "else",
                    "enum", "explicit", "export", "extern", "false",
                    "float", "for", "friend", "goto", "if", "inline",
                    "int", "long", "mutable", "namespace", "new", "not_eq",
                    "not", "operator", "or_eq", "or", "private",
                    "protected", "public", "register", "reinterpret_cast",
                    "return", "short", "signed", "sizeof", "static",
                    "static_cast", "struct", "switch", "template", "this",
                    "throw", "true", "try", "typedef", "typeid",
                    "typename", "union", "unsigned", "using", "virtual",
                    "void", "volatile", "wchar_t", "while", "xor_eq", "xor"
                ;

            special
                =   lexeme_d
                    [
                        ch_p('~') | "!" | "%" | "^" | "&" | "*"
                    |   "(" | ")" | "+" | "=" | "{" | "["
                    |   "}" | "]" | ":" | ";" | "," | "<"
                    |   "." | ">" | "?" | "/" | "|" | "\\" | "-"
                    ]
                ;

            string_
                =   !nocase_d['l'] >> '"' >> *(c_escape_ch_p - '"') >> '"'
                ;

            literal
                =   !nocase_d['l'] >> '\'' >> *(c_escape_ch_p - '\'') >> '\''
                ;

            // The hexadecimal form has to be tried before real_p: real_p
            // accepts the leading "0" of "0x1F" on its own, which ended the
            // number there and left "x1F" to be reported as an identifier.
            number
                =   (   nocase_d["0x"] >> hex_p
                    |   real_p
                    |   '0' >> oct_p
                    )
                    >>  *nocase_d[ch_p('l') | 'd' | 'f' | 'u']
                ;

            identifier
                =   lexeme_d[((alpha_p | '_') >> *(alnum_p | '_'))]
                ;
        }

        rule<ScannerT>  program, preprocessor, comment, special,
                        string_, literal, number, identifier, keyword;
        symbols<>       keyword_;

        rule<ScannerT> const&
        start() const { return program; }
    };

    ostream& out;   // stream the semantic actions write to
};
///////////////////////////////////////////////////////////////////////////////
//
// Parse a file
//
///////////////////////////////////////////////////////////////////////////////
static void
parse(char const* filename)
{
ifstream in(filename);
if (!in)
{
cerr << "Could not open input file: " << filename << endl;
return;
}
string out_name(filename);
out_name += ".html";
ofstream out(out_name.c_str());
if (!out)
{
cerr << "Could not open output file: " << out_name << endl;
return;
}
in.unsetf(ios::skipws); // Turn of white space skipping on the stream
vector<char> vec;
std::copy(
istream_iterator<char>(in),
istream_iterator<char>(),
std::back_inserter(vec));
out << "<html>\n" << "<head>\n";
out << "<!-- Generated by the Spirit (http://spirit.sf.net) ";
out << "C++ to HTML Converter -->\n";
out << "<title>\n";
out << out_name << "</title>\n";
out << "<link rel=\"stylesheet\" href=\"style.css\" type=\"text/css\">";
out << "</head>\n";
out << "<body>\n" << "<pre>\n";
vector<char>::const_iterator first = vec.begin();
vector<char>::const_iterator last = vec.end();
cpp_to_html p(out);
parse_info<vector<char>::const_iterator> info =
parse(first, last, p, space_p);
if (!info.full)
{
cerr << "---PARSING FAILURE---\n";
cerr << string(info.stop, last);
}
out << "</pre>\n" << "</body>\n" << "</html>\n";
}
///////////////////////////////////////////////////////////////////////////////
//
// Main program
//
///////////////////////////////////////////////////////////////////////////////
// Convert every file named on the command line, echoing each name to cout
// before it is processed.  With no arguments a message is printed to cerr.
// The exit status is 0 in both cases.
int
main(int argc, char* argv[])
{
    if (argc <= 1)
    {
        cerr << "---NO FILENAME GIVEN---" << endl;
        return 0;
    }

    for (int i = 1; i < argc; ++i)
    {
        char const* const filename = argv[i];
        cout << filename << endl;
        parse(filename);
    }
    return 0;
}

View File

@@ -0,0 +1,23 @@
C++ to HTML converter
This sample stylizes C++ code using HTML. The generated HTML file uses CSS
classes to allow flexibility and easy tweaking of color/font schemes (see
the sample CSS file "style.css"). Try compiling "cpp_to_html.cpp" and make
the compiled application parse its source ("cpp_to_html.cpp"). The
generated HTML file will be "cpp_to_html.cpp.html". You can then view the
generated html. Make sure that "style.css" is in the same directory as
the generated html file, or tweak the html source to suit.
The application can accept multiple files from the command line. Input
files are assumed to be valid C++ files; otherwise parsing will be
terminated as soon as invalid syntax is encountered.
Cheers,
--Joel de Guzman
As an aside - this example parses C#, Java and JScript just as well as
C/C++ due to their nearly identical lexical structure. --Carl Daniel

View File

@@ -0,0 +1,8 @@
/* Code listing box. The font list ends in the generic family "monospace"
   ("mono" is not a CSS generic family), so a fixed-width font is still used
   when neither Courier face is installed. */
pre { BORDER-RIGHT: gray 1pt solid; PADDING-RIGHT: 2pt; BORDER-TOP: gray 1pt solid; DISPLAY: block; PADDING-LEFT: 2pt; PADDING-BOTTOM: 2pt; MARGIN-LEFT: 32pt; BORDER-LEFT: gray 1pt solid; MARGIN-RIGHT: 32pt; PADDING-TOP: 2pt; BORDER-BOTTOM: gray 1pt solid; FONT-FAMILY: "Courier New", Courier, monospace; background-color: #EEEEEE; font-size: small}
.keyword { color: #000099}
.identifier { }
.comment { font-style: italic; color: #990000}
.special { color: #800040}
.preprocessor { color: #3F007F}
.string { font-style: italic; color: #666666}
.literal { font-style: italic; color: #666666}

View File

@@ -0,0 +1,15 @@
SUBDIRS = test_files
EXTRA_PROGRAMS = sg_pascal
all: $(EXTRA_PROGRAMS)
sg_pascal_SOURCES = sg_pascal.cpp
INCLUDES = -I$(top_srcdir)
CLEANFILES = $(EXTRA_PROGRAMS)
EXTRA_DIST = \
readme.txt

View File

@@ -0,0 +1,18 @@
This is the code from Hakki Dogusan.
The Pascal parser demonstrates many features of the Spirit Parser.
The code is fully working and is able to parse all the test files
included.
Some of the files have deliberate syntax errors: t4.pas and t7.pas.
Pascal Parser Grammar for Spirit (http://spirit.sourceforge.net/)
Adapted from,
Pascal User Manual And Report (Second Edition-1978)
Kathleen Jensen - Niklaus Wirth
Written by: Hakki Dogusan dogusanh@tr.net
Adapted by Joel de Guzman djowel@gmx.co.uk
ported to Spirit v1.5 [ JDG 9/16/2002 ]

View File

@@ -0,0 +1,940 @@
///////////////////////////////////////////////////////////////////////////////
//
// Pascal Parser Grammar for Spirit (http://spirit.sourceforge.net/)
//
// Adapted from,
// Pascal User Manual And Report (Second Edition-1978)
// Kathleen Jensen - Niklaus Wirth
//
// Written by: Hakki Dogusan dogusanh@tr.net
// Adapted by Joel de Guzman djowel@gmx.co.uk
// ported to Spirit v1.5 [ JDG 9/16/2002 ]
//
///////////////////////////////////////////////////////////////////////////////
//#define BOOST_SPIRIT_DEBUG ///$$$ DEFINE THIS WHEN DEBUGGING $$$///
#include <boost/spirit/core.hpp>
#include <boost/spirit/symbols/symbols.hpp>
#include <fstream>
#include <iostream>
#include <vector>
///////////////////////////////////////////////////////////////////////////////
using namespace std;
using namespace boost::spirit;
///////////////////////////////////////////////////////////////////////////////
//
// Our Pascal grammar
//
///////////////////////////////////////////////////////////////////////////////
struct pascal_grammar : public grammar<pascal_grammar>
{
pascal_grammar() {}
template <typename ScannerT>
struct definition
{
definition(pascal_grammar const& /*self*/)
{
#ifdef BOOST_SPIRIT_DEBUG
debug(); // define the debug names
#endif
//-----------------------------------------------------------------
// KEYWORDS
//-----------------------------------------------------------------
keywords =
"in", "div", "mod", "and", "or", "not", "nil", "goto",
"if", "then", "else", "case", "while", "repeat", "until",
"for", "do", "to", "downto", "with", "program", "label",
"const", "type", "char", "boolean", "integer", "real",
"packed", "array", "of", "record", "end", "set", "file",
"var", "procedure", "function", "begin";
//-----------------------------------------------------------------
// OPERATORS
//-----------------------------------------------------------------
chlit<> PLUS('+');
chlit<> MINUS('-');
chlit<> STAR('*');
chlit<> SLASH('/');
strlit<> ASSIGN(":=");
chlit<> COMMA(',');
chlit<> SEMI(';');
chlit<> COLON(':');
chlit<> EQUAL('=');
strlit<> NOT_EQUAL("<>");
chlit<> LT('<');
strlit<> LE("<=");
strlit<> GE(">=");
chlit<> GT('>');
chlit<> LPAREN('(');
chlit<> RPAREN(')');
chlit<> LBRACK('[');
chlit<> RBRACK(']');
chlit<> POINTER('^');
chlit<> DOT('.');
strlit<> DOTDOT("..");
//-----------------------------------------------------------------
// TOKENS
//-----------------------------------------------------------------
typedef inhibit_case<strlit<> > token_t;
token_t IN_ = nocase_d["in"];
token_t DIV = nocase_d["div"];
token_t MOD = nocase_d["mod"];
token_t AND = nocase_d["and"];
token_t OR = nocase_d["or"];
token_t NOT = nocase_d["not"];
token_t NIL = nocase_d["nil"];
token_t GOTO = nocase_d["goto"];
token_t IF = nocase_d["if"];
token_t THEN = nocase_d["then"];
token_t ELSE = nocase_d["else"];
token_t CASE = nocase_d["case"];
token_t WHILE = nocase_d["while"];
token_t REPEAT = nocase_d["repeat"];
token_t UNTIL = nocase_d["until"];
token_t FOR = nocase_d["for"];
token_t DO = nocase_d["do"];
token_t TO = nocase_d["to"];
token_t DOWNTO = nocase_d["downto"];
token_t WITH = nocase_d["with"];
token_t PROGRAM = nocase_d["program"];
token_t LABEL = nocase_d["label"];
token_t CONST_ = nocase_d["const"];
token_t TYPE = nocase_d["type"];
token_t CHAR = nocase_d["char"];
token_t BOOLEAN = nocase_d["boolean"];
token_t INTEGER = nocase_d["integer"];
token_t REAL = nocase_d["real"];
token_t PACKED_ = nocase_d["packed"];
token_t ARRAY = nocase_d["array"];
token_t OF = nocase_d["of"];
token_t RECORD = nocase_d["record"];
token_t END = nocase_d["end"];
token_t SET = nocase_d["set"];
token_t FILE = nocase_d["file"];
token_t VAR = nocase_d["var"];
token_t PROCEDURE = nocase_d["procedure"];
token_t FUNCTION = nocase_d["function"];
token_t BEGIN = nocase_d["begin"];
//-----------------------------------------------------------------
// Start grammar definition
//-----------------------------------------------------------------
identifier
= nocase_d[
lexeme_d[
(alpha_p >> *(alnum_p | '_'))
- (keywords >> anychar_p - (alnum_p | '_'))
]
];
string_literal
= lexeme_d[ chlit<>('\'') >>
+( strlit<>("\'\'") | anychar_p-chlit<>('\'') ) >>
chlit<>('\'') ];
program
= programHeading >>
block >>
DOT
;
programHeading
= PROGRAM >> identifier >>
LPAREN >> fileIdentifier
>> *( COMMA >> fileIdentifier ) >> RPAREN >>
SEMI
;
fileIdentifier
= identifier.alias()
;
block
= *( labelDeclarationPart
| constantDefinitionPart
| typeDefinitionPart
| variableDeclarationPart
| procedureAndFunctionDeclarationPart
) >>
statementPart
;
labelDeclarationPart
= LABEL >> label >> *( COMMA >> label ) >> SEMI
;
label
= unsignedInteger.alias()
;
constantDefinitionPart
= CONST_ >> constantDefinition
>> *( SEMI >> constantDefinition ) >> SEMI
;
constantDefinition
= identifier >> EQUAL >> constant
;
constant
= unsignedNumber
| sign >> unsignedNumber
| constantIdentifier
| sign >> constantIdentifier
| string_literal
;
unsignedNumber
= lexeme_d[uint_p
>> !('.' >> uint_p)
>> !(nocase_d['e'] >> (ch_p('+') | '-') >> uint_p)]
;
unsignedInteger
= uint_p
;
unsignedReal
= ureal_p
;
sign
= PLUS | MINUS
;
constantIdentifier
= identifier.alias()
;
typeDefinitionPart
= TYPE >> typeDefinition
>> *( SEMI >> typeDefinition ) >> SEMI
;
typeDefinition
= identifier >> EQUAL >> type
;
type
= simpleType
| structuredType
| pointerType
;
simpleType
= scalarType
| subrangeType
| typeIdentifier
;
scalarType
= LPAREN >> identifier
>> *( COMMA >> identifier ) >> RPAREN
;
subrangeType
= constant >> DOTDOT >> constant
;
typeIdentifier
= identifier
| CHAR
| BOOLEAN
| INTEGER
| REAL
;
structuredType
= ( PACKED_
| empty
) >>
unpackedStructuredType
;
unpackedStructuredType
= arrayType
| recordType
| setType
| fileType
;
arrayType
= ARRAY >> LBRACK >> indexType
>> *( COMMA >> indexType ) >> RBRACK >> OF >>
componentType
;
indexType
= simpleType.alias()
;
componentType
= type.alias()
;
recordType
= RECORD >> fieldList >> END
;
fieldList
= fixedPart >>
( SEMI >> variantPart
| empty
)
| variantPart
;
fixedPart
= recordSection >> *( SEMI >> recordSection )
;
recordSection
= fieldIdentifier
>> *( COMMA >> fieldIdentifier ) >> COLON >> type
| empty
;
variantPart
= CASE >> tagField >> typeIdentifier >> OF >>
variant >> *( SEMI >> variant )
;
tagField
= fieldIdentifier >> COLON
| empty
;
variant
= caseLabelList >> COLON >> LPAREN >> fieldList >> RPAREN
| empty
;
caseLabelList
= caseLabel >> *( COMMA >> caseLabel )
;
caseLabel
= constant.alias()
;
setType
= SET >> OF >> baseType
;
baseType
= simpleType.alias()
;
fileType
= FILE >> OF >> type
;
pointerType
= POINTER >> typeIdentifier
;
variableDeclarationPart
= VAR >> variableDeclaration
>> *( SEMI >> variableDeclaration ) >> SEMI
;
variableDeclaration
= identifier >> *( COMMA >> identifier ) >> COLON >> type
;
procedureAndFunctionDeclarationPart
= procedureOrFunctionDeclaration >> SEMI
;
procedureOrFunctionDeclaration
= procedureDeclaration
| functionDeclaration
;
procedureDeclaration
= procedureHeading
>> block
;
procedureHeading
= PROCEDURE >> identifier >> parameterList >> SEMI
;
parameterList
= empty
| LPAREN >> formalParameterSection
>> *( SEMI >> formalParameterSection ) >> RPAREN
;
formalParameterSection
= parameterGroup
| VAR >> parameterGroup
| FUNCTION >> parameterGroup
| PROCEDURE >> identifier >> *( COMMA >> identifier )
;
parameterGroup
= identifier >> *( COMMA >> identifier ) >> COLON >> typeIdentifier
;
functionDeclaration
= functionHeading >>
block
;
functionHeading
= FUNCTION >> identifier >> parameterList
>> COLON >> resultType >> SEMI
;
resultType
= typeIdentifier.alias()
;
statementPart
= compoundStatement.alias()
;
statement
= ( label >> COLON
| empty
) >>
unlabelledStatement
;
unlabelledStatement
= structuredStatement
| simpleStatement
;
simpleStatement
= assignmentStatement
| procedureStatement
| gotoStatement
| emptyStatement
;
assignmentStatement
= variable >> ASSIGN >> expression
| functionIdentifier >> ASSIGN >> expression
;
variable
= componentVariable
| referencedVariable
| entireVariable
;
entireVariable
= variableIdentifier.alias()
;
variableIdentifier
= identifier.alias()
;
componentVariable
= indexedVariable
| fieldDesignator
| fileBuffer
;
indexedVariable
= arrayVariable >> LBRACK
>> expression >> *( COMMA >> expression) >> RBRACK
;
arrayVariable
= identifier.alias()
;
fieldDesignator
= recordVariable >> DOT >> fieldIdentifier
;
recordVariable
= identifier.alias()
;
fieldIdentifier
= identifier.alias()
;
fileBuffer
= fileVariable >> POINTER
;
fileVariable
= identifier.alias()
;
referencedVariable
= pointerVariable >> POINTER
;
pointerVariable
= identifier.alias()
;
expression
= simpleExpression >>
( relationalOperator >> simpleExpression
| empty
)
;
relationalOperator
= EQUAL | NOT_EQUAL | GE | LE | LT | GT | IN_
;
simpleExpression
= ( sign
| empty
) >>
term >> *( addingOperator >> term )
;
addingOperator
= PLUS | MINUS | OR
;
term
= factor >> *( multiplyingOperator >> factor )
;
multiplyingOperator
= STAR | SLASH | DIV | MOD | AND
;
factor
= LPAREN >> expression >> RPAREN
| set
| longest_d[
variable
| unsignedConstant
| functionDesignator
]
| NOT >> factor
;
unsignedConstant
= unsignedNumber
| string_literal
| constantIdentifier
| NIL
;
functionDesignator
= functionIdentifier >>
( LPAREN >> actualParameter
>> *( COMMA >> actualParameter ) >> RPAREN
| empty
)
;
functionIdentifier
= identifier.alias()
;
set
= LBRACK >> elementList >> RBRACK
;
elementList
= element >> *( COMMA >> element )
| empty
;
element
= expression >>
( DOTDOT >> expression
| empty
)
;
procedureStatement
= procedureIdentifier >>
( LPAREN >> actualParameter
>> *( COMMA >> actualParameter ) >> RPAREN
| empty
)
;
procedureIdentifier
= identifier.alias()
;
actualParameter
= expression
| variable
| procedureIdentifier
| functionIdentifier
;
gotoStatement
= GOTO >> label
;
emptyStatement
= empty.alias()
;
empty
= epsilon_p
;
structuredStatement
= compoundStatement
| conditionalStatement
| repetetiveStatement
| withStatement
;
compoundStatement
= BEGIN >>
statement >> *( SEMI >> statement ) >>
END
;
conditionalStatement
= ifStatement
| caseStatement
;
ifStatement
= IF >> expression >> THEN >> statement >>
( ELSE >> statement
| empty
)
;
caseStatement
= CASE >> expression >> OF >>
caseListElement >> *( SEMI >> caseListElement ) >>
END
;
caseListElement
= caseLabelList >> COLON >> statement
| empty
;
repetetiveStatement
= whileStatement
| repeatStatement
| forStatement
;
whileStatement
= WHILE >> expression >> DO >>
statement
;
repeatStatement
= REPEAT >>
statement >> *( SEMI >> statement ) >>
UNTIL >> expression
;
forStatement
= FOR >> controlVariable >> ASSIGN >> forList >> DO >>
statement
;
forList
= initialValue >> ( TO | DOWNTO ) >> finalValue
;
controlVariable
= identifier.alias()
;
initialValue
= expression.alias()
;
finalValue
= expression.alias()
;
withStatement
= WITH >> recordVariableList >> DO >>
statement
;
recordVariableList
= recordVariable >> *( COMMA >> recordVariable )
;
//-----------------------------------------------------------------
// End grammar definition
//-----------------------------------------------------------------
}
#ifdef BOOST_SPIRIT_DEBUG
// Passes every rule of this grammar to BOOST_SPIRIT_DEBUG_RULE, one call per
// rule<ScannerT> member. Only compiled when BOOST_SPIRIT_DEBUG is defined.
// NOTE(review): the macro presumably labels each rule with its identifier for
// trace output -- confirm against the Spirit debugging documentation.
void
debug()
{
// Program structure
BOOST_SPIRIT_DEBUG_RULE(program);
BOOST_SPIRIT_DEBUG_RULE(programHeading);
BOOST_SPIRIT_DEBUG_RULE(fileIdentifier);
BOOST_SPIRIT_DEBUG_RULE(identifier);
BOOST_SPIRIT_DEBUG_RULE(block);
// Label declarations
BOOST_SPIRIT_DEBUG_RULE(labelDeclarationPart);
BOOST_SPIRIT_DEBUG_RULE(label);
// Constant definitions and literals
BOOST_SPIRIT_DEBUG_RULE(constantDefinitionPart);
BOOST_SPIRIT_DEBUG_RULE(constantDefinition);
BOOST_SPIRIT_DEBUG_RULE(constant);
BOOST_SPIRIT_DEBUG_RULE(unsignedNumber);
BOOST_SPIRIT_DEBUG_RULE(unsignedInteger);
BOOST_SPIRIT_DEBUG_RULE(unsignedReal);
BOOST_SPIRIT_DEBUG_RULE(sign);
BOOST_SPIRIT_DEBUG_RULE(constantIdentifier);
BOOST_SPIRIT_DEBUG_RULE(string_literal);
// Type definitions
BOOST_SPIRIT_DEBUG_RULE(typeDefinitionPart);
BOOST_SPIRIT_DEBUG_RULE(typeDefinition);
BOOST_SPIRIT_DEBUG_RULE(type);
BOOST_SPIRIT_DEBUG_RULE(simpleType);
BOOST_SPIRIT_DEBUG_RULE(scalarType);
BOOST_SPIRIT_DEBUG_RULE(subrangeType);
BOOST_SPIRIT_DEBUG_RULE(typeIdentifier);
BOOST_SPIRIT_DEBUG_RULE(structuredType);
BOOST_SPIRIT_DEBUG_RULE(unpackedStructuredType);
BOOST_SPIRIT_DEBUG_RULE(arrayType);
BOOST_SPIRIT_DEBUG_RULE(indexType);
BOOST_SPIRIT_DEBUG_RULE(componentType);
BOOST_SPIRIT_DEBUG_RULE(recordType);
BOOST_SPIRIT_DEBUG_RULE(fieldList);
BOOST_SPIRIT_DEBUG_RULE(fixedPart);
BOOST_SPIRIT_DEBUG_RULE(recordSection);
BOOST_SPIRIT_DEBUG_RULE(variantPart);
BOOST_SPIRIT_DEBUG_RULE(tagField);
BOOST_SPIRIT_DEBUG_RULE(variant);
BOOST_SPIRIT_DEBUG_RULE(caseLabelList);
BOOST_SPIRIT_DEBUG_RULE(caseLabel);
BOOST_SPIRIT_DEBUG_RULE(setType);
BOOST_SPIRIT_DEBUG_RULE(baseType);
BOOST_SPIRIT_DEBUG_RULE(fileType);
BOOST_SPIRIT_DEBUG_RULE(pointerType);
// Variable declarations
BOOST_SPIRIT_DEBUG_RULE(variableDeclarationPart);
BOOST_SPIRIT_DEBUG_RULE(variableDeclaration);
// Procedure and function declarations
BOOST_SPIRIT_DEBUG_RULE(procedureAndFunctionDeclarationPart);
BOOST_SPIRIT_DEBUG_RULE(procedureOrFunctionDeclaration);
BOOST_SPIRIT_DEBUG_RULE(procedureDeclaration);
BOOST_SPIRIT_DEBUG_RULE(procedureHeading);
BOOST_SPIRIT_DEBUG_RULE(parameterList);
BOOST_SPIRIT_DEBUG_RULE(formalParameterSection);
BOOST_SPIRIT_DEBUG_RULE(parameterGroup);
BOOST_SPIRIT_DEBUG_RULE(functionDeclaration);
BOOST_SPIRIT_DEBUG_RULE(functionHeading);
BOOST_SPIRIT_DEBUG_RULE(resultType);
// Statements and assignment
BOOST_SPIRIT_DEBUG_RULE(statementPart);
BOOST_SPIRIT_DEBUG_RULE(statement);
BOOST_SPIRIT_DEBUG_RULE(unlabelledStatement);
BOOST_SPIRIT_DEBUG_RULE(simpleStatement);
BOOST_SPIRIT_DEBUG_RULE(assignmentStatement);
// Variable access
BOOST_SPIRIT_DEBUG_RULE(variable);
BOOST_SPIRIT_DEBUG_RULE(entireVariable);
BOOST_SPIRIT_DEBUG_RULE(variableIdentifier);
BOOST_SPIRIT_DEBUG_RULE(componentVariable);
BOOST_SPIRIT_DEBUG_RULE(indexedVariable);
BOOST_SPIRIT_DEBUG_RULE(arrayVariable);
BOOST_SPIRIT_DEBUG_RULE(fieldDesignator);
BOOST_SPIRIT_DEBUG_RULE(recordVariable);
BOOST_SPIRIT_DEBUG_RULE(fieldIdentifier);
BOOST_SPIRIT_DEBUG_RULE(fileBuffer);
BOOST_SPIRIT_DEBUG_RULE(fileVariable);
BOOST_SPIRIT_DEBUG_RULE(referencedVariable);
BOOST_SPIRIT_DEBUG_RULE(pointerVariable);
// Expressions
BOOST_SPIRIT_DEBUG_RULE(expression);
BOOST_SPIRIT_DEBUG_RULE(relationalOperator);
BOOST_SPIRIT_DEBUG_RULE(simpleExpression);
BOOST_SPIRIT_DEBUG_RULE(addingOperator);
BOOST_SPIRIT_DEBUG_RULE(term);
BOOST_SPIRIT_DEBUG_RULE(multiplyingOperator);
BOOST_SPIRIT_DEBUG_RULE(factor);
BOOST_SPIRIT_DEBUG_RULE(unsignedConstant);
BOOST_SPIRIT_DEBUG_RULE(functionDesignator);
BOOST_SPIRIT_DEBUG_RULE(functionIdentifier);
BOOST_SPIRIT_DEBUG_RULE(set);
BOOST_SPIRIT_DEBUG_RULE(elementList);
BOOST_SPIRIT_DEBUG_RULE(element);
// Procedure calls, goto and the empty statement
BOOST_SPIRIT_DEBUG_RULE(procedureStatement);
BOOST_SPIRIT_DEBUG_RULE(procedureIdentifier);
BOOST_SPIRIT_DEBUG_RULE(actualParameter);
BOOST_SPIRIT_DEBUG_RULE(gotoStatement);
BOOST_SPIRIT_DEBUG_RULE(emptyStatement);
BOOST_SPIRIT_DEBUG_RULE(empty);
// Structured statements
BOOST_SPIRIT_DEBUG_RULE(structuredStatement);
BOOST_SPIRIT_DEBUG_RULE(compoundStatement);
BOOST_SPIRIT_DEBUG_RULE(conditionalStatement);
BOOST_SPIRIT_DEBUG_RULE(ifStatement);
BOOST_SPIRIT_DEBUG_RULE(caseStatement);
BOOST_SPIRIT_DEBUG_RULE(caseListElement);
BOOST_SPIRIT_DEBUG_RULE(repetetiveStatement);
BOOST_SPIRIT_DEBUG_RULE(whileStatement);
BOOST_SPIRIT_DEBUG_RULE(repeatStatement);
BOOST_SPIRIT_DEBUG_RULE(forStatement);
BOOST_SPIRIT_DEBUG_RULE(forList);
BOOST_SPIRIT_DEBUG_RULE(controlVariable);
BOOST_SPIRIT_DEBUG_RULE(initialValue);
BOOST_SPIRIT_DEBUG_RULE(finalValue);
BOOST_SPIRIT_DEBUG_RULE(withStatement);
BOOST_SPIRIT_DEBUG_RULE(recordVariableList);
}
#endif
// Entry point of the grammar: parsing begins at the `program` rule.
const rule<ScannerT>&
start() const
{
    return program;
}
// Symbol table used by the `identifier` rule, which subtracts a `keywords`
// match followed by a non-identifier character so that reserved words are not
// accepted as identifiers.
// NOTE(review): the table is presumably filled in before the rules are
// defined (not visible in this part of the file) -- confirm.
symbols<> keywords;
// One rule per nonterminal of the Pascal grammar. Each is assigned in the
// grammar definition section above and listed again in debug().
rule<ScannerT>
string_literal, program, programHeading, fileIdentifier,
identifier, block, labelDeclarationPart, label,
constantDefinitionPart, constantDefinition, constant,
unsignedNumber, unsignedInteger, unsignedReal, sign,
constantIdentifier, typeDefinitionPart, typeDefinition, type,
simpleType, scalarType, subrangeType, typeIdentifier,
structuredType, unpackedStructuredType, arrayType, indexType,
componentType, recordType, fieldList, fixedPart, recordSection,
variantPart, tagField, variant, caseLabelList, caseLabel,
setType, baseType, fileType, pointerType,
variableDeclarationPart, variableDeclaration,
procedureAndFunctionDeclarationPart,
procedureOrFunctionDeclaration, procedureDeclaration,
procedureHeading, parameterList, formalParameterSection,
parameterGroup, functionDeclaration, functionHeading,
resultType, statementPart, statement, unlabelledStatement,
simpleStatement, assignmentStatement, variable, entireVariable,
variableIdentifier, componentVariable, indexedVariable,
arrayVariable, fieldDesignator, recordVariable,
fieldIdentifier, fileBuffer, fileVariable, referencedVariable,
pointerVariable, expression, relationalOperator,
simpleExpression, addingOperator, term, multiplyingOperator,
factor, unsignedConstant, functionDesignator,
functionIdentifier, set, elementList, element,
procedureStatement, procedureIdentifier, actualParameter,
gotoStatement, emptyStatement, empty, structuredStatement,
compoundStatement, conditionalStatement, ifStatement,
caseStatement, caseListElement, repetetiveStatement,
whileStatement, repeatStatement, forStatement, forList,
controlVariable, initialValue, finalValue, withStatement,
recordVariableList;
};
};
///////////////////////////////////////////////////////////////////////////////
//
// The Pascal White Space Skipper
//
///////////////////////////////////////////////////////////////////////////////
// Skip grammar for Pascal source: matches one piece of ignorable input,
// i.e. a whitespace character, a `{ ... }` comment or a `(* ... *)` comment.
struct pascal_skipper : public grammar<pascal_skipper>
{
    pascal_skipper() {}

    template <typename ScannerT>
    struct definition
    {
        // The only rule of this grammar; it is also the start rule.
        rule<ScannerT> skip;

        definition(pascal_skipper const& /*self*/)
        {
            skip
                =   space_p
                    // brace comment: everything up to the first '}'
                |   chlit<>('{')
                    >> *(anychar_p - chlit<>('}'))
                    >> chlit<>('}')
                    // parenthesis-star comment: everything up to the first "*)"
                |   strlit<>("(*")
                    >> *(anychar_p - strlit<>("*)"))
                    >> strlit<>("*)")
                ;

#ifdef BOOST_SPIRIT_DEBUG
            BOOST_SPIRIT_DEBUG_RULE(skip);
#endif
        }

        rule<ScannerT> const&
        start() const
        {
            return skip;
        }
    };
};
///////////////////////////////////////////////////////////////////////////////
//
// Parse a file
//
///////////////////////////////////////////////////////////////////////////////
static void
parse(char const* filename)
{
ifstream in(filename);
if (!in)
{
cerr << "Could not open input file: " << filename << endl;
return;
}
in.unsetf(ios::skipws); // Turn of white space skipping on the stream
vector<char> vec;
std::copy(
istream_iterator<char>(in),
istream_iterator<char>(),
std::back_inserter(vec));
vector<char>::const_iterator first = vec.begin();
vector<char>::const_iterator last = vec.end();
pascal_skipper skip_p;
pascal_grammar p;
#ifdef BOOST_SPIRIT_DEBUG
BOOST_SPIRIT_DEBUG_NODE(skip_p);
BOOST_SPIRIT_DEBUG_NODE(p);
#endif
parse_info<vector<char>::const_iterator> info =
parse(first, last, p, skip_p);
if (info.full)
{
cout << "\t\t" << filename << " Parses OK\n\n\n";
}
else
{
cerr << "---PARSING FAILURE---\n";
cerr << string(info.stop, last);
}
}
///////////////////////////////////////////////////////////////////////////////
//
// Main program
//
///////////////////////////////////////////////////////////////////////////////
// Prints a banner, then parses every file named on the command line in order,
// echoing each file name first. With no arguments a notice is written to
// cerr. The exit status is 0 in all cases.
int
main(int argc, char* argv[])
{
    char const* const banner_rule =
        "/////////////////////////////////////////////////////////\n\n";

    cout << banner_rule;
    cout << "\t\tPascal Grammar For Spirit...\n\n";
    cout << banner_rule;

    if (argc <= 1)
    {
        cerr << "---NO FILENAME GIVEN---" << endl;
        return 0;
    }

    // argv[0] is skipped; every remaining argument is treated as a file name.
    for (char** arg = argv + 1; arg != argv + argc; ++arg)
    {
        cout << *arg << endl;
        parse(*arg);
    }

    return 0;
}

View File

@@ -0,0 +1,9 @@
# Pascal sample inputs (t1.pas through t8.pas) listed for inclusion in the
# distribution.
EXTRA_DIST = \
t1.pas \
t2.pas \
t3.pas \
t4.pas \
t5.pas \
t6.pas \
t7.pas \
t8.pas

View File

@@ -0,0 +1,28 @@
{ program 0.1
assuming annual inflation rates of 7, 8, and 10 per cent,
find the factor by which the franc, dollar, pound
sterling, mark, or guilder will have been devalued in
1, 2, ... n years.}
program inflation(output);
const
n = 10; { number of years tabulated; the loop below stops when i = n }
var
i : integer; { current year, counted up from 1 }
w1, w2, w3 : real; { running products of 1.07, 1.08 and 1.10 respectively }
begin
i := 0;
w1 := 1.0;
w2 := 1.0;
w3 := 1.0;
repeat
i := i + 1;
w1 := w1 * 1.07;
w2 := w2 * 1.08;
w3 := w3 * 1.10;
writeln(i, w1, w2, w3); { one output line per year }
until i=n
end.

View File

@@ -0,0 +1,25 @@
{ program 3.1
example of constant definition part }
program convert(output);
const
addin = 32; { offset added in the conversion below }
mulby = 1.8; { scale factor used in the conversion below }
low = 0; { first value of the loop and lower bound of degree }
high = 39; { last value of the loop and upper bound of degree }
seperator = '------------';
var
degree : low..high; { subrange type built from the constants above }
begin
writeln(seperator);
for degree := low to high do
begin
{ prints degree labelled 'c' and round(degree * mulby + addin) labelled 'f' }
write(degree, 'c', round(degree * mulby + addin), 'f');
{ NOTE(review): ndd is presumably a typo for the standard function odd;
  it is still an ordinary identifier to the parser -- confirm }
if ndd(degree) then writeln
end;
writeln;
writeln(seperator)
end.

View File

@@ -0,0 +1,12 @@
{ program 4.1
the compound statement }
program beginend(output);
var
sum : integer;
begin
sum := 3 + 5;
{ prints the sum and its negation; the semicolon before end leaves an
  empty statement as the last statement of the block }
writeln(sum, -sum);
end.

View File

@@ -0,0 +1,20 @@
{ program 4.2
compute h(n) = 1 + 1/2 + 1/3 +...+ 1/n

NOTE: this file contains a marked syntax error (a misspelled begin in the
while loop below), so it is not a valid Pascal program as written. }
program egwhile(input, output);
var
n : integer;
h : real;
begin
read(n);
write(n);
h := 0;
while n>0 do
bein {SYNTAX ERROR HERE!!! should be begin}
h := h + 1/n;
n := n - 1;
end;
writeln(h);
end.

View File

@@ -0,0 +1,19 @@
{ program 4.3
compute h(n) = 1 + 1/2 + 1/3 +...+ 1/n }
program egrepeat(input, output);
var
n : integer;
h : real;
begin
read(n);
write(n);
h := 0;
{ assumes n >= 1: the body runs before the test, so n = 0 would divide
  by zero on the first pass }
repeat
h := h + 1/n;
n := n - 1;
until n=0;
writeln(h);
end.

View File

@@ -0,0 +1,17 @@
{ program 4.4
compute h(n) = 1 + 1/2 + 1/3 +...+ 1/n }
program egfor(input, output);
var
i, n : integer;
h : real;
begin
read(n);
write(n);
h := 0;
{ counts i down from n to 1, so the terms are added from 1/n up to 1/1 }
for i:= n downto 1 do
h := h + 1/i;
writeln(h);
end.

View File

@@ -0,0 +1,28 @@
{ program 4.5
compute the cosine using the expansion:
cos(x) = 1 - x**2/(2*1) + x**4/(4*3*2*1) - ... }
program cosine(input, output);
const
eps = 1e-14; { relative tolerance: terms are added while abs(t) > eps*abs(s) }
var
x, sx, s, t : real; { argument, sqr(x), partial sum, current term }
i, k, n : integer;
begin
read(n); { number of x values to process }
for i:=1 to n do
begin
read(x);
t := 1; k := 0; s := 1; sx := sqr(x);
while abs(t) > eps*abs(s) do
begin
k := k+2;
t := -t*sx/(k*(k-1));
s := s+t;
wnd; { error here! presumably a corrupted end meant to close the while body }
writeln(x,s,k div 2)
end
end. {SYNTAX ERROR HERE: unbalanced end}

Some files were not shown because too many files have changed in this diff Show More