49#include "EST_string_aux.h"
50#include "EST_cutils.h"
// Default character sets for the tokeniser's character classes.
// The whitespace default is assigned to WhiteSpaceChars elsewhere in this
// file; how the other three are installed is not visible in this excerpt.

// Default whitespace: space, tab, newline, carriage return.
53const EST_String EST_Token_Default_WhiteSpaceChars =
" \t\n\r";
// Default single-character symbols: round, curly and square brackets.
54const EST_String EST_Token_Default_SingleCharSymbols =
"(){}[]";
// Default pre-punctuation: double quote, single quote, backquote and the
// three opening brackets.
55const EST_String EST_Token_Default_PrePunctuationSymbols =
"\"'`({[";
// Default (post) punctuation: the three quote characters, sentence
// punctuation . , : ; ! ? and the three closing brackets.
// NOTE(review): the quote characters appear in both the pre-punctuation
// and punctuation sets, and the brackets also appear in the
// single-character set.
56const EST_String EST_Token_Default_PunctuationSymbols =
"\"'`.,:;!?]})";
// Descriptive labels stored in Origin (assigned elsewhere in this file)
// when a stream is not opened from a named file.

// Label for a stream built on an already-open file descriptor.
57const EST_String Token_Origin_FD =
"existing file descriptor";
// Label for a stream built on an already-open istream.
58const EST_String Token_Origin_Stream =
"existing istream";
// Label for a stream built on an in-memory string.
59const EST_String Token_Origin_String =
"existing string";
// File-local regex whose pattern is a bracket set of space, tab, newline
// and carriage return (presumably matches any one whitespace character --
// confirm against EST_Regex syntax). No use is visible in this excerpt.
61static EST_Regex RXanywhitespace(
"[ \t\n\r]");
63static inline char *check_extend_str_in(
char *str,
int pos,
int *max)
75 newstuff =
new char[*max];
76 strncpy(newstuff,str,pos);
// check_extend_str(STR, POS, MAX): yields STR unchanged while POS is below
// the capacity *MAX; otherwise yields the result of
// check_extend_str_in(STR, POS, MAX). MAX is a pointer to the capacity.
// Callers assign the result back to the buffer pointer.
// POS and MAX are each expanded twice, so avoid side effects in them.
84#define check_extend_str(STR, POS, MAX) \
85 (((POS)>= *(MAX))?check_extend_str_in((STR),(POS),(MAX)):(STR))
87ostream& operator<<(ostream& s,
const EST_Token &p)
89 s <<
"[TOKEN " << p.pname <<
"]";
98 p_filepos = a.p_filepos;
99 p_quoted = a.p_quoted;
109 return "line "+itoString(linenum)+
" char "+itoString(linepos);
118EST_TokenStream::EST_TokenStream()
121 tok_wspace =
new char[tok_wspacelen];
123 tok_stuff =
new char[tok_stufflen];
124 tok_prepuncslen = 32;
125 tok_prepuncs =
new char[tok_prepuncslen];
134 cerr <<
"TokenStream: warning passing TokenStream not as reference"
150void EST_TokenStream::default_values()
154 peeked_charp = FALSE;
159 WhiteSpaceChars = EST_Token_Default_WhiteSpaceChars;
169 if (type != tst_none)
171 delete [] tok_wspace;
173 delete [] tok_prepuncs;
179 s <<
"[TOKENSTREAM ";
183 cerr <<
"UNSET";
break;
185 cerr <<
"FILE";
break;
187 cerr <<
"PIPE";
break;
189 cerr <<
"ISTREAM";
break;
191 cerr <<
"STRING";
break;
193 cerr <<
"UNKNOWN" << endl;
202 if (type != tst_none)
208 cerr <<
"Cannot open file " <<
filename <<
" as tokenstream"
221 if (type != tst_none)
227 cerr <<
"Cannot absorb NULL filestream as tokenstream" << endl;
230 Origin = Token_Origin_FD;
233 close_at_end = close_when_finished;
241 if (type != tst_none)
245 Origin = Token_Origin_Stream;
255 if (type != tst_none)
258 buf = (
const char *)newbuffer;
259 buffer_length = newbuffer.
length();
260 buffer =
new char[buffer_length+1];
261 memmove(buffer,buf,buffer_length+1);
263 Origin = Token_Origin_String;
269int EST_TokenStream::seek_end()
272 peeked_charp = FALSE;
278 cerr <<
"EST_TokenStream unset" << endl;
282 fseek(fp,0,SEEK_END);
283 p_filepos = ftell(fp);
286 cerr <<
"EST_TokenStream seek on pipe not supported" << endl;
290 is->seekg(0,is->end);
291 p_filepos = is->tellg();
298 cerr <<
"EST_TokenStream: unknown type" << endl;
307 peeked_charp = FALSE;
313 cerr <<
"EST_TokenStream unset" << endl;
317 p_filepos = position;
318 return fseek(fp,position,SEEK_SET);
320 cerr <<
"EST_TokenStream seek on pipe not supported" << endl;
324 p_filepos = position;
325 is->seekg(position, is->beg);
341 cerr <<
"EST_TokenStream: unknown type" << endl;
349static int stdio_fread(
void *buff,
int size,
int nitems,FILE *fp)
352 return fread(buff,size,nitems,fp);
364 <<
" peeked into binary data" << endl;
368 peeked_charp = FALSE;
374 cerr <<
"EST_TokenStream unset" << endl;
378 items_read = stdio_fread(buff,(
size_t)size,(
size_t)nitems,fp);
379 p_filepos += items_read*size;
382 cerr <<
"EST_TokenStream fread pipe not yet supported" << endl;
386 is->read((
char*)buff, (
size_t) size*nitems);
387 return is->gcount()/size;
390 if ((buffer_length-pos)/size < nitems)
391 items_read = (buffer_length-pos)/size;
394 memcpy(buff,&buffer[pos],items_read*size);
395 pos += items_read*size;
398 cerr <<
"EST_TokenStream: unknown type" << endl;
427 cerr <<
"EST_TokenStream: unknown type" << endl;
432 peeked_charp = FALSE;
446 fp = freopen(Origin,
"rb",fp);
450 cerr <<
"EST_TokenStream: can't rewind pipe" << endl;
454 cerr <<
"EST_TokenStream: can't rewind istream" << endl;
460 cerr <<
"EST_TokenStream: unknown type" << endl;
465 peeked_charp = FALSE;
503 result += t.whitespace() + t.prepunctuation() +
504 t.string() + t.punctuation();
507 cerr <<
"EST_TokenStream: end of file when looking for \"" <<
526 result += t.whitespace() + t.prepunctuation();
529 result += quote_string(t.string());
531 result += t.string();
533 result += t.punctuation();
546 char *w = wstrdup(
peek().whitespace());
548 for (i=0; w[i] != 0; i++)
573 EST_error(
"Expected '%s' got '%s' at %s",
574 (
const char *)expected,
584void EST_TokenStream::build_table()
590 for (i=0; i<256; ++i)
593 for (p=WhiteSpaceChars; *p; ++p)
594 if (p_table[c=(
unsigned char)*p])
595 EST_warning(
"Character '%c' has two classes, '%c' and '%c'",
600 for (p=SingleCharSymbols; *p; ++p)
601 if (p_table[c=(
unsigned char)*p])
602 EST_warning(
"Character '%c' has two classes, '%c' and '%c'",
603 *p, p_table[c],
'!');
607 for (p=PunctuationSymbols; *p; ++p)
608 if (p_table[c=(
unsigned char)*p] ==
'@')
611 EST_warning(
"Character '%c' has two classes, '%c' and '%c'",
612 *p, p_table[c],
'.');
616 for(p=PrePunctuationSymbols; *p; ++p)
617 if (p_table[c=(
unsigned char)*p] ==
'@')
619 else if (p_table[c] ==
'.')
622 EST_warning(
"Character '%c' has two classes, '%c' and '%c'",
623 *p, p_table[c],
'$');
630inline int EST_TokenStream::getpeeked_internal(
void)
632 peeked_charp = FALSE;
637int EST_TokenStream::getch_internal()
640 if (EST_TokenStream::peeked_charp)
642 return getpeeked_internal();
648 cerr <<
"EST_TokenStream unset" << endl;
655 if (stdio_fread(&lc,1,1,fp) == 0)
662 cerr <<
"EST_TokenStream pipe not yet supported" << endl;
669 if (pos < buffer_length)
672 return buffer[pos++];
677 cerr <<
"EST_TokenStream: unknown type" << endl;
684int EST_TokenStream::getch(
void)
686 return getch_internal();
689inline int EST_TokenStream::peekch_internal()
694 peeked_char = getch_internal();
700int EST_TokenStream::peekch(
void)
702 return peekch_internal();
// CLASS(C, CL): true when the p_table entry for character C equals the
// class code CL. C is cast to unsigned char before indexing so that
// negative char values do not index out of range. C is expanded exactly
// once, so callers may pass an expression with side effects (this file
// passes `c=getch_internal()`).
706#define CLASS(C,CL) (p_table[(unsigned char)(C)]==(CL))
// CLASS2(C, CL1, CL2): true when the p_table entry for character C equals
// either class code CL1 or CL2. C is expanded twice (and evaluated twice
// when the first comparison fails), so do not pass an expression with side
// effects.
708#define CLASS2(C,CL1,CL2) (p_table[(unsigned char)(C)]==(CL1)||p_table[(unsigned char)(C)]==(CL2))
724 for (i=0; (CLASS(c=getch_internal(),
' ') &&
727 if (c ==
'\n') linepos++;
728 tok_wspace = check_extend_str(tok_wspace,i,&tok_wspacelen);
731 tok_wspace[i] =
'\0';
743 ((c = getch_internal()) != EOF)
748 tok_stuff = check_extend_str(tok_stuff,i,&tok_stufflen);
750 c = getch_internal();
757 for (i=0,tok_stuff[i++]=c;
760 !CLASS(c=peekch_internal(),
' ') &&
764 tok_stuff = check_extend_str(tok_stuff,i,&tok_stufflen);
766 tok_stuff[i++] = getpeeked_internal();
772 ((j < i) && CLASS2(tok_stuff[j],
'$',
'"'));
774 if ((j > 0) && (j < i))
776 tok_prepuncs = check_extend_str(tok_prepuncs,j+1,&tok_prepuncslen);
777 memmove(tok_prepuncs,tok_stuff,j);
778 tok_prepuncs[j] =
'\0';
790 ((j > 0) && CLASS2(word[j],
'.',
'"'));
792 if (word[j+1] !=
'\0')
801 if (tok_wspace[0] ==
'\0')
823 if ((
peek().whitespace().contains(
"\n")) ||
eof())
832 if (!peeked_tokp)
get();
854 char *quoted =
new char[s.
length()*(quote.length()+escape.
length())+
855 1+quote.length()+quote.length()];
856 quoted[0] = quote(0);
857 for (i=1,j=0; j < s.
length(); j++,i++)
859 if (s(j) == quote(0))
860 quoted[i++] = escape(0);
861 else if (s(j) == escape(0))
862 quoted[i++] = escape(0);
865 quoted[i++] = quote(0);
867 quoted_form = quoted;
877 return Origin+
":"+itoString(linepos);
static const EST_String Empty
Constant empty string.
int length(void) const
Length of string (not the length of the underlying chunk)
int contains(const char *s, int pos=-1) const
Does it contain this substring?
int restart(void)
Reset to start of file/string.
EST_Token & get()
get next token in stream
EST_Token get_upto(const EST_String &s)
get up to {\tt s} in stream as a single token.
int fread(void *buff, int size, int nitems) EST_WARN_UNUSED_RESULT
Reading binary data, (don't use peek() immediately beforehand)
const EST_String filename() const
The originating filename (if there is one)
~EST_TokenStream()
will close file if appropriate for type
const EST_String pos_description()
A string describing current position, suitable for error messages.
int open_string(const EST_String &newbuffer)
open a \Ref{EST_TokenStream} for string rather than a file
EST_Token get_upto_eoln(void)
get up to the end of the line as a single token.
EST_Token & peek(void)
peek at next token
void close(void)
Close stream.
int open(const EST_String &filename)
open a \Ref{EST_TokenStream} for a file.
int seek(int position)
seek, reposition file pointer
void set_prepunctuation(const EST_String &p)
set prepunctuation
void set_whitespace(const EST_String &p)
set whitespace of token.
void set_punctuation(const EST_String &p)
set (post) punctuation of token.
const EST_String pos_description() const
A string describing current position, suitable for error messages.
void set_filepos(int c)
Set file position in original \Ref{EST_TokenStream}.
void set_quoted(int q)
Note that this token was quoted (or not)
void set_token(const EST_String &p)
set token from a string