WvStreams
wvbackslash.cc
1/*
2 * Worldvisions Weaver Software:
3 * Copyright (C) 2002 Net Integration Technologies, Inc.
4 *
5 * Performs C-style backslash escaping and unescaping of strings.
6 */
7#include <ctype.h>
8#include "wvbackslash.h"
9
// Lookup tables for the single-letter C escapes.  The two strings are
// index-aligned: escapein[k] is the raw control character and escapeout[k]
// is the letter that follows the backslash in its escaped form.
static const char *escapein = "\a\b\f\n\r\t\v";
static const char *escapeout = "abfnrtv";
12
// Converts a value in the range 0..15 to its hexadecimal digit character.
// 'alphabase' is the character that, when 10 is added to it, yields the
// digit for ten; the default produces lowercase 'a'..'f'.
static inline char tohex(int digit, char alphabase = ('a' - 10))
{
    const char base = (digit < 10) ? '0' : alphabase;
    return static_cast<char>(base + digit);
}
17
// Converts a hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f') to its
// numeric value 0..15.  Returns -1 for any other character.
//
// The decimal range is tested explicitly rather than with isdigit(): callers
// hand in raw bytes, and a byte >= 0x80 becomes a negative 'char' on
// platforms where char is signed, which is not a valid argument for the
// <ctype.h> classification functions.
static inline int fromhex(char digit)
{
    if (digit >= '0' && digit <= '9')
        return digit - '0';
    if (digit >= 'A' && digit <= 'F')
        return digit - 'A' + 10;
    if (digit >= 'a' && digit <= 'f')
        return digit - 'a' + 10;
    return -1;
}
28
// Converts an octal digit character ('0'-'7') to its numeric value 0..7.
// Returns -1 for any other character.
static inline int fromoctal(char digit)
{
    const bool isoctal = (digit >= '0' && digit <= '7');
    return isoctal ? (digit - '0') : -1;
}
35
36
37/***** WvBackslashEncoder *****/
38
40 nasties(_nasties)
41{
42}
43
44
46 bool flush)
47{
48 size_t avail = outbuf.free();
49 size_t len;
50 while ((len = inbuf.optgettable()) != 0)
51 {
52 const unsigned char *datain = inbuf.get(len);
53 for (size_t i = 0; i < len; ++i)
54 {
55 int c = datain[i];
56
57 // handle 1 character escape sequences
58 if (avail < 1)
59 { outbuf.unget(len - i); return ! flush; }
60 const char *foundnasty = NULL;
61 const char *foundspecial = NULL;
62 if (c != '\0')
63 {
64 foundnasty = strchr(nasties.cstr(), c);
65 if (! foundnasty)
66 {
67 foundspecial = strchr(escapein, c);
68 if (! foundspecial && isprint(c))
69 {
70 outbuf.putch(c);
71 avail -= 1;
72 continue;
73 }
74 }
75 }
76
77 // handle 2 character escape sequences
78 if (avail < 2)
79 { outbuf.unget(len - i); return ! flush; }
80 if (foundnasty != NULL)
81 {
82 outbuf.putch('\\');
83 outbuf.putch(c);
84 avail -= 2;
85 continue;
86 }
87 if (foundspecial != NULL)
88 {
89 outbuf.putch('\\');
90 outbuf.putch(escapeout[foundspecial - escapein]);
91 avail -= 2;
92 continue;
93 }
94
95 // handle 4 character escape sequences
96 if (avail < 4)
97 { outbuf.unget(len - i); return ! flush; }
98 outbuf.put("\\x", 2);
99 outbuf.putch(tohex(c >> 4));
100 outbuf.putch(tohex(c & 15));
101 avail -= 4;
102 }
103 }
104 return true;
105}
106
107
109{
110 return true;
111}
112
113
114/***** WvBackslashDecoder *****/
115
117{
118 _reset();
119}
120
121
123 bool flush)
124{
125 if (outbuf.free() == 0)
126 return inbuf.used() == 0;
127 if (! flushtmpbuf(outbuf))
128 return false;
129
130 size_t len;
131 while ((len = inbuf.optgettable()) != 0)
132 {
133 const unsigned char *datain = inbuf.get(len);
134 for (size_t i = 0; i < len; ++i)
135 {
136 int c = datain[i];
137
138 switch (state)
139 {
140 case Initial:
141 if (c == '\\')
142 state = Escape;
143 tmpbuf.putch(c);
144 break;
145
146 case Escape:
147 if (c >= '0' && c <= '3')
148 {
149 tmpbuf.unalloc(1);
150 value = c - '0';
151 state = Octal1;
152 }
153 else if (c == 'x')
154 {
155 tmpbuf.putch(c);
156 state = Hex1;
157 }
158 else if (c == '\n')
159 {
160 // line continuation sequence
161 tmpbuf.unalloc(1);
162 tmpbuf.putch('\n');
163 state = Initial;
164 }
165 else
166 {
167 const char *found = strchr(escapeout, c);
168 tmpbuf.unalloc(1);
169 if (found != NULL)
170 c = escapein[found - escapeout];
171 // else we just drop the backslash
172 tmpbuf.putch(c);
173 state = Initial;
174 }
175 break;
176
177 case Hex2:
178 case Hex1: {
179 int digit = fromhex(c);
180 if (digit >= 0)
181 {
182 if (state == Hex1)
183 {
184 tmpbuf.unalloc(2);
185 value = digit;
186 state = Hex2;
187 }
188 else
189 {
190 value = (value << 4) | digit;
191 state = Initial;
192 }
193 }
194 else
195 {
196 i -= 1;
197 state = Initial;
198 }
199 break;
200 }
201
202 case Octal3:
203 case Octal2:
204 case Octal1: {
205 int digit = fromoctal(c);
206 if (digit >= 0)
207 {
208 value = (value << 3) | digit;
209 if (state != Octal3)
210 state = State(state + 1);
211 else
212 state = Initial;
213 }
214 else
215 {
216 i -= 1;
217 state = Initial;
218 }
219 break;
220 }
221 }
222
223 flushtmpbuf(outbuf);
224 if (outbuf.free() == 0)
225 {
226 inbuf.unget(len - i);
227 break;
228 }
229 }
230 }
231 if (flush)
232 {
233 if (inbuf.used() != 0)
234 return false;
235 state = Initial;
236 return flushtmpbuf(outbuf);
237 }
238 return true;
239
240}
241
242
244{
245 state = Initial;
246 value = -1;
247 tmpbuf.zap();
248 return true;
249}
250
251
252bool WvBackslashDecoder::flushtmpbuf(WvBuf &outbuf)
253{
254 if (state != Initial)
255 return true;
256
257 if (value != -1)
258 {
259 tmpbuf.putch(value);
260 value = -1;
261 }
262
263 size_t len = tmpbuf.used();
264 if (len == 0)
265 return true;
266 size_t avail = outbuf.free();
267 if (avail > len)
268 avail = len;
269 outbuf.merge(tmpbuf, avail);
270 len -= avail;
271 if (len == 0)
272 {
273 tmpbuf.zap();
274 return true;
275 }
276 return false;
277}
virtual bool _encode(WvBuf &inbuf, WvBuf &outbuf, bool flush)
Template method implementation of encode().
Definition: wvbackslash.cc:122
WvBackslashDecoder()
Creates a C-style backslash decoder.
Definition: wvbackslash.cc:116
virtual bool _reset()
Template method implementation of reset().
Definition: wvbackslash.cc:243
WvBackslashEncoder(WvStringParm _nasties="\\\"")
Creates a C-style backslash encoder.
Definition: wvbackslash.cc:39
virtual bool _encode(WvBuf &inbuf, WvBuf &outbuf, bool flush)
Template method implementation of encode().
Definition: wvbackslash.cc:45
virtual bool _reset()
Template method implementation of reset().
Definition: wvbackslash.cc:108
void merge(Buffer &inbuf, size_t count)
Efficiently moves count bytes from the specified buffer into this one.
Definition: wvbufbase.h:558
size_t optgettable() const
Returns the optimal maximum number of elements in the buffer currently available for reading without ...
Definition: wvbufbase.h:154
const T * get(size_t count)
Reads exactly the specified number of elements and returns a pointer to a storage location owned by the buffer.
Definition: wvbufbase.h:114
void unget(size_t count)
Ungets exactly the specified number of elements by returning them to the buffer for subsequent reads.
Definition: wvbufbase.h:177
void unalloc(size_t count)
Unallocates exactly the specified number of elements by removing them from the buffer and releasing their storage.
Definition: wvbufbase.h:421
size_t free() const
Returns the number of elements that the buffer can currently accept for writing.
Definition: wvbufbase.h:353
void zap()
Clears the buffer.
Definition: wvbufbase.h:257
size_t used() const
Returns the number of elements in the buffer currently available for reading.
Definition: wvbufbase.h:92
Specialization of WvBufBase for unsigned char type buffers intended for use with raw memory buffers.
Definition: wvbuf.h:24
void putch(int ch)
Puts a single character into the buffer as an int.
Definition: wvbuf.h:76
bool flush(WvBuf &inbuf, WvBuf &outbuf, bool finish=false)
Flushes the encoder and optionally finishes it.
Definition: wvencoder.h:163
A WvFastString acts exactly like a WvString, but can take (const char *) strings without needing to a...
Definition: wvstring.h:94
const char * cstr() const
Returns a (const char *) for this string.
Definition: wvstring.h:267