WvStreams
wvglob.cc
1/*
2 * Worldvisions Weaver Software:
3 * Copyright (C) 1997-2004 Net Integration Technologies, Inc.
4 *
5 * Implementation of globbing support through WvRegex
6 */
7#include "wvglob.h"
8
// Body of the default constructor WvGlob::WvGlob(): it performs no work.
// NOTE(review): the signature line (wvglob.cc:9) is missing from this listing.
10{
11}
12
// Body of a definition whose signature line (wvglob.cc:13) is missing from
// this listing -- presumably the WvGlob constructor that takes a glob
// pattern; TODO confirm.  It only forwards the pattern to set().
14{
15 set(glob);
16}
17
// Body of WvGlob::set(glob); the signature line (wvglob.cc:18) is missing
// from this listing.  Converts the glob to a regex and installs it.
// Returns isok() after the attempt.
19{
20 WvString errstr;
// Translate the glob; errstr receives the message if translation fails.
21 WvString regex = glob_to_regex(glob, &errstr);
22 if (!!errstr)
23 WvErrorBase::seterr(errstr);
24 else if (!!regex)
25 WvRegex::set(regex);
// Neither an error message nor a regex came back: report a generic failure.
26 else WvErrorBase::seterr("Failed to convert glob pattern to regex");
27 return isok();
28}
29
30const bool WvGlob::normal_quit_chars[256] = {
31 false, false, false, false, false, false, false, false,
32 false, false, false, false, false, false, false, false,
33 false, false, false, false, false, false, false, false,
34 false, false, false, false, false, false, false, false,
35 false, false, false, false, false, false, false, false,
36 false, false, false, false, false, false, false, false,
37 false, false, false, false, false, false, false, false,
38 false, false, false, false, false, false, false, false,
39 false, false, false, false, false, false, false, false,
40 false, false, false, false, false, false, false, false,
41 false, false, false, false, false, false, false, false,
42 false, false, false, false, false, false, false, false,
43 false, false, false, false, false, false, false, false,
44 false, false, false, false, false, false, false, false,
45 false, false, false, false, false, false, false, false,
46 false, false, false, false, false, false, false, false,
47 false, false, false, false, false, false, false, false,
48 false, false, false, false, false, false, false, false,
49 false, false, false, false, false, false, false, false,
50 false, false, false, false, false, false, false, false,
51 false, false, false, false, false, false, false, false,
52 false, false, false, false, false, false, false, false,
53 false, false, false, false, false, false, false, false,
54 false, false, false, false, false, false, false, false,
55 false, false, false, false, false, false, false, false,
56 false, false, false, false, false, false, false, false,
57 false, false, false, false, false, false, false, false,
58 false, false, false, false, false, false, false, false,
59 false, false, false, false, false, false, false, false,
60 false, false, false, false, false, false, false, false,
61 false, false, false, false, false, false, false, false,
62 false, false, false, false, false, false, false, false
63};
64const bool WvGlob::brace_quit_chars[256] = {
65 false, false, false, false, false, false, false, false,
66 false, false, false, false, false, false, false, false,
67 false, false, false, false, false, false, false, false,
68 false, false, false, false, false, false, false, false,
69 false, false, false, false, false, false, false, false,
70 false, false, false, false, true /* , */, false, false, false,
71 false, false, false, false, false, false, false, false,
72 false, false, false, false, false, false, false, false,
73 false, false, false, false, false, false, false, false,
74 false, false, false, false, false, false, false, false,
75 false, false, false, false, false, false, false, false,
76 false, false, false, false, false, false, false, false,
77 false, false, false, false, false, false, false, false,
78 false, false, false, false, false, false, false, false,
79 false, false, false, false, false, false, false, false,
80 false, false, false, false, false, true /* } */, false, false,
81 false, false, false, false, false, false, false, false,
82 false, false, false, false, false, false, false, false,
83 false, false, false, false, false, false, false, false,
84 false, false, false, false, false, false, false, false,
85 false, false, false, false, false, false, false, false,
86 false, false, false, false, false, false, false, false,
87 false, false, false, false, false, false, false, false,
88 false, false, false, false, false, false, false, false,
89 false, false, false, false, false, false, false, false,
90 false, false, false, false, false, false, false, false,
91 false, false, false, false, false, false, false, false,
92 false, false, false, false, false, false, false, false,
93 false, false, false, false, false, false, false, false,
94 false, false, false, false, false, false, false, false,
95 false, false, false, false, false, false, false, false,
96 false, false, false, false, false, false, false, false
97};
98
99//
100// Known bugs:
101//
102// - If / is part of a range it will not be excluded in the resulting regex
103// eg. fred[.-0]joe will match fred/joe (this violates glob(7))
104// However, an explicit / in a bracket expression results in an error.
105//
106WvString WvGlob::glob_to_regex(const char *src, size_t &src_used,
107 char *dst, size_t &dst_used, const bool quit_chars[256])
108{
109 enum { NORMAL, BACKSLASH, BRACKET, BRACKET_FIRST } state = NORMAL;
110 src_used = 0;
111 dst_used = 0;
112 bool quit_now = false;
113 while (!quit_now && src[src_used])
114 {
115 switch (state)
116 {
117 case NORMAL:
118 if (quit_chars[(unsigned char)src[src_used]])
119 {
120 quit_now = true;
121 break;
122 }
123
124 switch (src[src_used])
125 {
126 case '\\':
127 state = BACKSLASH;
128 break;
129
130 case '[':
131 if (src[src_used+1] == '^' && src[src_used+2] == ']')
132 {
133 // Get rid of degenerate case:
134 src_used += 2;
135 if (dst) dst[dst_used] = '\\'; ++dst_used;
136 if (dst) dst[dst_used] = '^'; ++dst_used;
137 }
138 else
139 {
140 if (dst) dst[dst_used] = '('; ++dst_used;
141 state = BRACKET_FIRST;
142 }
143 break;
144
145 case '*':
146 if (dst) dst[dst_used] = '('; ++dst_used;
147 if (dst) dst[dst_used] = '['; ++dst_used;
148 if (dst) dst[dst_used] = '^'; ++dst_used;
149 if (dst) dst[dst_used] = '/'; ++dst_used;
150 if (dst) dst[dst_used] = ']'; ++dst_used;
151 if (dst) dst[dst_used] = '*'; ++dst_used;
152 if (dst) dst[dst_used] = ')'; ++dst_used;
153 break;
154
155 case '?':
156 if (dst) dst[dst_used] = '('; ++dst_used;
157 if (dst) dst[dst_used] = '['; ++dst_used;
158 if (dst) dst[dst_used] = '^'; ++dst_used;
159 if (dst) dst[dst_used] = '/'; ++dst_used;
160 if (dst) dst[dst_used] = ']'; ++dst_used;
161 if (dst) dst[dst_used] = ')'; ++dst_used;
162 break;
163
164 case '{':
165 if (dst) dst[dst_used] = '('; ++dst_used;
166 ++src_used;
167 while (true)
168 {
169 size_t sub_src_used, sub_dst_used;
170
171 WvString errstr =
172 glob_to_regex(&src[src_used], sub_src_used,
173 dst? &dst[dst_used]: NULL, sub_dst_used,
174 brace_quit_chars);
175 if (errstr) return errstr;
176
177 src_used += sub_src_used;
178 dst_used += sub_dst_used;
179
180 if (src[src_used] == '}')
181 break;
182 else if (src[src_used] != ',')
183 return WvString("Unfinished brace expression (index %s)", src_used);
184 if (dst) dst[dst_used] = '|'; ++dst_used;
185 ++src_used;
186 }
187 if (dst) dst[dst_used] = ')'; ++dst_used;
188 break;
189
190 case '^':
191 case '.':
192 case '$':
193 case '(':
194 case ')':
195 case '|':
196 case '+':
197 if (dst) dst[dst_used] = '\\'; ++dst_used;
198 if (dst) dst[dst_used] = src[src_used]; ++dst_used;
199 break;
200
201 default:
202 if (dst) dst[dst_used] = src[src_used]; ++dst_used;
203 break;
204 }
205 break;
206
207 case BACKSLASH:
208 switch (src[src_used])
209 {
210 case '^':
211 case '.':
212 case '$':
213 case '(':
214 case ')':
215 case '|':
216 case '+':
217 case '[':
218 case '{':
219 case '*':
220 case '?':
221 case '\\':
222 if (dst) dst[dst_used] = '\\'; ++dst_used;
223 // fall through..
224 default:
225 if (dst) dst[dst_used] = src[src_used]; ++dst_used;
226 break;
227
228 }
229 state = NORMAL;
230 break;
231
232 case BRACKET_FIRST:
233 switch (src[src_used])
234 {
235 case '!':
236 if (dst) dst[dst_used] = '['; ++dst_used;
237 if (dst) dst[dst_used] = '^'; ++dst_used;
238 break;
239
240 case '^':
241 if (dst) dst[dst_used] = '\\'; ++dst_used;
242 if (dst) dst[dst_used] = '^'; ++dst_used;
243 if (dst) dst[dst_used] = '|'; ++dst_used;
244 if (dst) dst[dst_used] = '['; ++dst_used;
245 break;
246
247 case '/':
248 return WvString("Slash not allowed in bracket expression (index %s)", src_used);
249
250 default:
251 if (dst) dst[dst_used] = '['; ++dst_used;
252 if (dst) dst[dst_used] = src[src_used]; ++dst_used;
253 break;
254 }
255 state = BRACKET;
256 break;
257
258 case BRACKET:
259 switch (src[src_used])
260 {
261 case ']':
262 if (dst) dst[dst_used] = ']'; ++dst_used;
263 if (dst) dst[dst_used] = ')'; ++dst_used;
264 state = NORMAL;
265 break;
266
267 case '/':
268 return WvString("Slash not allowed in bracket expression (index %s)", src_used);
269
270 default:
271 if (dst) dst[dst_used] = src[src_used]; ++dst_used;
272 break;
273 }
274 break;
275 }
276
277 if (!quit_now) ++src_used;
278 }
279
280 if (state == BRACKET || state == BRACKET_FIRST)
281 return WvString("Unfinished bracket expression (index %s)", src_used);
282 else if (state == BACKSLASH)
283 return WvString("Unfinished backslash expression (index %s)", src_used);
284 else return WvString::null;
285}
286
287WvString WvGlob::glob_to_regex(WvStringParm glob, WvString *errstr)
288{
289 if (glob.isnull())
290 {
291 if (errstr) *errstr = WvString("Glob is NULL");
292 return WvString::null;
293 }
294
295 size_t src_used, dst_used;
296 WvString local_errstr = glob_to_regex(glob, src_used, NULL, dst_used, normal_quit_chars);
297 if (!!local_errstr)
298 {
299 if (errstr) *errstr = local_errstr;
300 return WvString::null;
301 }
302
303 WvString result;
304 result.setsize(1+dst_used+1+1);
305 char *dst = result.edit();
306 *dst++ = '^';
307 local_errstr = glob_to_regex(glob, src_used, dst, dst_used, normal_quit_chars);
308 if (!!local_errstr)
309 {
310 if (errstr) *errstr = local_errstr;
311 return WvString::null;
312 }
313 dst += dst_used;
314 *dst++ = '$';
315 *dst++ = '\0';
316
317 return result;
318}
virtual bool isok() const
By default, returns true if geterr() == 0.
Definition: wverror.h:39
virtual void seterr(int _errnum)
Set the errnum variable – we have an error.
Definition: wverror.cc:144
A WvFastString acts exactly like a WvString, but can take (const char *) strings without needing to a...
Definition: wvstring.h:94
bool isnull() const
returns true if this string is null
Definition: wvstring.h:290
bool set(WvStringParm glob)
Replace the current regex to match with a new one.
Definition: wvglob.cc:18
WvGlob()
Construct an empty glob object.
Definition: wvglob.cc:9
WvRegex – Unified support for regular expressions.
Definition: wvregex.h:48
bool set(WvStringParm regex, int cflags=default_cflags)
Replace the current regex to match with a new one.
Definition: wvregex.cc:27
WvString is an implementation of a simple and efficient printable-string class.
Definition: wvstring.h:330
char * edit()
make the string editable, and return a non-const (char*)
Definition: wvstring.h:397