WvStreams
wvstrutils.h
Go to the documentation of this file.
1/* -*- Mode: C++ -*-
2 * Worldvisions Weaver Software:
3 * Copyright (C) 1997-2002 Net Integration Technologies, Inc.
4 *
5 * Various little string functions...
6 *
7 * FIXME: and some other assorted crap that belongs anywhere but here.
8 */
9#ifndef __WVSTRUTILS_H
10#define __WVSTRUTILS_H
11
12#include <sys/types.h> // for off_t
13#include <time.h>
14#include <ctype.h>
15#include "wvstring.h"
16#include "wvstringlist.h"
17#include "wvhex.h"
18#ifndef _WIN32
19#include "wvregex.h"
20#endif
21
/**
 * Add character c to the end of a string after removing terminating
 * carriage returns/linefeeds, if any.
 */
34char *terminate_string(char *string, char c);
35
/**
 * Trims whitespace from the beginning and end of the character string,
 * including carriage returns / line feeds.
 */
44char *trim_string(char *string);
45
/**
 * Variant of trim_string() taking an extra character c.
 * NOTE(review): the exact role of c is not visible in this header --
 * confirm against strutils.cc.
 */
50char *trim_string(char *string, char c);
51
/**
 * Return the string formed by concatenating string 'a' and string 'b'
 * with the 'sep' character between them.
 * NOTE(review): the effect of 'onesep' is not visible in this header --
 * confirm against strutils.cc.
 */
65WvString spacecat(WvStringParm a, WvStringParm b, char sep = ' ',
66 bool onesep = false);
67
68
/**
 * Replaces all whitespace characters in the string with non-breaking
 * spaces (&#160;) for use with web stuff.
 */
73char *non_breaking(const char *string);
74
/**
 * Replace all instances of c1 with c2 for the first 'length' characters
 * in 'string'.
 */
79void replace_char(void *string, char c1, char c2, int length);
80
/** Snip off the first part of 'haystack' if it consists of 'needle'. */
84char *snip_string(char *haystack, char *needle);
85
// strlwr()/strupr() are only declared here when not building for _WIN32.
86#ifndef _WIN32
/**
 * In-place modify a character string so that all contained letters are
 * in lower case.
 */
91char *strlwr(char *string);
92
/**
 * In-place modify a character string so that all contained letters are
 * in upper case.
 */
97char *strupr(char *string);
98
99#endif
100
/** Returns true if all characters in 'string' are isalnum() (alphanumeric). */
102bool is_word(const char *string);
103
/**
 * Produce a hexadecimal dump of the data buffer in 'buf' of length 'len'.
 * NOTE(review): charRep presumably toggles a printable-character column --
 * confirm against strutils.cc.
 */
112WvString hexdump_buffer(const void *buf, size_t len, bool charRep = true);
113
/** Returns true if 'c' is a newline or carriage return character. */
118bool isnewline(char c);
119
/**
 * Converts escaped characters (things like %20 etc.) from web URLs into
 * their normal ASCII representations.
 * NOTE(review): the effect of 'no_space' is not visible in this header.
 */
127WvString url_decode(WvStringParm str, bool no_space = false);
128
129
139
140
/** Returns the difference between two dates in a human readable format. */
144WvString diff_dates(time_t t1, time_t t2);
145
146
/**
 * Returns an RFC822-compatible date made out of _when, or, if _when < 0,
 * out of the current time.
 */
151WvString rfc822_date(time_t _when = -1);
152
/** Returns an RFC1123-compatible date made out of _when. */
154WvString rfc1123_date(time_t _when);
155
/**
 * Return the local date (TZ applied) out of _when.
 * NOTE(review): the -1 default presumably means "now", as for
 * rfc822_date() -- confirm.
 */
157WvString local_date(time_t _when = -1);
158
/** Return the local time (in format of ISO 8601) out of _when. */
160WvString intl_time(time_t _when = -1);
161
/** Return the local date (in format of ISO 8601) out of _when. */
163WvString intl_date(time_t _when = -1);
164
/** Return the local date and time (in format of ISO 8601) out of _when. */
166WvString intl_datetime(time_t _when = -1);
167
/**
 * Return the number of seconds by which localtime (at the given
 * timestamp) is offset from GMT.
 */
168time_t intl_gmtoff(time_t t);
169
// passwd_crypt() is only declared here when not building for _WIN32.
170#ifndef _WIN32
/** Similar to crypt(), but this randomly selects its own salt. */
176WvString passwd_crypt(const char *str);
177
178#endif
/**
 * Similar to crypt(), but this randomly selects its own salt.
 * NOTE(review): presumably the MD5-based counterpart of passwd_crypt() --
 * confirm against strcrypt.cc.
 */
184WvString passwd_md5(const char *str);
185
191
/** How many times does 'c' occur in "s"? */
193int strcount(WvStringParm s, const char c);
194
200
208
/**
 * NOTE(review): presumably returns the directory part of a full
 * path/file name -- confirm against strutils.cc.
 */
215WvString getdirname(WvStringParm fullname);
216
217/*
218 * Possible rounding methods for numbers -- remember from school?
219 */
// Used as the rounding_method argument of sizetoa(), sizektoa(), sizeitoa()
// and sizekitoa(), all of which default to ROUND_UP_AT_POINT_FIVE.
// NOTE(review): the exact tie-breaking behaviour of each enumerator is not
// visible in this header -- confirm against strutils.cc.
220enum RoundingMethod
221{
222 ROUND_DOWN,
223 ROUND_DOWN_AT_POINT_FIVE,
224 ROUND_UP_AT_POINT_FIVE,
225 ROUND_UP
226};
227
/**
 * Given a number of blocks and a blocksize (default==1 byte), return a
 * WvString containing a human-readable representation of the size.
 */
233WvString sizetoa(unsigned long long blocks, unsigned long blocksize = 1,
234 RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
235
/** Given a size in kilobytes, return a human readable size. */
240WvString sizektoa(unsigned long long kbytes,
241 RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
242
/**
 * Given a number of blocks and a blocksize (default==1 byte), return a
 * WvString containing a human-readable representation of the size.
 * NOTE(review): presumably differs from sizetoa() in the unit family used
 * (the "i" suggests IEC KiB/MiB units) -- confirm against strutils.cc.
 */
248WvString sizeitoa(unsigned long long blocks, unsigned long blocksize = 1,
249 RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
250
/** Given a size in kilobytes, return a human readable size. */
255WvString sizekitoa(unsigned long long kbytes,
256 RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
257
/**
 * Given a number of seconds, returns a formatted human-readable string
 * saying how long the period is.
 */
261WvString secondstoa(unsigned int total_seconds);
262
/**
 * Finds a string in an array and returns its index.
 * NOTE(review): the table terminator and the not-found return value are
 * not visible in this header -- confirm against strutils.cc.
 */
267int lookup(const char *str, const char * const *table,
268 bool case_sensitive = false);
269
277template<class StringCollection>
278void strcoll_split(StringCollection &coll, WvStringParm _s,
279 const char *splitchars = " \t", int limit = 0)
280{
281 WvString s(_s);
282 char *sptr = s.edit(), *eptr, oldc;
283
284 // Simple if statement to catch (and add) empty (but not NULL) strings.
285 if (sptr && !*sptr )
286 {
287 WvString *emptyString = new WvString("");
288 coll.add(emptyString, true);
289 }
290
291 // Needed to catch delimeters at the beginning of the string.
292 bool firstrun = true;
293
294 while (sptr && *sptr)
295 {
296 --limit;
297
298 if (firstrun)
299 {
300 firstrun = false;
301 }
302 else
303 {
304 sptr += strspn(sptr, splitchars);
305 }
306
307 if (limit)
308 {
309 eptr = sptr + strcspn(sptr, splitchars);
310 }
311 else
312 {
313 eptr = sptr + strlen(sptr);
314 }
315
316 oldc = *eptr;
317 *eptr = 0;
318
319 WvString *newstr = new WvString(sptr);
320 coll.add(newstr, true);
321
322 *eptr = oldc;
323 sptr = eptr;
324 }
325}
326
327
341template<class StringCollection>
342void strcoll_splitstrict(StringCollection &coll, WvStringParm _s,
343 const char *splitchars = " \t", int limit = 0)
344{
345 WvString s(_s);
346 char *cur = s.edit();
347
348 if (!cur) return;
349
350 for (;;)
351 {
352 --limit;
353 if (!limit)
354 {
355 coll.add(new WvString(cur), true);
356 break;
357 }
358
359 int len = strcspn(cur, splitchars);
360
361 char tmp = cur[len];
362 cur[len] = 0;
363 coll.add(new WvString(cur), true);
364 cur[len] = tmp;
365
366 if (!cur[len]) break;
367 cur += len + 1;
368 }
369}
370
371
372#ifndef _WIN32 // don't have regex on win32
380template<class StringCollection>
381void strcoll_split(StringCollection &coll, WvStringParm s,
382 const WvRegex &regex, int limit = 0)
383{
384 int start = 0;
385 int match_start, match_end;
386 int count = 0;
387
388 while ((limit == 0 || count < limit)
389 && regex.continuable_match(&s[start], match_start, match_end)
390 && match_end > 0)
391 {
392 WvString *substr = new WvString;
393 int len = match_start;
394 substr->setsize(len+1);
395 memcpy(substr->edit(), &s[start], len);
396 substr->edit()[len] = '\0';
397 coll.add(substr, true);
398 start += match_end;
399 ++count;
400 }
401
402 if (limit == 0 || count < limit)
403 {
404 WvString *last = new WvString(&s[start]);
405 last->unique();
406 coll.add(last, true);
407 }
408}
409#endif
410
411
417template<class StringCollection>
418WvString strcoll_join(const StringCollection &coll,
419 const char *joinchars = " \t")
420{
421 size_t joinlen = strlen(joinchars);
422 size_t totlen = 1;
423 typename StringCollection::Iter s(
424 const_cast<StringCollection&>(coll));
425 for (s.rewind(); s.next(); )
426 {
427 if (s->cstr())
428 totlen += strlen(s->cstr());
429 totlen += joinlen;
430 }
431 totlen -= joinlen; // no join chars at tail
432
433 WvString total;
434 total.setsize(totlen);
435
436 char *te = total.edit();
437 te[0] = 0;
438 bool first = true;
439 for (s.rewind(); s.next(); )
440 {
441 if (first)
442 first = false;
443 else
444 strcat(te, joinchars);
445 if (s->cstr())
446 strcat(te, s->cstr());
447 }
448 return total;
449}
450
456
/** Replace any consecutive instances of character c with a single one. */
458WvString undupe(WvStringParm s, char c);
459
462
465
468
/**
 * Inserts SI-style spacing into a number
 * (eg passing 9876543210 returns "9 876 543 210").
 */
473WvString metriculate(const off_t i);
474
480
486
/** Returns the string of length len starting at pos in line. */
493WvString substr(WvString line, unsigned int pos, unsigned int len);
494
500
501// Converts a string in decimal to an arbitrary numeric type
502template<class T>
503bool wvstring_to_num(WvStringParm str, T &n)
504{
505 bool neg = false;
506 n = 0;
507
508 for (const char *p = str; *p; ++p)
509 {
510 if (isdigit(*p))
511 {
512 n = n * T(10) + T(*p - '0');
513 }
514 else if ((const char *)str == p
515 && *p == '-')
516 {
517 neg = true;
518 }
519 else return false;
520 }
521
522 if (neg)
523 n = -n;
524
525 return true;
526}
527
528/*
529 * Before using the C-style string escaping functions below, please consider
530 * using the functions in wvtclstring.h instead; they usually lead to much more
531 * human readable and manageable results, and allow representation of
532 * lists of strings.
533 */
534
/**
 * One extra escaping rule for cstr_escape() / cstr_unescape().
 *
 * Arrays of these are passed as the extra_escapes argument of those
 * functions.
 * NOTE(review): presumably 'ch' is the raw character and 'esc' the text
 * it is escaped as, and tables end with a sentinel entry -- confirm
 * against strutils.cc.
 */
struct CStrExtraEscape
{
    char ch;
    const char *esc;
};

/** Extra escape table for use with TCL-string coding (defined elsewhere). */
extern const CStrExtraEscape CSTR_TCLSTR_ESCAPES[];
541
543//
544// If data is NULL, returns WvString::null; otherwise, returns an allocated
545// WvString containing the C-style string constant that represents the data.
546//
547// All printable characters including space except " and \ are represented
548// without escaping.
549//
550// The usual C escapes are performed, such as \n, \r, \", \\ and \0.
551//
552// All other characters are escaped in uppercase hex form, eg. \x9E
553//
554// The extra_escapes parameter allows for additional characters beyond
555// the usual ones escaped in C; setting it to CSTR_TCLSTR_ESCAPES will
556// escape { and } as < and >, which allows the resulting strings to be
557// TCL-string coded without ridiculous double-escaping.
558//
559WvString cstr_escape(const void *data, size_t size,
560 const CStrExtraEscape extra_escapes[] = NULL);
561
563//
564// This function does *not* include the trailing null that a C compiler would --
565// if you want this null, put \0 at the end of the C-style string
566//
567// If cstr is correctly formatted and max_size is large enough for the
568// resulting data, returns true and size will equal the size of the
569// resulting data. If data is not NULL it will contain this data.
570//
571// If cstr is correctly formatted but max_size is too small for the resulting
572// data, returns false and size will equal the minimum value of max_size
573// for this function to have returned true. If data is non-NULL it will
574// contain the first max_size bytes of resulting data.
575//
576// If cstr is incorrectly formatted, returns false and size will equal 0.
577//
578// This function works just as well on multiple, whitespace-separated
579// C-style strings. This allows you to concatenate strings produced
580// by cstr_escape, and the result of cstr_unescape will be the data blocks
581// concatenated together. This implies that the empty string corresponds
582// to a valid data block of length zero; however, a null string still returns
583// an error.
584//
585// The extra_escapes parameter must match that used in the call to
586// cstr_escape used to produce the escaped strings.
587//
588bool cstr_unescape(WvStringParm cstr, void *data, size_t max_size, size_t &size,
589 const CStrExtraEscape extra_escapes[] = NULL);
590
/**
 * Returns true if str is a non-empty run of decimal digits, optionally
 * preceded by a single '-'.
 *
 * Returns false for a null pointer, an empty string, a lone "-", or a
 * string containing any other character (including '+' and whitespace).
 */
static inline bool is_int(const char *str)
{
    if (!str)
        return false;

    if (*str == '-')
        ++str;

    // At least one digit is required after the optional sign.
    if (!*str)
        return false;

    for (; *str; ++str)
    {
        // isdigit() is only defined for values representable as unsigned
        // char (or EOF); a plain char may be negative for bytes >= 0x80.
        if (!isdigit(static_cast<unsigned char>(*str)))
            return false;
    }

    return true;
}
608
/** Converts a pointer into a string, like glibc's p formatter would do. */
611WvString ptr2str(void* ptr);
612
613#endif // __WVSTRUTILS_H
A WvFastString acts exactly like a WvString, but can take (const char *) strings without needing to a...
Definition: wvstring.h:94
WvRegex – Unified support for regular expressions.
Definition: wvregex.h:48
bool continuable_match(WvStringParm string, int &match_start, int &match_end, WVREGEX_REGS_DECL) const
Match a given string against the compiled regular expression, capturing the start and end positions o...
Definition: wvregex.h:230
WvString is an implementation of a simple and efficient printable-string class.
Definition: wvstring.h:330
WvString & unique()
make the buf and str pointers owned only by this WvString.
Definition: wvstring.cc:306
char * edit()
make the string editable, and return a non-const (char*)
Definition: wvstring.h:397
Hex functions for compatibility with older code.
bool cstr_unescape(WvStringParm cstr, void *data, size_t max_size, size_t &size, const CStrExtraEscape extra_escapes[]=NULL)
Converts a C-style string constant into data.
Definition: strutils.cc:1182
bool isnewline(char c)
Returns true if 'c' is a newline or carriage return character.
Definition: strutils.cc:304
WvString beforestr(WvStringParm line, WvStringParm a)
Returns everything in line (exclusively) before 'a'.
Definition: strutils.cc:981
char * terminate_string(char *string, char c)
Add character c to the end of a string after removing terminating carriage returns/linefeeds if any.
Definition: strutils.cc:32
WvString fqdomainname()
Get the fqdn of the local host, using gethostbyname() and gethostname()
Definition: strutils.cc:893
WvString encode_hostname_as_DN(WvStringParm hostname)
Example: encode_hostname_as_DN("www.fizzle.com") will result in dc=www,dc=fizzle,dc=com,...
Definition: strutils.cc:444
WvString strreplace(WvStringParm s, WvStringParm a, WvStringParm b)
Replace any instances of "a" with "b" in "s".
Definition: strutils.cc:797
WvString backslash_escape(WvStringParm s1)
Returns a string with a backslash in front of every non alphanumeric character in s1.
Definition: strutils.cc:410
WvString url_encode(WvStringParm str, WvStringParm unsafe="")
Converts all those pesky spaces, colons, and other nasties into nice unreadable Quasi-Unicode codes.
Definition: strutils.cc:351
WvString getfilename(WvStringParm fullname)
Take a full path/file name and splits it up into respective pathname and filename.
Definition: strutils.cc:506
WvString rfc822_date(time_t _when=-1)
Returns an RFC822-compatible date made out of _when, or, if _when < 0, out of the current time.
Definition: strutils.cc:395
WvString hexdump_buffer(const void *buf, size_t len, bool charRep=true)
Produce a hexadecimal dump of the data buffer in 'buf' of length 'len'.
Definition: strutils.cc:245
WvString passwd_md5(const char *str)
Similar to crypt(), but this randomly selects its own salt.
Definition: strcrypt.cc:38
WvString ptr2str(void *ptr)
Converts a pointer into a string, like glibc's p formatter would do.
Definition: strutils.cc:1318
WvString sizetoa(unsigned long long blocks, unsigned long blocksize=1, RoundingMethod rounding_method=ROUND_UP_AT_POINT_FIVE)
Given a number of blocks and a blocksize (default==1 byte), return a WvString containing a human-read...
Definition: strutils.cc:708
int lookup(const char *str, const char *const *table, bool case_sensitive=false)
Finds a string in an array and returns its index.
Definition: strutils.cc:850
WvString local_date(time_t _when=-1)
Return the local date (TZ applied) out of _when.
Definition: strutils.cc:1232
WvString strcoll_join(const StringCollection &coll, const char *joinchars=" \t")
Concatenates all strings in a collection and returns the result.
Definition: wvstrutils.h:418
WvString nice_hostname(WvStringParm name)
Given a hostname, turn it into a "nice" one.
Definition: strutils.cc:460
bool is_word(const char *string)
Returns true if all characters in 'string' are isalnum() (alphanumeric).
Definition: strutils.cc:228
int strcount(WvStringParm s, const char c)
How many times does 'c' occur in "s"?
Definition: strutils.cc:433
WvString sizeitoa(unsigned long long blocks, unsigned long blocksize=1, RoundingMethod rounding_method=ROUND_UP_AT_POINT_FIVE)
Given a number of blocks and a blocksize (default==1 byte), return a WvString containing a human-read...
Definition: strutils.cc:729
WvString intl_date(time_t _when=-1)
Return the local date (in format of ISO 8601) out of _when.
Definition: strutils.cc:1260
WvString rfc1123_date(time_t _when)
Returns an RFC1123-compatible date made out of _when.
Definition: strutils.cc:838
WvString sizektoa(unsigned long long kbytes, RoundingMethod rounding_method=ROUND_UP_AT_POINT_FIVE)
Given a size in kilobytes, return a human readable size.
Definition: strutils.cc:721
WvString diff_dates(time_t t1, time_t t2)
Returns the difference between two dates in a human readable format.
Definition: strutils.cc:376
WvString hostname()
Do gethostname() without a fixed-length buffer.
Definition: strutils.cc:870
WvString metriculate(const off_t i)
Inserts SI-style spacing into a number (eg passing 9876543210 returns "9 876 543 210")
Definition: strutils.cc:926
WvString secondstoa(unsigned int total_seconds)
Given a number of seconds, returns a formatted human-readable string saying how long the period is.
Definition: strutils.cc:750
char * non_breaking(const char *string)
Replaces all whitespace characters in the string with non-breaking spaces (&#160;) for use with web stuff...
Definition: strutils.cc:154
WvString sizekitoa(unsigned long long kbytes, RoundingMethod rounding_method=ROUND_UP_AT_POINT_FIVE)
Given a size in kilobytes, return a human readable size.
Definition: strutils.cc:742
char * trim_string(char *string)
Trims whitespace from the beginning and end of the character string, including carriage return / line...
Definition: strutils.cc:59
time_t intl_gmtoff(time_t t)
Return the number of seconds by which localtime (at the given timestamp) is offset from GMT.
Definition: strutils.cc:1294
WvString substr(WvString line, unsigned int pos, unsigned int len)
Returns the string of length len starting at pos in line.
Definition: strutils.cc:998
WvString url_decode(WvStringParm str, bool no_space=false)
Converts escaped characters (things like %20 etc.) from web URLS into their normal ASCII representati...
Definition: strutils.cc:311
WvString afterstr(WvStringParm line, WvStringParm a)
Returns everything in line (exclusively) after a.
Definition: strutils.cc:965
void replace_char(void *string, char c1, char c2, int length)
Replace all instances of c1 with c2 for the first 'length' characters in 'string'.
Definition: strutils.cc:178
WvString passwd_crypt(const char *str)
Similar to crypt(), but this randomly selects its own salt.
Definition: strcrypt.cc:14
WvString intl_datetime(time_t _when=-1)
Return the local date and time (in format of ISO 8601) out of _when.
Definition: strutils.cc:1274
WvString cstr_escape(const void *data, size_t size, const CStrExtraEscape extra_escapes[]=NULL)
Converts data into a C-style string constant.
Definition: strutils.cc:1143
char * strlwr(char *string)
In-place modify a character string so that all contained letters are in lower case.
Definition: strutils.cc:201
void strcoll_split(StringCollection &coll, WvStringParm _s, const char *splitchars=" \t", int limit=0)
Splits a string and adds each substring to a collection.
Definition: wvstrutils.h:278
char * snip_string(char *haystack, char *needle)
Snip off the first part of 'haystack' if it consists of 'needle'.
Definition: strutils.cc:187
void strcoll_splitstrict(StringCollection &coll, WvStringParm _s, const char *splitchars=" \t", int limit=0)
Splits a string and adds each substring to a collection.
Definition: wvstrutils.h:342
WvString depunctuate(WvStringParm line)
Removes any trailing punctuation ('.
Definition: strutils.cc:1306
char * strupr(char *string)
In-place modify a character string so that all contained letters are in upper case.
Definition: strutils.cc:214
WvString spacecat(WvStringParm a, WvStringParm b, char sep=' ', bool onesep=false)
return the string formed by concatenating string 'a' and string 'b' with the 'sep' character between ...
Definition: strutils.cc:114
WvString undupe(WvStringParm s, char c)
Replace any consecutive instances of character c with a single one.
Definition: strutils.cc:814
WvString wvgetcwd()
Get the current working directory without a fixed-length buffer.
Definition: strutils.cc:905
WvString intl_time(time_t _when=-1)
Return the local time (in format of ISO 8601) out of _when.
Definition: strutils.cc:1246