Edinburgh Speech Tools 2.4-release
relation_io.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1995,1996 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Paul Taylor updated by awb */
34/* Date : Feb 1999 */
35/*-----------------------------------------------------------------------*/
36/* Relation class file i/o, label files */
37/* */
38/*=======================================================================*/
39#include <cstdlib>
40#include <cstdio>
41#include <fstream>
42#include "EST_unix.h"
43#include "EST_types.h"
44#include "ling_class/EST_Relation.h"
45#include "EST_string_aux.h"
46#include "EST_cutils.h"
47#include "EST_TList.h"
48#include "EST_Option.h"
49#include "relation_io.h"
50
51#define DEF_SAMPLE_RATE 16000
52#define HTK_UNITS_PER_SECOND 10000000
53
54static EST_Regex RXleadingwhitespace("^[ \t\n\r][ \t\n\r]*.*$");
55
56EST_read_status read_label_portion(EST_TokenStream &ts, EST_Relation &s,
57 int sample);
58
59EST_read_status load_esps_label(EST_TokenStream &ts,EST_Relation &rel)
60{
62 ts.set_quotes('"','\\');
63 EST_String key, val;
64
65 // Skip the header
66 while (!ts.eof())
67 {
68 key = ts.get().string();
69 if (key == "#")
70 break;
71
72 val = ts.get_upto_eoln().string();
73 // delete leading whitespace
74 if (val.matches(RXleadingwhitespace))
75 val = val.after(RXwhite);
76 rel.f.set(key, val);
77 }
78
79 if (ts.peek() == "") return format_ok;
80
81 while (!ts.eof())
82 {
83 EST_Item *si = rel.append();
84 EST_String name;
85
86 si->set("end",(float)atof(ts.get().string()));
87 ts.get(); // skip the color;
88
89 for (name = ""; (!ts.eoln()) && (ts.peek() != ";"); )
90 {
91 EST_Token &t = ts.get();
92 if (name.length() > 0) // preserve internal whitespace
93 name += t.whitespace();
94 name += t.string();
95 }
96 si->set_name(name);
97
98 if (ts.peek().string() == ";") // absorb separator
99 {
100 ts.get();
101 si->features().load(ts);
102 }
103 }
104 return format_ok;
105}
106
107EST_write_status save_esps_label(const EST_String &filename,
108 const EST_Relation &s,
109 bool evaluate_ff)
110{
111 ostream *outf;
112 if (filename == "-")
113 outf = &cout;
114 else
115 outf = new ofstream(filename);
116
117 if (!(*outf))
118 {
119 cerr << "save_esps_label: can't open label output file \"" <<
120 filename << "\"" << endl;
121 return write_fail;
122 }
123
124 EST_write_status st=save_esps_label(outf, s, evaluate_ff);
125
126 if (outf != &cout)
127 delete outf;
128
129 return st;
130}
131
132EST_write_status save_esps_label(ostream *outf,
133 const EST_Relation &s,
134 bool evaluate_ff)
135{
136 EST_Item *ptr;
137
138 *outf << "separator ;\n";
139 if (!s.f.present("nfields"))
140 *outf << "nfields 1\n";
141
143 for (p.begin(s.f); p; ++p)
144 *outf << p->k << " " << p->v << endl;
145
146 *outf << "#\n";
147/* if (f("timing_style") == "event")
148 *outf << "timing_style event\n";
149 else if (f("timing_style") == "unit")
150 *outf << "timing_style unit\n";
151*/
152
153 for (ptr = s.head(); ptr != 0; ptr = inext(ptr))
154 {
155 *outf << "\t";
156 outf->precision(5);
157 outf->setf(ios::scientific, ios::floatfield);
158 outf->width(8);
159 // outf->fill('0');
160 if (s.f("timing_style","0") == "event")
161 *outf << ptr->F("time",0);
162 else
163 *outf << ptr->F("end",0);
164
165 *outf << " 26 \t" << ptr->S("name","0");
166
167 EST_Features f2;
168 f2 = ptr->features();
169 f2.remove("name");
170 f2.remove("end");
171 if (evaluate_ff)
172 evaluate(ptr,f2);
173
174 if (f2.length() > 0)
175 {
176 *outf << " ; ";
177 f2.save(*outf);
178 }
179 *outf << endl;
180 }
181
182 return write_ok;
183}
184
185EST_read_status load_ogi_label(EST_TokenStream &ts, EST_Relation &s)
186{
187 // This function reads OGI style label files. The start, end
188 // time and names of the labels are mandatory.
189 EST_String key, val;
190 float sr;
191 int isr;
192
193 // set up the character constant values for this stream
194 ts.set_SingleCharSymbols(";");
195
196 // Skip over header
197
198 while(!ts.eof())
199 {
200 if ((ts.peek().col() == 0) && (ts.peek() == "END"))
201 {
202 if (ts.peek() == "END")
203 { // read rest of header
204 ts.get();
205 ts.get();
206 ts.get();
207 }
208 break;
209 }
210 key = ts.get().string();
211 val = ts.get().string();
212 }
213
214 sr = 1000.0 / atof(val);
215 isr = (int)sr;
216
217 if (ts.eof())
218 {
219 cerr << "Error: couldn't find header in label file "
220 << ts.filename() << endl;
221 return wrong_format;
222 }
223
224 if (read_label_portion(ts, s, isr) == misc_read_error)
225 {
226 cerr << "error: in label file " << ts.filename() << " at line " <<
227 ts.linenum() << endl;
228 return misc_read_error;
229 }
230 return format_ok;
231}
232
233EST_read_status load_words_label(EST_TokenStream &ts, EST_Relation &s)
234{
235 // This function reads label files in the form of simple word strings
236 // with no timing information.
237 EST_Item *item;
238
239 while (!ts.eof())
240 {
241 item = s.append();
242 item->set("name",(EST_String)ts.get());
243 item->set("end",0.0);
244 }
245
246 return format_ok;
247}
248
// Convert a time written as a (possibly very long) decimal string into
// seconds by dividing it by `sample`, the number of units per second.
//
// Label files that use 100 nanosecond units produce numbers too large for
// an int, which is why the value is parsed as a double.  A single strtod()
// call is used for every length: the earlier digit-by-digit path for
// strings of 15 or more characters stopped at the first non-digit, so long
// signed or fractional values were silently truncated (for example
// "0.1234567890123" came out as 0).
//
// s       NUL-terminated number; leading whitespace is skipped by strtod()
// sample  units per second; the caller must pass a non-zero value
static float convert_long_num_string_to_time(const char *s,int sample)
{
    return (float)(strtod(s, NULL) / sample);
}
276
277EST_read_status read_label_portion(EST_TokenStream &ts, EST_Relation &s,
278 int sample)
279{
280 EST_Item *item;
281 float hend;
282 EST_String str;
283
284 while(!ts.eof())
285 {
286 str = ts.get().string();
287 if (str == ".")
288 return format_ok;
289
290 item = s.append();
291
292 str = ts.get().string();
293 hend = convert_long_num_string_to_time(str,sample);
294
295 item->set("end",hend); // time
296 item->set("name",ts.get().string()); // name
297
298 if (!ts.eoln())
299 item->set("rest_lab",ts.get_upto_eoln().string());
300 }
301
302 return format_ok;
303}
304
305EST_read_status load_sample_label(EST_TokenStream &ts,
306 EST_Relation &s, int sample)
307{
308
309 if (sample == 0) // maybe this should be an error
310 sample = DEF_SAMPLE_RATE;
311
312 // set up the character constant values for this stream
313 ts.set_SingleCharSymbols(";");
314
315 s.clear();
316 if (read_label_portion(ts, s, sample) == misc_read_error)
317 {
318 cerr << "error: in label file " << ts.filename() << " at line " <<
319 ts.linenum() << endl;
320 return misc_read_error;
321 }
322 return format_ok;
323}
324
325EST_write_status save_htk_label(const EST_String &filename,
326 const EST_Relation &a)
327{
328 ostream *outf;
329 if (filename == "-")
330 outf = &cout;
331 else
332 outf = new ofstream(filename);
333
334 if (!(*outf))
335 {
336 cerr << "save_htk_label: can't open label output file \"" <<
337 filename << "\"" << endl;
338 return write_fail;
339 }
340
341 EST_write_status s = save_htk_label(outf, a);
342
343
344 if (outf != &cout)
345 delete outf;
346
347 return s;
348}
349
350EST_write_status save_htk_label(ostream *outf,
351 const EST_Relation &a)
352{
353 EST_Item *ptr;
354 float end,start;
355
356 outf->precision(6);
357
358 start = end = 0;
359 for (ptr = a.head(); ptr != 0; ptr = inext(ptr))
360 {
361 outf->width(15);
362 cout.setf(ios::left,ios::adjustfield);
363 *outf << (int)(start * HTK_UNITS_PER_SECOND);
364 outf->width(15);
365 end = ptr->F("end",0.0);
366 *outf << (int)(end * HTK_UNITS_PER_SECOND);
367 *outf << " " << ptr->name() << endl;
368 start = end;
369 }
370
371 return write_ok;
372}
373
#if 0
// Disabled legacy writers.  They are compiled out and rely on
// EST_Stream_Item and on ptr->dur(), neither of which is declared in this
// file, so they are kept for reference only.

// Write item names and durations (multiplied by 1000) to filename, or to
// standard output when filename is "-".  The first line carries the
// annotation "(0,140)" and the last one "(99,80)".
EST_write_status save_label_spn(const EST_String &filename,
                                const EST_Relation &a)
{
    EST_Stream_Item *ptr;

    ostream *outf;
    if (filename == "-")
        outf = &cout;
    else
        outf = new ofstream(filename);

    if (!(*outf))
    {
        cerr << "save_label_spn: can't open label output file \""
             << filename << "\"" << endl;
        if (outf != &cout)
            delete outf;
        return write_fail;
    }

    ptr = a.head();
    if (ptr == 0)
    {
        // empty relation: nothing to write, and nothing to dereference
        if (outf != &cout)
            delete outf;
        return write_ok;
    }

    outf->precision(3);
    outf->setf(ios::left, ios::adjustfield);
    outf->width(8);
    *outf << ptr->name();
    outf->setf(ios::scientific, ios::floatfield);
    outf->width(8);
    *outf << (ptr->dur() * 1000.0) << "\t (0,140)" << endl;

    for (; inext(ptr) != 0; ptr = inext(ptr))
    {
        outf->precision(3);
        outf->setf(ios::left, ios::adjustfield);
        outf->width(8);
        *outf << ptr->name();
        outf->setf(ios::scientific, ios::floatfield);
        outf->width(8);
        *outf << (ptr->dur() * 1000.0) << endl;
    }

    // ptr now refers to the last item
    outf->width(8);
    *outf << ptr->name();
    outf->setf(ios::scientific, ios::floatfield);
    outf->width(8);
    *outf << (ptr->dur() * 1000.0) << "\t (99,80)" << endl;

    if (outf != &cout)
        delete outf;

    return write_ok;
}

// Write only the item names to filename, or to standard output when
// filename is "-".  Names are separated by newlines when features is
// neither "" nor "OneLine", by spaces otherwise; the last name is always
// followed by a newline.
EST_write_status save_label_names(const EST_String &filename,
                                  const EST_Relation &a,
                                  const EST_String &features)
{
    EST_Stream_Item *ptr;

    ostream *outf;
    if (filename == "-")
        outf = &cout;
    else
        outf = new ofstream(filename);

    if (!(*outf))
    {
        cerr << "save_label_name: can't open label output file \""
             << filename << "\"" << endl;
        if (outf != &cout)
            delete outf;
        return misc_write_error;
    }

    for (ptr = a.head(); ptr != 0; ptr = inext(ptr))
    {
        *outf << ptr->name();
        if ((inext(ptr) == 0) ||
            ((features != "") && (features != "OneLine")))
            *outf << endl;
        else
            *outf << " ";
    }

    if (outf != &cout)
        delete outf;
    return write_ok;
}
#endif
461
462EST_write_status save_RelationList(const EST_String &filename,
463 const EST_RelationList &plist,
464 int time, int path)
465{
466 EST_Litem *p;
467 EST_Item *ptr;
468 EST_String outname;
469 float start,end;
470
471 ostream *outf;
472 if (filename == "-")
473 outf = &cout;
474 else
475 outf = new ofstream(filename);
476
477 if (!(*outf))
478 {
479 cerr << "save_StreamList: can't open MLF output file \""
480 << filename << "\"\n";
481 return write_fail;
482 }
483
484 *outf << "#!MLF!#\n"; // MLF header/identifier
485 outf->precision(6);
486
487 start = end = 0;
488 for (p = plist.head(); p != 0; p = p->next())
489 {
490 outname = path ? plist(p).name() : basename(plist(p).name());
491 *outf << "\"*/" << outname<<"\"\n";
492 for (ptr = plist(p).head(); ptr != 0; ptr = inext(ptr))
493 {
494 if (time)
495 {
496 outf->width(15);
497 cout.setf(ios::left,ios::adjustfield);
498 *outf << (int)(start * HTK_UNITS_PER_SECOND);
499 outf->width(15);
500 end = ptr->F("end",0.0);
501 *outf << (int)(end * HTK_UNITS_PER_SECOND) << " ";
502 start = end;
503 }
504 *outf << ptr->S("name","0") << endl;
505 }
506 *outf << ".\n";
507 }
508
509 if (outf != &cout)
510 delete outf;
511 return write_ok;
512}
513
514EST_write_status save_WordList(const EST_String &filename,
515 const EST_RelationList &plist,
516 int style)
517{
518 EST_Litem *p;
519 EST_Item *ptr;
520
521 ostream *outf;
522 if (filename == "-")
523 outf = &cout;
524 else
525 outf = new ofstream(filename);
526
527 if (!(*outf))
528 {
529 cerr << "save:WordList: can't open WordList output file \""
530 << filename << "\"\n";
531 return write_fail;
532 }
533
534 for (p = plist.head(); p != 0; p = p->next())
535 {
536 for (ptr = plist(p).head(); inext(ptr) != 0; ptr = inext(ptr))
537 {
538 *outf << ptr->name();
539 if (style == 0)
540 *outf << endl;
541 else
542 *outf << " ";
543 }
544 if (ptr != 0)
545 *outf << ptr->name() << endl;
546 }
547
548 if (outf != &cout)
549 delete outf;
550 return write_ok;
551}
552
553EST_write_status save_ind_RelationList(const EST_String &filename,
554 const EST_RelationList &plist,
555 const EST_String &features,
556 int path)
557{
558 EST_Litem *p;
559 EST_String outname;
560 (void) filename;
561 (void) features;
562
563 for (p = plist.head(); p != 0; p = p->next())
564 {
565 outname = path ? plist(p).name() : basename(plist(p).name());
566 if (plist(p).save(outname,false) != write_ok)
567 return misc_write_error;
568 }
569
570 return write_ok;
571}
572
573EST_read_status load_RelationList(const EST_String &filename,
574 EST_RelationList &plist)
575{
577 EST_String fns, name;
578
579 if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
580 {
581 cerr << "Can't open label input file " << filename << endl;
582 return misc_read_error;
583 }
584 // set up the character constant values for this stream
585 ts.set_SingleCharSymbols(";");
586
587 // Skip over header
588 if (ts.get().string() != "#!MLF!#")
589 {
590 cerr << "Not MLF file\n";
591 return wrong_format;
592 }
593
594 while(!ts.eof())
595 {
596 // put filename in as stream name. The filename is usually surrounded
597 // by quotes, so remove these.
598 fns = ts.get().string();
599 strip_quotes(fns);
600 EST_Relation s(fns);
601 s.f.set("name", fns); // simonk
602 plist.append(s);
603
604 if (read_label_portion(ts, plist.last(), 10000000) == misc_read_error)
605 {
606 cerr << "error: in reading MLF file\n";
607 cerr << "section for file " << fns <<
608 " at line " << ts.linenum() << " is badly formatted\n";
609
610 return misc_read_error;
611 }
612 }
613
614 return format_ok;
615}
616
617static void pad_ends(EST_Relation &s, float length)
618{
619 // add evenly spaced dummy end values to Relation
620 EST_Item *p;
621 int i;
622
623 for (i = 0, p = s.head(); p; p = inext(p), ++i)
624 p->set("end",(length * float(i)/float(s.length())));
625}
626
627EST_read_status read_RelationList(EST_RelationList &plist,
628 EST_StrList &files, EST_Option &al)
629{
630 EST_Litem *p, *plp;
631
632 if (al.val("-itype", 0) == "mlf")
633 {
634 if (load_RelationList(files.first(), plist) != format_ok)
635 exit (-1);
636 }
637 else
638 for (p = files.head(); p; p = p->next())
639 {
640 EST_Relation s(files(p));
641 plist.append(s);
642 plp = plist.tail();
643 if (al.present("-itype"))
644 {
645 if (plist(plp).load(files(p), al.val("-itype")) != format_ok)
646 exit (-1);
647 }
648 else if (plist(plp).load(files(p)) != format_ok)
649 exit (-1);
650 if ((al.val("-itype", 0) == "words") && (al.present("-length")))
651 pad_ends(s, al.fval("-length"));
652
653 }
654
655 return format_ok;
656}
void set(const EST_String &name, int ival)
Definition: EST_Features.h:185
void remove(const EST_String &name)
Definition: EST_Features.h:246
EST_read_status load(EST_TokenStream &ts)
load features from already opened EST_TokenStream
EST_write_status save(ostream &outf) const
save features in already opened ostream
int present(const EST_String &name) const
int length() const
Definition: EST_Features.h:250
void set(const EST_String &name, int ival)
Definition: EST_Item.h:179
const EST_String S(const EST_String &name) const
Definition: EST_Item.h:143
const float F(const EST_String &name) const
Definition: EST_Item.h:134
float fval(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:98
EST_Features f
Definition: EST_Relation.h:103
EST_Item * head() const
Definition: EST_Relation.h:125
int length() const
int length(void) const
Length of string ({not} length of underlying chunk)
Definition: EST_String.h:241
void begin(const Container &over)
Set the iterator ready to run over this container.
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
const int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222
const T & last() const
return const reference to last item in list
Definition: EST_TList.h:149
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:191
const T & first() const
return const reference to first item in list
Definition: EST_TList.h:146
int eof()
end of file
Definition: EST_Token.h:356
void set_SingleCharSymbols(const EST_String &sc)
set which characters are to be treated as single character symbols
Definition: EST_Token.h:338
const EST_String filename() const
The originating filename (if there is one)
Definition: EST_Token.h:372
int linenum(void) const
returns line number of \Ref{EST_TokenStream}
Definition: EST_Token.h:354
void set_quotes(char q, char e)
set characters to be used as quotes and escape, and set quote mode
Definition: EST_Token.h:347
EST_Token get_upto_eoln(void)
get up to {\tt s} in end of line as a single token.
Definition: EST_Token.cc:516
int eoln()
end of line
Definition: EST_Token.cc:818
EST_Token & peek(void)
peek at next token
Definition: EST_Token.cc:830
int open(const EST_String &filename)
open a \Ref{EST_TokenStream} for a file.
Definition: EST_Token.cc:200
EST_TokenStream & get(EST_Token &t)
get next token in stream
Definition: EST_Token.cc:486
int col(void) const
Line position in original \Ref{EST_TokenStream}.
Definition: EST_Token.h:184