46#include "EST_Pathname.h"
47#include "EST_cutils.h"
49#include "EST_FileType.h"
52#include "EST_TVector.h"
55Declare_TVector_Base_T(
EST_WFST_State *, NULL, NULL, EST_WFST_StateP)
58#if defined(INSTANTIATE_TEMPLATES)
59#include "../base_class/EST_TList.cc"
63#include "../base_class/EST_TVector.cc"
// Definition of the static data member EST_WFST::traverse_tag, starting at zero.
70int EST_WFST::traverse_tag = 0;
72EST_WFST_State::EST_WFST_State(
int name)
// Fragment that copies from another state object called state.
// NOTE(review): the enclosing signature is not visible in this excerpt.
// The name and type fields are copied member by member.
83 p_name = state.p_name;
84 p_type = state.p_type;
// Walk the source transition list from head() until next() yields 0;
// the loop body is on a line that is not visible here.
86 for (p=state.transitions.head(); p != 0; p=p->next())
// Destructor: visits every item of the transitions list and deletes the
// pointer that transitions(p) returns for it.
90EST_WFST_State::~EST_WFST_State()
94 for (p=transitions.head(); p != 0; p=p->next())
95 delete transitions(p);
120 for (
int i=0; i < p_num_states; ++i)
// Fragment that copies the members of another WFST called wfst into this one.
// NOTE(review): header not visible; presumably EST_WFST::copy - confirm.
// Symbol tables, start state, tag and state count are assigned directly.
135 p_in_symbols = wfst.p_in_symbols;
136 p_out_symbols = wfst.p_out_symbols;
137 p_start_state = wfst.p_start_state;
138 current_tag = wfst.current_tag;
139 p_num_states = wfst.p_num_states;
// Size the state vector to the state count that was just copied.
140 p_states.
resize(p_num_states);
// Per-state loop over indices 0 .. p_num_states-1; its body is not visible here.
141 for (
int i=0; i < p_num_states; ++i)
// Fragment that sizes the state vector from init_num_states.
// NOTE(review): header not visible; presumably the init overload taking a state count - confirm.
151 p_states.
resize(init_num_states);
// Visit every slot of the resized vector; the loop body is not visible here.
152 for (i=0; i < p_states.
length(); i++)
// Record the requested size as the current number of states.
154 p_num_states = init_num_states;
// Fragment that builds the input and output symbol tables from two LISP lists,
// in_alphabet and out_alphabet.
// NOTE(review): header and the declarations of in and out are not visible here.
// Input side: append every symbol except __epsilon__ and = to the list in.
165 for (iin=in_alphabet; iin != NIL; iin=cdr(iin))
166 if ((!streq(get_c_string(car(iin)),
"__epsilon__")) &&
167 (!streq(get_c_string(car(iin)),
"=")))
168 in.
append(get_c_string(car(iin)));
// __epsilon__ is appended to out explicitly here and skipped by the filter in
// the loop below, so that loop never adds it a second time.
170 out.
append(
"__epsilon__");
// Output side: same filter as for the input alphabet.
172 for (oout=out_alphabet; oout != NIL; oout=cdr(oout))
173 if ((!streq(get_c_string(car(oout)),
"__epsilon__")) &&
174 (!streq(get_c_string(car(oout)),
"=")))
175 out.
append(get_c_string(car(oout)));
// Hand the two collected lists to the symbol tables.
177 p_in_symbols.
init(in);
178 p_out_symbols.
init(out);
// Fragment of a transduce routine working on symbol names (its error message
// identifies it as WFST transduce). NOTE(review): header not visible here.
// Look up the index of the input symbol name.
185 int in_i = p_in_symbols.
name(in);
// Error path: report that the input symbol is not in the alphabet and return
// the error state. The condition guarding this path is on a line not shown.
190 cerr <<
"WFST transduce: \"" << in <<
"\" not in alphabet" << endl;
191 return WFST_ERROR_STATE;
// Store in out the name that belongs to output symbol index out_i.
196 out = p_out_symbols.
name(out_i);
// Fragment that scans every transition of state s and collects those whose
// input symbol equals in. NOTE(review): header not visible here.
206 for (i=s->transitions.head(); i != 0; i=i->next())
208 if (in == s->transitions(i)->in_symbol())
// Add one to the weight of the matching transition.
// NOTE(review): lines between the test and this update are missing, so the
// increment may be conditional - confirm against the full file.
211 s->transitions(i)->set_weight(1+s->transitions(i)->weight());
// Append the matching transition to out.
212 out.
append(s->transitions(i));
// Fragment that searches the transitions of state s for one whose input
// symbol equals in. NOTE(review): header not visible here.
224 for (i=s->transitions.head(); i != 0; i=i->next())
226 if (in == s->transitions(i)->in_symbol())
// On a match, out receives the output symbol of that transition and the
// state it leads to is returned straight away.
228 out = s->transitions(i)->out_symbol();
229 return s->transitions(i)->state();
// Reached when the routine has not returned above: report the error state.
233 return WFST_ERROR_STATE;
// Fragment of a transition lookup working on symbol names.
// NOTE(review): header not visible here.
// Translate both symbol names to their indices.
247 int in_i = p_in_symbols.
name(in);
248 int out_i = p_out_symbols.
name(out);
// An index of -1 is treated as not in the alphabet: report it and return the
// error state. The end of the output statement is on a line not shown.
250 if ((in_i == -1) || (out_i == -1))
252 cerr <<
"WFST: one of " << in <<
"/" << out <<
" not in alphabet"
254 return WFST_ERROR_STATE;
// Fragment that searches the transitions of state s for one matching both
// the in and the out symbol. NOTE(review): header not visible here.
273 for (i=s->transitions.head(); i != 0; i=i->next())
275 if ((in == s->transitions(i)->in_symbol()) &&
276 (out == s->transitions(i)->out_symbol()))
// Add one to the weight of the matching transition.
// NOTE(review): lines between the test and this update are missing, so the
// increment may be conditional - confirm against the full file.
279 s->transitions(i)->set_weight(1+s->transitions(i)->weight());
// Return the matching transition itself.
280 return s->transitions(i);
// Fragment of a transition lookup that also reports a weight through prob.
// NOTE(review): header and the code producing trans are not visible here.
// Error return; the condition guarding it is on a line not shown.
295 return WFST_ERROR_STATE;
// Otherwise prob receives the weight of trans and its target state is returned.
299 prob = trans->weight();
300 return trans->state();
// Writes the state table to the already open stream fd in binary form.
// Every visible fwrite call uses size 4 and count 1, so each field is written
// as a single four byte item.
// NOTE(review): several lines of this function are missing from this excerpt.
304EST_write_status EST_WFST::save_binary(FILE *fd)
308 int num_transitions, type, in, out, next_state;
// One record per state.
311 for (i=0; i<p_num_states; i++)
// The record starts with the number of transitions leaving the state.
313 num_transitions = p_states[i]->num_transitions();
314 fwrite(&num_transitions,4,1,fd);
// Map the state type to an integer code; only the assignment for the
// nonfinal case is visible here.
315 if (p_states[i]->type() == wfst_final)
317 else if (p_states[i]->type() == wfst_nonfinal)
318 type = WFST_NONFINAL;
319 else if (p_states[i]->type() == wfst_licence)
323 fwrite(&type,4,1,fd);
// Then one entry per transition of this state.
324 for (j=p_states[i]->transitions.head(); j != 0; j=j->next())
326 in = p_states[i]->transitions(j)->in_symbol();
327 out = p_states[i]->transitions(j)->out_symbol();
328 next_state = p_states[i]->transitions(j)->state();
329 weight = p_states[i]->transitions(j)->weight();
// Target state and weight are written here; how in and out are written is on
// lines not shown in this excerpt.
341 fwrite(&next_state,4,1,fd);
342 fwrite(&weight,4,1,fd);
// Fragment of the routine that writes the WFST to a named file.
// NOTE(review): header not visible here; presumably EST_WFST::save - confirm.
// Pattern matching any symbol that contains a parenthesis, a single or double
// quote mark, a semicolon, full stop, comma, or whitespace.
354 static EST_Regex needquotes(
".*[()'\";., \t\n\r].*");
// Open the named file with mode wb; on failure report it and return
// misc_write_error. The branch preceding this else is not visible.
359 else if ((ofd = fopen(filename,
"wb")) == NULL)
361 cerr <<
"WFST: cannot write to file \"" << filename <<
"\"" << endl;
362 return misc_write_error;
// Text header: the EST_File fst line, the data type, both alphabets, the
// number of states and a ByteOrder field (10 when EST_NATIVE_BO equals bo_big,
// 01 otherwise), closed by the EST_Header_End line.
365 fprintf(ofd,
"EST_File fst\n");
366 fprintf(ofd,
"DataType %s\n",(
const char *)type);
367 fprintf(ofd,
"in %s\n",
369 p_in_symbols.print_to_string(TRUE)+
")",
371 fprintf(ofd,
"out %s\n",
373 p_out_symbols.print_to_string(TRUE)+
")",
375 fprintf(ofd,
"NumStates %d\n",p_num_states);
376 fprintf(ofd,
"ByteOrder %s\n", ((EST_NATIVE_BO == bo_big) ?
"10" :
"01"));
377 fprintf(ofd,
"EST_Header_End\n");
// The binary branch is chosen when type compares equal to the string binary;
// the statement executed in that branch is on a line not shown.
379 if (type ==
"binary")
// Text form: one entry per state.
383 for (i=0; i < p_num_states; i++)
// Open the entry with the state name.
386 fprintf(ofd,
"((%d ",s->name());
// Four alternative type words follow; the conditions that select between them
// are on lines not shown.
390 fprintf(ofd,
"final ");
393 fprintf(ofd,
"nonfinal ");
396 fprintf(ofd,
"licence ");
399 fprintf(ofd,
"error ");
// Close the state header with the transition count.
401 fprintf(ofd,
"%d)\n",s->num_transitions());
// One line per transition of this state.
402 for (j=s->transitions.head(); j != 0; j=j->next())
// Resolve both symbol indices to their names.
404 EST_String in = p_in_symbols.
name(s->transitions(j)->in_symbol());
405 EST_String out=p_out_symbols.
name(s->transitions(j)->out_symbol());
// The input symbol is printed either through quote_string or unchanged.
// NOTE(review): the selecting condition is not visible, presumably a match
// against needquotes - confirm.
407 fprintf(ofd,
" (%s ",(
const char *)quote_string(in,
"\"",
"\\",1));
409 fprintf(ofd,
" (%s ",(
const char *)in);
// The same pair of alternatives for the output symbol.
411 fprintf(ofd,
" %s ",(
const char *)quote_string(out,
"\"",
"\\",1));
413 fprintf(ofd,
" %s ",(
const char *)out);
// Finish the transition with its target state (%d) and weight (%g).
414 fprintf(ofd,
"%d %g)\n",
415 s->transitions(j)->state(),
416 s->transitions(j)->weight());
// File local helper that takes a stream and a swap flag and returns a float.
// NOTE(review): the line that obtains f from fd is not visible in this excerpt.
427static float get_float(FILE *fd,
int swap)
// When swap is nonzero the value is passed through swapfloat.
431 if (swap) swapfloat(&f);
// File local helper with the same parameters as get_float but returning int.
// NOTE(review): its body is not visible in this excerpt.
435static int get_int(FILE *fd,
int swap)
// Reads num_states state records from fd, passing swap through to get_int and
// get_float for every field.
// NOTE(review): the rest of the parameter list and several body lines are
// missing from this excerpt.
445EST_read_status EST_WFST::load_binary(FILE *fd,
452 int num_trans, state_type;
453 int in_sym, out_sym, next_state;
458 for (i=0; i < num_states; i++)
// Each record starts with the transition count and then the state type code.
460 num_trans = get_int(fd,swap);
461 state_type = get_int(fd,swap);
// Dispatch on the four known type codes; the action taken for each is on
// lines not shown.
463 if (state_type == WFST_FINAL)
465 else if (state_type == WFST_NONFINAL)
467 else if (state_type == WFST_LICENCE)
469 else if (state_type == WFST_ERROR)
// Any other code is reported and the result is set to read_format_error.
473 cerr <<
"WFST load: unknown state type \"" <<
474 state_type <<
"\"" << endl;
475 r = read_format_error;
// Internal consistency failure; the condition that triggers it is not visible.
481 cerr <<
"WFST load: internal error: unexpected state misalignment"
483 r = read_format_error;
// Read the num_trans transitions of this state.
487 for (j=0; j < num_trans; j++)
489 in_sym = get_int(fd,swap);
// NOTE(review): lines between these two reads are missing from this excerpt.
496 out_sym = get_int(fd,swap);
497 next_state = get_int(fd,swap);
498 trans_cost = get_float(fd,swap);
// Note the argument order: weight first, then target state, then the symbols.
500 p_states[i]->add_transition(trans_cost,next_state,in_sym,out_sym);
// Fragment of the routine that reads a WFST from a named file.
// NOTE(review): header not visible here; presumably EST_WFST::load - confirm.
// Open the file with mode r and report a failure to open it.
520 if ((fd=fopen(filename,
"r")) == NULL)
522 cerr <<
"WFST load: unable to open \"" << filename
523 <<
"\" for reading" << endl;
// Parse the header; a header that fails to read (a second condition follows
// on a line not shown) is reported as not a WFST file.
// NOTE(review): no fclose of fd is visible before this return - check the
// full file for a leaked stream on this path.
529 if (((r = read_est_header(ts, hinfo, ascii, t)) != format_ok) ||
532 cerr <<
"WFST load: not a WFST file \"" << filename <<
"\"" <<endl;
533 return misc_read_error;
// The in and out header values are parsed twice with read_from_string; the
// variables receiving the results are on lines not shown.
539 read_from_string(get_c_string(read_from_string(hinfo.
val(
"in"))));
541 read_from_string(get_c_string(read_from_string(hinfo.
val(
"out"))));
// Build the symbol tables from the two alphabets.
545 init(inalpha,outalpha);
547 int num_states = hinfo.
ival(
"NumStates");
// Byte order handling: a missing ByteOrder field is one case; otherwise the
// field value 01 is read as bo_little and anything else as bo_big, and that
// is compared with something on a line not shown.
552 if (!hinfo.
present(
"ByteOrder"))
554 else if (((hinfo.
val(
"ByteOrder") ==
"01") ? bo_little : bo_big)
// Binary payload is delegated to load_binary.
559 r = load_binary(fd,hinfo,num_states,swap);
// Text payload: one LISP expression per state.
563 for (i=0; i < num_states; i++)
565 LISP sd = lreadf(fd);
// The first element of the state header must equal the expected index i.
566 if (i != get_c_int(car(car(sd))))
568 cerr <<
"WFST load: expected description of state " << i <<
569 " but found \"" << siod_sprint(sd) <<
"\"" << endl;
570 r = read_format_error;
// The second element of the state header names the state type.
573 if (streq(
"final",get_c_string(car(cdr(car(sd))))))
575 else if (streq(
"nonfinal",get_c_string(car(cdr(car(sd))))))
577 else if (streq(
"licence",get_c_string(car(cdr(car(sd))))))
// Any other type word is reported and the result is set to read_format_error.
581 cerr <<
"WFST load: unknown state type \"" <<
582 siod_sprint(car(cdr(car(sd)))) <<
"\"" << endl;
583 r = read_format_error;
// Internal consistency failure; the condition that triggers it is not visible.
589 cerr <<
"WFST load: internal error: unexpected state misalignment"
591 r = read_format_error;
// Everything after the state header is the list of transitions for state s.
594 if (load_transitions_from_lisp(s,cdr(sd)) != format_ok)
596 r = read_format_error;
// Adds to state s one transition for every element of the LISP list trans.
// Returns read_format_error as soon as an element uses a symbol whose lookup
// yields -1.
607EST_read_status EST_WFST::load_transitions_from_lisp(
int s, LISP trans)
611 for (t=trans; t != NIL; t=cdr(t))
// Element layout by position: 0 input symbol, 1 output symbol, 2 target
// state, 3 weight.
613 float w = get_c_float(siod_nth(3,car(t)));
614 int end = get_c_int(siod_nth(2,car(t)));
615 int in = p_in_symbols.
name(get_c_string(siod_nth(0,car(t))));
616 int out = p_out_symbols.
name(get_c_string(siod_nth(1,car(t))));
// A lookup result of -1 is treated as unknown vocabulary: report it, print
// the offending element and stop.
618 if ((in == -1) || (out == -1))
620 cerr <<
"WFST load: unknown vocabulary in state transition"
622 cerr <<
"WFST load: " << siod_sprint(car(t)) << endl;
623 return read_format_error;
625 p_states[s]->add_transition(w,end,in,out);
// Fragment that builds a one line description of the WFST.
// NOTE(review): header and the initialisation of tt are not visible here.
// Add up the transition list lengths of all states.
635 for (i=0; i < p_num_states; i++)
636 tt += p_states(i)->transitions.
length();
// Result has the form: WFST <states> states <transitions> transitions
// followed by a trailing space.
638 return EST_String(
"WFST ")+itoString(p_num_states)+
" states "+
639 itoString(tt)+
" transitions ";
// Takes a new maximum number of states.
// NOTE(review): only the loop header is visible; the loop body and any
// statements before it are on lines not shown.
643void EST_WFST::more_states(
int new_max)
// Visit the indices from the current state count up to new_max-1.
648 for (i=p_num_states; i < new_max; i++)
// Fragment that stores a state object s in the next free slot.
// NOTE(review): header and the creation of s are not visible here.
// When the state count has reached the vector length, ask for more room:
// the new maximum is (length + 1) * 1.5, truncated to int.
657 if (p_num_states >= p_states.
length())
660 more_states((
int)((
float)(p_states.
length()+1)*1.5));
// Place s at index p_num_states and give it the requested type.
663 p_states[p_num_states] = s;
664 s->set_type(state_type);
// Fragment that sets the weight of every transition of every state to 0.
// NOTE(review): header and the assignment of s are not visible here.
677 for (i=0; i < p_num_states; i++)
680 for (j=s->transitions.head(); j !=0; j=j->next())
681 s->transitions(j)->set_weight(0);
// Fragment that rescales the transition weights of each state by their sum.
// NOTE(review): header and the assignment of s are not visible here.
692 for (i=0; i < p_num_states; i++)
// First pass: t becomes the sum of all transition weights of the state.
695 for (t=0,j=s->transitions.head(); j !=0; j=j->next())
696 t += s->transitions(j)->weight();
// Second pass: divide every weight by that sum.
// NOTE(review): a line is missing between the two passes; confirm that it
// guards against t being zero before this division.
698 for (j=s->transitions.head(); j !=0; j=j->next())
699 s->transitions(j)->set_weight(s->transitions(j)->weight()/t);
bool init(const EST_StrList &vocab)
(re-)initialise
const EST_String & name(const int n) const
The name given the index.
int ival(const EST_String &rkey, int m=1) const
EST_String before(int pos, int len=0) const
Part before position.
int contains(const char *s, int pos=-1) const
Does it contain this substring?
EST_String after(int pos, int len=1) const
Part after pos+len.
int matches(const char *e, int pos=0) const
Exactly match this string?
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
const int present(const K &rkey) const
Returns true if key is present.
void append(const T &item)
add item onto end of list
void resize(int n, int set=1)
INLINE int length() const
number of items in vector.
void set_quotes(char q, char e)
set characters to be used as quotes and escape, and set quote mode
int open(const EST_String &filename)
open a \Ref{EST_TokenStream} for a file.
void start_cumulate()
Clear and start cumulation.
int add_state(enum wfst_state_type state_type)
Add a new state, returns new name.
EST_WFST_Transition * find_transition(int state, int in, int out) const
Find (first) transition given in and out symbols.
void init(int init_num_states=10)
Clear with (estimation of number of states required)
void clear()
clear removing existing states if any
void copy(const EST_WFST &wfst)
Copy from existing wfst.
EST_write_status save(const EST_String &filename, const EST_String type="ascii")
Save the WFST to filename; type selects the output format and defaults to "ascii".
int cumulate() const
Cumulation condition.
const EST_WFST_State * state(int i) const
Return internal state information.
void stop_cumulate()
Stop cumulation and calculate probabilities on transitions.
int transduce(int state, int in, int &out) const
Transduce in to out from state.
EST_read_status load(const EST_String &filename)
Load a WFST from filename.
int transition(int state, int in, int out) const
Find (first) new state given in and out symbols.