44#include "EST_SCFG_Chart.h"
46EST_SCFG_Chart_Edge::EST_SCFG_Chart_Edge()
50EST_SCFG_Chart_Edge::EST_SCFG_Chart_Edge(
double prob,
60EST_SCFG_Chart_Edge::~EST_SCFG_Chart_Edge()
64LISP EST_SCFG_Chart::print_edge(
int start,
int end,
int p,
73 else if (start+1 == end)
77 cons(flocons(e->
prob()),
89 d1 = edges[start][e->
pos()][e->
d1()];
90 d2 = edges[e->
pos()][end][e->
d2()];
93 cons(print_edge(start,e->
pos(),e->
d1(),d1),
94 cons(print_edge(e->
pos(),end,e->
d2(),d2),
97 cons(flocons(e->
prob()),
105EST_SCFG_Chart::EST_SCFG_Chart()
111 grammar_local = TRUE;
115EST_SCFG_Chart::~EST_SCFG_Chart()
128 grammar_local = FALSE;
129 grammar = &imported_grammar;
152 for (n_vertices=1,p=s; p != e; p=inext(p))
156 for (n=0,p=s; p != e; p=inext(p),n++)
161 cerr <<
"SCFG_Chart: unknown terminal symbol \"" <<
162 p->f(name).
string() <<
"\"" << endl;
169void EST_SCFG_Chart::delete_edge_table()
173 if (wfst == 0)
return;
175 for (i=0; i < n_vertices; i++)
178 for (j=0; j < n_vertices; j++)
181 if (edges[i][j][k] != emptyedge)
182 delete edges[i][j][k];
183 delete [] edges[i][j];
196void EST_SCFG_Chart::setup_edge_table()
205 for (i=0; i < n_vertices; i++)
209 for (j=0; j < n_vertices; j++)
212 for (k=0; k < nt; k++)
218double EST_SCFG_Chart::find_best_tree_cal(
int start,
int end,
int p)
222 int best_q = -1, best_r = -1;
223 double best_prob = 0;
227 best_prob = grammar->
prob_U(p,wfst[start]->d1());
229 edges[start][end][p] =
231 wfst[start]->d1(),0,-1);
233 edges[start][end][p] = emptyedge;
239 double s=0,t_prob,left,right;
243 for (q=0; q < nt; q++)
244 for (r=0; r < nt; r++)
246 double pBpqr = grammar->
prob_B(p,q,r);
249 for (j=start+1; j < end; j++)
251 left=find_best_tree(start,j,q);
254 right = find_best_tree(j,end,r);
255 t_prob = pBpqr * left * right;
256 if (t_prob > best_prob)
270 edges[start][end][p] =
273 edges[start][end][p] = emptyedge;
281 if (n_vertices - 1 > 0)
282 find_best_tree(0,n_vertices-1,grammar->distinguished_symbol());
291 top = edges[0][n_vertices-1][grammar->distinguished_symbol()];
296 return print_edge(0,n_vertices-1,grammar->distinguished_symbol(),top);
318 for (num_words=0,p=s; p != e; p=inext(p))
321 if (num_words != (n_vertices-1))
323 cerr <<
"SCFG_Chart: extract_parse, number of items in link stream " <<
324 " different from those in parse tree" << endl;
331 top = edges[0][n_vertices-1][grammar->distinguished_symbol()];
339 extract_edge(0,n_vertices-1,grammar->distinguished_symbol(),top,
342 if ((force) && (!daughter1(ss)))
343 extract_forced_parse(0,n_vertices-1,ss,w);
348void EST_SCFG_Chart::extract_forced_parse(
int start,
int end,
356 s->append_daughter(w);
357 s->set_name(grammar->
nonterminal(grammar->distinguished_symbol()));
362 extract_forced_parse(start,start+1,s->append_daughter(),w);
363 EST_Item *st = s->append_daughter();
364 st->set_name(grammar->
nonterminal(grammar->distinguished_symbol()));
367 extract_forced_parse(start+1,end,st,nw);
371void EST_SCFG_Chart::extract_edge(
int start,
int end,
int p,
382 else if (start+1 == end)
385 s->append_daughter((*word));
387 s->
set(
"prob",(
float)e->
prob());
388 *word = inext(*word);
396 d1 = edges[start][e->
pos()][e->
d1()];
397 d2 = edges[e->
pos()][end][e->
d2()];
400 s->append_daughter();
401 s->append_daughter();
403 extract_edge(start,e->
pos(),e->
d1(),d1,daughter1(s),word);
404 extract_edge(e->
pos(),end,e->
d2(),d2,daughter2(s),word);
407 s->
set(
"prob",(
float)e->
prob());
413void EST_SCFG_chart_load_relation(
EST_Relation &s,LISP sent)
419 for (w=sent; w != NIL; w=cdr(w))
425 word->set_name(get_c_string(car(car(w))));
426 if (consp(car(cdr(car(w)))))
427 for (f=car(cdr(car(w))); f != NIL; f=cdr(f))
429 if (FLONUMP(car(cdr(car(f)))))
430 word->
set(get_c_string(car(car(f))),
431 get_c_float(car(cdr(car(f)))));
433 word->
set(get_c_string(car(car(f))),
434 get_c_string(car(cdr(car(f)))));
437 word->
set(
"name",get_c_string(car(cdr(car(w)))));
440 word->
set(
"name",get_c_string(car(w)));
460LISP scfg_parse(LISP
string, LISP grammar)
469 EST_SCFG_chart_load_relation(words,
string);
477LISP scfg_parse(LISP
string,
EST_SCFG &grammar)
486 EST_SCFG_chart_load_relation(words,
string);
494LISP scfg_bracketing_only(LISP parse)
496 if (consp(siod_nth(4,parse)))
500 for (d=cdr(cdr(cdr(cdr(parse)))),ds=NIL; d != NIL; d=cdr(d))
501 ds = cons(scfg_bracketing_only(car(d)),ds);
505 return siod_nth(4,parse);
519 int fully_contained=0;
521 for (c=0; c < corpus.
length(); c++)
523 LISP flat = siod_flatten(corpus.
a_no_check(c).string());
525 parse = scfg_parse(flat,*
this);
535 count_bracket_crossing(corpus.
a_no_check(c),parsed,vs);
542 cout <<
"cross bracketing " << cb.
mean()*100 <<
" (" << failed <<
543 " failed " << (float)(100.0*fully_contained)/corpus.
length() <<
544 "% fully consistent from " << corpus.
length()
545 <<
" sentences)" << endl;
555 if (ref.length() != test.length())
557 EST_error(
"bracket_crossing: sentences of different lengths");
560 for (i=0; i < ref.length(); i++)
561 for (j=i+1; j <= ref.length(); j++)
562 if (test.
valid(i,j) == 1)
564 if (ref.
valid(i,j) == 0)
void set(const EST_String &name, int ival)
int pos(void)
Postion, 0 1 or 2, where 0 is empty, 1 is incomplete 2 is complete.
double prob(void)
Edge probability.
int d1()
(Non)terminal of daughter 1
int d2()
(Non)terminal of daughter 2
void extract_parse(EST_Relation *syn, EST_Relation *word, int force=0)
Extract parse tree and add it to syn linking leafs to word.
void setup_wfst(EST_Relation *s, const EST_String &name="name")
void parse()
Parses the loaded WFST with the loaded grammar.
LISP find_parse()
Return the parse in full LISP form.
void set_grammar_rules(LISP r)
Initialize from LISP rules set.
void test_crossbrackets()
EST_String nonterminal(int p) const
Convert nonterminal index to string form.
int num_nonterminals() const
Number of nonterminals.
double prob_B(int p, int q, int r) const
The rule probability of given binary rule.
EST_String terminal(int m) const
Convert terminal index to string form.
void set_rules(LISP rules)
Set (or reset) rules from external source after construction.
double prob_U(int p, int m) const
The rule probability of given unary rule.
double mean(void) const
mean of currently cummulated values
INLINE int length() const
number of items in vector.
INLINE const T & a_no_check(int n) const
read-only const access operator: without bounds checking
const EST_String & string(void) const
int valid(int i, int k) const
If a bracketing from i to k is valid in string.