PocketSphinx 5prealpha
fsg_search.c
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 *
19 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 * ====================================================================
32 *
33 */
34
35/*
36 * fsg_search.c -- Search structures for FSM decoding.
37 *
38 * **********************************************
39 * CMU ARPA Speech Project
40 *
41 * Copyright (c) 2004 Carnegie Mellon University.
42 * ALL RIGHTS RESERVED.
43 * **********************************************
44 *
45 * HISTORY
46 *
47 * 18-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
48 * Started.
49 */
50
51/* System headers. */
52#include <stdio.h>
53#include <string.h>
54#include <assert.h>
55
56/* SphinxBase headers. */
57#include <sphinxbase/err.h>
58#include <sphinxbase/ckd_alloc.h>
59#include <sphinxbase/strfuncs.h>
60#include <sphinxbase/cmd_ln.h>
61
62/* Local headers. */
64#include "ps_lattice_internal.h"
65#include "fsg_search_internal.h"
66#include "fsg_history.h"
67#include "fsg_lextree.h"
68
69/* Turn this on for detailed debugging dump */
70#define __FSG_DBG__ 0
71#define __FSG_DBG_CHAN__ 0
72
73static ps_seg_t *fsg_search_seg_iter(ps_search_t *search);
74static ps_lattice_t *fsg_search_lattice(ps_search_t *search);
75static int fsg_search_prob(ps_search_t *search);
76
77static ps_searchfuncs_t fsg_funcs = {
78 /* start: */ fsg_search_start,
79 /* step: */ fsg_search_step,
80 /* finish: */ fsg_search_finish,
81 /* reinit: */ fsg_search_reinit,
82 /* free: */ fsg_search_free,
83 /* lattice: */ fsg_search_lattice,
84 /* hyp: */ fsg_search_hyp,
85 /* prob: */ fsg_search_prob,
86 /* seg_iter: */ fsg_search_seg_iter,
87};
88
89static int
90fsg_search_add_silences(fsg_search_t *fsgs, fsg_model_t *fsg)
91{
92 dict_t *dict;
93 int32 wid;
94 int n_sil;
95
96 dict = ps_search_dict(fsgs);
97 /*
98 * NOTE: Unlike N-Gram search, we do not use explicit start and
99 * end symbols. This is because the start and end nodes are
100 * defined in the grammar. We do add silence/filler self-loops to
101 * all states in order to allow for silence between words and at
102 * the beginning and end of utterances.
103 *
104 * This has some implications for word graph generation, namely,
105 * that there can (and usually will) be multiple start and end
106 * states in the word graph. We therefore do add explicit start
107 * and end nodes to the graph.
108 */
109 /* Add silence self-loops to all states. */
110 fsg_model_add_silence(fsg, "<sil>", -1,
111 cmd_ln_float32_r(ps_search_config(fsgs), "-silprob"));
112 n_sil = 0;
113 /* Add self-loops for all other fillers. */
114 for (wid = dict_filler_start(dict); wid < dict_filler_end(dict); ++wid) {
115 char const *word = dict_wordstr(dict, wid);
116 if (wid == dict_startwid(dict) || wid == dict_finishwid(dict))
117 continue;
118 fsg_model_add_silence(fsg, word, -1,
119 cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob"));
120 ++n_sil;
121 }
122
123 return n_sil;
124}
125
126/* Scans the dictionary and check if all words are present. */
127static int
128fsg_search_check_dict(fsg_search_t *fsgs, fsg_model_t *fsg)
129{
130 dict_t *dict;
131 int i;
132
133 dict = ps_search_dict(fsgs);
134 for (i = 0; i < fsg_model_n_word(fsg); ++i) {
135 char const *word;
136 int32 wid;
137
138 word = fsg_model_word_str(fsg, i);
139 wid = dict_wordid(dict, word);
140 if (wid == BAD_S3WID) {
141 E_ERROR("The word '%s' is missing in the dictionary\n", word);
142 return FALSE;
143 }
144 }
145
146 return TRUE;
147}
148
149static int
150fsg_search_add_altpron(fsg_search_t *fsgs, fsg_model_t *fsg)
151{
152 dict_t *dict;
153 int n_alt, n_word;
154 int i;
155
156 dict = ps_search_dict(fsgs);
157 /* Scan FSG's vocabulary for words that have alternate pronunciations. */
158 n_alt = 0;
159 n_word = fsg_model_n_word(fsg);
160 for (i = 0; i < n_word; ++i) {
161 char const *word;
162 int32 wid;
163
164 word = fsg_model_word_str(fsg, i);
165 wid = dict_wordid(dict, word);
166 if (wid != BAD_S3WID) {
167 while ((wid = dict_nextalt(dict, wid)) != BAD_S3WID) {
168 n_alt += fsg_model_add_alt(fsg, word, dict_wordstr(dict, wid));
169 }
170 }
171 }
172
173 E_INFO("Added %d alternate word transitions\n", n_alt);
174 return n_alt;
175}
176
178fsg_search_init(const char *name,
179 fsg_model_t *fsg,
180 cmd_ln_t *config,
181 acmod_t *acmod,
182 dict_t *dict,
183 dict2pid_t *d2p)
184{
185 fsg_search_t *fsgs = ckd_calloc(1, sizeof(*fsgs));
186 ps_search_init(ps_search_base(fsgs), &fsg_funcs, PS_SEARCH_TYPE_FSG, name, config, acmod, dict, d2p);
187
188 fsgs->fsg = fsg_model_retain(fsg);
189 /* Initialize HMM context. */
190 fsgs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
191 acmod->tmat->tp, NULL, acmod->mdef->sseq);
192 if (fsgs->hmmctx == NULL) {
193 ps_search_free(ps_search_base(fsgs));
194 return NULL;
195 }
196
197 /* Intialize the search history object */
198 fsgs->history = fsg_history_init(NULL, dict);
199 fsgs->frame = -1;
200
201 /* Get search pruning parameters */
202 fsgs->beam_factor = 1.0f;
203 fsgs->beam = fsgs->beam_orig
204 = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"))
205 >> SENSCR_SHIFT;
206 fsgs->pbeam = fsgs->pbeam_orig
207 = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"))
208 >> SENSCR_SHIFT;
209 fsgs->wbeam = fsgs->wbeam_orig
210 = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"))
211 >> SENSCR_SHIFT;
212
213 /* LM related weights/penalties */
214 fsgs->lw = cmd_ln_float32_r(config, "-lw");
215 fsgs->pip = (int32) (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip"))
216 * fsgs->lw)
217 >> SENSCR_SHIFT;
218 fsgs->wip = (int32) (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip"))
219 * fsgs->lw)
220 >> SENSCR_SHIFT;
221
222 /* Acoustic score scale for posterior probabilities. */
223 fsgs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
224
225 E_INFO("FSG(beam: %d, pbeam: %d, wbeam: %d; wip: %d, pip: %d)\n",
226 fsgs->beam_orig, fsgs->pbeam_orig, fsgs->wbeam_orig,
227 fsgs->wip, fsgs->pip);
228
229 if (!fsg_search_check_dict(fsgs, fsg)) {
230 fsg_search_free(ps_search_base(fsgs));
231 return NULL;
232 }
233
234 if (cmd_ln_boolean_r(config, "-fsgusefiller") &&
235 !fsg_model_has_sil(fsg))
236 fsg_search_add_silences(fsgs, fsg);
237
238 if (cmd_ln_boolean_r(config, "-fsgusealtpron") &&
239 !fsg_model_has_alt(fsg))
240 fsg_search_add_altpron(fsgs, fsg);
241
242 if (fsg_search_reinit(ps_search_base(fsgs),
243 ps_search_dict(fsgs),
244 ps_search_dict2pid(fsgs)) < 0)
245 {
246 ps_search_free(ps_search_base(fsgs));
247 return NULL;
248
249 }
250 ptmr_init(&fsgs->perf);
251
252 return ps_search_base(fsgs);
253}
254
255void
256fsg_search_free(ps_search_t *search)
257{
258 fsg_search_t *fsgs = (fsg_search_t *)search;
259
260 double n_speech = (double)fsgs->n_tot_frame
261 / cmd_ln_int32_r(ps_search_config(fsgs), "-frate");
262
263 E_INFO("TOTAL fsg %.2f CPU %.3f xRT\n",
264 fsgs->perf.t_tot_cpu,
265 fsgs->perf.t_tot_cpu / n_speech);
266 E_INFO("TOTAL fsg %.2f wall %.3f xRT\n",
267 fsgs->perf.t_tot_elapsed,
268 fsgs->perf.t_tot_elapsed / n_speech);
269
270 ps_search_base_free(search);
272 if (fsgs->history) {
273 fsg_history_reset(fsgs->history);
274 fsg_history_set_fsg(fsgs->history, NULL, NULL);
275 fsg_history_free(fsgs->history);
276 }
278 fsg_model_free(fsgs->fsg);
279 ckd_free(fsgs);
280}
281
282int
283fsg_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
284{
285 fsg_search_t *fsgs = (fsg_search_t *)search;
286
287 /* Free the old lextree */
288 if (fsgs->lextree)
290
291 /* Free old dict2pid, dict */
292 ps_search_base_reinit(search, dict, d2p);
293
294 /* Update the number of words (not used by this module though). */
295 search->n_words = dict_size(dict);
296
297 /* Allocate new lextree for the given FSG */
298 fsgs->lextree = fsg_lextree_init(fsgs->fsg, dict, d2p,
299 ps_search_acmod(fsgs)->mdef,
300 fsgs->hmmctx, fsgs->wip, fsgs->pip);
301
302 /* Inform the history module of the new fsg */
303 fsg_history_set_fsg(fsgs->history, fsgs->fsg, dict);
304
305 return 0;
306}
307
308
309static void
310fsg_search_sen_active(fsg_search_t *fsgs)
311{
312 gnode_t *gn;
313 fsg_pnode_t *pnode;
314 hmm_t *hmm;
315
316 acmod_clear_active(ps_search_acmod(fsgs));
317
318 for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
319 pnode = (fsg_pnode_t *) gnode_ptr(gn);
320 hmm = fsg_pnode_hmmptr(pnode);
321 assert(hmm_frame(hmm) == fsgs->frame);
322 acmod_activate_hmm(ps_search_acmod(fsgs), hmm);
323 }
324}
325
326
327/*
328 * Evaluate all the active HMMs.
329 * (Executed once per frame.)
330 */
331static void
332fsg_search_hmm_eval(fsg_search_t *fsgs)
333{
334 gnode_t *gn;
335 fsg_pnode_t *pnode;
336 hmm_t *hmm;
337 int32 bestscore;
338 int32 n, maxhmmpf;
339
340 bestscore = WORST_SCORE;
341
342 if (!fsgs->pnode_active) {
343 E_ERROR("Frame %d: No active HMM!!\n", fsgs->frame);
344 return;
345 }
346
347 for (n = 0, gn = fsgs->pnode_active; gn; gn = gnode_next(gn), n++) {
348 int32 score;
349
350 pnode = (fsg_pnode_t *) gnode_ptr(gn);
351 hmm = fsg_pnode_hmmptr(pnode);
352 assert(hmm_frame(hmm) == fsgs->frame);
353
354#if __FSG_DBG__
355 E_INFO("pnode(%08x) active @frm %5d\n", (int32) pnode,
356 fsgs->frame);
357 hmm_dump(hmm, stdout);
358#endif
359 score = hmm_vit_eval(hmm);
360#if __FSG_DBG_CHAN__
361 E_INFO("pnode(%08x) after eval @frm %5d\n",
362 (int32) pnode, fsgs->frame);
363 hmm_dump(hmm, stdout);
364#endif
365
366 if (score BETTER_THAN bestscore)
367 bestscore = score;
368 }
369
370#if __FSG_DBG__
371 E_INFO("[%5d] %6d HMM; bestscr: %11d\n", fsgs->frame, n, bestscore);
372#endif
373 fsgs->n_hmm_eval += n;
374
375 /* Adjust beams if #active HMMs larger than absolute threshold */
376 maxhmmpf = cmd_ln_int32_r(ps_search_config(fsgs), "-maxhmmpf");
377 if (maxhmmpf != -1 && n > maxhmmpf) {
378 /*
379 * Too many HMMs active; reduce the beam factor applied to the default
380 * beams, but not if the factor is already at a floor (0.1).
381 */
382 if (fsgs->beam_factor > 0.1) { /* Hack!! Hardwired constant 0.1 */
383 fsgs->beam_factor *= 0.9f; /* Hack!! Hardwired constant 0.9 */
384 fsgs->beam =
385 (int32) (fsgs->beam_orig * fsgs->beam_factor);
386 fsgs->pbeam =
387 (int32) (fsgs->pbeam_orig * fsgs->beam_factor);
388 fsgs->wbeam =
389 (int32) (fsgs->wbeam_orig * fsgs->beam_factor);
390 }
391 }
392 else {
393 fsgs->beam_factor = 1.0f;
394 fsgs->beam = fsgs->beam_orig;
395 fsgs->pbeam = fsgs->pbeam_orig;
396 fsgs->wbeam = fsgs->wbeam_orig;
397 }
398
399 if (n > fsg_lextree_n_pnode(fsgs->lextree))
400 E_FATAL("PANIC! Frame %d: #HMM evaluated(%d) > #PNodes(%d)\n",
401 fsgs->frame, n, fsg_lextree_n_pnode(fsgs->lextree));
402
403 fsgs->bestscore = bestscore;
404}
405
406
407static void
408fsg_search_pnode_trans(fsg_search_t *fsgs, fsg_pnode_t * pnode)
409{
410 fsg_pnode_t *child;
411 hmm_t *hmm;
412 int32 newscore, thresh, nf;
413
414 assert(pnode);
415 assert(!fsg_pnode_leaf(pnode));
416
417 nf = fsgs->frame + 1;
418 thresh = fsgs->bestscore + fsgs->beam;
419
420 hmm = fsg_pnode_hmmptr(pnode);
421
422 for (child = fsg_pnode_succ(pnode);
423 child; child = fsg_pnode_sibling(child)) {
424 newscore = hmm_out_score(hmm) + child->logs2prob;
425
426 if ((newscore BETTER_THAN thresh)
427 && (newscore BETTER_THAN hmm_in_score(&child->hmm))) {
428 /* Incoming score > pruning threshold and > target's existing score */
429 if (hmm_frame(&child->hmm) < nf) {
430 /* Child node not yet activated; do so */
431 fsgs->pnode_active_next =
432 glist_add_ptr(fsgs->pnode_active_next,
433 (void *) child);
434 }
435
436 hmm_enter(&child->hmm, newscore, hmm_out_history(hmm), nf);
437 }
438 }
439}
440
441
442static void
443fsg_search_pnode_exit(fsg_search_t *fsgs, fsg_pnode_t * pnode)
444{
445 hmm_t *hmm;
446 fsg_link_t *fl;
447 int32 wid;
448 fsg_pnode_ctxt_t ctxt;
449
450 assert(pnode);
451 assert(fsg_pnode_leaf(pnode));
452
453 hmm = fsg_pnode_hmmptr(pnode);
454 fl = fsg_pnode_fsglink(pnode);
455 assert(fl);
456
457 wid = fsg_link_wid(fl);
458 assert(wid >= 0);
459
460#if __FSG_DBG__
461 E_INFO("[%5d] Exit(%08x) %10d(score) %5d(pred)\n",
462 fsgs->frame, (int32) pnode,
463 hmm_out_score(hmm), hmm_out_history(hmm));
464#endif
465
466 /*
467 * Check if this is filler or single phone word; these do not model right
468 * context (i.e., the exit score applies to all right contexts).
469 */
470 if (fsg_model_is_filler(fsgs->fsg, wid)
471 /* FIXME: This might be slow due to repeated calls to dict_to_id(). */
472 || (dict_is_single_phone(ps_search_dict(fsgs),
473 dict_wordid(ps_search_dict(fsgs),
474 fsg_model_word_str(fsgs->fsg, wid))))) {
475 /* Create a dummy context structure that applies to all right contexts */
477
478 /* Create history table entry for this word exit */
479 fsg_history_entry_add(fsgs->history,
480 fl,
481 fsgs->frame,
482 hmm_out_score(hmm),
483 hmm_out_history(hmm),
484 pnode->ci_ext, ctxt);
485
486 }
487 else {
488 /* Create history table entry for this word exit */
489 fsg_history_entry_add(fsgs->history,
490 fl,
491 fsgs->frame,
492 hmm_out_score(hmm),
493 hmm_out_history(hmm),
494 pnode->ci_ext, pnode->ctxt);
495 }
496}
497
498
499/*
500 * (Beam) prune the just evaluated HMMs, determine which ones remain
501 * active, which ones transition to successors, which ones exit and
502 * terminate in their respective destination FSM states.
503 * (Executed once per frame.)
504 */
505static void
506fsg_search_hmm_prune_prop(fsg_search_t *fsgs)
507{
508 gnode_t *gn;
509 fsg_pnode_t *pnode;
510 hmm_t *hmm;
511 int32 thresh, word_thresh, phone_thresh;
512
513 assert(fsgs->pnode_active_next == NULL);
514
515 thresh = fsgs->bestscore + fsgs->beam;
516 phone_thresh = fsgs->bestscore + fsgs->pbeam;
517 word_thresh = fsgs->bestscore + fsgs->wbeam;
518
519 for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
520 pnode = (fsg_pnode_t *) gnode_ptr(gn);
521 hmm = fsg_pnode_hmmptr(pnode);
522
523 if (hmm_bestscore(hmm) >= thresh) {
524 /* Keep this HMM active in the next frame */
525 if (hmm_frame(hmm) == fsgs->frame) {
526 hmm_frame(hmm) = fsgs->frame + 1;
527 fsgs->pnode_active_next =
528 glist_add_ptr(fsgs->pnode_active_next,
529 (void *) pnode);
530 }
531 else {
532 assert(hmm_frame(hmm) == fsgs->frame + 1);
533 }
534
535 if (!fsg_pnode_leaf(pnode)) {
536 if (hmm_out_score(hmm) >= phone_thresh) {
537 /* Transition out of this phone into its children */
538 fsg_search_pnode_trans(fsgs, pnode);
539 }
540 }
541 else {
542 if (hmm_out_score(hmm) >= word_thresh) {
543 /* Transition out of leaf node into destination FSG state */
544 fsg_search_pnode_exit(fsgs, pnode);
545 }
546 }
547 }
548 }
549}
550
551
552/*
553 * Propagate newly created history entries through null transitions.
554 */
555static void
556fsg_search_null_prop(fsg_search_t *fsgs)
557{
558 int32 bpidx, n_entries, thresh, newscore;
559 fsg_hist_entry_t *hist_entry;
560 fsg_link_t *l;
561 int32 s;
562 fsg_model_t *fsg;
563
564 fsg = fsgs->fsg;
565 thresh = fsgs->bestscore + fsgs->wbeam; /* Which beam really?? */
566
567 n_entries = fsg_history_n_entries(fsgs->history);
568
569 for (bpidx = fsgs->bpidx_start; bpidx < n_entries; bpidx++) {
570 fsg_arciter_t *itor;
571 hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
572
573 l = fsg_hist_entry_fsglink(hist_entry);
574
575 /* Destination FSG state for history entry */
576 s = l ? fsg_link_to_state(l) : fsg_model_start_state(fsg);
577
578 /*
579 * Check null transitions from d to all other states. (Only need to
580 * propagate one step, since FSG contains transitive closure of null
581 * transitions.)
582 */
583 /* Add all links from from_state to dst */
584 for (itor = fsg_model_arcs(fsg, s); itor;
585 itor = fsg_arciter_next(itor)) {
586 fsg_link_t *l = fsg_arciter_get(itor);
587
588 /* FIXME: Need to deal with tag transitions somehow. */
589 if (fsg_link_wid(l) != -1)
590 continue;
591 newscore =
592 fsg_hist_entry_score(hist_entry) +
593 (fsg_link_logs2prob(l) >> SENSCR_SHIFT);
594
595 if (newscore >= thresh) {
596 fsg_history_entry_add(fsgs->history, l,
597 fsg_hist_entry_frame(hist_entry),
598 newscore,
599 bpidx,
600 fsg_hist_entry_lc(hist_entry),
601 fsg_hist_entry_rc(hist_entry));
602 }
603 }
604 }
605}
606
607
608/*
609 * Perform cross-word transitions; propagate each history entry created in this
610 * frame to lextree roots attached to the target FSG state for that entry.
611 */
612static void
613fsg_search_word_trans(fsg_search_t *fsgs)
614{
615 int32 bpidx, n_entries;
616 fsg_hist_entry_t *hist_entry;
617 fsg_link_t *l;
618 int32 score, newscore, thresh, nf, d;
619 fsg_pnode_t *root;
620 int32 lc, rc;
621
622 n_entries = fsg_history_n_entries(fsgs->history);
623
624 thresh = fsgs->bestscore + fsgs->beam;
625 nf = fsgs->frame + 1;
626
627 for (bpidx = fsgs->bpidx_start; bpidx < n_entries; bpidx++) {
628 hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
629 assert(hist_entry);
630 score = fsg_hist_entry_score(hist_entry);
631 assert(fsgs->frame == fsg_hist_entry_frame(hist_entry));
632
633 l = fsg_hist_entry_fsglink(hist_entry);
634
635 /* Destination state for hist_entry */
636 d = l ? fsg_link_to_state(l) : fsg_model_start_state(fsgs->
637 fsg);
638
639 lc = fsg_hist_entry_lc(hist_entry);
640
641 /* Transition to all root nodes attached to state d */
642 for (root = fsg_lextree_root(fsgs->lextree, d);
643 root; root = root->sibling) {
644 rc = root->ci_ext;
645
646 if ((root->ctxt.bv[lc >> 5] & (1 << (lc & 0x001f))) &&
647 (hist_entry->rc.bv[rc >> 5] & (1 << (rc & 0x001f)))) {
648 /*
649 * Last CIphone of history entry is in left-context list supported by
650 * target root node, and
651 * first CIphone of target root node is in right context list supported
652 * by history entry;
653 * So the transition can go ahead (if new score is good enough).
654 */
655 newscore = score + root->logs2prob;
656
657 if ((newscore BETTER_THAN thresh)
658 && (newscore BETTER_THAN hmm_in_score(&root->hmm))) {
659 if (hmm_frame(&root->hmm) < nf) {
660 /* Newly activated node; add to active list */
661 fsgs->pnode_active_next =
662 glist_add_ptr(fsgs->pnode_active_next,
663 (void *) root);
664#if __FSG_DBG__
665 E_INFO
666 ("[%5d] WordTrans bpidx[%d] -> pnode[%08x] (activated)\n",
667 fsgs->frame, bpidx, (int32) root);
668#endif
669 }
670 else {
671#if __FSG_DBG__
672 E_INFO
673 ("[%5d] WordTrans bpidx[%d] -> pnode[%08x]\n",
674 fsgs->frame, bpidx, (int32) root);
675#endif
676 }
677
678 hmm_enter(&root->hmm, newscore, bpidx, nf);
679 }
680 }
681 }
682 }
683}
684
685
686int
687fsg_search_step(ps_search_t *search, int frame_idx)
688{
689 fsg_search_t *fsgs = (fsg_search_t *)search;
690 int16 const *senscr;
691 acmod_t *acmod = search->acmod;
692 gnode_t *gn;
693 fsg_pnode_t *pnode;
694 hmm_t *hmm;
695
696 /* Activate our HMMs for the current frame if need be. */
697 if (!acmod->compallsen)
698 fsg_search_sen_active(fsgs);
699 /* Compute GMM scores for the current frame. */
700 senscr = acmod_score(acmod, &frame_idx);
701 fsgs->n_sen_eval += acmod->n_senone_active;
702 hmm_context_set_senscore(fsgs->hmmctx, senscr);
703
704 /* Mark backpointer table for current frame. */
705 fsgs->bpidx_start = fsg_history_n_entries(fsgs->history);
706
707 /* Evaluate all active pnodes (HMMs) */
708 fsg_search_hmm_eval(fsgs);
709
710 /*
711 * Prune and propagate the HMMs evaluated; create history entries for
712 * word exits. The words exits are tentative, and may be pruned; make
713 * the survivors permanent via fsg_history_end_frame().
714 */
715 fsg_search_hmm_prune_prop(fsgs);
716 fsg_history_end_frame(fsgs->history);
717
718 /*
719 * Propagate new history entries through any null transitions, creating
720 * new history entries, and then make the survivors permanent.
721 */
722 fsg_search_null_prop(fsgs);
723 fsg_history_end_frame(fsgs->history);
724
725 /*
726 * Perform cross-word transitions; propagate each history entry across its
727 * terminating state to the root nodes of the lextree attached to the state.
728 */
729 fsg_search_word_trans(fsgs);
730
731 /*
732 * We've now come full circle, HMM and FSG states have been updated for
733 * the next frame.
734 * Update the active lists, deactivate any currently active HMMs that
735 * did not survive into the next frame
736 */
737 for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
738 pnode = (fsg_pnode_t *) gnode_ptr(gn);
739 hmm = fsg_pnode_hmmptr(pnode);
740
741 if (hmm_frame(hmm) == fsgs->frame) {
742 /* This HMM NOT activated for the next frame; reset it */
744 }
745 else {
746 assert(hmm_frame(hmm) == (fsgs->frame + 1));
747 }
748 }
749
750 /* Free the currently active list */
751 glist_free(fsgs->pnode_active);
752
753 /* Make the next-frame active list the current one */
754 fsgs->pnode_active = fsgs->pnode_active_next;
755 fsgs->pnode_active_next = NULL;
756
757 /* End of this frame; ready for the next */
758 ++fsgs->frame;
759
760 return 1;
761}
762
763
764/*
765 * Set all HMMs to inactive, clear active lists, initialize FSM start
766 * state to be the only active node.
767 * (Executed at the start of each utterance.)
768 */
769int
770fsg_search_start(ps_search_t *search)
771{
772 fsg_search_t *fsgs = (fsg_search_t *)search;
773 int32 silcipid;
774 fsg_pnode_ctxt_t ctxt;
775
776 /* Reset dynamic adjustment factor for beams */
777 fsgs->beam_factor = 1.0f;
778 fsgs->beam = fsgs->beam_orig;
779 fsgs->pbeam = fsgs->pbeam_orig;
780 fsgs->wbeam = fsgs->wbeam_orig;
781
782 silcipid = bin_mdef_ciphone_id(ps_search_acmod(fsgs)->mdef, "SIL");
783
784 /* Initialize EVERYTHING to be inactive */
785 assert(fsgs->pnode_active == NULL);
786 assert(fsgs->pnode_active_next == NULL);
787
788 fsg_history_reset(fsgs->history);
789 fsg_history_utt_start(fsgs->history);
790 fsgs->final = FALSE;
791
792 /* Dummy context structure that allows all right contexts to use this entry */
794
795 /* Create dummy history entry leading to start state */
796 fsgs->frame = -1;
797 fsgs->bestscore = 0;
798 fsg_history_entry_add(fsgs->history,
799 NULL, -1, 0, -1, silcipid, ctxt);
800 fsgs->bpidx_start = 0;
801
802 /* Propagate dummy history entry through NULL transitions from start state */
803 fsg_search_null_prop(fsgs);
804
805 /* Perform word transitions from this dummy history entry */
806 fsg_search_word_trans(fsgs);
807
808 /* Make the next-frame active list the current one */
809 fsgs->pnode_active = fsgs->pnode_active_next;
810 fsgs->pnode_active_next = NULL;
811
812 ++fsgs->frame;
813
814 fsgs->n_hmm_eval = 0;
815 fsgs->n_sen_eval = 0;
816
817 ptmr_reset(&fsgs->perf);
818 ptmr_start(&fsgs->perf);
819
820 return 0;
821}
822
823/*
824 * Cleanup at the end of each utterance.
825 */
826int
827fsg_search_finish(ps_search_t *search)
828{
829 fsg_search_t *fsgs = (fsg_search_t *)search;
830 gnode_t *gn;
831 fsg_pnode_t *pnode;
832 int32 n_hist, cf;
833
834 /* Deactivate all nodes in the current and next-frame active lists */
835 for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
836 pnode = (fsg_pnode_t *) gnode_ptr(gn);
838 }
839 for (gn = fsgs->pnode_active_next; gn; gn = gnode_next(gn)) {
840 pnode = (fsg_pnode_t *) gnode_ptr(gn);
842 }
843
844 glist_free(fsgs->pnode_active);
845 fsgs->pnode_active = NULL;
846 glist_free(fsgs->pnode_active_next);
847 fsgs->pnode_active_next = NULL;
848
849 fsgs->final = TRUE;
850
851 n_hist = fsg_history_n_entries(fsgs->history);
852 fsgs->n_tot_frame += fsgs->frame;
853 E_INFO
854 ("%d frames, %d HMMs (%d/fr), %d senones (%d/fr), %d history entries (%d/fr)\n\n",
855 fsgs->frame, fsgs->n_hmm_eval,
856 (fsgs->frame > 0) ? fsgs->n_hmm_eval / fsgs->frame : 0,
857 fsgs->n_sen_eval,
858 (fsgs->frame > 0) ? fsgs->n_sen_eval / fsgs->frame : 0,
859 n_hist, (fsgs->frame > 0) ? n_hist / fsgs->frame : 0);
860
861 /* Print out some statistics. */
862 ptmr_stop(&fsgs->perf);
863 /* This is the number of frames processed. */
864 cf = ps_search_acmod(fsgs)->output_frame;
865 if (cf > 0) {
866 double n_speech = (double) (cf + 1)
867 / cmd_ln_int32_r(ps_search_config(fsgs), "-frate");
868 E_INFO("fsg %.2f CPU %.3f xRT\n",
869 fsgs->perf.t_cpu, fsgs->perf.t_cpu / n_speech);
870 E_INFO("fsg %.2f wall %.3f xRT\n",
871 fsgs->perf.t_elapsed, fsgs->perf.t_elapsed / n_speech);
872 }
873
874
875 return 0;
876}
877
878static int
879fsg_search_find_exit(fsg_search_t *fsgs, int frame_idx, int final, int32 *out_score)
880{
881 fsg_hist_entry_t *hist_entry = NULL;
882 fsg_model_t *fsg;
883 int bpidx, frm, last_frm, besthist;
884 int32 bestscore;
885
886 if (frame_idx == -1)
887 frame_idx = fsgs->frame - 1;
888 last_frm = frm = frame_idx;
889
890 /* Scan backwards to find a word exit in frame_idx. */
891 bpidx = fsg_history_n_entries(fsgs->history) - 1;
892 while (bpidx > 0) {
893 hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
894 if (fsg_hist_entry_frame(hist_entry) <= frame_idx) {
895 frm = last_frm = fsg_hist_entry_frame(hist_entry);
896 break;
897 }
898 bpidx--;
899 }
900
901 /* No hypothesis (yet). */
902 if (bpidx <= 0)
903 return bpidx;
904
905 /* Now find best word exit in this frame. */
906 bestscore = INT_MIN;
907 besthist = -1;
908 fsg = fsgs->fsg;
909 while (frm == last_frm) {
910 fsg_link_t *fl;
911 int32 score;
912
913 fl = fsg_hist_entry_fsglink(hist_entry);
914 score = fsg_hist_entry_score(hist_entry);
915
916 if (fl == NULL)
917 break;
918
919 /* Prefer final hypothesis */
920 if (score == bestscore && fsg_link_to_state(fl) == fsg_model_final_state(fsg)) {
921 besthist = bpidx;
922 } else if (score BETTER_THAN bestscore) {
923 /* Only enforce the final state constraint if this is a final hypothesis. */
924 if ((!final)
925 || fsg_link_to_state(fl) == fsg_model_final_state(fsg)) {
926 bestscore = score;
927 besthist = bpidx;
928 }
929 }
930
931 --bpidx;
932 if (bpidx < 0)
933 break;
934 hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
935 frm = fsg_hist_entry_frame(hist_entry);
936 }
937
938 /* Final state not reached. */
939 if (besthist == -1) {
940 E_ERROR("Final result does not match the grammar in frame %d\n", frame_idx);
941 return -1;
942 }
943
944 /* This here's the one we want. */
945 if (out_score)
946 *out_score = bestscore;
947
948 return besthist;
949}
950
951/* FIXME: Mostly duplicated with ngram_search_bestpath(). */
952static ps_latlink_t *
953fsg_search_bestpath(ps_search_t *search, int32 *out_score, int backward)
954{
955 fsg_search_t *fsgs = (fsg_search_t *)search;
956
957 if (search->last_link == NULL) {
958 search->last_link = ps_lattice_bestpath(search->dag, NULL,
959 1.0, fsgs->ascale);
960 if (search->last_link == NULL)
961 return NULL;
962 /* Also calculate betas so we can fill in the posterior
963 * probability field in the segmentation. */
964 if (search->post == 0)
965 search->post = ps_lattice_posterior(search->dag, NULL, fsgs->ascale);
966 }
967 if (out_score)
968 *out_score = search->last_link->path_scr + search->dag->final_node_ascr;
969 return search->last_link;
970}
971
972char const *
973fsg_search_hyp(ps_search_t *search, int32 *out_score)
974{
975 fsg_search_t *fsgs = (fsg_search_t *)search;
976 dict_t *dict = ps_search_dict(search);
977 char *c;
978 size_t len;
979 int bp, bpidx;
980
981 /* Get last backpointer table index. */
982 bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, out_score);
983 /* No hypothesis (yet). */
984 if (bpidx <= 0) {
985 return NULL;
986 }
987
988 /* If bestpath is enabled and the utterance is complete, then run it. */
989 if (fsgs->bestpath && fsgs->final) {
990 ps_lattice_t *dag;
991 ps_latlink_t *link;
992
993 if ((dag = fsg_search_lattice(search)) == NULL) {
994 E_WARN("Failed to obtain the lattice while bestpath enabled\n");
995 return NULL;
996 }
997 if ((link = fsg_search_bestpath(search, out_score, FALSE)) == NULL) {
998 E_WARN("Failed to find the bestpath in a lattice\n");
999 return NULL;
1000 }
1001 return ps_lattice_hyp(dag, link);
1002 }
1003
1004 bp = bpidx;
1005 len = 0;
1006 while (bp > 0) {
1007 fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
1008 fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry);
1009 char const *baseword;
1010 int32 wid;
1011
1012 bp = fsg_hist_entry_pred(hist_entry);
1013 wid = fsg_link_wid(fl);
1014 if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid))
1015 continue;
1016 baseword = dict_basestr(dict,
1017 dict_wordid(dict,
1018 fsg_model_word_str(fsgs->fsg, wid)));
1019 len += strlen(baseword) + 1;
1020 }
1021
1022 ckd_free(search->hyp_str);
1023 if (len == 0) {
1024 search->hyp_str = NULL;
1025 return search->hyp_str;
1026 }
1027 search->hyp_str = ckd_calloc(1, len);
1028
1029 bp = bpidx;
1030 c = search->hyp_str + len - 1;
1031 while (bp > 0) {
1032 fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
1033 fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry);
1034 char const *baseword;
1035 int32 wid;
1036
1037 bp = fsg_hist_entry_pred(hist_entry);
1038 wid = fsg_link_wid(fl);
1039 if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid))
1040 continue;
1041 baseword = dict_basestr(dict,
1042 dict_wordid(dict,
1043 fsg_model_word_str(fsgs->fsg, wid)));
1044 len = strlen(baseword);
1045 c -= len;
1046 memcpy(c, baseword, len);
1047 if (c > search->hyp_str) {
1048 --c;
1049 *c = ' ';
1050 }
1051 }
1052
1053 return search->hyp_str;
1054}
1055
1056static void
1057fsg_seg_bp2itor(ps_seg_t *seg, fsg_hist_entry_t *hist_entry)
1058{
1059 fsg_search_t *fsgs = (fsg_search_t *)seg->search;
1060 fsg_hist_entry_t *ph = NULL;
1061 int32 bp;
1062
1063 if ((bp = fsg_hist_entry_pred(hist_entry)) >= 0)
1064 ph = fsg_history_entry_get(fsgs->history, bp);
1065 seg->word = fsg_model_word_str(fsgs->fsg, hist_entry->fsglink->wid);
1066 seg->ef = fsg_hist_entry_frame(hist_entry);
1067 seg->sf = ph ? fsg_hist_entry_frame(ph) + 1 : 0;
1068 /* This is kind of silly but it happens for null transitions. */
1069 if (seg->sf > seg->ef) seg->sf = seg->ef;
1070 seg->prob = 0; /* Bogus value... */
1071 /* "Language model" score = transition probability. */
1072 seg->lback = 1;
1073 seg->lscr = fsg_link_logs2prob(hist_entry->fsglink) >> SENSCR_SHIFT;
1074 if (ph) {
1075 /* FIXME: Not sure exactly how cross-word triphones are handled. */
1076 seg->ascr = hist_entry->score - ph->score - seg->lscr;
1077 }
1078 else
1079 seg->ascr = hist_entry->score - seg->lscr;
1080}
1081
1082
1083static void
1084fsg_seg_free(ps_seg_t *seg)
1085{
1086 fsg_seg_t *itor = (fsg_seg_t *)seg;
1087 ckd_free(itor->hist);
1088 ckd_free(itor);
1089}
1090
1091static ps_seg_t *
1092fsg_seg_next(ps_seg_t *seg)
1093{
1094 fsg_seg_t *itor = (fsg_seg_t *)seg;
1095
1096 if (++itor->cur == itor->n_hist) {
1097 fsg_seg_free(seg);
1098 return NULL;
1099 }
1100
1101 fsg_seg_bp2itor(seg, itor->hist[itor->cur]);
1102 return seg;
1103}
1104
1105static ps_segfuncs_t fsg_segfuncs = {
1106 /* seg_next */ fsg_seg_next,
1107 /* seg_free */ fsg_seg_free
1108};
1109
1110static ps_seg_t *
1111fsg_search_seg_iter(ps_search_t *search)
1112{
1113 fsg_search_t *fsgs = (fsg_search_t *)search;
1114 fsg_seg_t *itor;
1115 int32 out_score;
1116 int bp, bpidx, cur;
1117
1118 bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, &out_score);
1119 /* No hypothesis (yet). */
1120 if (bpidx <= 0)
1121 return NULL;
1122
1123 /* If bestpath is enabled and the utterance is complete, then run it. */
1124 if (fsgs->bestpath && fsgs->final) {
1125 ps_lattice_t *dag;
1126 ps_latlink_t *link;
1127
1128 if ((dag = fsg_search_lattice(search)) == NULL)
1129 return NULL;
1130 if ((link = fsg_search_bestpath(search, &out_score, TRUE)) == NULL)
1131 return NULL;
1132 return ps_lattice_seg_iter(dag, link, 1.0);
1133 }
1134
1135 /* Calling this an "iterator" is a bit of a misnomer since we have
1136 * to get the entire backtrace in order to produce it. On the
1137 * other hand, all we actually need is the bptbl IDs, and we can
1138 * allocate a fixed-size array of them. */
1139 itor = ckd_calloc(1, sizeof(*itor));
1140 itor->base.vt = &fsg_segfuncs;
1141 itor->base.search = search;
1142 itor->base.lwf = 1.0;
1143 itor->n_hist = 0;
1144 bp = bpidx;
1145 while (bp > 0) {
1146 fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
1147 bp = fsg_hist_entry_pred(hist_entry);
1148 ++itor->n_hist;
1149 }
1150 if (itor->n_hist == 0) {
1151 ckd_free(itor);
1152 return NULL;
1153 }
1154 itor->hist = ckd_calloc(itor->n_hist, sizeof(*itor->hist));
1155 cur = itor->n_hist - 1;
1156 bp = bpidx;
1157 while (bp > 0) {
1158 fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
1159 itor->hist[cur] = hist_entry;
1160 bp = fsg_hist_entry_pred(hist_entry);
1161 --cur;
1162 }
1163
1164 /* Fill in relevant fields for first element. */
1165 fsg_seg_bp2itor((ps_seg_t *)itor, itor->hist[0]);
1166
1167 return (ps_seg_t *)itor;
1168}
1169
1170static int
1171fsg_search_prob(ps_search_t *search)
1172{
1173 fsg_search_t *fsgs = (fsg_search_t *)search;
1174
1175 /* If bestpath is enabled and the utterance is complete, then run it. */
1176 if (fsgs->bestpath && fsgs->final) {
1177 ps_lattice_t *dag;
1178 ps_latlink_t *link;
1179
1180 if ((dag = fsg_search_lattice(search)) == NULL)
1181 return 0;
1182 if ((link = fsg_search_bestpath(search, NULL, TRUE)) == NULL)
1183 return 0;
1184 return search->post;
1185 }
1186 else {
1187 /* FIXME: Give some kind of good estimate here, eventually. */
1188 return 0;
1189 }
1190}
1191
1192static ps_latnode_t *
1193find_node(ps_lattice_t *dag, fsg_model_t *fsg, int sf, int32 wid, int32 node_id)
1194{
1195 ps_latnode_t *node;
1196
1197 for (node = dag->nodes; node; node = node->next)
1198 if ((node->sf == sf) && (node->wid == wid) && (node->node_id == node_id))
1199 break;
1200 return node;
1201}
1202
1203static ps_latnode_t *
1204new_node(ps_lattice_t *dag, fsg_model_t *fsg, int sf, int ef, int32 wid, int32 node_id, int32 ascr)
1205{
1206 ps_latnode_t *node;
1207
1208 node = find_node(dag, fsg, sf, wid, node_id);
1209
1210 if (node) {
1211 /* Update end frames. */
1212 if (node->lef == -1 || node->lef < ef)
1213 node->lef = ef;
1214 if (node->fef == -1 || node->fef > ef)
1215 node->fef = ef;
1216 /* Update best link score. */
1217 if (ascr BETTER_THAN node->info.best_exit)
1218 node->info.best_exit = ascr;
1219 }
1220 else {
1221 /* New node; link to head of list */
1222 node = listelem_malloc(dag->latnode_alloc);
1223 node->wid = wid;
1224 node->sf = sf;
1225 node->fef = node->lef = ef;
1226 node->reachable = FALSE;
1227 node->entries = NULL;
1228 node->exits = NULL;
1229 node->info.best_exit = ascr;
1230 node->node_id = node_id;
1231
1232 node->next = dag->nodes;
1233 dag->nodes = node;
1234 ++dag->n_nodes;
1235 }
1236
1237 return node;
1238}
1239
1240static ps_latnode_t *
1241find_start_node(fsg_search_t *fsgs, ps_lattice_t *dag)
1242{
1243 ps_latnode_t *node;
1244 glist_t start = NULL;
1245 int nstart = 0;
1246
1247 /* Look for all nodes starting in frame zero with some exits. */
1248 for (node = dag->nodes; node; node = node->next) {
1249 if (node->sf == 0 && node->exits) {
1250 E_INFO("Start node %s.%d:%d:%d\n",
1251 fsg_model_word_str(fsgs->fsg, node->wid),
1252 node->sf, node->fef, node->lef);
1253 start = glist_add_ptr(start, node);
1254 ++nstart;
1255 }
1256 }
1257
1258 /* If there was more than one start node candidate, then we need
1259 * to create an artificial start node with epsilon transitions to
1260 * all of them. */
1261 if (nstart == 1) {
1262 node = gnode_ptr(start);
1263 }
1264 else {
1265 gnode_t *st;
1266 int wid;
1267
1268 wid = fsg_model_word_add(fsgs->fsg, "<s>");
1269 if (fsgs->fsg->silwords)
1270 bitvec_set(fsgs->fsg->silwords, wid);
1271 node = new_node(dag, fsgs->fsg, 0, 0, wid, -1, 0);
1272 for (st = start; st; st = gnode_next(st))
1273 ps_lattice_link(dag, node, gnode_ptr(st), 0, 0);
1274 }
1275 glist_free(start);
1276 return node;
1277}
1278
1279static ps_latnode_t *
1280find_end_node(fsg_search_t *fsgs, ps_lattice_t *dag)
1281{
1282 ps_latnode_t *node;
1283 glist_t end = NULL;
1284 int nend = 0;
1285
1286 /* Look for all nodes ending in last frame with some entries. */
1287 for (node = dag->nodes; node; node = node->next) {
1288 if (node->lef == dag->n_frames - 1 && node->entries) {
1289 E_INFO("End node %s.%d:%d:%d (%d)\n",
1290 fsg_model_word_str(fsgs->fsg, node->wid),
1291 node->sf, node->fef, node->lef, node->info.best_exit);
1292 end = glist_add_ptr(end, node);
1293 ++nend;
1294 }
1295 }
1296
1297 if (nend == 1) {
1298 node = gnode_ptr(end);
1299 }
1300 else if (nend == 0) {
1301 ps_latnode_t *last = NULL;
1302 int ef = 0;
1303
1304 /* If there were no end node candidates, then just use the
1305 * node with the last exit frame. */
1306 for (node = dag->nodes; node; node = node->next) {
1307 if (node->lef > ef && node->entries) {
1308 last = node;
1309 ef = node->lef;
1310 }
1311 }
1312 node = last;
1313 if (node)
1314 E_INFO("End node %s.%d:%d:%d (%d)\n",
1315 fsg_model_word_str(fsgs->fsg, node->wid),
1316 node->sf, node->fef, node->lef, node->info.best_exit);
1317 }
1318 else {
1319 /* If there was more than one end node candidate, then we need
1320 * to create an artificial end node with epsilon transitions
1321 * out of all of them. */
1322 gnode_t *st;
1323 int wid;
1324 wid = fsg_model_word_add(fsgs->fsg, "</s>");
1325 if (fsgs->fsg->silwords)
1326 bitvec_set(fsgs->fsg->silwords, wid);
1327 node = new_node(dag, fsgs->fsg, fsgs->frame, fsgs->frame, wid, -1, 0);
1328 /* Use the "best" (in reality it will be the only) exit link
1329 * score from this final node as the link score. */
1330 for (st = end; st; st = gnode_next(st)) {
1331 ps_latnode_t *src = gnode_ptr(st);
1332 ps_lattice_link(dag, src, node, src->info.best_exit, fsgs->frame);
1333 }
1334 }
1335 glist_free(end);
1336 return node;
1337}
1338
1339static void
1340mark_reachable(ps_lattice_t *dag, ps_latnode_t *end)
1341{
1342 glist_t q;
1343
1344 /* It doesn't matter which order we do this in. */
1345 end->reachable = TRUE;
1346 q = glist_add_ptr(NULL, end);
1347 while (q) {
1348 ps_latnode_t *node = gnode_ptr(q);
1349 latlink_list_t *x;
1350
1351 /* Pop the front of the list. */
1352 q = gnode_free(q, NULL);
1353 /* Expand all its predecessors that haven't been seen yet. */
1354 for (x = node->entries; x; x = x->next) {
1355 ps_latnode_t *next = x->link->from;
1356 if (!next->reachable) {
1357 next->reachable = TRUE;
1358 q = glist_add_ptr(q, next);
1359 }
1360 }
1361 }
1362}
1363
1372static ps_lattice_t *
1373fsg_search_lattice(ps_search_t *search)
1374{
1375 fsg_search_t *fsgs;
1376 fsg_model_t *fsg;
1377 ps_latnode_t *node;
1378 ps_lattice_t *dag;
1379 int32 i, n;
1380
1381 fsgs = (fsg_search_t *)search;
1382
1383 /* Check to see if a lattice has previously been created over the
1384 * same number of frames, and reuse it if so. */
1385 if (search->dag && search->dag->n_frames == fsgs->frame)
1386 return search->dag;
1387
1388 /* Nope, create a new one. */
1389 ps_lattice_free(search->dag);
1390 search->dag = NULL;
1391 dag = ps_lattice_init_search(search, fsgs->frame);
1392 fsg = fsgs->fsg;
1393
1394 /*
1395 * Each history table entry represents a link in the word graph.
1396 * The set of nodes is determined by the number of unique
1397 * (word,start-frame) pairs in the history table. So we will
1398 * first find all those nodes.
1399 */
1400 n = fsg_history_n_entries(fsgs->history);
1401 for (i = 0; i < n; ++i) {
1402 fsg_hist_entry_t *fh = fsg_history_entry_get(fsgs->history, i);
1403 int32 ascr;
1404 int sf;
1405
1406 /* Skip null transitions. */
1407 if (fh->fsglink == NULL || fh->fsglink->wid == -1)
1408 continue;
1409
1410 /* Find the start node of this link. */
1411 if (fh->pred) {
1412 fsg_hist_entry_t *pfh = fsg_history_entry_get(fsgs->history, fh->pred);
1413 /* FIXME: We include the transition score in the lattice
1414 * link score. This is because of the practical
1415 * difficulty of obtaining it separately in bestpath or
1416 * forward-backward search, and because it is essentially
1417 * a unigram probability, so there is no need to treat it
1418 * separately from the acoustic score. However, it's not
1419 * clear that this will actually yield correct results.*/
1420 ascr = fh->score - pfh->score;
1421 sf = pfh->frame + 1;
1422 }
1423 else {
1424 ascr = fh->score;
1425 sf = 0;
1426 }
1427
1428 /*
1429 * Note that although scores are tied to links rather than
1430 * nodes, it's possible that there are no links out of the
1431 * destination node, and thus we need to preserve its score in
1432 * case it turns out to be utterance-final.
1433 */
1434 new_node(dag, fsg, sf, fh->frame, fh->fsglink->wid, fsg_link_to_state(fh->fsglink), ascr);
1435 }
1436
1437 /*
1438 * Now, we will create links only to nodes that actually exist.
1439 */
1440 n = fsg_history_n_entries(fsgs->history);
1441 for (i = 0; i < n; ++i) {
1442 fsg_hist_entry_t *fh = fsg_history_entry_get(fsgs->history, i);
1443 fsg_arciter_t *itor;
1444 ps_latnode_t *src, *dest;
1445 int32 ascr;
1446 int sf;
1447
1448 /* Skip null transitions. */
1449 if (fh->fsglink == NULL || fh->fsglink->wid == -1)
1450 continue;
1451
1452 /* Find the start node of this link and calculate its link score. */
1453 if (fh->pred) {
1454 fsg_hist_entry_t *pfh = fsg_history_entry_get(fsgs->history, fh->pred);
1455 sf = pfh->frame + 1;
1456 ascr = fh->score - pfh->score;
1457 }
1458 else {
1459 ascr = fh->score;
1460 sf = 0;
1461 }
1462 src = find_node(dag, fsg, sf, fh->fsglink->wid, fsg_link_to_state(fh->fsglink));
1463 sf = fh->frame + 1;
1464
1465 for (itor = fsg_model_arcs(fsg, fsg_link_to_state(fh->fsglink));
1466 itor; itor = fsg_arciter_next(itor)) {
1467 fsg_link_t *link = fsg_arciter_get(itor);
1468
1469 /* FIXME: Need to figure out what to do about tag transitions. */
1470 if (link->wid >= 0) {
1471 /*
1472 * For each non-epsilon link following this one, look for a
1473 * matching node in the lattice and link to it.
1474 */
1475 if ((dest = find_node(dag, fsg, sf, link->wid, fsg_link_to_state(link))) != NULL)
1476 ps_lattice_link(dag, src, dest, ascr, fh->frame);
1477 }
1478 else {
1479 /*
1480 * Transitive closure on nulls has already been done, so we
1481 * just need to look one link forward from them.
1482 */
1483 fsg_arciter_t *itor2;
1484
1485 /* Add all non-null links out of j. */
1486 for (itor2 = fsg_model_arcs(fsg, fsg_link_to_state(link));
1487 itor2; itor2 = fsg_arciter_next(itor2)) {
1488 fsg_link_t *link = fsg_arciter_get(itor2);
1489
1490 if (link->wid == -1)
1491 continue;
1492
1493 if ((dest = find_node(dag, fsg, sf, link->wid, fsg_link_to_state(link))) != NULL) {
1494 ps_lattice_link(dag, src, dest, ascr, fh->frame);
1495 }
1496 }
1497 }
1498 }
1499 }
1500
1501
1502 /* Figure out which nodes are the start and end nodes. */
1503 if ((dag->start = find_start_node(fsgs, dag)) == NULL) {
1504 E_WARN("Failed to find the start node\n");
1505 goto error_out;
1506 }
1507 if ((dag->end = find_end_node(fsgs, dag)) == NULL) {
1508 E_WARN("Failed to find the end node\n");
1509 goto error_out;
1510 }
1511
1512
1513 E_INFO("lattice start node %s.%d end node %s.%d\n",
1514 fsg_model_word_str(fsg, dag->start->wid), dag->start->sf,
1515 fsg_model_word_str(fsg, dag->end->wid), dag->end->sf);
1516 /* FIXME: Need to calculate final_node_ascr here. */
1517
1518 /*
1519 * Convert word IDs from FSG to dictionary.
1520 */
1521 for (node = dag->nodes; node; node = node->next) {
1522 node->wid = dict_wordid(dag->search->dict,
1523 fsg_model_word_str(fsg, node->wid));
1524 node->basewid = dict_basewid(dag->search->dict, node->wid);
1525 }
1526
1527 /*
1528 * Now we are done, because the links in the graph are uniquely
1529 * defined by the history table. However we should remove any
1530 * nodes which are not reachable from the end node of the FSG.
1531 * Everything is reachable from the start node by definition.
1532 */
1533 mark_reachable(dag, dag->end);
1534
1536 {
1537 int32 silpen, fillpen;
1538
1539 silpen = (int32)(logmath_log(fsg->lmath,
1540 cmd_ln_float32_r(ps_search_config(fsgs), "-silprob"))
1541 * fsg->lw)
1542 >> SENSCR_SHIFT;
1543 fillpen = (int32)(logmath_log(fsg->lmath,
1544 cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob"))
1545 * fsg->lw)
1546 >> SENSCR_SHIFT;
1547
1548 ps_lattice_penalize_fillers(dag, silpen, fillpen);
1549 }
1550 search->dag = dag;
1551
1552 return dag;
1553
1554
1555error_out:
1556 ps_lattice_free(dag);
1557 return NULL;
1558
1559}
1560
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition: acmod.c:1213
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition: acmod.c:1106
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition: acmod.c:1197
int bin_mdef_ciphone_id(bin_mdef_t *m, const char *ciphone)
Context-independent phone lookup.
Definition: bin_mdef.c:691
#define dict_size(d)
Packaged macro access to dictionary members.
Definition: dict.h:151
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
Definition: dict.c:399
void fsg_lextree_free(fsg_lextree_t *lextree)
Free lextrees for an FSG.
Definition: fsg_lextree.c:286
void fsg_psubtree_pnode_deactivate(fsg_pnode_t *pnode)
Mark the given pnode as inactive (for search).
Definition: fsg_lextree.c:832
void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt)
Set all flags on in the given context bitvector.
Definition: fsg_lextree.c:328
fsg_lextree_t * fsg_lextree_init(fsg_model_t *fsg, dict_t *dict, dict2pid_t *d2p, bin_mdef_t *mdef, hmm_context_t *ctx, int32 wip, int32 pip)
Create, initialize, and return a new phonetic lextree for the given FSG.
Definition: fsg_lextree.c:215
int32 hmm_vit_eval(hmm_t *hmm)
Viterbi evaluation of given HMM.
Definition: hmm.c:789
#define BETTER_THAN
Is one score better than another?
Definition: hmm.h:95
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
Definition: hmm.h:227
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
Definition: hmm.c:201
#define WORST_SCORE
Large "bad" score.
Definition: hmm.h:84
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
Definition: hmm.c:80
void hmm_dump(hmm_t *h, FILE *fp)
For debugging, dump the whole HMM out.
Definition: hmm.c:116
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
Definition: hmm.c:56
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
Internal implementation of PocketSphinx decoder.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
void ps_search_base_free(ps_search_t *search)
Free search.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
ps_lattice_t * ps_lattice_init_search(ps_search_t *search, int n_frame)
Construct an empty word graph with reference to a search structure.
Definition: ps_lattice.c:639
void ps_lattice_penalize_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen)
Insert penalty for fillers.
Definition: ps_lattice.c:106
char const * ps_lattice_hyp(ps_lattice_t *dag, ps_latlink_t *link)
Get hypothesis string after bestpath search.
Definition: ps_lattice.c:830
void ps_lattice_delete_unreachable(ps_lattice_t *dag)
Remove nodes marked as unreachable.
Definition: ps_lattice.c:174
ps_seg_t * ps_lattice_seg_iter(ps_lattice_t *dag, ps_latlink_t *link, float32 lwf)
Get hypothesis segmentation iterator after bestpath search.
Definition: ps_lattice.c:1006
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
Definition: ps_lattice.c:665
POCKETSPHINX_EXPORT void ps_lattice_link(ps_lattice_t *dag, ps_latnode_t *from, ps_latnode_t *to, int32 score, int32 ef)
Create a directed link between "from" and "to" nodes, but if a link already exists,...
Definition: ps_lattice.c:65
POCKETSPHINX_EXPORT int32 ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset, float32 ascale)
Calculate link posterior probabilities on a word graph.
Definition: ps_lattice.c:1446
POCKETSPHINX_EXPORT ps_latlink_t * ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, float32 ascale)
Do N-Gram based best-path search on a word graph.
Definition: ps_lattice.c:1215
Word graph search implementation.
#define BAD_S3WID
Dictionary word id.
Definition: s3types.h:90
Acoustic model structure.
Definition: acmod.h:148
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
int n_senone_active
Number of active GMMs.
Definition: acmod.h:169
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
uint8 compallsen
Compute all senones?
Definition: acmod.h:188
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Definition: bin_mdef.h:134
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
a structure for a dictionary.
Definition: dict.h:76
Definition: fsg_history.h:95
Implementation of FSG search (and "FSG set") structure.
int32 beam_orig
Global pruning threshold.
int32 bpidx_start
First history entry index this frame.
glist_t pnode_active
Those active in this frame.
float32 ascale
Acoustic score scale for posterior probabilities.
uint8 final
Decoding is finished for this utterance.
int32 bestscore
For beam pruning.
int32 n_sen_eval
Total senones evaluated this utt.
hmm_context_t * hmmctx
HMM context.
int32 pbeam_orig
Pruning threshold for phone transition.
int32 n_hmm_eval
Total HMMs evaluated this utt.
int32 wbeam_orig
Pruning threshold for word exit.
float32 beam_factor
Dynamic/adaptive factor (<=1) applied to above beams to determine actual effective beams.
glist_t pnode_active_next
Those activated for the next frame.
fsg_model_t * fsg
FSG model.
uint8 bestpath
Whether to run bestpath search and confidence annotation at end.
struct fsg_history_s * history
For storing the Viterbi search history.
struct fsg_lextree_s * lextree
Lextree structure for the currently active FSG.
frame_idx_t frame
Current frame.
ptmr_t perf
Performance counter.
int32 wip
Language weights.
int32 wbeam
Effective beams after applying beam_factor.
Segmentation "iterator" for FSG history.
int16 cur
Current position in hist.
ps_seg_t base
Base structure.
int16 n_hist
Number of history entries.
fsg_hist_entry_t ** hist
Sequence of history entries.
An individual HMM among the HMM search space.
latlink_list_t * entries
Links into this node.
frame_idx_t sf
Start frame.
int32 node_id
Node from fsg model, used to map lattice back to model.
latlink_list_t * exits
Links out of this node.
int32 fef
First end frame.
int32 lef
Last end frame.
int32 best_exit
Best exit score (used for final nodes only)
struct ps_latnode_s * next
Next node in DAG (no ordering implied)
int32 basewid
Dictionary base word id.
int16 reachable
From.
int32 wid
Dictionary word id.
Word graph structure used in bestpath/nbest search.
ps_latnode_t * end
Ending node.
listelem_alloc_t * latnode_alloc
Node allocator for this DAG.
frame_idx_t n_frames
Number of frames for this utterance.
ps_latnode_t * start
Starting node.
ps_latnode_t * nodes
List of all nodes.
ps_search_t * search
Search (if generated by search).
int32 final_node_ascr
Acoustic score of implicit link exiting final node.
int32 n_nodes
Number of nodes in this lattice.
Base structure for search module.
acmod_t * acmod
Acoustic model.
int32 post
Utterance posterior probability.
ps_lattice_t * dag
Current hypothesis word graph.
dict_t * dict
Pronunciation dictionary.
ps_latlink_t * last_link
Final link in best path.
char * hyp_str
Current hypothesis string.
int32 n_words
Number of words known to search (may be less than in the dictionary)
V-table for search algorithm.
Base structure for hypothesis segmentation iterator.
ps_search_t * search
Search object from whence this came.
float32 lwf
Language weight factor (for second-pass searches)
int32 lback
Language model backoff.
ps_segfuncs_t * vt
V-table of seg methods.
int32 lscr
Language model score.
int32 ascr
Acoustic score.
frame_idx_t sf
Start frame.
char const * word
Word string (pointer into dictionary hash)
frame_idx_t ef
End frame.
int32 prob
Log posterior probability.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state].
Definition: tmat.h:56