PocketSphinx 5prealpha
acmod.c
Go to the documentation of this file.
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 2008 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
38
44/* System headers. */
45#include <assert.h>
46#include <string.h>
47#include <math.h>
48
49/* SphinxBase headers. */
50#include <sphinxbase/prim_type.h>
51#include <sphinxbase/err.h>
52#include <sphinxbase/cmd_ln.h>
53#include <sphinxbase/strfuncs.h>
54#include <sphinxbase/byteorder.h>
55#include <sphinxbase/feat.h>
56#include <sphinxbase/bio.h>
57
58/* Local headers. */
59#include "cmdln_macro.h"
60#include "acmod.h"
61#include "s2_semi_mgau.h"
62#include "ptm_mgau.h"
63#include "ms_mgau.h"
64
65static int32 acmod_process_mfcbuf(acmod_t *acmod);
66
67static int
68acmod_init_am(acmod_t *acmod)
69{
70 char const *mdeffn, *tmatfn, *mllrfn, *hmmdir;
71
72 /* Read model definition. */
73 if ((mdeffn = cmd_ln_str_r(acmod->config, "_mdef")) == NULL) {
74 if ((hmmdir = cmd_ln_str_r(acmod->config, "-hmm")) == NULL)
75 E_ERROR("Acoustic model definition is not specified either "
76 "with -mdef option or with -hmm\n");
77 else
78 E_ERROR("Folder '%s' does not contain acoustic model "
79 "definition 'mdef'\n", hmmdir);
80
81 return -1;
82 }
83
84 if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
85 E_ERROR("Failed to read acoustic model definition from %s\n", mdeffn);
86 return -1;
87 }
88
89 /* Read transition matrices. */
90 if ((tmatfn = cmd_ln_str_r(acmod->config, "_tmat")) == NULL) {
91 E_ERROR("No tmat file specified\n");
92 return -1;
93 }
94 acmod->tmat = tmat_init(tmatfn, acmod->lmath,
95 cmd_ln_float32_r(acmod->config, "-tmatfloor"),
96 TRUE);
97
98 /* Read the acoustic models. */
99 if ((cmd_ln_str_r(acmod->config, "_mean") == NULL)
100 || (cmd_ln_str_r(acmod->config, "_var") == NULL)
101 || (cmd_ln_str_r(acmod->config, "_tmat") == NULL)) {
102 E_ERROR("No mean/var/tmat files specified\n");
103 return -1;
104 }
105
106 if (cmd_ln_str_r(acmod->config, "_senmgau")) {
107 E_INFO("Using general multi-stream GMM computation\n");
108 acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
109 if (acmod->mgau == NULL)
110 return -1;
111 }
112 else {
113 E_INFO("Attempting to use PTM computation module\n");
114 if ((acmod->mgau = ptm_mgau_init(acmod, acmod->mdef)) == NULL) {
115 E_INFO("Attempting to use semi-continuous computation module\n");
116 if ((acmod->mgau = s2_semi_mgau_init(acmod)) == NULL) {
117 E_INFO("Falling back to general multi-stream GMM computation\n");
118 acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
119 if (acmod->mgau == NULL) {
120 E_ERROR("Failed to read acoustic model\n");
121 return -1;
122 }
123 }
124 }
125 }
126
127 /* If there is an MLLR transform, apply it. */
128 if ((mllrfn = cmd_ln_str_r(acmod->config, "-mllr"))) {
129 ps_mllr_t *mllr = ps_mllr_read(mllrfn);
130 if (mllr == NULL)
131 return -1;
132 acmod_update_mllr(acmod, mllr);
133 }
134
135 return 0;
136}
137
138static int
139acmod_init_feat(acmod_t *acmod)
140{
141 acmod->fcb =
142 feat_init(cmd_ln_str_r(acmod->config, "-feat"),
143 cmn_type_from_str(cmd_ln_str_r(acmod->config,"-cmn")),
144 cmd_ln_boolean_r(acmod->config, "-varnorm"),
145 agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")),
146 1, cmd_ln_int32_r(acmod->config, "-ceplen"));
147 if (acmod->fcb == NULL)
148 return -1;
149
150 if (cmd_ln_str_r(acmod->config, "_lda")) {
151 E_INFO("Reading linear feature transformation from %s\n",
152 cmd_ln_str_r(acmod->config, "_lda"));
153 if (feat_read_lda(acmod->fcb,
154 cmd_ln_str_r(acmod->config, "_lda"),
155 cmd_ln_int32_r(acmod->config, "-ldadim")) < 0)
156 return -1;
157 }
158
159 if (cmd_ln_str_r(acmod->config, "-svspec")) {
160 int32 **subvecs;
161 E_INFO("Using subvector specification %s\n",
162 cmd_ln_str_r(acmod->config, "-svspec"));
163 if ((subvecs = parse_subvecs(cmd_ln_str_r(acmod->config, "-svspec"))) == NULL)
164 return -1;
165 if ((feat_set_subvecs(acmod->fcb, subvecs)) < 0)
166 return -1;
167 }
168
169 if (cmd_ln_exists_r(acmod->config, "-agcthresh")
170 && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) {
171 agc_set_threshold(acmod->fcb->agc_struct,
172 cmd_ln_float32_r(acmod->config, "-agcthresh"));
173 }
174
175 if (acmod->fcb->cmn_struct
176 && cmd_ln_exists_r(acmod->config, "-cmninit")) {
177 char *c, *cc, *vallist;
178 int32 nvals;
179
180 vallist = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit"));
181 c = vallist;
182 nvals = 0;
183 while (nvals < acmod->fcb->cmn_struct->veclen
184 && (cc = strchr(c, ',')) != NULL) {
185 *cc = '\0';
186 acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c));
187 c = cc + 1;
188 ++nvals;
189 }
190 if (nvals < acmod->fcb->cmn_struct->veclen && *c != '\0') {
191 acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c));
192 }
193 ckd_free(vallist);
194 }
195 return 0;
196}
197
198int
199acmod_fe_mismatch(acmod_t *acmod, fe_t *fe)
200{
201 /* Output vector dimension needs to be the same. */
202 if (cmd_ln_int32_r(acmod->config, "-ceplen") != fe_get_output_size(fe)) {
203 E_ERROR("Configured feature length %d doesn't match feature "
204 "extraction output size %d\n",
205 cmd_ln_int32_r(acmod->config, "-ceplen"),
206 fe_get_output_size(fe));
207 return TRUE;
208 }
209 /* Feature parameters need to be the same. */
210 /* ... */
211 return FALSE;
212}
213
214int
215acmod_feat_mismatch(acmod_t *acmod, feat_t *fcb)
216{
217 /* Feature type needs to be the same. */
218 if (0 != strcmp(cmd_ln_str_r(acmod->config, "-feat"), feat_name(fcb)))
219 return TRUE;
220 /* Input vector dimension needs to be the same. */
221 if (cmd_ln_int32_r(acmod->config, "-ceplen") != feat_cepsize(fcb))
222 return TRUE;
223 /* FIXME: Need to check LDA and stuff too. */
224 return FALSE;
225}
226
227acmod_t *
228acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
229{
230 acmod_t *acmod;
231
232 acmod = ckd_calloc(1, sizeof(*acmod));
233 acmod->config = cmd_ln_retain(config);
234 acmod->lmath = lmath;
235 acmod->state = ACMOD_IDLE;
236
237 /* Initialize feature computation. */
238 if (fe) {
239 if (acmod_fe_mismatch(acmod, fe))
240 goto error_out;
241 fe_retain(fe);
242 acmod->fe = fe;
243 }
244 else {
245 /* Initialize a new front end. */
246 acmod->fe = fe_init_auto_r(config);
247 if (acmod->fe == NULL)
248 goto error_out;
249 if (acmod_fe_mismatch(acmod, acmod->fe))
250 goto error_out;
251 }
252 if (fcb) {
253 if (acmod_feat_mismatch(acmod, fcb))
254 goto error_out;
255 feat_retain(fcb);
256 acmod->fcb = fcb;
257 }
258 else {
259 /* Initialize a new fcb. */
260 if (acmod_init_feat(acmod) < 0)
261 goto error_out;
262 }
263
264 /* Load acoustic model parameters. */
265 if (acmod_init_am(acmod) < 0)
266 goto error_out;
267
268
269 /* The MFCC buffer needs to be at least as large as the dynamic
270 * feature window. */
271 acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1;
272 acmod->mfc_buf = (mfcc_t **)
273 ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize,
274 sizeof(**acmod->mfc_buf));
275
276 /* Feature buffer has to be at least as large as MFCC buffer. */
277 acmod->n_feat_alloc = acmod->n_mfc_alloc + cmd_ln_int32_r(config, "-pl_window");
278 acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc);
279 acmod->framepos = ckd_calloc(acmod->n_feat_alloc, sizeof(*acmod->framepos));
280
281 acmod->utt_start_frame = 0;
282
283 /* Senone computation stuff. */
284 acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
285 sizeof(*acmod->senone_scores));
286 acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef));
287 acmod->senone_active = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
288 sizeof(*acmod->senone_active));
289 acmod->log_zero = logmath_get_zero(acmod->lmath);
290 acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen");
291 return acmod;
292
293error_out:
294 acmod_free(acmod);
295 return NULL;
296}
297
298void
300{
301 if (acmod == NULL)
302 return;
303
304 feat_free(acmod->fcb);
305 fe_free(acmod->fe);
306 cmd_ln_free_r(acmod->config);
307
308 if (acmod->mfc_buf)
309 ckd_free_2d((void **)acmod->mfc_buf);
310 if (acmod->feat_buf)
311 feat_array_free(acmod->feat_buf);
312
313 if (acmod->mfcfh)
314 fclose(acmod->mfcfh);
315 if (acmod->rawfh)
316 fclose(acmod->rawfh);
317 if (acmod->senfh)
318 fclose(acmod->senfh);
319
320 ckd_free(acmod->framepos);
321 ckd_free(acmod->senone_scores);
322 ckd_free(acmod->senone_active_vec);
323 ckd_free(acmod->senone_active);
324 ckd_free(acmod->rawdata);
325
326 if (acmod->mdef)
327 bin_mdef_free(acmod->mdef);
328 if (acmod->tmat)
329 tmat_free(acmod->tmat);
330 if (acmod->mgau)
331 ps_mgau_free(acmod->mgau);
332 if (acmod->mllr)
333 ps_mllr_free(acmod->mllr);
334
335 ckd_free(acmod);
336}
337
338ps_mllr_t *
340{
341 if (acmod->mllr)
342 ps_mllr_free(acmod->mllr);
343 acmod->mllr = mllr;
344 ps_mgau_transform(acmod->mgau, mllr);
345
346 return mllr;
347}
348
349int
351{
352 char nsenstr[64], logbasestr[64];
353
354 sprintf(nsenstr, "%d", bin_mdef_n_sen(acmod->mdef));
355 sprintf(logbasestr, "%f", logmath_get_base(acmod->lmath));
356 return bio_writehdr(logfh,
357 "version", "0.1",
358 "mdef_file", cmd_ln_str_r(acmod->config, "_mdef"),
359 "n_sen", nsenstr,
360 "logbase", logbasestr, NULL);
361}
362
363int
364acmod_set_senfh(acmod_t *acmod, FILE *logfh)
365{
366 if (acmod->senfh)
367 fclose(acmod->senfh);
368 acmod->senfh = logfh;
369 if (logfh == NULL)
370 return 0;
371 return acmod_write_senfh_header(acmod, logfh);
372}
373
374int
375acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
376{
377 int rv = 0;
378
379 if (acmod->mfcfh)
380 fclose(acmod->mfcfh);
381 acmod->mfcfh = logfh;
382 fwrite(&rv, 4, 1, acmod->mfcfh);
383 return rv;
384}
385
386int
387acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
388{
389 if (acmod->rawfh)
390 fclose(acmod->rawfh);
391 acmod->rawfh = logfh;
392 return 0;
393}
394
395void
396acmod_grow_feat_buf(acmod_t *acmod, int nfr)
397{
398 if (nfr > MAX_N_FRAMES)
399 E_FATAL("Decoder can not process more than %d frames at once, "
400 "requested %d\n", MAX_N_FRAMES, nfr);
401
402 acmod->feat_buf = feat_array_realloc(acmod->fcb, acmod->feat_buf,
403 acmod->n_feat_alloc, nfr);
404 acmod->framepos = ckd_realloc(acmod->framepos,
405 nfr * sizeof(*acmod->framepos));
406 acmod->n_feat_alloc = nfr;
407}
408
409int
410acmod_set_grow(acmod_t *acmod, int grow_feat)
411{
412 int tmp = acmod->grow_feat;
413 acmod->grow_feat = grow_feat;
414
415 /* Expand feat_buf to a reasonable size to start with. */
416 if (grow_feat && acmod->n_feat_alloc < 128)
417 acmod_grow_feat_buf(acmod, 128);
418
419 return tmp;
420}
421
422int
424{
425 fe_start_utt(acmod->fe);
426 acmod->state = ACMOD_STARTED;
427 acmod->n_mfc_frame = 0;
428 acmod->n_feat_frame = 0;
429 acmod->mfc_outidx = 0;
430 acmod->feat_outidx = 0;
431 acmod->output_frame = 0;
432 acmod->senscr_frame = -1;
433 acmod->n_senone_active = 0;
434 acmod->mgau->frame_idx = 0;
435 acmod->rawdata_pos = 0;
436
437 return 0;
438}
439
440int
442{
443 int32 nfr = 0;
444
445 acmod->state = ACMOD_ENDED;
446 if (acmod->n_mfc_frame < acmod->n_mfc_alloc) {
447 int inptr;
448 /* Where to start writing them (circular buffer) */
449 inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
450 /* nfr is always either zero or one. */
451 fe_end_utt(acmod->fe, acmod->mfc_buf[inptr], &nfr);
452 acmod->n_mfc_frame += nfr;
453
454 /* Process whatever's left, and any leadout or update stats if needed. */
455 if (nfr)
456 nfr = acmod_process_mfcbuf(acmod);
457 else
458 feat_update_stats(acmod->fcb);
459 }
460 if (acmod->mfcfh) {
461 long outlen;
462 int32 rv;
463 outlen = (ftell(acmod->mfcfh) - 4) / 4;
464 /* Try to seek and write */
465 if ((rv = fseek(acmod->mfcfh, 0, SEEK_SET)) == 0) {
466 fwrite(&outlen, 4, 1, acmod->mfcfh);
467 }
468 fclose(acmod->mfcfh);
469 acmod->mfcfh = NULL;
470 }
471 if (acmod->rawfh) {
472 fclose(acmod->rawfh);
473 acmod->rawfh = NULL;
474 }
475
476 if (acmod->senfh) {
477 fclose(acmod->senfh);
478 acmod->senfh = NULL;
479 }
480
481 return nfr;
482}
483
484static int
485acmod_log_mfc(acmod_t *acmod,
486 mfcc_t **cep, int n_frames)
487{
488 int n = n_frames * feat_cepsize(acmod->fcb);
489 /* Write features. */
490 if (fwrite(cep[0], sizeof(mfcc_t), n, acmod->mfcfh) != n) {
491 E_ERROR_SYSTEM("Failed to write %d values to file", n);
492 }
493 return 0;
494}
495
496static int
497acmod_process_full_cep(acmod_t *acmod,
498 mfcc_t ***inout_cep,
499 int *inout_n_frames)
500{
501 int32 nfr;
502
503 /* Write to file. */
504 if (acmod->mfcfh)
505 acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
506
507 /* Resize feat_buf to fit. */
508 if (acmod->n_feat_alloc < *inout_n_frames) {
509
510 if (*inout_n_frames > MAX_N_FRAMES)
511 E_FATAL("Batch processing can not process more than %d frames "
512 "at once, requested %d\n", MAX_N_FRAMES, *inout_n_frames);
513
514 feat_array_free(acmod->feat_buf);
515 acmod->feat_buf = feat_array_alloc(acmod->fcb, *inout_n_frames);
516 acmod->n_feat_alloc = *inout_n_frames;
517 acmod->n_feat_frame = 0;
518 acmod->feat_outidx = 0;
519 }
520 /* Make dynamic features. */
521 nfr = feat_s2mfc2feat_live(acmod->fcb, *inout_cep, inout_n_frames,
522 TRUE, TRUE, acmod->feat_buf);
523 acmod->n_feat_frame = nfr;
524 assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
525 *inout_cep += *inout_n_frames;
526 *inout_n_frames = 0;
527
528 return nfr;
529}
530
531static int
532acmod_process_full_raw(acmod_t *acmod,
533 int16 const **inout_raw,
534 size_t *inout_n_samps)
535{
536 int32 nfr, ntail;
537 mfcc_t **cepptr;
538
539 /* Write to logging file if any. */
540 if (*inout_n_samps + acmod->rawdata_pos < acmod->rawdata_size) {
541 memcpy(acmod->rawdata + acmod->rawdata_pos, *inout_raw, *inout_n_samps * sizeof(int16));
542 acmod->rawdata_pos += *inout_n_samps;
543 }
544 if (acmod->rawfh)
545 fwrite(*inout_raw, sizeof(int16), *inout_n_samps, acmod->rawfh);
546 /* Resize mfc_buf to fit. */
547 if (fe_process_frames(acmod->fe, NULL, inout_n_samps, NULL, &nfr, NULL) < 0)
548 return -1;
549 if (acmod->n_mfc_alloc < nfr + 1) {
550 ckd_free_2d(acmod->mfc_buf);
551 acmod->mfc_buf = ckd_calloc_2d(nfr + 1, fe_get_output_size(acmod->fe),
552 sizeof(**acmod->mfc_buf));
553 acmod->n_mfc_alloc = nfr + 1;
554 }
555 acmod->n_mfc_frame = 0;
556 acmod->mfc_outidx = 0;
557 fe_start_utt(acmod->fe);
558 if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
559 acmod->mfc_buf, &nfr, NULL) < 0)
560 return -1;
561 fe_end_utt(acmod->fe, acmod->mfc_buf[nfr], &ntail);
562 nfr += ntail;
563
564 cepptr = acmod->mfc_buf;
565 nfr = acmod_process_full_cep(acmod, &cepptr, &nfr);
566 acmod->n_mfc_frame = 0;
567 return nfr;
568}
569
573static int32
574acmod_process_mfcbuf(acmod_t *acmod)
575{
576 mfcc_t **mfcptr;
577 int32 ncep;
578
579 ncep = acmod->n_mfc_frame;
580 /* Also do this in two parts because of the circular mfc_buf. */
581 if (acmod->mfc_outidx + ncep > acmod->n_mfc_alloc) {
582 int32 ncep1 = acmod->n_mfc_alloc - acmod->mfc_outidx;
583 int saved_state = acmod->state;
584
585 /* Make sure we don't end the utterance here. */
586 if (acmod->state == ACMOD_ENDED)
587 acmod->state = ACMOD_PROCESSING;
588 mfcptr = acmod->mfc_buf + acmod->mfc_outidx;
589 ncep1 = acmod_process_cep(acmod, &mfcptr, &ncep1, FALSE);
590 /* It's possible that not all available frames were filled. */
591 ncep -= ncep1;
592 acmod->n_mfc_frame -= ncep1;
593 acmod->mfc_outidx += ncep1;
594 acmod->mfc_outidx %= acmod->n_mfc_alloc;
595 /* Restore original state (could this really be the end) */
596 acmod->state = saved_state;
597 }
598 mfcptr = acmod->mfc_buf + acmod->mfc_outidx;
599 ncep = acmod_process_cep(acmod, &mfcptr, &ncep, FALSE);
600 acmod->n_mfc_frame -= ncep;
601 acmod->mfc_outidx += ncep;
602 acmod->mfc_outidx %= acmod->n_mfc_alloc;
603 return ncep;
604}
605
606int
608 int16 const **inout_raw,
609 size_t *inout_n_samps,
610 int full_utt)
611{
612 int32 ncep;
613 int32 out_frameidx;
614 int16 const *prev_audio_inptr;
615
616 /* If this is a full utterance, process it all at once. */
617 if (full_utt)
618 return acmod_process_full_raw(acmod, inout_raw, inout_n_samps);
619
620 /* Append MFCCs to the end of any that are previously in there
621 * (in practice, there will probably be none) */
622 if (inout_n_samps && *inout_n_samps) {
623 int inptr;
624 int32 processed_samples;
625
626 prev_audio_inptr = *inout_raw;
627 /* Total number of frames available. */
628 ncep = acmod->n_mfc_alloc - acmod->n_mfc_frame;
629 /* Where to start writing them (circular buffer) */
630 inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
631
632 /* Write them in two (or more) parts if there is wraparound. */
633 while (inptr + ncep > acmod->n_mfc_alloc) {
634 int32 ncep1 = acmod->n_mfc_alloc - inptr;
635 if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
636 acmod->mfc_buf + inptr, &ncep1, &out_frameidx) < 0)
637 return -1;
638
639 if (out_frameidx > 0)
640 acmod->utt_start_frame = out_frameidx;
641
642 processed_samples = *inout_raw - prev_audio_inptr;
643 if (processed_samples + acmod->rawdata_pos < acmod->rawdata_size) {
644 memcpy(acmod->rawdata + acmod->rawdata_pos, prev_audio_inptr, processed_samples * sizeof(int16));
645 acmod->rawdata_pos += processed_samples;
646 }
647 /* Write to logging file if any. */
648 if (acmod->rawfh) {
649 fwrite(prev_audio_inptr, sizeof(int16),
650 processed_samples,
651 acmod->rawfh);
652 }
653 prev_audio_inptr = *inout_raw;
654
655 /* ncep1 now contains the number of frames actually
656 * processed. This is a good thing, but it means we
657 * actually still might have some room left at the end of
658 * the buffer, hence the while loop. Unfortunately it
659 * also means that in the case where we are really
660 * actually done, we need to get out totally, hence the
661 * goto. */
662 acmod->n_mfc_frame += ncep1;
663 ncep -= ncep1;
664 inptr += ncep1;
665 inptr %= acmod->n_mfc_alloc;
666 if (ncep1 == 0)
667 goto alldone;
668 }
669
670 assert(inptr + ncep <= acmod->n_mfc_alloc);
671 if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
672 acmod->mfc_buf + inptr, &ncep, &out_frameidx) < 0)
673 return -1;
674
675 if (out_frameidx > 0)
676 acmod->utt_start_frame = out_frameidx;
677
678
679 processed_samples = *inout_raw - prev_audio_inptr;
680 if (processed_samples + acmod->rawdata_pos < acmod->rawdata_size) {
681 memcpy(acmod->rawdata + acmod->rawdata_pos, prev_audio_inptr, processed_samples * sizeof(int16));
682 acmod->rawdata_pos += processed_samples;
683 }
684 if (acmod->rawfh) {
685 fwrite(prev_audio_inptr, sizeof(int16),
686 processed_samples, acmod->rawfh);
687 }
688 prev_audio_inptr = *inout_raw;
689 acmod->n_mfc_frame += ncep;
690 alldone:
691 ;
692 }
693
694 /* Hand things off to acmod_process_cep. */
695 return acmod_process_mfcbuf(acmod);
696}
697
698int
700 mfcc_t ***inout_cep,
701 int *inout_n_frames,
702 int full_utt)
703{
704 int32 nfeat, ncep, inptr;
705 int orig_n_frames;
706
707 /* If this is a full utterance, process it all at once. */
708 if (full_utt)
709 return acmod_process_full_cep(acmod, inout_cep, inout_n_frames);
710
711 /* Write to file. */
712 if (acmod->mfcfh)
713 acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
714
715 /* Maximum number of frames we're going to generate. */
716 orig_n_frames = ncep = nfeat = *inout_n_frames;
717
718 /* FIXME: This behaviour isn't guaranteed... */
719 if (acmod->state == ACMOD_ENDED)
720 nfeat += feat_window_size(acmod->fcb);
721 else if (acmod->state == ACMOD_STARTED)
722 nfeat -= feat_window_size(acmod->fcb);
723
724 /* Clamp number of features to fit available space. */
725 if (nfeat > acmod->n_feat_alloc - acmod->n_feat_frame) {
726 /* Grow it as needed - we have to grow it at the end of an
727 * utterance because we can't return a short read there. */
728 if (acmod->grow_feat || acmod->state == ACMOD_ENDED)
729 acmod_grow_feat_buf(acmod, acmod->n_feat_alloc + nfeat);
730 else
731 ncep -= (nfeat - (acmod->n_feat_alloc - acmod->n_feat_frame));
732 }
733
734 /* Where to start writing in the feature buffer. */
735 if (acmod->grow_feat) {
736 /* Grow to avoid wraparound if grow_feat == TRUE. */
737 inptr = acmod->feat_outidx + acmod->n_feat_frame;
738 while (inptr + nfeat >= acmod->n_feat_alloc)
739 acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
740 }
741 else {
742 inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
743 }
744
745
746 /* FIXME: we can't split the last frame drop properly to be on the bounary,
747 * so just return
748 */
749 if (inptr + nfeat > acmod->n_feat_alloc && acmod->state == ACMOD_ENDED) {
750 *inout_n_frames -= ncep;
751 *inout_cep += ncep;
752 return 0;
753 }
754
755 /* Write them in two parts if there is wraparound. */
756 if (inptr + nfeat > acmod->n_feat_alloc) {
757 int32 ncep1 = acmod->n_feat_alloc - inptr;
758
759 /* Make sure we don't end the utterance here. */
760 nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
761 &ncep1,
762 (acmod->state == ACMOD_STARTED),
763 FALSE,
764 acmod->feat_buf + inptr);
765 if (nfeat < 0)
766 return -1;
767 /* Move the output feature pointer forward. */
768 acmod->n_feat_frame += nfeat;
769 assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
770 inptr += nfeat;
771 inptr %= acmod->n_feat_alloc;
772 /* Move the input feature pointers forward. */
773 *inout_n_frames -= ncep1;
774 *inout_cep += ncep1;
775 ncep -= ncep1;
776 }
777
778 nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
779 &ncep,
780 (acmod->state == ACMOD_STARTED),
781 (acmod->state == ACMOD_ENDED),
782 acmod->feat_buf + inptr);
783 if (nfeat < 0)
784 return -1;
785 acmod->n_feat_frame += nfeat;
786 assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
787 /* Move the input feature pointers forward. */
788 *inout_n_frames -= ncep;
789 *inout_cep += ncep;
790 if (acmod->state == ACMOD_STARTED)
791 acmod->state = ACMOD_PROCESSING;
792
793 return orig_n_frames - *inout_n_frames;
794}
795
796int
798 mfcc_t **feat)
799{
800 int i, inptr;
801
802 if (acmod->n_feat_frame == acmod->n_feat_alloc) {
803 if (acmod->grow_feat)
804 acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
805 else
806 return 0;
807 }
808
809 if (acmod->grow_feat) {
810 /* Grow to avoid wraparound if grow_feat == TRUE. */
811 inptr = acmod->feat_outidx + acmod->n_feat_frame;
812 while (inptr + 1 >= acmod->n_feat_alloc)
813 acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
814 }
815 else {
816 inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
817 }
818 for (i = 0; i < feat_dimension1(acmod->fcb); ++i)
819 memcpy(acmod->feat_buf[inptr][i],
820 feat[i], feat_dimension2(acmod->fcb, i) * sizeof(**feat));
821 ++acmod->n_feat_frame;
822 assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
823
824 return 1;
825}
826
827static int
828acmod_read_senfh_header(acmod_t *acmod)
829{
830 char **name, **val;
831 int32 swap;
832 int i;
833
834 if (bio_readhdr(acmod->insenfh, &name, &val, &swap) < 0)
835 goto error_out;
836 for (i = 0; name[i] != NULL; ++i) {
837 if (!strcmp(name[i], "n_sen")) {
838 if (atoi(val[i]) != bin_mdef_n_sen(acmod->mdef)) {
839 E_ERROR("Number of senones in senone file (%d) does not "
840 "match mdef (%d)\n", atoi(val[i]),
841 bin_mdef_n_sen(acmod->mdef));
842 goto error_out;
843 }
844 }
845
846 if (!strcmp(name[i], "logbase")) {
847 if (fabs(atof_c(val[i]) - logmath_get_base(acmod->lmath)) > 0.001) {
848 E_ERROR("Logbase in senone file (%f) does not match acmod "
849 "(%f)\n", atof_c(val[i]),
850 logmath_get_base(acmod->lmath));
851 goto error_out;
852 }
853 }
854 }
855 acmod->insen_swap = swap;
856 bio_hdrarg_free(name, val);
857 return 0;
858error_out:
859 bio_hdrarg_free(name, val);
860 return -1;
861}
862
863int
864acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
865{
866 acmod->insenfh = senfh;
867 if (senfh == NULL) {
868 acmod->n_feat_frame = 0;
869 acmod->compallsen = cmd_ln_boolean_r(acmod->config, "-compallsen");
870 return 0;
871 }
872 acmod->compallsen = TRUE;
873 return acmod_read_senfh_header(acmod);
874}
875
876int
878{
879 /* If the feature buffer is circular, this is not possible. */
880 if (acmod->output_frame > acmod->n_feat_alloc) {
881 E_ERROR("Circular feature buffer cannot be rewound (output frame %d, "
882 "alloc %d)\n", acmod->output_frame, acmod->n_feat_alloc);
883 return -1;
884 }
885
886 /* Frames consumed + frames available */
887 acmod->n_feat_frame = acmod->output_frame + acmod->n_feat_frame;
888
889 /* Reset output pointers. */
890 acmod->feat_outidx = 0;
891 acmod->output_frame = 0;
892 acmod->senscr_frame = -1;
893 acmod->mgau->frame_idx = 0;
894
895 return 0;
896}
897
898int
900{
901 /* Advance the output pointers. */
902 if (++acmod->feat_outidx == acmod->n_feat_alloc)
903 acmod->feat_outidx = 0;
904 --acmod->n_feat_frame;
905 ++acmod->mgau->frame_idx;
906
907 return ++acmod->output_frame;
908}
909
910int
911acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active,
912 int16 const *senscr, FILE *senfh)
913{
914 int16 n_active2;
915
916 /* Uncompressed frame format:
917 *
918 * (2 bytes) n_active: Number of active senones
919 * If all senones active:
920 * (n_active * 2 bytes) scores of active senones
921 *
922 * Otherwise:
923 * (2 bytes) n_active: Number of active senones
924 * (n_active bytes) deltas to active senones
925 * (n_active * 2 bytes) scores of active senones
926 */
927 n_active2 = n_active;
928 if (fwrite(&n_active2, 2, 1, senfh) != 1)
929 goto error_out;
930 if (n_active == bin_mdef_n_sen(acmod->mdef)) {
931 if (fwrite(senscr, 2, n_active, senfh) != n_active)
932 goto error_out;
933 }
934 else {
935 int i, n;
936 if (fwrite(active, 1, n_active, senfh) != n_active)
937 goto error_out;
938 for (i = n = 0; i < n_active; ++i) {
939 n += active[i];
940 if (fwrite(senscr + n, 2, 1, senfh) != 1)
941 goto error_out;
942 }
943 }
944 return 0;
945error_out:
946 E_ERROR_SYSTEM("Failed to write frame to senone file");
947 return -1;
948}
949
953static int
954acmod_read_scores_internal(acmod_t *acmod)
955{
956 FILE *senfh = acmod->insenfh;
957 int16 n_active;
958 size_t rv;
959
960 if (acmod->n_feat_frame == acmod->n_feat_alloc) {
961 if (acmod->grow_feat)
962 acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
963 else
964 return 0;
965 }
966
967 if (senfh == NULL)
968 return -1;
969
970 if ((rv = fread(&n_active, 2, 1, senfh)) != 1)
971 goto error_out;
972
973 acmod->n_senone_active = n_active;
974 if (acmod->n_senone_active == bin_mdef_n_sen(acmod->mdef)) {
975 if ((rv = fread(acmod->senone_scores, 2,
976 acmod->n_senone_active, senfh)) != acmod->n_senone_active)
977 goto error_out;
978 }
979 else {
980 int i, n;
981
982 if ((rv = fread(acmod->senone_active, 1,
983 acmod->n_senone_active, senfh)) != acmod->n_senone_active)
984 goto error_out;
985
986 for (i = 0, n = 0; i < acmod->n_senone_active; ++i) {
987 int j, sen = n + acmod->senone_active[i];
988 for (j = n + 1; j < sen; ++j)
989 acmod->senone_scores[j] = SENSCR_DUMMY;
990
991 if ((rv = fread(acmod->senone_scores + sen, 2, 1, senfh)) != 1)
992 goto error_out;
993
994 n = sen;
995 }
996
997 n++;
998 while (n < bin_mdef_n_sen(acmod->mdef))
999 acmod->senone_scores[n++] = SENSCR_DUMMY;
1000 }
1001 return 1;
1002
1003error_out:
1004 if (ferror(senfh)) {
1005 E_ERROR_SYSTEM("Failed to read frame from senone file");
1006 return -1;
1007 }
1008 return 0;
1009}
1010
1011int
1013{
1014 int inptr, rv;
1015
1016 if (acmod->grow_feat) {
1017 /* Grow to avoid wraparound if grow_feat == TRUE. */
1018 inptr = acmod->feat_outidx + acmod->n_feat_frame;
1019 /* Has to be +1, otherwise, next time acmod_advance() is
1020 * called, this will wrap around. */
1021 while (inptr + 1 >= acmod->n_feat_alloc)
1022 acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
1023 }
1024 else {
1025 inptr = (acmod->feat_outidx + acmod->n_feat_frame) %
1026 acmod->n_feat_alloc;
1027 }
1028
1029 if ((rv = acmod_read_scores_internal(acmod)) != 1)
1030 return rv;
1031
1032 /* Set acmod->senscr_frame appropriately so that these scores
1033 get reused below in acmod_score(). */
1034 acmod->senscr_frame = acmod->output_frame + acmod->n_feat_frame;
1035
1036 E_DEBUG(1,("Frame %d has %d active states\n",
1037 acmod->senscr_frame, acmod->n_senone_active));
1038
1039 /* Increment the "feature frame counter" and record the file
1040 * position for the relevant frame in the (possibly circular)
1041 * buffer. */
1042 ++acmod->n_feat_frame;
1043 acmod->framepos[inptr] = ftell(acmod->insenfh);
1044
1045 return 1;
1046}
1047
1048static int
1049calc_frame_idx(acmod_t *acmod, int *inout_frame_idx)
1050{
1051 int frame_idx;
1052
1053 /* Calculate the absolute frame index to be scored. */
1054 if (inout_frame_idx == NULL)
1055 frame_idx = acmod->output_frame;
1056 else if (*inout_frame_idx < 0)
1057 frame_idx = acmod->output_frame + 1 + *inout_frame_idx;
1058 else
1059 frame_idx = *inout_frame_idx;
1060
1061 return frame_idx;
1062}
1063
1064static int
1065calc_feat_idx(acmod_t *acmod, int frame_idx)
1066{
1067 int n_backfr, feat_idx;
1068
1069 n_backfr = acmod->n_feat_alloc - acmod->n_feat_frame;
1070 if (frame_idx < 0 || acmod->output_frame - frame_idx > n_backfr) {
1071 E_ERROR("Frame %d outside queue of %d frames, %d alloc (%d > %d), "
1072 "cannot score\n", frame_idx, acmod->n_feat_frame,
1073 acmod->n_feat_alloc, acmod->output_frame - frame_idx,
1074 n_backfr);
1075 return -1;
1076 }
1077
1078 /* Get the index in feat_buf/framepos of the frame to be scored. */
1079 feat_idx = (acmod->feat_outidx + frame_idx - acmod->output_frame) %
1080 acmod->n_feat_alloc;
1081 if (feat_idx < 0)
1082 feat_idx += acmod->n_feat_alloc;
1083
1084 return feat_idx;
1085}
1086
1087mfcc_t **
1088acmod_get_frame(acmod_t *acmod, int *inout_frame_idx)
1089{
1090 int frame_idx, feat_idx;
1091
1092 /* Calculate the absolute frame index requested. */
1093 frame_idx = calc_frame_idx(acmod, inout_frame_idx);
1094
1095 /* Calculate position of requested frame in circular buffer. */
1096 if ((feat_idx = calc_feat_idx(acmod, frame_idx)) < 0)
1097 return NULL;
1098
1099 if (inout_frame_idx)
1100 *inout_frame_idx = frame_idx;
1101
1102 return acmod->feat_buf[feat_idx];
1103}
1104
1105int16 const *
1106acmod_score(acmod_t *acmod, int *inout_frame_idx)
1107{
1108 int frame_idx, feat_idx;
1109
1110 /* Calculate the absolute frame index to be scored. */
1111 frame_idx = calc_frame_idx(acmod, inout_frame_idx);
1112
1113 /* If all senones are being computed, or we are using a senone file,
1114 then we can reuse existing scores. */
1115 if ((acmod->compallsen || acmod->insenfh)
1116 && frame_idx == acmod->senscr_frame) {
1117 if (inout_frame_idx)
1118 *inout_frame_idx = frame_idx;
1119 return acmod->senone_scores;
1120 }
1121
1122 /* Calculate position of requested frame in circular buffer. */
1123 if ((feat_idx = calc_feat_idx(acmod, frame_idx)) < 0)
1124 return NULL;
1125
1126 /*
1127 * If there is an input senone file locate the appropriate frame and read
1128 * it.
1129 */
1130 if (acmod->insenfh) {
1131 fseek(acmod->insenfh, acmod->framepos[feat_idx], SEEK_SET);
1132 if (acmod_read_scores_internal(acmod) < 0)
1133 return NULL;
1134 }
1135 else {
1136 /* Build active senone list. */
1137 acmod_flags2list(acmod);
1138
1139 /* Generate scores for the next available frame */
1140 ps_mgau_frame_eval(acmod->mgau,
1141 acmod->senone_scores,
1142 acmod->senone_active,
1143 acmod->n_senone_active,
1144 acmod->feat_buf[feat_idx],
1145 frame_idx,
1146 acmod->compallsen);
1147 }
1148
1149 if (inout_frame_idx)
1150 *inout_frame_idx = frame_idx;
1151 acmod->senscr_frame = frame_idx;
1152
1153 /* Dump scores to the senone dump file if one exists. */
1154 if (acmod->senfh) {
1155 if (acmod_write_scores(acmod, acmod->n_senone_active,
1156 acmod->senone_active,
1157 acmod->senone_scores,
1158 acmod->senfh) < 0)
1159 return NULL;
1160 E_DEBUG(1,("Frame %d has %d active states\n", frame_idx,
1161 acmod->n_senone_active));
1162 }
1163
1164 return acmod->senone_scores;
1165}
1166
1167int
1168acmod_best_score(acmod_t *acmod, int *out_best_senid)
1169{
1170 int i, best;
1171
1172 best = SENSCR_DUMMY;
1173 if (acmod->compallsen) {
1174 for (i = 0; i < bin_mdef_n_sen(acmod->mdef); ++i) {
1175 if (acmod->senone_scores[i] < best) {
1176 best = acmod->senone_scores[i];
1177 *out_best_senid = i;
1178 }
1179 }
1180 }
1181 else {
1182 int16 *senscr;
1183 senscr = acmod->senone_scores;
1184 for (i = 0; i < acmod->n_senone_active; ++i) {
1185 senscr += acmod->senone_active[i];
1186 if (*senscr < best) {
1187 best = *senscr;
1188 *out_best_senid = i;
1189 }
1190 }
1191 }
1192 return best;
1193}
1194
1195
1196void
1198{
1199 if (acmod->compallsen)
1200 return;
1201 bitvec_clear_all(acmod->senone_active_vec, bin_mdef_n_sen(acmod->mdef));
1202 acmod->n_senone_active = 0;
1203}
1204
/*
 * Flag the senone of emitting state i of multiplex HMM h as active in
 * a's senone_active_vec, skipping states whose senone sequence ID is
 * BAD_SSID.  Wrapped in do { } while (0) so that the macro behaves as a
 * single statement and cannot capture an `else` belonging to the caller.
 */
#define MPX_BITVEC_SET(a,h,i)                                       \
    do {                                                            \
        if (hmm_mpx_ssid(h,i) != BAD_SSID)                          \
            bitvec_set((a)->senone_active_vec,                      \
                       hmm_mpx_senid(h,i));                         \
    } while (0)
/* Flag the senone of emitting state i of non-multiplex HMM h as active. */
#define NONMPX_BITVEC_SET(a,h,i)                                    \
    bitvec_set((a)->senone_active_vec,                              \
               hmm_nonmpx_senid(h,i))
1211
1212void
1214{
1215 int i;
1216
1217 if (acmod->compallsen)
1218 return;
1219 if (hmm_is_mpx(hmm)) {
1220 switch (hmm_n_emit_state(hmm)) {
1221 case 5:
1222 MPX_BITVEC_SET(acmod, hmm, 4);
1223 MPX_BITVEC_SET(acmod, hmm, 3);
1224 case 3:
1225 MPX_BITVEC_SET(acmod, hmm, 2);
1226 MPX_BITVEC_SET(acmod, hmm, 1);
1227 MPX_BITVEC_SET(acmod, hmm, 0);
1228 break;
1229 default:
1230 for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
1231 MPX_BITVEC_SET(acmod, hmm, i);
1232 }
1233 }
1234 }
1235 else {
1236 switch (hmm_n_emit_state(hmm)) {
1237 case 5:
1238 NONMPX_BITVEC_SET(acmod, hmm, 4);
1239 NONMPX_BITVEC_SET(acmod, hmm, 3);
1240 case 3:
1241 NONMPX_BITVEC_SET(acmod, hmm, 2);
1242 NONMPX_BITVEC_SET(acmod, hmm, 1);
1243 NONMPX_BITVEC_SET(acmod, hmm, 0);
1244 break;
1245 default:
1246 for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
1247 NONMPX_BITVEC_SET(acmod, hmm, i);
1248 }
1249 }
1250 }
1251}
1252
1253int32
1255{
1256 int32 w, l, n, b, total_dists, total_words, extra_bits;
1257 bitvec_t *flagptr;
1258
1259 total_dists = bin_mdef_n_sen(acmod->mdef);
1260 if (acmod->compallsen) {
1261 acmod->n_senone_active = total_dists;
1262 return total_dists;
1263 }
1264 total_words = total_dists / BITVEC_BITS;
1265 extra_bits = total_dists % BITVEC_BITS;
1266 w = n = l = 0;
1267 for (flagptr = acmod->senone_active_vec; w < total_words; ++w, ++flagptr) {
1268 if (*flagptr == 0)
1269 continue;
1270 for (b = 0; b < BITVEC_BITS; ++b) {
1271 if (*flagptr & (1UL << b)) {
1272 int32 sen = w * BITVEC_BITS + b;
1273 int32 delta = sen - l;
1274 /* Handle excessive deltas "lossily" by adding a few
1275 extra senones to bridge the gap. */
1276 while (delta > 255) {
1277 acmod->senone_active[n++] = 255;
1278 delta -= 255;
1279 }
1280 acmod->senone_active[n++] = delta;
1281 l = sen;
1282 }
1283 }
1284 }
1285
1286 for (b = 0; b < extra_bits; ++b) {
1287 if (*flagptr & (1UL << b)) {
1288 int32 sen = w * BITVEC_BITS + b;
1289 int32 delta = sen - l;
1290 /* Handle excessive deltas "lossily" by adding a few
1291 extra senones to bridge the gap. */
1292 while (delta > 255) {
1293 acmod->senone_active[n++] = 255;
1294 delta -= 255;
1295 }
1296 acmod->senone_active[n++] = delta;
1297 l = sen;
1298 }
1299 }
1300
1301 acmod->n_senone_active = n;
1302 E_DEBUG(1, ("acmod_flags2list: %d active in frame %d\n",
1303 acmod->n_senone_active, acmod->output_frame));
1304 return n;
1305}
1306
1307int32
1309{
1310 return acmod->utt_start_frame;
1311}
1312
1313void
1315{
1316 fe_start_stream(acmod->fe);
1317 acmod->utt_start_frame = 0;
1318}
1319
1320void
1322{
1323 assert(size >= 0);
1324 acmod->rawdata_size = size;
1325 if (acmod->rawdata_size > 0) {
1326 ckd_free(acmod->rawdata);
1327 acmod->rawdata = ckd_calloc(size, sizeof(int16));
1328 }
1329}
1330
1331void
1332acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size)
1333{
1334 if (buffer) {
1335 *buffer = acmod->rawdata;
1336 }
1337 if (size) {
1338 *size = acmod->rawdata_pos;
1339 }
1340}
1341
void acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size)
Retrieves the raw data collected during utterance decoding.
Definition: acmod.c:1332
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
Definition: acmod.c:410
void acmod_free(acmod_t *acmod)
Finalize an acoustic model.
Definition: acmod.c:299
int acmod_process_raw(acmod_t *acmod, int16 const **inout_raw, size_t *inout_n_samps, int full_utt)
TODO: Set queue length for utterance processing.
Definition: acmod.c:607
int acmod_end_utt(acmod_t *acmod)
Mark the end of an utterance.
Definition: acmod.c:441
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition: acmod.c:1213
int acmod_best_score(acmod_t *acmod, int *out_best_senid)
Get best score and senone index for current frame.
Definition: acmod.c:1168
int acmod_advance(acmod_t *acmod)
Advance the frame index.
Definition: acmod.c:899
mfcc_t ** acmod_get_frame(acmod_t *acmod, int *inout_frame_idx)
Get a frame of dynamic feature data.
Definition: acmod.c:1088
int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
Start logging MFCCs to a filehandle.
Definition: acmod.c:375
int acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
Set up a senone score dump file for input.
Definition: acmod.c:864
int acmod_read_scores(acmod_t *acmod)
Read one frame of scores from senone score dump file.
Definition: acmod.c:1012
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition: acmod.c:1106
int acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active, int16 const *senscr, FILE *senfh)
Write a frame of senone scores to a dump file.
Definition: acmod.c:911
int acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
Start logging raw audio to a filehandle.
Definition: acmod.c:387
int acmod_rewind(acmod_t *acmod)
Rewind the current utterance, allowing it to be rescored.
Definition: acmod.c:877
int32 acmod_stream_offset(acmod_t *acmod)
Get the offset of the utterance start of the current stream, helpful for stream-wide timing.
Definition: acmod.c:1308
void acmod_set_rawdata_size(acmod_t *acmod, int32 size)
Sets the limit of the raw audio data to store.
Definition: acmod.c:1321
int acmod_process_cep(acmod_t *acmod, mfcc_t ***inout_cep, int *inout_n_frames, int full_utt)
Feed acoustic feature data into the acoustic model for scoring.
Definition: acmod.c:699
int acmod_start_utt(acmod_t *acmod)
Mark the start of an utterance.
Definition: acmod.c:423
int acmod_process_feat(acmod_t *acmod, mfcc_t **feat)
Feed dynamic feature data into the acoustic model for scoring.
Definition: acmod.c:797
int acmod_set_senfh(acmod_t *acmod, FILE *logfh)
Start logging senone scores to a filehandle.
Definition: acmod.c:364
int acmod_write_senfh_header(acmod_t *acmod, FILE *logfh)
Write senone dump file header.
Definition: acmod.c:350
ps_mllr_t * acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
Adapt acoustic model using a linear transform.
Definition: acmod.c:339
int32 acmod_flags2list(acmod_t *acmod)
Build active list from the active senone flags.
Definition: acmod.c:1254
void acmod_start_stream(acmod_t *acmod)
Reset the current stream.
Definition: acmod.c:1314
acmod_t * acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
Initialize an acoustic model.
Definition: acmod.c:228
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition: acmod.c:1197
Acoustic model structures for PocketSphinx.
@ ACMOD_IDLE
Not in an utterance.
Definition: acmod.h:68
@ ACMOD_PROCESSING
Utterance in progress.
Definition: acmod.h:70
@ ACMOD_ENDED
Utterance ended, still buffering.
Definition: acmod.h:71
@ ACMOD_STARTED
Utterance started, no data yet.
Definition: acmod.h:69
#define SENSCR_DUMMY
Dummy senone score value for unintentionally active states.
Definition: acmod.h:77
POCKETSPHINX_EXPORT bin_mdef_t * bin_mdef_read(cmd_ln_t *config, const char *filename)
Read a binary mdef from a file.
Definition: bin_mdef.c:323
int bin_mdef_free(bin_mdef_t *m)
Release a pointer to a binary mdef.
Definition: bin_mdef.c:272
#define MAX_N_FRAMES
Maximum number of frames in index, should be in sync with above.
Definition: hmm.h:69
(Sphinx 3.0 specific) A module that wraps up the code of gauden and senone because they are closely r...
POCKETSPHINX_EXPORT ps_mllr_t * ps_mllr_read(char const *file)
Read a speaker-adaptive linear transform from a file.
Definition: ps_mllr.c:52
POCKETSPHINX_EXPORT int ps_mllr_free(ps_mllr_t *mllr)
Release a pointer to a linear transform.
Definition: ps_mllr.c:145
Fast phonetically-tied mixture evaluation.
Acoustic model structure.
Definition: acmod.h:148
ps_mgau_t * mgau
Model parameters.
Definition: acmod.h:161
uint8 state
State of utterance processing.
Definition: acmod.h:187
frame_idx_t n_mfc_frame
Number of frames active in mfc_buf.
Definition: acmod.h:196
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
cmd_ln_t * config
Configuration.
Definition: acmod.h:150
feat_t * fcb
Dynamic feature computation.
Definition: acmod.h:156
bitvec_t * senone_active_vec
Active GMMs in current frame.
Definition: acmod.h:166
ps_mllr_t * mllr
Speaker transformation.
Definition: acmod.h:162
int n_senone_active
Number of active GMMs.
Definition: acmod.h:169
fe_t * fe
Acoustic feature computation.
Definition: acmod.h:155
uint8 * senone_active
Array of deltas to active GMMs.
Definition: acmod.h:167
mfcc_t ** mfc_buf
Temporary buffer of acoustic features.
Definition: acmod.h:173
FILE * mfcfh
File for writing acoustic feature data.
Definition: acmod.h:176
frame_idx_t n_feat_alloc
Number of frames allocated in feat_buf.
Definition: acmod.h:198
frame_idx_t n_feat_frame
Number of frames active in feat_buf.
Definition: acmod.h:199
int log_zero
Zero log-probability value.
Definition: acmod.h:170
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
int16 * senone_scores
GMM scores for current frame.
Definition: acmod.h:165
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
int senscr_frame
Frame index for senone_scores.
Definition: acmod.h:168
FILE * senfh
File for writing senone score data.
Definition: acmod.h:177
long * framepos
File positions of recent frames in senone file.
Definition: acmod.h:179
uint8 compallsen
Compute all senones?
Definition: acmod.h:188
uint8 grow_feat
Whether to grow feat_buf.
Definition: acmod.h:189
FILE * insenfh
Input senone score file.
Definition: acmod.h:178
frame_idx_t feat_outidx
Start of active frames in feat_buf.
Definition: acmod.h:200
uint8 insen_swap
Whether to swap input senone score.
Definition: acmod.h:190
FILE * rawfh
File for writing raw audio data.
Definition: acmod.h:175
frame_idx_t output_frame
Index of next frame of dynamic features.
Definition: acmod.h:194
frame_idx_t utt_start_frame
Index of the utterance start in the stream, all timings are relative to that.
Definition: acmod.h:192
frame_idx_t n_mfc_alloc
Number of frames allocated in mfc_buf.
Definition: acmod.h:195
mfcc_t *** feat_buf
Temporary buffer of dynamic features.
Definition: acmod.h:174
frame_idx_t mfc_outidx
Start of active frames in mfc_buf.
Definition: acmod.h:197
An individual HMM among the HMM search space.
int frame_idx
frame counter.
Definition: acmod.h:115
Feature space linear transform structure.
Definition: acmod.h:82
tmat_t * tmat_init(char const *tmatfile, logmath_t *lmath, float64 tpfloor, int32 breport)
Initialize transition matrix.
Definition: tmat.c:134
void tmat_free(tmat_t *t)
RAH, add code to remove memory allocated by tmat_init.
Definition: tmat.c:275