PocketSphinx  0.6
acmod.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 
44 /* System headers. */
45 #include <assert.h>
46 #include <string.h>
47 
48 /* SphinxBase headers. */
49 #include <sphinxbase/prim_type.h>
50 #include <sphinxbase/err.h>
51 #include <sphinxbase/cmd_ln.h>
52 #include <sphinxbase/strfuncs.h>
53 #include <sphinxbase/byteorder.h>
54 #include <sphinxbase/feat.h>
55 #include <sphinxbase/bio.h>
56 
57 /* Local headers. */
58 #include "cmdln_macro.h"
59 #include "acmod.h"
60 #include "s2_semi_mgau.h"
61 #include "ptm_mgau.h"
62 #include "ms_mgau.h"
63 
64 /* Feature and front-end parameters that may be in feat.params */
65 static const arg_t feat_defn[] = {
66  waveform_to_cepstral_command_line_macro(),
67  cepstral_to_feature_command_line_macro(),
68  CMDLN_EMPTY_OPTION
69 };
70 
71 #ifndef WORDS_BIGENDIAN
72 #define WORDS_BIGENDIAN 1
73 #endif
74 
75 static int32 acmod_process_mfcbuf(acmod_t *acmod);
76 
77 static int
78 acmod_init_am(acmod_t *acmod)
79 {
80  char const *mdeffn, *tmatfn, *mllrfn, *hmmdir;
81 
82  /* Read model definition. */
83  if ((mdeffn = cmd_ln_str_r(acmod->config, "-mdef")) == NULL) {
84  if ((hmmdir = cmd_ln_str_r(acmod->config, "-hmm")) == NULL) {
85  E_ERROR("Acoustic model definition is not specified neither with -mdef option nor with -hmm\n");
86  } else {
87  E_ERROR("Folder '%s' does not contain acoustic model definition 'mdef'\n", hmmdir);
88  }
89  return -1;
90  }
91 
92  if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
93  E_ERROR("Failed to read acoustic model definition from %s\n", mdeffn);
94  return -1;
95  }
96 
97  /* Read transition matrices. */
98  if ((tmatfn = cmd_ln_str_r(acmod->config, "-tmat")) == NULL) {
99  E_ERROR("No tmat file specified\n");
100  return -1;
101  }
102  acmod->tmat = tmat_init(tmatfn, acmod->lmath,
103  cmd_ln_float32_r(acmod->config, "-tmatfloor"),
104  TRUE);
105 
106  /* Read the acoustic models. */
107  if ((cmd_ln_str_r(acmod->config, "-mean") == NULL)
108  || (cmd_ln_str_r(acmod->config, "-var") == NULL)
109  || (cmd_ln_str_r(acmod->config, "-tmat") == NULL)) {
110  E_ERROR("No mean/var/tmat files specified\n");
111  return -1;
112  }
113 
114  if (cmd_ln_str_r(acmod->config, "-senmgau")) {
115  E_INFO("Using general multi-stream GMM computation\n");
116  acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
117  if (acmod->mgau == NULL)
118  return -1;
119  }
120  else {
121  E_INFO("Attempting to use SCHMM computation module\n");
122  if ((acmod->mgau = s2_semi_mgau_init(acmod)) == NULL) {
123  E_INFO("Attempting to use PTHMM computation module\n");
124  if ((acmod->mgau = ptm_mgau_init(acmod, acmod->mdef)) == NULL) {
125  E_INFO("Falling back to general multi-stream GMM computation\n");
126  acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
127  if (acmod->mgau == NULL)
128  return -1;
129  }
130  }
131  }
132 
133  /* If there is an MLLR transform, apply it. */
134  if ((mllrfn = cmd_ln_str_r(acmod->config, "-mllr"))) {
135  ps_mllr_t *mllr = ps_mllr_read(mllrfn);
136  if (mllr == NULL)
137  return -1;
138  acmod_update_mllr(acmod, mllr);
139  }
140 
141  return 0;
142 }
143 
144 static int
145 acmod_init_feat(acmod_t *acmod)
146 {
147  acmod->fcb =
148  feat_init(cmd_ln_str_r(acmod->config, "-feat"),
149  cmn_type_from_str(cmd_ln_str_r(acmod->config,"-cmn")),
150  cmd_ln_boolean_r(acmod->config, "-varnorm"),
151  agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")),
152  1, cmd_ln_int32_r(acmod->config, "-ceplen"));
153  if (acmod->fcb == NULL)
154  return -1;
155 
156  if (cmd_ln_str_r(acmod->config, "-lda")) {
157  E_INFO("Reading linear feature transformation from %s\n",
158  cmd_ln_str_r(acmod->config, "-lda"));
159  if (feat_read_lda(acmod->fcb,
160  cmd_ln_str_r(acmod->config, "-lda"),
161  cmd_ln_int32_r(acmod->config, "-ldadim")) < 0)
162  return -1;
163  }
164 
165  if (cmd_ln_str_r(acmod->config, "-svspec")) {
166  int32 **subvecs;
167  E_INFO("Using subvector specification %s\n",
168  cmd_ln_str_r(acmod->config, "-svspec"));
169  if ((subvecs = parse_subvecs(cmd_ln_str_r(acmod->config, "-svspec"))) == NULL)
170  return -1;
171  if ((feat_set_subvecs(acmod->fcb, subvecs)) < 0)
172  return -1;
173  }
174 
175  if (cmd_ln_exists_r(acmod->config, "-agcthresh")
176  && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) {
177  agc_set_threshold(acmod->fcb->agc_struct,
178  cmd_ln_float32_r(acmod->config, "-agcthresh"));
179  }
180 
181  if (acmod->fcb->cmn_struct
182  && cmd_ln_exists_r(acmod->config, "-cmninit")) {
183  char *c, *cc, *vallist;
184  int32 nvals;
185 
186  vallist = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit"));
187  c = vallist;
188  nvals = 0;
189  while (nvals < acmod->fcb->cmn_struct->veclen
190  && (cc = strchr(c, ',')) != NULL) {
191  *cc = '\0';
192  acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
193  c = cc + 1;
194  ++nvals;
195  }
196  if (nvals < acmod->fcb->cmn_struct->veclen && *c != '\0') {
197  acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
198  }
199  ckd_free(vallist);
200  }
201  return 0;
202 }
203 
204 int
205 acmod_fe_mismatch(acmod_t *acmod, fe_t *fe)
206 {
207  /* Output vector dimension needs to be the same. */
208  if (cmd_ln_int32_r(acmod->config, "-ceplen") != fe_get_output_size(fe)) {
209  E_ERROR("Configured feature length %d doesn't match feature extraction output size %d\n",
210  cmd_ln_int32_r(acmod->config, "-ceplen"),
211  fe_get_output_size(fe));
212  return TRUE;
213  }
214  /* Feature parameters need to be the same. */
215  /* ... */
216  return FALSE;
217 }
218 
219 int
220 acmod_feat_mismatch(acmod_t *acmod, feat_t *fcb)
221 {
222  /* Feature type needs to be the same. */
223  if (0 != strcmp(cmd_ln_str_r(acmod->config, "-feat"), feat_name(fcb)))
224  return TRUE;
225  /* Input vector dimension needs to be the same. */
226  if (cmd_ln_int32_r(acmod->config, "-ceplen") != feat_cepsize(fcb))
227  return TRUE;
228  /* FIXME: Need to check LDA and stuff too. */
229  return FALSE;
230 }
231 
232 acmod_t *
233 acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
234 {
235  acmod_t *acmod;
236  char const *featparams;
237 
238  acmod = ckd_calloc(1, sizeof(*acmod));
239  acmod->config = cmd_ln_retain(config);
240  acmod->lmath = lmath;
241  acmod->state = ACMOD_IDLE;
242 
243  /* Look for feat.params in acoustic model dir. */
244  if ((featparams = cmd_ln_str_r(acmod->config, "-featparams"))) {
245  if (cmd_ln_parse_file_r(acmod->config, feat_defn, featparams, FALSE) != NULL) {
246  E_INFO("Parsed model-specific feature parameters from %s\n", featparams);
247  }
248  }
249 
250  /* Initialize feature computation. */
251  if (fe) {
252  if (acmod_fe_mismatch(acmod, fe))
253  goto error_out;
254  fe_retain(fe);
255  acmod->fe = fe;
256  }
257  else {
258  /* Initialize a new front end. */
259  acmod->fe = fe_init_auto_r(config);
260  if (acmod->fe == NULL)
261  goto error_out;
262  if (acmod_fe_mismatch(acmod, acmod->fe))
263  goto error_out;
264  }
265  if (fcb) {
266  if (acmod_feat_mismatch(acmod, fcb))
267  goto error_out;
268  feat_retain(fcb);
269  acmod->fcb = fcb;
270  }
271  else {
272  /* Initialize a new fcb. */
273  if (acmod_init_feat(acmod) < 0)
274  goto error_out;
275  }
276 
277  /* Load acoustic model parameters. */
278  if (acmod_init_am(acmod) < 0)
279  goto error_out;
280 
281 
282  /* The MFCC buffer needs to be at least as large as the dynamic
283  * feature window. */
284  acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1;
285  acmod->mfc_buf = (mfcc_t **)
286  ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize,
287  sizeof(**acmod->mfc_buf));
288 
289  /* Feature buffer has to be at least as large as MFCC buffer. */
290  acmod->n_feat_alloc = acmod->n_mfc_alloc + cmd_ln_int32_r(config, "-pl_window");
291  acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc);
292  acmod->framepos = ckd_calloc(acmod->n_feat_alloc, sizeof(*acmod->framepos));
293 
294  /* Senone computation stuff. */
295  acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
296  sizeof(*acmod->senone_scores));
297  acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef));
298  acmod->senone_active = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
299  sizeof(*acmod->senone_active));
300  acmod->log_zero = logmath_get_zero(acmod->lmath);
301  acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen");
302  return acmod;
303 
304 error_out:
305  acmod_free(acmod);
306  return NULL;
307 }
308 
309 void
311 {
312  if (acmod == NULL)
313  return;
314 
315  feat_free(acmod->fcb);
316  fe_free(acmod->fe);
317  cmd_ln_free_r(acmod->config);
318 
319  if (acmod->mfc_buf)
320  ckd_free_2d((void **)acmod->mfc_buf);
321  if (acmod->feat_buf)
322  feat_array_free(acmod->feat_buf);
323 
324  if (acmod->mfcfh)
325  fclose(acmod->mfcfh);
326  if (acmod->rawfh)
327  fclose(acmod->rawfh);
328  if (acmod->senfh)
329  fclose(acmod->senfh);
330 
331  ckd_free(acmod->framepos);
332  ckd_free(acmod->senone_scores);
333  ckd_free(acmod->senone_active_vec);
334  ckd_free(acmod->senone_active);
335 
336  if (acmod->mdef)
337  bin_mdef_free(acmod->mdef);
338  if (acmod->tmat)
339  tmat_free(acmod->tmat);
340  if (acmod->mgau)
341  ps_mgau_free(acmod->mgau);
342  if (acmod->mllr)
343  ps_mllr_free(acmod->mllr);
344 
345  ckd_free(acmod);
346 }
347 
348 ps_mllr_t *
350 {
351  if (acmod->mllr)
352  ps_mllr_free(acmod->mllr);
353  acmod->mllr = mllr;
354  ps_mgau_transform(acmod->mgau, mllr);
355 
356  return mllr;
357 }
358 
359 int
360 acmod_write_senfh_header(acmod_t *acmod, FILE *logfh)
361 {
362  char nsenstr[64], logbasestr[64];
363 
364  sprintf(nsenstr, "%d", bin_mdef_n_sen(acmod->mdef));
365  sprintf(logbasestr, "%f", logmath_get_base(acmod->lmath));
366  return bio_writehdr(logfh,
367  "version", "0.1",
368  "mdef_file", cmd_ln_str_r(acmod->config, "-mdef"),
369  "n_sen", nsenstr,
370  "logbase", logbasestr, NULL);
371 }
372 
373 int
374 acmod_set_senfh(acmod_t *acmod, FILE *logfh)
375 {
376  if (acmod->senfh)
377  fclose(acmod->senfh);
378  acmod->senfh = logfh;
379  if (logfh == NULL)
380  return 0;
381  return acmod_write_senfh_header(acmod, logfh);
382 }
383 
384 int
385 acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
386 {
387  int rv = 0;
388 
389  if (acmod->mfcfh)
390  fclose(acmod->mfcfh);
391  acmod->mfcfh = logfh;
392  fwrite(&rv, 4, 1, acmod->mfcfh);
393  return rv;
394 }
395 
396 int
397 acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
398 {
399  if (acmod->rawfh)
400  fclose(acmod->rawfh);
401  acmod->rawfh = logfh;
402  return 0;
403 }
404 
405 void
406 acmod_grow_feat_buf(acmod_t *acmod, int nfr)
407 {
408  if (nfr > MAX_N_FRAMES)
409  E_FATAL("Decoder can not process more than %d frames at once, requested %d\n",
410  MAX_N_FRAMES, nfr);
411 
412  acmod->feat_buf = feat_array_realloc(acmod->fcb, acmod->feat_buf, acmod->n_feat_alloc, nfr);
413  acmod->framepos = ckd_realloc(acmod->framepos,
414  nfr * sizeof(*acmod->framepos));
415  acmod->n_feat_alloc = nfr;
416 }
417 
418 int
419 acmod_set_grow(acmod_t *acmod, int grow_feat)
420 {
421  int tmp = acmod->grow_feat;
422  acmod->grow_feat = grow_feat;
423 
424  /* Expand feat_buf to a reasonable size to start with. */
425  if (grow_feat && acmod->n_feat_alloc < 128)
426  acmod_grow_feat_buf(acmod, 128);
427 
428  return tmp;
429 }
430 
431 int
433 {
434  fe_start_utt(acmod->fe);
435  acmod->state = ACMOD_STARTED;
436  acmod->n_mfc_frame = 0;
437  acmod->n_feat_frame = 0;
438  acmod->mfc_outidx = 0;
439  acmod->feat_outidx = 0;
440  acmod->output_frame = 0;
441  acmod->senscr_frame = -1;
442  acmod->n_senone_active = 0;
443  acmod->mgau->frame_idx = 0;
444  return 0;
445 }
446 
447 int
449 {
450  int32 nfr = 0;
451 
452  acmod->state = ACMOD_ENDED;
453  if (acmod->n_mfc_frame < acmod->n_mfc_alloc) {
454  int inptr;
455  /* Where to start writing them (circular buffer) */
456  inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
457  /* nfr is always either zero or one. */
458  fe_end_utt(acmod->fe, acmod->mfc_buf[inptr], &nfr);
459  acmod->n_mfc_frame += nfr;
460  /* Process whatever's left, and any leadout. */
461  if (nfr)
462  nfr = acmod_process_mfcbuf(acmod);
463  }
464  if (acmod->mfcfh) {
465  int32 outlen, rv;
466  outlen = (int32) ((FTELL(acmod->mfcfh) - 4) / 4);
467  if (!WORDS_BIGENDIAN)
468  SWAP_INT32(&outlen);
469  /* Try to seek and write */
470  if ((rv = FSEEK(acmod->mfcfh, 0, SEEK_SET)) == 0) {
471  fwrite(&outlen, 4, 1, acmod->mfcfh);
472  }
473  fclose(acmod->mfcfh);
474  acmod->mfcfh = NULL;
475  }
476  if (acmod->rawfh) {
477  fclose(acmod->rawfh);
478  acmod->rawfh = NULL;
479  }
480 
481  if (acmod->senfh) {
482  fclose(acmod->senfh);
483  acmod->senfh = NULL;
484  }
485 
486  return nfr;
487 }
488 
489 static int
490 acmod_log_mfc(acmod_t *acmod,
491  mfcc_t **cep, int n_frames)
492 {
493  int i, n;
494  int32 *ptr = (int32 *)cep[0];
495 
496  n = n_frames * feat_cepsize(acmod->fcb);
497  /* Swap bytes. */
498  if (!WORDS_BIGENDIAN) {
499  for (i = 0; i < (n * sizeof(mfcc_t)); ++i) {
500  SWAP_INT32(ptr + i);
501  }
502  }
503  /* Write features. */
504  if (fwrite(cep[0], sizeof(mfcc_t), n, acmod->mfcfh) != n) {
505  E_ERROR_SYSTEM("Failed to write %d values to log file", n);
506  }
507 
508  /* Swap them back. */
509  if (!WORDS_BIGENDIAN) {
510  for (i = 0; i < (n * sizeof(mfcc_t)); ++i) {
511  SWAP_INT32(ptr + i);
512  }
513  }
514  return 0;
515 }
516 
517 static int
518 acmod_process_full_cep(acmod_t *acmod,
519  mfcc_t ***inout_cep,
520  int *inout_n_frames)
521 {
522  int32 nfr;
523 
524  /* Write to log file. */
525  if (acmod->mfcfh)
526  acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
527 
528  /* Resize feat_buf to fit. */
529  if (acmod->n_feat_alloc < *inout_n_frames) {
530 
531  if (*inout_n_frames > MAX_N_FRAMES)
532  E_FATAL("Batch processing can not process more than %d frames at once, requested %d\n",
533  MAX_N_FRAMES, *inout_n_frames);
534 
535  feat_array_free(acmod->feat_buf);
536  acmod->feat_buf = feat_array_alloc(acmod->fcb, *inout_n_frames);
537  acmod->n_feat_alloc = *inout_n_frames;
538  acmod->n_feat_frame = 0;
539  acmod->feat_outidx = 0;
540  }
541  /* Make dynamic features. */
542  nfr = feat_s2mfc2feat_live(acmod->fcb, *inout_cep, inout_n_frames,
543  TRUE, TRUE, acmod->feat_buf);
544  acmod->n_feat_frame = nfr;
545  assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
546  *inout_cep += *inout_n_frames;
547  *inout_n_frames = 0;
548  return nfr;
549 }
550 
551 static int
552 acmod_process_full_raw(acmod_t *acmod,
553  int16 const **inout_raw,
554  size_t *inout_n_samps)
555 {
556  int32 nfr, ntail;
557  mfcc_t **cepptr;
558 
559  /* Write to logging file if any. */
560  if (acmod->rawfh)
561  fwrite(*inout_raw, 2, *inout_n_samps, acmod->rawfh);
562  /* Resize mfc_buf to fit. */
563  if (fe_process_frames(acmod->fe, NULL, inout_n_samps, NULL, &nfr) < 0)
564  return -1;
565  if (acmod->n_mfc_alloc < nfr + 1) {
566  ckd_free_2d(acmod->mfc_buf);
567  acmod->mfc_buf = ckd_calloc_2d(nfr + 1, fe_get_output_size(acmod->fe),
568  sizeof(**acmod->mfc_buf));
569  acmod->n_mfc_alloc = nfr + 1;
570  }
571  acmod->n_mfc_frame = 0;
572  acmod->mfc_outidx = 0;
573  fe_start_utt(acmod->fe);
574  if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
575  acmod->mfc_buf, &nfr) < 0)
576  return -1;
577  fe_end_utt(acmod->fe, acmod->mfc_buf[nfr], &ntail);
578  nfr += ntail;
579 
580  cepptr = acmod->mfc_buf;
581  nfr = acmod_process_full_cep(acmod, &cepptr, &nfr);
582  acmod->n_mfc_frame = 0;
583  return nfr;
584 }
585 
589 static int32
590 acmod_process_mfcbuf(acmod_t *acmod)
591 {
592  mfcc_t **mfcptr;
593  int32 ncep;
594 
595  ncep = acmod->n_mfc_frame;
596  /* Also do this in two parts because of the circular mfc_buf. */
597  if (acmod->mfc_outidx + ncep > acmod->n_mfc_alloc) {
598  int32 ncep1 = acmod->n_mfc_alloc - acmod->mfc_outidx;
599  int saved_state = acmod->state;
600 
601  /* Make sure we don't end the utterance here. */
602  if (acmod->state == ACMOD_ENDED)
603  acmod->state = ACMOD_PROCESSING;
604  mfcptr = acmod->mfc_buf + acmod->mfc_outidx;
605  ncep1 = acmod_process_cep(acmod, &mfcptr, &ncep1, FALSE);
606  /* It's possible that not all available frames were filled. */
607  ncep -= ncep1;
608  acmod->n_mfc_frame -= ncep1;
609  acmod->mfc_outidx += ncep1;
610  acmod->mfc_outidx %= acmod->n_mfc_alloc;
611  /* Restore original state (could this really be the end) */
612  acmod->state = saved_state;
613  }
614  mfcptr = acmod->mfc_buf + acmod->mfc_outidx;
615  ncep = acmod_process_cep(acmod, &mfcptr, &ncep, FALSE);
616  acmod->n_mfc_frame -= ncep;
617  acmod->mfc_outidx += ncep;
618  acmod->mfc_outidx %= acmod->n_mfc_alloc;
619  return ncep;
620 }
621 
622 int
624  int16 const **inout_raw,
625  size_t *inout_n_samps,
626  int full_utt)
627 {
628  int32 ncep;
629 
630  /* If this is a full utterance, process it all at once. */
631  if (full_utt)
632  return acmod_process_full_raw(acmod, inout_raw, inout_n_samps);
633 
634  /* Append MFCCs to the end of any that are previously in there
635  * (in practice, there will probably be none) */
636  if (inout_n_samps && *inout_n_samps) {
637  int16 const *prev_audio_inptr = *inout_raw;
638  int inptr;
639 
640  /* Total number of frames available. */
641  ncep = acmod->n_mfc_alloc - acmod->n_mfc_frame;
642  /* Where to start writing them (circular buffer) */
643  inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
644 
645  /* Write them in two (or more) parts if there is wraparound. */
646  while (inptr + ncep > acmod->n_mfc_alloc) {
647  int32 ncep1 = acmod->n_mfc_alloc - inptr;
648  if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
649  acmod->mfc_buf + inptr, &ncep1) < 0)
650  return -1;
651  /* Write to logging file if any. */
652  if (acmod->rawfh) {
653  fwrite(prev_audio_inptr, 2,
654  *inout_raw - prev_audio_inptr,
655  acmod->rawfh);
656  prev_audio_inptr = *inout_raw;
657  }
658  /* ncep1 now contains the number of frames actually
659  * processed. This is a good thing, but it means we
660  * actually still might have some room left at the end of
661  * the buffer, hence the while loop. Unfortunately it
662  * also means that in the case where we are really
663  * actually done, we need to get out totally, hence the
664  * goto. */
665  acmod->n_mfc_frame += ncep1;
666  ncep -= ncep1;
667  inptr += ncep1;
668  inptr %= acmod->n_mfc_alloc;
669  if (ncep1 == 0)
670  goto alldone;
671  }
672  assert(inptr + ncep <= acmod->n_mfc_alloc);
673  if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
674  acmod->mfc_buf + inptr, &ncep) < 0)
675  return -1;
676  /* Write to logging file if any. */
677  if (acmod->rawfh) {
678  fwrite(prev_audio_inptr, 2,
679  *inout_raw - prev_audio_inptr, acmod->rawfh);
680  prev_audio_inptr = *inout_raw;
681  }
682  acmod->n_mfc_frame += ncep;
683  alldone:
684  ;
685  }
686 
687  /* Hand things off to acmod_process_cep. */
688  return acmod_process_mfcbuf(acmod);
689 }
690 
691 int
693  mfcc_t ***inout_cep,
694  int *inout_n_frames,
695  int full_utt)
696 {
697  int32 nfeat, ncep, inptr;
698  int orig_n_frames;
699 
700  /* If this is a full utterance, process it all at once. */
701  if (full_utt)
702  return acmod_process_full_cep(acmod, inout_cep, inout_n_frames);
703 
704  /* Write to log file. */
705  if (acmod->mfcfh)
706  acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
707 
708  /* Maximum number of frames we're going to generate. */
709  orig_n_frames = ncep = nfeat = *inout_n_frames;
710 
711  /* FIXME: This behaviour isn't guaranteed... */
712  if (acmod->state == ACMOD_ENDED)
713  nfeat += feat_window_size(acmod->fcb);
714  else if (acmod->state == ACMOD_STARTED)
715  nfeat -= feat_window_size(acmod->fcb);
716 
717  /* Clamp number of features to fit available space. */
718  if (nfeat > acmod->n_feat_alloc - acmod->n_feat_frame) {
719  /* Grow it as needed - we have to grow it at the end of an
720  * utterance because we can't return a short read there. */
721  if (acmod->grow_feat || acmod->state == ACMOD_ENDED)
722  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc + nfeat);
723  else
724  ncep -= (nfeat - (acmod->n_feat_alloc - acmod->n_feat_frame));
725  }
726 
727  /* Where to start writing in the feature buffer. */
728  if (acmod->grow_feat) {
729  /* Grow to avoid wraparound if grow_feat == TRUE. */
730  inptr = acmod->feat_outidx + acmod->n_feat_frame;
731  while (inptr + nfeat >= acmod->n_feat_alloc)
732  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
733  }
734  else {
735  inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
736  }
737 
738 
739  /* FIXME: we can't split the last frame drop properly to be on the bounary, so just return */
740  if (inptr + nfeat > acmod->n_feat_alloc && acmod->state == ACMOD_ENDED) {
741  *inout_n_frames -= ncep;
742  *inout_cep += ncep;
743  return 0;
744  }
745 
746  /* Write them in two parts if there is wraparound. */
747  if (inptr + nfeat > acmod->n_feat_alloc) {
748  int32 ncep1 = acmod->n_feat_alloc - inptr;
749 
750  /* Make sure we don't end the utterance here. */
751  nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
752  &ncep1,
753  (acmod->state == ACMOD_STARTED),
754  FALSE,
755  acmod->feat_buf + inptr);
756  if (nfeat < 0)
757  return -1;
758  /* Move the output feature pointer forward. */
759  acmod->n_feat_frame += nfeat;
760  assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
761  inptr += nfeat;
762  inptr %= acmod->n_feat_alloc;
763  /* Move the input feature pointers forward. */
764  *inout_n_frames -= ncep1;
765  *inout_cep += ncep1;
766  ncep -= ncep1;
767  }
768 
769  nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
770  &ncep,
771  (acmod->state == ACMOD_STARTED),
772  (acmod->state == ACMOD_ENDED),
773  acmod->feat_buf + inptr);
774  if (nfeat < 0)
775  return -1;
776  acmod->n_feat_frame += nfeat;
777  assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
778  /* Move the input feature pointers forward. */
779  *inout_n_frames -= ncep;
780  *inout_cep += ncep;
781  if (acmod->state == ACMOD_STARTED)
782  acmod->state = ACMOD_PROCESSING;
783  return orig_n_frames - *inout_n_frames;
784 }
785 
786 int
788  mfcc_t **feat)
789 {
790  int i, inptr;
791 
792  if (acmod->n_feat_frame == acmod->n_feat_alloc) {
793  if (acmod->grow_feat)
794  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
795  else
796  return 0;
797  }
798 
799  if (acmod->grow_feat) {
800  /* Grow to avoid wraparound if grow_feat == TRUE. */
801  inptr = acmod->feat_outidx + acmod->n_feat_frame;
802  while (inptr + 1 >= acmod->n_feat_alloc)
803  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
804  }
805  else {
806  inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
807  }
808  for (i = 0; i < feat_dimension1(acmod->fcb); ++i)
809  memcpy(acmod->feat_buf[inptr][i],
810  feat[i], feat_dimension2(acmod->fcb, i) * sizeof(**feat));
811  ++acmod->n_feat_frame;
812  assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
813 
814  return 1;
815 }
816 
817 static int
818 acmod_read_senfh_header(acmod_t *acmod)
819 {
820  char **name, **val;
821  int32 swap;
822  int i;
823 
824  if (bio_readhdr(acmod->insenfh, &name, &val, &swap) < 0)
825  goto error_out;
826  for (i = 0; name[i] != NULL; ++i) {
827  if (!strcmp(name[i], "n_sen")) {
828  if (atoi(val[i]) != bin_mdef_n_sen(acmod->mdef)) {
829  E_ERROR("Number of senones in senone file (%d) does not match mdef (%d)\n",
830  atoi(val[i]), bin_mdef_n_sen(acmod->mdef));
831  goto error_out;
832  }
833  }
834  if (!strcmp(name[i], "logbase")) {
835  if (abs(atof(val[i]) - logmath_get_base(acmod->lmath)) > 0.001) {
836  E_ERROR("Logbase in senone file (%f) does not match acmod (%f)\n",
837  atof(val[i]), logmath_get_base(acmod->lmath));
838  goto error_out;
839  }
840  }
841  }
842  acmod->insen_swap = swap;
843  bio_hdrarg_free(name, val);
844  return 0;
845 error_out:
846  bio_hdrarg_free(name, val);
847  return -1;
848 }
849 
850 int
851 acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
852 {
853  acmod->insenfh = senfh;
854  if (senfh == NULL) {
855  acmod->n_feat_frame = 0;
856  acmod->compallsen = cmd_ln_boolean_r(acmod->config, "-compallsen");
857  return 0;
858  }
859  acmod->compallsen = TRUE;
860  return acmod_read_senfh_header(acmod);
861 }
862 
863 int
865 {
866  /* If the feature buffer is circular, this is not possible. */
867  if (acmod->output_frame > acmod->n_feat_alloc) {
868  E_ERROR("Circular feature buffer cannot be rewound (output frame %d, alloc %d)\n",
869  acmod->output_frame, acmod->n_feat_alloc);
870  return -1;
871  }
872 
873  /* Frames consumed + frames available */
874  acmod->n_feat_frame = acmod->output_frame + acmod->n_feat_frame;
875 
876  /* Reset output pointers. */
877  acmod->feat_outidx = 0;
878  acmod->output_frame = 0;
879  acmod->senscr_frame = -1;
880  acmod->mgau->frame_idx = 0;
881 
882  return 0;
883 }
884 
885 int
887 {
888  /* Advance the output pointers. */
889  if (++acmod->feat_outidx == acmod->n_feat_alloc)
890  acmod->feat_outidx = 0;
891  --acmod->n_feat_frame;
892  ++acmod->mgau->frame_idx;
893 
894  return ++acmod->output_frame;
895 }
896 
897 int
898 acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active,
899  int16 const *senscr, FILE *senfh)
900 {
901  int16 n_active2;
902 
903  /* Uncompressed frame format:
904  *
905  * (2 bytes) n_active: Number of active senones
906  * If all senones active:
907  * (n_active * 2 bytes) scores of active senones
908  *
909  * Otherwise:
910  * (2 bytes) n_active: Number of active senones
911  * (n_active bytes) deltas to active senones
912  * (n_active * 2 bytes) scores of active senones
913  */
914  n_active2 = n_active;
915  if (fwrite(&n_active2, 2, 1, senfh) != 1)
916  goto error_out;
917  if (n_active == bin_mdef_n_sen(acmod->mdef)) {
918  if (fwrite(senscr, 2, n_active, senfh) != n_active)
919  goto error_out;
920  }
921  else {
922  int i, n;
923  if (fwrite(active, 1, n_active, senfh) != n_active)
924  goto error_out;
925  for (i = n = 0; i < n_active; ++i) {
926  n += active[i];
927  if (fwrite(senscr + n, 2, 1, senfh) != 1)
928  goto error_out;
929  }
930  }
931  return 0;
932 error_out:
933  E_ERROR_SYSTEM("Failed to write frame to senone file");
934  return -1;
935 }
936 
940 static int
941 acmod_read_scores_internal(acmod_t *acmod)
942 {
943  FILE *senfh = acmod->insenfh;
944  int16 n_active;
945  int rv;
946 
947  if (acmod->n_feat_frame == acmod->n_feat_alloc) {
948  if (acmod->grow_feat)
949  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
950  else
951  return 0;
952  }
953 
954  if (senfh == NULL)
955  return -1;
956  if ((rv = fread(&n_active, 2, 1, senfh)) < 0)
957  goto error_out;
958  else if (rv == 0)
959  return 0;
960 
961  acmod->n_senone_active = n_active;
962  if (acmod->n_senone_active == bin_mdef_n_sen(acmod->mdef)) {
963  if ((rv = fread(acmod->senone_scores, 2,
964  acmod->n_senone_active, senfh)) < 0)
965  goto error_out;
966  else if (rv != acmod->n_senone_active)
967  return 0;
968  }
969  else {
970  int i, n;
971  if ((rv = fread(acmod->senone_active, 1,
972  acmod->n_senone_active, senfh)) < 0)
973  goto error_out;
974  else if (rv != acmod->n_senone_active)
975  return 0;
976  for (i = 0, n = 0; i < acmod->n_senone_active; ++i) {
977  int j, sen = n + acmod->senone_active[i];
978  for (j = n + 1; j < sen; ++j)
979  acmod->senone_scores[j] = SENSCR_DUMMY;
980  if ((rv = fread(acmod->senone_scores + sen, 2, 1, senfh)) < 0)
981  goto error_out;
982  else if (rv == 0)
983  return 0;
984  n = sen;
985  }
986  ++n;
987  while (n < bin_mdef_n_sen(acmod->mdef))
988  acmod->senone_scores[n++] = SENSCR_DUMMY;
989  }
990  return 1;
991 error_out:
992  E_ERROR_SYSTEM("Failed to read frame from senone file");
993  return -1;
994 }
995 
996 int
998 {
999  int inptr, rv;
1000 
1001  if (acmod->grow_feat) {
1002  /* Grow to avoid wraparound if grow_feat == TRUE. */
1003  inptr = acmod->feat_outidx + acmod->n_feat_frame;
1004  /* Has to be +1, otherwise, next time acmod_advance() is
1005  * called, this will wrap around. */
1006  while (inptr + 1 >= acmod->n_feat_alloc)
1007  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
1008  }
1009  else {
1010  inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
1011  }
1012 
1013  if ((rv = acmod_read_scores_internal(acmod)) != 1)
1014  return rv;
1015 
1016  /* Set acmod->senscr_frame appropriately so that these scores
1017  get reused below in acmod_score(). */
1018  acmod->senscr_frame = acmod->output_frame + acmod->n_feat_frame;
1019 
1020  E_DEBUG(1,("Frame %d has %d active states\n",
1021  acmod->senscr_frame, acmod->n_senone_active));
1022 
1023  /* Increment the "feature frame counter" and record the file
1024  * position for the relevant frame in the (possibly circular)
1025  * buffer. */
1026  ++acmod->n_feat_frame;
1027  acmod->framepos[inptr] = FTELL(acmod->insenfh);
1028 
1029  return 1;
1030 }
1031 
1032 static int
1033 calc_frame_idx(acmod_t *acmod, int *inout_frame_idx)
1034 {
1035  int frame_idx;
1036 
1037  /* Calculate the absolute frame index to be scored. */
1038  if (inout_frame_idx == NULL)
1039  frame_idx = acmod->output_frame;
1040  else if (*inout_frame_idx < 0)
1041  frame_idx = acmod->output_frame + 1 + *inout_frame_idx;
1042  else
1043  frame_idx = *inout_frame_idx;
1044 
1045  return frame_idx;
1046 }
1047 
1048 static int
1049 calc_feat_idx(acmod_t *acmod, int frame_idx)
1050 {
1051  int n_backfr, feat_idx;
1052 
1053  n_backfr = acmod->n_feat_alloc - acmod->n_feat_frame;
1054  if (frame_idx < 0 || acmod->output_frame - frame_idx > n_backfr) {
1055  E_ERROR("Frame %d outside queue of %d frames, %d alloc (%d > %d), cannot score\n",
1056  frame_idx, acmod->n_feat_frame, acmod->n_feat_alloc,
1057  acmod->output_frame - frame_idx, n_backfr);
1058  return -1;
1059  }
1060 
1061  /* Get the index in feat_buf/framepos of the frame to be scored. */
1062  feat_idx = ((acmod->feat_outidx + frame_idx - acmod->output_frame)
1063  % acmod->n_feat_alloc);
1064  if (feat_idx < 0) feat_idx += acmod->n_feat_alloc;
1065 
1066  return feat_idx;
1067 }
1068 
1069 mfcc_t **
1070 acmod_get_frame(acmod_t *acmod, int *inout_frame_idx)
1071 {
1072  int frame_idx, feat_idx;
1073 
1074  /* Calculate the absolute frame index requested. */
1075  frame_idx = calc_frame_idx(acmod, inout_frame_idx);
1076 
1077  /* Calculate position of requested frame in circular buffer. */
1078  if ((feat_idx = calc_feat_idx(acmod, frame_idx)) < 0)
1079  return NULL;
1080 
1081  if (inout_frame_idx)
1082  *inout_frame_idx = frame_idx;
1083 
1084  return acmod->feat_buf[feat_idx];
1085 }
1086 
1087 int16 const *
1088 acmod_score(acmod_t *acmod, int *inout_frame_idx)
1089 {
1090  int frame_idx, feat_idx;
1091 
1092  /* Calculate the absolute frame index to be scored. */
1093  frame_idx = calc_frame_idx(acmod, inout_frame_idx);
1094 
1095  /* If all senones are being computed, or we are using a senone file,
1096  then we can reuse existing scores. */
1097  if ((acmod->compallsen || acmod->insenfh)
1098  && frame_idx == acmod->senscr_frame) {
1099  if (inout_frame_idx)
1100  *inout_frame_idx = frame_idx;
1101  return acmod->senone_scores;
1102  }
1103 
1104  /* Calculate position of requested frame in circular buffer. */
1105  if ((feat_idx = calc_feat_idx(acmod, frame_idx)) < 0)
1106  return NULL;
1107 
1108  /* If there is an input senone file locate the appropriate frame and read it. */
1109  if (acmod->insenfh) {
1110  FSEEK(acmod->insenfh, acmod->framepos[feat_idx], SEEK_SET);
1111  if (acmod_read_scores_internal(acmod) < 0)
1112  return NULL;
1113  }
1114  else {
1115  /* Build active senone list. */
1116  acmod_flags2list(acmod);
1117 
1118  /* Generate scores for the next available frame */
1119  ps_mgau_frame_eval(acmod->mgau,
1120  acmod->senone_scores,
1121  acmod->senone_active,
1122  acmod->n_senone_active,
1123  acmod->feat_buf[feat_idx],
1124  frame_idx,
1125  acmod->compallsen);
1126  }
1127 
1128  if (inout_frame_idx)
1129  *inout_frame_idx = frame_idx;
1130  acmod->senscr_frame = frame_idx;
1131 
1132  /* Dump scores to the senone dump file if one exists. */
1133  if (acmod->senfh) {
1134  if (acmod_write_scores(acmod, acmod->n_senone_active,
1135  acmod->senone_active,
1136  acmod->senone_scores,
1137  acmod->senfh) < 0)
1138  return NULL;
1139  E_DEBUG(1,("Frame %d has %d active states\n", frame_idx, acmod->n_senone_active));
1140  }
1141 
1142  return acmod->senone_scores;
1143 }
1144 
1145 int
1146 acmod_best_score(acmod_t *acmod, int *out_best_senid)
1147 {
1148  int i, best;
1149 
1150  best = SENSCR_DUMMY;
1151  if (acmod->compallsen) {
1152  for (i = 0; i < bin_mdef_n_sen(acmod->mdef); ++i) {
1153  if (acmod->senone_scores[i] < best) {
1154  best = acmod->senone_scores[i];
1155  *out_best_senid = i;
1156  }
1157  }
1158  }
1159  else {
1160  int16 *senscr;
1161  senscr = acmod->senone_scores;
1162  for (i = 0; i < acmod->n_senone_active; ++i) {
1163  senscr += acmod->senone_active[i];
1164  if (*senscr < best) {
1165  best = *senscr;
1166  *out_best_senid = i;
1167  }
1168  }
1169  }
1170  return best;
1171 }
1172 
1173 
1174 void
1176 {
1177  if (acmod->compallsen)
1178  return;
1179  bitvec_clear_all(acmod->senone_active_vec, bin_mdef_n_sen(acmod->mdef));
1180  acmod->n_senone_active = 0;
1181 }
1182 
/* Flag the senone of state i of multiplex HMM h as active in a's
 * senone_active_vec, unless that state's ssid is BAD_SSID.  Wrapped in
 * do { } while (0) so the expansion is a single statement and cannot
 * capture a following `else'. */
#define MPX_BITVEC_SET(a,h,i)                                   \
    do {                                                        \
        if (hmm_mpx_ssid(h,i) != BAD_SSID)                      \
            bitvec_set((a)->senone_active_vec,                  \
                       hmm_mpx_senid(h,i));                     \
    } while (0)
/* Flag the senone of state i of non-multiplex HMM h as active in a's
 * senone_active_vec. */
#define NONMPX_BITVEC_SET(a,h,i)                                        \
    bitvec_set((a)->senone_active_vec,                                  \
               hmm_nonmpx_senid(h,i))
1189 
1190 void
1192 {
1193  int i;
1194 
1195  if (acmod->compallsen)
1196  return;
1197  if (hmm_is_mpx(hmm)) {
1198  switch (hmm_n_emit_state(hmm)) {
1199  case 5:
1200  MPX_BITVEC_SET(acmod, hmm, 4);
1201  MPX_BITVEC_SET(acmod, hmm, 3);
1202  case 3:
1203  MPX_BITVEC_SET(acmod, hmm, 2);
1204  MPX_BITVEC_SET(acmod, hmm, 1);
1205  MPX_BITVEC_SET(acmod, hmm, 0);
1206  break;
1207  default:
1208  for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
1209  MPX_BITVEC_SET(acmod, hmm, i);
1210  }
1211  }
1212  }
1213  else {
1214  switch (hmm_n_emit_state(hmm)) {
1215  case 5:
1216  NONMPX_BITVEC_SET(acmod, hmm, 4);
1217  NONMPX_BITVEC_SET(acmod, hmm, 3);
1218  case 3:
1219  NONMPX_BITVEC_SET(acmod, hmm, 2);
1220  NONMPX_BITVEC_SET(acmod, hmm, 1);
1221  NONMPX_BITVEC_SET(acmod, hmm, 0);
1222  break;
1223  default:
1224  for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
1225  NONMPX_BITVEC_SET(acmod, hmm, i);
1226  }
1227  }
1228  }
1229 }
1230 
1231 int32
1233 {
1234  int32 w, l, n, b, total_dists, total_words, extra_bits;
1235  bitvec_t *flagptr;
1236 
1237  total_dists = bin_mdef_n_sen(acmod->mdef);
1238  if (acmod->compallsen) {
1239  acmod->n_senone_active = total_dists;
1240  return total_dists;
1241  }
1242  total_words = total_dists / BITVEC_BITS;
1243  extra_bits = total_dists % BITVEC_BITS;
1244  w = n = l = 0;
1245  for (flagptr = acmod->senone_active_vec; w < total_words; ++w, ++flagptr) {
1246  if (*flagptr == 0)
1247  continue;
1248  for (b = 0; b < BITVEC_BITS; ++b) {
1249  if (*flagptr & (1UL << b)) {
1250  int32 sen = w * BITVEC_BITS + b;
1251  int32 delta = sen - l;
1252  /* Handle excessive deltas "lossily" by adding a few
1253  extra senones to bridge the gap. */
1254  while (delta > 255) {
1255  acmod->senone_active[n++] = 255;
1256  delta -= 255;
1257  }
1258  acmod->senone_active[n++] = delta;
1259  l = sen;
1260  }
1261  }
1262  }
1263 
1264  for (b = 0; b < extra_bits; ++b) {
1265  if (*flagptr & (1UL << b)) {
1266  int32 sen = w * BITVEC_BITS + b;
1267  int32 delta = sen - l;
1268  /* Handle excessive deltas "lossily" by adding a few
1269  extra senones to bridge the gap. */
1270  while (delta > 255) {
1271  acmod->senone_active[n++] = 255;
1272  delta -= 255;
1273  }
1274  acmod->senone_active[n++] = delta;
1275  l = sen;
1276  }
1277  }
1278 
1279  acmod->n_senone_active = n;
1280  E_DEBUG(1, ("acmod_flags2list: %d active in frame %d\n",
1281  acmod->n_senone_active, acmod->output_frame));
1282  return n;
1283 }
(Sphinx 3.0 specific) A module that wraps up the code of gauden and senone because they are closely r...
FILE * insenfh
Input senone score file.
Definition: acmod.h:178
uint8 grow_feat
Whether to grow feat_buf.
Definition: acmod.h:184
ps_mgau_t * mgau
Model parameters.
Definition: acmod.h:161
int acmod_read_scores(acmod_t *acmod)
Read one frame of scores from senone score dump file.
Definition: acmod.c:997
Not in an utterance.
Definition: acmod.h:68
uint8 * senone_active
Array of deltas to active GMMs.
Definition: acmod.h:167
long * framepos
File positions of recent frames in senone file.
Definition: acmod.h:179
Utterance started, no data yet.
Definition: acmod.h:69
int acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
Set up a senone score dump file for input.
Definition: acmod.c:851
int acmod_rewind(acmod_t *acmod)
Rewind the current utterance, allowing it to be rescored.
Definition: acmod.c:864
int16 * senone_scores
GMM scores for current frame.
Definition: acmod.h:165
ps_mllr_t * acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
Adapt acoustic model using a linear transform.
Definition: acmod.c:349
int acmod_process_cep(acmod_t *acmod, mfcc_t ***inout_cep, int *inout_n_frames, int full_utt)
Feed acoustic feature data into the acoustic model for scoring.
Definition: acmod.c:692
Utterance in progress.
Definition: acmod.h:70
int n_senone_active
Number of active GMMs.
Definition: acmod.h:169
An individual HMM among the HMM search space.
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
fe_t * fe
Acoustic feature computation.
Definition: acmod.h:155
frame_idx_t n_mfc_frame
Number of frames active in mfc_buf.
Definition: acmod.h:189
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition: acmod.c:1191
Utterance ended, still buffering.
Definition: acmod.h:71
float32 *** b
Bias part of mean transformations.
Definition: acmod.h:88
FILE * rawfh
File for writing raw audio data.
Definition: acmod.h:175
mfcc_t ** mfc_buf
Temporary buffer of acoustic features.
Definition: acmod.h:173
void tmat_free(tmat_t *t)
RAH, add code to remove memory allocated by tmat_init.
Definition: tmat.c:332
frame_idx_t n_feat_alloc
Number of frames allocated in feat_buf.
Definition: acmod.h:191
mfcc_t *** feat_buf
Temporary buffer of dynamic features.
Definition: acmod.h:174
tmat_t * tmat_init(char const *tmatfile, logmath_t *lmath, float64 tpfloor, int32 breport)
Initialize transition matrix.
Definition: tmat.c:191
int acmod_set_senfh(acmod_t *acmod, FILE *logfh)
Start logging senone scores to a filehandle.
Definition: acmod.c:374
#define MAX_N_FRAMES
Maximum number of frames in index, should be in sync with above.
Definition: hmm.h:73
int acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
Start logging raw audio to a filehandle.
Definition: acmod.c:397
ps_mllr_t * mllr
Speaker transformation.
Definition: acmod.h:162
Fast phonetically-tied mixture evaluation.
POCKETSPHINX_EXPORT ps_mllr_t * ps_mllr_read(char const *file)
Read a speaker-adaptive linear transform from a file.
Definition: ps_mllr.c:52
uint8 compallsen
Compute all senones?
Definition: acmod.h:183
POCKETSPHINX_EXPORT bin_mdef_t * bin_mdef_read(cmd_ln_t *config, const char *filename)
Read a binary mdef from a file.
Definition: bin_mdef.c:323
int acmod_process_feat(acmod_t *acmod, mfcc_t **feat)
Feed dynamic feature data into the acoustic model for scoring.
Definition: acmod.c:787
int acmod_write_senfh_header(acmod_t *acmod, FILE *logfh)
Write senone dump file header.
Definition: acmod.c:360
cmd_ln_t * config
Configuration.
Definition: acmod.h:150
frame_idx_t output_frame
Index of next frame of dynamic features.
Definition: acmod.h:187
int acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active, int16 const *senscr, FILE *senfh)
Write a frame of senone scores to a dump file.
Definition: acmod.c:898
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
int32 acmod_flags2list(acmod_t *acmod)
Build the active senone list from the active-senone bit vector.
Definition: acmod.c:1232
POCKETSPHINX_EXPORT int ps_mllr_free(ps_mllr_t *mllr)
Release a pointer to a linear transform.
Definition: ps_mllr.c:145
int acmod_end_utt(acmod_t *acmod)
Mark the end of an utterance.
Definition: acmod.c:448
int acmod_advance(acmod_t *acmod)
Advance the frame index.
Definition: acmod.c:886
uint8 state
State of utterance processing.
Definition: acmod.h:182
int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
Start logging MFCCs to a filehandle.
Definition: acmod.c:385
void acmod_free(acmod_t *acmod)
Finalize an acoustic model.
Definition: acmod.c:310
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition: acmod.c:1175
#define SENSCR_DUMMY
Dummy senone score value for unintentionally active states.
Definition: acmod.h:77
Feature space linear transform structure.
Definition: acmod.h:82
frame_idx_t feat_outidx
Start of active frames in feat_buf.
Definition: acmod.h:193
mfcc_t ** acmod_get_frame(acmod_t *acmod, int *inout_frame_idx)
Get a frame of dynamic feature data.
Definition: acmod.c:1070
feat_t * fcb
Dynamic feature computation.
Definition: acmod.h:156
int log_zero
Zero log-probability value.
Definition: acmod.h:170
FILE * senfh
File for writing senone score data.
Definition: acmod.h:177
frame_idx_t mfc_outidx
Start of active frames in mfc_buf.
Definition: acmod.h:190
frame_idx_t n_mfc_alloc
Number of frames allocated in mfc_buf.
Definition: acmod.h:188
int acmod_process_raw(acmod_t *acmod, int16 const **inout_raw, size_t *inout_n_samps, int full_utt)
TODO: Set queue length for utterance processing.
Definition: acmod.c:623
int bin_mdef_free(bin_mdef_t *m)
Release a pointer to a binary mdef.
Definition: bin_mdef.c:272
uint8 insen_swap
Whether to swap input senone score.
Definition: acmod.h:185
int acmod_start_utt(acmod_t *acmod)
Mark the start of an utterance.
Definition: acmod.c:432
int senscr_frame
Frame index for senone_scores.
Definition: acmod.h:168
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
int acmod_best_score(acmod_t *acmod, int *out_best_senid)
Get best score and senone index for current frame.
Definition: acmod.c:1146
acmod_t * acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
Initialize an acoustic model.
Definition: acmod.c:233
frame_idx_t n_feat_frame
Number of frames active in feat_buf.
Definition: acmod.h:192
Acoustic model structures for PocketSphinx.
FILE * mfcfh
File for writing acoustic feature data.
Definition: acmod.h:176
Acoustic model structure.
Definition: acmod.h:148
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
Definition: acmod.c:419
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition: acmod.c:1088
int frame_idx
frame counter.
Definition: acmod.h:115
bitvec_t * senone_active_vec
Active GMMs in current frame.
Definition: acmod.h:166