PocketSphinx  0.6
pocketsphinx.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /* System headers. */
39 #include <stdio.h>
40 #include <assert.h>
41 
42 /* SphinxBase headers. */
43 #include <sphinxbase/err.h>
44 #include <sphinxbase/strfuncs.h>
45 #include <sphinxbase/filename.h>
46 #include <sphinxbase/pio.h>
47 
48 /* Local headers. */
49 #include "cmdln_macro.h"
50 #include "pocketsphinx_internal.h"
51 #include "ps_lattice_internal.h"
52 #include "phone_loop_search.h"
53 #include "fsg_search_internal.h"
54 #include "ngram_search.h"
55 #include "ngram_search_fwdtree.h"
56 #include "ngram_search_fwdflat.h"
57 
58 static const arg_t ps_args_def[] = {
59  POCKETSPHINX_OPTIONS,
60  CMDLN_EMPTY_OPTION
61 };
62 
/**
 * Check whether a file can be opened for reading.
 *
 * I'm not sure what the portable way to do this is, so this simply
 * tries to fopen() the path in binary read mode.
 *
 * @param path Path to probe; NULL is reported as "does not exist".
 * @return 1 if the file could be opened, 0 otherwise.
 */
static int
file_exists(const char *path)
{
    FILE *fh;

    if (path == NULL)
        return 0;
    if ((fh = fopen(path, "rb")) == NULL)
        return 0;
    fclose(fh);
    return 1;
}
73 
/**
 * Check whether a directory looks like an acoustic model directory,
 * i.e. whether "<path>/mdef" can be opened for reading.
 *
 * @param path Candidate acoustic model directory.
 * @return 1 if the mdef file could be opened, 0 otherwise.
 */
static int
hmmdir_exists(const char *path)
{
    char *mdef = string_join(path, "/mdef", NULL);
    FILE *fh = fopen(mdef, "rb");

    ckd_free(mdef);
    if (fh == NULL)
        return 0;
    fclose(fh);
    return 1;
}
85 
86 static void
87 ps_add_file(ps_decoder_t *ps, const char *arg,
88  const char *hmmdir, const char *file)
89 {
90  char *tmp = string_join(hmmdir, "/", file, NULL);
91 
92  if (cmd_ln_str_r(ps->config, arg) == NULL && file_exists(tmp))
93  cmd_ln_set_str_r(ps->config, arg, tmp);
94  ckd_free(tmp);
95 }
96 
97 static void
98 ps_init_defaults(ps_decoder_t *ps)
99 {
100  char const *hmmdir, *lmfile, *dictfile;
101 
102  /* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). */
103 #ifdef __ADSPBLACKFIN__
104  E_INFO("Will not use mmap() on uClinux/Blackfin.");
105  cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE);
106 #endif
107 
108 #ifdef MODELDIR
109  /* Set default acoustic and language models. */
110  hmmdir = cmd_ln_str_r(ps->config, "-hmm");
111  lmfile = cmd_ln_str_r(ps->config, "-lm");
112  dictfile = cmd_ln_str_r(ps->config, "-dict");
113  if (hmmdir == NULL && hmmdir_exists(MODELDIR "/hmm/en_US/hub4wsj_sc_8k")) {
114  hmmdir = MODELDIR "/hmm/en_US/hub4wsj_sc_8k";
115  cmd_ln_set_str_r(ps->config, "-hmm", hmmdir);
116  }
117  if (lmfile == NULL && !cmd_ln_str_r(ps->config, "-fsg")
118  && !cmd_ln_str_r(ps->config, "-jsgf")
119  && file_exists(MODELDIR "/lm/en_US/hub4.5000.DMP")) {
120  lmfile = MODELDIR "/lm/en_US/hub4.5000.DMP";
121  cmd_ln_set_str_r(ps->config, "-lm", lmfile);
122  }
123  if (dictfile == NULL && file_exists(MODELDIR "/lm/en_US/cmu07a.dic")) {
124  dictfile = MODELDIR "/lm/en_US/cmu07a.dic";
125  cmd_ln_set_str_r(ps->config, "-dict", dictfile);
126  }
127 
128  /* Expand acoustic and language model filenames relative to installation path. */
129  if (hmmdir && !path_is_absolute(hmmdir) && !hmmdir_exists(hmmdir)) {
130  char *tmphmm = string_join(MODELDIR "/hmm/", hmmdir, NULL);
131  if (hmmdir_exists(tmphmm)) {
132  cmd_ln_set_str_r(ps->config, "-hmm", tmphmm);
133  } else {
134  E_ERROR("Failed to find mdef file inside the model folder specified with -hmm '%s'\n", hmmdir);
135  }
136  ckd_free(tmphmm);
137  }
138  if (lmfile && !path_is_absolute(lmfile) && !file_exists(lmfile)) {
139  char *tmplm = string_join(MODELDIR "/lm/", lmfile, NULL);
140  cmd_ln_set_str_r(ps->config, "-lm", tmplm);
141  ckd_free(tmplm);
142  }
143  if (dictfile && !path_is_absolute(dictfile) && !file_exists(dictfile)) {
144  char *tmpdict = string_join(MODELDIR "/lm/", dictfile, NULL);
145  cmd_ln_set_str_r(ps->config, "-dict", tmpdict);
146  ckd_free(tmpdict);
147  }
148 #endif
149 
150  /* Get acoustic model filenames and add them to the command-line */
151  if ((hmmdir = cmd_ln_str_r(ps->config, "-hmm")) != NULL) {
152  ps_add_file(ps, "-mdef", hmmdir, "mdef");
153  ps_add_file(ps, "-mean", hmmdir, "means");
154  ps_add_file(ps, "-var", hmmdir, "variances");
155  ps_add_file(ps, "-tmat", hmmdir, "transition_matrices");
156  ps_add_file(ps, "-mixw", hmmdir, "mixture_weights");
157  ps_add_file(ps, "-sendump", hmmdir, "sendump");
158  ps_add_file(ps, "-fdict", hmmdir, "noisedict");
159  ps_add_file(ps, "-lda", hmmdir, "feature_transform");
160  ps_add_file(ps, "-featparams", hmmdir, "feat.params");
161  ps_add_file(ps, "-senmgau", hmmdir, "senmgau");
162  }
163 }
164 
165 static void
166 ps_free_searches(ps_decoder_t *ps)
167 {
168  gnode_t *gn;
169 
170  if (ps->searches == NULL)
171  return;
172 
173  for (gn = ps->searches; gn; gn = gnode_next(gn))
174  ps_search_free(gnode_ptr(gn));
175  glist_free(ps->searches);
176  ps->searches = NULL;
177  ps->search = NULL;
178 }
179 
180 static ps_search_t *
181 ps_find_search(ps_decoder_t *ps, char const *name)
182 {
183  gnode_t *gn;
184 
185  for (gn = ps->searches; gn; gn = gnode_next(gn)) {
186  if (0 == strcmp(ps_search_name(gnode_ptr(gn)), name))
187  return (ps_search_t *)gnode_ptr(gn);
188  }
189  return NULL;
190 }
191 
192 int
193 ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
194 {
195  char const *lmfile, *lmctl = NULL;
196 
197  if (config && config != ps->config) {
198  cmd_ln_free_r(ps->config);
199  ps->config = cmd_ln_retain(config);
200  }
201 
202  err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
203  ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
204  ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
205  ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir");
206 
207  /* Fill in some default arguments. */
208  ps_init_defaults(ps);
209 
210  /* Free old searches (do this before other reinit) */
211  ps_free_searches(ps);
212 
213  /* Free old acmod. */
214  acmod_free(ps->acmod);
215  ps->acmod = NULL;
216 
217  /* Free old dictionary (must be done after the two things above) */
218  dict_free(ps->dict);
219  ps->dict = NULL;
220 
221  /* Free d2p */
222  dict2pid_free(ps->d2p);
223  ps->d2p = NULL;
224 
225  /* Logmath computation (used in acmod and search) */
226  if (ps->lmath == NULL
227  || (logmath_get_base(ps->lmath) !=
228  (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
229  if (ps->lmath)
230  logmath_free(ps->lmath);
231  ps->lmath = logmath_init
232  ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
233  cmd_ln_boolean_r(ps->config, "-bestpath"));
234  }
235 
236  /* Acoustic model (this is basically everything that
237  * uttproc.c, senscr.c, and others used to do) */
238  if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
239  return -1;
240 
241  if ((ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window"))) {
242  /* Initialize an auxiliary phone loop search, which will run in
243  * "parallel" with FSG or N-Gram search. */
244  if ((ps->phone_loop = phone_loop_search_init(ps->config,
245  ps->acmod, ps->dict)) == NULL)
246  return -1;
247  ps->searches = glist_add_ptr(ps->searches, ps->phone_loop);
248  }
249 
250  /* Dictionary and triphone mappings (depends on acmod). */
251  /* FIXME: pass config, change arguments, implement LTS, etc. */
252  if ((ps->dict = dict_init(ps->config, ps->acmod->mdef)) == NULL)
253  return -1;
254 
255  /* Determine whether we are starting out in FSG or N-Gram search mode. */
256  if (cmd_ln_str_r(ps->config, "-fsg") || cmd_ln_str_r(ps->config, "-jsgf")) {
257  ps_search_t *fsgs;
258 
259  if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
260  return -1;
261  if ((fsgs = fsg_search_init(ps->config, ps->acmod, ps->dict, ps->d2p)) == NULL)
262  return -1;
263  fsgs->pls = ps->phone_loop;
264  ps->searches = glist_add_ptr(ps->searches, fsgs);
265  ps->search = fsgs;
266  }
267  else if ((lmfile = cmd_ln_str_r(ps->config, "-lm"))
268  || (lmctl = cmd_ln_str_r(ps->config, "-lmctl"))) {
269  ps_search_t *ngs;
270 
271  /* Make the acmod's feature buffer growable if we are doing two-pass search. */
272  if (cmd_ln_boolean_r(ps->config, "-fwdflat")
273  && cmd_ln_boolean_r(ps->config, "-fwdtree"))
274  acmod_set_grow(ps->acmod, TRUE);
275 
276  if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
277  return -1;
278  if ((ngs = ngram_search_init(ps->config, ps->acmod, ps->dict, ps->d2p)) == NULL)
279  return -1;
280  ngs->pls = ps->phone_loop;
281  ps->searches = glist_add_ptr(ps->searches, ngs);
282  ps->search = ngs;
283  }
284  /* Otherwise, we will initialize the search whenever the user
285  * decides to load an FSG or a language model. */
286  else {
287  if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
288  return -1;
289  }
290 
291  /* Initialize performance timer. */
292  ps->perf.name = "decode";
293  ptmr_init(&ps->perf);
294 
295  return 0;
296 }
297 
298 ps_decoder_t *
299 ps_init(cmd_ln_t *config)
300 {
301  ps_decoder_t *ps;
302 
303  ps = ckd_calloc(1, sizeof(*ps));
304  ps->refcount = 1;
305  if (ps_reinit(ps, config) < 0) {
306  ps_free(ps);
307  return NULL;
308  }
309  return ps;
310 }
311 
312 arg_t const *
313 ps_args(void)
314 {
315  return ps_args_def;
316 }
317 
/* Take an additional reference: increments ps->refcount and returns the
 * same decoder pointer.
 * NOTE(review): the declarator line (listing line 319) is missing from this
 * extract; presumably `ps_retain(ps_decoder_t *ps)` -- confirm against the
 * upstream file. */
318 ps_decoder_t *
320 {
321  ++ps->refcount;
322  return ps;
323 }
324 
/* Drop one reference to the decoder.  Returns 0 for a NULL decoder.  If
 * references remain after the decrement, returns the new count; otherwise
 * frees the searches, dictionary, dict2pid, acoustic model, logmath,
 * configuration, utterance ID and the decoder itself, and returns 0.
 * NOTE(review): the declarator line (listing line 326) is missing from this
 * extract; ps_init() calls this as `ps_free(ps)`, so presumably
 * `ps_free(ps_decoder_t *ps)` -- confirm against the upstream file. */
325 int
327 {
328  if (ps == NULL)
329  return 0;
330  if (--ps->refcount > 0)
331  return ps->refcount;
332  ps_free_searches(ps);
333  dict_free(ps->dict);
334  dict2pid_free(ps->d2p);
335  acmod_free(ps->acmod);
336  logmath_free(ps->lmath);
337  cmd_ln_free_r(ps->config);
338  ckd_free(ps->uttid);
339  ckd_free(ps);
340  return 0;
341 }
342 
/* Accessor returning ps->uttid (the string is not copied).
 * NOTE(review): the declarator line (listing line 344) is missing from this
 * extract; presumably `ps_get_uttid(ps_decoder_t *ps)` -- confirm against
 * the upstream file. */
343 char const *
345 {
346  return ps->uttid;
347 }
348 
/* Accessor returning ps->config; no additional reference is taken here.
 * NOTE(review): the declarator line (listing line 350) is missing from this
 * extract; presumably `ps_get_config(ps_decoder_t *ps)` -- confirm against
 * the upstream file. */
349 cmd_ln_t *
351 {
352  return ps->config;
353 }
354 
/* Accessor returning ps->lmath; no additional reference is taken here.
 * NOTE(review): the declarator line (listing line 356) is missing from this
 * extract; the backtrace code in this file calls `ps_get_logmath(ps)`, so
 * presumably `ps_get_logmath(ps_decoder_t *ps)` -- confirm against the
 * upstream file. */
355 logmath_t *
357 {
358  return ps->lmath;
359 }
360 
/* Accessor returning the `fe` member of the decoder's acoustic model.
 * Dereferences ps->acmod without a NULL check.
 * NOTE(review): the declarator line (listing line 362) is missing from this
 * extract; presumably `ps_get_fe(ps_decoder_t *ps)` -- confirm against the
 * upstream file. */
361 fe_t *
363 {
364  return ps->acmod->fe;
365 }
366 
/* Accessor returning the `fcb` member of the decoder's acoustic model.
 * Dereferences ps->acmod without a NULL check.
 * NOTE(review): the declarator line (listing line 368) is missing from this
 * extract; the documentation index at the end of this listing gives it as
 * `ps_get_feat(ps_decoder_t *ps)` -- confirm against the upstream file. */
367 feat_t *
369 {
370  return ps->acmod->fcb;
371 }
372 
/* Forward an MLLR transform to the acoustic model: returns the result of
 * acmod_update_mllr(ps->acmod, mllr).
 * NOTE(review): the declarator line (listing line 374) is missing from this
 * extract; the documentation index at the end of this listing gives it as
 * `ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr)` -- confirm against
 * the upstream file. */
373 ps_mllr_t *
375 {
376  return acmod_update_mllr(ps->acmod, mllr);
377 }
378 
/* Return the `lmset` of the active search, or NULL when there is no active
 * search or its name is not "ngram".
 * NOTE(review): the declarator line (listing line 380) is missing from this
 * extract; this file calls it as `ps_get_lmset(ps)`, so presumably
 * `ps_get_lmset(ps_decoder_t *ps)` -- confirm against the upstream file. */
379 ngram_model_t *
381 {
382  if (ps->search == NULL
383  || 0 != strcmp(ps_search_name(ps->search), "ngram"))
384  return NULL;
385  return ((ngram_search_t *)ps->search)->lmset;
386 }
387 
388 ngram_model_t *
389 ps_update_lmset(ps_decoder_t *ps, ngram_model_t *lmset)
390 {
391  ngram_search_t *ngs;
392  ps_search_t *search;
393 
394  /* Look for N-Gram search. */
395  search = ps_find_search(ps, "ngram");
396  if (search == NULL) {
397  /* Initialize N-Gram search. */
398  search = ngram_search_init(ps->config, ps->acmod, ps->dict, ps->d2p);
399  if (search == NULL)
400  return NULL;
401  search->pls = ps->phone_loop;
402  ps->searches = glist_add_ptr(ps->searches, search);
403  ngs = (ngram_search_t *)search;
404  }
405  else if (lmset != NULL) {
406  ngs = (ngram_search_t *)search;
407  /* Free any previous lmset if this is a new one. */
408  if (ngs->lmset != NULL && ngs->lmset != lmset)
409  ngram_model_free(ngs->lmset);
410  ngs->lmset = lmset;
411  /* Tell N-Gram search to update its view of the world. */
412  if (ps_search_reinit(search, ps->dict, ps->d2p) < 0)
413  return NULL;
414  } else {
415  /* Just activate the existing search */
416  ngs = (ngram_search_t *)search;
417  }
418  ps->search = search;
419  return ngs->lmset;
420 }
421 
/* Return the active search cast to fsg_set_t *, or NULL when there is no
 * active search or its name is not "fsg".
 * NOTE(review): the declarator line (listing line 423) is missing from this
 * extract; presumably `ps_get_fsgset(ps_decoder_t *ps)` -- confirm against
 * the upstream file. */
422 fsg_set_t *
424 {
425  if (ps->search == NULL
426  || 0 != strcmp(ps_search_name(ps->search), "fsg"))
427  return NULL;
428  return (fsg_set_t *)ps->search;
429 }
430 
/* Activate FSG search.  If no "fsg" search is registered, one is created
 * with fsg_search_init() and appended to ps->searches; otherwise the
 * existing one is reinitialized with the current dictionary and dict2pid.
 * On success the search becomes ps->search and is returned cast to
 * fsg_set_t *; on failure NULL is returned and ps->search is unchanged.
 * NOTE(review): the declarator line (listing line 432) is missing from this
 * extract; presumably `ps_update_fsgset(ps_decoder_t *ps)` -- confirm
 * against the upstream file. */
431 fsg_set_t *
433 {
434  ps_search_t *search;
435 
436  /* Look for FSG search. */
437  search = ps_find_search(ps, "fsg");
438  if (search == NULL) {
439  /* Initialize FSG search. */
440  if ((search = fsg_search_init(ps->config,
441  ps->acmod, ps->dict, ps->d2p)) == NULL) {
442  return NULL;
443  }
444  search->pls = ps->phone_loop;
445  ps->searches = glist_add_ptr(ps->searches, search);
446  }
447  else {
448  /* Tell FSG search to update its view of the world. */
449  if (ps_search_reinit(search, ps->dict, ps->d2p) < 0)
450  return NULL;
451  }
452  ps->search = search;
453  return (fsg_set_t *)search;
454 }
455 
456 int
457 ps_load_dict(ps_decoder_t *ps, char const *dictfile,
458  char const *fdictfile, char const *format)
459 {
460  cmd_ln_t *newconfig;
461  dict2pid_t *d2p;
462  dict_t *dict;
463  gnode_t *gn;
464  int rv;
465 
466  /* Create a new scratch config to load this dict (so existing one
467  * won't be affected if it fails) */
468  newconfig = cmd_ln_init(NULL, ps_args(), TRUE, NULL);
469  cmd_ln_set_boolean_r(newconfig, "-dictcase",
470  cmd_ln_boolean_r(ps->config, "-dictcase"));
471  cmd_ln_set_str_r(newconfig, "-dict", dictfile);
472  if (fdictfile)
473  cmd_ln_set_str_r(newconfig, "-fdict", fdictfile);
474  else
475  cmd_ln_set_str_r(newconfig, "-fdict",
476  cmd_ln_str_r(ps->config, "-fdict"));
477 
478  /* Try to load it. */
479  if ((dict = dict_init(newconfig, ps->acmod->mdef)) == NULL) {
480  cmd_ln_free_r(newconfig);
481  return -1;
482  }
483 
484  /* Reinit the dict2pid. */
485  if ((d2p = dict2pid_build(ps->acmod->mdef, dict)) == NULL) {
486  cmd_ln_free_r(newconfig);
487  return -1;
488  }
489 
490  /* Success! Update the existing config to reflect new dicts and
491  * drop everything into place. */
492  cmd_ln_free_r(newconfig);
493  cmd_ln_set_str_r(ps->config, "-dict", dictfile);
494  if (fdictfile)
495  cmd_ln_set_str_r(ps->config, "-fdict", fdictfile);
496  dict_free(ps->dict);
497  ps->dict = dict;
498  dict2pid_free(ps->d2p);
499  ps->d2p = d2p;
500 
501  /* And tell all searches to reconfigure themselves. */
502  for (gn = ps->searches; gn; gn = gnode_next(gn)) {
503  ps_search_t *search = gnode_ptr(gn);
504  if ((rv = ps_search_reinit(search, dict, d2p)) < 0)
505  return rv;
506  }
507 
508  return 0;
509 }
510 
511 int
512 ps_save_dict(ps_decoder_t *ps, char const *dictfile,
513  char const *format)
514 {
515  return dict_write(ps->dict, dictfile, format);
516 }
517 
518 int
520  char const *word,
521  char const *phones,
522  int update)
523 {
524  int32 wid, lmwid;
525  ngram_model_t *lmset;
526  s3cipid_t *pron;
527  char **phonestr, *tmp;
528  int np, i, rv;
529 
530  /* Parse phones into an array of phone IDs. */
531  tmp = ckd_salloc(phones);
532  np = str2words(tmp, NULL, 0);
533  phonestr = ckd_calloc(np, sizeof(*phonestr));
534  str2words(tmp, phonestr, np);
535  pron = ckd_calloc(np, sizeof(*pron));
536  for (i = 0; i < np; ++i) {
537  pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]);
538  if (pron[i] == -1) {
539  E_ERROR("Unknown phone %s in phone string %s\n",
540  phonestr[i], tmp);
541  ckd_free(phonestr);
542  ckd_free(tmp);
543  ckd_free(pron);
544  return -1;
545  }
546  }
547  /* No longer needed. */
548  ckd_free(phonestr);
549  ckd_free(tmp);
550 
551  /* Add it to the dictionary. */
552  if ((wid = dict_add_word(ps->dict, word, pron, np)) == -1) {
553  ckd_free(pron);
554  return -1;
555  }
556  /* No longer needed. */
557  ckd_free(pron);
558 
559  /* Now we also have to add it to dict2pid. */
560  dict2pid_add_word(ps->d2p, wid);
561 
562  if ((lmset = ps_get_lmset(ps)) != NULL) {
563  /* Add it to the LM set (meaning, the current LM). In a perfect
564  * world, this would result in the same WID, but because of the
565  * weird way that word IDs are handled, it doesn't. */
566  if ((lmwid = ngram_model_add_word(lmset, word, 1.0))
567  == NGRAM_INVALID_WID)
568  return -1;
569  }
570 
571  /* Rebuild the widmap and search tree if requested. */
572  if (update) {
573  if ((rv = ps_search_reinit(ps->search, ps->dict, ps->d2p) < 0))
574  return rv;
575  }
576  return wid;
577 }
578 
579 int
580 ps_decode_raw(ps_decoder_t *ps, FILE *rawfh,
581  char const *uttid, long maxsamps)
582 {
583  long total;
584  OFF_T pos;
585 
586  ps_start_utt(ps, uttid);
587  /* If this file is seekable or maxsamps is specified, then decode
588  * the whole thing at once. */
589  if (maxsamps != -1 || (pos = FTELL(rawfh)) >= 0) {
590  int16 *data;
591 
592  if (maxsamps == -1) {
593  OFF_T endpos;
594  FSEEK(rawfh, 0, SEEK_END);
595  endpos = FTELL(rawfh);
596  FSEEK(rawfh, pos, SEEK_SET);
597  maxsamps = endpos - pos;
598  }
599  data = ckd_calloc(maxsamps, sizeof(*data));
600  total = fread(data, sizeof(*data), maxsamps, rawfh);
601  ps_process_raw(ps, data, total, FALSE, TRUE);
602  ckd_free(data);
603  }
604  else {
605  /* Otherwise decode it in a stream. */
606  total = 0;
607  while (!feof(rawfh)) {
608  int16 data[256];
609  size_t nread;
610 
611  nread = fread(data, sizeof(*data), sizeof(data)/sizeof(*data), rawfh);
612  ps_process_raw(ps, data, nread, FALSE, FALSE);
613  total += nread;
614  }
615  }
616  ps_end_utt(ps);
617  return total;
618 }
619 
620 int
621 ps_start_utt(ps_decoder_t *ps, char const *uttid)
622 {
623  int rv;
624 
625  if (ps->search == NULL) {
626  E_ERROR("No search module is selected, did you forget to "
627  "specify a language model or grammar?\n");
628  return -1;
629  }
630 
631  ptmr_reset(&ps->perf);
632  ptmr_start(&ps->perf);
633 
634  if (uttid) {
635  ckd_free(ps->uttid);
636  ps->uttid = ckd_salloc(uttid);
637  }
638  else {
639  char nuttid[16];
640  ckd_free(ps->uttid);
641  sprintf(nuttid, "%09u", ps->uttno);
642  ps->uttid = ckd_salloc(nuttid);
643  ++ps->uttno;
644  }
645  /* Remove any residual word lattice and hypothesis. */
646  ps_lattice_free(ps->search->dag);
647  ps->search->dag = NULL;
648  ps->search->last_link = NULL;
649  ps->search->post = 0;
650  ckd_free(ps->search->hyp_str);
651  ps->search->hyp_str = NULL;
652 
653  if ((rv = acmod_start_utt(ps->acmod)) < 0)
654  return rv;
655 
656  /* Start logging features and audio if requested. */
657  if (ps->mfclogdir) {
658  char *logfn = string_join(ps->mfclogdir, "/",
659  ps->uttid, ".mfc", NULL);
660  FILE *mfcfh;
661  E_INFO("Writing MFCC log file: %s\n", logfn);
662  if ((mfcfh = fopen(logfn, "wb")) == NULL) {
663  E_ERROR_SYSTEM("Failed to open MFCC log file %s", logfn);
664  ckd_free(logfn);
665  return -1;
666  }
667  ckd_free(logfn);
668  acmod_set_mfcfh(ps->acmod, mfcfh);
669  }
670  if (ps->rawlogdir) {
671  char *logfn = string_join(ps->rawlogdir, "/",
672  ps->uttid, ".raw", NULL);
673  FILE *rawfh;
674  E_INFO("Writing raw audio log file: %s\n", logfn);
675  if ((rawfh = fopen(logfn, "wb")) == NULL) {
676  E_ERROR_SYSTEM("Failed to open raw audio log file %s", logfn);
677  ckd_free(logfn);
678  return -1;
679  }
680  ckd_free(logfn);
681  acmod_set_rawfh(ps->acmod, rawfh);
682  }
683  if (ps->senlogdir) {
684  char *logfn = string_join(ps->senlogdir, "/",
685  ps->uttid, ".sen", NULL);
686  FILE *senfh;
687  E_INFO("Writing senone score log file: %s\n", logfn);
688  if ((senfh = fopen(logfn, "wb")) == NULL) {
689  E_ERROR_SYSTEM("Failed to open senone score log file %s", logfn);
690  ckd_free(logfn);
691  return -1;
692  }
693  ckd_free(logfn);
694  acmod_set_senfh(ps->acmod, senfh);
695  }
696 
697  /* Start auxiliary phone loop search. */
698  if (ps->phone_loop)
699  ps_search_start(ps->phone_loop);
700 
701  return ps_search_start(ps->search);
702 }
703 
704 static int
705 ps_search_forward(ps_decoder_t *ps)
706 {
707  int nfr;
708 
709  nfr = 0;
710  while (ps->acmod->n_feat_frame > 0) {
711  int k;
712  if (ps->phone_loop)
713  if ((k = ps_search_step(ps->phone_loop, ps->acmod->output_frame)) < 0)
714  return k;
715  if (ps->acmod->output_frame >= ps->pl_window)
716  if ((k = ps_search_step(ps->search,
717  ps->acmod->output_frame - ps->pl_window)) < 0)
718  return k;
719  acmod_advance(ps->acmod);
720  ++ps->n_frame;
721  ++nfr;
722  }
723  return nfr;
724 }
725 
/* Decode one utterance from a senone score dump: starts an utterance,
 * installs senfh as the acoustic model's score input, reads scores and runs
 * ps_search_forward() until no more scores are read, then ends the
 * utterance and detaches the input.  Returns the number of frames searched,
 * or a negative search result (the utterance is ended first in that case).
 * The result of ps_start_utt() is not checked here.
 * NOTE(review): the first declarator line (listing line 727) is missing
 * from this extract; the body uses `ps` and `senfh`, so presumably
 * `ps_decode_senscr(ps_decoder_t *ps, FILE *senfh,` -- confirm against the
 * upstream file. */
726 int
728  char const *uttid)
729 {
730  int nfr, n_searchfr;
731 
732  ps_start_utt(ps, uttid);
733  n_searchfr = 0;
734  acmod_set_insenfh(ps->acmod, senfh);
735  while ((nfr = acmod_read_scores(ps->acmod)) > 0) {
736  if ((nfr = ps_search_forward(ps)) < 0) {
737  ps_end_utt(ps);
738  return nfr;
739  }
740  n_searchfr += nfr;
741  }
742  ps_end_utt(ps);
743  acmod_set_insenfh(ps->acmod, NULL);
744 
745  return n_searchfr;
746 }
747 
/* Feed raw audio samples to the decoder.  Returns 0 with an error message
 * if the acoustic model is idle (no utterance started).  Otherwise loops
 * until acmod_process_raw() has consumed all samples, running
 * ps_search_forward() after each call unless no_search is set (in which
 * case the acoustic model is switched to growable mode first).  Returns the
 * number of frames searched, or the first negative result encountered.
 * NOTE(review): the first declarator line (listing line 749) is missing
 * from this extract; this file calls it as
 * `ps_process_raw(ps, data, total, FALSE, TRUE)`, so presumably
 * `ps_process_raw(ps_decoder_t *ps,` -- confirm against the upstream file. */
748 int
750  int16 const *data,
751  size_t n_samples,
752  int no_search,
753  int full_utt)
754 {
755  int n_searchfr = 0;
756 
757  if (ps->acmod->state == ACMOD_IDLE) {
758  E_ERROR("Failed to process data, utterance is not started. Use start_utt to start it\n");
759  return 0;
760  }
761 
762  if (no_search)
763  acmod_set_grow(ps->acmod, TRUE);
764 
765  while (n_samples) {
766  int nfr;
767 
768  /* Process some data into features. */
769  if ((nfr = acmod_process_raw(ps->acmod, &data,
770  &n_samples, full_utt)) < 0)
771  return nfr;
772 
773  /* Score and search as much data as possible */
774  if (no_search)
775  continue;
776  if ((nfr = ps_search_forward(ps)) < 0)
777  return nfr;
778  n_searchfr += nfr;
779  }
780 
781  return n_searchfr;
782 }
783 
/* Feed cepstral frames to the decoder.  Loops until acmod_process_cep() has
 * consumed all frames, running ps_search_forward() after each call unless
 * no_search is set (in which case the acoustic model is switched to
 * growable mode first).  Returns the number of frames searched, or the
 * first negative result encountered.  Unlike the raw-audio variant in this
 * file, the acoustic model state is not checked before processing.
 * NOTE(review): the first declarator line (listing line 785) is missing
 * from this extract; the documentation index at the end of this listing
 * names it `ps_process_cep(ps_decoder_t *ps, ...)` -- confirm against the
 * upstream file. */
784 int
786  mfcc_t **data,
787  int32 n_frames,
788  int no_search,
789  int full_utt)
790 {
791  int n_searchfr = 0;
792 
793  if (no_search)
794  acmod_set_grow(ps->acmod, TRUE);
795 
796  while (n_frames) {
797  int nfr;
798 
799  /* Process some data into features. */
800  if ((nfr = acmod_process_cep(ps->acmod, &data,
801  &n_frames, full_utt)) < 0)
802  return nfr;
803 
804  /* Score and search as much data as possible */
805  if (no_search)
806  continue;
807  if ((nfr = ps_search_forward(ps)) < 0)
808  return nfr;
809  n_searchfr += nfr;
810  }
811 
812  return n_searchfr;
813 }
814 
815 int
817 {
818  int rv, i;
819 
820  acmod_end_utt(ps->acmod);
821 
822  /* Search any remaining frames. */
823  if ((rv = ps_search_forward(ps)) < 0) {
824  ptmr_stop(&ps->perf);
825  return rv;
826  }
827  /* Finish phone loop search. */
828  if (ps->phone_loop) {
829  if ((rv = ps_search_finish(ps->phone_loop)) < 0) {
830  ptmr_stop(&ps->perf);
831  return rv;
832  }
833  }
834  /* Search any frames remaining in the lookahead window. */
835  for (i = ps->acmod->output_frame - ps->pl_window;
836  i < ps->acmod->output_frame; ++i)
837  ps_search_step(ps->search, i);
838  /* Finish main search. */
839  if ((rv = ps_search_finish(ps->search)) < 0) {
840  ptmr_stop(&ps->perf);
841  return rv;
842  }
843  ptmr_stop(&ps->perf);
844 
845  /* Log a backtrace if requested. */
846  if (cmd_ln_boolean_r(ps->config, "-backtrace")) {
847  char const *uttid, *hyp;
848  ps_seg_t *seg;
849  int32 score;
850 
851  hyp = ps_get_hyp(ps, &score, &uttid);
852  E_INFO("%s: %s (%d)\n", uttid, hyp, score);
853  E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
854  "word", "start", "end", "pprob", "ascr", "lscr", "lback");
855  for (seg = ps_seg_iter(ps, &score); seg;
856  seg = ps_seg_next(seg)) {
857  char const *word;
858  int sf, ef;
859  int32 post, lscr, ascr, lback;
860 
861  word = ps_seg_word(seg);
862  ps_seg_frames(seg, &sf, &ef);
863  post = ps_seg_prob(seg, &ascr, &lscr, &lback);
864  E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
865  word, sf, ef, logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback);
866  }
867  }
868  return rv;
869 }
870 
871 char const *
872 ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score, char const **out_uttid)
873 {
874  char const *hyp;
875 
876  ptmr_start(&ps->perf);
877  hyp = ps_search_hyp(ps->search, out_best_score, NULL);
878  if (out_uttid)
879  *out_uttid = ps->uttid;
880  ptmr_stop(&ps->perf);
881  return hyp;
882 }
883 
884 char const *
885 ps_get_hyp_final(ps_decoder_t *ps, int32 *out_is_final)
886 {
887  char const *hyp;
888 
889  ptmr_start(&ps->perf);
890  hyp = ps_search_hyp(ps->search, NULL, out_is_final);
891  ptmr_stop(&ps->perf);
892  return hyp;
893 }
894 
895 
896 int32
897 ps_get_prob(ps_decoder_t *ps, char const **out_uttid)
898 {
899  int32 prob;
900 
901  ptmr_start(&ps->perf);
902  prob = ps_search_prob(ps->search);
903  if (out_uttid)
904  *out_uttid = ps->uttid;
905  ptmr_stop(&ps->perf);
906  return prob;
907 }
908 
909 ps_seg_t *
910 ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score)
911 {
912  ps_seg_t *itor;
913 
914  ptmr_start(&ps->perf);
915  itor = ps_search_seg_iter(ps->search, out_best_score);
916  ptmr_stop(&ps->perf);
917  return itor;
918 }
919 
/* Advance a segmentation iterator: returns ps_search_seg_next(seg).
 * NOTE(review): the declarator line (listing line 921) is missing from this
 * extract; this file calls it as `ps_seg_next(seg)`, so presumably
 * `ps_seg_next(ps_seg_t *seg)` -- confirm against the upstream file. */
920 ps_seg_t *
922 {
923  return ps_search_seg_next(seg);
924 }
925 
/* Accessor returning seg->word (the string is not copied).
 * NOTE(review): the declarator line (listing line 927) is missing from this
 * extract; this file calls it as `ps_seg_word(seg)`, so presumably
 * `ps_seg_word(ps_seg_t *seg)` -- confirm against the upstream file. */
926 char const *
928 {
929  return seg->word;
930 }
931 
932 void
933 ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef)
934 {
935  if (out_sf) *out_sf = seg->sf;
936  if (out_ef) *out_ef = seg->ef;
937 }
938 
939 int32
940 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback)
941 {
942  if (out_ascr) *out_ascr = seg->ascr;
943  if (out_lscr) *out_lscr = seg->lscr;
944  if (out_lback) *out_lback = seg->lback;
945  return seg->prob;
946 }
947 
/* Release a segmentation iterator by calling ps_search_seg_free(seg).
 * NOTE(review): the declarator line (listing line 949) is missing from this
 * extract; presumably `ps_seg_free(ps_seg_t *seg)` -- confirm against the
 * upstream file. */
948 void
950 {
951  ps_search_seg_free(seg);
952 }
953 
/* Return ps_search_lattice() of the active search.  ps->search is not
 * checked for NULL here.
 * NOTE(review): the declarator line (listing line 955) is missing from this
 * extract; this file calls it as `ps_get_lattice(ps)`, so presumably
 * `ps_get_lattice(ps_decoder_t *ps)` -- confirm against the upstream file. */
954 ps_lattice_t *
956 {
957  return ps_search_lattice(ps->search);
958 }
959 
960 ps_nbest_t *
961 ps_nbest(ps_decoder_t *ps, int sf, int ef,
962  char const *ctx1, char const *ctx2)
963 {
964  ps_lattice_t *dag;
965  ngram_model_t *lmset;
966  ps_astar_t *nbest;
967  float32 lwf;
968  int32 w1, w2;
969 
970  if (ps->search == NULL)
971  return NULL;
972  if ((dag = ps_get_lattice(ps)) == NULL)
973  return NULL;
974 
975  /* FIXME: This is all quite specific to N-Gram search. Either we
976  * should make N-best a method for each search module or it needs
977  * to be abstracted to work for N-Gram and FSG. */
978  if (0 != strcmp(ps_search_name(ps->search), "ngram")) {
979  lmset = NULL;
980  lwf = 1.0f;
981  }
982  else {
983  lmset = ((ngram_search_t *)ps->search)->lmset;
984  lwf = ((ngram_search_t *)ps->search)->bestpath_fwdtree_lw_ratio;
985  }
986 
987  w1 = ctx1 ? dict_wordid(ps_search_dict(ps->search), ctx1) : -1;
988  w2 = ctx2 ? dict_wordid(ps_search_dict(ps->search), ctx2) : -1;
989  nbest = ps_astar_start(dag, lmset, lwf, sf, ef, w1, w2);
990 
991  return (ps_nbest_t *)nbest;
992 }
993 
/* Release an N-best iterator by calling ps_astar_finish(nbest).
 * NOTE(review): the declarator line (listing line 995) is missing from this
 * extract; the documentation index at the end of this listing gives it as
 * `ps_nbest_free(ps_nbest_t *nbest)` -- confirm against the upstream file. */
994 void
996 {
997  ps_astar_finish(nbest);
998 }
999 
/* Advance an N-best iterator with ps_astar_next().  When no further path is
 * available the iterator is freed with ps_nbest_free() and NULL is
 * returned, so the caller must not use it afterwards; otherwise the same
 * iterator is returned.
 * NOTE(review): the declarator line (listing line 1001) is missing from
 * this extract; presumably `ps_nbest_next(ps_nbest_t *nbest)` -- confirm
 * against the upstream file. */
1000 ps_nbest_t *
1002 {
1003  ps_latpath_t *next;
1004 
1005  next = ps_astar_next(nbest);
1006  if (next == NULL) {
1007  ps_nbest_free(nbest);
1008  return NULL;
1009  }
1010  return nbest;
1011 }
1012 
1013 char const *
1014 ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score)
1015 {
1016  assert(nbest != NULL);
1017 
1018  if (nbest->top == NULL)
1019  return NULL;
1020  if (out_score) *out_score = nbest->top->score;
1021  return ps_astar_hyp(nbest, nbest->top);
1022 }
1023 
1024 ps_seg_t *
1025 ps_nbest_seg(ps_nbest_t *nbest, int32 *out_score)
1026 {
1027  if (nbest->top == NULL)
1028  return NULL;
1029  if (out_score) *out_score = nbest->top->score;
1030  return ps_astar_seg_iter(nbest, nbest->top, 1.0);
1031 }
1032 
/* Return the acoustic model's output_frame plus one.
 * NOTE(review): the declarator line (listing line 1034) is missing from
 * this extract; presumably `ps_get_n_frames(ps_decoder_t *ps)` -- confirm
 * against the upstream file. */
1033 int
1035 {
1036  return ps->acmod->output_frame + 1;
1037 }
1038 
1039 void
1040 ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech,
1041  double *out_ncpu, double *out_nwall)
1042 {
1043  int32 frate;
1044 
1045  frate = cmd_ln_int32_r(ps->config, "-frate");
1046  *out_nspeech = (double)ps->acmod->output_frame / frate;
1047  *out_ncpu = ps->perf.t_cpu;
1048  *out_nwall = ps->perf.t_elapsed;
1049 }
1050 
1051 void
1052 ps_get_all_time(ps_decoder_t *ps, double *out_nspeech,
1053  double *out_ncpu, double *out_nwall)
1054 {
1055  int32 frate;
1056 
1057  frate = cmd_ln_int32_r(ps->config, "-frate");
1058  *out_nspeech = (double)ps->n_frame / frate;
1059  *out_ncpu = ps->perf.t_tot_cpu;
1060  *out_nwall = ps->perf.t_tot_elapsed;
1061 }
1062 
/* Fill in the common fields of a search structure: stores vt, config and
 * acmod as given, retains d2p and dict when they are non-NULL, and caches
 * the dictionary's start/finish/silence word IDs and size (or -1/-1/-1 and
 * 0 when there is no dictionary).
 * NOTE(review): the first declarator line (listing line 1064) is missing
 * from this extract; the body uses `search` and `vt`, so presumably
 * `ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt,` -- confirm
 * against the upstream file. */
1063 void
1065  cmd_ln_t *config, acmod_t *acmod, dict_t *dict,
1066  dict2pid_t *d2p)
1067 {
1068  search->vt = vt;
1069  search->config = config;
1070  search->acmod = acmod;
1071  if (d2p)
1072  search->d2p = dict2pid_retain(d2p);
1073  else
1074  search->d2p = NULL;
1075  if (dict) {
1076  search->dict = dict_retain(dict);
1077  search->start_wid = dict_startwid(dict);
1078  search->finish_wid = dict_finishwid(dict);
1079  search->silence_wid = dict_silwid(dict);
1080  search->n_words = dict_size(dict);
1081  }
1082  else {
1083  search->dict = NULL;
1084  search->start_wid = search->finish_wid = search->silence_wid = -1;
1085  search->n_words = 0;
1086  }
1087 }
1088 
/* Swap the dictionary and dict2pid of a search structure: releases the
 * currently held ones, then retains the new ones when non-NULL and
 * refreshes the cached word IDs and word count (or resets them to
 * -1/-1/-1 and 0 when dict is NULL).  The old objects are released before
 * the new ones are retained.
 * NOTE(review): the first declarator line (listing line 1090) is missing
 * from this extract; the documentation index at the end of this listing
 * gives the signature as
 * `ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)`
 * -- confirm against the upstream file. */
1089 void
1091  dict2pid_t *d2p)
1092 {
1093  dict_free(search->dict);
1094  dict2pid_free(search->d2p);
1095  /* FIXME: _retain() should just return NULL if passed NULL. */
1096  if (dict) {
1097  search->dict = dict_retain(dict);
1098  search->start_wid = dict_startwid(dict);
1099  search->finish_wid = dict_finishwid(dict);
1100  search->silence_wid = dict_silwid(dict);
1101  search->n_words = dict_size(dict);
1102  }
1103  else {
1104  search->dict = NULL;
1105  search->start_wid = search->finish_wid = search->silence_wid = -1;
1106  search->n_words = 0;
1107  }
1108  if (d2p)
1109  search->d2p = dict2pid_retain(d2p);
1110  else
1111  search->d2p = NULL;
1112 }
1113 
1114 
/* Release the resources held in the common part of a search structure: its
 * dictionary, dict2pid, hypothesis string and lattice.  The search
 * structure itself, its config and its acmod are not freed here.
 * NOTE(review): the declarator line (listing line 1116) is missing from
 * this extract; presumably `ps_search_deinit(ps_search_t *search)` --
 * confirm against the upstream file. */
1115 void
1117 {
1118  /* FIXME: We will have refcounting on acmod, config, etc, at which
1119  * point we will free them here too. */
1120  dict_free(search->dict);
1121  dict2pid_free(search->d2p);
1122  ckd_free(search->hyp_str);
1123  ps_lattice_free(search->dag);
1124 }
Implementation of FSG search (and "FSG set") structure.
ptmr_t perf
Performance counter for all of decoding.
dict_t * dict_init(cmd_ln_t *config, bin_mdef_t *mdef)
Initialize a new dictionary.
Definition: dict.c:251
Internal implementation of PocketSphinx decoder.
POCKETSPHINX_EXPORT void ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech, double *out_ncpu, double *out_nwall)
Get performance information for the current utterance.
POCKETSPHINX_EXPORT feat_t * ps_get_feat(ps_decoder_t *ps)
Get the dynamic feature computation object for this decoder.
Definition: pocketsphinx.c:368
int acmod_read_scores(acmod_t *acmod)
Read one frame of scores from senone score dump file.
Definition: acmod.c:997
Not in an utterance.
Definition: acmod.h:68
void ps_astar_finish(ps_astar_t *nbest)
Finish N-best search, releasing resources associated with it.
Definition: ps_lattice.c:1878
ps_latpath_t * ps_astar_next(ps_astar_t *nbest)
Find next best hypothesis of A* on a word graph.
Definition: ps_lattice.c:1724
char const * ps_astar_hyp(ps_astar_t *nbest, ps_latpath_t *path)
Get hypothesis string from A* search.
Definition: ps_lattice.c:1757
POCKETSPHINX_EXPORT ps_seg_t * ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score)
Get an iterator over the word segmentation for the best hypothesis.
Definition: pocketsphinx.c:910
POCKETSPHINX_EXPORT int ps_process_cep(ps_decoder_t *ps, mfcc_t **data, int n_frames, int no_search, int full_utt)
Decode acoustic feature data.
Base structure for search module.
POCKETSPHINX_EXPORT arg_t const * ps_args(void)
Returns the argument definitions used in ps_init().
Definition: pocketsphinx.c:313
dict_t * dict
Pronunciation dictionary.
int acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
Set up a senone score dump file for input.
Definition: acmod.c:851
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
Definition: dict.c:382
ps_mllr_t * acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
Adapt acoustic model using a linear transform.
Definition: acmod.c:349
int acmod_process_cep(acmod_t *acmod, mfcc_t ***inout_cep, int *inout_n_frames, int full_utt)
Feed acoustic feature data into the acoustic model for scoring.
Definition: acmod.c:692
POCKETSPHINX_EXPORT int ps_load_dict(ps_decoder_t *ps, char const *dictfile, char const *fdictfile, char const *format)
Reload the pronunciation dictionary from a file.
Definition: pocketsphinx.c:457
POCKETSPHINX_EXPORT void ps_nbest_free(ps_nbest_t *nbest)
Finish N-best search early, releasing resources.
Definition: pocketsphinx.c:995
int32 silence_wid
Silence word ID.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
acmod_t * acmod
Acoustic model.
int dict_free(dict_t *d)
Release a pointer to a dictionary.
Definition: dict.c:451
POCKETSPHINX_EXPORT ps_nbest_t * ps_nbest(ps_decoder_t *ps, int sf, int ef, char const *ctx1, char const *ctx2)
Get an iterator over the best hypotheses, optionally within a selected region of the utterance...
Definition: pocketsphinx.c:961
POCKETSPHINX_EXPORT ps_seg_t * ps_nbest_seg(ps_nbest_t *nbest, int32 *out_score)
Get the word segmentation from an N-best list iterator.
POCKETSPHINX_EXPORT void ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef)
Get inclusive start and end frames from a segmentation iterator.
Definition: pocketsphinx.c:933
POCKETSPHINX_EXPORT ps_mllr_t * ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr)
Adapt current acoustic model using a linear transform.
Definition: pocketsphinx.c:374
int32 finish_wid
Finish word ID.
fe_t * fe
Acoustic feature computation.
Definition: acmod.h:155
int bin_mdef_ciphone_id(bin_mdef_t *m, const char *ciphone)
Context-independent phone lookup.
Definition: bin_mdef.c:692
int32 lscr
Language model score.
int32 n_words
Number of words known to search (may be less than in the dictionary)
POCKETSPHINX_EXPORT int ps_save_dict(ps_decoder_t *ps, char const *dictfile, char const *format)
Dump the current pronunciation dictionary to a file.
Definition: pocketsphinx.c:512
POCKETSPHINX_EXPORT int ps_decode_raw(ps_decoder_t *ps, FILE *rawfh, char const *uttid, long maxsamps)
Decode a raw audio stream.
Definition: pocketsphinx.c:580
char const * mfclogdir
Log directory for MFCC files.
Word graph search implementation.
POCKETSPHINX_EXPORT char const * ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score)
Get the hypothesis string from an N-best list iterator.
int refcount
Reference count.
int dict2pid_free(dict2pid_t *d2p)
Free the memory used by the dict2pid structure.
Definition: dict2pid.c:507
POCKETSPHINX_EXPORT ngram_model_t * ps_get_lmset(ps_decoder_t *ps)
Get the language model set object for this decoder.
Definition: pocketsphinx.c:380
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
A* search structure.
POCKETSPHINX_EXPORT ps_lattice_t * ps_get_lattice(ps_decoder_t *ps)
Get word lattice.
Definition: pocketsphinx.c:955
POCKETSPHINX_EXPORT ps_nbest_t * ps_nbest_next(ps_nbest_t *nbest)
Move an N-best list iterator forward.
int32 prob
Log posterior probability.
dict_t * dict_retain(dict_t *d)
Retain a pointer to a dict_t.
Definition: dict.c:444
POCKETSPHINX_EXPORT int32 ps_get_prob(ps_decoder_t *ps, char const **out_uttid)
Get posterior probability.
Definition: pocketsphinx.c:897
char const * word
Word string (pointer into dictionary hash)
int acmod_set_senfh(acmod_t *acmod, FILE *logfh)
Start logging senone scores to a filehandle.
Definition: acmod.c:374
acmod_t * acmod
Acoustic model.
ps_search_t * search
Currently active search module.
POCKETSPHINX_EXPORT ngram_model_t * ps_update_lmset(ps_decoder_t *ps, ngram_model_t *lmset)
Update the language model set object for this decoder.
Definition: pocketsphinx.c:389
Lexicon tree based Viterbi search.
int acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
Start logging raw audio to a filehandle.
Definition: acmod.c:397
int dict_write(dict_t *dict, char const *filename, char const *format)
Write dictionary to a file.
Definition: dict.c:220
POCKETSPHINX_EXPORT int ps_start_utt(ps_decoder_t *ps, char const *uttid)
Start utterance processing.
Definition: pocketsphinx.c:621
logmath_t * lmath
Log math computation.
int32 start_wid
Start word ID.
POCKETSPHINX_EXPORT ps_seg_t * ps_seg_next(ps_seg_t *seg)
Get the next segment in a word segmentation.
Definition: pocketsphinx.c:921
char * uttid
Utterance ID for current utterance.
N-Gram search module structure.
Definition: ngram_search.h:197
dict2pid_t * d2p
Dictionary to senone mappings.
Decoder object.
char const * rawlogdir
Log directory for audio files.
POCKETSPHINX_EXPORT int ps_decode_senscr(ps_decoder_t *ps, FILE *senfh, char const *uttid)
Decode a senone score dump file.
Definition: pocketsphinx.c:727
frame_idx_t output_frame
Index of next frame of dynamic features.
Definition: acmod.h:187
POCKETSPHINX_EXPORT int ps_free(ps_decoder_t *ps)
Finalize the decoder.
Definition: pocketsphinx.c:326
int16 s3cipid_t
Size definitions for more semantically meaningful units.
Definition: s3types.h:109
N-Gram based multi-pass search ("FBS")
POCKETSPHINX_EXPORT void ps_seg_free(ps_seg_t *seg)
Finish iterating over a word segmentation early, freeing resources.
Definition: pocketsphinx.c:949
POCKETSPHINX_EXPORT fsg_set_t * ps_update_fsgset(ps_decoder_t *ps)
Update the finite-state grammar set object for this decoder.
Definition: pocketsphinx.c:432
frame_idx_t ef
End frame.
int32 ascr
Acoustic score.
cmd_ln_t * config
Configuration.
int acmod_end_utt(acmod_t *acmod)
Mark the end of an utterance.
Definition: acmod.c:448
POCKETSPHINX_EXPORT ps_decoder_t * ps_retain(ps_decoder_t *ps)
Retain a pointer to the decoder.
Definition: pocketsphinx.c:319
int dict2pid_add_word(dict2pid_t *d2p, int32 wid)
Add a word to the dict2pid structure (after adding it to dict).
Definition: dict2pid.c:298
int acmod_advance(acmod_t *acmod)
Advance the frame index.
Definition: acmod.c:886
uint8 state
State of utterance processing.
Definition: acmod.h:182
int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
Start logging MFCCs to a filehandle.
Definition: acmod.c:385
void acmod_free(acmod_t *acmod)
Finalize an acoustic model.
Definition: acmod.c:310
POCKETSPHINX_EXPORT cmd_ln_t * ps_get_config(ps_decoder_t *ps)
Get the configuration object for this decoder.
Definition: pocketsphinx.c:350
glist_t searches
List of search modules.
phone_loop_t * phones
Array of phone arcs.
ps_search_t * phone_loop
Phone loop search for lookahead.
ps_search_t * ngram_search_init(cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize the N-Gram search module.
Definition: ngram_search.c:139
Feature space linear transform structure.
Definition: acmod.h:82
POCKETSPHINX_EXPORT int ps_process_raw(ps_decoder_t *ps, int16 const *data, size_t n_samples, int no_search, int full_utt)
Decode raw audio data.
Definition: pocketsphinx.c:749
A structure for a dictionary.
Definition: dict.h:79
feat_t * fcb
Dynamic feature computation.
Definition: acmod.h:156
Word graph structure used in bestpath/nbest search.
char const * senlogdir
Log directory for senone score files.
ps_searchfuncs_t * vt
V-table of search methods.
ps_astar_t * ps_astar_start(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, int sf, int ef, int w1, int w2)
Begin N-Gram based A* search on a word graph.
Definition: ps_lattice.c:1665
uint32 n_frame
Total number of frames processed.
POCKETSPHINX_EXPORT int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
Reinitialize the decoder with updated configuration.
Definition: pocketsphinx.c:193
Fast and rough context-independent phoneme loop search.
dict2pid_t * d2p
Dictionary to senone mapping.
dict2pid_t * dict2pid_retain(dict2pid_t *d2p)
Retain a pointer to dict2pid.
Definition: dict2pid.c:500
POCKETSPHINX_EXPORT int ps_end_utt(ps_decoder_t *ps)
End utterance processing.
Definition: pocketsphinx.c:816
int32 post
Utterance posterior probability.
char * hyp_str
Current hypothesis string.
Partial path structure used in N-best (A*) search.
dict_t * dict
Pronunciation dictionary.
int acmod_process_raw(acmod_t *acmod, int16 const **inout_raw, size_t *inout_n_samps, int full_utt)
TODO: Set queue length for utterance processing.
Definition: acmod.c:623
Flat lexicon based Viterbi search.
POCKETSPHINX_EXPORT int ps_add_word(ps_decoder_t *ps, char const *word, char const *phones, int update)
Add a word to the pronunciation dictionary.
Definition: pocketsphinx.c:519
ngram_model_t * lmset
Set of language models.
Definition: ngram_search.h:199
int32 lback
Language model backoff.
POCKETSPHINX_EXPORT void ps_get_all_time(ps_decoder_t *ps, double *out_nspeech, double *out_ncpu, double *out_nwall)
Get overall performance information.
POCKETSPHINX_EXPORT logmath_t * ps_get_logmath(ps_decoder_t *ps)
Get the log-math computation object for this decoder.
Definition: pocketsphinx.c:356
POCKETSPHINX_EXPORT char const * ps_get_uttid(ps_decoder_t *ps)
Get current utterance ID.
Definition: pocketsphinx.c:344
int acmod_start_utt(acmod_t *acmod)
Mark the start of an utterance.
Definition: acmod.c:432
POCKETSPHINX_EXPORT int32 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback)
Get language, acoustic, and posterior probabilities from a segmentation iterator. ...
Definition: pocketsphinx.c:940
POCKETSPHINX_EXPORT char const * ps_get_hyp_final(ps_decoder_t *ps, int32 *out_is_final)
Get hypothesis string and final flag.
Definition: pocketsphinx.c:885
POCKETSPHINX_EXPORT fsg_set_t * ps_get_fsgset(ps_decoder_t *ps)
Get the finite-state grammar set object for this decoder.
Definition: pocketsphinx.c:423
s3wid_t dict_add_word(dict_t *d, char const *word, s3cipid_t const *p, int32 np)
Add a word with the given ciphone pronunciation list to the dictionary.
Definition: dict.c:80
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
ps_latlink_t * last_link
Final link in best path.
dict2pid_t * dict2pid_build(bin_mdef_t *mdef, dict_t *dict)
Build the dict2pid structure for the given model/dictionary.
Definition: dict2pid.c:388
acmod_t * acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
Initialize an acoustic model.
Definition: acmod.c:233
frame_idx_t n_feat_frame
Number of frames active in feat_buf.
Definition: acmod.h:192
POCKETSPHINX_EXPORT fe_t * ps_get_fe(ps_decoder_t *ps)
Get the feature extraction object for this decoder.
Definition: pocketsphinx.c:362
V-table for search algorithm.
ps_search_t * pls
Phoneme loop for lookahead.
ps_seg_t * ps_astar_seg_iter(ps_astar_t *astar, ps_latpath_t *path, float32 lwf)
Get hypothesis segmentation from A* search.
Definition: ps_lattice.c:1851
ps_lattice_t * dag
Current hypothesis word graph.
POCKETSPHINX_EXPORT ps_decoder_t * ps_init(cmd_ln_t *config)
Initialize the decoder from a configuration object.
Definition: pocketsphinx.c:299
Base structure for hypothesis segmentation iterator.
cmd_ln_t * config
Configuration.
#define dict_size(d)
Packaged macro access to dictionary members.
Definition: dict.h:154
int32 score
Exact score from start node up to node->sf.
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
Definition: ps_lattice.c:688
POCKETSPHINX_EXPORT int ps_get_n_frames(ps_decoder_t *ps)
Get the number of frames of data searched.
Acoustic model structure.
Definition: acmod.h:148
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:148
void ps_search_deinit(ps_search_t *search)
De-initialize base structure.
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
Definition: acmod.c:419
POCKETSPHINX_EXPORT char const * ps_seg_word(ps_seg_t *seg)
Get word string from a segmentation iterator.
Definition: pocketsphinx.c:927
uint32 uttno
Utterance counter.
frame_idx_t sf
Start frame.
int pl_window
Window size for phoneme lookahead.
POCKETSPHINX_EXPORT char const * ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score, char const **out_uttid)
Get hypothesis string and path score.
Definition: pocketsphinx.c:872