PocketSphinx  0.6
ngram_search.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
42 /* System headers. */
43 #include <string.h>
44 #include <assert.h>
45 
46 /* SphinxBase headers. */
47 #include <sphinxbase/ckd_alloc.h>
48 #include <sphinxbase/listelem_alloc.h>
49 #include <sphinxbase/err.h>
50 
51 /* Local headers. */
52 #include "pocketsphinx_internal.h"
53 #include "ps_lattice_internal.h"
54 #include "ngram_search.h"
55 #include "ngram_search_fwdtree.h"
56 #include "ngram_search_fwdflat.h"
57 
58 static int ngram_search_start(ps_search_t *search);
59 static int ngram_search_step(ps_search_t *search, int frame_idx);
60 static int ngram_search_finish(ps_search_t *search);
61 static int ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p);
62 static char const *ngram_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final);
63 static int32 ngram_search_prob(ps_search_t *search);
64 static ps_seg_t *ngram_search_seg_iter(ps_search_t *search, int32 *out_score);
65 
66 static ps_searchfuncs_t ngram_funcs = {
67  /* name: */ "ngram",
68  /* start: */ ngram_search_start,
69  /* step: */ ngram_search_step,
70  /* finish: */ ngram_search_finish,
71  /* reinit: */ ngram_search_reinit,
72  /* free: */ ngram_search_free,
73  /* lattice: */ ngram_search_lattice,
74  /* hyp: */ ngram_search_hyp,
75  /* prob: */ ngram_search_prob,
76  /* seg_iter: */ ngram_search_seg_iter,
77 };
78 
79 static void
80 ngram_search_update_widmap(ngram_search_t *ngs)
81 {
82  const char **words;
83  int32 i, n_words;
84 
85  /* It's okay to include fillers since they won't be in the LM */
86  n_words = ps_search_n_words(ngs);
87  words = ckd_calloc(n_words, sizeof(*words));
88  /* This will include alternates, again, that's okay since they aren't in the LM */
89  for (i = 0; i < n_words; ++i)
90  words[i] = (const char *)dict_wordstr(ps_search_dict(ngs), i);
91  ngram_model_set_map_words(ngs->lmset, words, n_words);
92  ckd_free(words);
93 }
94 
95 static void
96 ngram_search_calc_beams(ngram_search_t *ngs)
97 {
98  cmd_ln_t *config;
99  acmod_t *acmod;
100 
101  config = ps_search_config(ngs);
102  acmod = ps_search_acmod(ngs);
103 
104  /* Log beam widths. */
105  ngs->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"))>>SENSCR_SHIFT;
106  ngs->wbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"))>>SENSCR_SHIFT;
107  ngs->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"))>>SENSCR_SHIFT;
108  ngs->lpbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lpbeam"))>>SENSCR_SHIFT;
109  ngs->lponlybeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lponlybeam"))>>SENSCR_SHIFT;
110  ngs->fwdflatbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatbeam"))>>SENSCR_SHIFT;
111  ngs->fwdflatwbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatwbeam"))>>SENSCR_SHIFT;
112 
113  /* Absolute pruning parameters. */
114  ngs->maxwpf = cmd_ln_int32_r(config, "-maxwpf");
115  ngs->maxhmmpf = cmd_ln_int32_r(config, "-maxhmmpf");
116 
117  /* Various penalties which may or may not be useful. */
118  ngs->wip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip")) >>SENSCR_SHIFT;
119  ngs->nwpen = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-nwpen")) >>SENSCR_SHIFT;
120  ngs->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip")) >>SENSCR_SHIFT;
121  ngs->silpen = ngs->pip
122  + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-silprob"))>>SENSCR_SHIFT);
123  ngs->fillpen = ngs->pip
124  + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-fillprob"))>>SENSCR_SHIFT);
125 
126  /* Language weight ratios for fwdflat and bestpath search. */
127  ngs->fwdflat_fwdtree_lw_ratio =
128  cmd_ln_float32_r(config, "-fwdflatlw")
129  / cmd_ln_float32_r(config, "-lw");
130  ngs->bestpath_fwdtree_lw_ratio =
131  cmd_ln_float32_r(config, "-bestpathlw")
132  / cmd_ln_float32_r(config, "-lw");
133 
134  /* Acoustic score scale for posterior probabilities. */
135  ngs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
136 }
137 
138 ps_search_t *
139 ngram_search_init(cmd_ln_t *config,
140  acmod_t *acmod,
141  dict_t *dict,
142  dict2pid_t *d2p)
143 {
144  ngram_search_t *ngs;
145  const char *path;
146 
147  ngs = ckd_calloc(1, sizeof(*ngs));
148  ps_search_init(&ngs->base, &ngram_funcs, config, acmod, dict, d2p);
149  ngs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
150  acmod->tmat->tp, NULL, acmod->mdef->sseq);
151  if (ngs->hmmctx == NULL) {
152  ps_search_free(ps_search_base(ngs));
153  return NULL;
154  }
155  ngs->chan_alloc = listelem_alloc_init(sizeof(chan_t));
156  ngs->root_chan_alloc = listelem_alloc_init(sizeof(root_chan_t));
157  ngs->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t));
158 
159  /* Calculate various beam widths and such. */
160  ngram_search_calc_beams(ngs);
161 
162  /* Allocate a billion different tables for stuff. */
163  ngs->word_chan = ckd_calloc(dict_size(dict),
164  sizeof(*ngs->word_chan));
165  ngs->word_lat_idx = ckd_calloc(dict_size(dict),
166  sizeof(*ngs->word_lat_idx));
167  ngs->word_active = bitvec_alloc(dict_size(dict));
168  ngs->last_ltrans = ckd_calloc(dict_size(dict),
169  sizeof(*ngs->last_ltrans));
170 
171  /* FIXME: All these structures need to be made dynamic with
172  * garbage collection. */
173  ngs->bp_table_size = cmd_ln_int32_r(config, "-latsize");
174  ngs->bp_table = ckd_calloc(ngs->bp_table_size,
175  sizeof(*ngs->bp_table));
176  /* FIXME: This thing is frickin' huge. */
177  ngs->bscore_stack_size = ngs->bp_table_size * 20;
178  ngs->bscore_stack = ckd_calloc(ngs->bscore_stack_size,
179  sizeof(*ngs->bscore_stack));
180  ngs->n_frame_alloc = 256;
181  ngs->bp_table_idx = ckd_calloc(ngs->n_frame_alloc + 1,
182  sizeof(*ngs->bp_table_idx));
183  ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
184 
185  /* Allocate active word list array */
186  ngs->active_word_list = ckd_calloc_2d(2, dict_size(dict),
187  sizeof(**ngs->active_word_list));
188 
189  /* Load language model(s) */
190  if ((path = cmd_ln_str_r(config, "-lmctl"))) {
191  ngs->lmset = ngram_model_set_read(config, path, acmod->lmath);
192  if (ngs->lmset == NULL) {
193  E_ERROR("Failed to read language model control file: %s\n",
194  path);
195  goto error_out;
196  }
197  /* Set the default language model if needed. */
198  if ((path = cmd_ln_str_r(config, "-lmname"))) {
199  ngram_model_set_select(ngs->lmset, path);
200  }
201  }
202  else if ((path = cmd_ln_str_r(config, "-lm"))) {
203  static const char *name = "default";
204  ngram_model_t *lm;
205 
206  lm = ngram_model_read(config, path, NGRAM_AUTO, acmod->lmath);
207  if (lm == NULL) {
208  E_ERROR("Failed to read language model file: %s\n", path);
209  goto error_out;
210  }
211  ngs->lmset = ngram_model_set_init(config,
212  &lm, (char **)&name,
213  NULL, 1);
214  if (ngs->lmset == NULL) {
215  E_ERROR("Failed to initialize language model set\n");
216  goto error_out;
217  }
218  }
219  if (ngs->lmset != NULL
220  && ngram_wid(ngs->lmset, S3_FINISH_WORD) == ngram_unknown_wid(ngs->lmset)) {
221  E_ERROR("Language model/set does not contain </s>, recognition will fail\n");
222  goto error_out;
223  }
224 
225  /* Create word mappings. */
226  ngram_search_update_widmap(ngs);
227 
228  /* Initialize fwdtree, fwdflat, bestpath modules if necessary. */
229  if (cmd_ln_boolean_r(config, "-fwdtree")) {
230  ngram_fwdtree_init(ngs);
231  ngs->fwdtree = TRUE;
232  ngs->fwdtree_perf.name = "fwdtree";
233  ptmr_init(&ngs->fwdtree_perf);
234  }
235  if (cmd_ln_boolean_r(config, "-fwdflat")) {
236  ngram_fwdflat_init(ngs);
237  ngs->fwdflat = TRUE;
238  ngs->fwdflat_perf.name = "fwdflat";
239  ptmr_init(&ngs->fwdflat_perf);
240  }
241  if (cmd_ln_boolean_r(config, "-bestpath")) {
242  ngs->bestpath = TRUE;
243  ngs->bestpath_perf.name = "bestpath";
244  ptmr_init(&ngs->bestpath_perf);
245  }
246 
247  return (ps_search_t *)ngs;
248 
249 error_out:
251  return NULL;
252 }
253 
254 static int
255 ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
256 {
257  ngram_search_t *ngs = (ngram_search_t *)search;
258  int old_n_words;
259  int rv = 0;
260 
261  /* Update the number of words. */
262  old_n_words = search->n_words;
263  if (old_n_words != dict_size(dict)) {
264  search->n_words = dict_size(dict);
265  /* Reallocate these temporary arrays. */
266  ckd_free(ngs->word_lat_idx);
267  ckd_free(ngs->word_active);
268  ckd_free(ngs->last_ltrans);
269  ckd_free_2d(ngs->active_word_list);
270  ngs->word_lat_idx = ckd_calloc(search->n_words, sizeof(*ngs->word_lat_idx));
271  ngs->word_active = bitvec_alloc(search->n_words);
272  ngs->last_ltrans = ckd_calloc(search->n_words, sizeof(*ngs->last_ltrans));
273  ngs->active_word_list
274  = ckd_calloc_2d(2, search->n_words,
275  sizeof(**ngs->active_word_list));
276  }
277 
278  /* Free old dict2pid, dict */
279  ps_search_base_reinit(search, dict, d2p);
280 
281  if (ngs->lmset == NULL)
282  return 0;
283 
284  /* Update beam widths. */
285  ngram_search_calc_beams(ngs);
286 
287  /* Update word mappings. */
288  ngram_search_update_widmap(ngs);
289 
290  /* Now rebuild lextrees. */
291  if (ngs->fwdtree) {
292  if ((rv = ngram_fwdtree_reinit(ngs)) < 0)
293  return rv;
294  }
295  if (ngs->fwdflat) {
296  if ((rv = ngram_fwdflat_reinit(ngs)) < 0)
297  return rv;
298  }
299 
300  return rv;
301 }
302 
303 void
305 {
306  ngram_search_t *ngs = (ngram_search_t *)search;
307 
308  ps_search_deinit(search);
309  if (ngs->fwdtree)
311  if (ngs->fwdflat)
313  if (ngs->bestpath) {
314  double n_speech = (double)ngs->n_tot_frame
315  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
316 
317  E_INFO("TOTAL bestpath %.2f CPU %.3f xRT\n",
318  ngs->bestpath_perf.t_tot_cpu,
319  ngs->bestpath_perf.t_tot_cpu / n_speech);
320  E_INFO("TOTAL bestpath %.2f wall %.3f xRT\n",
321  ngs->bestpath_perf.t_tot_elapsed,
322  ngs->bestpath_perf.t_tot_elapsed / n_speech);
323  }
324 
325  hmm_context_free(ngs->hmmctx);
326  listelem_alloc_free(ngs->chan_alloc);
327  listelem_alloc_free(ngs->root_chan_alloc);
328  listelem_alloc_free(ngs->latnode_alloc);
329  ngram_model_free(ngs->lmset);
330 
331  ckd_free(ngs->word_chan);
332  ckd_free(ngs->word_lat_idx);
333  bitvec_free(ngs->word_active);
334  ckd_free(ngs->bp_table);
335  ckd_free(ngs->bscore_stack);
336  if (ngs->bp_table_idx != NULL)
337  ckd_free(ngs->bp_table_idx - 1);
338  ckd_free_2d(ngs->active_word_list);
339  ckd_free(ngs->last_ltrans);
340  ckd_free(ngs);
341 }
342 
343 int
345 {
346  if (frame_idx >= ngs->n_frame_alloc) {
347  ngs->n_frame_alloc *= 2;
348  ngs->bp_table_idx = ckd_realloc(ngs->bp_table_idx - 1,
349  (ngs->n_frame_alloc + 1)
350  * sizeof(*ngs->bp_table_idx));
351  if (ngs->frm_wordlist) {
352  ngs->frm_wordlist = ckd_realloc(ngs->frm_wordlist,
353  ngs->n_frame_alloc
354  * sizeof(*ngs->frm_wordlist));
355  }
356  ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
357  }
358  ngs->bp_table_idx[frame_idx] = ngs->bpidx;
359  return ngs->bpidx;
360 }
361 
362 static void
363 set_real_wid(ngram_search_t *ngs, int32 bp)
364 {
365  bptbl_t *ent, *prev;
366 
367  assert(bp != NO_BP);
368  ent = ngs->bp_table + bp;
369  if (ent->bp == NO_BP)
370  prev = NULL;
371  else
372  prev = ngs->bp_table + ent->bp;
373 
374  /* Propagate lm state for fillers, rotate it for words. */
375  if (dict_filler_word(ps_search_dict(ngs), ent->wid)) {
376  if (prev != NULL) {
377  ent->real_wid = prev->real_wid;
378  ent->prev_real_wid = prev->prev_real_wid;
379  }
380  else {
381  ent->real_wid = dict_basewid(ps_search_dict(ngs),
382  ent->wid);
383  ent->prev_real_wid = BAD_S3WID;
384  }
385  }
386  else {
387  ent->real_wid = dict_basewid(ps_search_dict(ngs), ent->wid);
388  if (prev != NULL)
389  ent->prev_real_wid = prev->real_wid;
390  else
391  ent->prev_real_wid = BAD_S3WID;
392  }
393 }
394 
395 #define NGRAM_HISTORY_LONG_WORD 2000 /* 20s */
396 
397 void
399  int32 w, int32 score, int32 path, int32 rc)
400 {
401  int32 bp;
402 
403  /* Look for an existing exit for this word in this frame. The
404  * only reason one would exist is from a different right context
405  * triphone, but of course that happens quite frequently. */
406  bp = ngs->word_lat_idx[w];
407  if (bp != NO_BP) {
408 
409  if (frame_idx - ngs->bp_table[path].frame > NGRAM_HISTORY_LONG_WORD) {
410  E_WARN("Word '%s' survived for %d frames, potential overpruning\n", dict_wordstr(ps_search_dict(ngs), w),
411  frame_idx - ngs->bp_table[path].frame);
412  }
413 
414  /* Keep only the best scoring one, we will reconstruct the
415  * others from the right context scores - usually the history
416  * is not lost. */
417  if (ngs->bp_table[bp].score WORSE_THAN score) {
418  assert(path != bp); /* Pathological. */
419  if (ngs->bp_table[bp].bp != path) {
420  int32 bplh[2], newlh[2];
421  /* But, sometimes, the history *is* lost. If we wanted to
422  * do exact language model scoring we'd have to preserve
423  * these alternate histories. */
424  E_DEBUG(2,("Updating path history %d => %d frame %d\n",
425  ngs->bp_table[bp].bp, path, frame_idx));
426  bplh[0] = ngs->bp_table[bp].bp == -1
427  ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].prev_real_wid;
428  bplh[1] = ngs->bp_table[bp].bp == -1
429  ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].real_wid;
430  newlh[0] = path == -1
431  ? -1 : ngs->bp_table[path].prev_real_wid;
432  newlh[1] = path == -1
433  ? -1 : ngs->bp_table[path].real_wid;
434  /* Actually it's worth checking how often the actual
435  * language model state changes. */
436  if (bplh[0] != newlh[0] || bplh[1] != newlh[1]) {
437  /* It's fairly rare that the actual language model
438  * state changes, but it does happen some
439  * times. */
440  E_DEBUG(1, ("Updating language model state %s,%s => %s,%s frame %d\n",
441  dict_wordstr(ps_search_dict(ngs), bplh[0]),
442  dict_wordstr(ps_search_dict(ngs), bplh[1]),
443  dict_wordstr(ps_search_dict(ngs), newlh[0]),
444  dict_wordstr(ps_search_dict(ngs), newlh[1]),
445  frame_idx));
446  set_real_wid(ngs, bp);
447  }
448  ngs->bp_table[bp].bp = path;
449  }
450  ngs->bp_table[bp].score = score;
451  }
452  /* But do keep track of scores for all right contexts, since
453  * we need them to determine the starting path scores for any
454  * successors of this word exit. */
455  if (ngs->bp_table[bp].s_idx != -1)
456  ngs->bscore_stack[ngs->bp_table[bp].s_idx + rc] = score;
457  }
458  else {
459  int32 i, rcsize;
460  bptbl_t *be;
461 
462  /* This might happen if recognition fails. */
463  if (ngs->bpidx == NO_BP) {
464  E_ERROR("No entries in backpointer table!");
465  return;
466  }
467 
468  /* Expand the backpointer tables if necessary. */
469  if (ngs->bpidx >= ngs->bp_table_size) {
470  ngs->bp_table_size *= 2;
471  ngs->bp_table = ckd_realloc(ngs->bp_table,
472  ngs->bp_table_size
473  * sizeof(*ngs->bp_table));
474  E_INFO("Resized backpointer table to %d entries\n", ngs->bp_table_size);
475  }
476  if (ngs->bss_head >= ngs->bscore_stack_size
477  - bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef)) {
478  ngs->bscore_stack_size *= 2;
479  ngs->bscore_stack = ckd_realloc(ngs->bscore_stack,
480  ngs->bscore_stack_size
481  * sizeof(*ngs->bscore_stack));
482  E_INFO("Resized score stack to %d entries\n", ngs->bscore_stack_size);
483  }
484 
485  ngs->word_lat_idx[w] = ngs->bpidx;
486  be = &(ngs->bp_table[ngs->bpidx]);
487  be->wid = w;
488  be->frame = frame_idx;
489  be->bp = path;
490  be->score = score;
491  be->s_idx = ngs->bss_head;
492  be->valid = TRUE;
493  assert(path != ngs->bpidx);
494 
495  /* DICT2PID */
496  /* Get diphone ID for final phone and number of ssids corresponding to it. */
497  be->last_phone = dict_last_phone(ps_search_dict(ngs),w);
498  if (dict_is_single_phone(ps_search_dict(ngs), w)) {
499  be->last2_phone = -1;
500  be->s_idx = -1;
501  rcsize = 0;
502  }
503  else {
504  be->last2_phone = dict_second_last_phone(ps_search_dict(ngs),w);
505  rcsize = dict2pid_rssid(ps_search_dict2pid(ngs),
506  be->last_phone, be->last2_phone)->n_ssid;
507  }
508  /* Allocate some space on the bscore_stack for all of these triphones. */
509  for (i = 0; i < rcsize; ++i)
510  ngs->bscore_stack[ngs->bss_head + i] = WORST_SCORE;
511  if (rcsize)
512  ngs->bscore_stack[ngs->bss_head + rc] = score;
513  set_real_wid(ngs, ngs->bpidx);
514 
515  ngs->bpidx++;
516  ngs->bss_head += rcsize;
517  }
518 }
519 
520 int
521 ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score, int32 *out_is_final)
522 {
523  /* End of backpointers for this frame. */
524  int end_bpidx;
525  int best_exit, bp;
526  int32 best_score;
527 
528  /* No hypothesis means no exit node! */
529  if (ngs->n_frame == 0)
530  return NO_BP;
531 
532  if (frame_idx == -1 || frame_idx >= ngs->n_frame)
533  frame_idx = ngs->n_frame - 1;
534  end_bpidx = ngs->bp_table_idx[frame_idx];
535 
536  best_score = WORST_SCORE;
537  best_exit = NO_BP;
538 
539  /* Scan back to find a frame with some backpointers in it. */
540  while (frame_idx >= 0 && ngs->bp_table_idx[frame_idx] == end_bpidx)
541  --frame_idx;
542  /* This is NOT an error, it just means there is no hypothesis yet. */
543  if (frame_idx < 0)
544  return NO_BP;
545 
546  /* Now find the entry for </s> OR the best scoring entry. */
547  assert(end_bpidx < ngs->bp_table_size);
548  for (bp = ngs->bp_table_idx[frame_idx]; bp < end_bpidx; ++bp) {
549  if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs)
550  || ngs->bp_table[bp].score BETTER_THAN best_score) {
551  best_score = ngs->bp_table[bp].score;
552  best_exit = bp;
553  }
554  if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs))
555  break;
556  }
557 
558  if (out_best_score) {
559  *out_best_score = best_score;
560  }
561  if (out_is_final) {
562  *out_is_final = (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs));
563  }
564  return best_exit;
565 }
566 
567 char const *
569 {
570  ps_search_t *base = ps_search_base(ngs);
571  char *c;
572  size_t len;
573  int bp;
574 
575  if (bpidx == NO_BP)
576  return NULL;
577 
578  bp = bpidx;
579  len = 0;
580  while (bp != NO_BP) {
581  bptbl_t *be = &ngs->bp_table[bp];
582  bp = be->bp;
583  if (dict_real_word(ps_search_dict(ngs), be->wid))
584  len += strlen(dict_basestr(ps_search_dict(ngs), be->wid)) + 1;
585  }
586 
587  ckd_free(base->hyp_str);
588  if (len == 0) {
589  base->hyp_str = NULL;
590  return base->hyp_str;
591  }
592  base->hyp_str = ckd_calloc(1, len);
593 
594  bp = bpidx;
595  c = base->hyp_str + len - 1;
596  while (bp != NO_BP) {
597  bptbl_t *be = &ngs->bp_table[bp];
598  size_t len;
599 
600  bp = be->bp;
601  if (dict_real_word(ps_search_dict(ngs), be->wid)) {
602  len = strlen(dict_basestr(ps_search_dict(ngs), be->wid));
603  c -= len;
604  memcpy(c, dict_basestr(ps_search_dict(ngs), be->wid), len);
605  if (c > base->hyp_str) {
606  --c;
607  *c = ' ';
608  }
609  }
610  }
611 
612  return base->hyp_str;
613 }
614 
615 void
617 {
618  chan_t *hmm, *thmm;
619  xwdssid_t *rssid;
620  int32 i, tmatid, ciphone;
621 
622  /* DICT2PID */
623  /* Get pointer to array of triphones for final diphone. */
624  assert(!dict_is_single_phone(ps_search_dict(ngs), w));
625  ciphone = dict_last_phone(ps_search_dict(ngs),w);
626  rssid = dict2pid_rssid(ps_search_dict2pid(ngs),
627  ciphone,
628  dict_second_last_phone(ps_search_dict(ngs),w));
629  tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
630  hmm = ngs->word_chan[w];
631  if ((hmm == NULL) || (hmm_nonmpx_ssid(&hmm->hmm) != rssid->ssid[0])) {
632  hmm = listelem_malloc(ngs->chan_alloc);
633  hmm->next = ngs->word_chan[w];
634  ngs->word_chan[w] = hmm;
635 
636  hmm->info.rc_id = 0;
637  hmm->ciphone = ciphone;
638  hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[0], tmatid);
639  E_DEBUG(3,("allocated rc_id 0 ssid %d ciphone %d lc %d word %s\n",
640  rssid->ssid[0], hmm->ciphone,
641  dict_second_last_phone(ps_search_dict(ngs),w),
642  dict_wordstr(ps_search_dict(ngs),w)));
643  }
644  for (i = 1; i < rssid->n_ssid; ++i) {
645  if ((hmm->next == NULL) || (hmm_nonmpx_ssid(&hmm->next->hmm) != rssid->ssid[i])) {
646  thmm = listelem_malloc(ngs->chan_alloc);
647  thmm->next = hmm->next;
648  hmm->next = thmm;
649  hmm = thmm;
650 
651  hmm->info.rc_id = i;
652  hmm->ciphone = ciphone;
653  hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[i], tmatid);
654  E_DEBUG(3,("allocated rc_id %d ssid %d ciphone %d lc %d word %s\n",
655  i, rssid->ssid[i], hmm->ciphone,
656  dict_second_last_phone(ps_search_dict(ngs),w),
657  dict_wordstr(ps_search_dict(ngs),w)));
658  }
659  else
660  hmm = hmm->next;
661  }
662 }
663 
664 void
666 {
667  chan_t *hmm, *thmm;
668 
669  for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
670  thmm = hmm->next;
671  hmm_deinit(&hmm->hmm);
672  listelem_free(ngs->chan_alloc, hmm);
673  }
674  ngs->word_chan[w] = NULL;
675 }
676 
677 int32
679 {
680  /* DICT2PID */
681  /* Get the mapping from right context phone ID to index in the
682  * right context table and the bscore_stack. */
683  if (pbe->last2_phone == -1) {
684  /* No right context for single phone predecessor words. */
685  return pbe->score;
686  }
687  else {
688  xwdssid_t *rssid;
689  /* Find the index for the last diphone of the previous word +
690  * the first phone of the current word. */
691  rssid = dict2pid_rssid(ps_search_dict2pid(ngs),
692  pbe->last_phone, pbe->last2_phone);
693  /* This may be WORST_SCORE, which means that there was no exit
694  * with rcphone as right context. */
695  return ngs->bscore_stack[pbe->s_idx + rssid->cimap[rcphone]];
696  }
697 }
698 
699 /*
700  * Compute acoustic and LM scores for a BPTable entry (segment).
701  */
702 void
703 ngram_compute_seg_score(ngram_search_t *ngs, bptbl_t *be, float32 lwf,
704  int32 *out_ascr, int32 *out_lscr)
705 {
706  bptbl_t *pbe;
707  int32 start_score;
708 
709  /* Start of utterance. */
710  if (be->bp == NO_BP) {
711  *out_ascr = be->score;
712  *out_lscr = 0;
713  return;
714  }
715 
716  /* Otherwise, calculate lscr and ascr. */
717  pbe = ngs->bp_table + be->bp;
718  start_score = ngram_search_exit_score(ngs, pbe,
719  dict_first_phone(ps_search_dict(ngs),be->wid));
720  assert(start_score BETTER_THAN WORST_SCORE);
721 
722  /* FIXME: These result in positive acoustic scores when filler
723  words have non-filler pronunciations. That whole business
724  is still pretty much broken but at least it doesn't
725  segfault. */
726  if (be->wid == ps_search_silence_wid(ngs)) {
727  *out_lscr = ngs->silpen;
728  }
729  else if (dict_filler_word(ps_search_dict(ngs), be->wid)) {
730  *out_lscr = ngs->fillpen;
731  }
732  else {
733  int32 n_used;
734  *out_lscr = ngram_tg_score(ngs->lmset,
735  be->real_wid,
736  pbe->real_wid,
737  pbe->prev_real_wid,
738  &n_used)>>SENSCR_SHIFT;
739  *out_lscr = *out_lscr * lwf;
740  }
741  *out_ascr = be->score - start_score - *out_lscr;
742 }
743 
744 static int
745 ngram_search_start(ps_search_t *search)
746 {
747  ngram_search_t *ngs = (ngram_search_t *)search;
748 
749  ngs->done = FALSE;
750  ngram_model_flush(ngs->lmset);
751  if (ngs->fwdtree)
752  ngram_fwdtree_start(ngs);
753  else if (ngs->fwdflat)
754  ngram_fwdflat_start(ngs);
755  else
756  return -1;
757  return 0;
758 }
759 
760 static int
761 ngram_search_step(ps_search_t *search, int frame_idx)
762 {
763  ngram_search_t *ngs = (ngram_search_t *)search;
764 
765  if (ngs->fwdtree)
766  return ngram_fwdtree_search(ngs, frame_idx);
767  else if (ngs->fwdflat)
768  return ngram_fwdflat_search(ngs, frame_idx);
769  else
770  return -1;
771 }
772 
773 void
774 dump_bptable(ngram_search_t *ngs)
775 {
776  int i;
777  E_INFO("Backpointer table (%d entries):\n", ngs->bpidx);
778  for (i = 0; i < ngs->bpidx; ++i) {
779  bptbl_t *bpe = ngs->bp_table + i;
780  int j, rcsize;
781 
782  E_INFO_NOFN("%-5d %-10s start %-3d end %-3d score %-8d bp %-3d real_wid %-5d prev_real_wid %-5d",
783  i, dict_wordstr(ps_search_dict(ngs), bpe->wid),
784  (bpe->bp == -1
785  ? 0 : ngs->bp_table[bpe->bp].frame + 1),
786  bpe->frame, bpe->score, bpe->bp,
787  bpe->real_wid, bpe->prev_real_wid);
788 
789  if (bpe->last2_phone == -1)
790  rcsize = 0;
791  else
792  rcsize = dict2pid_rssid(ps_search_dict2pid(ngs),
793  bpe->last_phone, bpe->last2_phone)->n_ssid;
794  if (rcsize) {
795  E_INFOCONT("\tbss");
796  for (j = 0; j < rcsize; ++j)
797  if (ngs->bscore_stack[bpe->s_idx + j] != WORST_SCORE)
798  E_INFOCONT(" %d", bpe->score - ngs->bscore_stack[bpe->s_idx + j]);
799  }
800  E_INFOCONT("\n");
801  }
802 }
803 
804 static int
805 ngram_search_finish(ps_search_t *search)
806 {
807  ngram_search_t *ngs = (ngram_search_t *)search;
808 
809  ngs->n_tot_frame += ngs->n_frame;
810  if (ngs->fwdtree) {
812  /* dump_bptable(ngs); */
813 
814  /* Now do fwdflat search in its entirety, if requested. */
815  if (ngs->fwdflat) {
816  int i;
817  /* Rewind the acoustic model. */
818  if (acmod_rewind(ps_search_acmod(ngs)) < 0)
819  return -1;
820  /* Now redo search. */
821  ngram_fwdflat_start(ngs);
822  i = 0;
823  while (ps_search_acmod(ngs)->n_feat_frame > 0) {
824  int nfr;
825  if ((nfr = ngram_fwdflat_search(ngs, i)) < 0)
826  return nfr;
827  acmod_advance(ps_search_acmod(ngs));
828  ++i;
829  }
831  /* And now, we should have a result... */
832  /* dump_bptable(ngs); */
833  }
834  }
835  else if (ngs->fwdflat) {
837  }
838 
839  /* Mark the current utterance as done. */
840  ngs->done = TRUE;
841  return 0;
842 }
843 
844 static ps_latlink_t *
845 ngram_search_bestpath(ps_search_t *search, int32 *out_score, int backward)
846 {
847  ngram_search_t *ngs = (ngram_search_t *)search;
848 
849  if (search->last_link == NULL) {
850  search->last_link = ps_lattice_bestpath(search->dag, ngs->lmset,
851  ngs->bestpath_fwdtree_lw_ratio,
852  ngs->ascale);
853  if (search->last_link == NULL)
854  return NULL;
855  /* Also calculate betas so we can fill in the posterior
856  * probability field in the segmentation. */
857  if (search->post == 0)
858  search->post = ps_lattice_posterior(search->dag, ngs->lmset,
859  ngs->ascale);
860  }
861  if (out_score)
862  *out_score = search->last_link->path_scr + search->dag->final_node_ascr;
863  return search->last_link;
864 }
865 
866 static char const *
867 ngram_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final)
868 {
869  ngram_search_t *ngs = (ngram_search_t *)search;
870 
871  /* Only do bestpath search if the utterance is complete. */
872  if (ngs->bestpath && ngs->done) {
873  ps_lattice_t *dag;
874  ps_latlink_t *link;
875  char const *hyp;
876  double n_speech;
877 
878  ptmr_reset(&ngs->bestpath_perf);
879  ptmr_start(&ngs->bestpath_perf);
880  if ((dag = ngram_search_lattice(search)) == NULL)
881  return NULL;
882  if ((link = ngram_search_bestpath(search, out_score, FALSE)) == NULL)
883  return NULL;
884  hyp = ps_lattice_hyp(dag, link);
885  ptmr_stop(&ngs->bestpath_perf);
886  n_speech = (double)dag->n_frames
887  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
888  E_INFO("bestpath %.2f CPU %.3f xRT\n",
889  ngs->bestpath_perf.t_cpu,
890  ngs->bestpath_perf.t_cpu / n_speech);
891  E_INFO("bestpath %.2f wall %.3f xRT\n",
892  ngs->bestpath_perf.t_elapsed,
893  ngs->bestpath_perf.t_elapsed / n_speech);
894  return hyp;
895  }
896  else {
897  int32 bpidx;
898 
899  /* fwdtree and fwdflat use same backpointer table. */
900  bpidx = ngram_search_find_exit(ngs, -1, out_score, out_is_final);
901  if (bpidx != NO_BP)
902  return ngram_search_bp_hyp(ngs, bpidx);
903  }
904 
905  return NULL;
906 }
907 
908 static void
909 ngram_search_bp2itor(ps_seg_t *seg, int bp)
910 {
911  ngram_search_t *ngs = (ngram_search_t *)seg->search;
912  bptbl_t *be, *pbe;
913 
914  be = &ngs->bp_table[bp];
915  pbe = be->bp == -1 ? NULL : &ngs->bp_table[be->bp];
916  seg->word = dict_wordstr(ps_search_dict(ngs), be->wid);
917  seg->ef = be->frame;
918  seg->sf = pbe ? pbe->frame + 1 : 0;
919  seg->prob = 0; /* Bogus value... */
920  /* Compute acoustic and LM scores for this segment. */
921  if (pbe == NULL) {
922  seg->ascr = be->score;
923  seg->lscr = 0;
924  seg->lback = 0;
925  }
926  else {
927  int32 start_score;
928 
929  /* Find ending path score of previous word. */
930  start_score = ngram_search_exit_score(ngs, pbe,
931  dict_first_phone(ps_search_dict(ngs), be->wid));
932  assert(start_score BETTER_THAN WORST_SCORE);
933  if (be->wid == ps_search_silence_wid(ngs)) {
934  seg->lscr = ngs->silpen;
935  }
936  else if (dict_filler_word(ps_search_dict(ngs), be->wid)) {
937  seg->lscr = ngs->fillpen;
938  }
939  else {
940  seg->lscr = ngram_tg_score(ngs->lmset,
941  be->real_wid,
942  pbe->real_wid,
943  pbe->prev_real_wid,
944  &seg->lback)>>SENSCR_SHIFT;
945  seg->lscr = (int32)(seg->lscr * seg->lwf);
946  }
947  seg->ascr = be->score - start_score - seg->lscr;
948  }
949 }
950 
951 static void
952 ngram_bp_seg_free(ps_seg_t *seg)
953 {
954  bptbl_seg_t *itor = (bptbl_seg_t *)seg;
955 
956  ckd_free(itor->bpidx);
957  ckd_free(itor);
958 }
959 
960 static ps_seg_t *
961 ngram_bp_seg_next(ps_seg_t *seg)
962 {
963  bptbl_seg_t *itor = (bptbl_seg_t *)seg;
964 
965  if (++itor->cur == itor->n_bpidx) {
966  ngram_bp_seg_free(seg);
967  return NULL;
968  }
969 
970  ngram_search_bp2itor(seg, itor->bpidx[itor->cur]);
971  return seg;
972 }
973 
974 static ps_segfuncs_t ngram_bp_segfuncs = {
975  /* seg_next */ ngram_bp_seg_next,
976  /* seg_free */ ngram_bp_seg_free
977 };
978 
979 static ps_seg_t *
980 ngram_search_bp_iter(ngram_search_t *ngs, int bpidx, float32 lwf)
981 {
982  bptbl_seg_t *itor;
983  int bp, cur;
984 
985  /* Calling this an "iterator" is a bit of a misnomer since we have
986  * to get the entire backtrace in order to produce it. On the
987  * other hand, all we actually need is the bptbl IDs, and we can
988  * allocate a fixed-size array of them. */
989  itor = ckd_calloc(1, sizeof(*itor));
990  itor->base.vt = &ngram_bp_segfuncs;
991  itor->base.search = ps_search_base(ngs);
992  itor->base.lwf = lwf;
993  itor->n_bpidx = 0;
994  bp = bpidx;
995  while (bp != NO_BP) {
996  bptbl_t *be = &ngs->bp_table[bp];
997  bp = be->bp;
998  ++itor->n_bpidx;
999  }
1000  if (itor->n_bpidx == 0) {
1001  ckd_free(itor);
1002  return NULL;
1003  }
1004  itor->bpidx = ckd_calloc(itor->n_bpidx, sizeof(*itor->bpidx));
1005  cur = itor->n_bpidx - 1;
1006  bp = bpidx;
1007  while (bp != NO_BP) {
1008  bptbl_t *be = &ngs->bp_table[bp];
1009  itor->bpidx[cur] = bp;
1010  bp = be->bp;
1011  --cur;
1012  }
1013 
1014  /* Fill in relevant fields for first element. */
1015  ngram_search_bp2itor((ps_seg_t *)itor, itor->bpidx[0]);
1016 
1017  return (ps_seg_t *)itor;
1018 }
1019 
1020 static ps_seg_t *
1021 ngram_search_seg_iter(ps_search_t *search, int32 *out_score)
1022 {
1023  ngram_search_t *ngs = (ngram_search_t *)search;
1024 
1025  /* Only do bestpath search if the utterance is done. */
1026  if (ngs->bestpath && ngs->done) {
1027  ps_lattice_t *dag;
1028  ps_latlink_t *link;
1029  double n_speech;
1030  ps_seg_t *itor;
1031 
1032  ptmr_reset(&ngs->bestpath_perf);
1033  ptmr_start(&ngs->bestpath_perf);
1034  if ((dag = ngram_search_lattice(search)) == NULL)
1035  return NULL;
1036  if ((link = ngram_search_bestpath(search, out_score, TRUE)) == NULL)
1037  return NULL;
1038  itor = ps_lattice_seg_iter(dag, link,
1039  ngs->bestpath_fwdtree_lw_ratio);
1040  ptmr_stop(&ngs->bestpath_perf);
1041  n_speech = (double)dag->n_frames
1042  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
1043  E_INFO("bestpath %.2f CPU %.3f xRT\n",
1044  ngs->bestpath_perf.t_cpu,
1045  ngs->bestpath_perf.t_cpu / n_speech);
1046  E_INFO("bestpath %.2f wall %.3f xRT\n",
1047  ngs->bestpath_perf.t_elapsed,
1048  ngs->bestpath_perf.t_elapsed / n_speech);
1049  return itor;
1050  }
1051  else {
1052  int32 bpidx;
1053 
1054  /* fwdtree and fwdflat use same backpointer table. */
1055  bpidx = ngram_search_find_exit(ngs, -1, out_score, NULL);
1056  return ngram_search_bp_iter(ngs, bpidx,
1057  /* but different language weights... */
1058  (ngs->done && ngs->fwdflat)
1059  ? ngs->fwdflat_fwdtree_lw_ratio : 1.0);
1060  }
1061 
1062  return NULL;
1063 }
1064 
1065 static int32
1066 ngram_search_prob(ps_search_t *search)
1067 {
1068  ngram_search_t *ngs = (ngram_search_t *)search;
1069 
1070  /* Only do bestpath search if the utterance is done. */
1071  if (ngs->bestpath && ngs->done) {
1072  ps_lattice_t *dag;
1073  ps_latlink_t *link;
1074 
1075  if ((dag = ngram_search_lattice(search)) == NULL)
1076  return 0;
1077  if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
1078  return 0;
1079  return search->post;
1080  }
1081  else {
1082  /* FIXME: Give some kind of good estimate here, eventually. */
1083  return 0;
1084  }
1085 }
1086 
1087 static void
1088 create_dag_nodes(ngram_search_t *ngs, ps_lattice_t *dag)
1089 {
1090  bptbl_t *bp_ptr;
1091  int32 i;
1092 
1093  for (i = 0, bp_ptr = ngs->bp_table; i < ngs->bpidx; ++i, ++bp_ptr) {
1094  int32 sf, ef, wid;
1095  ps_latnode_t *node;
1096 
1097  /* Skip invalid backpointers (these result from -maxwpf pruning) */
1098  if (!bp_ptr->valid)
1099  continue;
1100 
1101  sf = (bp_ptr->bp < 0) ? 0 : ngs->bp_table[bp_ptr->bp].frame + 1;
1102  ef = bp_ptr->frame;
1103  wid = bp_ptr->wid;
1104 
1105  assert(ef < dag->n_frames);
1106  /* Skip non-final </s> entries. */
1107  if ((wid == ps_search_finish_wid(ngs)) && (ef < dag->n_frames - 1))
1108  continue;
1109 
1110  /* Skip if word not in LM */
1111  if ((!dict_filler_word(ps_search_dict(ngs), wid))
1112  && (!ngram_model_set_known_wid(ngs->lmset,
1113  dict_basewid(ps_search_dict(ngs), wid))))
1114  continue;
1115 
1116  /* See if bptbl entry <wid,sf> already in lattice */
1117  for (node = dag->nodes; node; node = node->next) {
1118  if ((node->wid == wid) && (node->sf == sf))
1119  break;
1120  }
1121 
1122  /* For the moment, store bptbl indices in node.{fef,lef} */
1123  if (node)
1124  node->lef = i;
1125  else {
1126  /* New node; link to head of list */
1127  node = listelem_malloc(dag->latnode_alloc);
1128  node->wid = wid;
1129  node->sf = sf; /* This is a frame index. */
1130  node->fef = node->lef = i; /* These are backpointer indices (argh) */
1131  node->reachable = FALSE;
1132  node->entries = NULL;
1133  node->exits = NULL;
1134 
1135  /* NOTE: This creates the list of nodes in reverse
1136  * topological order, i.e. a node always precedes its
1137  * antecedents in this list. */
1138  node->next = dag->nodes;
1139  dag->nodes = node;
1140  ++dag->n_nodes;
1141  }
1142  }
1143 }
1144 
1145 static ps_latnode_t *
1146 find_start_node(ngram_search_t *ngs, ps_lattice_t *dag)
1147 {
1148  ps_latnode_t *node;
1149 
1150  /* Find start node <s>.0 */
1151  for (node = dag->nodes; node; node = node->next) {
1152  if ((node->wid == ps_search_start_wid(ngs)) && (node->sf == 0))
1153  break;
1154  }
1155  if (!node) {
1156  /* This is probably impossible. */
1157  E_ERROR("Couldn't find <s> in first frame\n");
1158  return NULL;
1159  }
1160  return node;
1161 }
1162 
1163 static ps_latnode_t *
1164 find_end_node(ngram_search_t *ngs, ps_lattice_t *dag, float32 lwf)
1165 {
1166  ps_latnode_t *node;
1167  int32 ef, bestbp, bp, bestscore;
1168 
1169  /* Find final node </s>.last_frame; nothing can follow this node */
1170  for (node = dag->nodes; node; node = node->next) {
1171  int32 lef = ngs->bp_table[node->lef].frame;
1172  if ((node->wid == ps_search_finish_wid(ngs))
1173  && (lef == dag->n_frames - 1))
1174  break;
1175  }
1176  if (node != NULL)
1177  return node;
1178 
1179  /* It is quite likely that no </s> exited in the last frame. So,
1180  * find the node corresponding to the best exit. */
1181  /* Find the last frame containing a word exit. */
1182  for (ef = dag->n_frames - 1;
1183  ef >= 0 && ngs->bp_table_idx[ef] == ngs->bpidx;
1184  --ef);
1185  if (ef < 0) {
1186  E_ERROR("Empty backpointer table: can not build DAG.\n");
1187  return NULL;
1188  }
1189 
1190  /* Find best word exit in that frame. */
1191  bestscore = WORST_SCORE;
1192  bestbp = NO_BP;
1193  for (bp = ngs->bp_table_idx[ef]; bp < ngs->bp_table_idx[ef + 1]; ++bp) {
1194  int32 n_used, l_scr, wid, prev_wid;
1195  wid = ngs->bp_table[bp].real_wid;
1196  prev_wid = ngs->bp_table[bp].prev_real_wid;
1197  /* Always prefer </s>, of which there will only be one per frame. */
1198  if (wid == ps_search_finish_wid(ngs)) {
1199  bestbp = bp;
1200  break;
1201  }
1202  l_scr = ngram_tg_score(ngs->lmset, ps_search_finish_wid(ngs),
1203  wid, prev_wid, &n_used) >>SENSCR_SHIFT;
1204  l_scr = l_scr * lwf;
1205  if (ngs->bp_table[bp].score + l_scr BETTER_THAN bestscore) {
1206  bestscore = ngs->bp_table[bp].score + l_scr;
1207  bestbp = bp;
1208  }
1209  }
1210  if (bestbp == NO_BP) {
1211  E_ERROR("No word exits found in last frame (%d), assuming no recognition\n", ef);
1212  return NULL;
1213  }
1214  E_INFO("</s> not found in last frame, using %s.%d instead\n",
1215  dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid), ef);
1216 
1217  /* Now find the node that corresponds to it. */
1218  for (node = dag->nodes; node; node = node->next) {
1219  if (node->lef == bestbp)
1220  return node;
1221  }
1222 
1223  /* FIXME: This seems to happen a lot! */
1224  E_ERROR("Failed to find DAG node corresponding to %s\n",
1225  dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid));
1226  return NULL;
1227 }
1228 
1229 /*
1230  * Build lattice from bptable.
1231  */
1232 ps_lattice_t *
1234 {
1235  int32 i, score, ascr, lscr;
1236  ps_latnode_t *node, *from, *to;
1237  ngram_search_t *ngs;
1238  ps_lattice_t *dag;
1239  int min_endfr, nlink;
1240  float lwf;
1241 
1242  ngs = (ngram_search_t *)search;
1243  min_endfr = cmd_ln_int32_r(ps_search_config(search), "-min_endfr");
1244 
1245  /* If the best score is WORST_SCORE or worse, there is no way to
1246  * make a lattice. */
1248  return NULL;
1249 
1250  /* Check to see if a lattice has previously been created over the
1251  * same number of frames, and reuse it if so. */
1252  if (search->dag && search->dag->n_frames == ngs->n_frame)
1253  return search->dag;
1254 
1255  /* Nope, create a new one. */
1256  ps_lattice_free(search->dag);
1257  search->dag = NULL;
1258  dag = ps_lattice_init_search(search, ngs->n_frame);
1259  /* Compute these such that they agree with the fwdtree language weight. */
1260  lwf = ngs->fwdflat ? ngs->fwdflat_fwdtree_lw_ratio : 1.0;
1261  create_dag_nodes(ngs, dag);
1262  if ((dag->start = find_start_node(ngs, dag)) == NULL)
1263  goto error_out;
1264  if ((dag->end = find_end_node(ngs, dag, ngs->bestpath_fwdtree_lw_ratio)) == NULL)
1265  goto error_out;
1266  E_INFO("lattice start node %s.%d end node %s.%d\n",
1267  dict_wordstr(search->dict, dag->start->wid), dag->start->sf,
1268  dict_wordstr(search->dict, dag->end->wid), dag->end->sf);
1269 
1270  ngram_compute_seg_score(ngs, ngs->bp_table + dag->end->lef, lwf,
1271  &dag->final_node_ascr, &lscr);
1272 
1273  /*
1274  * At this point, dag->nodes is ordered such that nodes earlier in
1275  * the list can follow (in time) those later in the list, but not
1276  * vice versa (see above - also note that adjacency is purely
1277  * determined by time which is why we can make this claim). Now
1278  * create precedence links and simultanesously mark all nodes that
1279  * can reach dag->end. (All nodes are reached from dag->start
1280  * simply by definition - they were created that way).
1281  *
1282  * Note that this also means that any nodes before dag->end in the
1283  * list can be discarded, meaning that dag->end will always be
1284  * equal to dag->nodes (FIXME: except when loading from a file but
1285  * we can fix that...)
1286  */
1287  i = 0;
1288  while (dag->nodes && dag->nodes != dag->end) {
1289  ps_latnode_t *next = dag->nodes->next;
1290  listelem_free(dag->latnode_alloc, dag->nodes);
1291  dag->nodes = next;
1292  ++i;
1293  }
1294  E_INFO("Eliminated %d nodes before end node\n", i);
1295  dag->end->reachable = TRUE;
1296  nlink = 0;
1297  for (to = dag->end; to; to = to->next) {
1298  int fef, lef;
1299 
1300  /* Skip if not reachable; it will never be reachable from dag->end */
1301  if (!to->reachable)
1302  continue;
1303 
1304  /* Prune nodes with too few endpoints - heuristic
1305  borrowed from Sphinx3 */
1306  fef = ngs->bp_table[to->fef].frame;
1307  lef = ngs->bp_table[to->lef].frame;
1308  if (to != dag->end && lef - fef < min_endfr) {
1309  to->reachable = FALSE;
1310  continue;
1311  }
1312 
1313  /* Find predecessors of to : from->fef+1 <= to->sf <= from->lef+1 */
1314  for (from = to->next; from; from = from->next) {
1315  bptbl_t *from_bpe;
1316 
1317  fef = ngs->bp_table[from->fef].frame;
1318  lef = ngs->bp_table[from->lef].frame;
1319 
1320  if ((to->sf <= fef) || (to->sf > lef + 1))
1321  continue;
1322  if (lef - fef < min_endfr) {
1323  assert(!from->reachable);
1324  continue;
1325  }
1326 
1327  /* Find bptable entry for "from" that exactly precedes "to" */
1328  i = from->fef;
1329  from_bpe = ngs->bp_table + i;
1330  for (; i <= from->lef; i++, from_bpe++) {
1331  if (from_bpe->wid != from->wid)
1332  continue;
1333  if (from_bpe->frame >= to->sf - 1)
1334  break;
1335  }
1336 
1337  if ((i > from->lef) || (from_bpe->frame != to->sf - 1))
1338  continue;
1339 
1340  /* Find acoustic score from.sf->to.sf-1 with right context = to */
1341  /* This gives us from_bpe's best acoustic score. */
1342  ngram_compute_seg_score(ngs, from_bpe, lwf,
1343  &ascr, &lscr);
1344  /* Now find the exact path score for from->to, including
1345  * the appropriate final triphone. In fact this might not
1346  * exist. */
1347  score = ngram_search_exit_score(ngs, from_bpe,
1348  dict_first_phone(ps_search_dict(ngs), to->wid));
1349  /* Does not exist. Can't create a link here. */
1350  if (score == WORST_SCORE)
1351  continue;
1352  /* Adjust the arc score to match the correct triphone. */
1353  else
1354  score = ascr + (score - from_bpe->score);
1355  if (score BETTER_THAN 0) {
1356  /* Scores must be negative, or Bad Things will happen.
1357  In general, they are, except in corner cases
1358  involving filler words. We don't want to throw any
1359  links away so we'll keep these, but with some
1360  arbitrarily improbable but recognizable score. */
1361  ps_lattice_link(dag, from, to, -424242, from_bpe->frame);
1362  ++nlink;
1363  from->reachable = TRUE;
1364  }
1365  else if (score BETTER_THAN WORST_SCORE) {
1366  ps_lattice_link(dag, from, to, score, from_bpe->frame);
1367  ++nlink;
1368  from->reachable = TRUE;
1369  }
1370  }
1371  }
1372 
1373  /* There must be at least one path between dag->start and dag->end */
1374  if (!dag->start->reachable) {
1375  E_ERROR("End node of lattice isolated; unreachable\n");
1376  goto error_out;
1377  }
1378 
1379  for (node = dag->nodes; node; node = node->next) {
1380  /* Change node->{fef,lef} from bptbl indices to frames. */
1381  node->fef = ngs->bp_table[node->fef].frame;
1382  node->lef = ngs->bp_table[node->lef].frame;
1383  /* Find base wid for nodes. */
1384  node->basewid = dict_basewid(search->dict, node->wid);
1385  }
1386 
1387  /* Link nodes with alternate pronunciations at the same timepoint. */
1388  for (node = dag->nodes; node; node = node->next) {
1389  ps_latnode_t *alt;
1390  /* Scan forward to find the next alternate, then stop. */
1391  for (alt = node->next; alt && alt->sf == node->sf; alt = alt->next) {
1392  if (alt->basewid == node->basewid) {
1393  alt->alt = node->alt;
1394  node->alt = alt;
1395  break;
1396  }
1397  }
1398  }
1399  E_INFO("Lattice has %d nodes, %d links\n", dag->n_nodes, nlink);
1400 
1401  /* Minor hack: If the final node is a filler word and not </s>,
1402  * then set its base word ID to </s>, so that the language model
1403  * scores won't be screwed up. */
1404  if (dict_filler_word(ps_search_dict(ngs), dag->end->wid))
1405  dag->end->basewid = ps_search_finish_wid(ngs);
1406 
1407  /* Free nodes unreachable from dag->end and their links */
1409 
1410  /* Build links around silence and filler words, since they do not
1411  * exist in the language model. */
1412  ps_lattice_bypass_fillers(dag, ngs->silpen, ngs->fillpen);
1413 
1414  search->dag = dag;
1415  return dag;
1416 
1417 error_out:
1418  ps_lattice_free(dag);
1419  return NULL;
1420 }
hmm_t hmm
Basic HMM structure.
Definition: ngram_search.h:65
Internal implementation of PocketSphinx decoder.
void ngram_fwdtree_finish(ngram_search_t *ngs)
Finish fwdtree decoding for an utterance.
int16 reachable
From.
int32 n_frame_alloc
Number of frames allocated in bp_table_idx and friends.
Definition: ngram_search.h:307
int32 wid
Word index.
Definition: ngram_search.h:113
void ngram_fwdtree_deinit(ngram_search_t *ngs)
Release memory associated with fwdtree decoding.
Base structure for search module.
void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
Definition: ngram_search.c:616
ps_seg_t * ps_lattice_seg_iter(ps_lattice_t *dag, ps_latlink_t *link, float32 lwf)
Get hypothesis segmentation iterator after bestpath search.
Definition: ps_lattice.c:1029
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
Definition: hmm.c:89
int acmod_rewind(acmod_t *acmod)
Rewind the current utterance, allowing it to be rescored.
Definition: acmod.c:864
int32 lef
Last end frame.
listelem_alloc_t * chan_alloc
For chan_t.
Definition: ngram_search.h:211
void ngram_fwdtree_start(ngram_search_t *ngs)
Start fwdtree decoding for an utterance.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
frame_idx_t frame
start or end frame
Definition: ngram_search.h:110
ps_latnode_t * start
Starting node.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state]...
Definition: tmat.h:110
hmm_context_t * hmmctx
HMM context.
Definition: ngram_search.h:200
ps_segfuncs_t * vt
V-table of seg methods.
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Definition: bin_mdef.h:137
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
Definition: hmm.c:111
int32 lscr
Language model score.
int32 n_words
Number of words known to search (may be less than in the dictionary)
int16 last2_phone
next-to-last phone of this word
Definition: ngram_search.h:120
#define BAD_S3WID
Dictionary word id.
Definition: s3types.h:136
int32 n_ssid
#Unique ssid in above, compressed ssid list
Definition: dict2pid.h:140
frame_idx_t n_frames
Number of frames for this utterance.
int ngram_fwdflat_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
Word graph search implementation.
bitvec_t * word_active
array of active flags for all words.
Definition: ngram_search.h:247
void ngram_fwdflat_finish(ngram_search_t *ngs)
Finish fwdflat decoding for an utterance.
ps_latnode_t * nodes
List of all nodes.
int ngram_fwdflat_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
Get the exit score for a backpointer entry with a given right context.
Definition: ngram_search.c:678
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
listelem_alloc_t * latnode_alloc
Node allocator for this DAG.
int16 n_nodes
Number of nodes in this lattice.
int32 prob
Log posterior probability.
latlink_list_t * entries
Links into this node.
POCKETSPHINX_EXPORT int32 ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset, float32 ascale)
Calculate link posterior probabilities on a word graph.
Definition: ps_lattice.c:1408
struct ps_latnode_s * alt
Node with alternate pronunciation for this word.
char const * word
Word string (pointer into dictionary hash)
int32 ** active_word_list
Array of active multi-phone words for current and next frame.
Definition: ngram_search.h:287
struct chan_s * next
first descendant of this channel; or, in the case of the last phone of a word, the next alternative r...
Definition: ngram_search.h:68
void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w, int32 score, int32 path, int32 rc)
Enter a word in the backpointer table.
Definition: ngram_search.c:398
ps_search_t * search
Search object from whence this came.
int32 final_node_ascr
Acoustic score of implicit link exiting final node.
Lexicon tree based Viterbi search.
int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
Record the current frame&#39;s index in the backpointer table.
Definition: ngram_search.c:344
int32 bp
Back Pointer.
Definition: ngram_search.h:114
int32 rc_id
right-context id for last phone of words
Definition: ngram_search.h:79
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
Definition: dict2pid.h:179
void ngram_fwdflat_start(ngram_search_t *ngs)
Start fwdflat decoding for an utterance.
N-Gram search module structure.
Definition: ngram_search.h:197
int ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
Definition: hmm.c:56
void ps_lattice_delete_unreachable(ps_lattice_t *dag)
Remove nodes marked as unreachable.
Definition: ps_lattice.c:197
ps_latnode_t * end
Ending node.
frame_idx_t sf
Start frame.
int32 real_wid
wid of this or latest predecessor real word
Definition: ngram_search.h:117
int32 prev_real_wid
wid of second-last real word
Definition: ngram_search.h:118
POCKETSPHINX_EXPORT ps_latlink_t * ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, float32 ascale)
Do N-Gram based best-path search on a word graph.
Definition: ps_lattice.c:1238
ps_lattice_t * ngram_search_lattice(ps_search_t *search)
Construct a word lattice from the current hypothesis.
latlink_list_t * exits
Links out of this node.
#define WORST_SCORE
Large "bad" score.
Definition: hmm.h:88
N-Gram based multi-pass search ("FBS")
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
frame_idx_t ef
End frame.
int32 ascr
Acoustic score.
int acmod_advance(acmod_t *acmod)
Advance the frame index.
Definition: acmod.c:886
listelem_alloc_t * latnode_alloc
For latnode_t.
Definition: ngram_search.h:213
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
Definition: dict.c:396
void ngram_fwdtree_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdtree decoding.
Segmentation "iterator" for backpointer table results.
Definition: ngram_search.h:126
ps_latnode_t ** frm_wordlist
List of active words in each frame.
Definition: ngram_search.h:316
Lexical tree node data type for the first phone (root) of each dynamic HMM tree structure.
Definition: ngram_search.h:90
Lexical tree node data type.
Definition: ngram_search.h:64
int32 wid
Dictionary word id.
ps_search_t * ngram_search_init(cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize the N-Gram search module.
Definition: ngram_search.c:139
int16 cur
Current position in bpidx.
Definition: ngram_search.h:130
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:77
a structure for a dictionary.
Definition: dict.h:79
char const * ps_lattice_hyp(ps_lattice_t *dag, ps_latlink_t *link)
Get hypothesis string after bestpath search.
Definition: ps_lattice.c:853
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e.
Definition: dict.c:410
void ngram_search_free(ps_search_t *search)
Finalize the N-Gram search module.
Definition: ngram_search.c:304
Word graph structure used in bestpath/nbest search.
#define WORSE_THAN
Is one score worse than another?
Definition: hmm.h:104
int16 n_bpidx
Number of backpointer IDs.
Definition: ngram_search.h:129
int32 best_score
Best Viterbi path score.
Definition: ngram_search.h:325
Back pointer table (forward pass lattice; actually a tree)
Definition: ngram_search.h:109
cross word triphone model structure
Definition: dict2pid.h:137
int ngram_fwdtree_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
Definition: ngram_search.c:665
int32 post
Utterance posterior probability.
char * hyp_str
Current hypothesis string.
#define BETTER_THAN
Is one score better than another?
Definition: hmm.h:99
dict_t * dict
Pronunciation dictionary.
void ngram_fwdflat_deinit(ngram_search_t *ngs)
Release memory associated with fwdflat decoding.
int32 s_idx
Start of BScoreStack for various right contexts.
Definition: ngram_search.h:116
int32 fef
First end frame.
int32 n_frame
Number of frames actually present.
Definition: ngram_search.h:308
Flat lexicon based Viterbi search.
ngram_model_t * lmset
Set of language models.
Definition: ngram_search.h:199
uint8 valid
For absolute pruning.
Definition: ngram_search.h:111
int32 lback
Language model backoff.
listelem_alloc_t * root_chan_alloc
For root_chan_t.
Definition: ngram_search.h:212
int32 basewid
Dictionary base word id.
int32 ciphone
ciphone for this node
Definition: ngram_search.h:73
void ngram_fwdflat_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdflat decoding.
ps_lattice_t * ps_lattice_init_search(ps_search_t *search, int n_frame)
Construct an empty word graph with reference to a search structure.
Definition: ps_lattice.c:662
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
Definition: hmm.c:80
int32 * bpidx
Sequence of backpointer IDs.
Definition: ngram_search.h:128
chan_t ** word_chan
Channels associated with a given word (only used for right contexts, single-phone words in fwdtree se...
Definition: ngram_search.h:246
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
int ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score, int32 *out_is_final)
Find the best word exit for the current frame in the backpointer table.
Definition: ngram_search.c:521
ps_latlink_t * last_link
Final link in best path.
struct ps_latnode_s * next
Next node in DAG (no ordering implied)
int32 score
Score (best among all right contexts)
Definition: ngram_search.h:115
void ps_lattice_bypass_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen)
Bypass filler words.
Definition: ps_lattice.c:106
V-table for search algorithm.
ps_lattice_t * dag
Current hypothesis word graph.
Base structure for hypothesis segmentation iterator.
#define dict_size(d)
Packaged macro access to dictionary members.
Definition: dict.h:154
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
Definition: dict2pid.h:139
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
Definition: ps_lattice.c:688
ps_seg_t base
Base structure.
Definition: ngram_search.h:127
float32 ascale
Acoustic score scale for posterior probabilities.
Definition: ngram_search.h:333
Acoustic model structure.
Definition: acmod.h:148
float32 lwf
Language weight factor (for second-pass searches)
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:148
void ps_search_deinit(ps_search_t *search)
De-initialize base structure.
POCKETSPHINX_EXPORT void ps_lattice_link(ps_lattice_t *dag, ps_latnode_t *from, ps_latnode_t *to, int32 score, int32 ef)
Create a directed link between "from" and "to" nodes, but if a link already exists, choose one with the best link_scr.
Definition: ps_lattice.c:65
s3ssid_t * ssid
Senone Sequence ID list for all context ciphones.
Definition: dict2pid.h:138
frame_idx_t sf
Start frame.
int16 last_phone
last phone of this word
Definition: ngram_search.h:119
char const * ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx)
Backtrace from a given backpointer index to obtain a word hypothesis.
Definition: ngram_search.c:568