PocketSphinx  0.6
ngram_search_fwdtree.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
42 /* System headers. */
43 #include <string.h>
44 #include <assert.h>
45 
46 /* SphinxBase headers. */
47 #include <sphinxbase/ckd_alloc.h>
48 #include <sphinxbase/listelem_alloc.h>
49 #include <sphinxbase/err.h>
50 
51 /* Local headers. */
52 #include "ngram_search_fwdtree.h"
53 #include "phone_loop_search.h"
54 
55 /* Debug switch: set to 1 so that chan_v_eval() expands to hmm_dump_vit_eval()
 * writing to stderr; with 0 it expands to plain hmm_vit_eval(). */
56 #define __CHAN_DUMP__ 0
57 #if __CHAN_DUMP__
58 #define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
59 #else
60 #define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
61 #endif
62 
63 /*
64  * Allocate that part of the search channel tree structure that is independent of the
65  * LM in use.
66  */
67 static void
68 init_search_tree(ngram_search_t *ngs)
69 {
70  int32 w, ndiph, i, n_words, n_ci;
71  dict_t *dict = ps_search_dict(ngs);
72  bitvec_t *dimap;
73 
74  n_words = ps_search_n_words(ngs);
75  ngs->homophone_set = ckd_calloc(n_words, sizeof(*ngs->homophone_set));
76 
77  /* Find #single phone words, and #unique first diphones (#root channels) in dict. */
78  ndiph = 0;
79  ngs->n_1ph_words = 0;
80  n_ci = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef);
81  /* Allocate a bitvector with flags for each possible diphone. */
82  dimap = bitvec_alloc(n_ci * n_ci);
83  for (w = 0; w < n_words; w++) {
84  if (!dict_real_word(dict, w))
85  continue;
86  if (dict_is_single_phone(dict, w))
87  ++ngs->n_1ph_words;
88  else {
89  int ph0, ph1;
90  ph0 = dict_first_phone(dict, w);
91  ph1 = dict_second_phone(dict, w);
92  /* Increment ndiph the first time we see a diphone. */
93  if (bitvec_is_clear(dimap, ph0 * n_ci + ph1)) {
94  bitvec_set(dimap, ph0 * n_ci + ph1);
95  ++ndiph;
96  }
97  }
98  }
99  E_INFO("%d unique initial diphones\n", ndiph);
100  bitvec_free(dimap);
101 
102  /* Add remaining dict words (</s>, <s>, <sil>, noise words) to single-phone words */
103  ngs->n_1ph_words += dict_num_fillers(dict) + 2;
104  ngs->n_root_chan_alloc = ndiph + 1;
105  /* Verify that these are all *actually* single-phone words,
106  * otherwise really bad things will happen to us. */
107  for (w = 0; w < n_words; ++w) {
108  if (dict_real_word(dict, w))
109  continue;
110  if (!dict_is_single_phone(dict, w)) {
111  E_WARN("Filler word %d = %s has more than one phone, ignoring it.\n",
112  w, dict_wordstr(dict, w));
113  --ngs->n_1ph_words;
114  }
115  }
116 
117  /* Allocate and initialize root channels */
118  ngs->root_chan =
119  ckd_calloc(ngs->n_root_chan_alloc, sizeof(*ngs->root_chan));
120  for (i = 0; i < ngs->n_root_chan_alloc; i++) {
121  hmm_init(ngs->hmmctx, &ngs->root_chan[i].hmm, TRUE, -1, -1);
122  ngs->root_chan[i].penult_phn_wid = -1;
123  ngs->root_chan[i].next = NULL;
124  }
125 
126  /* Permanently allocate and initialize channels for single-phone
127  * words (1/word). */
128  ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph));
129  i = 0;
130  for (w = 0; w < n_words; w++) {
131  if (!dict_is_single_phone(dict, w))
132  continue;
133  /* Use SIL as right context for these. */
134  ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef);
135  ngs->rhmm_1ph[i].ciphone = dict_first_phone(dict, w);
136  hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE,
137  bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ngs->rhmm_1ph[i].ciphone),
138  bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ngs->rhmm_1ph[i].ciphone));
139  ngs->rhmm_1ph[i].next = NULL;
140 
141  ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]);
142  i++;
143  }
144 
145  ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words,
146  sizeof(*ngs->single_phone_wid));
147  E_INFO("%d root, %d non-root channels, %d single-phone words\n",
148  ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
149 }
150 
151 /*
152  * One-time initialization of internal channels in HMM tree.
153  */
154 static void
155 init_nonroot_chan(ngram_search_t *ngs, chan_t * hmm, int32 ph, int32 ci, int32 tmatid)
156 {
157  hmm->next = NULL;
158  hmm->alt = NULL;
159  hmm->info.penult_phn_wid = -1;
160  hmm->ciphone = ci;
161  hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, ph, tmatid);
162 }
163 
164 /*
165  * Allocate and initialize search channel-tree structure.
166  * At this point, all the root-channels have been allocated and partly initialized
167  * (as per init_search_tree()), and channels for all the single-phone words have been
168  * allocated and initialized. None of the interior channels of search-trees have
169  * been allocated.
170  * This routine may be called on every utterance, after reinit_search_tree() clears
171  * the search tree created for the previous utterance. Meant for reconfiguring the
172  * search tree to suit the currently active LM.
173  */
174 static void
175 create_search_tree(ngram_search_t *ngs)
176 {
177  chan_t *hmm;
178  root_chan_t *rhmm;
179  int32 w, i, j, p, ph, tmatid;
180  int32 n_words;
181  dict_t *dict = ps_search_dict(ngs);
182  dict2pid_t *d2p = ps_search_dict2pid(ngs);
183 
184  n_words = ps_search_n_words(ngs);
185 
186  E_INFO("Creating search tree\n");
187 
188  for (w = 0; w < n_words; w++)
189  ngs->homophone_set[w] = -1;
190 
191  E_INFO("before: %d root, %d non-root channels, %d single-phone words\n",
192  ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
193 
194  ngs->n_1ph_LMwords = 0;
195  ngs->n_root_chan = 0;
196  ngs->n_nonroot_chan = 0;
197 
198  for (w = 0; w < n_words; w++) {
199  int ciphone, ci2phone;
200 
201  /* Ignore dictionary words not in LM */
202  if (!ngram_model_set_known_wid(ngs->lmset, dict_basewid(dict, w)))
203  continue;
204 
205  /* Handle single-phone words individually; not in channel tree */
206  if (dict_is_single_phone(dict, w)) {
207  E_DEBUG(1,("single_phone_wid[%d] = %s\n",
208  ngs->n_1ph_LMwords, dict_wordstr(dict, w)));
209  ngs->single_phone_wid[ngs->n_1ph_LMwords++] = w;
210  continue;
211  }
212 
213  /* Find a root channel matching the initial diphone, or
214  * allocate one if not found. */
215  ciphone = dict_first_phone(dict, w);
216  ci2phone = dict_second_phone(dict, w);
217  for (i = 0; i < ngs->n_root_chan; ++i) {
218  if (ngs->root_chan[i].ciphone == ciphone
219  && ngs->root_chan[i].ci2phone == ci2phone)
220  break;
221  }
222  if (i == ngs->n_root_chan) {
223  rhmm = &(ngs->root_chan[ngs->n_root_chan]);
224  rhmm->hmm.tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
225  /* Begin with CI phone? Not sure this makes a difference... */
226  hmm_mpx_ssid(&rhmm->hmm, 0) =
227  bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ciphone);
228  rhmm->ciphone = ciphone;
229  rhmm->ci2phone = ci2phone;
230  ngs->n_root_chan++;
231  }
232  else
233  rhmm = &(ngs->root_chan[i]);
234 
235  E_DEBUG(3,("word %s rhmm %d\n", dict_wordstr(dict, w), rhmm - ngs->root_chan));
236  /* Now, rhmm = root channel for w. Go on to remaining phones */
237  if (dict_pronlen(dict, w) == 2) {
238  /* Next phone is the last; not kept in tree; add w to penult_phn_wid set */
239  if ((j = rhmm->penult_phn_wid) < 0)
240  rhmm->penult_phn_wid = w;
241  else {
242  for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
243  ngs->homophone_set[j] = w;
244  }
245  }
246  else {
247  /* Add remaining phones, except the last, to tree */
248  ph = dict2pid_internal(d2p, w, 1);
249  tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, dict_pron(dict, w, 1));
250  hmm = rhmm->next;
251  if (hmm == NULL) {
252  rhmm->next = hmm = listelem_malloc(ngs->chan_alloc);
253  init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, 1), tmatid);
254  ngs->n_nonroot_chan++;
255  }
256  else {
257  chan_t *prev_hmm = NULL;
258 
259  for (; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph); hmm = hmm->alt)
260  prev_hmm = hmm;
261  if (!hmm) { /* thanks, rkm! */
262  prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
263  init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, 1), tmatid);
264  ngs->n_nonroot_chan++;
265  }
266  }
267  E_DEBUG(3,("phone %s = %d\n",
268  bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef,
269  dict_second_phone(dict, w)), ph));
270  for (p = 2; p < dict_pronlen(dict, w) - 1; p++) {
271  ph = dict2pid_internal(d2p, w, p);
272  tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, dict_pron(dict, w, p));
273  if (!hmm->next) {
274  hmm->next = listelem_malloc(ngs->chan_alloc);
275  hmm = hmm->next;
276  init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, p), tmatid);
277  ngs->n_nonroot_chan++;
278  }
279  else {
280  chan_t *prev_hmm = NULL;
281 
282  for (hmm = hmm->next; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph);
283  hmm = hmm->alt)
284  prev_hmm = hmm;
285  if (!hmm) { /* thanks, rkm! */
286  prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
287  init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, p), tmatid);
288  ngs->n_nonroot_chan++;
289  }
290  }
291  E_DEBUG(3,("phone %s = %d\n",
292  bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef,
293  dict_pron(dict, w, p)), ph));
294  }
295 
296  /* All but last phone of w in tree; add w to hmm->info.penult_phn_wid set */
297  if ((j = hmm->info.penult_phn_wid) < 0)
298  hmm->info.penult_phn_wid = w;
299  else {
300  for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
301  ngs->homophone_set[j] = w;
302  }
303  }
304  }
305 
306  ngs->n_1ph_words = ngs->n_1ph_LMwords;
307 
308  /* Add filler words to the array of 1ph words. */
309  for (w = 0; w < n_words; ++w) {
310  /* Skip anything that doesn't actually have a single phone. */
311  if (!dict_is_single_phone(dict, w))
312  continue;
313  /* Also skip "real words" and things that are in the LM. */
314  if (dict_real_word(dict, w))
315  continue;
316  if (ngram_model_set_known_wid(ngs->lmset, dict_basewid(dict, w)))
317  continue;
318  E_DEBUG(1,("single_phone_wid[%d] = %s\n",
319  ngs->n_1ph_words, dict_wordstr(dict, w)));
320  ngs->single_phone_wid[ngs->n_1ph_words++] = w;
321  }
322 
323  if (ngs->n_nonroot_chan >= ngs->max_nonroot_chan) {
324  /* Give some room for channels for new words added dynamically at run time */
325  ngs->max_nonroot_chan = ngs->n_nonroot_chan + 128;
326  E_INFO("after: max nonroot chan increased to %d\n", ngs->max_nonroot_chan);
327 
328  /* Free old active channel list array if any and allocate new one */
329  if (ngs->active_chan_list)
330  ckd_free_2d(ngs->active_chan_list);
331  ngs->active_chan_list = ckd_calloc_2d(2, ngs->max_nonroot_chan,
332  sizeof(**ngs->active_chan_list));
333  }
334 
335  if (!ngs->n_root_chan)
336  E_ERROR("No word from the language model has pronunciation in the dictionary\n");
337 
338  E_INFO("after: %d root, %d non-root channels, %d single-phone words\n",
339  ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
340 }
341 
342 static void
343 reinit_search_subtree(ngram_search_t *ngs, chan_t * hmm)
344 {
345  chan_t *child, *sibling;
346 
347  /* First free all children under hmm */
348  for (child = hmm->next; child; child = sibling) {
349  sibling = child->alt;
350  reinit_search_subtree(ngs, child);
351  }
352 
353  /* Now free hmm */
354  hmm_deinit(&hmm->hmm);
355  listelem_free(ngs->chan_alloc, hmm);
356 }
357 
358 /*
359  * Delete search tree by freeing all interior channels within search tree and
360  * restoring root channel state to the init state (i.e., just after init_search_tree()).
361  */
362 static void
363 reinit_search_tree(ngram_search_t *ngs)
364 {
365  int32 i;
366  chan_t *hmm, *sibling;
367 
368  for (i = 0; i < ngs->n_root_chan; i++) {
369  hmm = ngs->root_chan[i].next;
370 
371  while (hmm) {
372  sibling = hmm->alt;
373  reinit_search_subtree(ngs, hmm);
374  hmm = sibling;
375  }
376 
377  ngs->root_chan[i].penult_phn_wid = -1;
378  ngs->root_chan[i].next = NULL;
379  }
380  ngs->n_nonroot_chan = 0;
381 }
382 
383 void
385 {
386  /* Allocate bestbp_rc, lastphn_cand, last_ltrans */
387  ngs->bestbp_rc = ckd_calloc(bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef),
388  sizeof(*ngs->bestbp_rc));
389  ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
390  sizeof(*ngs->lastphn_cand));
391  init_search_tree(ngs);
392  create_search_tree(ngs);
393 }
394 
395 static void
396 deinit_search_tree(ngram_search_t *ngs)
397 {
398  int i, w, n_words;
399 
400  n_words = ps_search_n_words(ngs);
401  for (i = 0; i < ngs->n_root_chan_alloc; i++) {
402  hmm_deinit(&ngs->root_chan[i].hmm);
403  }
404  if (ngs->rhmm_1ph) {
405  for (i = w = 0; w < n_words; ++w) {
406  if (!dict_is_single_phone(ps_search_dict(ngs), w))
407  continue;
408  hmm_deinit(&ngs->rhmm_1ph[i].hmm);
409  ++i;
410  }
411  ckd_free(ngs->rhmm_1ph);
412  ngs->rhmm_1ph = NULL;
413  }
414  ngs->n_root_chan = 0;
415  ngs->n_root_chan_alloc = 0;
416  ckd_free(ngs->root_chan);
417  ngs->root_chan = NULL;
418  ckd_free(ngs->single_phone_wid);
419  ngs->single_phone_wid = NULL;
420  ckd_free(ngs->homophone_set);
421  ngs->homophone_set = NULL;
422 }
423 
424 void
426 {
427  double n_speech = (double)ngs->n_tot_frame
428  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
429 
430  E_INFO("TOTAL fwdtree %.2f CPU %.3f xRT\n",
431  ngs->fwdtree_perf.t_tot_cpu,
432  ngs->fwdtree_perf.t_tot_cpu / n_speech);
433  E_INFO("TOTAL fwdtree %.2f wall %.3f xRT\n",
434  ngs->fwdtree_perf.t_tot_elapsed,
435  ngs->fwdtree_perf.t_tot_elapsed / n_speech);
436 
437  /* Reset non-root channels. */
438  reinit_search_tree(ngs);
439  /* Free the search tree. */
440  deinit_search_tree(ngs);
441  /* Free other stuff. */
442  ngs->max_nonroot_chan = 0;
443  ckd_free_2d(ngs->active_chan_list);
444  ngs->active_chan_list = NULL;
445  ckd_free(ngs->cand_sf);
446  ngs->cand_sf = NULL;
447  ckd_free(ngs->bestbp_rc);
448  ngs->bestbp_rc = NULL;
449  ckd_free(ngs->lastphn_cand);
450  ngs->lastphn_cand = NULL;
451 }
452 
453 int
455 {
456  /* Reset non-root channels. */
457  reinit_search_tree(ngs);
458  /* Free the search tree. */
459  deinit_search_tree(ngs);
460  /* Reallocate things that depend on the number of words. */
461  ckd_free(ngs->lastphn_cand);
462  ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
463  sizeof(*ngs->lastphn_cand));
464  ckd_free(ngs->word_chan);
465  ngs->word_chan = ckd_calloc(ps_search_n_words(ngs),
466  sizeof(*ngs->word_chan));
467  /* Rebuild the search tree. */
468  init_search_tree(ngs);
469  create_search_tree(ngs);
470  return 0;
471 }
472 
473 void
475 {
476  ps_search_t *base = (ps_search_t *)ngs;
477  int32 i, w, n_words;
478  root_chan_t *rhmm;
479 
480  n_words = ps_search_n_words(ngs);
481 
482  /* Reset utterance statistics. */
483  memset(&ngs->st, 0, sizeof(ngs->st));
484  ptmr_reset(&ngs->fwdtree_perf);
485  ptmr_start(&ngs->fwdtree_perf);
486 
487  /* Reset backpointer table. */
488  ngs->bpidx = 0;
489  ngs->bss_head = 0;
490 
491  /* Reset word lattice. */
492  for (i = 0; i < n_words; ++i)
493  ngs->word_lat_idx[i] = NO_BP;
494 
495  /* Reset active HMM and word lists. */
496  ngs->n_active_chan[0] = ngs->n_active_chan[1] = 0;
497  ngs->n_active_word[0] = ngs->n_active_word[1] = 0;
498 
499  /* Reset scores. */
500  ngs->best_score = 0;
501  ngs->renormalized = 0;
502 
503  /* Reset other stuff. */
504  for (i = 0; i < n_words; i++)
505  ngs->last_ltrans[i].sf = -1;
506  ngs->n_frame = 0;
507 
508  /* Clear the hypothesis string. */
509  ckd_free(base->hyp_str);
510  base->hyp_str = NULL;
511 
512  /* Reset the permanently allocated single-phone words, since they
513  * may have junk left over in them from FWDFLAT. */
514  for (i = 0; i < ngs->n_1ph_words; i++) {
515  w = ngs->single_phone_wid[i];
516  rhmm = (root_chan_t *) ngs->word_chan[w];
517  hmm_clear(&rhmm->hmm);
518  }
519 
520  /* Start search with <s>; word_chan[<s>] is permanently allocated */
521  rhmm = (root_chan_t *) ngs->word_chan[dict_startwid(ps_search_dict(ngs))];
522  hmm_clear(&rhmm->hmm);
523  hmm_enter(&rhmm->hmm, 0, NO_BP, 0);
524 }
525 
526 /*
527  * Mark the active senones for all senones belonging to channels that are active in the
528  * current frame.
529  */
530 static void
531 compute_sen_active(ngram_search_t *ngs, int frame_idx)
532 {
533  root_chan_t *rhmm;
534  chan_t *hmm, **acl;
535  int32 i, w, *awl;
536 
537  acmod_clear_active(ps_search_acmod(ngs));
538 
539  /* Flag active senones for root channels */
540  for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
541  if (hmm_frame(&rhmm->hmm) == frame_idx)
542  acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
543  }
544 
545  /* Flag active senones for nonroot channels in HMM tree */
546  i = ngs->n_active_chan[frame_idx & 0x1];
547  acl = ngs->active_chan_list[frame_idx & 0x1];
548  for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
549  acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
550  }
551 
552  /* Flag active senones for individual word channels */
553  i = ngs->n_active_word[frame_idx & 0x1];
554  awl = ngs->active_word_list[frame_idx & 0x1];
555  for (w = *(awl++); i > 0; --i, w = *(awl++)) {
556  for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
557  acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
558  }
559  }
560  for (i = 0; i < ngs->n_1ph_words; i++) {
561  w = ngs->single_phone_wid[i];
562  rhmm = (root_chan_t *) ngs->word_chan[w];
563 
564  if (hmm_frame(&rhmm->hmm) == frame_idx)
565  acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
566  }
567 }
568 
569 static void
570 renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm)
571 {
572  root_chan_t *rhmm;
573  chan_t *hmm, **acl;
574  int32 i, w, *awl;
575 
576  /* Renormalize root channels */
577  for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
578  if (hmm_frame(&rhmm->hmm) == frame_idx) {
579  hmm_normalize(&rhmm->hmm, norm);
580  }
581  }
582 
583  /* Renormalize nonroot channels in HMM tree */
584  i = ngs->n_active_chan[frame_idx & 0x1];
585  acl = ngs->active_chan_list[frame_idx & 0x1];
586  for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
587  hmm_normalize(&hmm->hmm, norm);
588  }
589 
590  /* Renormalize individual word channels */
591  i = ngs->n_active_word[frame_idx & 0x1];
592  awl = ngs->active_word_list[frame_idx & 0x1];
593  for (w = *(awl++); i > 0; --i, w = *(awl++)) {
594  for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
595  hmm_normalize(&hmm->hmm, norm);
596  }
597  }
598  for (i = 0; i < ngs->n_1ph_words; i++) {
599  w = ngs->single_phone_wid[i];
600  rhmm = (root_chan_t *) ngs->word_chan[w];
601  if (hmm_frame(&rhmm->hmm) == frame_idx) {
602  hmm_normalize(&rhmm->hmm, norm);
603  }
604  }
605 
606  ngs->renormalized = TRUE;
607 }
608 
609 static int32
610 eval_root_chan(ngram_search_t *ngs, int frame_idx)
611 {
612  root_chan_t *rhmm;
613  int32 i, bestscore;
614 
615  bestscore = WORST_SCORE;
616  for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
617  if (hmm_frame(&rhmm->hmm) == frame_idx) {
618  int32 score = chan_v_eval(rhmm);
619  if (score BETTER_THAN bestscore)
620  bestscore = score;
621  ++ngs->st.n_root_chan_eval;
622  }
623  }
624  return (bestscore);
625 }
626 
627 static int32
628 eval_nonroot_chan(ngram_search_t *ngs, int frame_idx)
629 {
630  chan_t *hmm, **acl;
631  int32 i, bestscore;
632 
633  i = ngs->n_active_chan[frame_idx & 0x1];
634  acl = ngs->active_chan_list[frame_idx & 0x1];
635  bestscore = WORST_SCORE;
636  ngs->st.n_nonroot_chan_eval += i;
637 
638  for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
639  int32 score = chan_v_eval(hmm);
640  assert(hmm_frame(&hmm->hmm) == frame_idx);
641  if (score BETTER_THAN bestscore)
642  bestscore = score;
643  }
644 
645  return bestscore;
646 }
647 
648 static int32
649 eval_word_chan(ngram_search_t *ngs, int frame_idx)
650 {
651  root_chan_t *rhmm;
652  chan_t *hmm;
653  int32 i, w, bestscore, *awl, j, k;
654 
655  k = 0;
656  bestscore = WORST_SCORE;
657  awl = ngs->active_word_list[frame_idx & 0x1];
658 
659  i = ngs->n_active_word[frame_idx & 0x1];
660  for (w = *(awl++); i > 0; --i, w = *(awl++)) {
661  assert(bitvec_is_set(ngs->word_active, w));
662  bitvec_clear(ngs->word_active, w);
663  assert(ngs->word_chan[w] != NULL);
664 
665  for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
666  int32 score;
667 
668  assert(hmm_frame(&hmm->hmm) == frame_idx);
669  score = chan_v_eval(hmm);
670  /*printf("eval word chan %d score %d\n", w, score); */
671 
672  if (score BETTER_THAN bestscore)
673  bestscore = score;
674 
675  k++;
676  }
677  }
678 
679  /* Similarly for statically allocated single-phone words */
680  j = 0;
681  for (i = 0; i < ngs->n_1ph_words; i++) {
682  int32 score;
683 
684  w = ngs->single_phone_wid[i];
685  rhmm = (root_chan_t *) ngs->word_chan[w];
686  if (hmm_frame(&rhmm->hmm) < frame_idx)
687  continue;
688 
689  score = chan_v_eval(rhmm);
690  /* printf("eval 1ph word chan %d score %d\n", w, score); */
691  if (score BETTER_THAN bestscore && w != ps_search_finish_wid(ngs))
692  bestscore = score;
693 
694  j++;
695  }
696 
697  ngs->st.n_last_chan_eval += k + j;
698  ngs->st.n_nonroot_chan_eval += k + j;
699  ngs->st.n_word_lastchan_eval +=
700  ngs->n_active_word[frame_idx & 0x1] + j;
701 
702  return bestscore;
703 }
704 
705 static int32
706 evaluate_channels(ngram_search_t *ngs, int16 const *senone_scores, int frame_idx)
707 {
708  int32 bs;
709 
710  hmm_context_set_senscore(ngs->hmmctx, senone_scores);
711  ngs->best_score = eval_root_chan(ngs, frame_idx);
712  if ((bs = eval_nonroot_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score)
713  ngs->best_score = bs;
714  if ((bs = eval_word_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score)
715  ngs->best_score = bs;
716  ngs->last_phone_best_score = bs;
717 
718  return ngs->best_score;
719 }
720 
721 /*
722  * Prune currently active root channels for next frame. Also, perform exit
723  * transitions out of them and activate successors.
724  * score[] of pruned root chan set to WORST_SCORE elsewhere.
725  */
726 static void
727 prune_root_chan(ngram_search_t *ngs, int frame_idx)
728 {
729  root_chan_t *rhmm;
730  chan_t *hmm;
731  int32 i, nf, w;
732  int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
733  chan_t **nacl; /* next active list */
734  lastphn_cand_t *candp;
735  phone_loop_search_t *pls;
736 
737  nf = frame_idx + 1;
738  thresh = ngs->best_score + ngs->dynamic_beam;
739  newphone_thresh = ngs->best_score + ngs->pbeam;
740  lastphn_thresh = ngs->best_score + ngs->lpbeam;
741  nacl = ngs->active_chan_list[nf & 0x1];
742  pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
743 
744  for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
745  E_DEBUG(3,("Root channel %d frame %d score %d thresh %d\n",
746  i, hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm), thresh));
747  /* First check if this channel was active in current frame */
748  if (hmm_frame(&rhmm->hmm) < frame_idx)
749  continue;
750 
751  if (hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) {
752  hmm_frame(&rhmm->hmm) = nf; /* rhmm will be active in next frame */
753  E_DEBUG(3,("Preserving root channel %d score %d\n", i, hmm_bestscore(&rhmm->hmm)));
754  /* transitions out of this root channel */
755  /* transition to all next-level channels in the HMM tree */
756  newphone_score = hmm_out_score(&rhmm->hmm) + ngs->pip;
757  if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) {
758  for (hmm = rhmm->next; hmm; hmm = hmm->alt) {
759  int32 pl_newphone_score = newphone_score
760  + phone_loop_search_score(pls, hmm->ciphone);
761  if (pl_newphone_score BETTER_THAN newphone_thresh) {
762  if ((hmm_frame(&hmm->hmm) < frame_idx)
763  || (pl_newphone_score BETTER_THAN hmm_in_score(&hmm->hmm))) {
764  hmm_enter(&hmm->hmm, pl_newphone_score,
765  hmm_out_history(&rhmm->hmm), nf);
766  *(nacl++) = hmm;
767  }
768  }
769  }
770  }
771 
772  /*
773  * Transition to last phone of all words for which this is the
774  * penultimate phone (the last phones may need multiple right contexts).
775  * Remember to remove the temporary newword_penalty.
776  */
777  if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) {
778  for (w = rhmm->penult_phn_wid; w >= 0;
779  w = ngs->homophone_set[w]) {
780  int32 pl_newphone_score = newphone_score
782  (pls, dict_last_phone(ps_search_dict(ngs),w));
783  E_DEBUG(3,("word %s newphone_score %d\n", dict_wordstr(ps_search_dict(ngs), w), newphone_score));
784  if (pl_newphone_score BETTER_THAN lastphn_thresh) {
785  candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
786  ngs->n_lastphn_cand++;
787  candp->wid = w;
788  candp->score =
789  pl_newphone_score - ngs->nwpen;
790  candp->bp = hmm_out_history(&rhmm->hmm);
791  }
792  }
793  }
794  }
795  }
796  ngs->n_active_chan[nf & 0x1] = nacl - ngs->active_chan_list[nf & 0x1];
797 }
798 
799 /*
800  * Prune currently active nonroot channels in HMM tree for next frame. Also, perform
801  * exit transitions out of such channels and activate successors.
802  */
803 static void
804 prune_nonroot_chan(ngram_search_t *ngs, int frame_idx)
805 {
806  chan_t *hmm, *nexthmm;
807  int32 nf, w, i;
808  int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
809  chan_t **acl, **nacl; /* active list, next active list */
810  lastphn_cand_t *candp;
811  phone_loop_search_t *pls;
812 
813  nf = frame_idx + 1;
814 
815  thresh = ngs->best_score + ngs->dynamic_beam;
816  newphone_thresh = ngs->best_score + ngs->pbeam;
817  lastphn_thresh = ngs->best_score + ngs->lpbeam;
818  pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
819 
820  acl = ngs->active_chan_list[frame_idx & 0x1]; /* currently active HMMs in tree */
821  nacl = ngs->active_chan_list[nf & 0x1] + ngs->n_active_chan[nf & 0x1];
822 
823  for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++); i > 0;
824  --i, hmm = *(acl++)) {
825  assert(hmm_frame(&hmm->hmm) >= frame_idx);
826 
827  if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
828  /* retain this channel in next frame */
829  if (hmm_frame(&hmm->hmm) != nf) {
830  hmm_frame(&hmm->hmm) = nf;
831  *(nacl++) = hmm;
832  }
833 
834  /* transition to all next-level channel in the HMM tree */
835  newphone_score = hmm_out_score(&hmm->hmm) + ngs->pip;
836  if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) {
837  for (nexthmm = hmm->next; nexthmm; nexthmm = nexthmm->alt) {
838  int32 pl_newphone_score = newphone_score
839  + phone_loop_search_score(pls, nexthmm->ciphone);
840  if ((pl_newphone_score BETTER_THAN newphone_thresh)
841  && ((hmm_frame(&nexthmm->hmm) < frame_idx)
842  || (pl_newphone_score
843  BETTER_THAN hmm_in_score(&nexthmm->hmm)))) {
844  if (hmm_frame(&nexthmm->hmm) != nf) {
845  /* Keep this HMM on the active list */
846  *(nacl++) = nexthmm;
847  }
848  hmm_enter(&nexthmm->hmm, pl_newphone_score,
849  hmm_out_history(&hmm->hmm), nf);
850  }
851  }
852  }
853 
854  /*
855  * Transition to last phone of all words for which this is the
856  * penultimate phone (the last phones may need multiple right contexts).
857  * Remember to remove the temporary newword_penalty.
858  */
859  if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) {
860  for (w = hmm->info.penult_phn_wid; w >= 0;
861  w = ngs->homophone_set[w]) {
862  int32 pl_newphone_score = newphone_score
864  (pls, dict_last_phone(ps_search_dict(ngs),w));
865  if (pl_newphone_score BETTER_THAN lastphn_thresh) {
866  candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
867  ngs->n_lastphn_cand++;
868  candp->wid = w;
869  candp->score =
870  pl_newphone_score - ngs->nwpen;
871  candp->bp = hmm_out_history(&hmm->hmm);
872  }
873  }
874  }
875  }
876  else if (hmm_frame(&hmm->hmm) != nf) {
877  hmm_clear(&hmm->hmm);
878  }
879  }
880  ngs->n_active_chan[nf & 0x1] = nacl - ngs->active_chan_list[nf & 0x1];
881 }
882 
883 /*
884  * Execute the transition into the last phone for all candidates words emerging from
885  * the HMM tree. Attach LM scores to such transitions.
886  * (Executed after pruning root and non-root, but before pruning word-chan.)
887  */
888 static void
889 last_phone_transition(ngram_search_t *ngs, int frame_idx)
890 {
891  int32 i, j, k, nf, bp, bpend, w;
892  lastphn_cand_t *candp;
893  int32 *nawl;
894  int32 thresh;
895  int32 bestscore, dscr;
896  chan_t *hmm;
897  bptbl_t *bpe;
898  int32 n_cand_sf = 0;
899 
900  nf = frame_idx + 1;
901  nawl = ngs->active_word_list[nf & 0x1];
902  ngs->st.n_lastphn_cand_utt += ngs->n_lastphn_cand;
903 
904  /* For each candidate word (entering its last phone) */
905  /* If best LM score and bp for candidate known use it, else sort cands by startfrm */
906  for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
907  int32 start_score;
908 
909  /* This can happen if recognition fails. */
910  if (candp->bp == -1)
911  continue;
912  /* Backpointer entry for it. */
913  bpe = &(ngs->bp_table[candp->bp]);
914 
915  /* Subtract starting score for candidate, leave it with only word score */
916  start_score = ngram_search_exit_score
917  (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
918  assert(start_score BETTER_THAN WORST_SCORE);
919  candp->score -= start_score;
920 
921  /*
922  * If this candidate not occurred in an earlier frame, prepare for finding
923  * best transition score into last phone; sort by start frame.
924  */
925  /* i.e. if we don't have an entry in last_ltrans for this
926  * <word,sf>, then create one */
927  if (ngs->last_ltrans[candp->wid].sf != bpe->frame + 1) {
928  /* Look for an entry in cand_sf matching the backpointer
929  * for this candidate. */
930  for (j = 0; j < n_cand_sf; j++) {
931  if (ngs->cand_sf[j].bp_ef == bpe->frame)
932  break;
933  }
934  /* Oh, we found one, so chain onto it. */
935  if (j < n_cand_sf)
936  candp->next = ngs->cand_sf[j].cand;
937  else {
938  /* Nope, let's make a new one, allocating cand_sf if necessary. */
939  if (n_cand_sf >= ngs->cand_sf_alloc) {
940  if (ngs->cand_sf_alloc == 0) {
941  ngs->cand_sf =
942  ckd_calloc(CAND_SF_ALLOCSIZE,
943  sizeof(*ngs->cand_sf));
944  ngs->cand_sf_alloc = CAND_SF_ALLOCSIZE;
945  }
946  else {
947  ngs->cand_sf_alloc += CAND_SF_ALLOCSIZE;
948  ngs->cand_sf = ckd_realloc(ngs->cand_sf,
949  ngs->cand_sf_alloc
950  * sizeof(*ngs->cand_sf));
951  E_INFO("cand_sf[] increased to %d entries\n",
952  ngs->cand_sf_alloc);
953  }
954  }
955 
956  /* Use the newly created cand_sf. */
957  j = n_cand_sf++;
958  candp->next = -1; /* End of the chain. */
959  ngs->cand_sf[j].bp_ef = bpe->frame;
960  }
961  /* Update it to point to this candidate. */
962  ngs->cand_sf[j].cand = i;
963 
964  ngs->last_ltrans[candp->wid].dscr = WORST_SCORE;
965  ngs->last_ltrans[candp->wid].sf = bpe->frame + 1;
966  }
967  }
968 
969  /* Compute best LM score and bp for new cands entered in the sorted lists above */
970  for (i = 0; i < n_cand_sf; i++) {
971  /* For the i-th unique end frame... */
972  bp = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef];
973  bpend = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef + 1];
974  for (bpe = &(ngs->bp_table[bp]); bp < bpend; bp++, bpe++) {
975  if (!bpe->valid)
976  continue;
977  /* For each candidate at the start frame find bp->cand transition-score */
978  for (j = ngs->cand_sf[i].cand; j >= 0; j = candp->next) {
979  int32 n_used;
980  candp = &(ngs->lastphn_cand[j]);
981  dscr =
983  (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
984  if (dscr BETTER_THAN WORST_SCORE) {
985  assert(!dict_filler_word(ps_search_dict(ngs), candp->wid));
986  dscr += ngram_tg_score(ngs->lmset,
987  dict_basewid(ps_search_dict(ngs), candp->wid),
988  bpe->real_wid,
989  bpe->prev_real_wid,
990  &n_used)>>SENSCR_SHIFT;
991  }
992 
993  if (dscr BETTER_THAN ngs->last_ltrans[candp->wid].dscr) {
994  ngs->last_ltrans[candp->wid].dscr = dscr;
995  ngs->last_ltrans[candp->wid].bp = bp;
996  }
997  }
998  }
999  }
1000 
1001  /* Update best transitions for all candidates; also update best lastphone score */
1002  bestscore = ngs->last_phone_best_score;
1003  for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
1004  candp->score += ngs->last_ltrans[candp->wid].dscr;
1005  candp->bp = ngs->last_ltrans[candp->wid].bp;
1006 
1007  if (candp->score BETTER_THAN bestscore)
1008  bestscore = candp->score;
1009  }
1010  ngs->last_phone_best_score = bestscore;
1011 
1012  /* At this pt, we know the best entry score (with LM component) for all candidates */
1013  thresh = bestscore + ngs->lponlybeam;
1014  for (i = ngs->n_lastphn_cand, candp = ngs->lastphn_cand; i > 0; --i, candp++) {
1015  if (candp->score BETTER_THAN thresh) {
1016  w = candp->wid;
1017 
1018  ngram_search_alloc_all_rc(ngs, w);
1019 
1020  k = 0;
1021  for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
1022  if ((hmm_frame(&hmm->hmm) < frame_idx)
1023  || (candp->score BETTER_THAN hmm_in_score(&hmm->hmm))) {
1024  assert(hmm_frame(&hmm->hmm) != nf);
1025  hmm_enter(&hmm->hmm,
1026  candp->score, candp->bp, nf);
1027  k++;
1028  }
1029  }
1030  if (k > 0) {
1031  assert(bitvec_is_clear(ngs->word_active, w));
1032  assert(!dict_is_single_phone(ps_search_dict(ngs), w));
1033  *(nawl++) = w;
1034  bitvec_set(ngs->word_active, w);
1035  }
1036  }
1037  }
1038  ngs->n_active_word[nf & 0x1] = nawl - ngs->active_word_list[nf & 0x1];
1039 }
1040 
1041 /*
1042  * Prune currently active word channels for next frame. Also, perform exit
1043  * transitions out of such channels and active successors.
      *
      * For each word on the current frame's active word list, every channel
      * chained from word_chan[w] is handled in one of three ways:
      *   - best score better than lastphn_thresh: stamped with the next frame
      *     and kept; if its exit score is also better than newword_thresh a
      *     word exit is recorded with ngram_search_save_bp();
      *   - otherwise, already stamped with the next frame: left linked;
      *   - otherwise: deinitialised, freed and unlinked from the chain.
      * Words that keep at least one channel are appended to the next frame's
      * active word list. The single-phone word channels are then tested
      * against the same two thresholds but are never freed here.
      *
      * ngs:       search state, modified in place.
      * frame_idx: frame just evaluated; the "next frame" is frame_idx + 1.
1044  */
1045 static void
1046 prune_word_chan(ngram_search_t *ngs, int frame_idx)
1047 {
1048  root_chan_t *rhmm;
1049  chan_t *hmm, *thmm;
1050  chan_t **phmmp; /* address of the link that leads to the current channel */
1051  int32 nf, w, i, k;
1052  int32 newword_thresh, lastphn_thresh;
1053  int32 *awl, *nawl;
1054 
1055  nf = frame_idx + 1;
      /* Both thresholds are offsets from the best last-phone score:
       * newword_thresh gates word exits, lastphn_thresh gates survival. */
1056  newword_thresh = ngs->last_phone_best_score + ngs->wbeam;
1057  lastphn_thresh = ngs->last_phone_best_score + ngs->lponlybeam;
1058 
      /* awl reads this frame's list; nawl appends after the entries already
       * counted in n_active_word[] for the next frame. */
1059  awl = ngs->active_word_list[frame_idx & 0x1];
1060  nawl = ngs->active_word_list[nf & 0x1] + ngs->n_active_word[nf & 0x1];
1061 
1062  /* Dynamically allocated last channels of multi-phone words */
      /* NOTE(review): w = *(awl++) is evaluated before the i > 0 test, so one
       * slot past the counted entries is read (its value is never used). */
1063  for (i = ngs->n_active_word[frame_idx & 0x1], w = *(awl++); i > 0;
1064  --i, w = *(awl++)) {
      /* k counts the channels of w that survive into the next frame. */
1065  k = 0;
1066  phmmp = &(ngs->word_chan[w]);
1067  for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
1068  assert(hmm_frame(&hmm->hmm) >= frame_idx);
1069 
      /* Remember the successor first: hmm may be freed below. */
1070  thmm = hmm->next;
1071  if (hmm_bestscore(&hmm->hmm) BETTER_THAN lastphn_thresh) {
1072  /* retain this channel in next frame */
1073  hmm_frame(&hmm->hmm) = nf;
1074  k++;
1075  phmmp = &(hmm->next);
1076 
1077  /* Could if ((! skip_alt_frm) || (frame_idx & 0x1)) the following */
1078  if (hmm_out_score(&hmm->hmm) BETTER_THAN newword_thresh) {
1079  /* can exit channel and recognize word */
1080  ngram_search_save_bp(ngs, frame_idx, w,
1081  hmm_out_score(&hmm->hmm),
1082  hmm_out_history(&hmm->hmm),
1083  hmm->info.rc_id);
1084  }
1085  }
1086  else if (hmm_frame(&hmm->hmm) == nf) {
      /* Below threshold but already stamped with the next frame:
       * keep it linked and just advance the link pointer. */
1087  phmmp = &(hmm->next);
1088  }
1089  else {
      /* Release the channel and splice it out of the chain. */
1090  hmm_deinit(&hmm->hmm);
1091  listelem_free(ngs->chan_alloc, hmm);
1092  *phmmp = thmm;
1093  }
1094  }
      /* Append w to the next frame's list only if something survived and
       * the word is not already flagged in word_active. */
1095  if ((k > 0) && (bitvec_is_clear(ngs->word_active, w))) {
1096  assert(!dict_is_single_phone(ps_search_dict(ngs), w));
1097  *(nawl++) = w;
1098  bitvec_set(ngs->word_active, w);
1099  }
1100  }
1101  ngs->n_active_word[nf & 0x1] = nawl - ngs->active_word_list[nf & 0x1];
1102 
1103  /*
1104  * Prune permanently allocated single-phone channels.
1105  * NOTES: score[] of pruned channels set to WORST_SCORE elsewhere.
1106  */
1107  for (i = 0; i < ngs->n_1ph_words; i++) {
1108  w = ngs->single_phone_wid[i];
1109  rhmm = (root_chan_t *) ngs->word_chan[w];
1110  E_DEBUG(3,("Single phone word %s frame %d score %d thresh %d outscore %d nwthresh %d\n",
1111  dict_wordstr(ps_search_dict(ngs),w),
1112  hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm),
1113  lastphn_thresh, hmm_out_score(&rhmm->hmm), newword_thresh));
      /* Frame stamp is older than this frame: nothing to prune or exit. */
1114  if (hmm_frame(&rhmm->hmm) < frame_idx)
1115  continue;
1116  if (hmm_bestscore(&rhmm->hmm) BETTER_THAN lastphn_thresh) {
1117  hmm_frame(&rhmm->hmm) = nf;
1118 
1119  /* Could if ((! skip_alt_frm) || (frame_idx & 0x1)) the following */
1120  if (hmm_out_score(&rhmm->hmm) BETTER_THAN newword_thresh) {
1121  E_DEBUG(4,("Exiting single phone word %s with %d > %d, %d\n",
1122  dict_wordstr(ps_search_dict(ngs),w),
1123  hmm_out_score(&rhmm->hmm),
1124  lastphn_thresh, newword_thresh));
      /* Single-phone exits are saved with right-context id 0. */
1125  ngram_search_save_bp(ngs, frame_idx, w,
1126  hmm_out_score(&rhmm->hmm),
1127  hmm_out_history(&rhmm->hmm), 0);
1128  }
1129  }
1130  }
1131 }
1132 
/*
 * Per-frame pruning driver.
 *
 * Empties the last-phone candidate list, resets dynamic_beam to the configured
 * beam, and, when maxhmmpf is enabled (not -1) and the number of evaluated
 * root plus non-root channels exceeds it, narrows dynamic_beam using a
 * 256-bin histogram of channel best scores. It then runs, in this order,
 * prune_root_chan(), prune_nonroot_chan(), last_phone_transition() and
 * prune_word_chan() for frame_idx.
 */
1133 static void
1134 prune_channels(ngram_search_t *ngs, int frame_idx)
1135 {
1136  /* Clear last phone candidate list. */
1137  ngs->n_lastphn_cand = 0;
1138  /* Set the dynamic beam based on maxhmmpf here. */
1139  ngs->dynamic_beam = ngs->beam;
1140  if (ngs->maxhmmpf != -1
1141  && ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval > ngs->maxhmmpf) {
1142  /* Build a histogram to approximately prune them. */
1143  int32 bins[256], bw, nhmms, i;
1144  root_chan_t *rhmm;
1145  chan_t **acl, *hmm;
1146 
1147  /* Bins go from zero (best score) to edge of beam. */
      /* NOTE(review): bw is an int32 bin width; whenever -beam / 256 is
       * below 1 it becomes 0 and the divisions by bw below divide by zero.
       * Confirm that beam can never be that narrow when maxhmmpf is set. */
1148  bw = -ngs->beam / 256;
1149  memset(bins, 0, sizeof(bins));
1150  /* For each active root channel. */
1151  for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
1152  int32 b;
1153 
1154  /* Put it in a bin according to its bestscore. */
      /* Only the upper end is clamped; b is assumed non-negative, i.e.
       * no channel scores better than best_score -- TODO confirm. */
1155  b = (ngs->best_score - hmm_bestscore(&rhmm->hmm)) / bw;
1156  if (b >= 256)
1157  b = 255;
1158  ++bins[b];
1159  }
1160  /* For each active non-root channel. */
1161  acl = ngs->active_chan_list[frame_idx & 0x1]; /* currently active HMMs in tree */
1162  for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++);
1163  i > 0; --i, hmm = *(acl++)) {
1164  int32 b;
1165 
1166  /* Put it in a bin according to its bestscore. */
1167  b = (ngs->best_score - hmm_bestscore(&hmm->hmm)) / bw;
1168  if (b >= 256)
1169  b = 255;
1170  ++bins[b];
1171  }
1172  /* Walk down the bins to find the new beam. */
      /* Stop at the first bin whose cumulative count exceeds maxhmmpf;
       * i stays 256 if the limit is never exceeded. */
1173  for (i = nhmms = 0; i < 256; ++i) {
1174  nhmms += bins[i];
1175  if (nhmms > ngs->maxhmmpf)
1176  break;
1177  }
      /* The new beam is the score distance covered by the first i bins. */
1178  ngs->dynamic_beam = -(i * bw);
1179  }
1180 
1181  prune_root_chan(ngs, frame_idx);
1182  prune_nonroot_chan(ngs, frame_idx);
1183  last_phone_transition(ngs, frame_idx);
1184  prune_word_chan(ngs, frame_idx);
1185 }
1186 
1187 /*
1188  * Limit the number of word exits in each frame to maxwpf. And also limit the number of filler
1189  * words to 1.
      *
      * Works only on the backpointer entries created for frame_idx, i.e. indices
      * bp_table_idx[frame_idx] up to (not including) bpidx. Entries are not
      * removed; losers are marked with valid = FALSE.
      * Does nothing when maxwpf is -1 or equals the number of words.
1190  */
1191 static void
1192 bptable_maxwpf(ngram_search_t *ngs, int frame_idx)
1193 {
1194  int32 bp, n;
1195  int32 bestscr, worstscr;
1196  bptbl_t *bpe, *bestbpe, *worstbpe;
1197 
1198  /* Don't prune if no pruning. */
1199  if (ngs->maxwpf == -1 || ngs->maxwpf == ps_search_n_words(ngs))
1200  return;
1201 
1202  /* Allow only one filler word exit (the best) per frame */
      /* Start from the lowest int32 bit pattern so that, presumably, any
       * real score compares BETTER_THAN it. */
1203  bestscr = (int32) 0x80000000;
1204  bestbpe = NULL;
1205  n = 0;
1206  for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1207  bpe = &(ngs->bp_table[bp]);
1208  if (dict_filler_word(ps_search_dict(ngs), bpe->wid)) {
1209  if (bpe->score BETTER_THAN bestscr) {
1210  bestscr = bpe->score;
1211  bestbpe = bpe;
1212  }
      /* Invalidate every filler for now; the best one is revived below. */
1213  bpe->valid = FALSE;
1214  n++; /* No. of filler words */
1215  }
1216  }
1217  /* Restore bestbpe to valid state */
1218  if (bestbpe != NULL) {
1219  bestbpe->valid = TRUE;
1220  --n;
1221  }
1222 
1223  /* Allow up to maxwpf best entries to survive; mark the remaining with valid = 0 */
      /* At this point n is the number of fillers left invalid. */
1224  n = (ngs->bpidx
1225  - ngs->bp_table_idx[frame_idx]) - n; /* No. of entries after limiting fillers */
      /* Each pass rescans the frame's entries and invalidates the single
       * worst valid one, until only maxwpf remain. */
1226  for (; n > ngs->maxwpf; --n) {
1227  /* Find worst BPTable entry */
1228  worstscr = (int32) 0x7fffffff;
1229  worstbpe = NULL;
1230  for (bp = ngs->bp_table_idx[frame_idx]; (bp < ngs->bpidx); bp++) {
1231  bpe = &(ngs->bp_table[bp]);
1232  if (bpe->valid && (bpe->score WORSE_THAN worstscr)) {
1233  worstscr = bpe->score;
1234  worstbpe = bpe;
1235  }
1236  }
1237  /* FIXME: Don't panic! */
1238  if (worstbpe == NULL)
1239  E_FATAL("PANIC: No worst BPtable entry remaining\n");
1240  worstbpe->valid = FALSE;
1241  }
1242 }
1243 
/*
 * Enter new word instances from the word exits recorded for frame_idx.
 *
 * Steps, in order:
 *  1. For every CI phone, find the best exit score (with its backpointer index
 *     and left-context phone) among this frame's backpointer entries, ignoring
 *     exits of the finish word. Returns early if no other word exited.
 *  2. Enter each HMM-tree root channel whose score beats
 *     best_score + dynamic_beam.
 *  3. For the first n_1ph_LMwords single-phone words, choose the best valid
 *     predecessor with the trigram LM score added, then enter the word.
 *  4. Enter silence (silpen) and the remaining filler words (fillpen).
 *
 * Every hmm_enter() call made here uses frame frame_idx + 1.
 */
1244 static void
1245 word_transition(ngram_search_t *ngs, int frame_idx)
1246 {
1247  int32 i, k, bp, w, nf;
1248  int32 rc;
1249  int32 thresh, newscore;
1250  bptbl_t *bpe;
1251  root_chan_t *rhmm;
1252  struct bestbp_rc_s *bestbp_rc_ptr;
1253  phone_loop_search_t *pls;
1254  dict_t *dict = ps_search_dict(ngs);
1255  dict2pid_t *d2p = ps_search_dict2pid(ngs);
1256 
1257  /*
1258  * Transition to start of new word instances (HMM tree roots); but only if words
1259  * other than </s> finished here.
1260  * But, first, find the best starting score for each possible right context phone.
1261  */
1262  for (i = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef) - 1; i >= 0; --i)
1263  ngs->bestbp_rc[i].score = WORST_SCORE;
      /* k counts the word exits that are not the finish word. */
1264  k = 0;
1265  pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
1266  /* Ugh, this is complicated. Scan all word exits for this frame
1267  * (they have already been created by prune_word_chan()). */
      /* Note that this pass does not test bpe->valid. */
1268  for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1269  bpe = &(ngs->bp_table[bp]);
1270  ngs->word_lat_idx[bpe->wid] = NO_BP;
1271 
1272  if (bpe->wid == ps_search_finish_wid(ngs))
1273  continue;
1274  k++;
1275 
1276  /* DICT2PID */
1277  /* Array of HMM scores corresponding to all the possible right
1278  * context expansions of the final phone. It's likely that a
1279  * lot of these are going to be missing, actually. */
1280  if (bpe->last2_phone == -1) { /* implies s_idx == -1 */
1281  /* No right context expansion. */
      /* The single exit score competes for every right-context phone.
       * The debug message below always prints index 0, not rc. */
1282  for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
1283  if (bpe->score BETTER_THAN ngs->bestbp_rc[rc].score) {
1284  E_DEBUG(4,("bestbp_rc[0] = %d lc %d\n",
1285  bpe->score, bpe->last_phone));
1286  ngs->bestbp_rc[rc].score = bpe->score;
1287  ngs->bestbp_rc[rc].path = bp;
1288  ngs->bestbp_rc[rc].lc = bpe->last_phone;
1289  }
1290  }
1291  }
1292  else {
      /* Per-right-context scores start at bscore_stack[s_idx];
       * cimap[rc] selects the slot used for CI phone rc. */
1293  xwdssid_t *rssid = dict2pid_rssid(d2p, bpe->last_phone, bpe->last2_phone);
1294  int32 *rcss = &(ngs->bscore_stack[bpe->s_idx]);
1295  for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
1296  if (rcss[rssid->cimap[rc]] BETTER_THAN ngs->bestbp_rc[rc].score) {
1297  E_DEBUG(4,("bestbp_rc[%d] = %d lc %d\n",
1298  rc, rcss[rssid->cimap[rc]], bpe->last_phone));
1299  ngs->bestbp_rc[rc].score = rcss[rssid->cimap[rc]];
1300  ngs->bestbp_rc[rc].path = bp;
1301  ngs->bestbp_rc[rc].lc = bpe->last_phone;
1302  }
1303  }
1304  }
1305  }
      /* Nothing but finish-word exits (or no exits at all): no transitions. */
1306  if (k == 0)
1307  return;
1308 
1309  nf = frame_idx + 1;
1310  thresh = ngs->best_score + ngs->dynamic_beam;
1311  /*
1312  * Hypothesize successors to words finished in this frame.
1313  * Main dictionary, multi-phone words transition to HMM-trees roots.
1314  */
1315  for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
1316  bestbp_rc_ptr = &(ngs->bestbp_rc[rhmm->ciphone]);
1317 
      /* Best exit score for this root's first phone, plus the new-word
       * penalty, the pip term and the phone-loop lookahead score. */
1318  newscore = bestbp_rc_ptr->score + ngs->nwpen + ngs->pip
1319  + phone_loop_search_score(pls, rhmm->ciphone);
1320  if (newscore BETTER_THAN thresh) {
      /* Enter only if the channel's frame stamp predates this frame or
       * the new score beats its current entry score. */
1321  if ((hmm_frame(&rhmm->hmm) < frame_idx)
1322  || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
1323  hmm_enter(&rhmm->hmm, newscore,
1324  bestbp_rc_ptr->path, nf);
1325  /* DICT2PID: Another place where mpx ssids are entered. */
1326  /* Look up the ssid to use when entering this mpx triphone. */
1327  hmm_mpx_ssid(&rhmm->hmm, 0) =
1328  dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone, bestbp_rc_ptr->lc);
1329  assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID);
1330  }
1331  }
1332  }
1333 
1334  /*
1335  * Single phone words; no right context for these. Cannot use bestbp_rc as
1336  * LM scores have to be included. First find best transition to these words.
1337  */
      /* last_ltrans[w] is reused as scratch space for the best predecessor. */
1338  for (i = 0; i < ngs->n_1ph_LMwords; i++) {
1339  w = ngs->single_phone_wid[i];
1340  ngs->last_ltrans[w].dscr = (int32) 0x80000000;
1341  }
1342  for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1343  bpe = &(ngs->bp_table[bp]);
      /* Entries invalidated by absolute pruning are not used here. */
1344  if (!bpe->valid)
1345  continue;
1346 
1347  for (i = 0; i < ngs->n_1ph_LMwords; i++) {
1348  int32 n_used;
1349  w = ngs->single_phone_wid[i];
1350  newscore = ngram_search_exit_score
1351  (ngs, bpe, dict_first_phone(dict, w));
1352  E_DEBUG(4, ("initial newscore for %s: %d\n",
1353  dict_wordstr(dict, w), newscore));
      /* The LM score is added only when the exit score is usable. */
1354  if (newscore != WORST_SCORE)
1355  newscore += ngram_tg_score(ngs->lmset,
1356  dict_basewid(dict, w),
1357  bpe->real_wid,
1358  bpe->prev_real_wid,
1359  &n_used)>>SENSCR_SHIFT;
1360 
1361  /* FIXME: Not sure how WORST_SCORE could be better, but it
1362  * apparently happens. */
1363  if (newscore BETTER_THAN ngs->last_ltrans[w].dscr) {
1364  ngs->last_ltrans[w].dscr = newscore;
1365  ngs->last_ltrans[w].bp = bp;
1366  }
1367  }
1368  }
1369 
1370  /* Now transition to in-LM single phone words */
1371  for (i = 0; i < ngs->n_1ph_LMwords; i++) {
1372  w = ngs->single_phone_wid[i];
1373  /* Never transition into the start word (for one thing, it is
1374  a non-event in the language model.) */
1375  if (w == dict_startwid(ps_search_dict(ngs)))
1376  continue;
1377  rhmm = (root_chan_t *) ngs->word_chan[w];
1378  newscore = ngs->last_ltrans[w].dscr + ngs->pip
1379  + phone_loop_search_score(pls, rhmm->ciphone);
1380  if (newscore BETTER_THAN thresh) {
      /* bpe is the chosen predecessor; its last phone is the left context. */
1381  bpe = ngs->bp_table + ngs->last_ltrans[w].bp;
1382  if ((hmm_frame(&rhmm->hmm) < frame_idx)
1383  || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
1384  hmm_enter(&rhmm->hmm,
1385  newscore, ngs->last_ltrans[w].bp, nf);
1386  /* DICT2PID: another place where mpx ssids are entered. */
1387  /* Look up the ssid to use when entering this mpx triphone. */
1388  hmm_mpx_ssid(&rhmm->hmm, 0) =
1389  dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone,
1390  dict_last_phone(dict, bpe->wid));
1391  assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID);
1392  }
1393  }
1394  }
1395 
1396  /* Remaining words: <sil>, noise words. No mpx for these! */
1397  w = ps_search_silence_wid(ngs);
1398  rhmm = (root_chan_t *) ngs->word_chan[w];
      /* The best exit for the silence phone as right context is used for
       * silence here and for every filler word in the loop below. */
1399  bestbp_rc_ptr = &(ngs->bestbp_rc[ps_search_acmod(ngs)->mdef->sil]);
1400  newscore = bestbp_rc_ptr->score + ngs->silpen + ngs->pip
1401  + phone_loop_search_score(pls, rhmm->ciphone);
1402  if (newscore BETTER_THAN thresh) {
1403  if ((hmm_frame(&rhmm->hmm) < frame_idx)
1404  || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
1405  hmm_enter(&rhmm->hmm,
1406  newscore, bestbp_rc_ptr->path, nf);
1407  }
1408  }
1409  for (w = dict_filler_start(dict); w <= dict_filler_end(dict); w++) {
      /* Silence was handled just above. */
1410  if (w == ps_search_silence_wid(ngs))
1411  continue;
1412  /* Never transition into the start word (for one thing, it is
1413  a non-event in the language model.) */
1414  if (w == dict_startwid(ps_search_dict(ngs)))
1415  continue;
1416  rhmm = (root_chan_t *) ngs->word_chan[w];
1417  /* If this was not actually a single-phone word, rhmm will be NULL. */
1418  if (rhmm == NULL)
1419  continue;
1420  newscore = bestbp_rc_ptr->score + ngs->fillpen + ngs->pip
1421  + phone_loop_search_score(pls, rhmm->ciphone);
1422  if (newscore BETTER_THAN thresh) {
1423  if ((hmm_frame(&rhmm->hmm) < frame_idx)
1424  || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
1425  hmm_enter(&rhmm->hmm,
1426  newscore, bestbp_rc_ptr->path, nf);
1427  }
1428  }
1429  }
1430 }
1431 
/*
 * Reset root and single-phone channels that were not carried forward.
 *
 * A channel whose frame stamp still equals frame_idx was neither retained nor
 * re-entered for frame_idx + 1 by the pruning and transition steps, so its
 * state is reset with hmm_clear(). Nothing is freed.
 */
1432 static void
1433 deactivate_channels(ngram_search_t *ngs, int frame_idx)
1434 {
1435  root_chan_t *rhmm;
1436  int i;
1437 
1438  /* Clear score[] of pruned root channels */
1439  for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
1440  if (hmm_frame(&rhmm->hmm) == frame_idx) {
1441  hmm_clear(&rhmm->hmm);
1442  }
1443  }
1444  /* Clear score[] of pruned single-phone channels */
1445  for (i = 0; i < ngs->n_1ph_words; i++) {
1446  int32 w = ngs->single_phone_wid[i];
      /* Single-phone words keep their channel in word_chan[w]. */
1447  rhmm = (root_chan_t *) ngs->word_chan[w];
1448  if (hmm_frame(&rhmm->hmm) == frame_idx) {
1449  hmm_clear(&rhmm->hmm);
1450  }
1451  }
1452 }
1453 
/*
 * Search one frame forward in an utterance.
 *
 * ngs:       N-Gram search state, updated in place.
 * frame_idx: frame to process; passed by address to acmod_score(), which
 *            takes it as an in/out argument.
 *
 * Returns the number of frames processed: 1 on success, 0 when no senone
 * scores are available for the frame or when the best score shows that
 * recognition has already failed.
 */
1454 int
1455 ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
1456 {
1457  int16 const *senscr;
1458 
1459  /* Activate our HMMs for the current frame if need be. */
      /* Skipped when the acoustic model computes all senones anyway. */
1460  if (!ps_search_acmod(ngs)->compallsen)
1461  compute_sen_active(ngs, frame_idx);
1462 
1463  /* Compute GMM scores for the current frame. */
1464  if ((senscr = acmod_score(ps_search_acmod(ngs), &frame_idx)) == NULL)
1465  return 0;
1466  ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
1467 
1468  /* Mark backpointer table for current frame. */
1469  ngram_search_mark_bptable(ngs, frame_idx);
1470 
1471  /* If the best score is equal to or worse than WORST_SCORE,
1472  * recognition has failed, don't bother to keep trying. */
1473  if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE)
1474  return 0;
1475  /* Renormalize if necessary */
      /* i.e. when the best score plus twice the beam would fall below
       * WORST_SCORE. */
1476  if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) {
1477  E_INFO("Renormalizing Scores at frame %d, best score %d\n",
1478  frame_idx, ngs->best_score);
1479  renormalize_scores(ngs, frame_idx, ngs->best_score);
1480  }
1481 
      /* The five steps below depend on each other and must stay in this order. */
1482  /* Evaluate HMMs */
1483  evaluate_channels(ngs, senscr, frame_idx);
1484  /* Prune HMMs and do phone transitions. */
1485  prune_channels(ngs, frame_idx);
1486  /* Do absolute pruning on word exits. */
1487  bptable_maxwpf(ngs, frame_idx);
1488  /* Do word transitions. */
1489  word_transition(ngs, frame_idx);
1490  /* Deactivate pruned HMMs. */
1491  deactivate_channels(ngs, frame_idx);
1492 
1493  ++ngs->n_frame;
1494  /* Return the number of frames processed. */
1495  return 1;
1496 }
1497 
/*
 * Finish fwdtree decoding for an utterance.
 *
 * Marks the backpointer table one past the final frame, resets every root
 * channel and the channels queued for the next frame, releases the
 * right-context channels of queued multi-phone words, stops the fwdtree
 * timer and, when at least one frame was processed, logs search statistics.
 *
 * ngs: N-Gram search state, updated in place.
 */
1498 void
1499 ngram_fwdtree_finish(ngram_search_t *ngs)
1500 {
1501  int32 i, w, cf, *awl;
1502  root_chan_t *rhmm;
1503  chan_t *hmm, **acl;
1504 
1505  /* This is the number of frames processed. */
1506  cf = ps_search_acmod(ngs)->output_frame;
1507  /* Add a mark in the backpointer table for one past the final frame. */
1508  ngram_search_mark_bptable(ngs, cf);
1509 
1510  /* Deactivate channels lined up for the next frame */
1511  /* First, root channels of HMM tree */
1512  for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
1513  hmm_clear(&rhmm->hmm);
1514  }
1515 
1516  /* nonroot channels of HMM tree */
      /* cf & 0x1 selects the list that was being filled for frame cf. */
1517  i = ngs->n_active_chan[cf & 0x1];
1518  acl = ngs->active_chan_list[cf & 0x1];
      /* NOTE(review): hmm = *(acl++) runs before the i > 0 test, so one slot
       * past the counted entries is read (but not dereferenced). */
1519  for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
1520  hmm_clear(&hmm->hmm);
1521  }
1522 
1523  /* word channels */
1524  i = ngs->n_active_word[cf & 0x1];
1525  awl = ngs->active_word_list[cf & 0x1];
1526  for (w = *(awl++); i > 0; --i, w = *(awl++)) {
1527  /* Don't accidentally free single-phone words! */
1528  if (dict_is_single_phone(ps_search_dict(ngs), w))
1529  continue;
1530  bitvec_clear(ngs->word_active, w);
      /* No right-context channels allocated for this word: nothing to free. */
1531  if (ngs->word_chan[w] == NULL)
1532  continue;
1533  ngram_search_free_all_rc(ngs, w);
1534  }
1535 
1536  /*
1537  * The previous search code did a postprocessing of the
1538  * backpointer table here, but we will postpone this until it is
1539  * absolutely necessary, i.e. when generating a word graph.
1540  * Likewise we don't actually have to decide what the exit word is
1541  * until somebody requests a backtrace.
1542  */
1543 
1544  ptmr_stop(&ngs->fwdtree_perf);
1545  /* Print out some statistics. */
1546  if (cf > 0) {
      /* Seconds of speech: (cf + 1) frames divided by the "-frate" setting.
       * All per-frame figures below use cf + 1 as the frame count. */
1547  double n_speech = (double)(cf + 1)
1548  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
1549  E_INFO("%8d words recognized (%d/fr)\n",
1550  ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
1551  E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt,
1552  (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
1553  E_INFO("%8d channels searched (%d/fr), %d 1st, %d last\n",
1554  ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval,
1555  (ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval) / (cf + 1),
1556  ngs->st.n_root_chan_eval, ngs->st.n_last_chan_eval);
1557  E_INFO("%8d words for which last channels evaluated (%d/fr)\n",
1558  ngs->st.n_word_lastchan_eval,
1559  ngs->st.n_word_lastchan_eval / (cf + 1));
1560  E_INFO("%8d candidate words for entering last phone (%d/fr)\n",
1561  ngs->st.n_lastphn_cand_utt, ngs->st.n_lastphn_cand_utt / (cf + 1));
1562  E_INFO("fwdtree %.2f CPU %.3f xRT\n",
1563  ngs->fwdtree_perf.t_cpu,
1564  ngs->fwdtree_perf.t_cpu / n_speech);
1565  E_INFO("fwdtree %.2f wall %.3f xRT\n",
1566  ngs->fwdtree_perf.t_elapsed,
1567  ngs->fwdtree_perf.t_elapsed / n_speech);
1568  }
1569  /* dump_bptable(ngs); */
1570 }
hmm_t hmm
Basic HMM structure.
Definition: ngram_search.h:65
void ngram_fwdtree_finish(ngram_search_t *ngs)
Finish fwdtree decoding for an utterance.
int32 wid
Word index.
Definition: ngram_search.h:113
void ngram_fwdtree_deinit(ngram_search_t *ngs)
Release memory associated with fwdtree decoding.
Base structure for search module.
int32 n_nonroot_chan
Number of valid non-root channels.
Definition: ngram_search.h:234
void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
Definition: ngram_search.c:616
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
Definition: hmm.c:89
chan_t * next
first descendant of this channel
Definition: ngram_search.h:94
listelem_alloc_t * chan_alloc
For chan_t.
Definition: ngram_search.h:211
void ngram_fwdtree_start(ngram_search_t *ngs)
Start fwdtree decoding for an utterance.
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
Definition: bin_mdef.c:738
frame_idx_t frame
start or end frame
Definition: ngram_search.h:110
hmm_context_t * hmmctx
HMM context.
Definition: ngram_search.h:200
int32 n_active_chan[2]
Number entries in active_chan_list.
Definition: ngram_search.h:276
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
Definition: hmm.c:111
int16 last2_phone
next-to-last phone of this word
Definition: ngram_search.h:120
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition: acmod.c:1191
#define BAD_SSID
Invalid senone sequence ID (limited to 16 bits for PocketSphinx).
Definition: bin_mdef.h:97
bitvec_t * word_active
array of active flags for all words.
Definition: ngram_search.h:247
int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
Get the exit score for a backpointer entry with a given right context.
Definition: ngram_search.c:678
int16 ciphone
first ciphone of this node; all words rooted at this node begin with this ciphone ...
Definition: ngram_search.h:100
int32 ** active_word_list
Array of active multi-phone words for current and next frame.
Definition: ngram_search.h:287
struct chan_s * next
first descendant of this channel; or, in the case of the last phone of a word, the next alternative r...
Definition: ngram_search.h:68
void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w, int32 score, int32 path, int32 rc)
Enter a word in the backpointer table.
Definition: ngram_search.c:398
Lexicon tree based Viterbi search.
int32 * single_phone_wid
list of single-phone word ids
Definition: ngram_search.h:264
int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
Record the current frame's index in the backpointer table.
Definition: ngram_search.c:344
int32 n_root_chan_alloc
Number of root_chan allocated.
Definition: ngram_search.h:232
int16 ci2phone
second ciphone of this node; one root HMM for each unique right context
Definition: ngram_search.h:102
int32 penult_phn_wid
list of words whose last phone follows this one; this field indicates the first of the list; the rest...
Definition: ngram_search.h:75
int32 n_active_word[2]
Number entries in active_word_list.
Definition: ngram_search.h:288
int32 rc_id
right-context id for last phone of words
Definition: ngram_search.h:79
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
Definition: dict2pid.h:179
N-Gram search module structure.
Definition: ngram_search.h:197
int ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
void hmm_normalize(hmm_t *h, int32 bestscr)
Renormalize the scores in this HMM based on the given best score.
Definition: hmm.c:209
int32 max_nonroot_chan
Maximum possible number of non-root channels.
Definition: ngram_search.h:235
int32 last_phone_best_score
Best Viterbi path score for last phone.
Definition: ngram_search.h:326
int32 real_wid
wid of this or latest predecessor real word
Definition: ngram_search.h:117
root_chan_t * rhmm_1ph
Root HMMs for single-phone words.
Definition: ngram_search.h:236
int32 prev_real_wid
wid of second-last real word
Definition: ngram_search.h:118
#define WORST_SCORE
Large "bad" score.
Definition: hmm.h:88
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
Definition: dict.c:396
void ngram_fwdtree_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdtree decoding.
Lexical tree node data type for the first phone (root) of each dynamic HMM tree structure.
Definition: ngram_search.h:90
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
Definition: hmm.c:201
Lexical tree node data type.
Definition: ngram_search.h:64
hmm_t hmm
Basic HMM structure.
Definition: ngram_search.h:91
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition: acmod.c:1175
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
Definition: hmm.h:231
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:77
chan_t *** active_chan_list
Array of active channels for current and next frame.
Definition: ngram_search.h:275
a structure for a dictionary.
Definition: dict.h:79
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e.
Definition: dict.c:410
struct chan_s * alt
sibling; i.e., next descendant of parent HMM
Definition: ngram_search.h:71
#define WORSE_THAN
Is one score worse than another?
Definition: hmm.h:104
s3ssid_t dict2pid_internal(dict2pid_t *d2p, int32 wid, int pos)
Return the senone sequence ID for the given word position.
Definition: dict2pid.c:367
void hmm_clear(hmm_t *h)
Reset the states of the HMM to the invalid condition.
Definition: hmm.c:183
int32 best_score
Best Viterbi path score.
Definition: ngram_search.h:325
Back pointer table (forward pass lattice; actually a tree)
Definition: ngram_search.h:109
int32 n_1ph_LMwords
Number single phone dict words also in LM; these come first in single_phone_wid.
Definition: ngram_search.h:266
cross word triphone model structure
Definition: dict2pid.h:137
int ngram_fwdtree_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
Fast and rough context-independent phoneme loop search.
void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
Free last phone channels for all possible right contexts for word w.
Definition: ngram_search.c:665
root_chan_t * root_chan
Search structure of HMM instances.
Definition: ngram_search.h:231
char * hyp_str
Current hypothesis string.
#define BETTER_THAN
Is one score better than another?
Definition: hmm.h:99
int32 s_idx
Start of BScoreStack for various right contexts.
Definition: ngram_search.h:116
int32 n_frame
Number of frames actually present.
Definition: ngram_search.h:308
ngram_model_t * lmset
Set of language models.
Definition: ngram_search.h:199
uint8 valid
For absolute pruning.
Definition: ngram_search.h:111
int32 n_1ph_words
Number single phone words in dict (total)
Definition: ngram_search.h:265
int32 ciphone
ciphone for this node
Definition: ngram_search.h:73
ngram_search_stats_t st
Various statistics for profiling.
Definition: ngram_search.h:335
chan_t ** word_chan
Channels associated with a given word (only used for right contexts, single-phone words in fwdtree se...
Definition: ngram_search.h:246
int32 score
Score (best among all right contexts)
Definition: ngram_search.h:115
int32 n_root_chan
Number of valid root_chan.
Definition: ngram_search.h:233
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
Definition: dict2pid.h:139
int32 * homophone_set
Each node in the HMM tree structure may point to a set of words whose last phone would follow that no...
Definition: ngram_search.h:263
#define dict_pron(d, w, p)
The CI phones of the word w at position p.
Definition: dict.h:168
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:148
#define phone_loop_search_score(pls, ci)
Return lookahead heuristic score for a specific phone.
Phone loop search structure.
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition: acmod.c:1088
int16 last_phone
last phone of this word
Definition: ngram_search.h:119