47 #include <sphinxbase/ckd_alloc.h> 48 #include <sphinxbase/listelem_alloc.h> 49 #include <sphinxbase/err.h> 59 static int ngram_search_step(
ps_search_t *search,
int frame_idx);
60 static int ngram_search_finish(
ps_search_t *search);
62 static char const *ngram_search_hyp(
ps_search_t *search, int32 *out_score, int32 *out_is_final);
63 static int32 ngram_search_prob(
ps_search_t *search);
76 ngram_search_seg_iter,
86 n_words = ps_search_n_words(ngs);
87 words = ckd_calloc(n_words,
sizeof(*words));
89 for (i = 0; i < n_words; ++i)
90 words[i] = (
const char *)dict_wordstr(ps_search_dict(ngs), i);
91 ngram_model_set_map_words(ngs->
lmset, words, n_words);
101 config = ps_search_config(ngs);
102 acmod = ps_search_acmod(ngs);
105 ngs->beam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-beam"))>>
SENSCR_SHIFT;
106 ngs->wbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-wbeam"))>>
SENSCR_SHIFT;
107 ngs->pbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-pbeam"))>>
SENSCR_SHIFT;
108 ngs->lpbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-lpbeam"))>>
SENSCR_SHIFT;
109 ngs->lponlybeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-lponlybeam"))>>
SENSCR_SHIFT;
110 ngs->fwdflatbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-fwdflatbeam"))>>
SENSCR_SHIFT;
111 ngs->fwdflatwbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-fwdflatwbeam"))>>
SENSCR_SHIFT;
114 ngs->maxwpf = cmd_ln_int32_r(config,
"-maxwpf");
115 ngs->maxhmmpf = cmd_ln_int32_r(config,
"-maxhmmpf");
118 ngs->wip = logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-wip")) >>
SENSCR_SHIFT;
119 ngs->nwpen = logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-nwpen")) >>
SENSCR_SHIFT;
120 ngs->pip = logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-pip")) >>
SENSCR_SHIFT;
121 ngs->silpen = ngs->pip
122 + (logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-silprob"))>>
SENSCR_SHIFT);
123 ngs->fillpen = ngs->pip
124 + (logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-fillprob"))>>
SENSCR_SHIFT);
127 ngs->fwdflat_fwdtree_lw_ratio =
128 cmd_ln_float32_r(config,
"-fwdflatlw")
129 / cmd_ln_float32_r(config,
"-lw");
130 ngs->bestpath_fwdtree_lw_ratio =
131 cmd_ln_float32_r(config,
"-bestpathlw")
132 / cmd_ln_float32_r(config,
"-lw");
135 ngs->
ascale = 1.0 / cmd_ln_float32_r(config,
"-ascale");
147 ngs = ckd_calloc(1,
sizeof(*ngs));
148 ps_search_init(&ngs->base, &ngram_funcs, config, acmod, dict, d2p);
151 if (ngs->
hmmctx == NULL) {
152 ps_search_free(ps_search_base(ngs));
160 ngram_search_calc_beams(ngs);
165 ngs->word_lat_idx = ckd_calloc(
dict_size(dict),
166 sizeof(*ngs->word_lat_idx));
168 ngs->last_ltrans = ckd_calloc(
dict_size(dict),
169 sizeof(*ngs->last_ltrans));
173 ngs->bp_table_size = cmd_ln_int32_r(config,
"-latsize");
174 ngs->bp_table = ckd_calloc(ngs->bp_table_size,
175 sizeof(*ngs->bp_table));
177 ngs->bscore_stack_size = ngs->bp_table_size * 20;
178 ngs->bscore_stack = ckd_calloc(ngs->bscore_stack_size,
179 sizeof(*ngs->bscore_stack));
182 sizeof(*ngs->bp_table_idx));
190 if ((path = cmd_ln_str_r(config,
"-lmctl"))) {
191 ngs->
lmset = ngram_model_set_read(config, path, acmod->
lmath);
192 if (ngs->
lmset == NULL) {
193 E_ERROR(
"Failed to read language model control file: %s\n",
198 if ((path = cmd_ln_str_r(config,
"-lmname"))) {
199 ngram_model_set_select(ngs->
lmset, path);
202 else if ((path = cmd_ln_str_r(config,
"-lm"))) {
203 static const char *name =
"default";
206 lm = ngram_model_read(config, path, NGRAM_AUTO, acmod->
lmath);
208 E_ERROR(
"Failed to read language model file: %s\n", path);
211 ngs->
lmset = ngram_model_set_init(config,
214 if (ngs->
lmset == NULL) {
215 E_ERROR(
"Failed to initialize language model set\n");
219 if (ngs->
lmset != NULL
220 && ngram_wid(ngs->
lmset, S3_FINISH_WORD) == ngram_unknown_wid(ngs->
lmset)) {
221 E_ERROR(
"Language model/set does not contain </s>, recognition will fail\n");
226 ngram_search_update_widmap(ngs);
229 if (cmd_ln_boolean_r(config,
"-fwdtree")) {
232 ngs->fwdtree_perf.name =
"fwdtree";
233 ptmr_init(&ngs->fwdtree_perf);
235 if (cmd_ln_boolean_r(config,
"-fwdflat")) {
238 ngs->fwdflat_perf.name =
"fwdflat";
239 ptmr_init(&ngs->fwdflat_perf);
241 if (cmd_ln_boolean_r(config,
"-bestpath")) {
242 ngs->bestpath = TRUE;
243 ngs->bestpath_perf.name =
"bestpath";
244 ptmr_init(&ngs->bestpath_perf);
266 ckd_free(ngs->word_lat_idx);
268 ckd_free(ngs->last_ltrans);
270 ngs->word_lat_idx = ckd_calloc(search->
n_words,
sizeof(*ngs->word_lat_idx));
272 ngs->last_ltrans = ckd_calloc(search->
n_words,
sizeof(*ngs->last_ltrans));
274 = ckd_calloc_2d(2, search->
n_words,
281 if (ngs->
lmset == NULL)
285 ngram_search_calc_beams(ngs);
288 ngram_search_update_widmap(ngs);
314 double n_speech = (double)ngs->n_tot_frame
315 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
317 E_INFO(
"TOTAL bestpath %.2f CPU %.3f xRT\n",
318 ngs->bestpath_perf.t_tot_cpu,
319 ngs->bestpath_perf.t_tot_cpu / n_speech);
320 E_INFO(
"TOTAL bestpath %.2f wall %.3f xRT\n",
321 ngs->bestpath_perf.t_tot_elapsed,
322 ngs->bestpath_perf.t_tot_elapsed / n_speech);
329 ngram_model_free(ngs->
lmset);
332 ckd_free(ngs->word_lat_idx);
334 ckd_free(ngs->bp_table);
335 ckd_free(ngs->bscore_stack);
336 if (ngs->bp_table_idx != NULL)
337 ckd_free(ngs->bp_table_idx - 1);
339 ckd_free(ngs->last_ltrans);
348 ngs->bp_table_idx = ckd_realloc(ngs->bp_table_idx - 1,
350 *
sizeof(*ngs->bp_table_idx));
358 ngs->bp_table_idx[frame_idx] = ngs->bpidx;
368 ent = ngs->bp_table + bp;
369 if (ent->
bp == NO_BP)
372 prev = ngs->bp_table + ent->
bp;
381 ent->
real_wid = dict_basewid(ps_search_dict(ngs),
387 ent->
real_wid = dict_basewid(ps_search_dict(ngs), ent->
wid);
395 #define NGRAM_HISTORY_LONG_WORD 2000 399 int32 w, int32 score, int32 path, int32 rc)
406 bp = ngs->word_lat_idx[w];
409 if (frame_idx - ngs->bp_table[path].
frame > NGRAM_HISTORY_LONG_WORD) {
410 E_WARN(
"Word '%s' survived for %d frames, potential overpruning\n", dict_wordstr(ps_search_dict(ngs), w),
411 frame_idx - ngs->bp_table[path].
frame);
419 if (ngs->bp_table[bp].
bp != path) {
420 int32 bplh[2], newlh[2];
424 E_DEBUG(2,(
"Updating path history %d => %d frame %d\n",
425 ngs->bp_table[bp].
bp, path, frame_idx));
426 bplh[0] = ngs->bp_table[bp].
bp == -1
428 bplh[1] = ngs->bp_table[bp].
bp == -1
429 ? -1 : ngs->bp_table[ngs->bp_table[bp].
bp].
real_wid;
430 newlh[0] = path == -1
432 newlh[1] = path == -1
433 ? -1 : ngs->bp_table[path].
real_wid;
436 if (bplh[0] != newlh[0] || bplh[1] != newlh[1]) {
440 E_DEBUG(1, (
"Updating language model state %s,%s => %s,%s frame %d\n",
441 dict_wordstr(ps_search_dict(ngs), bplh[0]),
442 dict_wordstr(ps_search_dict(ngs), bplh[1]),
443 dict_wordstr(ps_search_dict(ngs), newlh[0]),
444 dict_wordstr(ps_search_dict(ngs), newlh[1]),
446 set_real_wid(ngs, bp);
448 ngs->bp_table[bp].
bp = path;
450 ngs->bp_table[bp].
score = score;
455 if (ngs->bp_table[bp].
s_idx != -1)
456 ngs->bscore_stack[ngs->bp_table[bp].
s_idx + rc] = score;
463 if (ngs->bpidx == NO_BP) {
464 E_ERROR(
"No entries in backpointer table!");
469 if (ngs->bpidx >= ngs->bp_table_size) {
470 ngs->bp_table_size *= 2;
471 ngs->bp_table = ckd_realloc(ngs->bp_table,
473 *
sizeof(*ngs->bp_table));
474 E_INFO(
"Resized backpointer table to %d entries\n", ngs->bp_table_size);
476 if (ngs->bss_head >= ngs->bscore_stack_size
477 - bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef)) {
478 ngs->bscore_stack_size *= 2;
479 ngs->bscore_stack = ckd_realloc(ngs->bscore_stack,
480 ngs->bscore_stack_size
481 *
sizeof(*ngs->bscore_stack));
482 E_INFO(
"Resized score stack to %d entries\n", ngs->bscore_stack_size);
485 ngs->word_lat_idx[w] = ngs->bpidx;
486 be = &(ngs->bp_table[ngs->bpidx]);
488 be->
frame = frame_idx;
491 be->
s_idx = ngs->bss_head;
493 assert(path != ngs->bpidx);
497 be->
last_phone = dict_last_phone(ps_search_dict(ngs),w);
498 if (dict_is_single_phone(ps_search_dict(ngs), w)) {
504 be->
last2_phone = dict_second_last_phone(ps_search_dict(ngs),w);
509 for (i = 0; i < rcsize; ++i)
510 ngs->bscore_stack[ngs->bss_head + i] =
WORST_SCORE;
512 ngs->bscore_stack[ngs->bss_head + rc] = score;
513 set_real_wid(ngs, ngs->bpidx);
516 ngs->bss_head += rcsize;
532 if (frame_idx == -1 || frame_idx >= ngs->
n_frame)
534 end_bpidx = ngs->bp_table_idx[frame_idx];
540 while (frame_idx >= 0 && ngs->bp_table_idx[frame_idx] == end_bpidx)
547 assert(end_bpidx < ngs->bp_table_size);
548 for (bp = ngs->bp_table_idx[frame_idx]; bp < end_bpidx; ++bp) {
549 if (ngs->bp_table[bp].
wid == ps_search_finish_wid(ngs)
551 best_score = ngs->bp_table[bp].
score;
554 if (ngs->bp_table[bp].
wid == ps_search_finish_wid(ngs))
558 if (out_best_score) {
559 *out_best_score = best_score;
562 *out_is_final = (ngs->bp_table[bp].
wid == ps_search_finish_wid(ngs));
580 while (bp != NO_BP) {
581 bptbl_t *be = &ngs->bp_table[bp];
584 len += strlen(dict_basestr(ps_search_dict(ngs), be->
wid)) + 1;
592 base->
hyp_str = ckd_calloc(1, len);
596 while (bp != NO_BP) {
597 bptbl_t *be = &ngs->bp_table[bp];
602 len = strlen(dict_basestr(ps_search_dict(ngs), be->
wid));
604 memcpy(c, dict_basestr(ps_search_dict(ngs), be->
wid), len);
620 int32 i, tmatid, ciphone;
624 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
625 ciphone = dict_last_phone(ps_search_dict(ngs),w);
628 dict_second_last_phone(ps_search_dict(ngs),w));
629 tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
631 if ((hmm == NULL) || (hmm_nonmpx_ssid(&hmm->
hmm) != rssid->
ssid[0])) {
639 E_DEBUG(3,(
"allocated rc_id 0 ssid %d ciphone %d lc %d word %s\n",
641 dict_second_last_phone(ps_search_dict(ngs),w),
642 dict_wordstr(ps_search_dict(ngs),w)));
644 for (i = 1; i < rssid->
n_ssid; ++i) {
645 if ((hmm->
next == NULL) || (hmm_nonmpx_ssid(&hmm->
next->
hmm) != rssid->
ssid[i])) {
654 E_DEBUG(3,(
"allocated rc_id %d ssid %d ciphone %d lc %d word %s\n",
656 dict_second_last_phone(ps_search_dict(ngs),w),
657 dict_wordstr(ps_search_dict(ngs),w)));
669 for (hmm = ngs->
word_chan[w]; hmm; hmm = thmm) {
695 return ngs->bscore_stack[pbe->
s_idx + rssid->
cimap[rcphone]];
704 int32 *out_ascr, int32 *out_lscr)
710 if (be->
bp == NO_BP) {
711 *out_ascr = be->
score;
717 pbe = ngs->bp_table + be->
bp;
719 dict_first_phone(ps_search_dict(ngs),be->
wid));
726 if (be->
wid == ps_search_silence_wid(ngs)) {
727 *out_lscr = ngs->silpen;
730 *out_lscr = ngs->fillpen;
734 *out_lscr = ngram_tg_score(ngs->
lmset,
739 *out_lscr = *out_lscr * lwf;
741 *out_ascr = be->
score - start_score - *out_lscr;
750 ngram_model_flush(ngs->
lmset);
753 else if (ngs->fwdflat)
761 ngram_search_step(
ps_search_t *search,
int frame_idx)
767 else if (ngs->fwdflat)
777 E_INFO(
"Backpointer table (%d entries):\n", ngs->bpidx);
778 for (i = 0; i < ngs->bpidx; ++i) {
779 bptbl_t *bpe = ngs->bp_table + i;
782 E_INFO_NOFN(
"%-5d %-10s start %-3d end %-3d score %-8d bp %-3d real_wid %-5d prev_real_wid %-5d",
783 i, dict_wordstr(ps_search_dict(ngs), bpe->
wid),
785 ? 0 : ngs->bp_table[bpe->
bp].
frame + 1),
796 for (j = 0; j < rcsize; ++j)
798 E_INFOCONT(
" %d", bpe->
score - ngs->bscore_stack[bpe->
s_idx + j]);
809 ngs->n_tot_frame += ngs->
n_frame;
823 while (ps_search_acmod(ngs)->n_feat_frame > 0) {
835 else if (ngs->fwdflat) {
845 ngram_search_bestpath(
ps_search_t *search, int32 *out_score,
int backward)
851 ngs->bestpath_fwdtree_lw_ratio,
857 if (search->
post == 0)
867 ngram_search_hyp(
ps_search_t *search, int32 *out_score, int32 *out_is_final)
872 if (ngs->bestpath && ngs->done) {
878 ptmr_reset(&ngs->bestpath_perf);
879 ptmr_start(&ngs->bestpath_perf);
882 if ((link = ngram_search_bestpath(search, out_score, FALSE)) == NULL)
885 ptmr_stop(&ngs->bestpath_perf);
887 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
888 E_INFO(
"bestpath %.2f CPU %.3f xRT\n",
889 ngs->bestpath_perf.t_cpu,
890 ngs->bestpath_perf.t_cpu / n_speech);
891 E_INFO(
"bestpath %.2f wall %.3f xRT\n",
892 ngs->bestpath_perf.t_elapsed,
893 ngs->bestpath_perf.t_elapsed / n_speech);
909 ngram_search_bp2itor(
ps_seg_t *seg,
int bp)
914 be = &ngs->bp_table[bp];
915 pbe = be->
bp == -1 ? NULL : &ngs->bp_table[be->
bp];
916 seg->
word = dict_wordstr(ps_search_dict(ngs), be->
wid);
918 seg->
sf = pbe ? pbe->frame + 1 : 0;
931 dict_first_phone(ps_search_dict(ngs), be->
wid));
933 if (be->
wid == ps_search_silence_wid(ngs)) {
934 seg->
lscr = ngs->silpen;
937 seg->
lscr = ngs->fillpen;
956 ckd_free(itor->
bpidx);
966 ngram_bp_seg_free(seg);
970 ngram_search_bp2itor(seg, itor->
bpidx[itor->
cur]);
989 itor = ckd_calloc(1,
sizeof(*itor));
990 itor->
base.
vt = &ngram_bp_segfuncs;
995 while (bp != NO_BP) {
996 bptbl_t *be = &ngs->bp_table[bp];
1007 while (bp != NO_BP) {
1008 bptbl_t *be = &ngs->bp_table[bp];
1009 itor->
bpidx[cur] = bp;
1015 ngram_search_bp2itor((
ps_seg_t *)itor, itor->bpidx[0]);
1021 ngram_search_seg_iter(
ps_search_t *search, int32 *out_score)
1026 if (ngs->bestpath && ngs->done) {
1032 ptmr_reset(&ngs->bestpath_perf);
1033 ptmr_start(&ngs->bestpath_perf);
1036 if ((link = ngram_search_bestpath(search, out_score, TRUE)) == NULL)
1039 ngs->bestpath_fwdtree_lw_ratio);
1040 ptmr_stop(&ngs->bestpath_perf);
1042 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
1043 E_INFO(
"bestpath %.2f CPU %.3f xRT\n",
1044 ngs->bestpath_perf.t_cpu,
1045 ngs->bestpath_perf.t_cpu / n_speech);
1046 E_INFO(
"bestpath %.2f wall %.3f xRT\n",
1047 ngs->bestpath_perf.t_elapsed,
1048 ngs->bestpath_perf.t_elapsed / n_speech);
1056 return ngram_search_bp_iter(ngs, bpidx,
1058 (ngs->done && ngs->fwdflat)
1059 ? ngs->fwdflat_fwdtree_lw_ratio : 1.0);
1071 if (ngs->bestpath && ngs->done) {
1077 if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
1079 return search->
post;
1093 for (i = 0, bp_ptr = ngs->bp_table; i < ngs->bpidx; ++i, ++bp_ptr) {
1101 sf = (bp_ptr->
bp < 0) ? 0 : ngs->bp_table[bp_ptr->
bp].
frame + 1;
1105 assert(ef < dag->n_frames);
1107 if ((wid == ps_search_finish_wid(ngs)) && (ef < dag->
n_frames - 1))
1112 && (!ngram_model_set_known_wid(ngs->
lmset,
1113 dict_basewid(ps_search_dict(ngs), wid))))
1117 for (node = dag->
nodes; node; node = node->
next) {
1118 if ((node->
wid == wid) && (node->
sf == sf))
1130 node->
fef = node->
lef = i;
1151 for (node = dag->
nodes; node; node = node->
next) {
1152 if ((node->
wid == ps_search_start_wid(ngs)) && (node->
sf == 0))
1157 E_ERROR(
"Couldn't find <s> in first frame\n");
1167 int32 ef, bestbp, bp, bestscore;
1170 for (node = dag->
nodes; node; node = node->
next) {
1171 int32 lef = ngs->bp_table[node->
lef].
frame;
1172 if ((node->
wid == ps_search_finish_wid(ngs))
1183 ef >= 0 && ngs->bp_table_idx[ef] == ngs->bpidx;
1186 E_ERROR(
"Empty backpointer table: can not build DAG.\n");
1193 for (bp = ngs->bp_table_idx[ef]; bp < ngs->bp_table_idx[ef + 1]; ++bp) {
1194 int32 n_used, l_scr, wid, prev_wid;
1198 if (wid == ps_search_finish_wid(ngs)) {
1202 l_scr = ngram_tg_score(ngs->
lmset, ps_search_finish_wid(ngs),
1204 l_scr = l_scr * lwf;
1206 bestscore = ngs->bp_table[bp].
score + l_scr;
1210 if (bestbp == NO_BP) {
1211 E_ERROR(
"No word exits found in last frame (%d), assuming no recognition\n", ef);
1214 E_INFO(
"</s> not found in last frame, using %s.%d instead\n",
1215 dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].
wid), ef);
1218 for (node = dag->
nodes; node; node = node->
next) {
1219 if (node->
lef == bestbp)
1224 E_ERROR(
"Failed to find DAG node corresponding to %s\n",
1225 dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].
wid));
1235 int32 i, score, ascr, lscr;
1239 int min_endfr, nlink;
1243 min_endfr = cmd_ln_int32_r(ps_search_config(search),
"-min_endfr");
1260 lwf = ngs->fwdflat ? ngs->fwdflat_fwdtree_lw_ratio : 1.0;
1261 create_dag_nodes(ngs, dag);
1262 if ((dag->
start = find_start_node(ngs, dag)) == NULL)
1264 if ((dag->
end = find_end_node(ngs, dag, ngs->bestpath_fwdtree_lw_ratio)) == NULL)
1266 E_INFO(
"lattice start node %s.%d end node %s.%d\n",
1270 ngram_compute_seg_score(ngs, ngs->bp_table + dag->
end->
lef, lwf,
1294 E_INFO(
"Eliminated %d nodes before end node\n", i);
1297 for (to = dag->
end; to; to = to->
next) {
1306 fef = ngs->bp_table[to->
fef].
frame;
1307 lef = ngs->bp_table[to->
lef].
frame;
1308 if (to != dag->
end && lef - fef < min_endfr) {
1314 for (from = to->
next; from; from = from->
next) {
1317 fef = ngs->bp_table[from->
fef].
frame;
1318 lef = ngs->bp_table[from->
lef].
frame;
1320 if ((to->
sf <= fef) || (to->
sf > lef + 1))
1322 if (lef - fef < min_endfr) {
1329 from_bpe = ngs->bp_table + i;
1330 for (; i <= from->
lef; i++, from_bpe++) {
1331 if (from_bpe->
wid != from->
wid)
1333 if (from_bpe->
frame >= to->
sf - 1)
1337 if ((i > from->
lef) || (from_bpe->
frame != to->
sf - 1))
1342 ngram_compute_seg_score(ngs, from_bpe, lwf,
1348 dict_first_phone(ps_search_dict(ngs), to->
wid));
1350 if (score == WORST_SCORE)
1354 score = ascr + (score - from_bpe->
score);
1365 else if (score BETTER_THAN WORST_SCORE) {
1375 E_ERROR(
"End node of lattice isolated; unreachable\n");
1379 for (node = dag->
nodes; node; node = node->
next) {
1388 for (node = dag->
nodes; node; node = node->
next) {
1391 for (alt = node->
next; alt && alt->
sf == node->
sf; alt = alt->
next) {
1399 E_INFO(
"Lattice has %d nodes, %d links\n", dag->
n_nodes, nlink);
1405 dag->
end->
basewid = ps_search_finish_wid(ngs);
hmm_t hmm
Basic HMM structure.
Internal implementation of PocketSphinx decoder.
void ngram_fwdtree_finish(ngram_search_t *ngs)
Finish fwdtree decoding for an utterance.
int32 n_frame_alloc
Number of frames allocated in bp_table_idx and friends.
void ngram_fwdtree_deinit(ngram_search_t *ngs)
Release memory associated with fwdtree decoding.
Base structure for search module.
void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
ps_seg_t * ps_lattice_seg_iter(ps_lattice_t *dag, ps_latlink_t *link, float32 lwf)
Get hypothesis segmentation iterator after bestpath search.
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
int acmod_rewind(acmod_t *acmod)
Rewind the current utterance, allowing it to be rescored.
listelem_alloc_t * chan_alloc
For chan_t.
void ngram_fwdtree_start(ngram_search_t *ngs)
Start fwdtree decoding for an utterance.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
frame_idx_t frame
start or end frame
ps_latnode_t * start
Starting node.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state]...
hmm_context_t * hmmctx
HMM context.
ps_segfuncs_t * vt
V-table of seg methods.
logmath_t * lmath
Log-math computation.
uint16 ** sseq
Unique senone sequences (2D array built at load time)
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
int32 lscr
Language model score.
int32 n_words
Number of words known to search (may be less than in the dictionary)
int16 last2_phone
next-to-last phone of this word
#define BAD_S3WID
Dictionary word id.
int32 n_ssid
#Unique ssid in above, compressed ssid list
frame_idx_t n_frames
Number of frames for this utterance.
int ngram_fwdflat_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
Word graph search implementation.
bitvec_t * word_active
array of active flags for all words.
void ngram_fwdflat_finish(ngram_search_t *ngs)
Finish fwdflat decoding for an utterance.
ps_latnode_t * nodes
List of all nodes.
int ngram_fwdflat_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
Get the exit score for a backpointer entry with a given right context.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
listelem_alloc_t * latnode_alloc
Node allocator for this DAG.
int16 n_nodes
Number of nodes in this lattice.
int32 prob
Log posterior probability.
latlink_list_t * entries
Links into this node.
POCKETSPHINX_EXPORT int32 ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset, float32 ascale)
Calculate link posterior probabilities on a word graph.
struct ps_latnode_s * alt
Node with alternate pronunciation for this word.
char const * word
Word string (pointer into dictionary hash)
int32 ** active_word_list
Array of active multi-phone words for current and next frame.
struct chan_s * next
first descendant of this channel; or, in the case of the last phone of a word, the next alternative right-context channel
void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w, int32 score, int32 path, int32 rc)
Enter a word in the backpointer table.
ps_search_t * search
Search object from whence this came.
int32 final_node_ascr
Acoustic score of implicit link exiting final node.
Lexicon tree based Viterbi search.
int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
Record the current frame's index in the backpointer table.
int32 rc_id
right-context id for last phone of words
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
void ngram_fwdflat_start(ngram_search_t *ngs)
Start fwdflat decoding for an utterance.
N-Gram search module structure.
int ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
void ps_lattice_delete_unreachable(ps_lattice_t *dag)
Remove nodes marked as unreachable.
ps_latnode_t * end
Ending node.
frame_idx_t sf
Start frame.
int32 real_wid
wid of this or latest predecessor real word
int32 prev_real_wid
wid of second-last real word
POCKETSPHINX_EXPORT ps_latlink_t * ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, float32 ascale)
Do N-Gram based best-path search on a word graph.
ps_lattice_t * ngram_search_lattice(ps_search_t *search)
Construct a word lattice from the current hypothesis.
latlink_list_t * exits
Links out of this node.
#define WORST_SCORE
Large "bad" score.
N-Gram based multi-pass search ("FBS")
tmat_t * tmat
Transition matrices.
int32 ascr
Acoustic score.
int acmod_advance(acmod_t *acmod)
Advance the frame index.
listelem_alloc_t * latnode_alloc
For latnode_t.
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
void ngram_fwdtree_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdtree decoding.
Segmentation "iterator" for backpointer table results.
ps_latnode_t ** frm_wordlist
List of active words in each frame.
int32 path_scr
Best path score from root of DAG.
Lexical tree node data type for the first phone (root) of each dynamic HMM tree structure.
Lexical tree node data type.
int32 wid
Dictionary word id.
ps_search_t * ngram_search_init(cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize the N-Gram search module.
int16 cur
Current position in bpidx.
#define SENSCR_SHIFT
Shift count for senone scores.
a structure for a dictionary.
char const * ps_lattice_hyp(ps_lattice_t *dag, ps_latlink_t *link)
Get hypothesis string after bestpath search.
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word.
void ngram_search_free(ps_search_t *search)
Finalize the N-Gram search module.
Word graph structure used in bestpath/nbest search.
#define WORSE_THAN
Is one score worse than another?
int16 n_bpidx
Number of backpointer IDs.
int32 best_score
Best Viterbi path score.
Back pointer table (forward pass lattice; actually a tree)
cross word triphone model structure
int ngram_fwdtree_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
Free last phone channels for all possible right contexts for word w.
int32 post
Utterance posterior probability.
char * hyp_str
Current hypothesis string.
#define BETTER_THAN
Is one score better than another?
dict_t * dict
Pronunciation dictionary.
void ngram_fwdflat_deinit(ngram_search_t *ngs)
Release memory associated with fwdflat decoding.
int32 s_idx
Start of BScoreStack for various right contexts.
int32 fef
First end frame.
int32 n_frame
Number of frames actually present.
Flat lexicon based Viterbi search.
ngram_model_t * lmset
Set of language models.
uint8 valid
For absolute pruning.
int32 lback
Language model backoff.
listelem_alloc_t * root_chan_alloc
For root_chan_t.
int32 basewid
Dictionary base word id.
int32 ciphone
ciphone for this node
void ngram_fwdflat_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdflat decoding.
ps_lattice_t * ps_lattice_init_search(ps_search_t *search, int n_frame)
Construct an empty word graph with reference to a search structure.
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
int32 * bpidx
Sequence of backpointer IDs.
chan_t ** word_chan
Channels associated with a given word (only used for right contexts, single-phone words in fwdtree se...
bin_mdef_t * mdef
Model definition.
int ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score, int32 *out_is_final)
Find the best word exit for the current frame in the backpointer table.
ps_latlink_t * last_link
Final link in best path.
struct ps_latnode_s * next
Next node in DAG (no ordering implied)
int32 score
Score (best among all right contexts)
void ps_lattice_bypass_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen)
Bypass filler words.
V-table for search algorithm.
ps_lattice_t * dag
Current hypothesis word graph.
Base structure for hypothesis segmentation iterator.
#define dict_size(d)
Packaged macro access to dictionary members.
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
ps_seg_t base
Base structure.
float32 ascale
Acoustic score scale for posterior probabilities.
Acoustic model structure.
float32 lwf
Language weight factor (for second-pass searches)
Building composite triphone (as well as word internal triphones) with the dictionary.
void ps_search_deinit(ps_search_t *search)
De-initialize base structure.
POCKETSPHINX_EXPORT void ps_lattice_link(ps_lattice_t *dag, ps_latnode_t *from, ps_latnode_t *to, int32 score, int32 ef)
Create a directed link between "from" and "to" nodes, but if a link already exists, choose one with the best link_scr.
s3ssid_t * ssid
Senone Sequence ID list for all context ciphones.
frame_idx_t sf
Start frame.
int16 last_phone
last phone of this word
char const * ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx)
Backtrace from a given backpointer index to obtain a word hypothesis.