47 #include <sphinxbase/ckd_alloc.h> 48 #include <sphinxbase/listelem_alloc.h> 49 #include <sphinxbase/err.h> 56 #define __CHAN_DUMP__ 0 58 #define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr) 60 #define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm) 70 int32 w, ndiph, i, n_words, n_ci;
71 dict_t *dict = ps_search_dict(ngs);
74 n_words = ps_search_n_words(ngs);
80 n_ci = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef);
82 dimap = bitvec_alloc(n_ci * n_ci);
83 for (w = 0; w < n_words; w++) {
86 if (dict_is_single_phone(dict, w))
90 ph0 = dict_first_phone(dict, w);
91 ph1 = dict_second_phone(dict, w);
93 if (bitvec_is_clear(dimap, ph0 * n_ci + ph1)) {
94 bitvec_set(dimap, ph0 * n_ci + ph1);
99 E_INFO(
"%d unique initial diphones\n", ndiph);
107 for (w = 0; w < n_words; ++w) {
110 if (!dict_is_single_phone(dict, w)) {
111 E_WARN(
"Filler word %d = %s has more than one phone, ignoring it.\n",
112 w, dict_wordstr(dict, w));
130 for (w = 0; w < n_words; w++) {
131 if (!dict_is_single_phone(dict, w))
137 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ngs->
rhmm_1ph[i].
ciphone),
138 bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ngs->
rhmm_1ph[i].
ciphone));
147 E_INFO(
"%d root, %d non-root channels, %d single-phone words\n",
179 int32 w, i, j, p, ph, tmatid;
181 dict_t *dict = ps_search_dict(ngs);
184 n_words = ps_search_n_words(ngs);
186 E_INFO(
"Creating search tree\n");
188 for (w = 0; w < n_words; w++)
191 E_INFO(
"before: %d root, %d non-root channels, %d single-phone words\n",
198 for (w = 0; w < n_words; w++) {
199 int ciphone, ci2phone;
202 if (!ngram_model_set_known_wid(ngs->
lmset, dict_basewid(dict, w)))
206 if (dict_is_single_phone(dict, w)) {
207 E_DEBUG(1,(
"single_phone_wid[%d] = %s\n",
215 ciphone = dict_first_phone(dict, w);
216 ci2phone = dict_second_phone(dict, w);
224 rhmm->
hmm.tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
226 hmm_mpx_ssid(&rhmm->
hmm, 0) =
227 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ciphone);
235 E_DEBUG(3,(
"word %s rhmm %d\n", dict_wordstr(dict, w), rhmm - ngs->
root_chan));
237 if (dict_pronlen(dict, w) == 2) {
239 if ((j = rhmm->penult_phn_wid) < 0)
240 rhmm->penult_phn_wid = w;
249 tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef,
dict_pron(dict, w, 1));
253 init_nonroot_chan(ngs, hmm, ph,
dict_pron(dict, w, 1), tmatid);
259 for (; hmm && (hmm_nonmpx_ssid(&hmm->
hmm) != ph); hmm = hmm->
alt)
263 init_nonroot_chan(ngs, hmm, ph,
dict_pron(dict, w, 1), tmatid);
267 E_DEBUG(3,(
"phone %s = %d\n",
269 dict_second_phone(dict, w)), ph));
270 for (p = 2; p < dict_pronlen(dict, w) - 1; p++) {
272 tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef,
dict_pron(dict, w, p));
276 init_nonroot_chan(ngs, hmm, ph,
dict_pron(dict, w, p), tmatid);
282 for (hmm = hmm->next; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph);
287 init_nonroot_chan(ngs, hmm, ph,
dict_pron(dict, w, p), tmatid);
291 E_DEBUG(3,(
"phone %s = %d\n",
297 if ((j = hmm->info.penult_phn_wid) < 0)
298 hmm->info.penult_phn_wid = w;
309 for (w = 0; w < n_words; ++w) {
311 if (!dict_is_single_phone(dict, w))
316 if (ngram_model_set_known_wid(ngs->
lmset, dict_basewid(dict, w)))
318 E_DEBUG(1,(
"single_phone_wid[%d] = %s\n",
336 E_ERROR(
"No word from the language model has pronunciation in the dictionary\n");
338 E_INFO(
"after: %d root, %d non-root channels, %d single-phone words\n",
348 for (child = hmm->
next; child; child = sibling) {
349 sibling = child->
alt;
350 reinit_search_subtree(ngs, child);
373 reinit_search_subtree(ngs, hmm);
387 ngs->bestbp_rc = ckd_calloc(bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef),
388 sizeof(*ngs->bestbp_rc));
389 ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
390 sizeof(*ngs->lastphn_cand));
391 init_search_tree(ngs);
392 create_search_tree(ngs);
400 n_words = ps_search_n_words(ngs);
405 for (i = w = 0; w < n_words; ++w) {
406 if (!dict_is_single_phone(ps_search_dict(ngs), w))
427 double n_speech = (double)ngs->n_tot_frame
428 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
430 E_INFO(
"TOTAL fwdtree %.2f CPU %.3f xRT\n",
431 ngs->fwdtree_perf.t_tot_cpu,
432 ngs->fwdtree_perf.t_tot_cpu / n_speech);
433 E_INFO(
"TOTAL fwdtree %.2f wall %.3f xRT\n",
434 ngs->fwdtree_perf.t_tot_elapsed,
435 ngs->fwdtree_perf.t_tot_elapsed / n_speech);
438 reinit_search_tree(ngs);
440 deinit_search_tree(ngs);
445 ckd_free(ngs->cand_sf);
447 ckd_free(ngs->bestbp_rc);
448 ngs->bestbp_rc = NULL;
449 ckd_free(ngs->lastphn_cand);
450 ngs->lastphn_cand = NULL;
457 reinit_search_tree(ngs);
459 deinit_search_tree(ngs);
461 ckd_free(ngs->lastphn_cand);
462 ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
463 sizeof(*ngs->lastphn_cand));
465 ngs->
word_chan = ckd_calloc(ps_search_n_words(ngs),
468 init_search_tree(ngs);
469 create_search_tree(ngs);
480 n_words = ps_search_n_words(ngs);
483 memset(&ngs->
st, 0,
sizeof(ngs->
st));
484 ptmr_reset(&ngs->fwdtree_perf);
485 ptmr_start(&ngs->fwdtree_perf);
492 for (i = 0; i < n_words; ++i)
493 ngs->word_lat_idx[i] = NO_BP;
501 ngs->renormalized = 0;
504 for (i = 0; i < n_words; i++)
505 ngs->last_ltrans[i].sf = -1;
541 if (hmm_frame(&rhmm->
hmm) == frame_idx)
548 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
555 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
564 if (hmm_frame(&rhmm->
hmm) == frame_idx)
570 renormalize_scores(
ngram_search_t *ngs,
int frame_idx, int32 norm)
578 if (hmm_frame(&rhmm->
hmm) == frame_idx) {
586 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
593 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
601 if (hmm_frame(&rhmm->
hmm) == frame_idx) {
606 ngs->renormalized = TRUE;
617 if (hmm_frame(&rhmm->
hmm) == frame_idx) {
618 int32 score = chan_v_eval(rhmm);
621 ++ngs->
st.n_root_chan_eval;
636 ngs->
st.n_nonroot_chan_eval += i;
638 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
639 int32 score = chan_v_eval(hmm);
640 assert(hmm_frame(&hmm->hmm) == frame_idx);
653 int32 i, w, bestscore, *awl, j, k;
660 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
668 assert(hmm_frame(&hmm->
hmm) == frame_idx);
669 score = chan_v_eval(hmm);
686 if (hmm_frame(&rhmm->
hmm) < frame_idx)
689 score = chan_v_eval(rhmm);
691 if (score
BETTER_THAN bestscore && w != ps_search_finish_wid(ngs))
697 ngs->
st.n_last_chan_eval += k + j;
698 ngs->
st.n_nonroot_chan_eval += k + j;
699 ngs->
st.n_word_lastchan_eval +=
706 evaluate_channels(
ngram_search_t *ngs, int16
const *senone_scores,
int frame_idx)
711 ngs->
best_score = eval_root_chan(ngs, frame_idx);
714 if ((bs = eval_word_chan(ngs, frame_idx)) BETTER_THAN ngs->
best_score)
732 int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
739 newphone_thresh = ngs->
best_score + ngs->pbeam;
740 lastphn_thresh = ngs->
best_score + ngs->lpbeam;
745 E_DEBUG(3,(
"Root channel %d frame %d score %d thresh %d\n",
746 i, hmm_frame(&rhmm->
hmm), hmm_bestscore(&rhmm->
hmm), thresh));
748 if (hmm_frame(&rhmm->
hmm) < frame_idx)
752 hmm_frame(&rhmm->
hmm) = nf;
753 E_DEBUG(3,(
"Preserving root channel %d score %d\n", i, hmm_bestscore(&rhmm->
hmm)));
756 newphone_score = hmm_out_score(&rhmm->
hmm) + ngs->pip;
757 if (pls != NULL || newphone_score
BETTER_THAN newphone_thresh) {
758 for (hmm = rhmm->
next; hmm; hmm = hmm->
alt) {
759 int32 pl_newphone_score = newphone_score
761 if (pl_newphone_score
BETTER_THAN newphone_thresh) {
762 if ((hmm_frame(&hmm->
hmm) < frame_idx)
765 hmm_out_history(&rhmm->
hmm), nf);
777 if (pls != NULL || newphone_score
BETTER_THAN lastphn_thresh) {
778 for (w = rhmm->penult_phn_wid; w >= 0;
780 int32 pl_newphone_score = newphone_score
782 (pls, dict_last_phone(ps_search_dict(ngs),w));
783 E_DEBUG(3,(
"word %s newphone_score %d\n", dict_wordstr(ps_search_dict(ngs), w), newphone_score));
784 if (pl_newphone_score
BETTER_THAN lastphn_thresh) {
785 candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
786 ngs->n_lastphn_cand++;
789 pl_newphone_score - ngs->nwpen;
790 candp->bp = hmm_out_history(&rhmm->
hmm);
808 int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
816 newphone_thresh = ngs->
best_score + ngs->pbeam;
817 lastphn_thresh = ngs->
best_score + ngs->lpbeam;
823 for (i = ngs->
n_active_chan[frame_idx & 0x1], hmm = *(acl++); i > 0;
824 --i, hmm = *(acl++)) {
825 assert(hmm_frame(&hmm->hmm) >= frame_idx);
829 if (hmm_frame(&hmm->hmm) != nf) {
830 hmm_frame(&hmm->hmm) = nf;
835 newphone_score = hmm_out_score(&hmm->hmm) + ngs->pip;
836 if (pls != NULL || newphone_score
BETTER_THAN newphone_thresh) {
837 for (nexthmm = hmm->
next; nexthmm; nexthmm = nexthmm->
alt) {
838 int32 pl_newphone_score = newphone_score
840 if ((pl_newphone_score
BETTER_THAN newphone_thresh)
841 && ((hmm_frame(&nexthmm->
hmm) < frame_idx)
842 || (pl_newphone_score
844 if (hmm_frame(&nexthmm->
hmm) != nf) {
849 hmm_out_history(&hmm->hmm), nf);
859 if (pls != NULL || newphone_score
BETTER_THAN lastphn_thresh) {
860 for (w = hmm->info.penult_phn_wid; w >= 0;
862 int32 pl_newphone_score = newphone_score
864 (pls, dict_last_phone(ps_search_dict(ngs),w));
865 if (pl_newphone_score
BETTER_THAN lastphn_thresh) {
866 candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
867 ngs->n_lastphn_cand++;
870 pl_newphone_score - ngs->nwpen;
871 candp->bp = hmm_out_history(&hmm->hmm);
876 else if (hmm_frame(&hmm->hmm) != nf) {
891 int32 i, j, k, nf, bp, bpend, w;
895 int32 bestscore, dscr;
902 ngs->
st.n_lastphn_cand_utt += ngs->n_lastphn_cand;
906 for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
913 bpe = &(ngs->bp_table[candp->bp]);
917 (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
919 candp->score -= start_score;
927 if (ngs->last_ltrans[candp->wid].sf != bpe->
frame + 1) {
930 for (j = 0; j < n_cand_sf; j++) {
931 if (ngs->cand_sf[j].bp_ef == bpe->
frame)
936 candp->next = ngs->cand_sf[j].cand;
939 if (n_cand_sf >= ngs->cand_sf_alloc) {
940 if (ngs->cand_sf_alloc == 0) {
942 ckd_calloc(CAND_SF_ALLOCSIZE,
943 sizeof(*ngs->cand_sf));
944 ngs->cand_sf_alloc = CAND_SF_ALLOCSIZE;
947 ngs->cand_sf_alloc += CAND_SF_ALLOCSIZE;
948 ngs->cand_sf = ckd_realloc(ngs->cand_sf,
950 *
sizeof(*ngs->cand_sf));
951 E_INFO(
"cand_sf[] increased to %d entries\n",
959 ngs->cand_sf[j].bp_ef = bpe->
frame;
962 ngs->cand_sf[j].cand = i;
965 ngs->last_ltrans[candp->wid].sf = bpe->
frame + 1;
970 for (i = 0; i < n_cand_sf; i++) {
972 bp = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef];
973 bpend = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef + 1];
974 for (bpe = &(ngs->bp_table[bp]); bp < bpend; bp++, bpe++) {
978 for (j = ngs->cand_sf[i].cand; j >= 0; j = candp->next) {
980 candp = &(ngs->lastphn_cand[j]);
983 (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
986 dscr += ngram_tg_score(ngs->
lmset,
987 dict_basewid(ps_search_dict(ngs), candp->wid),
993 if (dscr
BETTER_THAN ngs->last_ltrans[candp->wid].dscr) {
994 ngs->last_ltrans[candp->wid].dscr = dscr;
995 ngs->last_ltrans[candp->wid].bp = bp;
1003 for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
1004 candp->score += ngs->last_ltrans[candp->wid].dscr;
1005 candp->bp = ngs->last_ltrans[candp->wid].bp;
1008 bestscore = candp->score;
1013 thresh = bestscore + ngs->lponlybeam;
1014 for (i = ngs->n_lastphn_cand, candp = ngs->lastphn_cand; i > 0; --i, candp++) {
1022 if ((hmm_frame(&hmm->
hmm) < frame_idx)
1024 assert(hmm_frame(&hmm->
hmm) != nf);
1026 candp->score, candp->bp, nf);
1032 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
1052 int32 newword_thresh, lastphn_thresh;
1063 for (i = ngs->
n_active_word[frame_idx & 0x1], w = *(awl++); i > 0;
1064 --i, w = *(awl++)) {
1067 for (hmm = ngs->
word_chan[w]; hmm; hmm = thmm) {
1068 assert(hmm_frame(&hmm->
hmm) >= frame_idx);
1073 hmm_frame(&hmm->
hmm) = nf;
1075 phmmp = &(hmm->
next);
1081 hmm_out_score(&hmm->
hmm),
1082 hmm_out_history(&hmm->
hmm),
1086 else if (hmm_frame(&hmm->
hmm) == nf) {
1087 phmmp = &(hmm->
next);
1095 if ((k > 0) && (bitvec_is_clear(ngs->
word_active, w))) {
1096 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
1110 E_DEBUG(3,(
"Single phone word %s frame %d score %d thresh %d outscore %d nwthresh %d\n",
1111 dict_wordstr(ps_search_dict(ngs),w),
1112 hmm_frame(&rhmm->
hmm), hmm_bestscore(&rhmm->
hmm),
1113 lastphn_thresh, hmm_out_score(&rhmm->
hmm), newword_thresh));
1114 if (hmm_frame(&rhmm->
hmm) < frame_idx)
1117 hmm_frame(&rhmm->
hmm) = nf;
1121 E_DEBUG(4,(
"Exiting single phone word %s with %d > %d, %d\n",
1122 dict_wordstr(ps_search_dict(ngs),w),
1123 hmm_out_score(&rhmm->
hmm),
1124 lastphn_thresh, newword_thresh));
1126 hmm_out_score(&rhmm->
hmm),
1127 hmm_out_history(&rhmm->
hmm), 0);
1137 ngs->n_lastphn_cand = 0;
1139 ngs->dynamic_beam = ngs->beam;
1140 if (ngs->maxhmmpf != -1
1141 && ngs->
st.n_root_chan_eval + ngs->
st.n_nonroot_chan_eval > ngs->maxhmmpf) {
1143 int32 bins[256], bw, nhmms, i;
1148 bw = -ngs->beam / 256;
1149 memset(bins, 0,
sizeof(bins));
1162 for (i = ngs->
n_active_chan[frame_idx & 0x1], hmm = *(acl++);
1163 i > 0; --i, hmm = *(acl++)) {
1167 b = (ngs->
best_score - hmm_bestscore(&hmm->hmm)) / bw;
1173 for (i = nhmms = 0; i < 256; ++i) {
1175 if (nhmms > ngs->maxhmmpf)
1178 ngs->dynamic_beam = -(i * bw);
1181 prune_root_chan(ngs, frame_idx);
1182 prune_nonroot_chan(ngs, frame_idx);
1183 last_phone_transition(ngs, frame_idx);
1184 prune_word_chan(ngs, frame_idx);
1195 int32 bestscr, worstscr;
1196 bptbl_t *bpe, *bestbpe, *worstbpe;
1199 if (ngs->maxwpf == -1 || ngs->maxwpf == ps_search_n_words(ngs))
1203 bestscr = (int32) 0x80000000;
1206 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1207 bpe = &(ngs->bp_table[bp]);
1210 bestscr = bpe->
score;
1218 if (bestbpe != NULL) {
1219 bestbpe->
valid = TRUE;
1225 - ngs->bp_table_idx[frame_idx]) - n;
1226 for (; n > ngs->maxwpf; --n) {
1228 worstscr = (int32) 0x7fffffff;
1230 for (bp = ngs->bp_table_idx[frame_idx]; (bp < ngs->bpidx); bp++) {
1231 bpe = &(ngs->bp_table[bp]);
1233 worstscr = bpe->
score;
1238 if (worstbpe == NULL)
1239 E_FATAL(
"PANIC: No worst BPtable entry remaining\n");
1240 worstbpe->
valid = FALSE;
1247 int32 i, k, bp, w, nf;
1249 int32 thresh, newscore;
1254 dict_t *dict = ps_search_dict(ngs);
1262 for (i = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef) - 1; i >= 0; --i)
1268 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1269 bpe = &(ngs->bp_table[bp]);
1270 ngs->word_lat_idx[bpe->
wid] = NO_BP;
1272 if (bpe->
wid == ps_search_finish_wid(ngs))
1282 for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
1284 E_DEBUG(4,(
"bestbp_rc[0] = %d lc %d\n",
1286 ngs->bestbp_rc[rc].score = bpe->
score;
1287 ngs->bestbp_rc[rc].path = bp;
1294 int32 *rcss = &(ngs->bscore_stack[bpe->
s_idx]);
1295 for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
1297 E_DEBUG(4,(
"bestbp_rc[%d] = %d lc %d\n",
1299 ngs->bestbp_rc[rc].score = rcss[rssid->
cimap[rc]];
1300 ngs->bestbp_rc[rc].path = bp;
1310 thresh = ngs->
best_score + ngs->dynamic_beam;
1316 bestbp_rc_ptr = &(ngs->bestbp_rc[rhmm->
ciphone]);
1318 newscore = bestbp_rc_ptr->score + ngs->nwpen + ngs->pip
1321 if ((hmm_frame(&rhmm->
hmm) < frame_idx)
1324 bestbp_rc_ptr->path, nf);
1327 hmm_mpx_ssid(&rhmm->
hmm, 0) =
1328 dict2pid_ldiph_lc(d2p, rhmm->
ciphone, rhmm->
ci2phone, bestbp_rc_ptr->lc);
1340 ngs->last_ltrans[w].dscr = (int32) 0x80000000;
1342 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1343 bpe = &(ngs->bp_table[bp]);
1351 (ngs, bpe, dict_first_phone(dict, w));
1352 E_DEBUG(4, (
"initial newscore for %s: %d\n",
1353 dict_wordstr(dict, w), newscore));
1355 newscore += ngram_tg_score(ngs->
lmset,
1356 dict_basewid(dict, w),
1363 if (newscore
BETTER_THAN ngs->last_ltrans[w].dscr) {
1364 ngs->last_ltrans[w].dscr = newscore;
1365 ngs->last_ltrans[w].bp = bp;
1375 if (w == dict_startwid(ps_search_dict(ngs)))
1378 newscore = ngs->last_ltrans[w].dscr + ngs->pip
1381 bpe = ngs->bp_table + ngs->last_ltrans[w].bp;
1382 if ((hmm_frame(&rhmm->
hmm) < frame_idx)
1385 newscore, ngs->last_ltrans[w].bp, nf);
1388 hmm_mpx_ssid(&rhmm->
hmm, 0) =
1390 dict_last_phone(dict, bpe->
wid));
1397 w = ps_search_silence_wid(ngs);
1399 bestbp_rc_ptr = &(ngs->bestbp_rc[ps_search_acmod(ngs)->mdef->sil]);
1400 newscore = bestbp_rc_ptr->score + ngs->silpen + ngs->pip
1403 if ((hmm_frame(&rhmm->
hmm) < frame_idx)
1406 newscore, bestbp_rc_ptr->path, nf);
1409 for (w = dict_filler_start(dict); w <= dict_filler_end(dict); w++) {
1410 if (w == ps_search_silence_wid(ngs))
1414 if (w == dict_startwid(ps_search_dict(ngs)))
1420 newscore = bestbp_rc_ptr->score + ngs->fillpen + ngs->pip
1423 if ((hmm_frame(&rhmm->
hmm) < frame_idx)
1426 newscore, bestbp_rc_ptr->path, nf);
1440 if (hmm_frame(&rhmm->
hmm) == frame_idx) {
1448 if (hmm_frame(&rhmm->
hmm) == frame_idx) {
1457 int16
const *senscr;
1460 if (!ps_search_acmod(ngs)->compallsen)
1461 compute_sen_active(ngs, frame_idx);
1464 if ((senscr =
acmod_score(ps_search_acmod(ngs), &frame_idx)) == NULL)
1466 ngs->
st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
1477 E_INFO(
"Renormalizing Scores at frame %d, best score %d\n",
1479 renormalize_scores(ngs, frame_idx, ngs->
best_score);
1483 evaluate_channels(ngs, senscr, frame_idx);
1485 prune_channels(ngs, frame_idx);
1487 bptable_maxwpf(ngs, frame_idx);
1489 word_transition(ngs, frame_idx);
1491 deactivate_channels(ngs, frame_idx);
1501 int32 i, w, cf, *awl;
1506 cf = ps_search_acmod(ngs)->output_frame;
1519 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
1526 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
1528 if (dict_is_single_phone(ps_search_dict(ngs), w))
1544 ptmr_stop(&ngs->fwdtree_perf);
1547 double n_speech = (double)(cf + 1)
1548 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
1549 E_INFO(
"%8d words recognized (%d/fr)\n",
1550 ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
1551 E_INFO(
"%8d senones evaluated (%d/fr)\n", ngs->
st.n_senone_active_utt,
1552 (ngs->
st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
1553 E_INFO(
"%8d channels searched (%d/fr), %d 1st, %d last\n",
1554 ngs->
st.n_root_chan_eval + ngs->
st.n_nonroot_chan_eval,
1555 (ngs->
st.n_root_chan_eval + ngs->
st.n_nonroot_chan_eval) / (cf + 1),
1556 ngs->
st.n_root_chan_eval, ngs->
st.n_last_chan_eval);
1557 E_INFO(
"%8d words for which last channels evaluated (%d/fr)\n",
1558 ngs->
st.n_word_lastchan_eval,
1559 ngs->
st.n_word_lastchan_eval / (cf + 1));
1560 E_INFO(
"%8d candidate words for entering last phone (%d/fr)\n",
1561 ngs->
st.n_lastphn_cand_utt, ngs->
st.n_lastphn_cand_utt / (cf + 1));
1562 E_INFO(
"fwdtree %.2f CPU %.3f xRT\n",
1563 ngs->fwdtree_perf.t_cpu,
1564 ngs->fwdtree_perf.t_cpu / n_speech);
1565 E_INFO(
"fwdtree %.2f wall %.3f xRT\n",
1566 ngs->fwdtree_perf.t_elapsed,
1567 ngs->fwdtree_perf.t_elapsed / n_speech);
hmm_t hmm
Basic HMM structure.
void ngram_fwdtree_finish(ngram_search_t *ngs)
Finish fwdtree decoding for an utterance.
void ngram_fwdtree_deinit(ngram_search_t *ngs)
Release memory associated with fwdtree decoding.
Base structure for search module.
int32 n_nonroot_chan
Number of valid non-root channels.
void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
chan_t * next
first descendant of this channel
listelem_alloc_t * chan_alloc
For chan_t.
void ngram_fwdtree_start(ngram_search_t *ngs)
Start fwdtree decoding for an utterance.
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
frame_idx_t frame
start or end frame
hmm_context_t * hmmctx
HMM context.
int32 n_active_chan[2]
Number of entries in active_chan_list.
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
int16 last2_phone
next-to-last phone of this word
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
#define BAD_SSID
Invalid senone sequence ID (limited to 16 bits for PocketSphinx).
bitvec_t * word_active
array of active flags for all words.
int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
Get the exit score for a backpointer entry with a given right context.
int16 ciphone
first ciphone of this node; all words rooted at this node begin with this ciphone ...
int32 ** active_word_list
Array of active multi-phone words for current and next frame.
struct chan_s * next
first descendant of this channel; or, in the case of the last phone of a word, the next alternative r...
void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w, int32 score, int32 path, int32 rc)
Enter a word in the backpointer table.
Lexicon tree based Viterbi search.
int32 * single_phone_wid
list of single-phone word ids
int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
Record the current frame's index in the backpointer table.
int32 n_root_chan_alloc
Number of root_chan allocated.
int16 ci2phone
second ciphone of this node; one root HMM for each unique right context
int32 penult_phn_wid
list of words whose last phone follows this one; this field indicates the first of the list; the rest...
int32 n_active_word[2]
Number of entries in active_word_list.
int32 rc_id
right-context id for last phone of words
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
N-Gram search module structure.
int ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
void hmm_normalize(hmm_t *h, int32 bestscr)
Renormalize the scores in this HMM based on the given best score.
int32 max_nonroot_chan
Maximum possible number of non-root channels.
int32 last_phone_best_score
Best Viterbi path score for last phone.
int32 real_wid
wid of this or latest predecessor real word
root_chan_t * rhmm_1ph
Root HMMs for single-phone words.
int32 prev_real_wid
wid of second-last real word
#define WORST_SCORE
Large "bad" score.
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
void ngram_fwdtree_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdtree decoding.
Lexical tree node data type for the first phone (root) of each dynamic HMM tree structure.
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
Lexical tree node data type.
hmm_t hmm
Basic HMM structure.
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
#define SENSCR_SHIFT
Shift count for senone scores.
chan_t *** active_chan_list
Array of active channels for current and next frame.
a structure for a dictionary.
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e. neither a filler word nor START_WORD/FINISH_WORD.
struct chan_s * alt
sibling; i.e., next descendant of parent HMM
#define WORSE_THAN
Is one score worse than another?
s3ssid_t dict2pid_internal(dict2pid_t *d2p, int32 wid, int pos)
Return the senone sequence ID for the given word position.
void hmm_clear(hmm_t *h)
Reset the states of the HMM to the invalid condition.
int32 best_score
Best Viterbi path score.
Back pointer table (forward pass lattice; actually a tree)
int32 n_1ph_LMwords
Number of single-phone dictionary words that are also in the LM; these come first in single_phone_wid.
cross word triphone model structure
int ngram_fwdtree_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
Fast and rough context-independent phoneme loop search.
void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
Free last phone channels for all possible right contexts for word w.
root_chan_t * root_chan
Search structure of HMM instances.
char * hyp_str
Current hypothesis string.
#define BETTER_THAN
Is one score better than another?
int32 s_idx
Start of BScoreStack for various right contexts.
int32 n_frame
Number of frames actually present.
ngram_model_t * lmset
Set of language models.
uint8 valid
For absolute pruning.
int32 n_1ph_words
Number of single-phone words in the dictionary (total)
int32 ciphone
ciphone for this node
ngram_search_stats_t st
Various statistics for profiling.
chan_t ** word_chan
Channels associated with a given word (only used for right contexts, single-phone words in fwdtree se...
int32 score
Score (best among all right contexts)
int32 n_root_chan
Number of valid root_chan.
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
int32 * homophone_set
Each node in the HMM tree structure may point to a set of words whose last phone would follow that no...
#define dict_pron(d, w, p)
The CI phones of the word w at position p.
Building composite triphone (as well as word internal triphones) with the dictionary.
#define phone_loop_search_score(pls, ci)
Return lookahead heuristic score for a specific phone.
Phone loop search structure.
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
int16 last_phone
last phone of this word