71 for (i = 0; i < sas->
n_phones; ++i) {
75 if (hmm_frame(hmm) < frame_idx)
88 int nf = frame_idx + 1;
92 for (i = 0; i < sas->
n_phones; ++i) {
94 if (hmm_frame(hmm) < frame_idx)
103 int nf = frame_idx + 1;
106 for (i = 0; i < sas->
n_phones - 1; ++i) {
108 int32 newphone_score;
111 if (hmm_frame(hmm) != nf)
114 newphone_score = hmm_out_score(hmm);
117 if (hmm_frame(nhmm) < frame_idx
118 || newphone_score
BETTER_THAN hmm_in_score(nhmm)) {
119 hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf);
124 #define TOKEN_STEP 20 145 extend_tokenstack(sas, frame_idx);
149 for (i = 0; i < sas->
n_phones; ++i) {
153 if (hmm_frame(hmm) < frame_idx)
155 for (j = 0; j < sas->
hmmctx->n_emit_state; ++j) {
156 int state_idx = i * sas->
hmmctx->n_emit_state + j;
158 tokens[state_idx] = hmm_history(hmm, j);
160 hmm_history(hmm, j) = state_idx;
166 state_align_search_step(
ps_search_t *search,
int frame_idx)
169 acmod_t *acmod = ps_search_acmod(search);
181 E_INFO(
"Renormalizing Scores at frame %d, best score %d\n",
183 renormalize_hmms(sas, frame_idx, sas->
best_score);
187 sas->
best_score = evaluate_hmms(sas, senscr, frame_idx);
188 prune_hmms(sas, frame_idx);
191 phone_transition(sas, frame_idx);
194 record_transitions(sas, frame_idx);
197 sas->
frame = frame_idx;
209 int next_state, next_start, state, frame;
212 next_state = state = hmm_out_history(final_phone);
213 if (state == 0xffff) {
214 E_ERROR(
"Failed to reach final state in alignment\n");
218 next_start = sas->
frame + 1;
219 for (frame = sas->
frame - 1; frame >= 0; --frame) {
222 if (state != next_state) {
224 assert(itor != NULL);
226 ent->start = frame + 1;
227 ent->duration = next_start - ent->start;
228 E_DEBUG(1,(
"state %d start %d end %d\n", next_state,
229 ent->start, next_start));
231 next_start = frame + 1;
236 assert(itor != NULL);
239 ent->duration = next_start;
240 E_DEBUG(1,(
"state %d start %d end %d\n", 0,
241 ent->start, next_start));
268 state_align_search_start,
269 state_align_search_step,
270 state_align_search_finish,
271 state_align_search_reinit,
272 state_align_search_free,
280 state_align_search_init(cmd_ln_t *config,
288 sas = ckd_calloc(1,
sizeof(*sas));
290 config, acmod, al->d2p->
dict, al->d2p);
293 if (sas->
hmmctx == NULL) {
307 ent->id.pid.ssid, ent->id.pid.tmatid);
309 return ps_search_base(sas);
int ps_alignment_n_states(ps_alignment_t *al)
Number of states.
int n_phones
Number of HMMs (phones).
Base structure for search module.
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
An individual HMM among the HMM search space.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state]...
uint16 ** sseq
Unique senone sequences (2D array built at load time)
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
ps_alignment_iter_t * ps_alignment_iter_goto(ps_alignment_iter_t *itor, int pos)
Move alignment iterator to given index.
ps_alignment_iter_t * ps_alignment_iter_next(ps_alignment_iter_t *itor)
Move an alignment iterator forward.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
int n_fr_alloc
Number of frames of tokens allocated.
hmm_t * hmms
Vector of HMMs corresponding to phone level.
int32 hmm_vit_eval(hmm_t *hmm)
Viterbi evaluation of given HMM.
int ps_alignment_propagate(ps_alignment_t *al)
Propagate timing information up from state sequence.
void hmm_normalize(hmm_t *h, int32 bestscr)
Renormalize the scores in this HMM based on the given best score.
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
int frame
Current frame being processed.
#define WORST_SCORE
Large "bad" score.
tmat_t * tmat
Transition matrices.
State (and phone and word) alignment search.
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
dict_t * dict
Dictionary this table refers to.
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
A structure for a dictionary.
#define WORSE_THAN
Is one score worse than another?
uint16 * tokens
Tokens (backpointers) for state alignment.
ps_alignment_t * al
Alignment structure being operated on.
#define BETTER_THAN
Is one score better than another?
int32 best_score
Best score in current frame.
Phone loop search structure.
ps_alignment_iter_t * ps_alignment_states(ps_alignment_t *al)
Iterate over the alignment starting at the first state.
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
bin_mdef_t * mdef
Model definition.
V-table for search algorithm.
int ps_alignment_n_phones(ps_alignment_t *al)
Number of phones.
int ps_alignment_iter_free(ps_alignment_iter_t *itor)
Release an iterator before completing all iterations.
ps_alignment_iter_t * ps_alignment_phones(ps_alignment_t *al)
Iterate over the alignment starting at the first phone.
Acoustic model structure.
Building composite triphones (as well as word-internal triphones) with the dictionary.
void ps_search_deinit(ps_search_t *search)
De-initialize base structure.
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
ps_alignment_entry_t * ps_alignment_iter_get(ps_alignment_iter_t *itor)
Get the alignment entry pointed to by an iterator.
hmm_context_t * hmmctx
HMM context structure.
int n_emit_state
Number of emitting states (tokens per frame)