PocketSphinx  0.6
acmod.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
43 #ifndef __ACMOD_H__
44 #define __ACMOD_H__
45 
46 /* System headers. */
47 #include <stdio.h>
48 
49 /* SphinxBase headers. */
50 #include <sphinxbase/cmd_ln.h>
51 #include <sphinxbase/logmath.h>
52 #include <sphinxbase/fe.h>
53 #include <sphinxbase/feat.h>
54 #include <sphinxbase/bitvec.h>
55 #include <sphinxbase/err.h>
56 #include <sphinxbase/prim_type.h>
57 
58 /* Local headers. */
59 #include "ps_mllr.h"
60 #include "bin_mdef.h"
61 #include "tmat.h"
62 #include "hmm.h"
63 
67 typedef enum acmod_state_e {
73 
77 #define SENSCR_DUMMY 0x7fff /* 0x7fff is the largest int16 value; NOTE(review): presumably a placeholder senone score (scores are int16, see senone_scores) -- confirm against users of this macro. */
78 
82 struct ps_mllr_s { /* Feature space linear transform structure. */
83  int refcnt; /**< Reference count. */
84  int n_class; /**< Number of MLLR classes. */
85  int n_feat; /**< Number of feature streams. */
86  int *veclen; /**< Length of input vectors for each stream. */
87  float32 ****A; /**< Rotation part of mean transformations. */
88  float32 ***b; /**< Bias part of mean transformations. */
89  float32 ***h; /**< Diagonal transformation of variances. */
90  int32 *cb2mllr; /**< Mapping from codebooks to transformations. */
91 };
92 
96 typedef struct ps_mgau_s ps_mgau_t; /* Forward declaration so the function table below can take ps_mgau_t pointers. */
97 
98 typedef struct ps_mgaufuncs_s { /* Table of function pointers; the ps_mgau_* macros call these entries through ->vt. */
99  char const *name; /* Name string for this table; NOTE(review): how it is used is not visible in this header. */
100 
101  int (*frame_eval)(ps_mgau_t *mgau, /* Entry invoked by ps_mgau_frame_eval(). */
102  int16 *senscr,
103  uint8 *senone_active,
104  int32 n_senone_active,
105  mfcc_t ** feat,
106  int32 frame,
107  int32 compallsen);
108  int (*transform)(ps_mgau_t *mgau, /* Entry invoked by ps_mgau_transform(); receives an MLLR transform. */
109  ps_mllr_t *mllr);
110  void (*free)(ps_mgau_t *mgau); /* Entry invoked by ps_mgau_free(). */
111 } ps_mgaufuncs_t;
112 
113 struct ps_mgau_s { /* Common base of mgau objects; ps_mgau_base() casts to this type. */
114  ps_mgaufuncs_t *vt; /**< vtable of mgau functions. */
115  int frame_idx; /**< frame counter. */
116 };
117 
118 #define ps_mgau_base(mg) ((ps_mgau_t *)(mg)) /* Cast an mgau object pointer to the common ps_mgau_t type. */
119 #define ps_mgau_frame_eval(mg,senscr,senone_active,n_senone_active,feat,frame,compallsen) \
120  (*ps_mgau_base(mg)->vt->frame_eval) \
121  (mg, senscr, senone_active, n_senone_active, feat, frame, compallsen) /* Calls the frame_eval entry of mg's function table; mg is expanded twice. */
122 #define ps_mgau_transform(mg, mllr) \
123  (*ps_mgau_base(mg)->vt->transform)(mg, mllr) /* Calls the transform entry of mg's function table; mg is expanded twice. */
124 #define ps_mgau_free(mg) \
125  (*ps_mgau_base(mg)->vt->free)(mg) /* Calls the free entry of mg's function table; mg is expanded twice. */
126 
148 struct acmod_s { /* Acoustic model structure. */
149  /* Global objects, not retained. */
150  cmd_ln_t *config; /**< Configuration. */
151  logmath_t *lmath; /**< Log-math computation. */
152  glist_t strings; /**< Temporary acoustic model filenames. */
154  /* Feature computation: */
155  fe_t *fe; /**< Acoustic feature computation. */
156  feat_t *fcb; /**< Dynamic feature computation. */
158  /* Model parameters: */
159  bin_mdef_t *mdef; /**< Model definition. */
160  tmat_t *tmat; /**< Transition matrices. */
161  ps_mgau_t *mgau; /**< Model parameters. */
162  ps_mllr_t *mllr; /**< Speaker transformation. */
164  /* Senone scoring: */
165  int16 *senone_scores; /**< GMM scores for current frame. */
166  bitvec_t *senone_active_vec; /**< Active GMMs in current frame. */
167  uint8 *senone_active; /**< Array of deltas to active GMMs. */
168  int senscr_frame; /**< Frame index for senone_scores. */
169  int n_senone_active; /**< Number of active GMMs. */
170  int log_zero; /**< Zero log-probability value. */
172  /* Utterance processing: */
173  mfcc_t **mfc_buf; /**< Temporary buffer of acoustic features. */
174  mfcc_t ***feat_buf; /**< Temporary buffer of dynamic features. */
175  FILE *rawfh; /**< File for writing raw audio data. */
176  FILE *mfcfh; /**< File for writing acoustic feature data. */
177  FILE *senfh; /**< File for writing senone score data. */
178  FILE *insenfh; /**< Input senone score file. */
179  long *framepos; /**< File positions of recent frames in senone file. */
181  /* A whole bunch of flags and counters: */
182  uint8 state; /**< State of utterance processing. */
183  uint8 compallsen; /**< Compute all senones? */
184  uint8 grow_feat; /**< Whether to grow feat_buf. */
185  uint8 insen_swap; /**< Whether to swap input senone score. */
187  frame_idx_t output_frame; /**< Index of next frame of dynamic features. */
188  frame_idx_t n_mfc_alloc; /**< Number of frames allocated in mfc_buf. */
189  frame_idx_t n_mfc_frame; /**< Number of frames active in mfc_buf. */
190  frame_idx_t mfc_outidx; /**< Start of active frames in mfc_buf. */
191  frame_idx_t n_feat_alloc; /**< Number of frames allocated in feat_buf. */
192  frame_idx_t n_feat_frame; /**< Number of frames active in feat_buf. */
193  frame_idx_t feat_outidx; /**< Start of active frames in feat_buf. */
194 };
195 typedef struct acmod_s acmod_t;
196 
213 acmod_t *acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb); /* Initialize an acoustic model. */
214 
227 
235 int acmod_set_senfh(acmod_t *acmod, FILE *senfh); /* Start logging senone scores to a filehandle. */
236 
244 int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh); /* Start logging MFCCs to a filehandle. */
245 
253 int acmod_set_rawfh(acmod_t *acmod, FILE *logfh); /* Start logging raw audio to a filehandle. */
254 
258 void acmod_free(acmod_t *acmod); /* Finalize an acoustic model. */
259 
263 int acmod_start_utt(acmod_t *acmod); /* Mark the start of an utterance. */
264 
268 int acmod_end_utt(acmod_t *acmod); /* Mark the end of an utterance. */
269 
282 int acmod_rewind(acmod_t *acmod); /* Rewind the current utterance, allowing it to be rescored. */
283 
293 int acmod_advance(acmod_t *acmod); /* Advance the frame index. */
294 
303 int acmod_set_grow(acmod_t *acmod, int grow_feat); /* Set memory allocation policy for utterance processing. */
304 
323 int acmod_process_raw(acmod_t *acmod,
324  int16 const **inout_raw,
325  size_t *inout_n_samps,
326  int full_utt); /* Takes int16 samples through inout_raw / inout_n_samps. NOTE(review): by analogy with acmod_process_cep() this presumably feeds raw audio into the acoustic model for scoring -- confirm against acmod.c. */
327 
339 int acmod_process_cep(acmod_t *acmod,
340  mfcc_t ***inout_cep,
341  int *inout_n_frames,
342  int full_utt); /* Feed acoustic feature data into the acoustic model for scoring. */
343 
357 int acmod_process_feat(acmod_t *acmod,
358  mfcc_t **feat); /* Feed dynamic feature data into the acoustic model for scoring. */
359 
366 int acmod_set_insenfh(acmod_t *acmod, FILE *insenfh); /* Set up a senone score dump file for input. */
367 
373 int acmod_read_scores(acmod_t *acmod); /* Read one frame of scores from senone score dump file. */
374 
384 mfcc_t **acmod_get_frame(acmod_t *acmod, int *inout_frame_idx); /* Get a frame of dynamic feature data. */
385 
399 int16 const *acmod_score(acmod_t *acmod,
400  int *inout_frame_idx); /* Score one frame of data. */
401 
405 int acmod_write_senfh_header(acmod_t *acmod, FILE *logfh); /* Write senone dump file header. */
406 
410 int acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active,
411  int16 const *senscr, FILE *senfh); /* Write a frame of senone scores to a dump file. */
412 
413 
417 int acmod_best_score(acmod_t *acmod, int *out_best_senid); /* Get best score and senone index for current frame. */
418 
422 void acmod_clear_active(acmod_t *acmod); /* Clear set of active senones. */
423 
427 void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm); /* Activate senones associated with an HMM. */
428 
432 #define acmod_activate_sen(acmod, sen) bitvec_set((acmod)->senone_active_vec, sen) /* Calls bitvec_set() on acmod's senone_active_vec with sen, flagging that senone as active. */
433 
437 int32 acmod_flags2list(acmod_t *acmod); /* Build active list. NOTE(review): presumably derived from the flags in senone_active_vec, as the name suggests -- confirm against acmod.c. */
438 
439 #endif /* __ACMOD_H__ */
FILE * insenfh
Input senone score file.
Definition: acmod.h:178
uint8 grow_feat
Whether to grow feat_buf.
Definition: acmod.h:184
ps_mgau_t * mgau
Model parameters.
Definition: acmod.h:161
Not in an utterance.
Definition: acmod.h:68
uint8 * senone_active
Array of deltas to active GMMs.
Definition: acmod.h:167
long * framepos
File positions of recent frames in senone file.
Definition: acmod.h:179
int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
Start logging MFCCs to a filehandle.
Definition: acmod.c:385
acmod_state_e
States in utterance processing.
Definition: acmod.h:67
Utterance started, no data yet.
Definition: acmod.h:69
mfcc_t ** acmod_get_frame(acmod_t *acmod, int *inout_frame_idx)
Get a frame of dynamic feature data.
Definition: acmod.c:1070
int16 * senone_scores
GMM scores for current frame.
Definition: acmod.h:165
Utterance in progress.
Definition: acmod.h:70
int n_senone_active
Number of active GMMs.
Definition: acmod.h:169
int acmod_best_score(acmod_t *acmod, int *out_best_senid)
Get best score and senone index for current frame.
Definition: acmod.c:1146
An individual HMM among the HMM search space.
int acmod_process_cep(acmod_t *acmod, mfcc_t ***inout_cep, int *inout_n_frames, int full_utt)
Feed acoustic feature data into the acoustic model for scoring.
Definition: acmod.c:692
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
fe_t * fe
Acoustic feature computation.
Definition: acmod.h:155
int acmod_end_utt(acmod_t *acmod)
Mark the end of an utterance.
Definition: acmod.c:448
int acmod_advance(acmod_t *acmod)
Advance the frame index.
Definition: acmod.c:886
frame_idx_t n_mfc_frame
Number of frames active in mfc_buf.
Definition: acmod.h:189
float32 *** h
Diagonal transformation of variances.
Definition: acmod.h:89
int acmod_process_raw(acmod_t *acmod, int16 const **inout_raw, size_t *inout_n_samps, int full_utt)
Feed raw audio data into the acoustic model for scoring. (TODO: Set queue length for utterance processing.)
Definition: acmod.c:623
Utterance ended, still buffering.
Definition: acmod.h:71
int32 * cb2mllr
Mapping from codebooks to transformations.
Definition: acmod.h:90
int * veclen
Length of input vectors for each stream.
Definition: acmod.h:86
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition: acmod.c:1175
Binary format model definition files, with support for heterogeneous topologies and variable-size N-phones.
float32 *** b
Bias part of mean transformations.
Definition: acmod.h:88
FILE * rawfh
File for writing raw audio data.
Definition: acmod.h:175
mfcc_t ** mfc_buf
Temporary buffer of acoustic features.
Definition: acmod.h:173
int acmod_start_utt(acmod_t *acmod)
Mark the start of an utterance.
Definition: acmod.c:432
int acmod_set_insenfh(acmod_t *acmod, FILE *insenfh)
Set up a senone score dump file for input.
Definition: acmod.c:851
int acmod_set_senfh(acmod_t *acmod, FILE *senfh)
Start logging senone scores to a filehandle.
Definition: acmod.c:374
int n_class
Number of MLLR classes.
Definition: acmod.h:84
int acmod_write_senfh_header(acmod_t *acmod, FILE *logfh)
Write senone dump file header.
Definition: acmod.c:360
int acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
Start logging raw audio to a filehandle.
Definition: acmod.c:397
Implementation of HMM base structure.
frame_idx_t n_feat_alloc
Number of frames allocated in feat_buf.
Definition: acmod.h:191
mfcc_t *** feat_buf
Temporary buffer of dynamic features.
Definition: acmod.h:174
acmod_t * acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
Initialize an acoustic model.
Definition: acmod.c:233
ps_mllr_t * mllr
Speaker transformation.
Definition: acmod.h:162
uint8 compallsen
Compute all senones?
Definition: acmod.h:183
int32 acmod_flags2list(acmod_t *acmod)
Build the active senone list from the active-senone flags.
Definition: acmod.c:1232
int n_feat
Number of feature streams.
Definition: acmod.h:85
cmd_ln_t * config
Configuration.
Definition: acmod.h:150
frame_idx_t output_frame
Index of next frame of dynamic features.
Definition: acmod.h:187
int acmod_process_feat(acmod_t *acmod, mfcc_t **feat)
Feed dynamic feature data into the acoustic model for scoring.
Definition: acmod.c:787
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
void acmod_free(acmod_t *acmod)
Finalize an acoustic model.
Definition: acmod.c:310
int acmod_read_scores(acmod_t *acmod)
Read one frame of scores from senone score dump file.
Definition: acmod.c:997
enum acmod_state_e acmod_state_t
States in utterance processing.
Model-space linear transforms for speaker adaptation.
uint8 state
State of utterance processing.
Definition: acmod.h:182
Feature space linear transform structure.
Definition: acmod.h:82
frame_idx_t feat_outidx
Start of active frames in feat_buf.
Definition: acmod.h:193
float32 **** A
Rotation part of mean transformations.
Definition: acmod.h:87
int acmod_rewind(acmod_t *acmod)
Rewind the current utterance, allowing it to be rescored.
Definition: acmod.c:864
Transition matrix data structure.
feat_t * fcb
Dynamic feature computation.
Definition: acmod.h:156
int log_zero
Zero log-probability value.
Definition: acmod.h:170
FILE * senfh
File for writing senone score data.
Definition: acmod.h:177
frame_idx_t mfc_outidx
Start of active frames in mfc_buf.
Definition: acmod.h:190
Transition matrix data structure.
Definition: tmat.h:109
frame_idx_t n_mfc_alloc
Number of frames allocated in mfc_buf.
Definition: acmod.h:188
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition: acmod.c:1191
int32 frame_idx_t
Type for frame index values.
Definition: hmm.h:68
uint8 insen_swap
Whether to swap input senone score.
Definition: acmod.h:185
int senscr_frame
Frame index for senone_scores.
Definition: acmod.h:168
ps_mgaufuncs_t * vt
vtable of mgau functions.
Definition: acmod.h:114
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
int refcnt
Reference count.
Definition: acmod.h:83
frame_idx_t n_feat_frame
Number of frames active in feat_buf.
Definition: acmod.h:192
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
Definition: acmod.c:419
FILE * mfcfh
File for writing acoustic feature data.
Definition: acmod.h:176
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition: acmod.c:1088
Acoustic model structure.
Definition: acmod.h:148
int frame_idx
frame counter.
Definition: acmod.h:115
int acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active, int16 const *senscr, FILE *senfh)
Write a frame of senone scores to a dump file.
Definition: acmod.c:898
glist_t strings
Temporary acoustic model filenames.
Definition: acmod.h:152
ps_mllr_t * acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
Adapt acoustic model using a linear transform.
Definition: acmod.c:349
bitvec_t * senone_active_vec
Active GMMs in current frame.
Definition: acmod.h:166