45 #if defined(__ADSPBLACKFIN__) 46 #elif !defined(_WIN32_WCE) 47 #include <sys/types.h> 51 #include <sphinx_config.h> 52 #include <sphinxbase/cmd_ln.h> 53 #include <sphinxbase/fixpoint.h> 54 #include <sphinxbase/ckd_alloc.h> 55 #include <sphinxbase/bio.h> 56 #include <sphinxbase/err.h> 57 #include <sphinxbase/prim_type.h> 66 ptm_mgau_mllr_transform,
70 #define COMPUTE_GMM_MAP(_idx) \ 71 diff[_idx] = obs[_idx] - mean[_idx]; \ 72 sqdiff[_idx] = MFCCMUL(diff[_idx], diff[_idx]); \ 73 compl[_idx] = MFCCMUL(sqdiff[_idx], var[_idx]); 74 #define COMPUTE_GMM_REDUCE(_idx) \ 75 d = GMMSUB(d, compl[_idx]); 78 insertion_sort_topn(
ptm_topn_t *topn,
int i, int32 d)
87 for (j = i - 1; j >= 0 && d > topn[j].
score; j--) {
88 topn[j + 1] = topn[j];
94 eval_topn(
ptm_mgau_t *s,
int cb,
int feat, mfcc_t *z)
99 topn = s->
f->
topn[cb][feat];
102 for (i = 0; i < s->max_topn; i++) {
103 mfcc_t *mean, diff[4], sqdiff[4], compl[4];
109 mean = s->
g->
mean[cb][feat][0] + cw * ceplen;
110 var = s->
g->
var[cb][feat][0] + cw * ceplen;
111 d = s->
g->
det[cb][feat][cw];
113 for (j = 0; j < ceplen % 4; ++j) {
114 diff[0] = *obs++ - *mean++;
115 sqdiff[0] = MFCCMUL(diff[0], diff[0]);
116 compl[0] = MFCCMUL(sqdiff[0], *var);
122 for (;j < ceplen; j += 4) {
127 COMPUTE_GMM_REDUCE(0);
128 COMPUTE_GMM_REDUCE(1);
129 COMPUTE_GMM_REDUCE(2);
130 COMPUTE_GMM_REDUCE(3);
135 insertion_sort_topn(topn, i, (int32)d);
138 return topn[0].
score;
147 for (*cur = worst - 1; *cur >= best && intd >= (*cur)->
score; --*cur)
148 memcpy(*cur + 1, *cur,
sizeof(**cur));
151 (*cur)->score = intd;
155 eval_cb(
ptm_mgau_t *s,
int cb,
int feat, mfcc_t *z)
159 mfcc_t *var, *det, *detP, *detE;
162 best = topn = s->
f->
topn[cb][feat];
163 worst = topn + (s->max_topn - 1);
164 mean = s->
g->
mean[cb][feat][0];
165 var = s->
g->
var[cb][feat][0];
166 det = s->
g->
det[cb][feat];
170 for (detP = det; detP < detE; ++detP) {
171 mfcc_t diff[4], sqdiff[4], compl[4];
178 thresh = (mfcc_t) worst->
score;
185 for (j = 0; (j < ceplen % 4) && (d >= thresh); ++j) {
186 diff[0] = *obs++ - *mean++;
187 sqdiff[0] = MFCCMUL(diff[0], diff[0]);
188 compl[0] = MFCCMUL(sqdiff[0], *var++);
194 for (; j < ceplen && d >= thresh; j += 4) {
199 COMPUTE_GMM_REDUCE(0);
200 COMPUTE_GMM_REDUCE(1);
201 COMPUTE_GMM_REDUCE(2);
202 COMPUTE_GMM_REDUCE(3);
209 mean += (ceplen - j);
215 for (i = 0; i < s->max_topn; i++) {
217 if (topn[i].cw == cw)
222 insertion_sort_cb(&cur, worst, best, cw, (int32)d);
232 ptm_mgau_codebook_eval(
ptm_mgau_t *s, mfcc_t **z,
int frame)
237 for (i = 0; i < s->
g->
n_mgau; ++i)
238 for (j = 0; j < s->
g->
n_feat; ++j)
239 eval_topn(s, i, j, z[j]);
242 if (frame % s->ds_ratio)
246 for (i = 0; i < s->
g->
n_mgau; ++i) {
249 for (j = 0; j < s->
g->
n_feat; ++j) {
250 eval_cb(s, i, j, z[j]);
262 for (j = 0; j < s->
g->
n_feat; ++j) {
263 int32 norm = 0x7fffffff;
264 for (i = 0; i < s->
g->
n_mgau; ++i) {
270 assert(norm != 0x7fffffff);
271 for (i = 0; i < s->
g->
n_mgau; ++i) {
275 for (k = 0; k < s->max_topn; ++k) {
289 ptm_mgau_calc_cb_active(
ptm_mgau_t *s, uint8 *senone_active,
290 int32 n_senone_active,
int compallsen)
299 for (lastsen = i = 0; i < n_senone_active; ++i) {
300 int sen = senone_active[i] + lastsen;
305 E_DEBUG(1, (
"Active codebooks:"));
306 for (i = 0; i < s->
g->
n_mgau; ++i) {
309 E_DEBUGCONT(1, (
" %d", i));
311 E_DEBUGCONT(1, (
"\n"));
319 ptm_mgau_senone_eval(
ptm_mgau_t *s, int16 *senone_scores,
320 uint8 *senone_active, int32 n_senone_active,
323 int i, lastsen, bestscore;
325 memset(senone_scores, 0, s->
n_sen *
sizeof(*senone_scores));
332 n_senone_active = s->
n_sen;
333 bestscore = 0x7fffffff;
334 for (lastsen = i = 0; i < n_senone_active; ++i) {
341 sen = senone_active[i] + lastsen;
351 for (f = 0; f < s->
g->
n_feat; ++f) {
352 for (j = 0; j < s->max_topn; ++j) {
360 for (f = 0; f < s->
g->
n_feat; ++f) {
363 topn = s->
f->
topn[cb][f];
364 for (j = 0; j < s->max_topn; ++j) {
368 int dcw = s->
mixw[f][topn[j].
cw][sen/2];
369 dcw = (dcw & 1) ? dcw >> 4 : dcw & 0x0f;
370 mixw = s->mixw_cb[dcw];
373 mixw = s->
mixw[f][topn[j].
cw][sen];
376 fden = mixw + topn[j].
score;
379 mixw + topn[j].
score);
380 E_DEBUG(3, (
"fden[%d][%d] l+= %d + %d = %d\n",
381 sen, f, mixw, topn[j].score, fden));
385 if (ascore < bestscore) bestscore = ascore;
386 senone_scores[sen] = ascore;
390 for (i = 0; i < s->
n_sen; ++i) {
391 senone_scores[i] -= bestscore;
402 int16 *senone_scores,
403 uint8 *senone_active,
404 int32 n_senone_active,
405 mfcc_t ** featbuf, int32 frame,
418 s->
f = s->
hist + fast_eval_idx;
422 if (frame >= ps_mgau_base(ps)->frame_idx) {
427 if (fast_eval_idx == 0)
430 lastf = s->
hist + fast_eval_idx - 1;
432 memcpy(s->
f->
topn[0][0], lastf->
topn[0][0],
436 ptm_mgau_calc_cb_active(s, senone_active, n_senone_active, compallsen);
438 ptm_mgau_codebook_eval(s, featbuf, frame);
441 ptm_mgau_senone_eval(s, senone_scores, senone_active,
442 n_senone_active, compallsen);
453 int32 do_swap, do_mmap;
458 int n_sen = bin_mdef_n_sen(mdef);
462 do_mmap = cmd_ln_boolean_r(s->
config,
"-mmap");
464 if ((fp = fopen(file,
"rb")) == NULL)
467 E_INFO(
"Loading senones from dump file %s\n", file);
469 if (fread(&n,
sizeof(int32), 1, fp) != 1) {
470 E_ERROR_SYSTEM(
"Failed to read title size from %s", file);
475 if (n < 1 || n > 999) {
477 if (n < 1 || n > 999) {
478 E_ERROR(
"Title length %x in dump file %s out of range\n", n, file);
483 if (fread(line,
sizeof(
char), n, fp) != n) {
484 E_ERROR_SYSTEM(
"Cannot read title");
487 if (line[n - 1] !=
'\0') {
488 E_ERROR(
"Bad title in dump file\n");
491 E_INFO(
"%s\n", line);
494 if (fread(&n,
sizeof(n), 1, fp) != 1) {
495 E_ERROR_SYSTEM(
"Failed to read header size from %s", file);
498 if (do_swap) SWAP_INT32(&n);
499 if (fread(line,
sizeof(
char), n, fp) != n) {
500 E_ERROR_SYSTEM(
"Cannot read header");
503 if (line[n - 1] !=
'\0') {
504 E_ERROR(
"Bad header in dump file\n");
510 if (fread(&n,
sizeof(n), 1, fp) != 1) {
511 E_ERROR_SYSTEM(
"Failed to read header string size from %s", file);
514 if (do_swap) SWAP_INT32(&n);
517 if (fread(line,
sizeof(
char), n, fp) != n) {
518 E_ERROR_SYSTEM(
"Cannot read header");
522 if (!strncmp(line,
"feature_count ", strlen(
"feature_count "))) {
523 n_feat = atoi(line + strlen(
"feature_count "));
525 if (!strncmp(line,
"mixture_count ", strlen(
"mixture_count "))) {
526 n_density = atoi(line + strlen(
"mixture_count "));
528 if (!strncmp(line,
"model_count ", strlen(
"model_count "))) {
529 n_sen = atoi(line + strlen(
"model_count "));
531 if (!strncmp(line,
"cluster_count ", strlen(
"cluster_count "))) {
532 n_clust = atoi(line + strlen(
"cluster_count "));
534 if (!strncmp(line,
"cluster_bits ", strlen(
"cluster_bits "))) {
535 n_bits = atoi(line + strlen(
"cluster_bits "));
544 if (fread(&r,
sizeof(r), 1, fp) != 1) {
545 E_ERROR_SYSTEM(
"Cannot read #rows");
548 if (do_swap) SWAP_INT32(&r);
549 if (fread(&c,
sizeof(c), 1, fp) != 1) {
550 E_ERROR_SYSTEM(
"Cannot read #columns");
553 if (do_swap) SWAP_INT32(&c);
554 E_INFO(
"Rows: %d, Columns: %d\n", r, c);
558 E_ERROR(
"Number of feature streams mismatch: %d != %d\n",
563 E_ERROR(
"Number of densities mismatch: %d != %d\n",
567 if (n_sen != s->
n_sen) {
568 E_ERROR(
"Number of senones mismatch: %d != %d\n",
573 if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) {
574 E_ERROR(
"Cluster count must be 0, 15, or 16\n");
580 if (!((n_bits == 8) || (n_bits == 4))) {
581 E_ERROR(
"Cluster count must be 4 or 8\n");
586 E_INFO(
"Using memory-mapped I/O for senones\n");
592 s->sendump_mmap = mmio_file_read(file);
595 s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
602 s->mixw_cb = ckd_calloc(1, n_clust);
603 if (fread(s->mixw_cb, 1, n_clust, fp) != (
size_t) n_clust) {
604 E_ERROR(
"Failed to read %d bytes from sendump\n", n_clust);
611 if (s->sendump_mmap) {
612 s->
mixw = ckd_calloc_2d(n_feat, n_density,
sizeof(*s->
mixw));
613 for (n = 0; n < n_feat; n++) {
616 step = (step + 1) / 2;
617 for (i = 0; i < r; i++) {
618 s->
mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
624 s->
mixw = ckd_calloc_3d(n_feat, n_density, n_sen,
sizeof(***s->
mixw));
626 for (n = 0; n < n_feat; n++) {
629 step = (step + 1) / 2;
630 for (i = 0; i < r; i++) {
631 if (fread(s->
mixw[n][i],
sizeof(***s->
mixw), step, fp)
633 E_ERROR(
"Failed to read %d bytes from sendump\n", step);
648 read_mixw(
ptm_mgau_t * s,
char const *file_name,
double SmoothMin)
650 char **argname, **argval;
653 int32 byteswap, chksum_present;
662 E_INFO(
"Reading mixture weights file '%s'\n", file_name);
664 if ((fp = fopen(file_name,
"rb")) == NULL)
665 E_FATAL_SYSTEM(
"Failed to open mixture file '%s' for reading", file_name);
668 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
669 E_FATAL(
"Failed to read header from '%s'\n", file_name);
673 for (i = 0; argname[i]; i++) {
674 if (strcmp(argname[i],
"version") == 0) {
675 if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
676 E_WARN(
"Version mismatch(%s): %s, expecting %s\n",
677 file_name, argval[i], MGAU_MIXW_VERSION);
679 else if (strcmp(argname[i],
"chksum0") == 0) {
683 bio_hdrarg_free(argname, argval);
684 argname = argval = NULL;
689 if ((bio_fread(&n_sen,
sizeof(int32), 1, fp, byteswap, &chksum) != 1)
690 || (bio_fread(&n_feat,
sizeof(int32), 1, fp, byteswap, &chksum) !=
692 || (bio_fread(&n_comp,
sizeof(int32), 1, fp, byteswap, &chksum) !=
694 || (bio_fread(&n,
sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
695 E_FATAL(
"bio_fread(%s) (arraysize) failed\n", file_name);
698 E_FATAL(
"#Features streams(%d) != %d\n", n_feat, s->
g->
n_feat);
699 if (n != n_sen * n_feat * n_comp) {
701 (
"%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
702 file_name, i, n_sen, n_feat, n_comp);
712 n_sen,
sizeof(***s->
mixw));
715 pdf = (float32 *) ckd_calloc(n_comp,
sizeof(float32));
719 for (i = 0; i < n_sen; i++) {
720 for (f = 0; f < n_feat; f++) {
721 if (bio_fread((
void *) pdf,
sizeof(float32),
722 n_comp, fp, byteswap, &chksum) != n_comp) {
723 E_FATAL(
"bio_fread(%s) (arraydata) failed\n", file_name);
727 if (vector_sum_norm(pdf, n_comp) <= 0.0)
729 vector_floor(pdf, n_comp, SmoothMin);
730 vector_sum_norm(pdf, n_comp);
733 for (c = 0; c < n_comp; c++) {
736 qscr = -logmath_log(s->lmath_8b, pdf[c]);
739 s->
mixw[f][c][i] = qscr;
744 E_WARN(
"Weight normalization failed for %d mixture weights components\n", n_err);
749 bio_verify_chksum(fp, byteswap, chksum);
751 if (fread(&eofchk, 1, 1, fp) == 1)
752 E_FATAL(
"More data than expected in %s\n", file_name);
756 E_INFO(
"Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
765 char const *sendump_path;
768 s = ckd_calloc(1,
sizeof(*s));
771 s->lmath = logmath_retain(acmod->
lmath);
774 if (s->lmath_8b == NULL)
777 if (logmath_get_width(s->lmath_8b) != 1) {
778 E_ERROR(
"Log base %f is too small to represent add table in 8 bits\n",
779 logmath_get_base(s->lmath_8b));
785 cmd_ln_str_r(s->
config,
"-var"),
786 cmd_ln_float32_r(s->
config,
"-varfloor"),
792 E_INFO(
"Number of codebooks exceeds 256: %d\n", s->
g->
n_mgau);
795 if (s->
g->
n_mgau != bin_mdef_n_ciphone(mdef)) {
796 E_INFO(
"Number of codebooks doesn't match number of ciphones, doesn't look like PTM: %d != %d\n", s->
g->
n_mgau, bin_mdef_n_ciphone(mdef));
800 if (s->
g->
n_feat != feat_dimension1(acmod->
fcb)) {
801 E_ERROR(
"Number of streams does not match: %d != %d\n",
805 for (i = 0; i < s->
g->
n_feat; ++i) {
806 if (s->
g->
featlen[i] != feat_dimension2(acmod->
fcb, i)) {
807 E_ERROR(
"Dimension of stream %d does not match: %d != %d\n",
813 if ((sendump_path = cmd_ln_str_r(s->
config,
"-sendump"))) {
814 if (read_sendump(s, acmod->
mdef, sendump_path) < 0) {
819 if (read_mixw(s, cmd_ln_str_r(s->
config,
"-mixw"),
820 cmd_ln_float32_r(s->
config,
"-mixwfloor")) < 0) {
824 s->ds_ratio = cmd_ln_int32_r(s->
config,
"-ds");
825 s->max_topn = cmd_ln_int32_r(s->
config,
"-topn");
826 E_INFO(
"Maximum top-N: %d\n", s->max_topn);
831 for (i = 0; i < s->
n_sen; ++i)
832 s->
sen2cb[i] = bin_mdef_sen2cimap(acmod->
mdef, i);
847 for (j = 0; j < s->
g->
n_mgau; ++j) {
848 for (k = 0; k < s->
g->
n_feat; ++k) {
849 for (m = 0; m < s->max_topn; ++m) {
863 ps->
vt = &ptm_mgau_funcs;
866 ptm_mgau_free(ps_mgau_base(s));
883 logmath_free(s->lmath);
884 logmath_free(s->lmath_8b);
885 if (s->sendump_mmap) {
886 ckd_free_2d(s->
mixw);
887 mmio_file_unmap(s->sendump_mmap);
890 ckd_free_3d(s->
mixw);
int32 n_density
Number of Gaussian densities in each codebook-feature stream.
ptm_topn_t *** topn
Top-N for each codebook (mgau x feature x topn)
void gauden_free(gauden_t *g)
Release memory allocated by gauden_init.
mfcc_t *** det
log(determinant) for each variance vector; actually, log(sqrt(2*pi*det))
uint8 * sen2cb
Senone to codebook mapping.
logmath_t * lmath
Log-math computation.
int n_fast_hist
Number of past frames tracked.
gauden_t * g
Set of Gaussians.
int32 gauden_mllr_transform(gauden_t *s, ps_mllr_t *mllr, cmd_ln_t *config)
Transform Gaussians according to an MLLR matrix (or, eventually, more).
gauden_t * gauden_init(char const *meanfile, char const *varfile, float32 varfloor, logmath_t *lmath)
Read mixture Gaussian codebooks from the given mean and variance files.
int ptm_mgau_frame_eval(ps_mgau_t *s, int16 *senone_scores, uint8 *senone_active, int32 n_senone_active, mfcc_t **featbuf, int32 frame, int32 compallsen)
Compute senone scores for the active senones.
Fast phonetically-tied mixture evaluation.
cmd_ln_t * config
Configuration.
int32 * featlen
Vector length (dimension) of each feature stream.
#define GMMSUB(a, b)
Subtract GMM component b (assumed to be positive) and saturate.
int32 n_mgau
Number of codebooks.
Feature space linear transform structure.
#define SENSCR_SHIFT
Shift count for senone scores.
mfcc_t **** mean
mean[codebook][feature][codeword] vector
feat_t * fcb
Dynamic feature computation.
cmd_ln_t * config
Configuration parameters.
uint8 *** mixw
Mixture weight distributions by feature, codeword, senone.
ptm_fast_eval_t * hist
Fast evaluation info for past frames.
int32 n_feat
Number of feature streams in each codebook.
ptm_fast_eval_t * f
Fast eval info for current frame.
ps_mgaufuncs_t * vt
vtable of mgau functions.
LOGMATH_INLINE int fast_logmath_add(logmath_t *lmath, int mlx, int mly)
Quickly log-add two negated log probabilities.
bin_mdef_t * mdef
Model definition.
bitvec_t * mgau_active
Set of active codebooks.
#define MAX_NEG_ASCR
Maximum negated acoustic score value.
int32 n_sen
Number of senones.
#define MAX_NEG_MIXW
Maximum negated mixture weight value.
Acoustic model structure.
mfcc_t **** var
Like mean: var[codebook][feature][codeword] vector; diagonal covariance only.
Common code shared between SC and PTM (tied-state) models.