43 #include <sphinxbase/err.h> 44 #include <sphinxbase/strfuncs.h> 45 #include <sphinxbase/filename.h> 46 #include <sphinxbase/pio.h> 49 #include "cmdln_macro.h" 53 #include "fsg_search_internal.h" 58 static const arg_t ps_args_def[] = {
65 file_exists(
const char *path)
69 tmp = fopen(path,
"rb");
75 hmmdir_exists(
const char *path)
78 char *mdef = string_join(path,
"/mdef", NULL);
80 tmp = fopen(mdef,
"rb");
88 const char *hmmdir,
const char *file)
90 char *tmp = string_join(hmmdir,
"/", file, NULL);
92 if (cmd_ln_str_r(ps->
config, arg) == NULL && file_exists(tmp))
93 cmd_ln_set_str_r(ps->
config, arg, tmp);
100 char const *hmmdir, *lmfile, *dictfile;
103 #ifdef __ADSPBLACKFIN__ 104 E_INFO(
"Will not use mmap() on uClinux/Blackfin.");
105 cmd_ln_set_boolean_r(ps->
config,
"-mmap", FALSE);
110 hmmdir = cmd_ln_str_r(ps->
config,
"-hmm");
111 lmfile = cmd_ln_str_r(ps->
config,
"-lm");
112 dictfile = cmd_ln_str_r(ps->
config,
"-dict");
113 if (hmmdir == NULL && hmmdir_exists(MODELDIR
"/hmm/en_US/hub4wsj_sc_8k")) {
114 hmmdir = MODELDIR
"/hmm/en_US/hub4wsj_sc_8k";
115 cmd_ln_set_str_r(ps->
config,
"-hmm", hmmdir);
117 if (lmfile == NULL && !cmd_ln_str_r(ps->
config,
"-fsg")
118 && !cmd_ln_str_r(ps->
config,
"-jsgf")
119 && file_exists(MODELDIR
"/lm/en_US/hub4.5000.DMP")) {
120 lmfile = MODELDIR
"/lm/en_US/hub4.5000.DMP";
121 cmd_ln_set_str_r(ps->
config,
"-lm", lmfile);
123 if (dictfile == NULL && file_exists(MODELDIR
"/lm/en_US/cmu07a.dic")) {
124 dictfile = MODELDIR
"/lm/en_US/cmu07a.dic";
125 cmd_ln_set_str_r(ps->
config,
"-dict", dictfile);
129 if (hmmdir && !path_is_absolute(hmmdir) && !hmmdir_exists(hmmdir)) {
130 char *tmphmm = string_join(MODELDIR
"/hmm/", hmmdir, NULL);
131 if (hmmdir_exists(tmphmm)) {
132 cmd_ln_set_str_r(ps->
config,
"-hmm", tmphmm);
134 E_ERROR(
"Failed to find mdef file inside the model folder specified with -hmm '%s'\n", hmmdir);
138 if (lmfile && !path_is_absolute(lmfile) && !file_exists(lmfile)) {
139 char *tmplm = string_join(MODELDIR
"/lm/", lmfile, NULL);
140 cmd_ln_set_str_r(ps->
config,
"-lm", tmplm);
143 if (dictfile && !path_is_absolute(dictfile) && !file_exists(dictfile)) {
144 char *tmpdict = string_join(MODELDIR
"/lm/", dictfile, NULL);
145 cmd_ln_set_str_r(ps->
config,
"-dict", tmpdict);
151 if ((hmmdir = cmd_ln_str_r(ps->
config,
"-hmm")) != NULL) {
152 ps_add_file(ps,
"-mdef", hmmdir,
"mdef");
153 ps_add_file(ps,
"-mean", hmmdir,
"means");
154 ps_add_file(ps,
"-var", hmmdir,
"variances");
155 ps_add_file(ps,
"-tmat", hmmdir,
"transition_matrices");
156 ps_add_file(ps,
"-mixw", hmmdir,
"mixture_weights");
157 ps_add_file(ps,
"-sendump", hmmdir,
"sendump");
158 ps_add_file(ps,
"-fdict", hmmdir,
"noisedict");
159 ps_add_file(ps,
"-lda", hmmdir,
"feature_transform");
160 ps_add_file(ps,
"-featparams", hmmdir,
"feat.params");
161 ps_add_file(ps,
"-senmgau", hmmdir,
"senmgau");
173 for (gn = ps->
searches; gn; gn = gnode_next(gn))
174 ps_search_free(gnode_ptr(gn));
185 for (gn = ps->
searches; gn; gn = gnode_next(gn)) {
186 if (0 == strcmp(ps_search_name(gnode_ptr(gn)), name))
195 char const *lmfile, *lmctl = NULL;
197 if (config && config != ps->
config) {
198 cmd_ln_free_r(ps->
config);
199 ps->
config = cmd_ln_retain(config);
202 err_set_debug_level(cmd_ln_int32_r(ps->
config,
"-debug"));
208 ps_init_defaults(ps);
211 ps_free_searches(ps);
226 if (ps->
lmath == NULL
227 || (logmath_get_base(ps->
lmath) !=
228 (float64)cmd_ln_float32_r(ps->
config,
"-logbase"))) {
230 logmath_free(ps->
lmath);
231 ps->
lmath = logmath_init
232 ((float64)cmd_ln_float32_r(ps->
config,
"-logbase"), 0,
233 cmd_ln_boolean_r(ps->
config,
"-bestpath"));
256 if (cmd_ln_str_r(ps->
config,
"-fsg") || cmd_ln_str_r(ps->
config,
"-jsgf")) {
267 else if ((lmfile = cmd_ln_str_r(ps->
config,
"-lm"))
268 || (lmctl = cmd_ln_str_r(ps->
config,
"-lmctl"))) {
272 if (cmd_ln_boolean_r(ps->
config,
"-fwdflat")
273 && cmd_ln_boolean_r(ps->
config,
"-fwdtree"))
292 ps->
perf.name =
"decode";
293 ptmr_init(&ps->
perf);
303 ps = ckd_calloc(1,
sizeof(*ps));
332 ps_free_searches(ps);
336 logmath_free(ps->
lmath);
337 cmd_ln_free_r(ps->
config);
383 || 0 != strcmp(ps_search_name(ps->
search),
"ngram"))
395 search = ps_find_search(ps,
"ngram");
396 if (search == NULL) {
405 else if (lmset != NULL) {
408 if (ngs->
lmset != NULL && ngs->
lmset != lmset)
409 ngram_model_free(ngs->
lmset);
412 if (ps_search_reinit(search, ps->
dict, ps->
d2p) < 0)
426 || 0 != strcmp(ps_search_name(ps->
search),
"fsg"))
437 search = ps_find_search(ps,
"fsg");
438 if (search == NULL) {
440 if ((search = fsg_search_init(ps->
config,
449 if (ps_search_reinit(search, ps->
dict, ps->
d2p) < 0)
458 char const *fdictfile,
char const *format)
468 newconfig = cmd_ln_init(NULL,
ps_args(), TRUE, NULL);
469 cmd_ln_set_boolean_r(newconfig,
"-dictcase",
470 cmd_ln_boolean_r(ps->
config,
"-dictcase"));
471 cmd_ln_set_str_r(newconfig,
"-dict", dictfile);
473 cmd_ln_set_str_r(newconfig,
"-fdict", fdictfile);
475 cmd_ln_set_str_r(newconfig,
"-fdict",
476 cmd_ln_str_r(ps->
config,
"-fdict"));
480 cmd_ln_free_r(newconfig);
486 cmd_ln_free_r(newconfig);
492 cmd_ln_free_r(newconfig);
493 cmd_ln_set_str_r(ps->
config,
"-dict", dictfile);
495 cmd_ln_set_str_r(ps->
config,
"-fdict", fdictfile);
502 for (gn = ps->
searches; gn; gn = gnode_next(gn)) {
504 if ((rv = ps_search_reinit(search, dict, d2p)) < 0)
525 ngram_model_t *lmset;
527 char **phonestr, *tmp;
531 tmp = ckd_salloc(phones);
532 np = str2words(tmp, NULL, 0);
533 phonestr = ckd_calloc(np,
sizeof(*phonestr));
534 str2words(tmp, phonestr, np);
535 pron = ckd_calloc(np,
sizeof(*pron));
536 for (i = 0; i < np; ++i) {
539 E_ERROR(
"Unknown phone %s in phone string %s\n",
566 if ((lmwid = ngram_model_add_word(lmset, word, 1.0))
567 == NGRAM_INVALID_WID)
573 if ((rv = ps_search_reinit(ps->
search, ps->
dict, ps->
d2p) < 0))
581 char const *uttid,
long maxsamps)
589 if (maxsamps != -1 || (pos = FTELL(rawfh)) >= 0) {
592 if (maxsamps == -1) {
594 FSEEK(rawfh, 0, SEEK_END);
595 endpos = FTELL(rawfh);
596 FSEEK(rawfh, pos, SEEK_SET);
597 maxsamps = endpos - pos;
599 data = ckd_calloc(maxsamps,
sizeof(*data));
600 total = fread(data,
sizeof(*data), maxsamps, rawfh);
607 while (!feof(rawfh)) {
611 nread = fread(data,
sizeof(*data),
sizeof(data)/
sizeof(*data), rawfh);
626 E_ERROR(
"No search module is selected, did you forget to " 627 "specify a language model or grammar?\n");
631 ptmr_reset(&ps->
perf);
632 ptmr_start(&ps->
perf);
636 ps->
uttid = ckd_salloc(uttid);
641 sprintf(nuttid,
"%09u", ps->
uttno);
642 ps->
uttid = ckd_salloc(nuttid);
658 char *logfn = string_join(ps->
mfclogdir,
"/",
659 ps->
uttid,
".mfc", NULL);
661 E_INFO(
"Writing MFCC log file: %s\n", logfn);
662 if ((mfcfh = fopen(logfn,
"wb")) == NULL) {
663 E_ERROR_SYSTEM(
"Failed to open MFCC log file %s", logfn);
671 char *logfn = string_join(ps->
rawlogdir,
"/",
672 ps->
uttid,
".raw", NULL);
674 E_INFO(
"Writing raw audio log file: %s\n", logfn);
675 if ((rawfh = fopen(logfn,
"wb")) == NULL) {
676 E_ERROR_SYSTEM(
"Failed to open raw audio log file %s", logfn);
684 char *logfn = string_join(ps->
senlogdir,
"/",
685 ps->
uttid,
".sen", NULL);
687 E_INFO(
"Writing senone score log file: %s\n", logfn);
688 if ((senfh = fopen(logfn,
"wb")) == NULL) {
689 E_ERROR_SYSTEM(
"Failed to open senone score log file %s", logfn);
701 return ps_search_start(ps->
search);
716 if ((k = ps_search_step(ps->
search,
736 if ((nfr = ps_search_forward(ps)) < 0) {
758 E_ERROR(
"Failed to process data, utterance is not started. Use start_utt to start it\n");
770 &n_samples, full_utt)) < 0)
776 if ((nfr = ps_search_forward(ps)) < 0)
801 &n_frames, full_utt)) < 0)
807 if ((nfr = ps_search_forward(ps)) < 0)
823 if ((rv = ps_search_forward(ps)) < 0) {
824 ptmr_stop(&ps->
perf);
829 if ((rv = ps_search_finish(ps->
phone_loop)) < 0) {
830 ptmr_stop(&ps->
perf);
836 i < ps->acmod->output_frame; ++i)
837 ps_search_step(ps->
search, i);
839 if ((rv = ps_search_finish(ps->
search)) < 0) {
840 ptmr_stop(&ps->
perf);
843 ptmr_stop(&ps->
perf);
846 if (cmd_ln_boolean_r(ps->
config,
"-backtrace")) {
847 char const *uttid, *hyp;
852 E_INFO(
"%s: %s (%d)\n", uttid, hyp, score);
853 E_INFO_NOFN(
"%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
854 "word",
"start",
"end",
"pprob",
"ascr",
"lscr",
"lback");
859 int32 post, lscr, ascr, lback;
864 E_INFO_NOFN(
"%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
865 word, sf, ef, logmath_exp(
ps_get_logmath(ps), post), ascr, lscr, lback);
876 ptmr_start(&ps->
perf);
877 hyp = ps_search_hyp(ps->
search, out_best_score, NULL);
879 *out_uttid = ps->
uttid;
880 ptmr_stop(&ps->
perf);
889 ptmr_start(&ps->
perf);
890 hyp = ps_search_hyp(ps->
search, NULL, out_is_final);
891 ptmr_stop(&ps->
perf);
901 ptmr_start(&ps->
perf);
902 prob = ps_search_prob(ps->
search);
904 *out_uttid = ps->
uttid;
905 ptmr_stop(&ps->
perf);
914 ptmr_start(&ps->
perf);
915 itor = ps_search_seg_iter(ps->
search, out_best_score);
916 ptmr_stop(&ps->
perf);
923 return ps_search_seg_next(seg);
935 if (out_sf) *out_sf = seg->
sf;
936 if (out_ef) *out_ef = seg->
ef;
942 if (out_ascr) *out_ascr = seg->
ascr;
943 if (out_lscr) *out_lscr = seg->
lscr;
944 if (out_lback) *out_lback = seg->
lback;
951 ps_search_seg_free(seg);
957 return ps_search_lattice(ps->
search);
962 char const *ctx1,
char const *ctx2)
965 ngram_model_t *lmset;
978 if (0 != strcmp(ps_search_name(ps->
search),
"ngram")) {
1016 assert(nbest != NULL);
1018 if (nbest->top == NULL)
1020 if (out_score) *out_score = nbest->top->
score;
1027 if (nbest->top == NULL)
1029 if (out_score) *out_score = nbest->top->
score;
1041 double *out_ncpu,
double *out_nwall)
1045 frate = cmd_ln_int32_r(ps->
config,
"-frate");
1047 *out_ncpu = ps->
perf.t_cpu;
1048 *out_nwall = ps->
perf.t_elapsed;
1053 double *out_ncpu,
double *out_nwall)
1057 frate = cmd_ln_int32_r(ps->
config,
"-frate");
1058 *out_nspeech = (double)ps->
n_frame / frate;
1059 *out_ncpu = ps->
perf.t_tot_cpu;
1060 *out_nwall = ps->
perf.t_tot_elapsed;
1070 search->
acmod = acmod;
1077 search->
start_wid = dict_startwid(dict);
1083 search->
dict = NULL;
1098 search->
start_wid = dict_startwid(dict);
1104 search->
dict = NULL;
Implementation of FSG search (and "FSG set") structure.
ptmr_t perf
Performance counter for all of decoding.
dict_t * dict_init(cmd_ln_t *config, bin_mdef_t *mdef)
Initialize a new dictionary.
Internal implementation of PocketSphinx decoder.
POCKETSPHINX_EXPORT void ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech, double *out_ncpu, double *out_nwall)
Get performance information for the current utterance.
POCKETSPHINX_EXPORT feat_t * ps_get_feat(ps_decoder_t *ps)
Get the dynamic feature computation object for this decoder.
int acmod_read_scores(acmod_t *acmod)
Read one frame of scores from senone score dump file.
void ps_astar_finish(ps_astar_t *nbest)
Finish N-best search, releasing resources associated with it.
ps_latpath_t * ps_astar_next(ps_astar_t *nbest)
Find next best hypothesis of A* on a word graph.
char const * ps_astar_hyp(ps_astar_t *nbest, ps_latpath_t *path)
Get hypothesis string from A* search.
POCKETSPHINX_EXPORT ps_seg_t * ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score)
Get an iterator over the word segmentation for the best hypothesis.
POCKETSPHINX_EXPORT int ps_process_cep(ps_decoder_t *ps, mfcc_t **data, int n_frames, int no_search, int full_utt)
Decode acoustic feature data.
Base structure for search module.
POCKETSPHINX_EXPORT arg_t const * ps_args(void)
Returns the argument definitions used in ps_init().
dict_t * dict
Pronunciation dictionary.
int acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
Set up a senone score dump file for input.
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
ps_mllr_t * acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
Adapt acoustic model using a linear transform.
int acmod_process_cep(acmod_t *acmod, mfcc_t ***inout_cep, int *inout_n_frames, int full_utt)
Feed acoustic feature data into the acoustic model for scoring.
POCKETSPHINX_EXPORT int ps_load_dict(ps_decoder_t *ps, char const *dictfile, char const *fdictfile, char const *format)
Reload the pronunciation dictionary from a file.
POCKETSPHINX_EXPORT void ps_nbest_free(ps_nbest_t *nbest)
Finish N-best search early, releasing resources.
int32 silence_wid
Silence word ID.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
acmod_t * acmod
Acoustic model.
int dict_free(dict_t *d)
Release a pointer to a dictionary.
POCKETSPHINX_EXPORT ps_nbest_t * ps_nbest(ps_decoder_t *ps, int sf, int ef, char const *ctx1, char const *ctx2)
Get an iterator over the best hypotheses, optionally within a selected region of the utterance...
POCKETSPHINX_EXPORT ps_seg_t * ps_nbest_seg(ps_nbest_t *nbest, int32 *out_score)
Get the word segmentation from an N-best list iterator.
POCKETSPHINX_EXPORT void ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef)
Get inclusive start and end frames from a segmentation iterator.
POCKETSPHINX_EXPORT ps_mllr_t * ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr)
Adapt current acoustic model using a linear transform.
int32 finish_wid
Finish word ID.
fe_t * fe
Acoustic feature computation.
int bin_mdef_ciphone_id(bin_mdef_t *m, const char *ciphone)
Context-independent phone lookup.
int32 lscr
Language model score.
int32 n_words
Number of words known to search (may be less than in the dictionary)
POCKETSPHINX_EXPORT int ps_save_dict(ps_decoder_t *ps, char const *dictfile, char const *format)
Dump the current pronunciation dictionary to a file.
POCKETSPHINX_EXPORT int ps_decode_raw(ps_decoder_t *ps, FILE *rawfh, char const *uttid, long maxsamps)
Decode a raw audio stream.
char const * mfclogdir
Log directory for MFCC files.
Word graph search implementation.
POCKETSPHINX_EXPORT char const * ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score)
Get the hypothesis string from an N-best list iterator.
int refcount
Reference count.
int dict2pid_free(dict2pid_t *d2p)
Free the memory used by the dict2pid structure.
POCKETSPHINX_EXPORT ngram_model_t * ps_get_lmset(ps_decoder_t *ps)
Get the language model set object for this decoder.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
POCKETSPHINX_EXPORT ps_lattice_t * ps_get_lattice(ps_decoder_t *ps)
Get word lattice.
POCKETSPHINX_EXPORT ps_nbest_t * ps_nbest_next(ps_nbest_t *nbest)
Move an N-best list iterator forward.
int32 prob
Log posterior probability.
dict_t * dict_retain(dict_t *d)
Retain a pointer to a dict_t.
POCKETSPHINX_EXPORT int32 ps_get_prob(ps_decoder_t *ps, char const **out_uttid)
Get posterior probability.
char const * word
Word string (pointer into dictionary hash)
int acmod_set_senfh(acmod_t *acmod, FILE *logfh)
Start logging senone scores to a filehandle.
acmod_t * acmod
Acoustic model.
ps_search_t * search
Currently active search module.
POCKETSPHINX_EXPORT ngram_model_t * ps_update_lmset(ps_decoder_t *ps, ngram_model_t *lmset)
Update the language model set object for this decoder.
Lexicon tree based Viterbi search.
int acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
Start logging raw audio to a filehandle.
int dict_write(dict_t *dict, char const *filename, char const *format)
Write dictionary to a file.
POCKETSPHINX_EXPORT int ps_start_utt(ps_decoder_t *ps, char const *uttid)
Start utterance processing.
logmath_t * lmath
Log math computation.
int32 start_wid
Start word ID.
POCKETSPHINX_EXPORT ps_seg_t * ps_seg_next(ps_seg_t *seg)
Get the next segment in a word segmentation.
char * uttid
Utterance ID for current utterance.
N-Gram search module structure.
dict2pid_t * d2p
Dictionary to senone mappings.
char const * rawlogdir
Log directory for audio files.
POCKETSPHINX_EXPORT int ps_decode_senscr(ps_decoder_t *ps, FILE *senfh, char const *uttid)
Decode a senone score dump file.
frame_idx_t output_frame
Index of next frame of dynamic features.
POCKETSPHINX_EXPORT int ps_free(ps_decoder_t *ps)
Finalize the decoder.
int16 s3cipid_t
Size definitions for more semantically meaningful units.
N-Gram based multi-pass search ("FBS")
POCKETSPHINX_EXPORT void ps_seg_free(ps_seg_t *seg)
Finish iterating over a word segmentation early, freeing resources.
POCKETSPHINX_EXPORT fsg_set_t * ps_update_fsgset(ps_decoder_t *ps)
Update the finite-state grammar set object for this decoder.
int32 ascr
Acoustic score.
cmd_ln_t * config
Configuration.
int acmod_end_utt(acmod_t *acmod)
Mark the end of an utterance.
POCKETSPHINX_EXPORT ps_decoder_t * ps_retain(ps_decoder_t *ps)
Retain a pointer to the decoder.
int dict2pid_add_word(dict2pid_t *d2p, int32 wid)
Add a word to the dict2pid structure (after adding it to dict).
int acmod_advance(acmod_t *acmod)
Advance the frame index.
uint8 state
State of utterance processing.
int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
Start logging MFCCs to a filehandle.
void acmod_free(acmod_t *acmod)
Finalize an acoustic model.
POCKETSPHINX_EXPORT cmd_ln_t * ps_get_config(ps_decoder_t *ps)
Get the configuration object for this decoder.
glist_t searches
List of search modules.
phone_loop_t * phones
Array of phone arcs.
ps_search_t * phone_loop
Phone loop search for lookahead.
ps_search_t * ngram_search_init(cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize the N-Gram search module.
Feature space linear transform structure.
POCKETSPHINX_EXPORT int ps_process_raw(ps_decoder_t *ps, int16 const *data, size_t n_samples, int no_search, int full_utt)
Decode raw audio data.
A structure for a dictionary.
feat_t * fcb
Dynamic feature computation.
Word graph structure used in bestpath/nbest search.
char const * senlogdir
Log directory for senone score files.
ps_searchfuncs_t * vt
V-table of search methods.
ps_astar_t * ps_astar_start(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, int sf, int ef, int w1, int w2)
Begin N-Gram based A* search on a word graph.
uint32 n_frame
Total number of frames processed.
POCKETSPHINX_EXPORT int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
Reinitialize the decoder with updated configuration.
Fast and rough context-independent phoneme loop search.
dict2pid_t * d2p
Dictionary to senone mapping.
dict2pid_t * dict2pid_retain(dict2pid_t *d2p)
Retain a pointer to dict2pid.
POCKETSPHINX_EXPORT int ps_end_utt(ps_decoder_t *ps)
End utterance processing.
int32 post
Utterance posterior probability.
char * hyp_str
Current hypothesis string.
Partial path structure used in N-best (A*) search.
dict_t * dict
Pronunciation dictionary.
int acmod_process_raw(acmod_t *acmod, int16 const **inout_raw, size_t *inout_n_samps, int full_utt)
Feed raw audio data into the acoustic model for scoring. (TODO: Set queue length for utterance processing.)
Flat lexicon based Viterbi search.
POCKETSPHINX_EXPORT int ps_add_word(ps_decoder_t *ps, char const *word, char const *phones, int update)
Add a word to the pronunciation dictionary.
ngram_model_t * lmset
Set of language models.
int32 lback
Language model backoff.
POCKETSPHINX_EXPORT void ps_get_all_time(ps_decoder_t *ps, double *out_nspeech, double *out_ncpu, double *out_nwall)
Get overall performance information.
POCKETSPHINX_EXPORT logmath_t * ps_get_logmath(ps_decoder_t *ps)
Get the log-math computation object for this decoder.
POCKETSPHINX_EXPORT char const * ps_get_uttid(ps_decoder_t *ps)
Get current utterance ID.
int acmod_start_utt(acmod_t *acmod)
Mark the start of an utterance.
POCKETSPHINX_EXPORT int32 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback)
Get language, acoustic, and posterior probabilities from a segmentation iterator. ...
POCKETSPHINX_EXPORT char const * ps_get_hyp_final(ps_decoder_t *ps, int32 *out_is_final)
Get hypothesis string and final flag.
POCKETSPHINX_EXPORT fsg_set_t * ps_get_fsgset(ps_decoder_t *ps)
Get the finite-state grammar set object for this decoder.
s3wid_t dict_add_word(dict_t *d, char const *word, s3cipid_t const *p, int32 np)
Add a word with the given ciphone pronunciation list to the dictionary.
bin_mdef_t * mdef
Model definition.
ps_latlink_t * last_link
Final link in best path.
dict2pid_t * dict2pid_build(bin_mdef_t *mdef, dict_t *dict)
Build the dict2pid structure for the given model/dictionary.
acmod_t * acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
Initialize an acoustic model.
frame_idx_t n_feat_frame
Number of frames active in feat_buf.
POCKETSPHINX_EXPORT fe_t * ps_get_fe(ps_decoder_t *ps)
Get the feature extraction object for this decoder.
V-table for search algorithm.
ps_search_t * pls
Phoneme loop for lookahead.
ps_seg_t * ps_astar_seg_iter(ps_astar_t *astar, ps_latpath_t *path, float32 lwf)
Get hypothesis segmentation from A* search.
ps_lattice_t * dag
Current hypothesis word graph.
POCKETSPHINX_EXPORT ps_decoder_t * ps_init(cmd_ln_t *config)
Initialize the decoder from a configuration object.
Base structure for hypothesis segmentation iterator.
cmd_ln_t * config
Configuration.
#define dict_size(d)
Packaged macro access to dictionary members.
int32 score
Exact score from start node up to node->sf.
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
POCKETSPHINX_EXPORT int ps_get_n_frames(ps_decoder_t *ps)
Get the number of frames of data searched.
Acoustic model structure.
Building composite triphone (as well as word internal triphones) with the dictionary.
void ps_search_deinit(ps_search_t *search)
De-initialize base structure.
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
POCKETSPHINX_EXPORT char const * ps_seg_word(ps_seg_t *seg)
Get word string from a segmentation iterator.
uint32 uttno
Utterance counter.
frame_idx_t sf
Start frame.
int pl_window
Window size for phoneme lookahead.
POCKETSPHINX_EXPORT char const * ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score, char const **out_uttid)
Get hypothesis string and path score.