PocketSphinx  0.6
pocketsphinx_internal.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
44 #ifndef __POCKETSPHINX_INTERNAL_H__
45 #define __POCKETSPHINX_INTERNAL_H__
46 
47 /* SphinxBase headers. */
48 #include <sphinxbase/cmd_ln.h>
49 #include <sphinxbase/logmath.h>
50 #include <sphinxbase/fe.h>
51 #include <sphinxbase/feat.h>
52 #include <sphinxbase/profile.h>
53 
54 /* Local headers. */
55 #include "pocketsphinx.h"
56 #include "acmod.h"
57 #include "dict.h"
58 #include "dict2pid.h"
59 
/* Forward declaration of the base structure for search modules, so the
 * v-table below can take ps_search_t pointers before struct ps_search_s
 * is defined. */
63 typedef struct ps_search_s ps_search_t;
64 
/**
 * V-table for search algorithm.
 *
 * Every function entry receives the search object as its first argument.
 * Entries are invoked through the ps_search_*() dispatch macros, which
 * reach this table via the vt member of the base search structure.
 */
68 typedef struct ps_searchfuncs_s {
69  char const *name; /**< Name string, read through ps_search_name(). */
70 
71  int (*start)(ps_search_t *search); /**< Invoked by ps_search_start(). */
72  int (*step)(ps_search_t *search, int frame_idx); /**< Invoked by ps_search_step(). */
73  int (*finish)(ps_search_t *search); /**< Invoked by ps_search_finish(). */
74  int (*reinit)(ps_search_t *search, dict_t *dict, dict2pid_t *d2p); /**< Invoked by ps_search_reinit(). */
75  void (*free)(ps_search_t *search); /**< Invoked by ps_search_free(). */
76 
77  ps_lattice_t *(*lattice)(ps_search_t *search); /**< Invoked by ps_search_lattice(). */
78  char const *(*hyp)(ps_search_t *search, int32 *out_score, int32 *out_is_final); /**< Invoked by ps_search_hyp(). */
79  int32 (*prob)(ps_search_t *search); /**< Invoked by ps_search_prob(). */
80  ps_seg_t *(*seg_iter)(ps_search_t *search, int32 *out_score); /**< Invoked by ps_search_seg_iter(). */
80 } ps_searchfuncs_t;
82 
/**
 * Base structure for search module.
 *
 * The ps_search_*() accessor macros cast a search object to this type and
 * read the members declared here.
 *
 * NOTE(review): vt, pls, acmod, dict, d2p, dag and last_link were absent
 * from this listing although the accessor macros dereference them; they are
 * restored here from the member index. Confirm their order against the
 * original header.
 */
86 struct ps_search_s {
87  ps_searchfuncs_t *vt; /**< V-table of search methods. */
88  ps_search_t *pls; /**< Phoneme loop for lookahead. */
89  cmd_ln_t *config; /**< Configuration. */
90  acmod_t *acmod; /**< Acoustic model. */
91  dict_t *dict; /**< Pronunciation dictionary. */
92  dict2pid_t *d2p; /**< Dictionary to senone mappings. */
93  char *hyp_str; /**< Current hypothesis string. */
94  ps_lattice_t *dag; /**< Current hypothesis word graph. */
95  ps_latlink_t *last_link; /**< Final link in best path. */
96  int32 post; /**< Utterance posterior probability. */
97  int32 n_words; /**< Number of words known to search (may be less than in the dictionary) */
100  /* Magical word IDs that must exist in the dictionary: */
101  int32 start_wid; /**< Start word ID. */
102  int32 silence_wid; /**< Silence word ID. */
103  int32 finish_wid; /**< Finish word ID. */
104 };
105 
/*
 * Accessors for the base search structure.
 *
 * ps_search_base() casts its argument to ps_search_t *; the remaining
 * macros read one member through that cast. The argument is parenthesised
 * so that an expression such as (p + 1) is cast as a whole, and every
 * expansion is parenthesised so it behaves as a single operand. The
 * parenthesised member accesses remain valid lvalues.
 */
106 #define ps_search_base(s) ((ps_search_t *)(s))
107 #define ps_search_config(s) (ps_search_base(s)->config)
108 #define ps_search_acmod(s) (ps_search_base(s)->acmod)
109 #define ps_search_dict(s) (ps_search_base(s)->dict)
110 #define ps_search_dict2pid(s) (ps_search_base(s)->d2p)
111 #define ps_search_dag(s) (ps_search_base(s)->dag)
112 #define ps_search_last_link(s) (ps_search_base(s)->last_link)
113 #define ps_search_post(s) (ps_search_base(s)->post)
114 #define ps_search_lookahead(s) (ps_search_base(s)->pls)
115 #define ps_search_n_words(s) (ps_search_base(s)->n_words)
116 
/*
 * Dispatch macros: look up an entry in the search object's v-table (the
 * vt member of the base structure) and, for function entries, call it with
 * the search object as first argument.
 *
 * The search argument is expanded twice (once for the lookup, once as the
 * call argument), so it must not have side effects. Arguments and whole
 * expansions are parenthesised so the macros compose safely inside larger
 * expressions.
 */
117 #define ps_search_name(s) (ps_search_base(s)->vt->name)
118 #define ps_search_start(s) ((*(ps_search_base(s)->vt->start))(s))
119 #define ps_search_step(s,i) ((*(ps_search_base(s)->vt->step))((s), (i)))
120 #define ps_search_finish(s) ((*(ps_search_base(s)->vt->finish))(s))
121 #define ps_search_reinit(s,d,d2p) ((*(ps_search_base(s)->vt->reinit))((s), (d), (d2p)))
122 #define ps_search_free(s) ((*(ps_search_base(s)->vt->free))(s))
123 #define ps_search_lattice(s) ((*(ps_search_base(s)->vt->lattice))(s))
124 #define ps_search_hyp(s,sc,final) ((*(ps_search_base(s)->vt->hyp))((s), (sc), (final)))
125 #define ps_search_prob(s) ((*(ps_search_base(s)->vt->prob))(s))
126 #define ps_search_seg_iter(s,sc) ((*(ps_search_base(s)->vt->seg_iter))((s), (sc)))
127 
128 /* For convenience: shorthand accessors for the silence, start and finish
 * word IDs stored in the base search structure. Each expansion is
 * parenthesised so it behaves as a single operand (and stays an lvalue). */
129 #define ps_search_silence_wid(s) (ps_search_base(s)->silence_wid)
130 #define ps_search_start_wid(s) (ps_search_base(s)->start_wid)
131 #define ps_search_finish_wid(s) (ps_search_base(s)->finish_wid)
132 
/**
 * Initialize base structure.
 *
 * NOTE(review): the first line of this prototype was missing from the
 * listing and is restored from the member index.
 */
136 void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt,
137  cmd_ln_t *config, acmod_t *acmod, dict_t *dict,
138  dict2pid_t *d2p);
139 
/**
 * Re-initialize base structure with new dictionary.
 *
 * Takes the search object together with a dictionary and a dict2pid_t
 * (dictionary to senone mapping) object; returns nothing.
 */
143 void ps_search_base_reinit(ps_search_t *search, dict_t *dict,
144  dict2pid_t *d2p);
145 
/**
 * De-initialize base structure.
 *
 * Takes the search object and returns nothing.
 */
149 void ps_search_deinit(ps_search_t *search);
150 
/**
 * V-table of seg methods.
 *
 * Entries are invoked through the ps_search_seg_next() and
 * ps_search_seg_free() macros, which look them up via a segment's vt member.
 */
151 typedef struct ps_segfuncs_s {
152  ps_seg_t *(*seg_next)(ps_seg_t *seg); /* takes a segment, returns a ps_seg_t pointer */
153  void (*seg_free)(ps_seg_t *seg); /* takes a segment, returns nothing */
154 } ps_segfuncs_t;
155 
/**
 * Base structure for hypothesis segmentation iterator.
 *
 * NOTE(review): vt, search, sf and ef were absent from this listing (the
 * seg dispatch macros dereference vt); they are restored here from the
 * member index. Confirm their order against the original header.
 */
159 struct ps_seg_s {
160  ps_segfuncs_t *vt; /**< V-table of seg methods. */
161  ps_search_t *search; /**< Search object from whence this came. */
162  char const *word; /**< Word string (pointer into dictionary hash) */
163  frame_idx_t sf; /**< Start frame. */
164  frame_idx_t ef; /**< End frame. */
165  int32 ascr; /**< Acoustic score. */
166  int32 lscr; /**< Language model score. */
167  int32 prob; /**< Log posterior probability. */
168  /* This doesn't need to be 32 bits, so once the scores above are
169  * reduced to 16 bits (or less!), this will be too. */
170  int32 lback; /**< Language model backoff. */
171  /* Not sure if this should be here at all. */
172  float32 lwf; /**< Language weight factor (for second-pass searches) */
173 };
174 
/*
 * Dispatch macros for segmentation iterators: call the seg_next / seg_free
 * entry of the segment's v-table with the segment itself as argument.
 *
 * ps_search_seg_free() previously declared its parameter as `s` but expanded
 * to `seg`, so it ignored its argument and only worked when the caller had a
 * variable literally named `seg` in scope. Both macros now use their own
 * parameter, parenthesised. The argument is expanded twice, so it must not
 * have side effects.
 */
175 #define ps_search_seg_next(seg) ((*((seg)->vt->seg_next))(seg))
176 #define ps_search_seg_free(seg) ((*((seg)->vt->seg_free))(seg))
177 
178 
/**
 * Decoder object.
 *
 * NOTE(review): acmod, dict, d2p, search and phone_loop were absent from
 * this listing; they are restored here from the member index into the gaps
 * in the listing numbers. Confirm their order against the original header.
 */
182 struct ps_decoder_s {
183  /* Model parameters and such. */
184  cmd_ln_t *config; /**< Configuration. */
185  int refcount; /**< Reference count. */
187  /* Basic units of computation. */
188  acmod_t *acmod; /**< Acoustic model. */
189  dict_t *dict; /**< Pronunciation dictionary. */
190  dict2pid_t *d2p; /**< Dictionary to senone mapping. */
191  logmath_t *lmath; /**< Log math computation. */
193  /* Search modules. */
194  glist_t searches; /**< List of search modules. */
195  /* TODO: Convert this to a stack of searches each with their own
196  * lookahead value. */
197  ps_search_t *search; /**< Currently active search module. */
198  ps_search_t *phone_loop; /**< Phone loop search for lookahead. */
199  int pl_window; /**< Window size for phoneme lookahead. */
201  /* Utterance-processing related stuff. */
202  uint32 uttno; /**< Utterance counter. */
203  char *uttid; /**< Utterance ID for current utterance. */
204  ptmr_t perf; /**< Performance counter for all of decoding. */
205  uint32 n_frame; /**< Total number of frames processed. */
206  char const *mfclogdir; /**< Log directory for MFCC files. */
207  char const *rawlogdir; /**< Log directory for audio files. */
208  char const *senlogdir; /**< Log directory for senone score files. */
209 };
210 
211 #endif /* __POCKETSPHINX_INTERNAL_H__ */
ptmr_t perf
Performance counter for all of decoding.
Building triphones for a dictionary.
Base structure for search module.
dict_t * dict
Pronunciation dictionary.
int32 silence_wid
Silence word ID.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
acmod_t * acmod
Acoustic model.
Main header file for the PocketSphinx decoder.
ps_segfuncs_t * vt
V-table of seg methods.
int32 finish_wid
Finish word ID.
int32 lscr
Language model score.
int32 n_words
Number of words known to search (may be less than in the dictionary)
Operations on dictionary.
struct ps_searchfuncs_s ps_searchfuncs_t
V-table for search algorithm.
char const * mfclogdir
Log directory for MFCC files.
int refcount
Reference count.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
int32 prob
Log posterior probability.
char const * word
Word string (pointer into dictionary hash)
acmod_t * acmod
Acoustic model.
ps_search_t * search
Search object from whence this came.
ps_search_t * search
Currently active search module.
logmath_t * lmath
Log math computation.
int32 start_wid
Start word ID.
char * uttid
Utterance ID for current utterance.
dict2pid_t * d2p
Dictionary to senone mappings.
Decoder object.
char const * rawlogdir
Log directory for audio files.
frame_idx_t ef
End frame.
int32 ascr
Acoustic score.
cmd_ln_t * config
Configuration.
glist_t searches
List of search modules.
ps_search_t * phone_loop
Phone loop search for lookahead.
A structure for a dictionary.
Definition: dict.h:79
Word graph structure used in bestpath/nbest search.
char const * senlogdir
Log directory for senone score files.
ps_searchfuncs_t * vt
V-table of search methods.
uint32 n_frame
Total number of frames processed.
dict2pid_t * d2p
Dictionary to senone mapping.
int32 post
Utterance posterior probability.
char * hyp_str
Current hypothesis string.
dict_t * dict
Pronunciation dictionary.
int32 frame_idx_t
Type for frame index values.
Definition: hmm.h:68
int32 lback
Language model backoff.
ps_latlink_t * last_link
Final link in best path.
V-table for search algorithm.
ps_search_t * pls
Phoneme loop for lookahead.
ps_lattice_t * dag
Current hypothesis word graph.
Acoustic model structures for PocketSphinx.
Base structure for hypothesis segmentation iterator.
cmd_ln_t * config
Configuration.
Acoustic model structure.
Definition: acmod.h:148
float32 lwf
Language weight factor (for second-pass searches)
Building composite triphones (as well as word-internal triphones) with the dictionary.
Definition: dict2pid.h:148
void ps_search_deinit(ps_search_t *search)
De-initialize base structure.
uint32 uttno
Utterance counter.
frame_idx_t sf
Start frame.
int pl_window
Window size for phoneme lookahead.