PocketSphinx  0.6
cmdln_macro.h
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2006 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /* cmdln_macro.h - Command line definitions for PocketSphinx */
39 
40 #ifndef __PS_CMDLN_MACRO_H__
41 #define __PS_CMDLN_MACRO_H__
42 
43 #include <sphinxbase/cmd_ln.h>
44 #include <sphinxbase/feat.h>
45 #include <sphinxbase/fe.h>
46 
/**
 * Full PocketSphinx option list.
 *
 * Expands to two function-like macro invocations (presumably supplied by
 * the sphinxbase fe.h / feat.h headers included above -- confirm) followed
 * by every POCKETSPHINX_*_OPTIONS group defined in this header.  The
 * expansion has no trailing comma, so it can be followed by further
 * entries or a terminator inside an initializer list.
 */
#define POCKETSPHINX_OPTIONS                        \
    waveform_to_cepstral_command_line_macro(),      \
    cepstral_to_feature_command_line_macro(),       \
    POCKETSPHINX_ACMOD_OPTIONS,                     \
    POCKETSPHINX_BEAM_OPTIONS,                      \
    POCKETSPHINX_SEARCH_OPTIONS,                    \
    POCKETSPHINX_DICT_OPTIONS,                      \
    POCKETSPHINX_NGRAM_OPTIONS,                     \
    POCKETSPHINX_FSG_OPTIONS,                       \
    POCKETSPHINX_DEBUG_OPTIONS
58 
/**
 * Logging and debugging options.
 *
 * Each entry is a brace-enclosed initializer of the form
 * { option name, type tag, default value string (or NULL), help text }.
 * All five options here default to NULL.
 */
#define POCKETSPHINX_DEBUG_OPTIONS                              \
    { "-logfn", ARG_STRING, NULL,                               \
      "File to write log messages in" },                        \
    { "-debug", ARG_INT32, NULL,                                \
      "Verbosity level for debugging messages" },               \
    { "-mfclogdir", ARG_STRING, NULL,                           \
      "Directory to log feature files to" },                    \
    { "-rawlogdir", ARG_STRING, NULL,                           \
      "Directory to log raw audio files to" },                  \
    { "-senlogdir", ARG_STRING, NULL,                           \
      "Directory to log senone score files to" }
85 
/**
 * Beam-width options for the search passes and for phoneme lookahead.
 *
 * Each entry is { option name, type tag, default value string, help text }.
 * Defaults are given as strings in scientific notation; per the "-beam"
 * help text, smaller values mean a wider beam.
 */
#define POCKETSPHINX_BEAM_OPTIONS                                           \
    { "-beam", ARG_FLOAT64, "1e-48",                                        \
      "Beam width applied to every frame in Viterbi search "                \
      "(smaller values mean wider beam)" },                                 \
    { "-wbeam", ARG_FLOAT64, "7e-29",                                       \
      "Beam width applied to word exits" },                                 \
    { "-pbeam", ARG_FLOAT64, "1e-48",                                       \
      "Beam width applied to phone transitions" },                          \
    { "-lpbeam", ARG_FLOAT64, "1e-40",                                      \
      "Beam width applied to last phone in words" },                        \
    { "-lponlybeam", ARG_FLOAT64, "7e-29",                                  \
      "Beam width applied to last phone in single-phone words" },           \
    { "-fwdflatbeam", ARG_FLOAT64, "1e-64",                                 \
      "Beam width applied to every frame in second-pass flat search" },     \
    { "-fwdflatwbeam", ARG_FLOAT64, "7e-29",                                \
      "Beam width applied to word exits in second-pass flat search" },      \
    { "-pl_window", ARG_INT32, "0",                                         \
      "Phoneme lookahead window size, in frames" },                         \
    { "-pl_beam", ARG_FLOAT64, "1e-10",                                     \
      "Beam width applied to phone loop search for lookahead" },            \
    { "-pl_pbeam", ARG_FLOAT64, "1e-5",                                     \
      "Beam width applied to phone loop transitions for lookahead" }
128 
/**
 * Options selecting which search passes run and how they are pruned.
 *
 * Each entry is { option name, type tag, default value string, help text }.
 * Boolean defaults are spelled "yes"/"no"; per the help texts, "-1" for
 * -maxwpf and -maxhmmpf means no pruning.
 */
#define POCKETSPHINX_SEARCH_OPTIONS                                             \
    { "-compallsen", ARG_BOOLEAN, "no",                                         \
      "Compute all senone scores in every frame "                               \
      "(can be faster when there are many senones)" },                          \
    { "-fwdtree", ARG_BOOLEAN, "yes",                                           \
      "Run forward lexicon-tree search (1st pass)" },                           \
    { "-fwdflat", ARG_BOOLEAN, "yes",                                           \
      "Run forward flat-lexicon search over word lattice (2nd pass)" },         \
    { "-bestpath", ARG_BOOLEAN, "yes",                                          \
      "Run bestpath (Dijkstra) search over word lattice (3rd pass)" },          \
    { "-backtrace", ARG_BOOLEAN, "no",                                          \
      "Print results and backtraces to log file." },                            \
    { "-latsize", ARG_INT32, "5000",                                            \
      "Initial backpointer table size" },                                       \
    { "-maxwpf", ARG_INT32, "-1",                                               \
      "Maximum number of distinct word exits at each frame "                    \
      "(or -1 for no pruning)" },                                               \
    { "-maxhmmpf", ARG_INT32, "-1",                                             \
      "Maximum number of active HMMs to maintain at each frame "                \
      "(or -1 for no pruning)" },                                               \
    { "-min_endfr", ARG_INT32, "0",                                             \
      "Nodes ignored in lattice construction if they persist "                  \
      "for fewer than N frames" },                                              \
    { "-fwdflatefwid", ARG_INT32, "4",                                          \
      "Minimum number of end frames for a word to be searched "                 \
      "in fwdflat search" },                                                    \
    { "-fwdflatsfwin", ARG_INT32, "25",                                         \
      "Window of frames in lattice to search for successor words "              \
      "in fwdflat search " }
175 
/**
 * Finite-state grammar options (Sphinx FSG and JSGF inputs).
 *
 * Each entry is { option name, type tag, default value string (or NULL),
 * help text }.  The three string options default to NULL; the two boolean
 * options default to "yes".
 */
#define POCKETSPHINX_FSG_OPTIONS                                    \
    { "-fsg", ARG_STRING, NULL,                                     \
      "Sphinx format finite state grammar file" },                  \
    { "-jsgf", ARG_STRING, NULL,                                    \
      "JSGF grammar file" },                                        \
    { "-toprule", ARG_STRING, NULL,                                 \
      "Start rule for JSGF (first public rule is default)" },       \
    { "-fsgusealtpron", ARG_BOOLEAN, "yes",                         \
      "Add alternate pronunciations to FSG" },                      \
    { "-fsgusefiller", ARG_BOOLEAN, "yes",                          \
      "Insert filler words at each state." }
198 
/**
 * N-gram language model options: model files, language weights for each
 * pass, and insertion/transition penalties and probabilities.
 *
 * Each entry is { option name, type tag, default value string (or NULL),
 * help text }.
 *
 * The "-lmctl" help text previously ended in an embedded newline and read
 * "language model" (singular); no other help string in this header carries
 * a newline, so it has been normalized to a single plain line.
 */
#define POCKETSPHINX_NGRAM_OPTIONS                                          \
    { "-lm", ARG_STRING, NULL,                                              \
      "Word trigram language model input file" },                           \
    { "-lmctl", ARG_STRING, NULL,                                           \
      "Specify a set of language models" },                                 \
    { "-lmname", ARG_STRING, "default",                                     \
      "Which language model in -lmctl to use by default" },                 \
    { "-lw", ARG_FLOAT32, "6.5",                                            \
      "Language model probability weight" },                                \
    { "-fwdflatlw", ARG_FLOAT32, "8.5",                                     \
      "Language model probability weight for flat lexicon "                 \
      "(2nd pass) decoding" },                                              \
    { "-bestpathlw", ARG_FLOAT32, "9.5",                                    \
      "Language model probability weight for bestpath search" },            \
    { "-ascale", ARG_FLOAT32, "20.0",                                       \
      "Inverse of acoustic model scale for confidence score calculation" }, \
    { "-wip", ARG_FLOAT32, "0.65",                                          \
      "Word insertion penalty" },                                           \
    { "-nwpen", ARG_FLOAT32, "1.0",                                         \
      "New word transition penalty" },                                      \
    { "-pip", ARG_FLOAT32, "1.0",                                           \
      "Phone insertion penalty" },                                          \
    { "-uw", ARG_FLOAT32, "1.0",                                            \
      "Unigram weight" },                                                   \
    { "-silprob", ARG_FLOAT32, "0.005",                                     \
      "Silence word transition probability" },                              \
    { "-fillprob", ARG_FLOAT32, "1e-8",                                     \
      "Filler word transition probability" },                               \
    { "-bghist", ARG_BOOLEAN, "no",                                         \
      "Bigram-mode: If TRUE only one BP entry/frame; "                      \
      "else one per LM state" },                                            \
    { "-lextreedump", ARG_INT32, "0",                                       \
      "Whether to dump the lextree structure to stderr (for debugging), "   \
      "1 for Ravi's format, 2 for Dot format, "                             \
      "Larger than 2 will be treated as Ravi's format" }
261 
/**
 * Pronunciation dictionary options.
 *
 * Each entry is { option name, type tag, default value string (or NULL),
 * help text }.  "-dict" is the only entry in this header tagged
 * REQARG_STRING rather than ARG_STRING.
 */
#define POCKETSPHINX_DICT_OPTIONS                                       \
    { "-dict", REQARG_STRING, NULL,                                     \
      "Main pronunciation dictionary (lexicon) input file" },           \
    { "-fdict", ARG_STRING, NULL,                                       \
      "Noise word pronunciation dictionary input file" },               \
    { "-dictcase", ARG_BOOLEAN, "no",                                   \
      "Dictionary is case sensitive "                                   \
      "(NOTE: case insensitivity applies to ASCII characters only)" },  \
    { "-maxnewoov", ARG_INT32, "20",                                    \
      "Maximum new OOVs that can be added at run time" },               \
    { "-usewdphones", ARG_BOOLEAN, "no",                                \
      "Use within-word phones only" }
284 
/**
 * Acoustic model options: model directory and individual model files,
 * probability/variance floors, Gaussian selection, and log base.
 *
 * Each entry is { option name, type tag, default value string (or NULL),
 * help text }.  Note that "-topn_beam" is tagged ARG_STRING with default
 * "0"; its help text says it may also be a per-feature list.
 */
#define POCKETSPHINX_ACMOD_OPTIONS                                          \
    { "-hmm", ARG_STRING, NULL,                                             \
      "Directory containing acoustic model files." },                       \
    { "-featparams", ARG_STRING, NULL,                                      \
      "File containing feature extraction parameters." },                   \
    { "-mdef", ARG_STRING, NULL,                                            \
      "Model definition input file" },                                      \
    { "-senmgau", ARG_STRING, NULL,                                         \
      "Senone to codebook mapping input file (usually not needed)" },       \
    { "-tmat", ARG_STRING, NULL,                                            \
      "HMM state transition matrix input file" },                           \
    { "-tmatfloor", ARG_FLOAT32, "0.0001",                                  \
      "HMM state transition probability floor (applied to -tmat file)" },   \
    { "-mean", ARG_STRING, NULL,                                            \
      "Mixture gaussian means input file" },                                \
    { "-var", ARG_STRING, NULL,                                             \
      "Mixture gaussian variances input file" },                            \
    { "-varfloor", ARG_FLOAT32, "0.0001",                                   \
      "Mixture gaussian variance floor (applied to data from -var file)" }, \
    { "-mixw", ARG_STRING, NULL,                                            \
      "Senone mixture weights input file (uncompressed)" },                 \
    { "-mixwfloor", ARG_FLOAT32, "0.0000001",                               \
      "Senone mixture weights floor (applied to data from -mixw file)" },   \
    { "-aw", ARG_INT32, "1",                                                \
      "Inverse weight applied to acoustic scores." },                       \
    { "-sendump", ARG_STRING, NULL,                                         \
      "Senone dump (compressed mixture weights) input file" },              \
    { "-mllr", ARG_STRING, NULL,                                            \
      "MLLR transformation to apply to means and variances" },              \
    { "-mmap", ARG_BOOLEAN, "yes",                                          \
      "Use memory-mapped I/O (if possible) for model files" },              \
    { "-ds", ARG_INT32, "1",                                                \
      "Frame GMM computation downsampling ratio" },                         \
    { "-topn", ARG_INT32, "4",                                              \
      "Maximum number of top Gaussians to use in scoring." },               \
    { "-topn_beam", ARG_STRING, "0",                                        \
      "Beam width used to determine top-N Gaussians "                       \
      "(or a list, per-feature)" },                                         \
    { "-kdtree", ARG_STRING, NULL,                                          \
      "kd-Tree file for Gaussian selection" },                              \
    { "-kdmaxdepth", ARG_INT32, "0",                                        \
      "Maximum depth of kd-Trees to use" },                                 \
    { "-kdmaxbbi", ARG_INT32, "-1",                                         \
      "Maximum number of Gaussians per leaf node in kd-Trees" },            \
    { "-logbase", ARG_FLOAT32, "1.0001",                                    \
      "Base in which all log-likelihoods calculated" }
375 
/**
 * Option entry with every field empty (NULL name, type 0, NULL default,
 * NULL help text).  Presumably used to terminate an option table --
 * confirm against the code that builds the table.
 */
#define CMDLN_EMPTY_OPTION \
    { NULL, 0, NULL, NULL }
377 
378 #endif /* __PS_CMDLN_MACRO_H__ */