PocketSphinx  0.6
fsg_lextree.h
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * ====================================================================
32  *
33  */
34 /*
35  * fsg_lextree.h -- The collection of all the lextrees for the entire FSM.
36  *
37  * **********************************************
38  * CMU ARPA Speech Project
39  *
40  * Copyright (c) 2004 Carnegie Mellon University.
41  * ALL RIGHTS RESERVED.
42  * **********************************************
43  *
44  * HISTORY
45  *
46  * $Log: fsg_lextree.h,v $
47  * Revision 1.1.1.1 2006/05/23 18:45:02 dhuggins
48  * re-importation
49  *
50  * Revision 1.1 2004/07/16 00:57:12 egouvea
51  * Added Ravi's implementation of FSG support.
52  *
53  * Revision 1.3 2004/06/23 20:32:16 rkm
54  * *** empty log message ***
55  *
56  * Revision 1.2 2004/05/27 14:22:57 rkm
57  * FSG cross-word triphones completed (but for single-phone words)
58  *
59  * Revision 1.1.1.1 2004/03/01 14:30:31 rkm
60  *
61  *
62  * Revision 1.1 2004/02/23 15:53:45 rkm
63  * Renamed from fst to fsg
64  *
65  * Revision 1.2 2004/02/19 21:16:54 rkm
66  * Added fsg_search.{c,h}
67  *
68  * Revision 1.1 2004/02/18 15:02:34 rkm
69  * Added fsg_lextree.{c,h}
70  *
71  *
72  * 18-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
73  * Started.
74  */
75 
76 
77 #ifndef __S2_FSG_LEXTREE_H__
78 #define __S2_FSG_LEXTREE_H__
79 
80 /* SphinxBase headers. */
81 #include <sphinxbase/cmd_ln.h>
82 #include <sphinxbase/fsg_model.h>
83 
84 /* Local headers. */
85 #include "hmm.h"
86 #include "dict.h"
87 #include "dict2pid.h"
88 
89 /*
90  * **HACK-ALERT**!! Compile-time constant determining the size of the
91  * bitvector fsg_pnode_t.fsg_pnode_ctxt_t.bv. (See below.)
92  * But it makes memory allocation simpler and more efficient.
93  */
94 #define FSG_PNODE_CTXT_BVSZ 2
95 
96 typedef struct {
97  uint32 bv[FSG_PNODE_CTXT_BVSZ];
99 
100 
101 /*
102  * All transitions (words) out of any given FSG state represented are by a
103  * phonetic prefix lextree (except for epsilon or null transitions; they
104  * are not part of the lextree). Lextree leaf nodes represent individual
105  * FSG transitions, so no sharing is allowed at the leaf nodes. The FSG
106  * transition probs are distributed along the lextree: the prob at a node
107  * is the max of the probs of all leaf nodes (and, hence, FSG transitions)
108  * reachable from that node.
109  *
110  * To conserve memory, the underlying HMMs with state-level information are
111  * allocated only as needed. Root and leaf nodes must also account for all
112  * the possible phonetic contexts, with an independent HMM for each distinct
113  * context.
114  */
115 typedef struct fsg_pnode_s {
116  /*
117  * If this is not a leaf node, the first successor (child) node. Otherwise
118  * the parent FSG transition for which this is the leaf node (for figuring
119  * the FSG destination state, and word emitted by the transition). A node
120  * may have several children. The succ ptr gives just the first; the rest
121  * are linked via the sibling ptr below.
122  */
123  union {
124  struct fsg_pnode_s *succ;
125  fsg_link_t *fsglink;
126  } next;
127 
128  /*
129  * For simplicity of memory management (i.e., freeing the pnodes), all
130  * pnodes allocated for all transitions out of a state are maintained in a
131  * linear linked list through the alloc_next pointer.
132  */
133  struct fsg_pnode_s *alloc_next;
134 
135  /*
136  * The next node that is also a child of the parent of this node; NULL if
137  * none.
138  */
139  struct fsg_pnode_s *sibling;
140 
141  /*
142  * The transition (log) probability to be incurred upon transitioning to
143  * this node. (Transition probabilities are really associated with the
144  * transitions. But a lextree node has exactly one incoming transition.
145  * Hence, the prob can be associated with the node.)
146  * This is a logs2(prob) value, and includes the language weight.
147  */
148  int32 logs2prob;
149 
150  /*
151  * The root and leaf positions associated with any transition have to deal
152  * with multiple phonetic contexts. However, different contexts may result
153  * in the same SSID (senone-seq ID), and can share a single pnode with that
154  * SSID. But the pnode should track the set of context CI phones that share
155  * it. Hence the fsg_pnode_ctxt_t bit-vector set-representation. (For
156  * simplicity of implementation, its size is a compile-time constant for
157  * now.) Single phone words would need a 2-D array of context, but that's
158  * too expensive. For now, they simply use SIL as right context, so only
159  * the left context is properly modelled.
160  * (For word-internal phones, this field is unused, of course.)
161  */
162  fsg_pnode_ctxt_t ctxt;
163 
164  uint16 ci_ext; /* This node's CIphone as viewed externally (context) */
165  uint8 ppos; /* Phoneme position in pronunciation */
166  uint8 leaf; /* Whether this is a leaf node */
167 
168  /* HMM-state-level stuff here */
169  hmm_context_t *ctx;
170  hmm_t hmm;
171 } fsg_pnode_t;
172 
173 /* Access macros */
174 #define fsg_pnode_leaf(p) ((p)->leaf)
175 #define fsg_pnode_logs2prob(p) ((p)->logs2prob)
176 #define fsg_pnode_succ(p) ((p)->next.succ)
177 #define fsg_pnode_fsglink(p) ((p)->next.fsglink)
178 #define fsg_pnode_sibling(p) ((p)->sibling)
179 #define fsg_pnode_hmmptr(p) (&((p)->hmm))
180 #define fsg_pnode_ci_ext(p) ((p)->ci_ext)
181 #define fsg_pnode_ppos(p) ((p)->ppos)
182 #define fsg_pnode_leaf(p) ((p)->leaf)
183 #define fsg_pnode_ctxt(p) ((p)->ctxt)
184 
185 #define fsg_pnode_add_ctxt(p,c) ((p)->ctxt.bv[(c)>>5] |= (1 << ((c)&0x001f)))
186 
187 /*
188  * The following is macroized because its called very frequently
189  * ::: uint32 fsg_pnode_ctxt_sub (fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub);
190  */
191 /*
192  * Subtract bitvector sub from bitvector src (src updated with the result).
193  * Return 0 if result is all 0, non-zero otherwise.
194  */
195 
196 #if (FSG_PNODE_CTXT_BVSZ == 1)
197  #define FSG_PNODE_CTXT_SUB(src,sub) \
198  ((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0]))
199 #elif (FSG_PNODE_CTXT_BVSZ == 2)
200  #define FSG_PNODE_CTXT_SUB(src,sub) \
201  (((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0])) | \
202  ((src)->bv[1] = (~((sub)->bv[1]) & (src)->bv[1])))
203 #elif (FSG_PNODE_CTXT_BVSZ == 4)
204  #define FSG_PNODE_CTXT_SUB(src,sub) \
205  (((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0])) | \
206  ((src)->bv[1] = (~((sub)->bv[1]) & (src)->bv[1])) | \
207  ((src)->bv[2] = (~((sub)->bv[2]) & (src)->bv[2])) | \
208  ((src)->bv[3] = (~((sub)->bv[3]) & (src)->bv[3])))
209 #else
210  #define FSG_PNODE_CTXT_SUB(src,sub) fsg_pnode_ctxt_sub_generic((src),(sub))
211 #endif
212 
216 typedef struct fsg_lextree_s {
217  fsg_model_t *fsg;
223  /*
224  * Left and right CIphone sets for each state.
225  * Left context CIphones for a state S: If word W transitions into S, W's
226  * final CIphone is in S's {lc}. Words transitioning out of S must consider
227  * these left context CIphones.
228  * Similarly, right contexts for state S: If word W transitions out of S,
229  * W's first CIphone is in S's {rc}. Words transitioning into S must consider
230  * these right contexts.
231  *
232  * NOTE: Words may transition into and out of S INDIRECTLY, with intermediate
233  * null transitions.
234  * NOTE: Single-phone words are difficult; only SILENCE right context is
235  * modelled for them.
236  * NOTE: Non-silence filler phones aren't included in these sets. Filler
237  * words don't use context, and present the SILENCE phone as context to
238  * adjacent words.
239  */
240  int16 **lc;
241  int16 **rc;
243  fsg_pnode_t **root; /* root[s] = lextree representing all transitions
244  out of state s. Note that the "tree" for each
245  state is actually a collection of trees, linked
246  via fsg_pnode_t.sibling (root[s]->sibling) */
247  fsg_pnode_t **alloc_head; /* alloc_head[s] = head of linear list of all
248  pnodes allocated for state s */
249  int32 n_pnode; /* #HMM nodes in search structure */
250  int32 wip;
251  int32 pip;
252 } fsg_lextree_t;
253 
254 /* Access macros */
255 #define fsg_lextree_root(lt,s) ((lt)->root[s])
256 #define fsg_lextree_n_pnode(lt) ((lt)->n_pnode)
257 
261 fsg_lextree_t *fsg_lextree_init(fsg_model_t *fsg, dict_t *dict,
262  dict2pid_t *d2p,
263  bin_mdef_t *mdef, hmm_context_t *ctx,
264  int32 wip, int32 pip);
265 
270 
274 void fsg_lextree_dump(fsg_lextree_t *fsg, FILE *fh);
275 
280 
285 
290 
291 #endif
void fsg_lextree_free(fsg_lextree_t *lextree)
Free lextrees for an FSG.
Definition: fsg_lextree.c:284
hmm_context_t * ctx
HMM context structure.
Definition: fsg_lextree.h:218
Building triphones for a dictionary.
void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt)
Set all flags on in the given context bitvector.
Definition: fsg_lextree.c:326
An individual HMM among the HMM search space.
bin_mdef_t * mdef
Model definition (triphone mappings).
Definition: fsg_lextree.h:221
uint32 fsg_pnode_ctxt_sub_generic(fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub)
Generic variant for arbitrary size.
Definition: fsg_lextree.c:334
void fsg_lextree_dump(fsg_lextree_t *lextree, FILE *fp)
Print an FSG lextree to a file for debugging.
Definition: fsg_lextree.c:271
Operations on dictionary.
Implementation of HMM base structure.
void fsg_psubtree_pnode_deactivate(fsg_pnode_t *pnode)
Mark the given pnode as inactive (for search).
Definition: fsg_lextree.c:831
dict_t * dict
Pronunciation dictionary for this FSG.
Definition: fsg_lextree.h:219
Shared information between a set of HMMs.
Collection of lextrees for an FSG.
Definition: fsg_lextree.h:216
a structure for a dictionary.
Definition: dict.h:79
int16 ** rc
Right context triphone mappings for FSG.
Definition: fsg_lextree.h:241
int16 ** lc
Left context triphone mappings for FSG.
Definition: fsg_lextree.h:240
fsg_lextree_t * fsg_lextree_init(fsg_model_t *fsg, dict_t *dict, dict2pid_t *d2p, bin_mdef_t *mdef, hmm_context_t *ctx, int32 wip, int32 pip)
Create, initialize, and return a new phonetic lextree for the given FSG.
Definition: fsg_lextree.c:213
fsg_model_t * fsg
The fsg for which this lextree is built.
Definition: fsg_lextree.h:217
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:148
dict2pid_t * d2p
Context-dependent phone mappings for this FSG.
Definition: fsg_lextree.h:220