PocketSphinx  0.6
fsg_lextree.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2010 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * ====================================================================
32  *
33  */
41 /* System headers. */
42 #include <stdio.h>
43 #include <string.h>
44 #include <assert.h>
45 
46 /* SphinxBase headers. */
47 #include <sphinxbase/ckd_alloc.h>
48 #include <sphinxbase/err.h>
49 
50 /* Local headers. */
51 #include "fsg_lextree.h"
52 
53 #define __FSG_DBG__ 0
54 
55 /* A linklist structure that is actually used to build local lextrees at grammar nodes */
56 typedef struct fsg_glist_linklist_t {
57  int32 ci, rc;
58  glist_t glist;
59  struct fsg_glist_linklist_t *next;
61 
68 static fsg_pnode_t *fsg_psubtree_init(fsg_lextree_t *tree,
69  fsg_model_t *fsg,
70  int32 from_state,
71  fsg_pnode_t **alloc_head);
72 
77 static void fsg_psubtree_free(fsg_pnode_t *alloc_head);
78 
83 static void fsg_psubtree_dump(fsg_lextree_t *tree, fsg_pnode_t *root, FILE *fp);
84 
88 static void
89 fsg_lextree_lc_rc(fsg_lextree_t *lextree)
90 {
91  int32 s, i, j;
92  int32 n_ci;
93  fsg_model_t *fsg;
94  int32 silcipid;
95  int32 len;
96 
97  silcipid = bin_mdef_silphone(lextree->mdef);
98  assert(silcipid >= 0);
99  n_ci = bin_mdef_n_ciphone(lextree->mdef);
100 
101  fsg = lextree->fsg;
102  /*
103  * lextree->lc[s] = set of left context CIphones for state s. Similarly, rc[s]
104  * for right context CIphones.
105  */
106  lextree->lc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->lc));
107  lextree->rc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->rc));
108  E_INFO("Allocated %d bytes (%d KiB) for left and right context phones\n",
109  fsg->n_state * (n_ci + 1) * 2,
110  fsg->n_state * (n_ci + 1) * 2 / 1024);
111 
112 
113  for (s = 0; s < fsg->n_state; s++) {
114  fsg_arciter_t *itor;
115  for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) {
116  fsg_link_t *l = fsg_arciter_get(itor);
117  int32 dictwid;
119  if (fsg_link_wid(l) >= 0) {
120  dictwid = dict_wordid(lextree->dict,
121  fsg_model_word_str(lextree->fsg, l->wid));
122 
123  /*
124  * Add the first CIphone of l->wid to the rclist of state s, and
125  * the last CIphone to lclist of state d.
126  * (Filler phones are a pain to deal with. There is no direct
127  * marking of a filler phone; but only filler words are supposed to
128  * use such phones, so we use that fact. HACK!! FRAGILE!!)
129  */
130  if (fsg_model_is_filler(fsg, fsg_link_wid(l))) {
131  /* Filler phone; use silence phone as context */
132  lextree->rc[fsg_link_from_state(l)][silcipid] = 1;
133  lextree->lc[fsg_link_to_state(l)][silcipid] = 1;
134  }
135  else {
136  len = dict_pronlen(lextree->dict, dictwid);
137  lextree->rc[fsg_link_from_state(l)][dict_pron(lextree->dict, dictwid, 0)] = 1;
138  lextree->lc[fsg_link_to_state(l)][dict_pron(lextree->dict, dictwid, len - 1)] = 1;
139  }
140  }
141  }
142  }
143 
144  for (s = 0; s < fsg->n_state; s++) {
145  /*
146  * Add SIL phone to the lclist and rclist of each state. Strictly
147  * speaking, only needed at start and final states, respectively, but
148  * all states considered since the user may change the start and final
149  * states. In any case, most applications would have a silence self
150  * loop at each state, hence these would be needed anyway.
151  */
152  lextree->lc[s][silcipid] = 1;
153  lextree->rc[s][silcipid] = 1;
154  }
155 
156  /*
157  * Propagate lc and rc lists past null transitions. (Since FSG contains
158  * null transitions closure, no need to worry about a chain of successive
159  * null transitions. Right??)
160  *
161  * This can't be joined with the previous loop because we first calculate
162  * contexts and only then we can propagate them.
163  */
164  for (s = 0; s < fsg->n_state; s++) {
165  fsg_arciter_t *itor;
166  for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) {
167  fsg_link_t *l = fsg_arciter_get(itor);
168  if (fsg_link_wid(l) < 0) {
169 
170  /*
171  * lclist(d) |= lclist(s), because all the words ending up at s, can
172  * now also end at d, becoming the left context for words leaving d.
173  */
174  for (i = 0; i < n_ci; i++)
175  lextree->lc[fsg_link_to_state(l)][i] |= lextree->lc[fsg_link_from_state(l)][i];
176  /*
177  * Similarly, rclist(s) |= rclist(d), because all the words leaving d
178  * can equivalently leave s, becoming the right context for words
179  * ending up at s.
180  */
181  for (i = 0; i < n_ci; i++)
182  lextree->rc[fsg_link_from_state(l)][i] |= lextree->rc[fsg_link_to_state(l)][i];
183  }
184  }
185  }
186 
187  /* Convert the bit-vector representation into a list */
188  for (s = 0; s < fsg->n_state; s++) {
189  j = 0;
190  for (i = 0; i < n_ci; i++) {
191  if (lextree->lc[s][i]) {
192  lextree->lc[s][j] = i;
193  j++;
194  }
195  }
196  lextree->lc[s][j] = -1; /* Terminate the list */
197 
198  j = 0;
199  for (i = 0; i < n_ci; i++) {
200  if (lextree->rc[s][i]) {
201  lextree->rc[s][j] = i;
202  j++;
203  }
204  }
205  lextree->rc[s][j] = -1; /* Terminate the list */
206  }
207 }
208 
209 /*
210  * For now, allocate the entire lextree statically.
211  */
213 fsg_lextree_init(fsg_model_t * fsg, dict_t *dict, dict2pid_t *d2p,
214  bin_mdef_t *mdef, hmm_context_t *ctx,
215  int32 wip, int32 pip)
216 {
217  int32 s, n_leaves;
218  fsg_lextree_t *lextree;
219  fsg_pnode_t *pn;
220 
221  lextree = ckd_calloc(1, sizeof(fsg_lextree_t));
222  lextree->fsg = fsg;
223  lextree->root = ckd_calloc(fsg_model_n_state(fsg),
224  sizeof(fsg_pnode_t *));
225  lextree->alloc_head = ckd_calloc(fsg_model_n_state(fsg),
226  sizeof(fsg_pnode_t *));
227  lextree->ctx = ctx;
228  lextree->dict = dict;
229  lextree->d2p = d2p;
230  lextree->mdef = mdef;
231  lextree->wip = wip;
232  lextree->pip = pip;
233 
234  /* Compute lc and rc for fsg. */
235  fsg_lextree_lc_rc(lextree);
236 
237  /* Create lextree for each state, i.e. an HMM network that
238  * represents words for all arcs exiting that state. Note that
239  * for a dense grammar such as an N-gram model, this will
240  * rapidly exhaust all available memory. */
241  lextree->n_pnode = 0;
242  n_leaves = 0;
243  for (s = 0; s < fsg_model_n_state(fsg); s++) {
244  lextree->root[s] =
245  fsg_psubtree_init(lextree, fsg, s, &(lextree->alloc_head[s]));
246 
247  for (pn = lextree->alloc_head[s]; pn; pn = pn->alloc_next) {
248  lextree->n_pnode++;
249  if (pn->leaf)
250  ++n_leaves;
251  }
252  }
253  E_INFO("%d HMM nodes in lextree (%d leaves)\n",
254  lextree->n_pnode, n_leaves);
255  E_INFO("Allocated %d bytes (%d KiB) for all lextree nodes\n",
256  lextree->n_pnode * sizeof(fsg_pnode_t),
257  lextree->n_pnode * sizeof(fsg_pnode_t) / 1024);
258  E_INFO("Allocated %d bytes (%d KiB) for lextree leafnodes\n",
259  n_leaves * sizeof(fsg_pnode_t),
260  n_leaves * sizeof(fsg_pnode_t) / 1024);
261 
262 #if __FSG_DBG__
263  fsg_lextree_dump(lextree, stdout);
264 #endif
265 
266  return lextree;
267 }
268 
269 
270 void
271 fsg_lextree_dump(fsg_lextree_t * lextree, FILE * fp)
272 {
273  int32 s;
274 
275  for (s = 0; s < fsg_model_n_state(lextree->fsg); s++) {
276  fprintf(fp, "State %5d root %p\n", s, lextree->root[s]);
277  fsg_psubtree_dump(lextree, lextree->root[s], fp);
278  }
279  fflush(fp);
280 }
281 
282 
283 void
285 {
286  int32 s;
287 
288  if (lextree == NULL)
289  return;
290 
291  if (lextree->fsg)
292  for (s = 0; s < fsg_model_n_state(lextree->fsg); s++)
293  fsg_psubtree_free(lextree->alloc_head[s]);
294 
295  ckd_free_2d(lextree->lc);
296  ckd_free_2d(lextree->rc);
297  ckd_free(lextree->root);
298  ckd_free(lextree->alloc_head);
299  ckd_free(lextree);
300 }
301 
302 /******************************
303  * psubtree stuff starts here *
304  ******************************/
305 
306 void fsg_glist_linklist_free(fsg_glist_linklist_t *glist)
307 {
308  if (glist) {
309  fsg_glist_linklist_t *nxtglist;
310  if (glist->glist)
311  glist_free(glist->glist);
312  nxtglist = glist->next;
313  while (nxtglist) {
314  ckd_free(glist);
315  glist = nxtglist;
316  if (glist->glist)
317  glist_free(glist->glist);
318  nxtglist = glist->next;
319  }
320  ckd_free(glist);
321  }
322  return;
323 }
324 
325 void
327 {
328  int32 i;
329 
330  for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++)
331  ctxt->bv[i] = 0xffffffff;
332 }
333 
335 {
336  int32 i;
337  uint32 res = 0;
338 
339  for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++)
340  res |= (src->bv[i] = ~(sub->bv[i]) & src->bv[i]);
341  return res;
342 }
343 
344 
345 /*
346  * fsg_pnode_ctxt_sub(fsg_pnode_ctxt_t * src, fsg_pnode_ctxt_t * sub)
347  * This has been moved into a macro in fsg_psubtree.h
348  * because it is called so frequently!
349  */
350 
351 
352 /*
353  * Add the word emitted by the given transition (fsglink) to the given lextree
354  * (rooted at root), and return the new lextree root. (There may actually be
355  * several root nodes, maintained in a linked list via fsg_pnode_t.sibling.
356  * "root" is the head of this list.)
357  * lclist, rclist: sets of left and right context phones for this link.
358  * alloc_head: head of a linear list of all allocated pnodes for the parent
359  * FSG state, kept elsewhere and updated by this routine.
360  */
361 static fsg_pnode_t *
362 psubtree_add_trans(fsg_lextree_t *lextree,
363  fsg_pnode_t * root,
364  fsg_glist_linklist_t **curglist,
365  fsg_link_t * fsglink,
366  int16 *lclist, int16 *rclist,
367  fsg_pnode_t ** alloc_head)
368 {
369  int32 silcipid; /* Silence CI phone ID */
370  int32 pronlen; /* Pronunciation length */
371  int32 wid; /* FSG (not dictionary!!) word ID */
372  int32 dictwid; /* Dictionary (not FSG!!) word ID */
373  int32 ssid; /* Senone Sequence ID */
374  int32 tmatid;
375  gnode_t *gn;
376  fsg_pnode_t *pnode, *pred, *head;
377  int32 n_ci, p, lc, rc;
378  glist_t lc_pnodelist; /* Temp pnodes list for different left contexts */
379  glist_t rc_pnodelist; /* Temp pnodes list for different right contexts */
380  int32 i, j;
381  int n_lc_alloc = 0, n_int_alloc = 0, n_rc_alloc = 0;
382 
383  silcipid = bin_mdef_silphone(lextree->mdef);
384  n_ci = bin_mdef_n_ciphone(lextree->mdef);
385 
386  wid = fsg_link_wid(fsglink);
387  assert(wid >= 0); /* Cannot be a null transition */
388  dictwid = dict_wordid(lextree->dict,
389  fsg_model_word_str(lextree->fsg, wid));
390  pronlen = dict_pronlen(lextree->dict, dictwid);
391  assert(pronlen >= 1);
392 
393  assert(lclist[0] >= 0); /* At least one phonetic context provided */
394  assert(rclist[0] >= 0);
395 
396  head = *alloc_head;
397  pred = NULL;
398 
399  if (pronlen == 1) { /* Single-phone word */
400  int ci = dict_first_phone(lextree->dict, dictwid);
401  /* Only non-filler words are mpx */
402  if (dict_filler_word(lextree->dict, dictwid)) {
403  /*
404  * Left diphone ID for single-phone words already assumes SIL is right
405  * context; only left contexts need to be handled.
406  */
407  lc_pnodelist = NULL;
408 
409  for (i = 0; lclist[i] >= 0; i++) {
410  lc = lclist[i];
411  ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid);
412  tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid));
413  /* Check if this ssid already allocated for some other context */
414  for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
415  pnode = (fsg_pnode_t *) gnode_ptr(gn);
416 
417  if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) {
418  /* already allocated; share it for this context phone */
419  fsg_pnode_add_ctxt(pnode, lc);
420  break;
421  }
422  }
423 
424  if (!gn) { /* ssid not already allocated */
425  pnode =
426  (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
427  pnode->ctx = lextree->ctx;
428  pnode->next.fsglink = fsglink;
429  pnode->logs2prob =
430  (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
431  + lextree->wip + lextree->pip;
432  pnode->ci_ext = dict_first_phone(lextree->dict, dictwid);
433  pnode->ppos = 0;
434  pnode->leaf = TRUE;
435  pnode->sibling = root; /* All root nodes linked together */
436  fsg_pnode_add_ctxt(pnode, lc); /* Initially zeroed by calloc above */
437  pnode->alloc_next = head;
438  head = pnode;
439  root = pnode;
440  ++n_lc_alloc;
441 
442  hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
443 
444  lc_pnodelist =
445  glist_add_ptr(lc_pnodelist, (void *) pnode);
446  }
447  }
448 
449  glist_free(lc_pnodelist);
450  }
451  else { /* Filler word; no context modelled */
452  ssid = bin_mdef_pid2ssid(lextree->mdef, ci); /* probably the same... */
453  tmatid = bin_mdef_pid2tmatid(lextree->mdef, ci);
454 
455  pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
456  pnode->ctx = lextree->ctx;
457  pnode->next.fsglink = fsglink;
458  pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
459  + lextree->wip + lextree->pip;
460  pnode->ci_ext = silcipid; /* Presents SIL as context to neighbors */
461  pnode->ppos = 0;
462  pnode->leaf = TRUE;
463  pnode->sibling = root;
464  fsg_pnode_add_all_ctxt(&(pnode->ctxt));
465  pnode->alloc_next = head;
466  head = pnode;
467  root = pnode;
468  ++n_int_alloc;
469 
470  hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
471  }
472  }
473  else { /* Multi-phone word */
474  fsg_pnode_t **ssid_pnode_map; /* Temp array of ssid->pnode mapping */
475  ssid_pnode_map =
476  (fsg_pnode_t **) ckd_calloc(n_ci, sizeof(fsg_pnode_t *));
477  lc_pnodelist = NULL;
478  rc_pnodelist = NULL;
479 
480  for (p = 0; p < pronlen; p++) {
481  int ci = dict_pron(lextree->dict, dictwid, p);
482  if (p == 0) { /* Root phone, handle required left contexts */
483  /* Find if we already have an lc_pnodelist for the first phone of this word */
484  fsg_glist_linklist_t *glist;
485 
486  rc = dict_pron(lextree->dict, dictwid, 1);
487  for (glist = *curglist;
488  glist && glist->glist;
489  glist = glist->next) {
490  if (glist->ci == ci && glist->rc == rc)
491  break;
492  }
493  if (glist && glist->glist) {
494  assert(glist->ci == ci && glist->rc == rc);
495  /* We've found a valid glist. Hook to it and move to next phoneme */
496  E_DEBUG(2,("Found match for (%d,%d)\n", ci, rc));
497  lc_pnodelist = glist->glist;
498  /* Set the predecessor node for the future tree first */
499  pred = (fsg_pnode_t *) gnode_ptr(lc_pnodelist);
500  continue;
501  }
502  else {
503  /* Two cases that can bring us here
504  * a. glist == NULL, i.e. end of current list. Create new entry.
505  * b. glist->glist == NULL, i.e. first entry into list.
506  */
507  if (glist == NULL) { /* Case a; reduce it to case b by allocing glist */
508  glist = (fsg_glist_linklist_t*) ckd_calloc(1, sizeof(fsg_glist_linklist_t));
509  glist->next = *curglist;
510  *curglist = glist;
511  }
512  glist->ci = ci;
513  glist->rc = rc;
514  lc_pnodelist = glist->glist = NULL; /* Gets created below */
515  }
516 
517  for (i = 0; lclist[i] >= 0; i++) {
518  lc = lclist[i];
519  ssid = dict2pid_ldiph_lc(lextree->d2p, ci, rc, lc);
520  tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid));
521  /* Compression is not done by d2p, so we do it
522  * here. This might be slow, but it might not
523  * be... we'll see. */
524  pnode = ssid_pnode_map[0];
525  for (j = 0; j < n_ci && ssid_pnode_map[j] != NULL; ++j) {
526  pnode = ssid_pnode_map[j];
527  if (hmm_nonmpx_ssid(&pnode->hmm) == ssid)
528  break;
529  }
530  assert(j < n_ci);
531  if (!pnode) { /* Allocate pnode for this new ssid */
532  pnode =
533  (fsg_pnode_t *) ckd_calloc(1,
534  sizeof
535  (fsg_pnode_t));
536  pnode->ctx = lextree->ctx;
537  /* This bit is tricky! For now we'll put the prob in the final link only */
538  /* pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
539  + lextree->wip + lextree->pip; */
540  pnode->logs2prob = lextree->wip + lextree->pip;
541  pnode->ci_ext = dict_first_phone(lextree->dict, dictwid);
542  pnode->ppos = 0;
543  pnode->leaf = FALSE;
544  pnode->sibling = root; /* All root nodes linked together */
545  pnode->alloc_next = head;
546  head = pnode;
547  root = pnode;
548  ++n_lc_alloc;
549 
550  hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
551 
552  lc_pnodelist =
553  glist_add_ptr(lc_pnodelist, (void *) pnode);
554  ssid_pnode_map[j] = pnode;
555  }
556  fsg_pnode_add_ctxt(pnode, lc);
557  }
558  /* Put the lc_pnodelist back into glist */
559  glist->glist = lc_pnodelist;
560 
561  /* The predecessor node for the future tree is the root */
562  pred = root;
563  }
564  else if (p != pronlen - 1) { /* Word internal phone */
565  fsg_pnode_t *pnodeyoungest;
566 
567  ssid = dict2pid_internal(lextree->d2p, dictwid, p);
568  tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_pron (lextree->dict, dictwid, p));
569  /* First check if we already have this ssid in our tree */
570  pnode = pred->next.succ;
571  pnodeyoungest = pnode; /* The youngest sibling */
572  while (pnode && (hmm_nonmpx_ssid(&pnode->hmm) != ssid || pnode->leaf)) {
573  pnode = pnode->sibling;
574  }
575  if (pnode && (hmm_nonmpx_ssid(&pnode->hmm) == ssid && !pnode->leaf)) {
576  /* Found the ssid; go to next phoneme */
577  E_DEBUG(2,("Found match for %d\n", ci));
578  pred = pnode;
579  continue;
580  }
581 
582  /* pnode not found, allocate it */
583  pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
584  pnode->ctx = lextree->ctx;
585  pnode->logs2prob = lextree->pip;
586  pnode->ci_ext = dict_pron(lextree->dict, dictwid, p);
587  pnode->ppos = p;
588  pnode->leaf = FALSE;
589  pnode->sibling = pnodeyoungest; /* May be NULL */
590  if (p == 1) { /* Predecessor = set of root nodes for left ctxts */
591  for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
592  pred = (fsg_pnode_t *) gnode_ptr(gn);
593  pred->next.succ = pnode;
594  }
595  }
596  else { /* Predecessor = word internal node */
597  pred->next.succ = pnode;
598  }
599  pnode->alloc_next = head;
600  head = pnode;
601  ++n_int_alloc;
602 
603  hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
604 
605  pred = pnode;
606  }
607  else { /* Leaf phone, handle required right contexts */
608  /* Note, leaf phones are not part of the tree */
609  xwdssid_t *rssid;
610  memset((void *) ssid_pnode_map, 0,
611  n_ci * sizeof(fsg_pnode_t *));
612  lc = dict_pron(lextree->dict, dictwid, p-1);
613  rssid = dict2pid_rssid(lextree->d2p, ci, lc);
614  tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_pron (lextree->dict, dictwid, p));
615 
616  for (i = 0; rclist[i] >= 0; i++) {
617  rc = rclist[i];
618 
619  j = rssid->cimap[rc];
620  ssid = rssid->ssid[j];
621  pnode = ssid_pnode_map[j];
622 
623  if (!pnode) { /* Allocate pnode for this new ssid */
624  pnode =
625  (fsg_pnode_t *) ckd_calloc(1,
626  sizeof
627  (fsg_pnode_t));
628  pnode->ctx = lextree->ctx;
629  /* We are plugging the word prob here. Ugly */
630  /* pnode->logs2prob = lextree->pip; */
631  pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
632  + lextree->pip;
633  pnode->ci_ext = dict_pron(lextree->dict, dictwid, p);
634  pnode->ppos = p;
635  pnode->leaf = TRUE;
636  pnode->sibling = rc_pnodelist ?
637  (fsg_pnode_t *) gnode_ptr(rc_pnodelist) : NULL;
638  pnode->next.fsglink = fsglink;
639  pnode->alloc_next = head;
640  head = pnode;
641  ++n_rc_alloc;
642 
643  hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
644 
645  rc_pnodelist =
646  glist_add_ptr(rc_pnodelist, (void *) pnode);
647  ssid_pnode_map[j] = pnode;
648  }
649  else {
650  assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid);
651  }
652  fsg_pnode_add_ctxt(pnode, rc);
653  }
654 
655  if (p == 1) { /* Predecessor = set of root nodes for left ctxts */
656  for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
657  pred = (fsg_pnode_t *) gnode_ptr(gn);
658  if (!pred->next.succ)
659  pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
660  else {
661  /* Link to the end of the sibling chain */
662  fsg_pnode_t *succ = pred->next.succ;
663  while (succ->sibling) succ = succ->sibling;
664  succ->sibling = (fsg_pnode_t*) gnode_ptr(rc_pnodelist);
665  /* Since all entries of lc_pnodelist point
666  to the same array, sufficient to update it once */
667  break;
668  }
669  }
670  }
671  else { /* Predecessor = word internal node */
672  if (!pred->next.succ)
673  pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
674  else {
675  /* Link to the end of the sibling chain */
676  fsg_pnode_t *succ = pred->next.succ;
677  while (succ->sibling) succ = succ->sibling;
678  succ->sibling = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
679  }
680  }
681  }
682  }
683 
684  ckd_free((void *) ssid_pnode_map);
685  /* glist_free(lc_pnodelist); Nope; this gets freed outside */
686  glist_free(rc_pnodelist);
687  }
688 
689  E_DEBUG(2,("Allocated %d HMMs (%d lc, %d rc, %d internal)\n",
690  n_lc_alloc + n_rc_alloc + n_int_alloc,
691  n_lc_alloc, n_rc_alloc, n_int_alloc));
692  *alloc_head = head;
693 
694  return root;
695 }
696 
697 
698 static fsg_pnode_t *
699 fsg_psubtree_init(fsg_lextree_t *lextree,
700  fsg_model_t * fsg, int32 from_state,
701  fsg_pnode_t ** alloc_head)
702 {
703  int32 dst;
704  gnode_t *gn;
705  fsg_link_t *fsglink;
706  fsg_pnode_t *root;
707  int32 n_ci, n_arc;
708  fsg_glist_linklist_t *glist = NULL;
709 
710  root = NULL;
711  assert(*alloc_head == NULL);
712 
713  n_ci = bin_mdef_n_ciphone(lextree->mdef);
714  if (n_ci > (FSG_PNODE_CTXT_BVSZ * 32)) {
715  E_FATAL
716  ("#phones > %d; increase FSG_PNODE_CTXT_BVSZ and recompile\n",
717  FSG_PNODE_CTXT_BVSZ * 32);
718  }
719  n_arc = 0;
720  for (dst = 0; dst < fsg_model_n_state(fsg); dst++) {
721  /* Add all links from from_state to dst */
722  for (gn = fsg_model_trans(fsg, from_state, dst); gn;
723  gn = gnode_next(gn)) {
724  /* Add word emitted by this transition (fsglink) to lextree */
725  fsglink = (fsg_link_t *) gnode_ptr(gn);
726 
727  assert(fsg_link_wid(fsglink) >= 0); /* Cannot be a null trans */
728 
729  E_DEBUG(2,("Building lextree for arc from %d to %d: %s\n",
730  from_state, dst, fsg_model_word_str(fsg, fsg_link_wid(fsglink))));
731  root = psubtree_add_trans(lextree, root, &glist, fsglink,
732  lextree->lc[from_state],
733  lextree->rc[dst],
734  alloc_head);
735  ++n_arc;
736  }
737  }
738  E_DEBUG(2,("State %d has %d outgoing arcs\n", from_state, n_arc));
739 
740  fsg_glist_linklist_free(glist);
741 
742  return root;
743 }
744 
745 
746 static void
747 fsg_psubtree_free(fsg_pnode_t * head)
748 {
749  fsg_pnode_t *next;
750 
751  while (head) {
752  next = head->alloc_next;
753  hmm_deinit(&head->hmm);
754  ckd_free(head);
755  head = next;
756  }
757 }
758 
759 void fsg_psubtree_dump_node(fsg_lextree_t *tree, fsg_pnode_t *node, FILE *fp)
760 {
761  int32 i;
762  fsg_link_t *tl;
763 
764  /* Indentation */
765  for (i = 0; i <= node->ppos; i++)
766  fprintf(fp, " ");
767 
768  fprintf(fp, "%p.@", node); /* Pointer used as node
769  * ID */
770  fprintf(fp, " %5d.SS", hmm_nonmpx_ssid(&node->hmm));
771  fprintf(fp, " %10d.LP", node->logs2prob);
772  fprintf(fp, " %p.SIB", node->sibling);
773  fprintf(fp, " %s.%d", bin_mdef_ciphone_str(tree->mdef, node->ci_ext), node->ppos);
774  if ((node->ppos == 0) || node->leaf) {
775  fprintf(fp, " [");
776  for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++)
777  fprintf(fp, "%08x", node->ctxt.bv[i]);
778  fprintf(fp, "]");
779  }
780  if (node->leaf) {
781  tl = node->next.fsglink;
782  fprintf(fp, " {%s[%d->%d](%d)}",
783  fsg_model_word_str(tree->fsg, tl->wid),
784  tl->from_state, tl->to_state, tl->logs2prob);
785  } else {
786  fprintf(fp, " %p.NXT", node->next.succ);
787  }
788  fprintf(fp, "\n");
789 
790  return;
791 }
792 
793 void
794 fsg_psubtree_dump(fsg_lextree_t *tree, fsg_pnode_t *root, FILE * fp)
795 {
796  fsg_pnode_t *succ;
797 
798  if (root == NULL) return;
799  if (root->ppos == 0) {
800  while(root->sibling && root->sibling->next.succ == root->next.succ) {
801  fsg_psubtree_dump_node(tree, root, fp);
802  root = root->sibling;
803  }
804  fflush(fp);
805  }
806 
807  fsg_psubtree_dump_node(tree, root, fp);
808 
809  if (root->leaf) {
810  if (root->ppos == 0 && root->sibling) { // For single-phone words
811  fsg_psubtree_dump(tree, root->sibling,fp);
812  }
813  return;
814  }
815 
816  succ = root->next.succ;
817  while(succ) {
818  fsg_psubtree_dump(tree, succ,fp);
819  succ = succ->sibling;
820  }
821 
822  if (root->ppos == 0) {
823  fsg_psubtree_dump(tree, root->sibling,fp);
824  fflush(fp);
825  }
826 
827  return;
828 }
829 
830 void
832 {
833  hmm_clear(&pnode->hmm);
834 }
void fsg_lextree_free(fsg_lextree_t *lextree)
Free lextrees for an FSG.
Definition: fsg_lextree.c:284
hmm_context_t * ctx
HMM context structure.
Definition: fsg_lextree.h:218
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
Definition: hmm.c:89
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
Definition: dict.c:382
void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt)
Set all flags on in the given context bitvector.
Definition: fsg_lextree.c:326
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
Definition: bin_mdef.c:738
bin_mdef_t * mdef
Model definition (triphone mappings).
Definition: fsg_lextree.h:221
uint32 fsg_pnode_ctxt_sub_generic(fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub)
Generic variant for arbitrary size.
Definition: fsg_lextree.c:334
void fsg_lextree_dump(fsg_lextree_t *lextree, FILE *fp)
Print an FSG lextree to a file for debugging.
Definition: fsg_lextree.c:271
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
Definition: hmm.c:111
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
Definition: dict2pid.h:179
void fsg_psubtree_pnode_deactivate(fsg_pnode_t *pnode)
Mark the given pnode as inactive (for search).
Definition: fsg_lextree.c:831
dict_t * dict
Pronunciation dictionary for this FSG.
Definition: fsg_lextree.h:219
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
Definition: dict.c:396
Shared information between a set of HMMs.
Collection of lextrees for an FSG.
Definition: fsg_lextree.h:216
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:77
a structure for a dictionary.
Definition: dict.h:79
s3ssid_t dict2pid_internal(dict2pid_t *d2p, int32 wid, int pos)
Return the senone sequence ID for the given word position.
Definition: dict2pid.c:367
void hmm_clear(hmm_t *h)
Reset the states of the HMM to the invalid condition.
Definition: hmm.c:183
cross word triphone model structure
Definition: dict2pid.h:137
int16 ** rc
Right context triphone mappings for FSG.
Definition: fsg_lextree.h:241
int16 ** lc
Left context triphone mappings for FSG.
Definition: fsg_lextree.h:240
fsg_lextree_t * fsg_lextree_init(fsg_model_t *fsg, dict_t *dict, dict2pid_t *d2p, bin_mdef_t *mdef, hmm_context_t *ctx, int32 wip, int32 pip)
Create, initialize, and return a new phonetic lextree for the given FSG.
Definition: fsg_lextree.c:213
fsg_model_t * fsg
The fsg for which this lextree is built.
Definition: fsg_lextree.h:217
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
Definition: dict2pid.h:139
#define dict_pron(d, w, p)
The CI phones of the word w at position p.
Definition: dict.h:168
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:148
s3ssid_t * ssid
Senone Sequence ID list for all context ciphones.
Definition: dict2pid.h:138
dict2pid_t * d2p
Context-dependent phone mappings for this FSG.
Definition: fsg_lextree.h:220