PocketSphinx  0.6
ps_lattice.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
42 /* System headers. */
43 #include <assert.h>
44 #include <string.h>
45 #include <math.h>
46 
47 /* SphinxBase headers. */
48 #include <sphinxbase/ckd_alloc.h>
49 #include <sphinxbase/listelem_alloc.h>
50 #include <sphinxbase/strfuncs.h>
51 #include <sphinxbase/err.h>
52 #include <sphinxbase/pio.h>
53 
54 /* Local headers. */
55 #include "pocketsphinx_internal.h"
56 #include "ps_lattice_internal.h"
57 #include "ngram_search.h"
58 #include "dict.h"
59 
60 /*
61  * Create a directed link between "from" and "to" nodes, but if a link already exists,
62  * choose one with the best ascr.
63  */
64 void
66  int32 score, int32 ef)
67 {
68  latlink_list_t *fwdlink;
69 
70  /* Look for an existing link between "from" and "to" nodes */
71  for (fwdlink = from->exits; fwdlink; fwdlink = fwdlink->next)
72  if (fwdlink->link->to == to)
73  break;
74 
75  if (fwdlink == NULL) {
76  latlink_list_t *revlink;
77  ps_latlink_t *link;
78 
79  /* No link between the two nodes; create a new one */
80  link = listelem_malloc(dag->latlink_alloc);
81  fwdlink = listelem_malloc(dag->latlink_list_alloc);
82  revlink = listelem_malloc(dag->latlink_list_alloc);
83 
84  link->from = from;
85  link->to = to;
86  link->ascr = score;
87  link->ef = ef;
88  link->best_prev = NULL;
89 
90  fwdlink->link = revlink->link = link;
91  fwdlink->next = from->exits;
92  from->exits = fwdlink;
93  revlink->next = to->entries;
94  to->entries = revlink;
95  }
96  else {
97  /* Link already exists; just retain the best ascr */
98  if (score BETTER_THAN fwdlink->link->ascr) {
99  fwdlink->link->ascr = score;
100  fwdlink->link->ef = ef;
101  }
102  }
103 }
104 
105 void
106 ps_lattice_bypass_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen)
107 {
108  ps_latnode_t *node;
109  int32 score;
110 
111  /* Bypass filler nodes */
112  for (node = dag->nodes; node; node = node->next) {
113  latlink_list_t *revlink;
114  if (node == dag->end || !dict_filler_word(dag->dict, node->basewid))
115  continue;
116 
117  /* Replace each link entering filler node with links to all its successors */
118  for (revlink = node->entries; revlink; revlink = revlink->next) {
119  latlink_list_t *forlink;
120  ps_latlink_t *rlink = revlink->link;
121 
122  score = (node->basewid == dag->silence) ? silpen : fillpen;
123  score += rlink->ascr;
124  /*
125  * Make links from predecessor of filler (from) to successors of filler.
126  * But if successor is a filler, it has already been eliminated since it
127  * appears earlier in latnode_list (see build...). So it can be skipped.
128  */
129  for (forlink = node->exits; forlink; forlink = forlink->next) {
130  ps_latlink_t *flink = forlink->link;
131  if (flink->to && rlink->from &&
132  !dict_filler_word(dag->dict, flink->to->basewid)) {
133  ps_lattice_link(dag, rlink->from, flink->to,
134  score + flink->ascr, flink->ef);
135  }
136  }
137  }
138  node->reachable = FALSE;
139  }
140 }
141 
142 static void
143 delete_node(ps_lattice_t *dag, ps_latnode_t *node)
144 {
145  latlink_list_t *x, *next_x;
146 
147  for (x = node->exits; x; x = next_x) {
148  next_x = x->next;
149  x->link->from = NULL;
150  listelem_free(dag->latlink_list_alloc, x);
151  }
152  for (x = node->entries; x; x = next_x) {
153  next_x = x->next;
154  x->link->to = NULL;
155  listelem_free(dag->latlink_list_alloc, x);
156  }
157  listelem_free(dag->latnode_alloc, node);
158 }
159 
160 
161 static void
162 remove_dangling_links(ps_lattice_t *dag, ps_latnode_t *node)
163 {
164  latlink_list_t *x, *prev_x, *next_x;
165 
166  prev_x = NULL;
167  for (x = node->exits; x; x = next_x) {
168  next_x = x->next;
169  if (x->link->to == NULL) {
170  if (prev_x)
171  prev_x->next = next_x;
172  else
173  node->exits = next_x;
174  listelem_free(dag->latlink_alloc, x->link);
175  listelem_free(dag->latlink_list_alloc, x);
176  }
177  else
178  prev_x = x;
179  }
180  prev_x = NULL;
181  for (x = node->entries; x; x = next_x) {
182  next_x = x->next;
183  if (x->link->from == NULL) {
184  if (prev_x)
185  prev_x->next = next_x;
186  else
187  node->entries = next_x;
188  listelem_free(dag->latlink_alloc, x->link);
189  listelem_free(dag->latlink_list_alloc, x);
190  }
191  else
192  prev_x = x;
193  }
194 }
195 
196 void
198 {
199  ps_latnode_t *node, *prev_node, *next_node;
200  int i;
201 
202  /* Remove unreachable nodes from the list of nodes. */
203  prev_node = NULL;
204  for (node = dag->nodes; node; node = next_node) {
205  next_node = node->next;
206  if (!node->reachable) {
207  if (prev_node)
208  prev_node->next = next_node;
209  else
210  dag->nodes = next_node;
211  /* Delete this node and NULLify links to it. */
212  delete_node(dag, node);
213  }
214  else
215  prev_node = node;
216  }
217 
218  /* Remove all links to and from unreachable nodes. */
219  i = 0;
220  for (node = dag->nodes; node; node = node->next) {
221  /* Assign sequence numbers. */
222  node->id = i++;
223 
224  /* We should obviously not encounter unreachable nodes here! */
225  assert(node->reachable);
226 
227  /* Remove all links that go nowhere. */
228  remove_dangling_links(dag, node);
229  }
230 }
231 
232 int32
233 ps_lattice_write(ps_lattice_t *dag, char const *filename)
234 {
235  FILE *fp;
236  int32 i;
237  ps_latnode_t *d, *initial, *final;
238 
239  initial = dag->start;
240  final = dag->end;
241 
242  E_INFO("Writing lattice file: %s\n", filename);
243  if ((fp = fopen(filename, "w")) == NULL) {
244  E_ERROR_SYSTEM("Failed to open lattice file '%s' for writing", filename);
245  return -1;
246  }
247 
248  /* Stupid Sphinx-III lattice code expects 'getcwd:' here */
249  fprintf(fp, "# getcwd: /this/is/bogus\n");
250  fprintf(fp, "# -logbase %e\n", logmath_get_base(dag->lmath));
251  fprintf(fp, "#\n");
252 
253  fprintf(fp, "Frames %d\n", dag->n_frames);
254  fprintf(fp, "#\n");
255 
256  for (i = 0, d = dag->nodes; d; d = d->next, i++);
257  fprintf(fp,
258  "Nodes %d (NODEID WORD STARTFRAME FIRST-ENDFRAME LAST-ENDFRAME)\n",
259  i);
260  for (i = 0, d = dag->nodes; d; d = d->next, i++) {
261  d->id = i;
262  fprintf(fp, "%d %s %d %d %d ; %d\n",
263  i, dict_wordstr(dag->dict, d->wid),
264  d->sf, d->fef, d->lef, d->node_id);
265  }
266  fprintf(fp, "#\n");
267 
268  fprintf(fp, "Initial %d\nFinal %d\n", initial->id, final->id);
269  fprintf(fp, "#\n");
270 
271  /* Don't bother with this, it's not used by anything. */
272  fprintf(fp, "BestSegAscr %d (NODEID ENDFRAME ASCORE)\n",
273  0 /* #BPTable entries */ );
274  fprintf(fp, "#\n");
275 
276  fprintf(fp, "Edges (FROM-NODEID TO-NODEID ASCORE)\n");
277  for (d = dag->nodes; d; d = d->next) {
278  latlink_list_t *l;
279  for (l = d->exits; l; l = l->next) {
280  if (l->link->ascr WORSE_THAN WORST_SCORE || l->link->ascr BETTER_THAN 0)
281  continue;
282  fprintf(fp, "%d %d %d\n",
283  d->id, l->link->to->id, l->link->ascr << SENSCR_SHIFT);
284  }
285  }
286  fprintf(fp, "End\n");
287  fclose(fp);
288 
289  return 0;
290 }
291 
292 int32
293 ps_lattice_write_htk(ps_lattice_t *dag, char const *filename)
294 {
295  FILE *fp;
296  ps_latnode_t *d, *initial, *final;
297  int32 j, n_links, n_nodes;
298 
299  initial = dag->start;
300  final = dag->end;
301 
302  E_INFO("Writing lattice file: %s\n", filename);
303  if ((fp = fopen(filename, "w")) == NULL) {
304  E_ERROR_SYSTEM("Failed to open lattice file '%s' for writing", filename);
305  return -1;
306  }
307 
308  for (n_links = n_nodes = 0, d = dag->nodes; d; d = d->next) {
309  latlink_list_t *l;
310  if (!d->reachable)
311  continue;
312  d->id = n_nodes;
313  for (l = d->exits; l; l = l->next) {
314  if (l->link->to == NULL || !l->link->to->reachable)
315  continue;
316  if (l->link->ascr WORSE_THAN WORST_SCORE || l->link->ascr BETTER_THAN 0)
317  continue;
318  ++n_links;
319  }
320  ++n_nodes;
321  }
322 
323  fprintf(fp, "# Lattice generated by PocketSphinx\n");
324  fprintf(fp, "#\n# Header\n#\n");
325  fprintf(fp, "VERSION=1.0\n");
326  fprintf(fp, "start=%d\n", initial->id);
327  fprintf(fp, "end=%d\n", final->id);
328  fprintf(fp, "#\n");
329 
330  fprintf(fp, "N=%d\tL=%d\n", n_nodes, n_links);
331  fprintf(fp, "#\n# Node definitions\n#\n");
332  for (d = dag->nodes; d; d = d->next) {
333  char const *word = dict_wordstr(dag->dict, d->wid);
334  char const *c = strrchr(word, '(');
335  int altpron = 1;
336  if (!d->reachable)
337  continue;
338  if (c)
339  altpron = atoi(c + 1);
340  word = dict_basestr(dag->dict, d->wid);
341  if (d->wid == dict_startwid(dag->dict))
342  word = "!SENT_START";
343  else if (d->wid == dict_finishwid(dag->dict))
344  word = "!SENT_END";
345  else if (dict_filler_word(dag->dict, d->wid))
346  word = "!NULL";
347  fprintf(fp, "I=%d\tt=%.2f\tW=%s\tv=%d\n",
348  d->id, (double)d->sf / dag->frate,
349  word, altpron);
350  }
351  fprintf(fp, "#\n# Link definitions\n#\n");
352  for (j = 0, d = dag->nodes; d; d = d->next) {
353  latlink_list_t *l;
354  if (!d->reachable)
355  continue;
356  for (l = d->exits; l; l = l->next) {
357  if (l->link->to == NULL || !l->link->to->reachable)
358  continue;
359  if (l->link->ascr WORSE_THAN WORST_SCORE || l->link->ascr BETTER_THAN 0)
360  continue;
361  fprintf(fp, "J=%d\tS=%d\tE=%d\ta=%f\tp=%g\n", j++,
362  d->id, l->link->to->id,
363  logmath_log_to_ln(dag->lmath, l->link->ascr << SENSCR_SHIFT),
364  logmath_exp(dag->lmath, l->link->alpha + l->link->beta - dag->norm));
365  }
366  }
367  fclose(fp);
368 
369  return 0;
370 }
371 
372 /* Read parameter from a lattice file*/
373 static int
374 dag_param_read(lineiter_t *li, char *param)
375 {
376  int32 n;
377 
378  while ((li = lineiter_next(li)) != NULL) {
379  char *c;
380 
381  /* Ignore comments. */
382  if (li->buf[0] == '#')
383  continue;
384 
385  /* Find the first space. */
386  c = strchr(li->buf, ' ');
387  if (c == NULL) continue;
388 
389  /* Check that the first field equals param and that there's a number after it. */
390  if (strncmp(li->buf, param, strlen(param)) == 0
391  && sscanf(c + 1, "%d", &n) == 1)
392  return n;
393  }
394  return -1;
395 }
396 
397 /* Mark every node that has a path to the argument dagnode as "reachable". */
398 static void
399 dag_mark_reachable(ps_latnode_t * d)
400 {
401  latlink_list_t *l;
402 
403  d->reachable = 1;
404  for (l = d->entries; l; l = l->next)
405  if (l->link->from && !l->link->from->reachable)
406  dag_mark_reachable(l->link->from);
407 }
408 
409 ps_lattice_t *
411  char const *file)
412 {
413  FILE *fp;
414  int32 ispipe;
415  lineiter_t *line;
416  float64 lb;
417  float32 logratio;
418  ps_latnode_t *tail;
419  ps_latnode_t **darray;
420  ps_lattice_t *dag;
421  int i, k, n_nodes;
422  int32 pip, silpen, fillpen;
423 
424  dag = ckd_calloc(1, sizeof(*dag));
425 
426  if (ps) {
427  dag->search = ps->search;
428  dag->dict = dict_retain(ps->dict);
429  dag->lmath = logmath_retain(ps->lmath);
430  dag->frate = cmd_ln_int32_r(dag->search->config, "-frate");
431  }
432  else {
433  dag->dict = dict_init(NULL, NULL);
434  dag->lmath = logmath_init(1.0001, 0, FALSE);
435  dag->frate = 100;
436  }
437  dag->silence = dict_silwid(dag->dict);
438  dag->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t));
439  dag->latlink_alloc = listelem_alloc_init(sizeof(ps_latlink_t));
440  dag->latlink_list_alloc = listelem_alloc_init(sizeof(latlink_list_t));
441  dag->refcount = 1;
442 
443  tail = NULL;
444  darray = NULL;
445 
446  E_INFO("Reading DAG file: %s\n", file);
447  if ((fp = fopen_compchk(file, &ispipe)) == NULL) {
448  E_ERROR_SYSTEM("Failed to open DAG file '%s' for reading", file);
449  return NULL;
450  }
451  line = lineiter_start(fp);
452 
453  /* Read and verify logbase (ONE BIG HACK!!) */
454  if (line == NULL) {
455  E_ERROR("Premature EOF(%s)\n", file);
456  goto load_error;
457  }
458  if (strncmp(line->buf, "# getcwd: ", 10) != 0) {
459  E_ERROR("%s does not begin with '# getcwd: '\n%s", file, line->buf);
460  goto load_error;
461  }
462  if ((line = lineiter_next(line)) == NULL) {
463  E_ERROR("Premature EOF(%s)\n", file);
464  goto load_error;
465  }
466  if ((strncmp(line->buf, "# -logbase ", 11) != 0)
467  || (sscanf(line->buf + 11, "%lf", &lb) != 1)) {
468  E_WARN("%s: Cannot find -logbase in header\n", file);
469  lb = 1.0001;
470  }
471  logratio = 1.0f;
472  if (dag->lmath == NULL)
473  dag->lmath = logmath_init(lb, 0, TRUE);
474  else {
475  float32 pb = logmath_get_base(dag->lmath);
476  if (fabs(lb - pb) >= 0.0001) {
477  E_WARN("Inconsistent logbases: %f vs %f: will compensate\n", lb, pb);
478  logratio = (float32)(log(lb) / log(pb));
479  E_INFO("Lattice log ratio: %f\n", logratio);
480  }
481  }
482  /* Read Frames parameter */
483  dag->n_frames = dag_param_read(line, "Frames");
484  if (dag->n_frames <= 0) {
485  E_ERROR("Frames parameter missing or invalid\n");
486  goto load_error;
487  }
488  /* Read Nodes parameter */
489  n_nodes = dag_param_read(line, "Nodes");
490  if (n_nodes <= 0) {
491  E_ERROR("Nodes parameter missing or invalid\n");
492  goto load_error;
493  }
494 
495  /* Read nodes */
496  darray = ckd_calloc(n_nodes, sizeof(*darray));
497  for (i = 0; i < n_nodes; i++) {
498  ps_latnode_t *d;
499  int32 w;
500  int seqid, sf, fef, lef;
501  char wd[256];
502 
503  if ((line = lineiter_next(line)) == NULL) {
504  E_ERROR("Premature EOF while loading Nodes(%s)\n", file);
505  goto load_error;
506  }
507 
508  if ((k =
509  sscanf(line->buf, "%d %255s %d %d %d", &seqid, wd, &sf, &fef,
510  &lef)) != 5) {
511  E_ERROR("Cannot parse line: %s, value of count %d\n", line->buf, k);
512  goto load_error;
513  }
514 
515  w = dict_wordid(dag->dict, wd);
516  if (w < 0) {
517  if (dag->search == NULL) {
518  char *ww = ckd_salloc(wd);
519  if (dict_word2basestr(ww) != -1) {
520  if (dict_wordid(dag->dict, ww) == BAD_S3WID)
521  dict_add_word(dag->dict, ww, NULL, 0);
522  }
523  ckd_free(ww);
524  w = dict_add_word(dag->dict, wd, NULL, 0);
525  }
526  if (w < 0) {
527  E_ERROR("Unknown word in line: %s\n", line->buf);
528  goto load_error;
529  }
530  }
531 
532  if (seqid != i) {
533  E_ERROR("Seqno error: %s\n", line->buf);
534  goto load_error;
535  }
536 
537  d = listelem_malloc(dag->latnode_alloc);
538  darray[i] = d;
539  d->wid = w;
540  d->basewid = dict_basewid(dag->dict, w);
541  d->id = seqid;
542  d->sf = sf;
543  d->fef = fef;
544  d->lef = lef;
545  d->reachable = 0;
546  d->exits = d->entries = NULL;
547  d->next = NULL;
548 
549  if (!dag->nodes)
550  dag->nodes = d;
551  else
552  tail->next = d;
553  tail = d;
554  }
555 
556  /* Read initial node ID */
557  k = dag_param_read(line, "Initial");
558  if ((k < 0) || (k >= n_nodes)) {
559  E_ERROR("Initial node parameter missing or invalid\n");
560  goto load_error;
561  }
562  dag->start = darray[k];
563 
564  /* Read final node ID */
565  k = dag_param_read(line, "Final");
566  if ((k < 0) || (k >= n_nodes)) {
567  E_ERROR("Final node parameter missing or invalid\n");
568  goto load_error;
569  }
570  dag->end = darray[k];
571 
572  /* Read bestsegscore entries and ignore them. */
573  if ((k = dag_param_read(line, "BestSegAscr")) < 0) {
574  E_ERROR("BestSegAscr parameter missing\n");
575  goto load_error;
576  }
577  for (i = 0; i < k; i++) {
578  if ((line = lineiter_next(line)) == NULL) {
579  E_ERROR("Premature EOF while (%s) ignoring BestSegAscr\n",
580  line);
581  goto load_error;
582  }
583  }
584 
585  /* Read in edges. */
586  while ((line = lineiter_next(line)) != NULL) {
587  if (line->buf[0] == '#')
588  continue;
589  if (0 == strncmp(line->buf, "Edges", 5))
590  break;
591  }
592  if (line == NULL) {
593  E_ERROR("Edges missing\n");
594  goto load_error;
595  }
596  while ((line = lineiter_next(line)) != NULL) {
597  int from, to, ascr;
598  ps_latnode_t *pd, *d;
599 
600  if (sscanf(line->buf, "%d %d %d", &from, &to, &ascr) != 3)
601  break;
602  if (ascr WORSE_THAN WORST_SCORE)
603  continue;
604  pd = darray[from];
605  d = darray[to];
606  if (logratio != 1.0f)
607  ascr = (int32)(ascr * logratio);
608  ps_lattice_link(dag, pd, d, ascr, d->sf - 1);
609  }
610  if (strcmp(line->buf, "End\n") != 0) {
611  E_ERROR("Terminating 'End' missing\n");
612  goto load_error;
613  }
614  lineiter_free(line);
615  fclose_comp(fp, ispipe);
616  ckd_free(darray);
617 
618  /* Minor hack: If the final node is a filler word and not </s>,
619  * then set its base word ID to </s>, so that the language model
620  * scores won't be screwed up. */
621  if (dict_filler_word(dag->dict, dag->end->wid))
622  dag->end->basewid = dag->search
623  ? ps_search_finish_wid(dag->search)
624  : dict_wordid(dag->dict, S3_FINISH_WORD);
625 
626  /* Mark reachable from dag->end */
627  dag_mark_reachable(dag->end);
628 
629  /* Free nodes unreachable from dag->end and their links */
631 
632  if (ps) {
633  /* Build links around silence and filler words, since they do
634  * not exist in the language model. FIXME: This is
635  * potentially buggy, as we already do this before outputing
636  * lattices. */
637  pip = logmath_log(dag->lmath, cmd_ln_float32_r(ps->config, "-pip"));
638  silpen = pip + logmath_log(dag->lmath,
639  cmd_ln_float32_r(ps->config, "-silprob"));
640  fillpen = pip + logmath_log(dag->lmath,
641  cmd_ln_float32_r(ps->config, "-fillprob"));
642  ps_lattice_bypass_fillers(dag, silpen, fillpen);
643  }
644 
645  return dag;
646 
647  load_error:
648  E_ERROR("Failed to load %s\n", file);
649  lineiter_free(line);
650  if (fp) fclose_comp(fp, ispipe);
651  ckd_free(darray);
652  return NULL;
653 }
654 
655 int
657 {
658  return dag->n_frames;
659 }
660 
661 ps_lattice_t *
662 ps_lattice_init_search(ps_search_t *search, int n_frame)
663 {
664  ps_lattice_t *dag;
665 
666  dag = ckd_calloc(1, sizeof(*dag));
667  dag->search = search;
668  dag->dict = dict_retain(search->dict);
669  dag->lmath = logmath_retain(search->acmod->lmath);
670  dag->frate = cmd_ln_int32_r(dag->search->config, "-frate");
671  dag->silence = dict_silwid(dag->dict);
672  dag->n_frames = n_frame;
673  dag->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t));
674  dag->latlink_alloc = listelem_alloc_init(sizeof(ps_latlink_t));
675  dag->latlink_list_alloc = listelem_alloc_init(sizeof(latlink_list_t));
676  dag->refcount = 1;
677  return dag;
678 }
679 
680 ps_lattice_t *
682 {
683  ++dag->refcount;
684  return dag;
685 }
686 
687 int
689 {
690  if (dag == NULL)
691  return 0;
692  if (--dag->refcount > 0)
693  return dag->refcount;
694  logmath_free(dag->lmath);
695  dict_free(dag->dict);
696  listelem_alloc_free(dag->latnode_alloc);
697  listelem_alloc_free(dag->latlink_alloc);
698  listelem_alloc_free(dag->latlink_list_alloc);
699  ckd_free(dag->hyp_str);
700  ckd_free(dag);
701  return 0;
702 }
703 
704 logmath_t *
706 {
707  return dag->lmath;
708 }
709 
712 {
713  return dag->nodes;
714 }
715 
718 {
719  return itor->next;
720 }
721 
722 void
724 {
725  /* Do absolutely nothing. */
726 }
727 
728 ps_latnode_t *
730 {
731  return itor;
732 }
733 
734 int
735 ps_latnode_times(ps_latnode_t *node, int16 *out_fef, int16 *out_lef)
736 {
737  if (out_fef) *out_fef = (int16)node->fef;
738  if (out_lef) *out_lef = (int16)node->lef;
739  return node->sf;
740 }
741 
742 char const *
744 {
745  return dict_wordstr(dag->dict, node->wid);
746 }
747 
748 char const *
750 {
751  return dict_wordstr(dag->dict, node->basewid);
752 }
753 
754 int32
756  ps_latlink_t **out_link)
757 {
758  latlink_list_t *links;
759  int32 bestpost = logmath_get_zero(dag->lmath);
760 
761  for (links = node->exits; links; links = links->next) {
762  int32 post = links->link->alpha + links->link->beta - dag->norm;
763  if (post > bestpost) {
764  if (out_link) *out_link = links->link;
765  bestpost = post;
766  }
767  }
768  return bestpost;
769 }
770 
773 {
774  return node->exits;
775 }
776 
779 {
780  return node->entries;
781 }
782 
785 {
786  return itor->next;
787 }
788 
789 void
791 {
792  /* Do absolutely nothing. */
793 }
794 
795 ps_latlink_t *
797 {
798  return itor->link;
799 }
800 
801 int
802 ps_latlink_times(ps_latlink_t *link, int16 *out_sf)
803 {
804  if (out_sf) {
805  if (link->from) {
806  *out_sf = link->from->sf;
807  }
808  else {
809  *out_sf = 0;
810  }
811  }
812  return link->ef;
813 }
814 
815 ps_latnode_t *
817 {
818  if (out_src) *out_src = link->from;
819  return link->to;
820 }
821 
822 char const *
824 {
825  if (link->from == NULL)
826  return NULL;
827  return dict_wordstr(dag->dict, link->from->wid);
828 }
829 
830 char const *
832 {
833  if (link->from == NULL)
834  return NULL;
835  return dict_wordstr(dag->dict, link->from->basewid);
836 }
837 
838 ps_latlink_t *
840 {
841  return link->best_prev;
842 }
843 
844 int32
845 ps_latlink_prob(ps_lattice_t *dag, ps_latlink_t *link, int32 *out_ascr)
846 {
847  int32 post = link->alpha + link->beta - dag->norm;
848  if (out_ascr) *out_ascr = link->ascr << SENSCR_SHIFT;
849  return post;
850 }
851 
852 char const *
854 {
855  ps_latlink_t *l;
856  size_t len;
857  char *c;
858 
859  /* Backtrace once to get hypothesis length. */
860  len = 0;
861  /* FIXME: There may not be a search, but actually there should be a dict. */
862  if (dict_real_word(dag->dict, link->to->basewid)) {
863  char *wstr = dict_wordstr(dag->dict, link->to->basewid);
864  if (wstr != NULL)
865  len += strlen(wstr) + 1;
866  }
867  for (l = link; l; l = l->best_prev) {
868  if (dict_real_word(dag->dict, l->from->basewid)) {
869  char *wstr = dict_wordstr(dag->dict, l->from->basewid);
870  if (wstr != NULL)
871  len += strlen(wstr) + 1;
872  }
873  }
874 
875  /* Backtrace again to construct hypothesis string. */
876  ckd_free(dag->hyp_str);
877  dag->hyp_str = ckd_calloc(1, len+1); /* extra one incase the hyp is empty */
878  c = dag->hyp_str + len - 1;
879  if (dict_real_word(dag->dict, link->to->basewid)) {
880  char *wstr = dict_wordstr(dag->dict, link->to->basewid);
881  if (wstr != NULL) {
882  len = strlen(wstr);
883  c -= len;
884  memcpy(c, wstr, len);
885  if (c > dag->hyp_str) {
886  --c;
887  *c = ' ';
888  }
889  }
890  }
891  for (l = link; l; l = l->best_prev) {
892  if (dict_real_word(dag->dict, l->from->basewid)) {
893  char *wstr = dict_wordstr(dag->dict, l->from->basewid);
894  if (wstr != NULL) {
895  len = strlen(wstr);
896  c -= len;
897  memcpy(c, wstr, len);
898  if (c > dag->hyp_str) {
899  --c;
900  *c = ' ';
901  }
902  }
903  }
904  }
905 
906  return dag->hyp_str;
907 }
908 
909 static void
910 ps_lattice_compute_lscr(ps_seg_t *seg, ps_latlink_t *link, int to)
911 {
912  ngram_model_t *lmset;
913 
914  /* Language model score is included in the link score for FSG
915  * search. FIXME: Of course, this is sort of a hack :( */
916  if (0 != strcmp(ps_search_name(seg->search), "ngram")) {
917  seg->lback = 1; /* Unigram... */
918  seg->lscr = 0;
919  return;
920  }
921 
922  lmset = ((ngram_search_t *)seg->search)->lmset;
923 
924  if (link->best_prev == NULL) {
925  if (to) /* Sentence has only two words. */
926  seg->lscr = ngram_bg_score(lmset, link->to->basewid,
927  link->from->basewid, &seg->lback)
928  >> SENSCR_SHIFT;
929  else {/* This is the start symbol, its lscr is always 0. */
930  seg->lscr = 0;
931  seg->lback = 1;
932  }
933  }
934  else {
935  /* Find the two predecessor words. */
936  if (to) {
937  seg->lscr = ngram_tg_score(lmset, link->to->basewid,
938  link->from->basewid,
939  link->best_prev->from->basewid,
940  &seg->lback) >> SENSCR_SHIFT;
941  }
942  else {
943  if (link->best_prev->best_prev)
944  seg->lscr = ngram_tg_score(lmset, link->from->basewid,
945  link->best_prev->from->basewid,
946  link->best_prev->best_prev->from->basewid,
947  &seg->lback) >> SENSCR_SHIFT;
948  else
949  seg->lscr = ngram_bg_score(lmset, link->from->basewid,
950  link->best_prev->from->basewid,
951  &seg->lback) >> SENSCR_SHIFT;
952  }
953  }
954 }
955 
956 static void
957 ps_lattice_link2itor(ps_seg_t *seg, ps_latlink_t *link, int to)
958 {
959  dag_seg_t *itor = (dag_seg_t *)seg;
960  ps_latnode_t *node;
961 
962  if (to) {
963  node = link->to;
964  seg->ef = node->lef;
965  seg->prob = 0; /* norm + beta - norm */
966  }
967  else {
968  latlink_list_t *x;
969  ps_latnode_t *n;
970  logmath_t *lmath = ps_search_acmod(seg->search)->lmath;
971 
972  node = link->from;
973  seg->ef = link->ef;
974  seg->prob = link->alpha + link->beta - itor->norm;
975  /* Sum over all exits for this word and any alternate
976  pronunciations at the same frame. */
977  for (n = node; n; n = n->alt) {
978  for (x = n->exits; x; x = x->next) {
979  if (x->link == link)
980  continue;
981  seg->prob = logmath_add(lmath, seg->prob,
982  x->link->alpha + x->link->beta - itor->norm);
983  }
984  }
985  }
986  seg->word = dict_wordstr(ps_search_dict(seg->search), node->wid);
987  seg->sf = node->sf;
988  seg->ascr = link->ascr << SENSCR_SHIFT;
989  /* Compute language model score from best predecessors. */
990  ps_lattice_compute_lscr(seg, link, to);
991 }
992 
993 static void
994 ps_lattice_seg_free(ps_seg_t *seg)
995 {
996  dag_seg_t *itor = (dag_seg_t *)seg;
997 
998  ckd_free(itor->links);
999  ckd_free(itor);
1000 }
1001 
1002 static ps_seg_t *
1003 ps_lattice_seg_next(ps_seg_t *seg)
1004 {
1005  dag_seg_t *itor = (dag_seg_t *)seg;
1006 
1007  ++itor->cur;
1008  if (itor->cur == itor->n_links + 1) {
1009  ps_lattice_seg_free(seg);
1010  return NULL;
1011  }
1012  else if (itor->cur == itor->n_links) {
1013  /* Re-use the last link but with the "to" node. */
1014  ps_lattice_link2itor(seg, itor->links[itor->cur - 1], TRUE);
1015  }
1016  else {
1017  ps_lattice_link2itor(seg, itor->links[itor->cur], FALSE);
1018  }
1019 
1020  return seg;
1021 }
1022 
1023 static ps_segfuncs_t ps_lattice_segfuncs = {
1024  /* seg_next */ ps_lattice_seg_next,
1025  /* seg_free */ ps_lattice_seg_free
1026 };
1027 
1028 ps_seg_t *
1030 {
1031  dag_seg_t *itor;
1032  ps_latlink_t *l;
1033  int cur;
1034 
1035  /* Calling this an "iterator" is a bit of a misnomer since we have
1036  * to get the entire backtrace in order to produce it.
1037  */
1038  itor = ckd_calloc(1, sizeof(*itor));
1039  itor->base.vt = &ps_lattice_segfuncs;
1040  itor->base.search = dag->search;
1041  itor->base.lwf = lwf;
1042  itor->n_links = 0;
1043  itor->norm = dag->norm;
1044 
1045  for (l = link; l; l = l->best_prev) {
1046  ++itor->n_links;
1047  }
1048  if (itor->n_links == 0) {
1049  ckd_free(itor);
1050  return NULL;
1051  }
1052 
1053  itor->links = ckd_calloc(itor->n_links, sizeof(*itor->links));
1054  cur = itor->n_links - 1;
1055  for (l = link; l; l = l->best_prev) {
1056  itor->links[cur] = l;
1057  --cur;
1058  }
1059 
1060  ps_lattice_link2itor((ps_seg_t *)itor, itor->links[0], FALSE);
1061  return (ps_seg_t *)itor;
1062 }
1063 
1066 {
1067  latlink_list_t *ll;
1068 
1069  ll = listelem_malloc(dag->latlink_list_alloc);
1070  ll->link = link;
1071  ll->next = next;
1072 
1073  return ll;
1074 }
1075 
1076 void
1078 {
1079  if (dag->q_head == NULL)
1080  dag->q_head = dag->q_tail = latlink_list_new(dag, link, NULL);
1081  else {
1082  dag->q_tail->next = latlink_list_new(dag, link, NULL);
1083  dag->q_tail = dag->q_tail->next;
1084  }
1085 
1086 }
1087 
1088 ps_latlink_t *
1090 {
1091  latlink_list_t *x;
1092  ps_latlink_t *link;
1093 
1094  if (dag->q_head == NULL)
1095  return NULL;
1096  link = dag->q_head->link;
1097  x = dag->q_head->next;
1098  listelem_free(dag->latlink_list_alloc, dag->q_head);
1099  dag->q_head = x;
1100  if (dag->q_head == NULL)
1101  dag->q_tail = NULL;
1102  return link;
1103 }
1104 
1105 void
1107 {
1108  while (ps_lattice_popq(dag)) {
1109  /* Do nothing. */
1110  }
1111 }
1112 
1113 ps_latlink_t *
1115 {
1116  ps_latnode_t *node;
1117  latlink_list_t *x;
1118 
1119  /* Cancel any unfinished traversal. */
1120  ps_lattice_delq(dag);
1121 
1122  /* Initialize node fanin counts and path scores. */
1123  for (node = dag->nodes; node; node = node->next)
1124  node->info.fanin = 0;
1125  for (node = dag->nodes; node; node = node->next) {
1126  for (x = node->exits; x; x = x->next)
1127  (x->link->to->info.fanin)++;
1128  }
1129 
1130  /* Initialize agenda with all exits from start. */
1131  if (start == NULL) start = dag->start;
1132  for (x = start->exits; x; x = x->next)
1133  ps_lattice_pushq(dag, x->link);
1134 
1135  /* Pull the first edge off the queue. */
1136  return ps_lattice_traverse_next(dag, end);
1137 }
1138 
1139 ps_latlink_t *
1141 {
1142  ps_latlink_t *next;
1143 
1144  next = ps_lattice_popq(dag);
1145  if (next == NULL)
1146  return NULL;
1147 
1148  /* Decrease fanin count for destination node and expand outgoing
1149  * edges if all incoming edges have been seen. */
1150  --next->to->info.fanin;
1151  if (next->to->info.fanin == 0) {
1152  latlink_list_t *x;
1153 
1154  if (end == NULL) end = dag->end;
1155  if (next->to == end) {
1156  /* If we have traversed all links entering the end node,
1157  * clear the queue, causing future calls to this function
1158  * to return NULL. */
1159  ps_lattice_delq(dag);
1160  return next;
1161  }
1162 
1163  /* Extend all outgoing edges. */
1164  for (x = next->to->exits; x; x = x->next)
1165  ps_lattice_pushq(dag, x->link);
1166  }
1167  return next;
1168 }
1169 
1170 ps_latlink_t *
1172 {
1173  ps_latnode_t *node;
1174  latlink_list_t *x;
1175 
1176  /* Cancel any unfinished traversal. */
1177  ps_lattice_delq(dag);
1178 
1179  /* Initialize node fanout counts and path scores. */
1180  for (node = dag->nodes; node; node = node->next) {
1181  node->info.fanin = 0;
1182  for (x = node->exits; x; x = x->next)
1183  ++node->info.fanin;
1184  }
1185 
1186  /* Initialize agenda with all entries from end. */
1187  if (end == NULL) end = dag->end;
1188  for (x = end->entries; x; x = x->next)
1189  ps_lattice_pushq(dag, x->link);
1190 
1191  /* Pull the first edge off the queue. */
1192  return ps_lattice_reverse_next(dag, start);
1193 }
1194 
1195 ps_latlink_t *
1197 {
1198  ps_latlink_t *next;
1199 
1200  next = ps_lattice_popq(dag);
1201  if (next == NULL)
1202  return NULL;
1203 
1204  /* Decrease fanout count for source node and expand incoming
1205  * edges if all incoming edges have been seen. */
1206  --next->from->info.fanin;
1207  if (next->from->info.fanin == 0) {
1208  latlink_list_t *x;
1209 
1210  if (start == NULL) start = dag->start;
1211  if (next->from == start) {
1212  /* If we have traversed all links entering the start node,
1213  * clear the queue, causing future calls to this function
1214  * to return NULL. */
1215  ps_lattice_delq(dag);
1216  return next;
1217  }
1218 
1219  /* Extend all outgoing edges. */
1220  for (x = next->from->entries; x; x = x->next)
1221  ps_lattice_pushq(dag, x->link);
1222  }
1223  return next;
1224 }
1225 
1226 /*
1227  * Find the best score from dag->start to end point of any link and
1228  * use it to update links further down the path. This is like
1229  * single-source shortest path search, except that it is done over
1230  * edges rather than nodes, which allows us to do exact trigram scoring.
1231  *
1232  * Helpfully enough, we get half of the posterior probability
1233  * calculation for free that way too. (interesting research topic: is
1234  * there a reliable Viterbi analogue to word-level Forward-Backward
1235  * like there is for state-level? Or, is it just lattice density?)
1236  */
1237 ps_latlink_t *
1238 ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset,
1239  float32 lwf, float32 ascale)
1240 {
1241  ps_search_t *search;
1242  ps_latnode_t *node;
1243  ps_latlink_t *link;
1244  ps_latlink_t *bestend;
1245  latlink_list_t *x;
1246  logmath_t *lmath;
1247  int32 bestescr;
1248 
1249  search = dag->search;
1250  lmath = dag->lmath;
1251 
1252  /* Initialize path scores for all links exiting dag->start, and
1253  * set all other scores to the minimum. Also initialize alphas to
1254  * log-zero. */
1255  for (node = dag->nodes; node; node = node->next) {
1256  for (x = node->exits; x; x = x->next) {
1257  x->link->path_scr = MAX_NEG_INT32;
1258  x->link->alpha = logmath_get_zero(lmath);
1259  }
1260  }
1261  for (x = dag->start->exits; x; x = x->next) {
1262  int32 n_used;
1263 
1264  /* Ignore filler words. */
1265  if (dict_filler_word(ps_search_dict(search), x->link->to->basewid)
1266  && x->link->to != dag->end)
1267  continue;
1268 
1269  /* Best path points to dag->start, obviously. */
1270  if (lmset)
1271  x->link->path_scr = x->link->ascr +
1272  (ngram_bg_score(lmset, x->link->to->basewid,
1273  ps_search_start_wid(search), &n_used)
1274  >> SENSCR_SHIFT)
1275  * lwf;
1276  else
1277  x->link->path_scr = x->link->ascr;
1278  x->link->best_prev = NULL;
1279  /* No predecessors for start links. */
1280  x->link->alpha = 0;
1281  }
1282 
1283  /* Traverse the edges in the graph, updating path scores. */
1284  for (link = ps_lattice_traverse_edges(dag, NULL, NULL);
1285  link; link = ps_lattice_traverse_next(dag, NULL)) {
1286  int32 bprob, n_used;
1287 
1288  /* Skip filler nodes in traversal. */
1289  if (dict_filler_word(ps_search_dict(search), link->from->basewid) && link->from != dag->start)
1290  continue;
1291  if (dict_filler_word(ps_search_dict(search), link->to->basewid) && link->to != dag->end)
1292  continue;
1293 
1294  /* Sanity check, we should not be traversing edges that
1295  * weren't previously updated, otherwise nasty overflows will result. */
1296  assert(link->path_scr != MAX_NEG_INT32);
1297 
1298  /* Calculate common bigram probability for all alphas. */
1299  if (lmset)
1300  bprob = ngram_ng_prob(lmset,
1301  link->to->basewid,
1302  &link->from->basewid, 1, &n_used);
1303  else
1304  bprob = 0;
1305  /* Add in this link's acoustic score, which was a constant
1306  factor in previous computations (if any). */
1307  link->alpha += (link->ascr << SENSCR_SHIFT) * ascale;
1308 
1309  /* Update scores for all paths exiting link->to. */
1310  for (x = link->to->exits; x; x = x->next) {
1311  int32 tscore, score;
1312 
1313  /* Skip links to filler words in update. */
1314  if (dict_filler_word(ps_search_dict(search), x->link->to->basewid)
1315  && x->link->to != dag->end)
1316  continue;
1317 
1318  /* Update alpha with sum of previous alphas. */
1319  x->link->alpha = logmath_add(lmath, x->link->alpha, link->alpha + bprob);
1320  /* Calculate trigram score for bestpath. */
1321  if (lmset)
1322  tscore = (ngram_tg_score(lmset, x->link->to->basewid,
1323  link->to->basewid,
1324  link->from->basewid, &n_used) >> SENSCR_SHIFT)
1325  * lwf;
1326  else
1327  tscore = 0;
1328  /* Update link score with maximum link score. */
1329  score = link->path_scr + tscore + x->link->ascr;
1330  if (score BETTER_THAN x->link->path_scr) {
1331  x->link->path_scr = score;
1332  x->link->best_prev = link;
1333  }
1334  }
1335  }
1336 
1337  /* Find best link entering final node, and calculate normalizer
1338  * for posterior probabilities. */
1339  bestend = NULL;
1340  bestescr = MAX_NEG_INT32;
1341 
1342  /* Normalizer is the alpha for the imaginary link exiting the
1343  final node. */
1344  dag->norm = logmath_get_zero(lmath);
1345  for (x = dag->end->entries; x; x = x->next) {
1346  int32 bprob, n_used;
1347 
1348  if (dict_filler_word(ps_search_dict(search), x->link->from->basewid))
1349  continue;
1350  if (lmset)
1351  bprob = ngram_ng_prob(lmset,
1352  x->link->to->basewid,
1353  &x->link->from->basewid, 1, &n_used);
1354  else
1355  bprob = 0;
1356  dag->norm = logmath_add(lmath, dag->norm, x->link->alpha + bprob);
1357  if (x->link->path_scr BETTER_THAN bestescr) {
1358  bestescr = x->link->path_scr;
1359  bestend = x->link;
1360  }
1361  }
1362  /* FIXME: floating point... */
1363  dag->norm += (int32)(dag->final_node_ascr << SENSCR_SHIFT) * ascale;
1364 
1365  E_INFO("Normalizer P(O) = alpha(%s:%d:%d) = %d\n",
1366  dict_wordstr(dag->search->dict, dag->end->wid),
1367  dag->end->sf, dag->end->lef,
1368  dag->norm);
1369  return bestend;
1370 }
1371 
1372 static int32
1373 ps_lattice_joint(ps_lattice_t *dag, ps_latlink_t *link, float32 ascale)
1374 {
1375  ngram_model_t *lmset;
1376  int32 jprob;
1377 
1378  /* Sort of a hack... */
1379  if (dag->search && 0 == strcmp(ps_search_name(dag->search), "ngram"))
1380  lmset = ((ngram_search_t *)dag->search)->lmset;
1381  else
1382  lmset = NULL;
1383 
1384  jprob = (dag->final_node_ascr << SENSCR_SHIFT) * ascale;
1385  while (link) {
1386  if (lmset) {
1387  int lback;
1388  /* Compute unscaled language model probability. Note that
1389  this is actually not the language model probability
1390  that corresponds to this link, but that is okay,
1391  because we are just taking the sum over all links in
1392  the best path. */
1393  jprob += ngram_ng_prob(lmset, link->to->basewid,
1394  &link->from->basewid, 1, &lback);
1395  }
1396  /* If there is no language model, we assume that the language
1397  model probability (such as it is) has been included in the
1398  link score. */
1399  jprob += (link->ascr << SENSCR_SHIFT) * ascale;
1400  link = link->best_prev;
1401  }
1402 
1403  E_INFO("Joint P(O,S) = %d P(S|O) = %d\n", jprob, jprob - dag->norm);
1404  return jprob;
1405 }
1406 
1407 int32
1408 ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset,
1409  float32 ascale)
1410 {
1411  ps_search_t *search;
1412  logmath_t *lmath;
1413  ps_latnode_t *node;
1414  ps_latlink_t *link;
1415  latlink_list_t *x;
1416  ps_latlink_t *bestend;
1417  int32 bestescr;
1418 
1419  search = dag->search;
1420  lmath = dag->lmath;
1421 
1422  /* Reset all betas to zero. */
1423  for (node = dag->nodes; node; node = node->next) {
1424  for (x = node->exits; x; x = x->next) {
1425  x->link->beta = logmath_get_zero(lmath);
1426  }
1427  }
1428 
1429  bestend = NULL;
1430  bestescr = MAX_NEG_INT32;
1431  /* Accumulate backward probabilities for all links. */
1432  for (link = ps_lattice_reverse_edges(dag, NULL, NULL);
1433  link; link = ps_lattice_reverse_next(dag, NULL)) {
1434  int32 bprob, n_used;
1435 
1436  /* Skip filler nodes in traversal. */
1437  if (dict_filler_word(ps_search_dict(search), link->from->basewid) && link->from != dag->start)
1438  continue;
1439  if (dict_filler_word(ps_search_dict(search), link->to->basewid) && link->to != dag->end)
1440  continue;
1441 
1442  /* Calculate LM probability. */
1443  if (lmset)
1444  bprob = ngram_ng_prob(lmset, link->to->basewid,
1445  &link->from->basewid, 1, &n_used);
1446  else
1447  bprob = 0;
1448 
1449  if (link->to == dag->end) {
1450  /* Track the best path - we will backtrace in order to
1451  calculate the unscaled joint probability for sentence
1452  posterior. */
1453  if (link->path_scr BETTER_THAN bestescr) {
1454  bestescr = link->path_scr;
1455  bestend = link;
1456  }
1457  /* Imaginary exit link from final node has beta = 1.0 */
1458  link->beta = bprob + (dag->final_node_ascr << SENSCR_SHIFT) * ascale;
1459  }
1460  else {
1461  /* Update beta from all outgoing betas. */
1462  for (x = link->to->exits; x; x = x->next) {
1463  if (dict_filler_word(ps_search_dict(search), x->link->to->basewid) && x->link->to != dag->end)
1464  continue;
1465  link->beta = logmath_add(lmath, link->beta,
1466  x->link->beta + bprob
1467  + (x->link->ascr << SENSCR_SHIFT) * ascale);
1468  }
1469  }
1470  }
1471 
1472  /* Return P(S|O) = P(O,S)/P(O) */
1473  return ps_lattice_joint(dag, bestend, ascale) - dag->norm;
1474 }
1475 
1476 int32
1478 {
1479  ps_latlink_t *link;
1480  int npruned = 0;
1481 
1482  for (link = ps_lattice_traverse_edges(dag, dag->start, dag->end);
1483  link; link = ps_lattice_traverse_next(dag, dag->end)) {
1484  link->from->reachable = FALSE;
1485  if (link->alpha + link->beta - dag->norm < beam) {
1486  latlink_list_t *x, *tmp, *next;
1487  tmp = NULL;
1488  for (x = link->from->exits; x; x = next) {
1489  next = x->next;
1490  if (x->link == link) {
1491  listelem_free(dag->latlink_list_alloc, x);
1492  }
1493  else {
1494  x->next = tmp;
1495  tmp = x;
1496  }
1497  }
1498  link->from->exits = tmp;
1499  tmp = NULL;
1500  for (x = link->to->entries; x; x = next) {
1501  next = x->next;
1502  if (x->link == link) {
1503  listelem_free(dag->latlink_list_alloc, x);
1504  }
1505  else {
1506  x->next = tmp;
1507  tmp = x;
1508  }
1509  }
1510  link->to->entries = tmp;
1511  listelem_free(dag->latlink_alloc, link);
1512  ++npruned;
1513  }
1514  }
1515  dag_mark_reachable(dag->end);
1517  return npruned;
1518 }
1519 
1520 
1521 /* Parameters to prune n-best alternatives search */
1522 #define MAX_PATHS 500 /* Max allowed active paths at any time */
1523 #define MAX_HYP_TRIES 10000
1524 
1525 /*
1526  * For each node in any path between from and end of utt, find the
1527  * best score from "from".sf to end of utt. (NOTE: Uses bigram probs;
1528  * this is an estimate of the best score from "from".) (NOTE #2: yes,
1529  * this is the "heuristic score" used in A* search)
1530  */
1531 static int32
1532 best_rem_score(ps_astar_t *nbest, ps_latnode_t * from)
1533 {
1534  latlink_list_t *x;
1535  int32 bestscore, score;
1536 
1537  if (from->info.rem_score <= 0)
1538  return (from->info.rem_score);
1539 
1540  /* Best score from "from" to end of utt not known; compute from successors */
1541  bestscore = WORST_SCORE;
1542  for (x = from->exits; x; x = x->next) {
1543  int32 n_used;
1544 
1545  score = best_rem_score(nbest, x->link->to);
1546  score += x->link->ascr;
1547  if (nbest->lmset)
1548  score += (ngram_bg_score(nbest->lmset, x->link->to->basewid,
1549  from->basewid, &n_used) >> SENSCR_SHIFT)
1550  * nbest->lwf;
1551  if (score BETTER_THAN bestscore)
1552  bestscore = score;
1553  }
1554  from->info.rem_score = bestscore;
1555 
1556  return bestscore;
1557 }
1558 
1559 /*
1560  * Insert newpath in sorted (by path score) list of paths. But if newpath is
1561  * too far down the list, drop it (FIXME: necessary?)
1562  * total_score = path score (newpath) + rem_score to end of utt.
1563  */
1564 static void
1565 path_insert(ps_astar_t *nbest, ps_latpath_t *newpath, int32 total_score)
1566 {
1567  ps_latpath_t *prev, *p;
1568  int32 i;
1569 
1570  prev = NULL;
1571  for (i = 0, p = nbest->path_list; (i < MAX_PATHS) && p; p = p->next, i++) {
1572  if ((p->score + p->node->info.rem_score) < total_score)
1573  break;
1574  prev = p;
1575  }
1576 
1577  /* newpath should be inserted between prev and p */
1578  if (i < MAX_PATHS) {
1579  /* Insert new partial hyp */
1580  newpath->next = p;
1581  if (!prev)
1582  nbest->path_list = newpath;
1583  else
1584  prev->next = newpath;
1585  if (!p)
1586  nbest->path_tail = newpath;
1587 
1588  nbest->n_path++;
1589  nbest->n_hyp_insert++;
1590  nbest->insert_depth += i;
1591  }
1592  else {
1593  /* newpath score too low; reject it and also prune paths beyond MAX_PATHS */
1594  nbest->path_tail = prev;
1595  prev->next = NULL;
1596  nbest->n_path = MAX_PATHS;
1597  listelem_free(nbest->latpath_alloc, newpath);
1598 
1599  nbest->n_hyp_reject++;
1600  for (; p; p = newpath) {
1601  newpath = p->next;
1602  listelem_free(nbest->latpath_alloc, p);
1603  nbest->n_hyp_reject++;
1604  }
1605  }
1606 }
1607 
1608 /* Find all possible extensions to given partial path */
1609 static void
1610 path_extend(ps_astar_t *nbest, ps_latpath_t * path)
1611 {
1612  latlink_list_t *x;
1613  ps_latpath_t *newpath;
1614  int32 total_score, tail_score;
1615 
1616  /* Consider all successors of path->node */
1617  for (x = path->node->exits; x; x = x->next) {
1618  int32 n_used;
1619 
1620  /* Skip successor if no path from it reaches the final node */
1621  if (x->link->to->info.rem_score <= WORST_SCORE)
1622  continue;
1623 
1624  /* Create path extension and compute exact score for this extension */
1625  newpath = listelem_malloc(nbest->latpath_alloc);
1626  newpath->node = x->link->to;
1627  newpath->parent = path;
1628  newpath->score = path->score + x->link->ascr;
1629  if (nbest->lmset) {
1630  if (path->parent) {
1631  newpath->score += nbest->lwf
1632  * (ngram_tg_score(nbest->lmset, newpath->node->basewid,
1633  path->node->basewid,
1634  path->parent->node->basewid, &n_used)
1635  >> SENSCR_SHIFT);
1636  }
1637  else
1638  newpath->score += nbest->lwf
1639  * (ngram_bg_score(nbest->lmset, newpath->node->basewid,
1640  path->node->basewid, &n_used)
1641  >> SENSCR_SHIFT);
1642  }
1643 
1644  /* Insert new partial path hypothesis into sorted path_list */
1645  nbest->n_hyp_tried++;
1646  total_score = newpath->score + newpath->node->info.rem_score;
1647 
1648  /* First see if hyp would be worse than the worst */
1649  if (nbest->n_path >= MAX_PATHS) {
1650  tail_score =
1651  nbest->path_tail->score
1652  + nbest->path_tail->node->info.rem_score;
1653  if (total_score < tail_score) {
1654  listelem_free(nbest->latpath_alloc, newpath);
1655  nbest->n_hyp_reject++;
1656  continue;
1657  }
1658  }
1659 
1660  path_insert(nbest, newpath, total_score);
1661  }
1662 }
1663 
1664 ps_astar_t *
1666  ngram_model_t *lmset,
1667  float32 lwf,
1668  int sf, int ef,
1669  int w1, int w2)
1670 {
1671  ps_astar_t *nbest;
1672  ps_latnode_t *node;
1673 
1674  nbest = ckd_calloc(1, sizeof(*nbest));
1675  nbest->dag = dag;
1676  nbest->lmset = lmset;
1677  nbest->lwf = lwf;
1678  nbest->sf = sf;
1679  if (ef < 0)
1680  nbest->ef = dag->n_frames + 1;
1681  else
1682  nbest->ef = ef;
1683  nbest->w1 = w1;
1684  nbest->w2 = w2;
1685  nbest->latpath_alloc = listelem_alloc_init(sizeof(ps_latpath_t));
1686 
1687  /* Initialize rem_score (A* heuristic) to default values */
1688  for (node = dag->nodes; node; node = node->next) {
1689  if (node == dag->end)
1690  node->info.rem_score = 0;
1691  else if (node->exits == NULL)
1692  node->info.rem_score = WORST_SCORE;
1693  else
1694  node->info.rem_score = 1; /* +ve => unknown value */
1695  }
1696 
1697  /* Create initial partial hypotheses list consisting of nodes starting at sf */
1698  nbest->path_list = nbest->path_tail = NULL;
1699  for (node = dag->nodes; node; node = node->next) {
1700  if (node->sf == sf) {
1701  ps_latpath_t *path;
1702  int32 n_used;
1703 
1704  best_rem_score(nbest, node);
1705  path = listelem_malloc(nbest->latpath_alloc);
1706  path->node = node;
1707  path->parent = NULL;
1708  if (nbest->lmset)
1709  path->score = nbest->lwf *
1710  ((w1 < 0)
1711  ? ngram_bg_score(nbest->lmset, node->basewid, w2, &n_used)
1712  : ngram_tg_score(nbest->lmset, node->basewid, w2, w1, &n_used));
1713  else
1714  path->score = 0;
1715  path->score >>= SENSCR_SHIFT;
1716  path_insert(nbest, path, path->score + node->info.rem_score);
1717  }
1718  }
1719 
1720  return nbest;
1721 }
1722 
1723 ps_latpath_t *
1725 {
1726  ps_lattice_t *dag;
1727 
1728  dag = nbest->dag;
1729 
1730  /* Pop the top (best) partial hypothesis */
1731  while ((nbest->top = nbest->path_list) != NULL) {
1732  nbest->path_list = nbest->path_list->next;
1733  if (nbest->top == nbest->path_tail)
1734  nbest->path_tail = NULL;
1735  nbest->n_path--;
1736 
1737  /* Complete hypothesis? */
1738  if ((nbest->top->node->sf >= nbest->ef)
1739  || ((nbest->top->node == dag->end) &&
1740  (nbest->ef > dag->end->sf))) {
1741  /* FIXME: Verify that it is non-empty. Also we may want
1742  * to verify that it is actually distinct from other
1743  * paths, since often this is not the case*/
1744  return nbest->top;
1745  }
1746  else {
1747  if (nbest->top->node->fef < nbest->ef)
1748  path_extend(nbest, nbest->top);
1749  }
1750  }
1751 
1752  /* Did not find any more paths to extend. */
1753  return NULL;
1754 }
1755 
1756 char const *
1758 {
1759  ps_search_t *search;
1760  ps_latpath_t *p;
1761  size_t len;
1762  char *c;
1763  char *hyp;
1764 
1765  search = nbest->dag->search;
1766 
1767  /* Backtrace once to get hypothesis length. */
1768  len = 0;
1769  for (p = path; p; p = p->parent) {
1770  if (dict_real_word(ps_search_dict(search), p->node->basewid)) {
1771  char *wstr = dict_wordstr(ps_search_dict(search), p->node->basewid);
1772  if (wstr != NULL)
1773  len += strlen(wstr) + 1;
1774  }
1775  }
1776 
1777  if (len == 0) {
1778  return NULL;
1779  }
1780 
1781  /* Backtrace again to construct hypothesis string. */
1782  hyp = ckd_calloc(1, len);
1783  c = hyp + len - 1;
1784  for (p = path; p; p = p->parent) {
1785  if (dict_real_word(ps_search_dict(search), p->node->basewid)) {
1786  char *wstr = dict_wordstr(ps_search_dict(search), p->node->basewid);
1787  if (wstr != NULL) {
1788  len = strlen(wstr);
1789  c -= len;
1790  memcpy(c, wstr, len);
1791  if (c > hyp) {
1792  --c;
1793  *c = ' ';
1794  }
1795  }
1796  }
1797  }
1798 
1799  nbest->hyps = glist_add_ptr(nbest->hyps, hyp);
1800  return hyp;
1801 }
1802 
1803 static void
1804 ps_astar_node2itor(astar_seg_t *itor)
1805 {
1806  ps_seg_t *seg = (ps_seg_t *)itor;
1807  ps_latnode_t *node;
1808 
1809  assert(itor->cur < itor->n_nodes);
1810  node = itor->nodes[itor->cur];
1811  if (itor->cur == itor->n_nodes - 1)
1812  seg->ef = node->lef;
1813  else
1814  seg->ef = itor->nodes[itor->cur + 1]->sf - 1;
1815  seg->word = dict_wordstr(ps_search_dict(seg->search), node->wid);
1816  seg->sf = node->sf;
1817  seg->prob = 0; /* FIXME: implement forward-backward */
1818 }
1819 
1820 static void
1821 ps_astar_seg_free(ps_seg_t *seg)
1822 {
1823  astar_seg_t *itor = (astar_seg_t *)seg;
1824  ckd_free(itor->nodes);
1825  ckd_free(itor);
1826 }
1827 
1828 static ps_seg_t *
1829 ps_astar_seg_next(ps_seg_t *seg)
1830 {
1831  astar_seg_t *itor = (astar_seg_t *)seg;
1832 
1833  ++itor->cur;
1834  if (itor->cur == itor->n_nodes) {
1835  ps_astar_seg_free(seg);
1836  return NULL;
1837  }
1838  else {
1839  ps_astar_node2itor(itor);
1840  }
1841 
1842  return seg;
1843 }
1844 
1845 static ps_segfuncs_t ps_astar_segfuncs = {
1846  /* seg_next */ ps_astar_seg_next,
1847  /* seg_free */ ps_astar_seg_free
1848 };
1849 
1850 ps_seg_t *
1851 ps_astar_seg_iter(ps_astar_t *astar, ps_latpath_t *path, float32 lwf)
1852 {
1853  astar_seg_t *itor;
1854  ps_latpath_t *p;
1855  int cur;
1856 
1857  /* Backtrace and make an iterator, this should look familiar by now. */
1858  itor = ckd_calloc(1, sizeof(*itor));
1859  itor->base.vt = &ps_astar_segfuncs;
1860  itor->base.search = astar->dag->search;
1861  itor->base.lwf = lwf;
1862  itor->n_nodes = itor->cur = 0;
1863  for (p = path; p; p = p->parent) {
1864  ++itor->n_nodes;
1865  }
1866  itor->nodes = ckd_calloc(itor->n_nodes, sizeof(*itor->nodes));
1867  cur = itor->n_nodes - 1;
1868  for (p = path; p; p = p->parent) {
1869  itor->nodes[cur] = p->node;
1870  --cur;
1871  }
1872 
1873  ps_astar_node2itor(itor);
1874  return (ps_seg_t *)itor;
1875 }
1876 
1877 void
1879 {
1880  gnode_t *gn;
1881 
1882  /* Free all hyps. */
1883  for (gn = nbest->hyps; gn; gn = gnode_next(gn)) {
1884  ckd_free(gnode_ptr(gn));
1885  }
1886  glist_free(nbest->hyps);
1887  /* Free all paths. */
1888  listelem_alloc_free(nbest->latpath_alloc);
1889  /* Free the Henge. */
1890  ckd_free(nbest);
1891 }
dict_t * dict_init(cmd_ln_t *config, bin_mdef_t *mdef)
Initialize a new dictionary.
Definition: dict.c:251
ps_latlink_iter_t * ps_latnode_exits(ps_latnode_t *node)
Iterate over exits from this node.
Definition: ps_lattice.c:772
Internal implementation of PocketSphinx decoder.
int32 ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset, float32 ascale)
Calculate link posterior probabilities on a word graph.
Definition: ps_lattice.c:1408
ps_latlink_t * ps_lattice_traverse_edges(ps_lattice_t *dag, ps_latnode_t *start, ps_latnode_t *end)
Start a forward traversal of edges in a word graph.
Definition: ps_lattice.c:1114
int16 reachable
From.
void ps_astar_finish(ps_astar_t *nbest)
Finish N-best search, releasing resources associated with it.
Definition: ps_lattice.c:1878
ps_latpath_t * ps_astar_next(ps_astar_t *nbest)
Find next best hypothesis of A* on a word graph.
Definition: ps_lattice.c:1724
void ps_lattice_delq(ps_lattice_t *dag)
Clear and reset the traversal queue.
Definition: ps_lattice.c:1106
char const * ps_astar_hyp(ps_astar_t *nbest, ps_latpath_t *path)
Get hypothesis string from A* search.
Definition: ps_lattice.c:1757
listelem_alloc_t * latlink_list_alloc
List element allocator for this DAG.
ps_latlink_t * ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, float32 ascale)
Do N-Gram based best-path search on a word graph.
Definition: ps_lattice.c:1238
Base structure for search module.
ps_seg_t * ps_lattice_seg_iter(ps_lattice_t *dag, ps_latlink_t *link, float32 lwf)
Get hypothesis segmentation iterator after bestpath search.
Definition: ps_lattice.c:1029
logmath_t * lmath
Log-math object.
dict_t * dict
Pronunciation dictionary.
int32 fanin
Number nodes with links to this node.
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
Definition: dict.c:382
int32 lef
Last end frame.
ps_latnode_iter_t * ps_latnode_iter(ps_lattice_t *dag)
Start iterating over nodes in the lattice.
Definition: ps_lattice.c:711
acmod_t * acmod
Acoustic model.
int dict_free(dict_t *d)
Release a pointer to a dictionary.
Definition: dict.c:451
int32 id
Unique id for this node.
ps_seg_t base
Base structure.
char const * ps_latlink_baseword(ps_lattice_t *dag, ps_latlink_t *link)
Get base word string from a lattice link.
Definition: ps_lattice.c:831
glist_t hyps
List of hypothesis strings.
int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
Definition: ps_lattice.c:688
ps_latnode_t * start
Starting node.
ps_segfuncs_t * vt
V-table of seg methods.
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
ps_latnode_t * ps_latnode_iter_node(ps_latnode_iter_t *itor)
Get node from iterator.
Definition: ps_lattice.c:729
ps_latlink_t ** links
Array of lattice links.
int32 lscr
Language model score.
Operations on dictionary.
latlink_list_t * q_head
Queue of links for traversal.
ps_latnode_t * ps_latlink_nodes(ps_latlink_t *link, ps_latnode_t **out_src)
Get destination and source nodes from a lattice link.
Definition: ps_lattice.c:816
ps_search_t * search
Search (if generated by search).
#define BAD_S3WID
Dictionary word id.
Definition: s3types.h:136
ps_lattice_t * ps_lattice_retain(ps_lattice_t *dag)
Retain a lattice.
Definition: ps_lattice.c:681
frame_idx_t n_frames
Number of frames for this utterance.
ps_latlink_t * ps_lattice_reverse_next(ps_lattice_t *dag, ps_latnode_t *start)
Get the next link in reverse traversal.
Definition: ps_lattice.c:1196
Word graph search implementation.
void ps_lattice_link(ps_lattice_t *dag, ps_latnode_t *from, ps_latnode_t *to, int32 score, int32 ef)
Create a directed link between "from" and "to" nodes, but if a link already exists, choose one with the best link_scr.
Definition: ps_lattice.c:65
int32 frate
Frame rate.
ps_latnode_t * nodes
List of all nodes.
listelem_alloc_t * latnode_alloc
Node allocator for this DAG.
A* search structure.
ps_latlink_t * ps_latlink_pred(ps_latlink_t *link)
Get predecessor link in best path.
Definition: ps_lattice.c:839
int32 prob
Log posterior probability.
int ps_latlink_times(ps_latlink_t *link, int16 *out_sf)
Get start and end times from a lattice link.
Definition: ps_lattice.c:802
dict_t * dict_retain(dict_t *d)
Retain a pointer to an dict_t.
Definition: dict.c:444
latlink_list_t * entries
Links into this node.
struct ps_latnode_s * alt
Node with alternate pronunciation for this word.
char const * ps_latnode_word(ps_lattice_t *dag, ps_latnode_t *node)
Get word string for this node.
Definition: ps_lattice.c:743
char const * ps_latnode_baseword(ps_lattice_t *dag, ps_latnode_t *node)
Get base word string for this node.
Definition: ps_lattice.c:749
char const * word
Word string (pointer into dictionary hash)
ps_search_t * search
Search object from whence this came.
int32 final_node_ascr
Acoustic score of implicit link exiting final node.
ps_search_t * search
Currently active search module.
ps_latlink_t * ps_lattice_traverse_next(ps_lattice_t *dag, ps_latnode_t *end)
Get the next link in forward traversal.
Definition: ps_lattice.c:1140
ps_latlink_t * ps_lattice_popq(ps_lattice_t *dag)
Remove an edge from the traversal queue.
Definition: ps_lattice.c:1089
ps_latlink_iter_t * ps_latlink_iter_next(ps_latlink_iter_t *itor)
Get next link from a lattice link iterator.
Definition: ps_lattice.c:784
struct ps_latpath_s * next
Pointer to next path in list of paths.
logmath_t * lmath
Log math computation.
void ps_lattice_pushq(ps_lattice_t *dag, ps_latlink_t *link)
Add an edge to the traversal queue.
Definition: ps_lattice.c:1077
int32 ps_latnode_prob(ps_lattice_t *dag, ps_latnode_t *node, ps_latlink_t **out_link)
Get best posterior probability and associated acoustic score from a lattice node. ...
Definition: ps_lattice.c:755
N-Gram search module structure.
Definition: ngram_search.h:197
struct ps_latpath_s * parent
Previous element in this path.
int32 ps_lattice_write_htk(ps_lattice_t *dag, char const *filename)
Write a lattice to disk in HTK format.
Definition: ps_lattice.c:293
Decoder object.
void ps_lattice_delete_unreachable(ps_lattice_t *dag)
Remove nodes marked as unreachable.
Definition: ps_lattice.c:197
int16 n_links
Number of lattice links.
ps_latnode_t * end
Ending node.
frame_idx_t sf
Start frame.
logmath_t * ps_lattice_get_logmath(ps_lattice_t *dag)
Get the log-math computation object for this lattice.
Definition: ps_lattice.c:705
latlink_list_t * exits
Links out of this node.
int32 silence
Silence word ID.
#define WORST_SCORE
Large "bad" score.
Definition: hmm.h:88
N-Gram based multi-pass search ("FBS")
frame_idx_t ef
End frame.
int32 ascr
Acoustic score.
cmd_ln_t * config
Configuration.
void ps_latlink_iter_free(ps_latlink_iter_t *itor)
Stop iterating over links.
Definition: ps_lattice.c:790
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
Definition: dict.c:396
dict_t * dict
Dictionary for this DAG.
listelem_alloc_t * latpath_alloc
Path allocator for N-best search.
listelem_alloc_t * latlink_alloc
Link allocator for this DAG.
int32 wid
Dictionary word id.
int32 node_id
Node from fsg model, used to map lattice back to model.
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:77
ps_latlink_iter_t * ps_latnode_entries(ps_latnode_t *node)
Iterate over entries to this node.
Definition: ps_lattice.c:778
char const * ps_lattice_hyp(ps_lattice_t *dag, ps_latlink_t *link)
Get hypothesis string after bestpath search.
Definition: ps_lattice.c:853
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e.
Definition: dict.c:410
Word graph structure used in bestpath/nbest search.
#define WORSE_THAN
Is one score worse than another?
Definition: hmm.h:104
ps_astar_t * ps_astar_start(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, int sf, int ef, int w1, int w2)
Begin N-Gram based A* search on a word graph.
Definition: ps_lattice.c:1665
int32 norm
Normalizer for posterior probabilities.
Segmentation "iterator" for A* search results.
int refcount
Reference count.
latlink_list_t * q_tail
Queue of links for traversal.
int ps_lattice_n_frames(ps_lattice_t *dag)
Get the number of frames in the lattice.
Definition: ps_lattice.c:656
int16 cur
Current position in bpidx.
Partial path structure used in N-best (A*) search.
ps_latlink_t * ps_latlink_iter_link(ps_latlink_iter_t *itor)
Get link from iterator.
Definition: ps_lattice.c:796
Segmentation "iterator" for backpointer table results.
#define BETTER_THAN
Is one score better than another?
Definition: hmm.h:99
dict_t * dict
Pronunciation dictionary.
int32 fef
First end frame.
int32 norm
Normalizer for posterior probabilities.
int32 lback
Language model backoff.
int32 basewid
Dictionary base word id.
latlink_list_t * latlink_list_new(ps_lattice_t *dag, ps_latlink_t *link, latlink_list_t *next)
Create a new lattice link element.
Definition: ps_lattice.c:1065
int ps_latnode_times(ps_latnode_t *node, int16 *out_fef, int16 *out_lef)
Get start and end time range for a node.
Definition: ps_lattice.c:735
ps_latlink_t * ps_lattice_reverse_edges(ps_lattice_t *dag, ps_latnode_t *start, ps_latnode_t *end)
Start a reverse traversal of edges in a word graph.
Definition: ps_lattice.c:1171
ps_latnode_iter_t * ps_latnode_iter_next(ps_latnode_iter_t *itor)
Move to next node in iteration.
Definition: ps_lattice.c:717
ps_lattice_t * ps_lattice_init_search(ps_search_t *search, int n_frame)
Construct an empty word graph with reference to a search structure.
Definition: ps_lattice.c:662
ps_lattice_t * ps_lattice_read(ps_decoder_t *ps, char const *file)
Read a lattice from a file on disk.
Definition: ps_lattice.c:410
s3wid_t dict_add_word(dict_t *d, char const *word, s3cipid_t const *p, int32 np)
Add a word with the given ciphone pronunciation list to the dictionary.
Definition: dict.c:80
char const * ps_latlink_word(ps_lattice_t *dag, ps_latlink_t *link)
Get word string from a lattice link.
Definition: ps_lattice.c:823
struct ps_latnode_s * next
Next node in DAG (no ordering implied)
void ps_lattice_bypass_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen)
Bypass filler words.
Definition: ps_lattice.c:106
int32 rem_score
Estimated best score from node.sf to end.
void ps_latnode_iter_free(ps_latnode_iter_t *itor)
Stop iterating over nodes.
Definition: ps_lattice.c:723
ps_seg_t * ps_astar_seg_iter(ps_astar_t *astar, ps_latpath_t *path, float32 lwf)
Get hypothesis segmentation from A* search.
Definition: ps_lattice.c:1851
char * hyp_str
Current hypothesis string.
Base structure for hypothesis segmentation iterator.
cmd_ln_t * config
Configuration.
ps_latnode_t * node
Node ending this path.
int32 score
Exact score from start node up to node->sf.
int32 ps_lattice_posterior_prune(ps_lattice_t *dag, int32 beam)
Prune all links (and associated nodes) below a certain posterior probability.
Definition: ps_lattice.c:1477
int32 ps_latlink_prob(ps_lattice_t *dag, ps_latlink_t *link, int32 *out_ascr)
Get acoustic score and posterior probability from a lattice link.
Definition: ps_lattice.c:845
float32 lwf
Language weight factor (for second-pass searches)
frame_idx_t sf
Start frame.
int32 ps_lattice_write(ps_lattice_t *dag, char const *filename)
Write a lattice to disk.
Definition: ps_lattice.c:233
int32 dict_word2basestr(char *word)
If the given word contains a trailing "(....)" (i.e., a Sphinx-II style alternative pronunciation spe...
Definition: dict.c:425