PocketSphinx  0.6
bin_mdef.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2005 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*********************************************************************
38  *
39  * File: bin_mdef.c
40  *
41  * Description:
42  * Binary format model definition files, with support for
43  * heterogeneous topologies and variable-size N-phones
44  *
45  * Author:
46  * David Huggins-Daines <dhuggins@cs.cmu.edu>
47  *********************************************************************/
48 
49 /* System headers. */
50 #include <stdio.h>
51 #include <string.h>
52 #include <assert.h>
53 
54 /* SphinxBase headers. */
55 #include <sphinxbase/prim_type.h>
56 #include <sphinxbase/ckd_alloc.h>
57 #include <sphinxbase/byteorder.h>
58 #include <sphinxbase/case.h>
59 #include <sphinxbase/err.h>
60 
61 /* Local headers. */
62 #include "mdef.h"
63 #include "bin_mdef.h"
64 
65 bin_mdef_t *
66 bin_mdef_read_text(cmd_ln_t *config, const char *filename)
67 {
68  bin_mdef_t *bmdef;
69  mdef_t *mdef;
70  int i, nodes, ci_idx, lc_idx, rc_idx;
71  int nchars;
72 
73  if ((mdef = mdef_init((char *) filename, TRUE)) == NULL)
74  return NULL;
75 
76  /* Enforce some limits. */
77  if (mdef->n_sen > BAD_SENID) {
78  E_ERROR("Number of senones exceeds limit: %d > %d\n",
79  mdef->n_sen, BAD_SENID);
80  mdef_free(mdef);
81  return NULL;
82  }
83  if (mdef->n_sseq > BAD_SSID) {
84  E_ERROR("Number of senone sequences exceeds limit: %d > %d\n",
85  mdef->n_sseq, BAD_SSID);
86  mdef_free(mdef);
87  return NULL;
88  }
89  /* We use uint8 for ciphones */
90  if (mdef->n_ciphone > 255) {
91  E_ERROR("Number of phones exceeds limit: %d > %d\n",
92  mdef->n_ciphone, 255);
93  mdef_free(mdef);
94  return NULL;
95  }
96 
97  bmdef = ckd_calloc(1, sizeof(*bmdef));
98  bmdef->refcnt = 1;
99 
100  /* Easy stuff. The mdef.c code has done the heavy lifting for us. */
101  bmdef->n_ciphone = mdef->n_ciphone;
102  bmdef->n_phone = mdef->n_phone;
103  bmdef->n_emit_state = mdef->n_emit_state;
104  bmdef->n_ci_sen = mdef->n_ci_sen;
105  bmdef->n_sen = mdef->n_sen;
106  bmdef->n_tmat = mdef->n_tmat;
107  bmdef->n_sseq = mdef->n_sseq;
108  bmdef->sseq = mdef->sseq;
109  bmdef->cd2cisen = mdef->cd2cisen;
110  bmdef->sen2cimap = mdef->sen2cimap;
111  bmdef->n_ctx = 3; /* Triphones only. */
112  bmdef->sil = mdef->sil;
113  mdef->sseq = NULL; /* We are taking over this one. */
114  mdef->cd2cisen = NULL; /* And this one. */
115  mdef->sen2cimap = NULL; /* And this one. */
116 
117  /* Get the phone names. If they are not sorted
118  * ASCII-betically then we are in a world of hurt and
119  * therefore will simply refuse to continue. */
120  bmdef->ciname = ckd_calloc(bmdef->n_ciphone, sizeof(*bmdef->ciname));
121  nchars = 0;
122  for (i = 0; i < bmdef->n_ciphone; ++i)
123  nchars += strlen(mdef->ciphone[i].name) + 1;
124  bmdef->ciname[0] = ckd_calloc(nchars, 1);
125  strcpy(bmdef->ciname[0], mdef->ciphone[0].name);
126  for (i = 1; i < bmdef->n_ciphone; ++i) {
127  bmdef->ciname[i] =
128  bmdef->ciname[i - 1] + strlen(bmdef->ciname[i - 1]) + 1;
129  strcpy(bmdef->ciname[i], mdef->ciphone[i].name);
130  if (i > 0 && strcmp(bmdef->ciname[i - 1], bmdef->ciname[i]) > 0) {
131  /* FIXME: there should be a solution to this, actually. */
132  E_ERROR("Phone names are not in sorted order, sorry.");
133  bin_mdef_free(bmdef);
134  return NULL;
135  }
136  }
137 
138  /* Copy over phone information. */
139  bmdef->phone = ckd_calloc(bmdef->n_phone, sizeof(*bmdef->phone));
140  for (i = 0; i < mdef->n_phone; ++i) {
141  bmdef->phone[i].ssid = mdef->phone[i].ssid;
142  bmdef->phone[i].tmat = mdef->phone[i].tmat;
143  if (i < bmdef->n_ciphone) {
144  bmdef->phone[i].info.ci.filler = mdef->ciphone[i].filler;
145  }
146  else {
147  bmdef->phone[i].info.cd.wpos = mdef->phone[i].wpos;
148  bmdef->phone[i].info.cd.ctx[0] = mdef->phone[i].ci;
149  bmdef->phone[i].info.cd.ctx[1] = mdef->phone[i].lc;
150  bmdef->phone[i].info.cd.ctx[2] = mdef->phone[i].rc;
151  }
152  }
153 
154  /* Walk the wpos_ci_lclist once to find the total number of
155  * nodes and the starting locations for each level. */
156  nodes = lc_idx = ci_idx = rc_idx = 0;
157  for (i = 0; i < N_WORD_POSN; ++i) {
158  int j;
159  for (j = 0; j < mdef->n_ciphone; ++j) {
160  ph_lc_t *lc;
161 
162  for (lc = mdef->wpos_ci_lclist[i][j]; lc; lc = lc->next) {
163  ph_rc_t *rc;
164  for (rc = lc->rclist; rc; rc = rc->next) {
165  ++nodes; /* RC node */
166  }
167  ++nodes; /* LC node */
168  ++rc_idx; /* Start of RC nodes (after LC nodes) */
169  }
170  ++nodes; /* CI node */
171  ++lc_idx; /* Start of LC nodes (after CI nodes) */
172  ++rc_idx; /* Start of RC nodes (after CI and LC nodes) */
173  }
174  ++nodes; /* wpos node */
175  ++ci_idx; /* Start of CI nodes (after wpos nodes) */
176  ++lc_idx; /* Start of LC nodes (after CI nodes) */
177  ++rc_idx; /* STart of RC nodes (after wpos, CI, and LC nodes) */
178  }
179  E_INFO("Allocating %d * %d bytes (%d KiB) for CD tree\n",
180  nodes, sizeof(*bmdef->cd_tree),
181  nodes * sizeof(*bmdef->cd_tree) / 1024);
182  bmdef->n_cd_tree = nodes;
183  bmdef->cd_tree = ckd_calloc(nodes, sizeof(*bmdef->cd_tree));
184  for (i = 0; i < N_WORD_POSN; ++i) {
185  int j;
186 
187  bmdef->cd_tree[i].ctx = i;
188  bmdef->cd_tree[i].n_down = mdef->n_ciphone;
189  bmdef->cd_tree[i].c.down = ci_idx;
190 #if 0
191  E_INFO("%d => %c (%d@%d)\n",
192  i, (WPOS_NAME)[i],
193  bmdef->cd_tree[i].n_down, bmdef->cd_tree[i].c.down);
194 #endif
195 
196  /* Now we can build the rest of the tree. */
197  for (j = 0; j < mdef->n_ciphone; ++j) {
198  ph_lc_t *lc;
199 
200  bmdef->cd_tree[ci_idx].ctx = j;
201  bmdef->cd_tree[ci_idx].c.down = lc_idx;
202  for (lc = mdef->wpos_ci_lclist[i][j]; lc; lc = lc->next) {
203  ph_rc_t *rc;
204 
205  bmdef->cd_tree[lc_idx].ctx = lc->lc;
206  bmdef->cd_tree[lc_idx].c.down = rc_idx;
207  for (rc = lc->rclist; rc; rc = rc->next) {
208  bmdef->cd_tree[rc_idx].ctx = rc->rc;
209  bmdef->cd_tree[rc_idx].n_down = 0;
210  bmdef->cd_tree[rc_idx].c.pid = rc->pid;
211 #if 0
212  E_INFO("%d => %s %s %s %c (%d@%d)\n",
213  rc_idx,
214  bmdef->ciname[j],
215  bmdef->ciname[lc->lc],
216  bmdef->ciname[rc->rc],
217  (WPOS_NAME)[i],
218  bmdef->cd_tree[rc_idx].n_down,
219  bmdef->cd_tree[rc_idx].c.down);
220 #endif
221 
222  ++bmdef->cd_tree[lc_idx].n_down;
223  ++rc_idx;
224  }
225  /* If there are no triphones here,
226  * this is considered a leafnode, so
227  * set the pid to -1. */
228  if (bmdef->cd_tree[lc_idx].n_down == 0)
229  bmdef->cd_tree[lc_idx].c.pid = -1;
230 #if 0
231  E_INFO("%d => %s %s %c (%d@%d)\n",
232  lc_idx,
233  bmdef->ciname[j],
234  bmdef->ciname[lc->lc],
235  (WPOS_NAME)[i],
236  bmdef->cd_tree[lc_idx].n_down,
237  bmdef->cd_tree[lc_idx].c.down);
238 #endif
239 
240  ++bmdef->cd_tree[ci_idx].n_down;
241  ++lc_idx;
242  }
243 
244  /* As above, so below. */
245  if (bmdef->cd_tree[ci_idx].n_down == 0)
246  bmdef->cd_tree[ci_idx].c.pid = -1;
247 #if 0
248  E_INFO("%d => %d=%s (%d@%d)\n",
249  ci_idx, j, bmdef->ciname[j],
250  bmdef->cd_tree[ci_idx].n_down,
251  bmdef->cd_tree[ci_idx].c.down);
252 #endif
253 
254  ++ci_idx;
255  }
256  }
257 
258  mdef_free(mdef);
259 
260  bmdef->alloc_mode = BIN_MDEF_FROM_TEXT;
261  return bmdef;
262 }
263 
264 bin_mdef_t *
266 {
267  ++m->refcnt;
268  return m;
269 }
270 
271 int
273 {
274  if (m == NULL)
275  return 0;
276  if (--m->refcnt > 0)
277  return m->refcnt;
278 
279  switch (m->alloc_mode) {
280  case BIN_MDEF_FROM_TEXT:
281  ckd_free(m->ciname[0]);
282  ckd_free(m->sseq[0]);
283  ckd_free(m->phone);
284  ckd_free(m->cd_tree);
285  break;
286  case BIN_MDEF_IN_MEMORY:
287  ckd_free(m->ciname[0]);
288  break;
289  case BIN_MDEF_ON_DISK:
290  break;
291  }
292  if (m->filemap)
293  mmio_file_unmap(m->filemap);
294  ckd_free(m->cd2cisen);
295  ckd_free(m->sen2cimap);
296  ckd_free(m->ciname);
297  ckd_free(m->sseq);
298  ckd_free(m);
299  return 0;
300 }
301 
/* Human-readable description of the binary file layout.
 * bin_mdef_write() stores this text (padded to a 4-byte boundary) in the
 * file header, and bin_mdef_read() skips over it using the stored length.
 * Its size therefore determines the header length of written files. */
static const char format_desc[] =
    "BEGIN FILE FORMAT DESCRIPTION\n"
    "int32 n_ciphone; /**< Number of base (CI) phones */\n"
    "int32 n_phone; /**< Number of base (CI) phones + (CD) triphones */\n"
    "int32 n_emit_state; /**< Number of emitting states per phone (0 if heterogeneous) */\n"
    "int32 n_ci_sen; /**< Number of CI senones; these are the first */\n"
    "int32 n_sen; /**< Number of senones (CI+CD) */\n"
    "int32 n_tmat; /**< Number of transition matrices */\n"
    "int32 n_sseq; /**< Number of unique senone sequences */\n"
    "int32 n_ctx; /**< Number of phones of context */\n"
    "int32 n_cd_tree; /**< Number of nodes in CD tree structure */\n"
    "int32 sil; /**< CI phone ID for silence */\n"
    "char ciphones[][]; /**< CI phone strings (null-terminated) */\n"
    "char padding[]; /**< Padding to a 4-bytes boundary */\n"
    "struct { int16 ctx; int16 n_down; int32 pid/down } cd_tree[];\n"
    "struct { int32 ssid; int32 tmat; int8 attr[4] } phones[];\n"
    "int16 sseq[]; /**< Unique senone sequences */\n"
    "int8 sseq_len[]; /**< Number of states in each sseq (none if homogeneous) */\n"
    "END FILE FORMAT DESCRIPTION\n";
321 
322 bin_mdef_t *
323 bin_mdef_read(cmd_ln_t *config, const char *filename)
324 {
325  bin_mdef_t *m;
326  FILE *fh;
327  size_t tree_start;
328  int32 val, i, swap;
329  OFF_T pos, end;
330  int32 *sseq_size;
331  int do_mmap;
332 
333  /* Try to read it as text first. */
334  if ((m = bin_mdef_read_text(config, filename)) != NULL)
335  return m;
336 
337  E_INFO("Reading binary model definition: %s\n", filename);
338  if ((fh = fopen(filename, "rb")) == NULL)
339  return NULL;
340 
341  if (fread(&val, 4, 1, fh) != 1) {
342  fclose(fh);
343  E_ERROR_SYSTEM("Failed to read byte-order marker from %s\n",
344  filename);
345  return NULL;
346  }
347  swap = 0;
348  if (val == BIN_MDEF_OTHER_ENDIAN) {
349  swap = 1;
350  E_INFO("Must byte-swap %s\n", filename);
351  }
352  if (fread(&val, 4, 1, fh) != 1) {
353  fclose(fh);
354  E_ERROR_SYSTEM("Failed to read version from %s\n", filename);
355  return NULL;
356  }
357  if (swap)
358  SWAP_INT32(&val);
359  if (val > BIN_MDEF_FORMAT_VERSION) {
360  E_ERROR("File format version %d for %s is newer than library\n",
361  val, filename);
362  fclose(fh);
363  return NULL;
364  }
365  if (fread(&val, 4, 1, fh) != 1) {
366  fclose(fh);
367  E_ERROR_SYSTEM("Failed to read header length from %s\n", filename);
368  return NULL;
369  }
370  if (swap)
371  SWAP_INT32(&val);
372  /* Skip format descriptor. */
373  FSEEK(fh, val, SEEK_CUR);
374 
375  /* Finally allocate it. */
376  m = ckd_calloc(1, sizeof(*m));
377  m->refcnt = 1;
378 
379  /* Check these, to make gcc/glibc shut up. */
380 #define FREAD_SWAP32_CHK(dest) \
381  if (fread((dest), 4, 1, fh) != 1) { \
382  fclose(fh); \
383  ckd_free(m); \
384  E_ERROR_SYSTEM("Failed to read %s from %s\n", #dest, filename); \
385  return NULL; \
386  } \
387  if (swap) SWAP_INT32(dest);
388 
389  FREAD_SWAP32_CHK(&m->n_ciphone);
390  FREAD_SWAP32_CHK(&m->n_phone);
391  FREAD_SWAP32_CHK(&m->n_emit_state);
392  FREAD_SWAP32_CHK(&m->n_ci_sen);
393  FREAD_SWAP32_CHK(&m->n_sen);
394  FREAD_SWAP32_CHK(&m->n_tmat);
395  FREAD_SWAP32_CHK(&m->n_sseq);
396  FREAD_SWAP32_CHK(&m->n_ctx);
397  FREAD_SWAP32_CHK(&m->n_cd_tree);
398  FREAD_SWAP32_CHK(&m->sil);
399 
400  /* CI names are first in the file. */
401  m->ciname = ckd_calloc(m->n_ciphone, sizeof(*m->ciname));
402 
403  /* Decide whether to read in the whole file or mmap it. */
404  do_mmap = config ? cmd_ln_boolean_r(config, "-mmap") : TRUE;
405  if (swap) {
406  E_WARN("-mmap specified, but mdef is other-endian. Will not memory-map.\n");
407  do_mmap = FALSE;
408  }
409  /* Actually try to mmap it. */
410  if (do_mmap) {
411  m->filemap = mmio_file_read(filename);
412  if (m->filemap == NULL)
413  do_mmap = FALSE;
414  }
415  pos = FTELL(fh);
416  if (do_mmap) {
417  /* Get the base pointer from the memory map. */
418  m->ciname[0] = (char *)mmio_file_ptr(m->filemap) + pos;
419  /* Success! */
420  m->alloc_mode = BIN_MDEF_ON_DISK;
421  }
422  else {
423  /* Read everything into memory. */
424  m->alloc_mode = BIN_MDEF_IN_MEMORY;
425  FSEEK(fh, 0, SEEK_END);
426  end = FTELL(fh);
427  FSEEK(fh, pos, SEEK_SET);
428  m->ciname[0] = ckd_malloc(end - pos);
429  if (fread(m->ciname[0], 1, end - pos, fh) != end - pos)
430  E_FATAL("Failed to read %" PRIdOFF_T " bytes of data from %s\n", end - pos, filename);
431  }
432 
433  for (i = 1; i < m->n_ciphone; ++i)
434  m->ciname[i] = m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1;
435 
436  /* Skip past the padding. */
437  tree_start =
438  m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1 - m->ciname[0];
439  tree_start = (tree_start + 3) & ~3;
440  m->cd_tree = (cd_tree_t *) (m->ciname[0] + tree_start);
441  if (swap) {
442  for (i = 0; i < m->n_cd_tree; ++i) {
443  SWAP_INT16(&m->cd_tree[i].ctx);
444  SWAP_INT16(&m->cd_tree[i].n_down);
445  SWAP_INT32(&m->cd_tree[i].c.down);
446  }
447  }
448  m->phone = (mdef_entry_t *) (m->cd_tree + m->n_cd_tree);
449  if (swap) {
450  for (i = 0; i < m->n_phone; ++i) {
451  SWAP_INT32(&m->phone[i].ssid);
452  SWAP_INT32(&m->phone[i].tmat);
453  }
454  }
455  sseq_size = (int32 *) (m->phone + m->n_phone);
456  if (swap)
457  SWAP_INT32(sseq_size);
458  m->sseq = ckd_calloc(m->n_sseq, sizeof(*m->sseq));
459  m->sseq[0] = (uint16 *) (sseq_size + 1);
460  if (swap) {
461  for (i = 0; i < *sseq_size; ++i)
462  SWAP_INT16(m->sseq[0] + i);
463  }
464  if (m->n_emit_state) {
465  for (i = 1; i < m->n_sseq; ++i)
466  m->sseq[i] = m->sseq[0] + i * m->n_emit_state;
467  }
468  else {
469  m->sseq_len = (uint8 *) (m->sseq[0] + *sseq_size);
470  for (i = 1; i < m->n_sseq; ++i)
471  m->sseq[i] = m->sseq[i - 1] + m->sseq_len[i - 1];
472  }
473 
474  /* Now build the CD-to-CI mappings using the senone sequences.
475  * This is the only really accurate way to do it, though it is
476  * still inaccurate in the case of heterogeneous topologies or
477  * cross-state tying. */
478  m->cd2cisen = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->cd2cisen));
479  m->sen2cimap = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->sen2cimap));
480 
481  /* Default mappings (identity, none) */
482  for (i = 0; i < m->n_ci_sen; ++i)
483  m->cd2cisen[i] = i;
484  for (; i < m->n_sen; ++i)
485  m->cd2cisen[i] = -1;
486  for (i = 0; i < m->n_sen; ++i)
487  m->sen2cimap[i] = -1;
488  for (i = 0; i < m->n_phone; ++i) {
489  int32 j, ssid = m->phone[i].ssid;
490 
491  for (j = 0; j < bin_mdef_n_emit_state_phone(m, i); ++j) {
492  int s = bin_mdef_sseq2sen(m, ssid, j);
493  int ci = bin_mdef_pid2ci(m, i);
494  /* Take the first one and warn if we have cross-state tying. */
495  if (m->sen2cimap[s] == -1)
496  m->sen2cimap[s] = ci;
497  if (m->sen2cimap[s] != ci)
498  E_WARN
499  ("Senone %d is shared between multiple base phones\n",
500  s);
501 
502  if (j > bin_mdef_n_emit_state_phone(m, ci))
503  E_WARN("CD phone %d has fewer states than CI phone %d\n",
504  i, ci);
505  else
506  m->cd2cisen[s] =
507  bin_mdef_sseq2sen(m, m->phone[ci].ssid, j);
508  }
509  }
510 
511  /* Set the silence phone. */
513 
514  E_INFO
515  ("%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n",
516  m->n_ciphone, m->n_phone - m->n_ciphone, m->n_emit_state,
517  m->n_ci_sen, m->n_sen, m->n_sseq);
518  fclose(fh);
519  return m;
520 }
521 
522 int
523 bin_mdef_write(bin_mdef_t * m, const char *filename)
524 {
525  FILE *fh;
526  int32 val, i;
527 
528  if ((fh = fopen(filename, "wb")) == NULL)
529  return -1;
530 
531  /* Byteorder marker. */
532  val = BIN_MDEF_NATIVE_ENDIAN;
533  fwrite(&val, 1, 4, fh);
534  /* Version. */
535  val = BIN_MDEF_FORMAT_VERSION;
536  fwrite(&val, 1, sizeof(val), fh);
537 
538  /* Round the format descriptor size up to a 4-byte boundary. */
539  val = ((sizeof(format_desc) + 3) & ~3);
540  fwrite(&val, 1, sizeof(val), fh);
541  fwrite(format_desc, 1, sizeof(format_desc), fh);
542  /* Pad it with zeros. */
543  i = 0;
544  fwrite(&i, 1, val - sizeof(format_desc), fh);
545 
546  /* Binary header things. */
547  fwrite(&m->n_ciphone, 4, 1, fh);
548  fwrite(&m->n_phone, 4, 1, fh);
549  fwrite(&m->n_emit_state, 4, 1, fh);
550  fwrite(&m->n_ci_sen, 4, 1, fh);
551  fwrite(&m->n_sen, 4, 1, fh);
552  fwrite(&m->n_tmat, 4, 1, fh);
553  fwrite(&m->n_sseq, 4, 1, fh);
554  fwrite(&m->n_ctx, 4, 1, fh);
555  fwrite(&m->n_cd_tree, 4, 1, fh);
556  /* Write this as a 32-bit value to preserve alignment for the
557  * non-mmap case (we want things aligned both from the
558  * beginning of the file and the beginning of the phone
559  * strings). */
560  val = m->sil;
561  fwrite(&val, 4, 1, fh);
562 
563  /* Phone strings. */
564  for (i = 0; i < m->n_ciphone; ++i)
565  fwrite(m->ciname[i], 1, strlen(m->ciname[i]) + 1, fh);
566  /* Pad with zeros. */
567  val = (FTELL(fh) + 3) & ~3;
568  i = 0;
569  fwrite(&i, 1, val - FTELL(fh), fh);
570 
571  /* Write CD-tree */
572  fwrite(m->cd_tree, sizeof(*m->cd_tree), m->n_cd_tree, fh);
573  /* Write phones */
574  fwrite(m->phone, sizeof(*m->phone), m->n_phone, fh);
575  if (m->n_emit_state) {
576  /* Write size of sseq */
577  val = m->n_sseq * m->n_emit_state;
578  fwrite(&val, 4, 1, fh);
579 
580  /* Write sseq */
581  fwrite(m->sseq[0], sizeof(**m->sseq),
582  m->n_sseq * m->n_emit_state, fh);
583  }
584  else {
585  int32 n;
586 
587  /* Calcluate size of sseq */
588  n = 0;
589  for (i = 0; i < m->n_sseq; ++i)
590  n += m->sseq_len[i];
591 
592  /* Write size of sseq */
593  fwrite(&n, 4, 1, fh);
594 
595  /* Write sseq */
596  fwrite(m->sseq[0], sizeof(**m->sseq), n, fh);
597 
598  /* Write sseq_len */
599  fwrite(m->sseq_len, 1, m->n_sseq, fh);
600  }
601  fclose(fh);
602 
603  return 0;
604 }
605 
606 int
607 bin_mdef_write_text(bin_mdef_t * m, const char *filename)
608 {
609  FILE *fh;
610  int p, i, n_total_state;
611 
612  if (strcmp(filename, "-") == 0)
613  fh = stdout;
614  else {
615  if ((fh = fopen(filename, "w")) == NULL)
616  return -1;
617  }
618 
619  fprintf(fh, "0.3\n");
620  fprintf(fh, "%d n_base\n", m->n_ciphone);
621  fprintf(fh, "%d n_tri\n", m->n_phone - m->n_ciphone);
622  if (m->n_emit_state)
623  n_total_state = m->n_phone * (m->n_emit_state + 1);
624  else {
625  n_total_state = 0;
626  for (i = 0; i < m->n_phone; ++i)
627  n_total_state += m->sseq_len[m->phone[i].ssid] + 1;
628  }
629  fprintf(fh, "%d n_state_map\n", n_total_state);
630  fprintf(fh, "%d n_tied_state\n", m->n_sen);
631  fprintf(fh, "%d n_tied_ci_state\n", m->n_ci_sen);
632  fprintf(fh, "%d n_tied_tmat\n", m->n_tmat);
633  fprintf(fh, "#\n# Columns definitions\n");
634  fprintf(fh, "#%4s %3s %3s %1s %6s %4s %s\n",
635  "base", "lft", "rt", "p", "attrib", "tmat",
636  " ... state id's ...");
637 
638  for (p = 0; p < m->n_ciphone; p++) {
639  int n_state;
640 
641  fprintf(fh, "%5s %3s %3s %1s", m->ciname[p], "-", "-", "-");
642 
643  if (bin_mdef_is_fillerphone(m, p))
644  fprintf(fh, " %6s", "filler");
645  else
646  fprintf(fh, " %6s", "n/a");
647  fprintf(fh, " %4d", m->phone[p].tmat);
648 
649  if (m->n_emit_state)
650  n_state = m->n_emit_state;
651  else
652  n_state = m->sseq_len[m->phone[p].ssid];
653  for (i = 0; i < n_state; i++) {
654  fprintf(fh, " %6u", m->sseq[m->phone[p].ssid][i]);
655  }
656  fprintf(fh, " N\n");
657  }
658 
659 
660  for (; p < m->n_phone; p++) {
661  int n_state;
662 
663  fprintf(fh, "%5s %3s %3s %c",
664  m->ciname[m->phone[p].info.cd.ctx[0]],
665  m->ciname[m->phone[p].info.cd.ctx[1]],
666  m->ciname[m->phone[p].info.cd.ctx[2]],
667  (WPOS_NAME)[m->phone[p].info.cd.wpos]);
668 
669  if (bin_mdef_is_fillerphone(m, p))
670  fprintf(fh, " %6s", "filler");
671  else
672  fprintf(fh, " %6s", "n/a");
673  fprintf(fh, " %4d", m->phone[p].tmat);
674 
675 
676  if (m->n_emit_state)
677  n_state = m->n_emit_state;
678  else
679  n_state = m->sseq_len[m->phone[p].ssid];
680  for (i = 0; i < n_state; i++) {
681  fprintf(fh, " %6u", m->sseq[m->phone[p].ssid][i]);
682  }
683  fprintf(fh, " N\n");
684  }
685 
686  if (strcmp(filename, "-") != 0)
687  fclose(fh);
688  return 0;
689 }
690 
691 int
692 bin_mdef_ciphone_id(bin_mdef_t * m, const char *ciphone)
693 {
694  int low, mid, high;
695 
696  /* Exact binary search on m->ciphone */
697  low = 0;
698  high = m->n_ciphone;
699  while (low < high) {
700  int c;
701 
702  mid = (low + high) / 2;
703  c = strcmp(ciphone, m->ciname[mid]);
704  if (c == 0)
705  return mid;
706  else if (c > 0)
707  low = mid + 1;
708  else if (c < 0)
709  high = mid;
710  }
711  return -1;
712 }
713 
714 int
715 bin_mdef_ciphone_id_nocase(bin_mdef_t * m, const char *ciphone)
716 {
717  int low, mid, high;
718 
719  /* Exact binary search on m->ciphone */
720  low = 0;
721  high = m->n_ciphone;
722  while (low < high) {
723  int c;
724 
725  mid = (low + high) / 2;
726  c = strcmp_nocase(ciphone, m->ciname[mid]);
727  if (c == 0)
728  return mid;
729  else if (c > 0)
730  low = mid + 1;
731  else if (c < 0)
732  high = mid;
733  }
734  return -1;
735 }
736 
737 const char *
739 {
740  assert(m != NULL);
741  assert(ci < m->n_ciphone);
742  return m->ciname[ci];
743 }
744 
745 int
746 bin_mdef_phone_id(bin_mdef_t * m, int32 ci, int32 lc, int32 rc, int32 wpos)
747 {
748  cd_tree_t *cd_tree;
749  int level, max;
750  int16 ctx[4];
751 
752  assert(m);
753 
754  /* In the future, we might back off when context is not available,
755  * but for now we'll just return the CI phone. */
756  if (lc < 0 || rc < 0)
757  return ci;
758 
759  assert((ci >= 0) && (ci < m->n_ciphone));
760  assert((lc >= 0) && (lc < m->n_ciphone));
761  assert((rc >= 0) && (rc < m->n_ciphone));
762  assert((wpos >= 0) && (wpos < N_WORD_POSN));
763 
764  /* Create a context list, mapping fillers to silence. */
765  ctx[0] = wpos;
766  ctx[1] = ci;
767  ctx[2] = (m->sil >= 0
768  && m->phone[lc].info.ci.filler) ? m->sil : lc;
769  ctx[3] = (m->sil >= 0
770  && m->phone[rc].info.ci.filler) ? m->sil : rc;
771 
772  /* Walk down the cd_tree. */
773  cd_tree = m->cd_tree;
774  level = 0; /* What level we are on. */
775  max = N_WORD_POSN; /* Number of nodes on this level. */
776  while (level < 4) {
777  int i;
778 
779 #if 0
780  E_INFO("Looking for context %d=%s in %d at %d\n",
781  ctx[level], m->ciname[ctx[level]],
782  max, cd_tree - m->cd_tree);
783 #endif
784  for (i = 0; i < max; ++i) {
785 #if 0
786  E_INFO("Look at context %d=%s at %d\n",
787  cd_tree[i].ctx,
788  m->ciname[cd_tree[i].ctx], cd_tree + i - m->cd_tree);
789 #endif
790  if (cd_tree[i].ctx == ctx[level])
791  break;
792  }
793  if (i == max)
794  return -1;
795 #if 0
796  E_INFO("Found context %d=%s at %d, n_down=%d, down=%d\n",
797  ctx[level], m->ciname[ctx[level]],
798  cd_tree + i - m->cd_tree,
799  cd_tree[i].n_down, cd_tree[i].c.down);
800 #endif
801  /* Leaf node, stop here. */
802  if (cd_tree[i].n_down == 0)
803  return cd_tree[i].c.pid;
804 
805  /* Go down one level. */
806  max = cd_tree[i].n_down;
807  cd_tree = m->cd_tree + cd_tree[i].c.down;
808  ++level;
809  }
810  /* We probably shouldn't get here. */
811  return -1;
812 }
813 
814 int
815 bin_mdef_phone_id_nearest(bin_mdef_t * m, int32 b, int32 l, int32 r, int32 pos)
816 {
817  int p, tmppos;
818 
819 
820 
821  /* In the future, we might back off when context is not available,
822  * but for now we'll just return the CI phone. */
823  if (l < 0 || r < 0)
824  return b;
825 
826  p = bin_mdef_phone_id(m, b, l, r, pos);
827  if (p >= 0)
828  return p;
829 
830  /* Exact triphone not found; backoff to other word positions */
831  for (tmppos = 0; tmppos < N_WORD_POSN; tmppos++) {
832  if (tmppos != pos) {
833  p = bin_mdef_phone_id(m, b, l, r, tmppos);
834  if (p >= 0)
835  return p;
836  }
837  }
838 
839  /* Nothing yet; backoff to silence phone if non-silence filler context */
840  /* In addition, backoff to silence phone on left/right if in beginning/end position */
841  if (m->sil >= 0) {
842  int newl = l, newr = r;
843  if (m->phone[(int)l].info.ci.filler
844  || pos == WORD_POSN_BEGIN || pos == WORD_POSN_SINGLE)
845  newl = m->sil;
846  if (m->phone[(int)r].info.ci.filler
847  || pos == WORD_POSN_END || pos == WORD_POSN_SINGLE)
848  newr = m->sil;
849  if ((newl != l) || (newr != r)) {
850  p = bin_mdef_phone_id(m, b, newl, newr, pos);
851  if (p >= 0)
852  return p;
853 
854  for (tmppos = 0; tmppos < N_WORD_POSN; tmppos++) {
855  if (tmppos != pos) {
856  p = bin_mdef_phone_id(m, b, newl, newr, tmppos);
857  if (p >= 0)
858  return p;
859  }
860  }
861  }
862  }
863 
864  /* Nothing yet; backoff to base phone */
865  return b;
866 }
867 
868 int
869 bin_mdef_phone_str(bin_mdef_t * m, int pid, char *buf)
870 {
871  char *wpos_name;
872 
873  assert(m);
874  assert((pid >= 0) && (pid < m->n_phone));
875  wpos_name = WPOS_NAME;
876 
877  buf[0] = '\0';
878  if (pid < m->n_ciphone)
879  sprintf(buf, "%s", bin_mdef_ciphone_str(m, pid));
880  else {
881  sprintf(buf, "%s %s %s %c",
882  bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[0]),
883  bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[1]),
884  bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[2]),
885  wpos_name[m->phone[pid].info.cd.wpos]);
886  }
887  return 0;
888 }
int32 n_sseq
No. of unique senone sequences.
Definition: mdef.h:151
The main model definition structure.
Definition: mdef.h:138
int16 ctx
Context (word position or CI phone)
Definition: bin_mdef.h:108
int16 n_down
Number of children (0 for leafnode)
Definition: bin_mdef.h:109
int32 n_ciphone
number basephones actually present
Definition: mdef.h:139
POCKETSPHINX_EXPORT int bin_mdef_write(bin_mdef_t *m, const char *filename)
Write a binary mdef to a file.
Definition: bin_mdef.c:523
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
Definition: bin_mdef.c:738
word_posn_t wpos
Word position.
Definition: mdef.h:106
int32 n_tmat
Number of transition matrices.
Definition: bin_mdef.h:127
Single phone word (i.e.
Definition: mdef.h:79
int32 n_sen
Number of senones (CI+CD)
Definition: bin_mdef.h:126
int32 ssid
Senone sequence ID.
Definition: bin_mdef.h:77
int bin_mdef_ciphone_id(bin_mdef_t *m, const char *ciphone)
Context-independent phone lookup.
Definition: bin_mdef.c:692
int32 n_cd_tree
Number of nodes in cd_tree (below)
Definition: bin_mdef.h:130
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Definition: bin_mdef.h:137
#define WPOS_NAME
Printable code for each word position above.
Definition: mdef.h:83
int32 tmat
Transition matrix ID.
Definition: bin_mdef.h:78
cd_tree_t * cd_tree
Tree mapping CD phones to phone IDs.
Definition: bin_mdef.h:135
Binary format model definition files, with support for heterogeneous topologies and variable-size N-phones.
#define BAD_SSID
Invalid senone sequence ID (limited to 16 bits for PocketSphinx).
Definition: bin_mdef.h:97
int32 filler
Whether a filler phone; if so, can be substituted by silence phone in left or right context position...
Definition: mdef.h:92
Ending phone of word.
Definition: mdef.h:78
mmio_file_t * filemap
File map for this file (if any)
Definition: bin_mdef.h:133
int32 n_phone
number basephones + number triphones actually present
Definition: mdef.h:140
int16 rc
Base, left, right context ciphones.
Definition: mdef.h:105
char * name
The name of the CI phone.
Definition: mdef.h:91
int16 * cd2cisen
Parent CI-senone id for each senone; the first n_ci_sen are identity mappings; the CD-senones are con...
Definition: mdef.h:153
int32 n_phone
Number of base (CI) phones + (CD) triphones.
Definition: bin_mdef.h:123
uint8 * sseq_len
Number of states in each sseq (NULL for homogeneous)
Definition: bin_mdef.h:138
struct mdef_entry_s::@0::@1 ci
< CI phone information - attributes (just "filler" for now)
int32 n_sseq
Number of unique senone sequences.
Definition: bin_mdef.h:128
char ** ciname
CI phone names.
Definition: bin_mdef.h:134
POCKETSPHINX_EXPORT bin_mdef_t * bin_mdef_read(cmd_ln_t *config, const char *filename)
Read a binary mdef from a file.
Definition: bin_mdef.c:323
int32 n_ctx
Number of phones of context.
Definition: bin_mdef.h:129
void mdef_free(mdef_t *mdef)
Free an mdef_t.
Definition: mdef.c:719
#define N_WORD_POSN
total # of word positions (excluding undefined)
Definition: mdef.h:82
int32 n_ci_sen
Number of CI senones; these are the first.
Definition: bin_mdef.h:125
int32 n_ci_sen
number CI senones; these are the first
Definition: mdef.h:142
cmd_ln_t * config
Configuration.
Definition: acmod.h:150
int32 ssid
State sequence (or senone sequence) ID, considering the n_emit_state senone-ids are a unit...
Definition: mdef.h:101
#define BAD_SENID
Invalid senone ID (limited to 16 bits for PocketSphinx).
Definition: bin_mdef.h:101
int bin_mdef_phone_id(bin_mdef_t *m, int32 b, int32 l, int32 r, int32 pos)
In: Word position.
Definition: bin_mdef.c:746
int16 sil
CI phone ID for silence.
Definition: bin_mdef.h:131
int16 * cd2cisen
Parent CI-senone id for each senone.
Definition: bin_mdef.h:141
#define S3_SILENCE_CIPHONE
Hard-coded silence CI phone name.
Definition: mdef.h:84
enum bin_mdef_s::@4 alloc_mode
Allocation mode for this object.
int32 n_emit_state
Number of emitting states per phone (0 for heterogeneous)
Definition: bin_mdef.h:124
Structures for storing the left context.
Definition: bin_mdef.h:76
uint16 ** sseq
Unique state (or senone) sequences in this model, shared among all phones/triphones.
Definition: mdef.h:149
mdef_t * mdef_init(char *mdeffile, int breport)
Initialize the phone structure from the given model definition file.
int32 down
Next level of the tree (offset from start of cd_trees)
Definition: bin_mdef.h:112
int bin_mdef_ciphone_id_nocase(bin_mdef_t *m, const char *ciphone)
Case-insensitive context-independent phone lookup.
Definition: bin_mdef.c:715
int16 * sen2cimap
Parent CI-phone for each senone (CI or CD)
Definition: bin_mdef.h:142
int bin_mdef_free(bin_mdef_t *m)
Release a pointer to a binary mdef.
Definition: bin_mdef.c:272
Model definition.
ciphone_t * ciphone
CI-phone information for all ciphones.
Definition: mdef.h:147
POCKETSPHINX_EXPORT bin_mdef_t * bin_mdef_read_text(cmd_ln_t *config, const char *filename)
Read a text mdef from a file (creating an in-memory binary mdef).
Definition: bin_mdef.c:66
bin_mdef_t * bin_mdef_retain(bin_mdef_t *m)
Retain a pointer to a bin_mdef_t.
Definition: bin_mdef.c:265
int32 pid
Phone ID (leafnode)
Definition: bin_mdef.h:111
int32 n_sen
number senones (CI+CD)
Definition: mdef.h:143
Beginning phone of word.
Definition: mdef.h:77
int32 tmat
Transition matrix id.
Definition: mdef.h:104
int bin_mdef_phone_str(bin_mdef_t *m, int pid, char *buf)
Create a phone string for the given phone (base or triphone) id in the given buf. ...
Definition: bin_mdef.c:869
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
int16 * sen2cimap
Parent CI-phone for each senone (CI or CD)
Definition: mdef.h:156
mdef_entry_t * phone
All phone structures.
Definition: bin_mdef.h:136
POCKETSPHINX_EXPORT int bin_mdef_write_text(bin_mdef_t *m, const char *filename)
Write a binary mdef to a text file.
Definition: bin_mdef.c:607
int32 n_ciphone
Number of base (CI) phones.
Definition: bin_mdef.h:122
int32 n_tmat
number transition matrices
Definition: mdef.h:144
int32 n_emit_state
number emitting states per phone
Definition: mdef.h:141
int16 sil
SILENCE_CIPHONE id.
Definition: mdef.h:158
ph_lc_t *** wpos_ci_lclist
wpos_ci_lclist[wpos][ci] = list of lc for <wpos,ci>.
Definition: mdef.h:160
Structures needed for mapping <ci,lc,rc,wpos> into pid.
phone_t * phone
Information for all ciphones and triphones.
Definition: mdef.h:148