PocketSphinx  0.6
dict.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /* System headers. */
39 #include <string.h>
40 
41 /* SphinxBase headers. */
42 #include <sphinxbase/pio.h>
43 #include <sphinxbase/strfuncs.h>
44 
45 /* Local headers. */
46 #include "dict.h"
47 
48 
/* File-level constants. DELIM and DEFAULT_NUM_PHONE are not referenced by
 * any function visible in this listing. */
49 #define DELIM " \t\n" /* Set of field separator characters */
/* Expands to MAX_S3CIPID + 1. */
50 #define DEFAULT_NUM_PHONE (MAX_S3CIPID+1)
51 
/* When WIN32 evaluates to non-zero, every use of snprintf in this file is
 * textually replaced by sprintf_s. */
52 #if WIN32
53 #define snprintf sprintf_s
54 #endif
55 
/* Declared here, defined elsewhere; not used by the functions visible in
 * this listing. */
56 extern const char *const cmu6_lts_phone_table[];
57 
58 static s3cipid_t
59 dict_ciphone_id(dict_t * d, const char *str)
60 {
61  if (d->nocase)
62  return bin_mdef_ciphone_id_nocase(d->mdef, str);
63  else
64  return bin_mdef_ciphone_id(d->mdef, str);
65 }
66 
67 
68 const char *
69 dict_ciphone_str(dict_t * d, s3wid_t wid, int32 pos)
70 {
71  assert(d != NULL);
72  assert((wid >= 0) && (wid < d->n_word));
73  assert((pos >= 0) && (pos < d->word[wid].pronlen));
74 
75  return bin_mdef_ciphone_str(d->mdef, d->word[wid].ciphone[pos]);
76 }
77 
78 
79 s3wid_t
80 dict_add_word(dict_t * d, char const *word, s3cipid_t const * p, int32 np)
81 {
82  int32 len;
83  dictword_t *wordp;
84  s3wid_t newwid;
85  char *wword;
86 
87  if (d->n_word >= d->max_words) {
88  E_INFO("Reallocating to %d KiB for word entries\n",
89  (d->max_words + S3DICT_INC_SZ) * sizeof(dictword_t) / 1024);
90  d->word =
91  (dictword_t *) ckd_realloc(d->word,
92  (d->max_words +
93  S3DICT_INC_SZ) * sizeof(dictword_t));
94  d->max_words = d->max_words + S3DICT_INC_SZ;
95  }
96 
97  wordp = d->word + d->n_word;
98  wordp->word = (char *) ckd_salloc(word); /* Freed in dict_free */
99 
100  /* Associate word string with d->n_word in hash table */
101  if (hash_table_enter_int32(d->ht, wordp->word, d->n_word) != d->n_word) {
102  ckd_free(wordp->word);
103  wordp->word = NULL;
104  return BAD_S3WID;
105  }
106 
107  /* Fill in word entry, and set defaults */
108  if (p && (np > 0)) {
109  wordp->ciphone = (s3cipid_t *) ckd_malloc(np * sizeof(s3cipid_t)); /* Freed in dict_free */
110  memcpy(wordp->ciphone, p, np * sizeof(s3cipid_t));
111  wordp->pronlen = np;
112  }
113  else {
114  wordp->ciphone = NULL;
115  wordp->pronlen = 0;
116  }
117  wordp->alt = BAD_S3WID;
118  wordp->basewid = d->n_word;
119 
120  /* Determine base/alt wids */
121  wword = ckd_salloc(word);
122  if ((len = dict_word2basestr(wword)) > 0) {
123  int32 w;
124 
125  /* Truncated to a baseword string; find its ID */
126  if (hash_table_lookup_int32(d->ht, wword, &w) < 0) {
127  E_ERROR("Missing base word for: %s\n", word);
128  ckd_free(wword);
129  ckd_free(wordp->word);
130  wordp->word = NULL;
131  return BAD_S3WID;
132  }
133 
134  /* Link into alt list */
135  wordp->basewid = w;
136  wordp->alt = d->word[w].alt;
137  d->word[w].alt = d->n_word;
138  }
139  ckd_free(wword);
140 
141  newwid = d->n_word++;
142 
143  return newwid;
144 }
145 
146 
147 static int32
148 dict_read(FILE * fp, dict_t * d)
149 {
150  lineiter_t *li;
151  char **wptr;
152  s3cipid_t *p;
153  int32 lineno, nwd;
154  s3wid_t w;
155  int32 i, maxwd;
156  size_t stralloc, phnalloc;
157 
158  maxwd = 512;
159  p = (s3cipid_t *) ckd_calloc(maxwd + 4, sizeof(*p));
160  wptr = (char **) ckd_calloc(maxwd, sizeof(char *)); /* Freed below */
161 
162  lineno = 0;
163  stralloc = phnalloc = 0;
164  for (li = lineiter_start(fp); li; li = lineiter_next(li)) {
165  lineno++;
166  if (0 == strncmp(li->buf, "##", 2)
167  || 0 == strncmp(li->buf, ";;", 2))
168  continue;
169 
170  if ((nwd = str2words(li->buf, wptr, maxwd)) < 0) {
171  /* Increase size of p, wptr. */
172  nwd = str2words(li->buf, NULL, 0);
173  assert(nwd > maxwd); /* why else would it fail? */
174  maxwd = nwd;
175  p = (s3cipid_t *) ckd_realloc(p, (maxwd + 4) * sizeof(*p));
176  wptr = (char **) ckd_realloc(wptr, maxwd * sizeof(*wptr));
177  }
178 
179  if (nwd == 0) /* Empty line */
180  continue;
181  /* wptr[0] is the word-string and wptr[1..nwd-1] the pronunciation sequence */
182  if (nwd == 1) {
183  E_ERROR("Line %d: No pronunciation for word '%s'; ignored\n",
184  lineno, wptr[0]);
185  continue;
186  }
187 
188 
189  /* Convert pronunciation string to CI-phone-ids */
190  for (i = 1; i < nwd; i++) {
191  p[i - 1] = dict_ciphone_id(d, wptr[i]);
192  if (NOT_S3CIPID(p[i - 1])) {
193  E_ERROR("Line %d: Phone '%s' is mising in the acoustic model; word '%s' ignored\n",
194  lineno, wptr[i], wptr[0]);
195  break;
196  }
197  }
198 
199  if (i == nwd) { /* All CI-phones successfully converted to IDs */
200  w = dict_add_word(d, wptr[0], p, nwd - 1);
201  if (NOT_S3WID(w))
202  E_ERROR
203  ("Line %d: Failed to add the word '%s' (duplicate?); ignored\n",
204  lineno, wptr[0]);
205  else {
206  stralloc += strlen(d->word[w].word);
207  phnalloc += d->word[w].pronlen * sizeof(s3cipid_t);
208  }
209  }
210  }
211  E_INFO("Allocated %d KiB for strings, %d KiB for phones\n",
212  (int)stralloc / 1024, (int)phnalloc / 1024);
213  ckd_free(p);
214  ckd_free(wptr);
215 
216  return 0;
217 }
218 
219 int
220 dict_write(dict_t *dict, char const *filename, char const *format)
221 {
222  FILE *fh;
223  int i;
224 
225  if ((fh = fopen(filename, "w")) == NULL) {
226  E_ERROR_SYSTEM("Failed to open '%s'", filename);
227  return -1;
228  }
229  for (i = 0; i < dict->n_word; ++i) {
230  char *phones;
231  int j, phlen;
232  if (!dict_real_word(dict, i))
233  continue;
234  for (phlen = j = 0; j < dict_pronlen(dict, i); ++j)
235  phlen += strlen(dict_ciphone_str(dict, i, j)) + 1;
236  phones = ckd_calloc(1, phlen);
237  for (j = 0; j < dict_pronlen(dict, i); ++j) {
238  strcat(phones, dict_ciphone_str(dict, i, j));
239  if (j != dict_pronlen(dict, i) - 1)
240  strcat(phones, " ");
241  }
242  fprintf(fh, "%-30s %s\n", dict_wordstr(dict, i), phones);
243  ckd_free(phones);
244  }
245  fclose(fh);
246  return 0;
247 }
248 
249 
250 dict_t *
251 dict_init(cmd_ln_t *config, bin_mdef_t * mdef)
252 {
253  FILE *fp, *fp2;
254  int32 n;
255  lineiter_t *li;
256  dict_t *d;
257  s3cipid_t sil;
258  char const *dictfile = NULL, *fillerfile = NULL;
259 
260  if (config) {
261  dictfile = cmd_ln_str_r(config, "-dict");
262  fillerfile = cmd_ln_str_r(config, "-fdict");
263  }
264 
265  /*
266  * First obtain #words in dictionary (for hash table allocation).
267  * Reason: The PC NT system doesn't like to grow memory gradually. Better to allocate
268  * all the required memory in one go.
269  */
270  fp = NULL;
271  n = 0;
272  if (dictfile) {
273  if ((fp = fopen(dictfile, "r")) == NULL) {
274  E_ERROR_SYSTEM("Failed to open dictionary file '%s' for reading", dictfile);
275  return NULL;
276  }
277  for (li = lineiter_start(fp); li; li = lineiter_next(li)) {
278  if (0 != strncmp(li->buf, "##", 2)
279  && 0 != strncmp(li->buf, ";;", 2))
280  n++;
281  }
282  rewind(fp);
283  }
284 
285  fp2 = NULL;
286  if (fillerfile) {
287  if ((fp2 = fopen(fillerfile, "r")) == NULL) {
288  E_ERROR_SYSTEM("Failed to open filler dictionary file '%s' for reading", fillerfile);
289  fclose(fp);
290  return NULL;
291  }
292  for (li = lineiter_start(fp2); li; li = lineiter_next(li)) {
293  if (0 != strncmp(li->buf, "##", 2)
294  && 0 != strncmp(li->buf, ";;", 2))
295  n++;
296  }
297  rewind(fp2);
298  }
299 
300  /*
301  * Allocate dict entries. HACK!! Allow some extra entries for words not in file.
302  * Also check for type size restrictions.
303  */
304  d = (dict_t *) ckd_calloc(1, sizeof(dict_t)); /* freed in dict_free() */
305  d->refcnt = 1;
306  d->max_words =
307  (n + S3DICT_INC_SZ < MAX_S3WID) ? n + S3DICT_INC_SZ : MAX_S3WID;
308  if (n >= MAX_S3WID) {
309  E_ERROR("Number of words in dictionaries (%d) exceeds limit (%d)\n", n,
310  MAX_S3WID);
311  fclose(fp);
312  fclose(fp2);
313  ckd_free(d);
314  return NULL;
315  }
316 
317  E_INFO("Allocating %d * %d bytes (%d KiB) for word entries\n",
318  d->max_words, sizeof(dictword_t),
319  d->max_words * sizeof(dictword_t) / 1024);
320  d->word = (dictword_t *) ckd_calloc(d->max_words, sizeof(dictword_t)); /* freed in dict_free() */
321  d->n_word = 0;
322  if (mdef)
323  d->mdef = bin_mdef_retain(mdef);
324 
325  /* Create new hash table for word strings; case-insensitive word strings */
326  if (config && cmd_ln_exists_r(config, "-dictcase"))
327  d->nocase = cmd_ln_boolean_r(config, "-dictcase");
328  d->ht = hash_table_new(d->max_words, d->nocase);
329 
330  /* Digest main dictionary file */
331  if (fp) {
332  E_INFO("Reading main dictionary: %s\n", dictfile);
333  dict_read(fp, d);
334  fclose(fp);
335  E_INFO("%d words read\n", d->n_word);
336  }
337 
338  /* Now the filler dictionary file, if it exists */
339  d->filler_start = d->n_word;
340  if (fillerfile) {
341  E_INFO("Reading filler dictionary: %s\n", fillerfile);
342  dict_read(fp2, d);
343  fclose(fp2);
344  E_INFO("%d words read\n", d->n_word - d->filler_start);
345  }
346  if (mdef)
347  sil = bin_mdef_silphone(mdef);
348  else
349  sil = 0;
350  if (dict_wordid(d, S3_START_WORD) == BAD_S3WID) {
351  dict_add_word(d, S3_START_WORD, &sil, 1);
352  }
353  if (dict_wordid(d, S3_FINISH_WORD) == BAD_S3WID) {
354  dict_add_word(d, S3_FINISH_WORD, &sil, 1);
355  }
356  if (dict_wordid(d, S3_SILENCE_WORD) == BAD_S3WID) {
357  dict_add_word(d, S3_SILENCE_WORD, &sil, 1);
358  }
359 
360  d->filler_end = d->n_word - 1;
361 
362  /* Initialize distinguished word-ids */
363  d->startwid = dict_wordid(d, S3_START_WORD);
364  d->finishwid = dict_wordid(d, S3_FINISH_WORD);
365  d->silwid = dict_wordid(d, S3_SILENCE_WORD);
366 
367  if ((d->filler_start > d->filler_end)
368  || (!dict_filler_word(d, d->silwid))) {
369  E_ERROR("Word '%s' must occur (only) in filler dictionary\n",
370  S3_SILENCE_WORD);
371  dict_free(d);
372  return NULL;
373  }
374 
375  /* No check that alternative pronunciations for filler words are in filler range!! */
376 
377  return d;
378 }
379 
380 
381 s3wid_t
382 dict_wordid(dict_t *d, const char *word)
383 {
384  int32 w;
385 
386  assert(d);
387  assert(word);
388 
389  if (hash_table_lookup_int32(d->ht, word, &w) < 0)
390  return (BAD_S3WID);
391  return w;
392 }
393 
394 
395 int
396 dict_filler_word(dict_t *d, s3wid_t w)
397 {
398  assert(d);
399  assert((w >= 0) && (w < d->n_word));
400 
401  w = dict_basewid(d, w);
402  if ((w == d->startwid) || (w == d->finishwid))
403  return 0;
404  if ((w >= d->filler_start) && (w <= d->filler_end))
405  return 1;
406  return 0;
407 }
408 
409 int
410 dict_real_word(dict_t *d, s3wid_t w)
411 {
412  assert(d);
413  assert((w >= 0) && (w < d->n_word));
414 
415  w = dict_basewid(d, w);
416  if ((w == d->startwid) || (w == d->finishwid))
417  return 0;
418  if ((w >= d->filler_start) && (w <= d->filler_end))
419  return 0;
420  return 1;
421 }
422 
423 
/**
 * Strip a trailing "(...)" alternative-pronunciation marker from a word.
 *
 * If word ends in ')' and contains a '(' somewhere after its first
 * character, the string is cut at the last such '(' so that only the base
 * word remains.  Otherwise the string is left untouched.
 *
 * @param word Word string, modified in place.  May be empty.
 * @return Length of the base word after stripping, or -1 if nothing was
 *         stripped.
 */
int32
dict_word2basestr(char *word)
{
    int32 i, len;

    len = (int32) strlen(word);
    /* An empty string has no last character to inspect. */
    if ((len < 1) || (word[len - 1] != ')'))
        return -1;

    /* Scan backwards for the opening paren; index 0 is excluded because
     * a base word must be at least one character long. */
    i = len - 2;
    while ((i > 0) && (word[i] != '('))
        --i;
    if (i <= 0)
        return -1;

    /* The word is of the form <baseword>(...); strip from left-paren */
    word[i] = '\0';
    return i;
}
442 
443 dict_t *
445 {
446  ++d->refcnt;
447  return d;
448 }
449 
450 int
452 {
453  int i;
454  dictword_t *word;
455 
456  if (d == NULL)
457  return 0;
458  if (--d->refcnt > 0)
459  return d->refcnt;
460 
461  /* First Step, free all memory allocated for each word */
462  for (i = 0; i < d->n_word; i++) {
463  word = (dictword_t *) & (d->word[i]);
464  if (word->word)
465  ckd_free((void *) word->word);
466  if (word->ciphone)
467  ckd_free((void *) word->ciphone);
468  }
469 
470  if (d->word)
471  ckd_free((void *) d->word);
472  if (d->ht)
473  hash_table_free(d->ht);
474  if (d->mdef)
475  bin_mdef_free(d->mdef);
476  ckd_free((void *) d);
477 
478  return 0;
479 }
480 
481 void
483 {
484  E_INFO_NOFN("Initialization of dict_t, report:\n");
485  E_INFO_NOFN("Max word: %d\n", d->max_words);
486  E_INFO_NOFN("No of word: %d\n", d->n_word);
487  E_INFO_NOFN("\n");
488 }
dict_t * dict_init(cmd_ln_t *config, bin_mdef_t *mdef)
Initialize a new dictionary.
Definition: dict.c:251
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
Definition: dict.c:382
char * word
Ascii word string.
Definition: dict.h:67
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
Definition: bin_mdef.c:738
int dict_free(dict_t *d)
Release a pointer to a dictionary.
Definition: dict.c:451
int bin_mdef_ciphone_id(bin_mdef_t *m, const char *ciphone)
Context-independent phone lookup.
Definition: bin_mdef.c:692
Operations on dictionary.
#define BAD_S3WID
Dictionary word id.
Definition: s3types.h:136
int32 n_word
#Occupied entries in dict; ie, excluding empty slots
Definition: dict.h:85
const char * dict_ciphone_str(dict_t *d, s3wid_t wid, int32 pos)
Return value: CI phone string for the given word, phone position.
Definition: dict.c:69
dict_t * dict_retain(dict_t *d)
Retain a pointer to an dict_t.
Definition: dict.c:444
int32 filler_end
Last filler word id (read from filler dict)
Definition: dict.h:87
int dict_write(dict_t *dict, char const *filename, char const *format)
Write dictionary to a file.
Definition: dict.c:220
s3wid_t startwid
FOR INTERNAL-USE ONLY.
Definition: dict.h:88
s3wid_t silwid
FOR INTERNAL-USE ONLY.
Definition: dict.h:90
s3wid_t alt
Next alternative pronunciation id, NOT_S3WID if none.
Definition: dict.h:70
int16 s3cipid_t
Size definitions for more semantically meaningful units.
Definition: s3types.h:109
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
Definition: dict.c:396
a structure for one dictionary word.
Definition: dict.h:66
s3wid_t basewid
Base pronunciation id.
Definition: dict.h:71
s3wid_t finishwid
FOR INTERNAL-USE ONLY.
Definition: dict.h:89
a structure for a dictionary.
Definition: dict.h:79
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e.
Definition: dict.c:410
int32 filler_start
First filler word id (read from filler dict)
Definition: dict.h:86
int bin_mdef_ciphone_id_nocase(bin_mdef_t *m, const char *ciphone)
Case-insensitive context-independent phone lookup.
Definition: bin_mdef.c:715
int bin_mdef_free(bin_mdef_t *m)
Release a pointer to a binary mdef.
Definition: bin_mdef.c:272
int32 max_words
#Entries allocated in dict, including empty slots
Definition: dict.h:84
s3cipid_t * ciphone
Pronunciation.
Definition: dict.h:68
bin_mdef_t * bin_mdef_retain(bin_mdef_t *m)
Retain a pointer to a bin_mdef_t.
Definition: bin_mdef.c:265
dictword_t * word
Array of entries in dictionary.
Definition: dict.h:82
s3wid_t dict_add_word(dict_t *d, char const *word, s3cipid_t const *p, int32 np)
Add a word with the given ciphone pronunciation list to the dictionary.
Definition: dict.c:80
int32 pronlen
Pronunciation length.
Definition: dict.h:69
bin_mdef_t * mdef
Model definition used for phone IDs; NULL if none used.
Definition: dict.h:81
hash_table_t * ht
Hash table for mapping word strings to word ids.
Definition: dict.h:83
void dict_report(dict_t *d)
Report a dictionary structure.
Definition: dict.c:482
int32 dict_word2basestr(char *word)
If the given word contains a trailing "(....)" (i.e., a Sphinx-II style alternative pronunciation spe...
Definition: dict.c:425