PocketSphinx  0.6
dict.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 #ifndef _S3_DICT_H_
39 #define _S3_DICT_H_
40 
45 /* SphinxBase headers. */
46 #include <sphinxbase/hash_table.h>
47 
48 /* Local headers. */
49 #include "s3types.h"
50 #include "bin_mdef.h"
51 #include "pocketsphinx_export.h"
52 
/* NOTE(review): not used anywhere in this header; presumably the number of
 * entries by which the dict_t word array is grown -- confirm in dict.c. */
53 #define S3DICT_INC_SZ 4096
54 
55 #ifdef __cplusplus
56 extern "C" {
57 #endif
58 #if 0
59 } /* Fool Emacs into not indenting things. */
60 #endif
61 
/* A structure for one dictionary word. */
66 typedef struct {
67  char *word; /* Ascii word string. */
68  s3cipid_t *ciphone; /* Pronunciation (read by dict_pron() and the dict_*_phone() macros). */
69  int32 pronlen; /* Pronunciation length. */
70  s3wid_t alt; /* Next alternative pronunciation id, NOT_S3WID if none. */
71  s3wid_t basewid; /* Base pronunciation id. */
72 } dictword_t;
73 
/* A structure for a dictionary. */
79 typedef struct {
80  int refcnt; /* NOTE(review): presumably the reference count managed by dict_retain()/dict_free() -- confirm in dict.c. */
81  bin_mdef_t *mdef; /* Model definition used for phone IDs; NULL if none used. */
82  dictword_t *word; /* Array of entries in dictionary (indexed by word id in the accessor macros). */
83  hash_table_t *ht; /* Hash table for mapping word strings to word ids. */
84  int32 max_words; /* Number of entries allocated in dict, including empty slots. */
85  int32 n_word; /* Number of occupied entries in dict, i.e. excluding empty slots. */
86  int32 filler_start; /* First filler word id (read from filler dict). */
87  int32 filler_end; /* Last filler word id (read from filler dict). */
88  s3wid_t startwid; /* FOR INTERNAL-USE ONLY. */
89  s3wid_t finishwid; /* FOR INTERNAL-USE ONLY. */
90  s3wid_t silwid; /* FOR INTERNAL-USE ONLY. */
91  int nocase; /* NOTE(review): undocumented; presumably nonzero for case-insensitive word lookup -- confirm in dict.c. */
92 } dict_t;
93 
94 
/*
 * Initialize a new dictionary.
 *
 * config: configuration object; which options are consulted is not visible
 *         in this header.
 * mdef:   model definition. NOTE(review): presumably kept as dict_t.mdef
 *         ("used for phone IDs; NULL if none used") -- confirm in dict.c.
 *
 * Returns a pointer to the new dict_t; the failure behaviour is not shown
 * here.
 */
106 dict_t *dict_init(cmd_ln_t *config,
107  bin_mdef_t *mdef
108  );
109 
/*
 * Write dictionary to a file.
 *
 * NOTE(review): the accepted values of format and the meaning of the int
 * return value are not visible in this header -- confirm in dict.c.
 */
113 int dict_write(dict_t *dict, char const *filename, char const *format);
114 
/*
 * Return word id for given word string if present.
 *
 * NOTE(review): the value returned when the word is absent is not shown
 * here -- confirm in dict.c before relying on it.
 */
116 POCKETSPHINX_EXPORT
117 s3wid_t dict_wordid(dict_t *d, const char *word);
118 
/* Return 1 if w is a filler word, 0 if not. */
123 int dict_filler_word(dict_t *d,
124  s3wid_t w
125  );
126 
/*
 * Test if w is a "real" word.
 *
 * NOTE(review): the exact definition of "real" is truncated in the generated
 * documentation for this declaration -- see dict.c for the precise rule.
 */
130 POCKETSPHINX_EXPORT
131 int dict_real_word(dict_t *d,
132  s3wid_t w
133  );
134 
/*
 * Add a word with the given ciphone pronunciation list to the dictionary.
 *
 * word: word string to add.
 * p:    ciphone pronunciation list.
 * np:   NOTE(review): presumably the number of entries in p -- confirm.
 *
 * Returns an s3wid_t; NOTE(review): presumably the id of the new word, with
 * the error value not shown here -- confirm in dict.c.
 */
139 s3wid_t dict_add_word(dict_t *d,
140  char const *word,
141  s3cipid_t const *p,
142  int32 np
143  );
144 
/* Return value: CI phone string for the given word, phone position. */
148 const char *dict_ciphone_str(dict_t *d,
149  s3wid_t wid,
150  int32 pos
151  );
152 
/* Number of occupied entries in the dictionary (d->n_word). */
154 #define dict_size(d) ((d)->n_word)
/*
 * Difference between the filler end and filler start word ids.
 * NOTE(review): filler_end is documented as the *last* filler word id; if
 * that bound is inclusive this is one less than the number of fillers --
 * confirm against dict.c before using it as a count.
 */
155 #define dict_num_fillers(d) (dict_filler_end(d) - dict_filler_start(d))
156 
/*
 * Dictionary size minus the filler span, minus 2.
 * NOTE(review): the 2 presumably accounts for the start and finish words
 * (S3_START_WORD / S3_FINISH_WORD) -- confirm against dict.c.
 */
161 #define dict_num_real_words(d) \
162  (dict_size(d) - (dict_filler_end(d) - dict_filler_start(d)) - 2)
/*
 * Per-word accessors. d is a dict_t pointer and w a word id that is used
 * directly as an index into d->word. None of these check an upper bound on
 * w, and only dict_wordstr() guards against w < 0 (it yields NULL and
 * evaluates w twice, so avoid arguments with side effects).
 */
163 #define dict_basewid(d,w) ((d)->word[w].basewid)
164 #define dict_wordstr(d,w) ((w) < 0 ? NULL : (d)->word[w].word)
/* Word string of the base pronunciation of w. */
165 #define dict_basestr(d,w) ((d)->word[dict_basewid(d,w)].word)
166 #define dict_nextalt(d,w) ((d)->word[w].alt)
167 #define dict_pronlen(d,w) ((d)->word[w].pronlen)
/* p-th ciphone of the pronunciation of w; p is not range-checked. */
168 #define dict_pron(d,w,p) ((d)->word[w].ciphone[p])
/* Plain field accessors on the dictionary itself. */
169 #define dict_filler_start(d) ((d)->filler_start)
170 #define dict_filler_end(d) ((d)->filler_end)
171 #define dict_startwid(d) ((d)->startwid)
172 #define dict_finishwid(d) ((d)->finishwid)
173 #define dict_silwid(d) ((d)->silwid)
/*
 * Phone-position helpers. The index is computed from pronlen without any
 * check: dict_second_phone() and dict_second_last_phone() read out of range
 * unless pronlen >= 2, and dict_first_phone()/dict_last_phone() unless
 * pronlen >= 1.
 */
174 #define dict_is_single_phone(d,w) ((d)->word[w].pronlen == 1)
175 #define dict_first_phone(d,w) ((d)->word[w].ciphone[0])
176 #define dict_second_phone(d,w) ((d)->word[w].ciphone[1])
177 #define dict_second_last_phone(d,w) ((d)->word[w].ciphone[(d)->word[w].pronlen - 2])
178 #define dict_last_phone(d,w) ((d)->word[w].ciphone[(d)->word[w].pronlen - 1])
179 
180 /* Hard-coded special word strings.
 * NOTE(review): presumably the strings looked up to populate
 * dict_t.startwid, finishwid and silwid -- confirm in dict.c. */
181 #define S3_START_WORD "<s>"
182 #define S3_FINISH_WORD "</s>"
183 #define S3_SILENCE_WORD "<sil>"
184 #define S3_UNKNOWN_WORD "<UNK>"
185 
/*
 * Handles a word carrying a trailing "(....)" suffix, i.e. a Sphinx-II style
 * alternative pronunciation specification.
 *
 * NOTE(review): the generated documentation for this declaration is
 * truncated; word is taken as non-const, so it is presumably modified in
 * place, and the meaning of the int32 result is not shown -- confirm in
 * dict.c.
 */
193 int32 dict_word2basestr(char *word);
194 
/* Retain a pointer to a dict_t. */
198 dict_t *dict_retain(dict_t *d);
199
/*
 * Release a pointer to a dictionary.
 *
 * NOTE(review): the meaning of the int return value is not visible in this
 * header; presumably the remaining reference count -- confirm in dict.c.
 */
203 int dict_free(dict_t *d);
204 
/* Report a dictionary structure. */
206 void dict_report(dict_t *d
207  );
208 
209 #if 0
210 { /* Stop indent from complaining */
211 #endif
212 #ifdef __cplusplus
213 }
214 #endif
215 
216 #endif
dict_t * dict_init(cmd_ln_t *config, bin_mdef_t *mdef)
Initialize a new dictionary.
Definition: dict.c:251
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
Definition: dict.c:382
char * word
Ascii word string.
Definition: dict.h:67
int dict_free(dict_t *d)
Release a pointer to a dictionary.
Definition: dict.c:451
Binary format model definition files, with support for heterogeneous topologies and variable-size N-p...
int32 n_word
#Occupied entries in dict; ie, excluding empty slots
Definition: dict.h:85
const char * dict_ciphone_str(dict_t *d, s3wid_t wid, int32 pos)
Return value: CI phone string for the given word, phone position.
Definition: dict.c:69
dict_t * dict_retain(dict_t *d)
Retain a pointer to a dict_t.
Definition: dict.c:444
int32 filler_end
Last filler word id (read from filler dict)
Definition: dict.h:87
int dict_write(dict_t *dict, char const *filename, char const *format)
Write dictionary to a file.
Definition: dict.c:220
s3wid_t startwid
FOR INTERNAL-USE ONLY.
Definition: dict.h:88
s3wid_t silwid
FOR INTERNAL-USE ONLY.
Definition: dict.h:90
s3wid_t alt
Next alternative pronunciation id, NOT_S3WID if none.
Definition: dict.h:70
int16 s3cipid_t
Size definitions for more semantically meaningful units.
Definition: s3types.h:109
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
Definition: dict.c:396
Size definition of semantically meaningful units.
a structure for one dictionary word.
Definition: dict.h:66
s3wid_t basewid
Base pronunciation id.
Definition: dict.h:71
s3wid_t finishwid
FOR INTERNAL-USE ONLY.
Definition: dict.h:89
a structure for a dictionary.
Definition: dict.h:79
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e.
Definition: dict.c:410
int32 filler_start
First filler word id (read from filler dict)
Definition: dict.h:86
int32 max_words
#Entries allocated in dict, including empty slots
Definition: dict.h:84
s3cipid_t * ciphone
Pronunciation.
Definition: dict.h:68
dictword_t * word
Array of entries in dictionary.
Definition: dict.h:82
s3wid_t dict_add_word(dict_t *d, char const *word, s3cipid_t const *p, int32 np)
Add a word with the given ciphone pronunciation list to the dictionary.
Definition: dict.c:80
int32 pronlen
Pronunciation length.
Definition: dict.h:69
bin_mdef_t * mdef
Model definition used for phone IDs; NULL if none used.
Definition: dict.h:81
hash_table_t * ht
Hash table for mapping word strings to word ids.
Definition: dict.h:83
void dict_report(dict_t *d)
Report a dictionary structure.
Definition: dict.c:482
int32 dict_word2basestr(char *word)
If the given word contains a trailing "(....)" (i.e., a Sphinx-II style alternative pronunciation spe...
Definition: dict.c:425