PocketSphinx  0.6
ps_alignment.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2010 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
42 /* System headers. */
43 
44 /* SphinxBase headers. */
45 #include <sphinxbase/ckd_alloc.h>
46 
47 /* Local headers. */
48 #include "ps_alignment.h"
49 
52 {
53  ps_alignment_t *al = ckd_calloc(1, sizeof(*al));
54  al->d2p = dict2pid_retain(d2p);
55  return al;
56 }
57 
58 int
60 {
61  if (al == NULL)
62  return 0;
63  dict2pid_free(al->d2p);
64  ckd_free(al->word.seq);
65  ckd_free(al->sseq.seq);
66  ckd_free(al->state.seq);
67  ckd_free(al);
68  return 0;
69 }
70 
71 #define VECTOR_GROW 10
72 static void *
73 vector_grow_one(void *ptr, uint16 *n_alloc, uint16 *n, size_t item_size)
74 {
75  int newsize = *n + 1;
76  if (newsize < *n_alloc) {
77  *n += 1;
78  return ptr;
79  }
80  newsize += VECTOR_GROW;
81  if (newsize > 0xffff)
82  return NULL;
83  ptr = ckd_realloc(ptr, newsize * item_size);
84  *n += 1;
85  *n_alloc = newsize;
86  return ptr;
87 }
88 
89 static ps_alignment_entry_t *
90 ps_alignment_vector_grow_one(ps_alignment_vector_t *vec)
91 {
92  void *ptr;
93  ptr = vector_grow_one(vec->seq, &vec->n_alloc,
94  &vec->n_ent, sizeof(*vec->seq));
95  if (ptr == NULL)
96  return NULL;
97  vec->seq = ptr;
98  return vec->seq + vec->n_ent - 1;
99 }
100 
101 static void
102 ps_alignment_vector_empty(ps_alignment_vector_t *vec)
103 {
104  vec->n_ent = 0;
105 }
106 
107 int
109  int32 wid, int duration)
110 {
112 
113  if ((ent = ps_alignment_vector_grow_one(&al->word)) == NULL)
114  return 0;
115  ent->id.wid = wid;
116  if (al->word.n_ent > 1)
117  ent->start = ent[-1].start + ent[-1].duration;
118  else
119  ent->start = 0;
120  ent->duration = duration;
121  ent->parent = PS_ALIGNMENT_NONE;
122  ent->child = PS_ALIGNMENT_NONE;
123 
124  return al->word.n_ent;
125 }
126 
127 int
129 {
130  dict2pid_t *d2p;
131  dict_t *dict;
132  bin_mdef_t *mdef;
133  int i, lc;
134 
135  /* Clear phone and state sequences. */
136  ps_alignment_vector_empty(&al->sseq);
137  ps_alignment_vector_empty(&al->state);
138 
139  /* For each word, expand to phones/senone sequences. */
140  d2p = al->d2p;
141  dict = d2p->dict;
142  mdef = d2p->mdef;
143  lc = bin_mdef_silphone(mdef);
144  for (i = 0; i < al->word.n_ent; ++i) {
145  ps_alignment_entry_t *went = al->word.seq + i;
146  ps_alignment_entry_t *sent;
147  int wid = went->id.wid;
148  int len = dict_pronlen(dict, wid);
149  int j, rc;
150 
151  if (i < al->word.n_ent - 1)
152  rc = dict_first_phone(dict, al->word.seq[i+1].id.wid);
153  else
154  rc = bin_mdef_silphone(mdef);
155 
156  /* First phone. */
157  if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
158  E_ERROR("Failed to add phone entry!\n");
159  return -1;
160  }
161  sent->id.pid.cipid = dict_first_phone(dict, wid);
162  sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
163  sent->start = went->start;
164  sent->duration = went->duration;
165  sent->parent = i;
166  went->child = (uint16)(sent - al->sseq.seq);
167  if (len == 1)
168  sent->id.pid.ssid
169  = dict2pid_lrdiph_rc(d2p, sent->id.pid.cipid, lc, rc);
170  else
171  sent->id.pid.ssid
172  = dict2pid_ldiph_lc(d2p, sent->id.pid.cipid,
173  dict_second_phone(dict, wid), lc);
174  assert(sent->id.pid.ssid != BAD_SSID);
175 
176  /* Internal phones. */
177  for (j = 1; j < len - 1; ++j) {
178  if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
179  E_ERROR("Failed to add phone entry!\n");
180  return -1;
181  }
182  sent->id.pid.cipid = dict_pron(dict, wid, j);
183  sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
184  sent->id.pid.ssid = dict2pid_internal(d2p, wid, j);
185  assert(sent->id.pid.ssid != BAD_SSID);
186  sent->start = went->start;
187  sent->duration = went->duration;
188  sent->parent = i;
189  }
190 
191  /* Last phone. */
192  if (j < len) {
193  xwdssid_t *rssid;
194  assert(j == len - 1);
195  if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
196  E_ERROR("Failed to add phone entry!\n");
197  return -1;
198  }
199  sent->id.pid.cipid = dict_last_phone(dict, wid);
200  sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
201  rssid = dict2pid_rssid(d2p, sent->id.pid.cipid,
202  dict_second_last_phone(dict, wid));
203  sent->id.pid.ssid = rssid->ssid[rssid->cimap[rc]];
204  assert(sent->id.pid.ssid != BAD_SSID);
205  sent->start = went->start;
206  sent->duration = went->duration;
207  sent->parent = i;
208  }
209  /* Update lc. Could just use sent->id.pid.cipid here but that
210  * seems needlessly obscure. */
211  lc = dict_last_phone(dict, wid);
212  }
213 
214  /* For each senone sequence, expand to senones. (we could do this
215  * nested above but this makes it more clear and easier to
216  * refactor) */
217  for (i = 0; i < al->sseq.n_ent; ++i) {
218  ps_alignment_entry_t *pent = al->sseq.seq + i;
219  ps_alignment_entry_t *sent;
220  int j;
221 
222  for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
223  if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
224  E_ERROR("Failed to add state entry!\n");
225  return -1;
226  }
227  sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
228  assert(sent->id.senid != BAD_SENID);
229  sent->start = pent->start;
230  sent->duration = pent->duration;
231  sent->parent = i;
232  if (j == 0)
233  pent->child = (uint16)(sent - al->state.seq);
234  }
235  }
236 
237  return 0;
238 }
239 
240 /* FIXME: Somewhat the same as the above function, needs refactoring */
241 int
243 {
244  dict2pid_t *d2p;
245  dict_t *dict;
246  bin_mdef_t *mdef;
247  int i;
248 
249  /* Clear phone and state sequences. */
250  ps_alignment_vector_empty(&al->sseq);
251  ps_alignment_vector_empty(&al->state);
252 
253  /* For each word, expand to phones/senone sequences. */
254  d2p = al->d2p;
255  dict = d2p->dict;
256  mdef = d2p->mdef;
257  for (i = 0; i < al->word.n_ent; ++i) {
258  ps_alignment_entry_t *went = al->word.seq + i;
259  ps_alignment_entry_t *sent;
260  int wid = went->id.wid;
261  int len = dict_pronlen(dict, wid);
262  int j;
263 
264  for (j = 0; j < len; ++j) {
265  if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
266  E_ERROR("Failed to add phone entry!\n");
267  return -1;
268  }
269  sent->id.pid.cipid = dict_pron(dict, wid, j);
270  sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
271  sent->id.pid.ssid = bin_mdef_pid2ssid(mdef, sent->id.pid.cipid);
272  assert(sent->id.pid.ssid != BAD_SSID);
273  sent->start = went->start;
274  sent->duration = went->duration;
275  sent->parent = i;
276  }
277  }
278 
279  /* For each senone sequence, expand to senones. (we could do this
280  * nested above but this makes it more clear and easier to
281  * refactor) */
282  for (i = 0; i < al->sseq.n_ent; ++i) {
283  ps_alignment_entry_t *pent = al->sseq.seq + i;
284  ps_alignment_entry_t *sent;
285  int j;
286 
287  for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
288  if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
289  E_ERROR("Failed to add state entry!\n");
290  return -1;
291  }
292  sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
293  assert(sent->id.senid != BAD_SENID);
294  sent->start = pent->start;
295  sent->duration = pent->duration;
296  sent->parent = i;
297  if (j == 0)
298  pent->child = (uint16)(sent - al->state.seq);
299  }
300  }
301 
302  return 0;
303 }
304 
305 int
307 {
308  ps_alignment_entry_t *last_ent = NULL;
309  int i;
310 
311  /* Propagate duration up from states to phones. */
312  for (i = 0; i < al->state.n_ent; ++i) {
313  ps_alignment_entry_t *sent = al->state.seq + i;
314  ps_alignment_entry_t *pent = al->sseq.seq + sent->parent;
315  if (pent != last_ent) {
316  pent->start = sent->start;
317  pent->duration = 0;
318  }
319  pent->duration += sent->duration;
320  last_ent = pent;
321  }
322 
323  /* Propagate duration up from phones to words. */
324  last_ent = NULL;
325  for (i = 0; i < al->sseq.n_ent; ++i) {
326  ps_alignment_entry_t *pent = al->sseq.seq + i;
327  ps_alignment_entry_t *went = al->word.seq + pent->parent;
328  if (went != last_ent) {
329  went->start = pent->start;
330  went->duration = 0;
331  }
332  went->duration += pent->duration;
333  last_ent = went;
334  }
335 
336  return 0;
337 }
338 
339 int
341 {
342  return (int)al->word.n_ent;
343 }
344 
345 int
347 {
348  return (int)al->sseq.n_ent;
349 }
350 
351 int
353 {
354  return (int)al->state.n_ent;
355 }
356 
359 {
360  ps_alignment_iter_t *itor;
361 
362  if (al->word.n_ent == 0)
363  return NULL;
364  itor = ckd_calloc(1, sizeof(*itor));
365  itor->al = al;
366  itor->vec = &al->word;
367  itor->pos = 0;
368  return itor;
369 }
370 
373 {
374  ps_alignment_iter_t *itor;
375 
376  if (al->sseq.n_ent == 0)
377  return NULL;
378  itor = ckd_calloc(1, sizeof(*itor));
379  itor->al = al;
380  itor->vec = &al->sseq;
381  itor->pos = 0;
382  return itor;
383 }
384 
387 {
388  ps_alignment_iter_t *itor;
389 
390  if (al->state.n_ent == 0)
391  return NULL;
392  itor = ckd_calloc(1, sizeof(*itor));
393  itor->al = al;
394  itor->vec = &al->state;
395  itor->pos = 0;
396  return itor;
397 }
398 
401 {
402  return itor->vec->seq + itor->pos;
403 }
404 
405 int
407 {
408  ckd_free(itor);
409  return 0;
410 }
411 
414 {
415  if (itor == NULL)
416  return NULL;
417  if (pos >= itor->vec->n_ent) {
419  return NULL;
420  }
421  itor->pos = pos;
422  return itor;
423 }
424 
427 {
428  if (itor == NULL)
429  return NULL;
430  if (++itor->pos >= itor->vec->n_ent) {
432  return NULL;
433  }
434  return itor;
435 }
436 
439 {
440  if (itor == NULL)
441  return NULL;
442  if (--itor->pos < 0) {
444  return NULL;
445  }
446  return itor;
447 }
448 
451 {
452  ps_alignment_iter_t *itor2;
453  if (itor == NULL)
454  return NULL;
455  if (itor->vec == &itor->al->word)
456  return NULL;
457  if (itor->vec->seq[itor->pos].parent == PS_ALIGNMENT_NONE)
458  return NULL;
459  itor2 = ckd_calloc(1, sizeof(*itor2));
460  itor2->al = itor->al;
461  itor2->pos = itor->vec->seq[itor->pos].parent;
462  if (itor->vec == &itor->al->sseq)
463  itor2->vec = &itor->al->word;
464  else
465  itor2->vec = &itor->al->sseq;
466  return itor2;
467 }
468 
471 {
472  ps_alignment_iter_t *itor2;
473  if (itor == NULL)
474  return NULL;
475  if (itor->vec == &itor->al->state)
476  return NULL;
477  if (itor->vec->seq[itor->pos].child == PS_ALIGNMENT_NONE)
478  return NULL;
479  itor2 = ckd_calloc(1, sizeof(*itor2));
480  itor2->al = itor->al;
481  itor2->pos = itor->vec->seq[itor->pos].child;
482  if (itor->vec == &itor->al->word)
483  itor2->vec = &itor->al->sseq;
484  else
485  itor2->vec = &itor->al->state;
486  return itor2;
487 }
int ps_alignment_n_states(ps_alignment_t *al)
Number of states.
Definition: ps_alignment.c:352
Definition: ps_alignment.h:56
int ps_alignment_populate_ci(ps_alignment_t *al)
Populate lower layers using context-independent phones.
Definition: ps_alignment.c:242
ps_alignment_t * ps_alignment_init(dict2pid_t *d2p)
Create a new, empty alignment.
Definition: ps_alignment.c:51
ps_alignment_iter_t * ps_alignment_iter_goto(ps_alignment_iter_t *itor, int pos)
Move alignment iterator to given index.
Definition: ps_alignment.c:413
#define BAD_SSID
Invalid senone sequence ID (limited to 16 bits for PocketSphinx).
Definition: bin_mdef.h:97
ps_alignment_iter_t * ps_alignment_iter_next(ps_alignment_iter_t *itor)
Move an alignment iterator forward.
Definition: ps_alignment.c:426
int dict2pid_free(dict2pid_t *d2p)
Free the memory dict2pid structure.
Definition: dict2pid.c:507
int ps_alignment_free(ps_alignment_t *al)
Release an alignment.
Definition: ps_alignment.c:59
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
Definition: dict2pid.h:179
int ps_alignment_propagate(ps_alignment_t *al)
Propagate timing information up from state sequence.
Definition: ps_alignment.c:306
#define BAD_SENID
Invalid senone ID (limited to 16 bits for PocketSphinx).
Definition: bin_mdef.h:101
bin_mdef_t * mdef
Model definition, used to generate internal ssids on the fly.
Definition: dict2pid.h:151
ps_alignment_iter_t * ps_alignment_words(ps_alignment_t *al)
Iterate over the alignment starting at the first word.
Definition: ps_alignment.c:358
dict_t * dict
Dictionary this table refers to.
Definition: dict2pid.h:153
a structure for a dictionary.
Definition: dict.h:79
s3ssid_t dict2pid_internal(dict2pid_t *d2p, int32 wid, int pos)
Return the senone sequence ID for the given word position.
Definition: dict2pid.c:367
cross word triphone model structure
Definition: dict2pid.h:137
dict2pid_t * dict2pid_retain(dict2pid_t *d2p)
Retain a pointer to dict2pid.
Definition: dict2pid.c:500
ps_alignment_iter_t * ps_alignment_iter_prev(ps_alignment_iter_t *itor)
Move an alignment iterator back.
Definition: ps_alignment.c:438
ps_alignment_iter_t * ps_alignment_states(ps_alignment_t *al)
Iterate over the alignment starting at the first state.
Definition: ps_alignment.c:386
ps_alignment_iter_t * ps_alignment_iter_up(ps_alignment_iter_t *itor)
Get a new iterator starting at the parent of the current node.
Definition: ps_alignment.c:450
ps_alignment_iter_t * ps_alignment_iter_down(ps_alignment_iter_t *itor)
Get a new iterator starting at the first child of the current node.
Definition: ps_alignment.c:470
int ps_alignment_n_phones(ps_alignment_t *al)
Number of phones.
Definition: ps_alignment.c:346
int ps_alignment_iter_free(ps_alignment_iter_t *itor)
Release an iterator before completing all iterations.
Definition: ps_alignment.c:406
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
Definition: dict2pid.h:139
Multi-level alignment structure.
int ps_alignment_populate(ps_alignment_t *al)
Populate lower layers using available word information.
Definition: ps_alignment.c:128
#define dict_pron(d, w, p)
The CI phones of the word w at position p.
Definition: dict.h:168
ps_alignment_iter_t * ps_alignment_phones(ps_alignment_t *al)
Iterate over the alignment starting at the first phone.
Definition: ps_alignment.c:372
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:148
s3ssid_t * ssid
Senone Sequence ID list for all context ciphones.
Definition: dict2pid.h:138
int ps_alignment_add_word(ps_alignment_t *al, int32 wid, int duration)
Append a word.
Definition: ps_alignment.c:108
ps_alignment_entry_t * ps_alignment_iter_get(ps_alignment_iter_t *itor)
Get the alignment entry pointed to by an iterator.
Definition: ps_alignment.c:400
int ps_alignment_n_words(ps_alignment_t *al)
Number of words.
Definition: ps_alignment.c:340