PocketSphinx  5prealpha
tmat.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
38  * tmat.c
39  *
40  * **********************************************
41  * CMU ARPA Speech Project
42  *
43  * Copyright (c) 1997 Carnegie Mellon University.
44  * ALL RIGHTS RESERVED.
45  * **********************************************
46  *
47  * HISTORY
48  * $Log: tmat.c,v $
49  * Revision 1.1.1.1 2006/05/23 18:45:01 dhuggins
50  * re-importation
51  *
52  * Revision 1.4 2005/11/14 16:14:34 dhuggins
53  * Use LOG() instead of logs3() for loading tmats, makes startup
54  * ***much*** faster.
55  *
56  * Revision 1.3 2005/10/10 14:50:35 dhuggins
57  * Deal properly with empty transition matrices.
58  *
59  * Revision 1.2 2005/09/30 15:01:23 dhuggins
60  * More robust tmat reading - read the tmat in accordance with the fixed s2 topology
61  *
62  * Revision 1.1 2005/09/29 21:51:19 dhuggins
63  * Add support for Sphinx3 tmat files. Amazingly enough, it Just Works
64  * (but it isn't terribly robust)
65  *
66  * Revision 1.6 2005/07/05 13:12:39 dhdfu
67  * Add new arguments to logs3_init() in some tests, main_ep
68  *
69  * Revision 1.5 2005/06/21 19:23:35 arthchan2003
70  * 1, Fixed doxygen documentation. 2, Added $ keyword.
71  *
72  * Revision 1.5 2005/05/03 04:09:09 archan
73  * Implemented the heart of word copy search. For every ci-phone, every word end, a tree will be allocated to preserve its pathscore. This is different from 3.5 or below, only the best score for a particular ci-phone, regardless of the word-ends will be preserved at every frame. The graph propagation will not collect unused word tree at this point. srch_WST_propagate_wd_lv2 is also as the most stupid in the century. But well, after all, everything needs a start. I will then really get the results from the search and see how it looks.
74  *
75  * Revision 1.4 2005/04/21 23:50:26 archan
76  * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used.
77  *
78  * Revision 1.3 2005/03/30 01:22:47 archan
79  * Fixed mistakes in last updates. Add
80  *
81  *
82  * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu)
83  * Added tmat_free to free allocated memory
84  *
85  * 29-Feb-2000 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
86  * Added tmat_chk_1skip(), and made tmat_chk_uppertri() public.
87  *
88  * 10-Dec-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
89  * Made tmat_dump() public.
90  *
91  * 11-Mar-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
92  * Started based on original S3 implementation.
93  */
94 
95 /* System headers. */
96 #include <string.h>
97 
98 /* SphinxBase headers. */
99 #include <sphinxbase/logmath.h>
100 #include <sphinxbase/err.h>
101 #include <sphinxbase/ckd_alloc.h>
102 #include <sphinxbase/bio.h>
103 
104 /* Local headers. */
105 #include "tmat.h"
106 #include "hmm.h"
107 #include "vector.h"
108 
109 #define TMAT_PARAM_VERSION "1.0"
110 
111 
116 static int32 tmat_chk_uppertri(tmat_t *tmat, logmath_t *lmath);
117 
118 
125 static int32 tmat_chk_1skip(tmat_t *tmat, logmath_t *lmath);
126 
127 
128 void
129 tmat_dump(tmat_t * tmat, FILE * fp)
130 {
131  int32 i, src, dst;
132 
133  for (i = 0; i < tmat->n_tmat; i++) {
134  fprintf(fp, "TMAT %d = %d x %d\n", i, tmat->n_state,
135  tmat->n_state + 1);
136  for (src = 0; src < tmat->n_state; src++) {
137  for (dst = 0; dst <= tmat->n_state; dst++)
138  fprintf(fp, " %12d", tmat->tp[i][src][dst]);
139  fprintf(fp, "\n");
140  }
141  fprintf(fp, "\n");
142  }
143  fflush(fp);
144 }
145 
146 
147 /*
148  * Check model tprob matrices that they conform to upper-triangular assumption;
149  * i.e. no "backward" transitions allowed.
150  */
151 int32
152 tmat_chk_uppertri(tmat_t * tmat, logmath_t *lmath)
153 {
154  int32 i, src, dst;
155 
156  /* Check that each tmat is upper-triangular */
157  for (i = 0; i < tmat->n_tmat; i++) {
158  for (dst = 0; dst < tmat->n_state; dst++)
159  for (src = dst + 1; src < tmat->n_state; src++)
160  if (tmat->tp[i][src][dst] < 255) {
161  E_ERROR("tmat[%d][%d][%d] = %d\n",
162  i, src, dst, tmat->tp[i][src][dst]);
163  return -1;
164  }
165  }
166 
167  return 0;
168 }
169 
170 
171 int32
172 tmat_chk_1skip(tmat_t * tmat, logmath_t *lmath)
173 {
174  int32 i, src, dst;
175 
176  for (i = 0; i < tmat->n_tmat; i++) {
177  for (src = 0; src < tmat->n_state; src++)
178  for (dst = src + 3; dst <= tmat->n_state; dst++)
179  if (tmat->tp[i][src][dst] < 255) {
180  E_ERROR("tmat[%d][%d][%d] = %d\n",
181  i, src, dst, tmat->tp[i][src][dst]);
182  return -1;
183  }
184  }
185 
186  return 0;
187 }
188 
189 
190 tmat_t *
191 tmat_init(char const *file_name, logmath_t *lmath, float64 tpfloor, int32 breport)
192 {
193  char tmp;
194  int32 n_src, n_dst, n_tmat;
195  FILE *fp;
196  int32 byteswap, chksum_present;
197  uint32 chksum;
198  float32 **tp;
199  int32 i, j, k, tp_per_tmat;
200  char **argname, **argval;
201  tmat_t *t;
202 
203 
204  if (breport) {
205  E_INFO("Reading HMM transition probability matrices: %s\n",
206  file_name);
207  }
208 
209  t = (tmat_t *) ckd_calloc(1, sizeof(tmat_t));
210 
211  if ((fp = fopen(file_name, "rb")) == NULL)
212  E_FATAL_SYSTEM("Failed to open transition file '%s' for reading", file_name);
213 
214  /* Read header, including argument-value info and 32-bit byteorder magic */
215  if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
216  E_FATAL("Failed to read header from file '%s'\n", file_name);
217 
218  /* Parse argument-value list */
219  chksum_present = 0;
220  for (i = 0; argname[i]; i++) {
221  if (strcmp(argname[i], "version") == 0) {
222  if (strcmp(argval[i], TMAT_PARAM_VERSION) != 0)
223  E_WARN("Version mismatch(%s): %s, expecting %s\n",
224  file_name, argval[i], TMAT_PARAM_VERSION);
225  }
226  else if (strcmp(argname[i], "chksum0") == 0) {
227  chksum_present = 1; /* Ignore the associated value */
228  }
229  }
230  bio_hdrarg_free(argname, argval);
231  argname = argval = NULL;
232 
233  chksum = 0;
234 
235  /* Read #tmat, #from-states, #to-states, arraysize */
236  if ((bio_fread(&n_tmat, sizeof(int32), 1, fp, byteswap, &chksum)
237  != 1)
238  || (bio_fread(&n_src, sizeof(int32), 1, fp, byteswap, &chksum) !=
239  1)
240  || (bio_fread(&n_dst, sizeof(int32), 1, fp, byteswap, &chksum) !=
241  1)
242  || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
243  E_FATAL("Failed to read header from '%s'\n", file_name);
244  }
245  if (n_tmat >= MAX_INT16)
246  E_FATAL("%s: Number of transition matrices (%d) exceeds limit (%d)\n", file_name,
247  n_tmat, MAX_INT16);
248  t->n_tmat = n_tmat;
249 
250  if (n_dst != n_src + 1)
251  E_FATAL("%s: Unsupported transition matrix. Number of source states (%d) != number of target states (%d)-1\n", file_name,
252  n_src, n_dst);
253  t->n_state = n_src;
254 
255  if (i != t->n_tmat * n_src * n_dst) {
256  E_FATAL
257  ("%s: Invalid transitions. Number of coefficients (%d) doesn't match expected array dimension: %d x %d x %d\n",
258  file_name, i, t->n_tmat, n_src, n_dst);
259  }
260 
261  /* Allocate memory for tmat data */
262  t->tp = ckd_calloc_3d(t->n_tmat, n_src, n_dst, sizeof(***t->tp));
263 
264  /* Temporary structure to read in the float data */
265  tp = ckd_calloc_2d(n_src, n_dst, sizeof(**tp));
266 
267  /* Read transition matrices, normalize and floor them, and convert to log domain */
268  tp_per_tmat = n_src * n_dst;
269  for (i = 0; i < t->n_tmat; i++) {
270  if (bio_fread(tp[0], sizeof(float32), tp_per_tmat, fp,
271  byteswap, &chksum) != tp_per_tmat) {
272  E_FATAL("Failed to read transition matrix %d from '%s'\n", i, file_name);
273  }
274 
275  /* Normalize and floor */
276  for (j = 0; j < n_src; j++) {
277  if (vector_sum_norm(tp[j], n_dst) == 0.0)
278  E_WARN("Normalization failed for transition matrix %d from state %d\n",
279  i, j);
280  vector_nz_floor(tp[j], n_dst, tpfloor);
281  vector_sum_norm(tp[j], n_dst);
282 
283  /* Convert to logs3. */
284  for (k = 0; k < n_dst; k++) {
285  int ltp;
286 #if 0 /* No, don't do this! It will subtly break 3-state HMMs. */
287  /* For these ones, we floor them even if they are
288  * zero, otherwise HMM evaluation goes nuts. */
289  if (k >= j && k-j < 3 && tp[j][k] == 0.0f)
290  tp[j][k] = tpfloor;
291 #endif
292  /* Log and quantize them. */
293  ltp = -logmath_log(lmath, tp[j][k]) >> SENSCR_SHIFT;
294  if (ltp > 255) ltp = 255;
295  t->tp[i][j][k] = (uint8)ltp;
296  }
297  }
298  }
299 
300  ckd_free_2d(tp);
301 
302  if (chksum_present)
303  bio_verify_chksum(fp, byteswap, chksum);
304 
305  if (fread(&tmp, 1, 1, fp) == 1)
306  E_ERROR("Non-empty file beyond end of data\n");
307 
308  fclose(fp);
309 
310  if (tmat_chk_uppertri(t, lmath) < 0)
311  E_FATAL("Tmat not upper triangular\n");
312  if (tmat_chk_1skip(t, lmath) < 0)
313  E_FATAL("Topology not Left-to-Right or Bakis\n");
314 
315  return t;
316 }
317 
318 void
320 {
321  E_INFO_NOFN("Initialization of tmat_t, report:\n");
322  E_INFO_NOFN("Read %d transition matrices of size %dx%d\n",
323  t->n_tmat, t->n_state, t->n_state + 1);
324  E_INFO_NOFN("\n");
325 
326 }
327 
328 /*
329  * RAH, Free memory allocated in tmat_init ()
330  */
331 void
333 {
334  if (t) {
335  if (t->tp)
336  ckd_free_3d(t->tp);
337  ckd_free(t);
338  }
339 }
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state]...
Definition: tmat.h:107
int16 n_tmat
Number of matrices.
Definition: tmat.h:109
void tmat_free(tmat_t *t)
RAH, add code to remove memory allocated by tmat_init.
Definition: tmat.c:332
Implementation of HMM base structure.
tmat_t * tmat_init(char const *tmatfile, logmath_t *lmath, float64 tpfloor, int32 breport)
Initialize transition matrix.
Definition: tmat.c:191
int16 n_state
Number of source states in matrix (only the emitting states); number of destination states = n_state+1...
Definition: tmat.h:110
void tmat_dump(tmat_t *tmat, FILE *fp)
Dumping the transition matrix for debugging.
Definition: tmat.c:129
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
Transition matrix data structure.
Transition matrix data structure.
Definition: tmat.h:106
void tmat_report(tmat_t *t)
Report the detail of the transition matrix structure.
Definition: tmat.c:319