PocketSphinx  5prealpha
ngram_search.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
42 /* System headers. */
43 #include <string.h>
44 #include <assert.h>
45 
46 /* SphinxBase headers. */
47 #include <sphinxbase/ckd_alloc.h>
48 #include <sphinxbase/listelem_alloc.h>
49 #include <sphinxbase/err.h>
50 
51 /* Local headers. */
52 #include "pocketsphinx_internal.h"
53 #include "ps_lattice_internal.h"
54 #include "ngram_search.h"
55 #include "ngram_search_fwdtree.h"
56 #include "ngram_search_fwdflat.h"
57 
58 static int ngram_search_start(ps_search_t *search);
59 static int ngram_search_step(ps_search_t *search, int frame_idx);
60 static int ngram_search_finish(ps_search_t *search);
61 static int ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p);
62 static char const *ngram_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final);
63 static int32 ngram_search_prob(ps_search_t *search);
64 static ps_seg_t *ngram_search_seg_iter(ps_search_t *search, int32 *out_score);
65 
66 static ps_searchfuncs_t ngram_funcs = {
67  /* start: */ ngram_search_start,
68  /* step: */ ngram_search_step,
69  /* finish: */ ngram_search_finish,
70  /* reinit: */ ngram_search_reinit,
71  /* free: */ ngram_search_free,
72  /* lattice: */ ngram_search_lattice,
73  /* hyp: */ ngram_search_hyp,
74  /* prob: */ ngram_search_prob,
75  /* seg_iter: */ ngram_search_seg_iter,
76 };
77 
78 static ngram_model_t *default_lm;
79 
80 static void
81 ngram_search_update_widmap(ngram_search_t *ngs)
82 {
83  char const **words;
84  int32 i, n_words;
85 
86  /* It's okay to include fillers since they won't be in the LM */
87  n_words = ps_search_n_words(ngs);
88  words = (char const**)ckd_calloc(n_words, sizeof(*words));
89  /* This will include alternates, again, that's okay since they aren't in the LM */
90  for (i = 0; i < n_words; ++i)
91  words[i] = dict_wordstr(ps_search_dict(ngs), i);
92  ngram_model_set_map_words(ngs->lmset, words, n_words);
93  ckd_free(words);
94 }
95 
96 static void
97 ngram_search_calc_beams(ngram_search_t *ngs)
98 {
99  cmd_ln_t *config;
100  acmod_t *acmod;
101 
102  config = ps_search_config(ngs);
103  acmod = ps_search_acmod(ngs);
104 
105  /* Log beam widths. */
106  ngs->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"))>>SENSCR_SHIFT;
107  ngs->wbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"))>>SENSCR_SHIFT;
108  ngs->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"))>>SENSCR_SHIFT;
109  ngs->lpbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lpbeam"))>>SENSCR_SHIFT;
110  ngs->lponlybeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lponlybeam"))>>SENSCR_SHIFT;
111  ngs->fwdflatbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatbeam"))>>SENSCR_SHIFT;
112  ngs->fwdflatwbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatwbeam"))>>SENSCR_SHIFT;
113 
114  /* Absolute pruning parameters. */
115  ngs->maxwpf = cmd_ln_int32_r(config, "-maxwpf");
116  ngs->maxhmmpf = cmd_ln_int32_r(config, "-maxhmmpf");
117 
118  /* Various penalties which may or may not be useful. */
119  ngs->wip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip")) >>SENSCR_SHIFT;
120  ngs->nwpen = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-nwpen")) >>SENSCR_SHIFT;
121  ngs->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip")) >>SENSCR_SHIFT;
122  ngs->silpen = ngs->pip
123  + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-silprob"))>>SENSCR_SHIFT);
124  ngs->fillpen = ngs->pip
125  + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-fillprob"))>>SENSCR_SHIFT);
126 
127  /* Language weight ratios for fwdflat and bestpath search. */
128  ngs->fwdflat_fwdtree_lw_ratio =
129  cmd_ln_float32_r(config, "-fwdflatlw")
130  / cmd_ln_float32_r(config, "-lw");
131  ngs->bestpath_fwdtree_lw_ratio =
132  cmd_ln_float32_r(config, "-bestpathlw")
133  / cmd_ln_float32_r(config, "-lw");
134 
135  /* Acoustic score scale for posterior probabilities. */
136  ngs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
137 }
138 
139 ps_search_t *
140 ngram_search_init(const char *name,
141  ngram_model_t *lm,
142  cmd_ln_t *config,
143  acmod_t *acmod,
144  dict_t *dict,
145  dict2pid_t *d2p)
146 {
147  ngram_search_t *ngs;
148  static char *lmname = "default";
149 
150  /* Make the acmod's feature buffer growable if we are doing two-pass
151  * search. */
152  acmod_set_grow(acmod, cmd_ln_boolean_r(config, "-fwdflat") &&
153  cmd_ln_boolean_r(config, "-fwdtree"));
154 
155  ngs = ckd_calloc(1, sizeof(*ngs));
156  ps_search_init(&ngs->base, &ngram_funcs, PS_SEARCH_TYPE_NGRAM, name, config, acmod, dict, d2p);
157 
158  ngs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
159  acmod->tmat->tp, NULL, acmod->mdef->sseq);
160  if (ngs->hmmctx == NULL) {
161  ps_search_free(ps_search_base(ngs));
162  return NULL;
163  }
164  ngs->chan_alloc = listelem_alloc_init(sizeof(chan_t));
165  ngs->root_chan_alloc = listelem_alloc_init(sizeof(root_chan_t));
166  ngs->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t));
167 
168  /* Calculate various beam widths and such. */
169  ngram_search_calc_beams(ngs);
170 
171  /* Allocate a billion different tables for stuff. */
172  ngs->word_chan = ckd_calloc(dict_size(dict),
173  sizeof(*ngs->word_chan));
174  ngs->word_lat_idx = ckd_calloc(dict_size(dict),
175  sizeof(*ngs->word_lat_idx));
176  ngs->word_active = bitvec_alloc(dict_size(dict));
177  ngs->last_ltrans = ckd_calloc(dict_size(dict),
178  sizeof(*ngs->last_ltrans));
179 
180  /* FIXME: All these structures need to be made dynamic with
181  * garbage collection. */
182  ngs->bp_table_size = cmd_ln_int32_r(config, "-latsize");
183  ngs->bp_table = ckd_calloc(ngs->bp_table_size,
184  sizeof(*ngs->bp_table));
185  /* FIXME: This thing is frickin' huge. */
186  ngs->bscore_stack_size = ngs->bp_table_size * 20;
187  ngs->bscore_stack = ckd_calloc(ngs->bscore_stack_size,
188  sizeof(*ngs->bscore_stack));
189  ngs->n_frame_alloc = 256;
190  ngs->bp_table_idx = ckd_calloc(ngs->n_frame_alloc + 1,
191  sizeof(*ngs->bp_table_idx));
192  ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
193 
194  /* Allocate active word list array */
195  ngs->active_word_list = ckd_calloc_2d(2, dict_size(dict),
196  sizeof(**ngs->active_word_list));
197 
198  ngs->lmset = ngram_model_set_init(config, &lm, &lmname, NULL, 1);
199  if (!ngs->lmset)
200  goto error_out;
201 
202  if (ngram_wid(ngs->lmset, S3_FINISH_WORD) ==
203  ngram_unknown_wid(ngs->lmset))
204  {
205  E_ERROR("Language model/set does not contain </s>, "
206  "recognition will fail\n");
207  goto error_out;
208  }
209 
210  /* Create word mappings. */
211  ngram_search_update_widmap(ngs);
212 
213  /* Initialize fwdtree, fwdflat, bestpath modules if necessary. */
214  if (cmd_ln_boolean_r(config, "-fwdtree")) {
215  ngram_fwdtree_init(ngs);
216  ngs->fwdtree = TRUE;
217  ngs->fwdtree_perf.name = "fwdtree";
218  ptmr_init(&ngs->fwdtree_perf);
219  }
220  if (cmd_ln_boolean_r(config, "-fwdflat")) {
221  ngram_fwdflat_init(ngs);
222  ngs->fwdflat = TRUE;
223  ngs->fwdflat_perf.name = "fwdflat";
224  ptmr_init(&ngs->fwdflat_perf);
225  }
226  if (cmd_ln_boolean_r(config, "-bestpath")) {
227  ngs->bestpath = TRUE;
228  ngs->bestpath_perf.name = "bestpath";
229  ptmr_init(&ngs->bestpath_perf);
230  }
231 
232  return (ps_search_t *)ngs;
233 
234 error_out:
236  return NULL;
237 }
238 
239 static int
240 ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
241 {
242  ngram_search_t *ngs = (ngram_search_t *)search;
243  int old_n_words;
244  int rv = 0;
245 
246  /* Update the number of words. */
247  old_n_words = search->n_words;
248  if (old_n_words != dict_size(dict)) {
249  search->n_words = dict_size(dict);
250  /* Reallocate these temporary arrays. */
251  ckd_free(ngs->word_lat_idx);
252  ckd_free(ngs->word_active);
253  ckd_free(ngs->last_ltrans);
254  ckd_free_2d(ngs->active_word_list);
255  ngs->word_lat_idx = ckd_calloc(search->n_words, sizeof(*ngs->word_lat_idx));
256  ngs->word_active = bitvec_alloc(search->n_words);
257  ngs->last_ltrans = ckd_calloc(search->n_words, sizeof(*ngs->last_ltrans));
258  ngs->active_word_list
259  = ckd_calloc_2d(2, search->n_words,
260  sizeof(**ngs->active_word_list));
261  }
262 
263  /* Free old dict2pid, dict */
264  ps_search_base_reinit(search, dict, d2p);
265 
266  if (ngs->lmset == NULL)
267  return 0;
268 
269  /* Update beam widths. */
270  ngram_search_calc_beams(ngs);
271 
272  /* Update word mappings. */
273  ngram_search_update_widmap(ngs);
274 
275  /* Now rebuild lextrees. */
276  if (ngs->fwdtree) {
277  if ((rv = ngram_fwdtree_reinit(ngs)) < 0)
278  return rv;
279  }
280  if (ngs->fwdflat) {
281  if ((rv = ngram_fwdflat_reinit(ngs)) < 0)
282  return rv;
283  }
284 
285  return rv;
286 }
287 
288 void
290 {
291  ngram_search_t *ngs = (ngram_search_t *)search;
292 
293  if (ngs->fwdtree)
295  if (ngs->fwdflat)
297  if (ngs->bestpath) {
298  double n_speech = (double)ngs->n_tot_frame
299  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
300 
301  E_INFO("TOTAL bestpath %.2f CPU %.3f xRT\n",
302  ngs->bestpath_perf.t_tot_cpu,
303  ngs->bestpath_perf.t_tot_cpu / n_speech);
304  E_INFO("TOTAL bestpath %.2f wall %.3f xRT\n",
305  ngs->bestpath_perf.t_tot_elapsed,
306  ngs->bestpath_perf.t_tot_elapsed / n_speech);
307  }
308 
309  ps_search_base_free(search);
310  hmm_context_free(ngs->hmmctx);
311  listelem_alloc_free(ngs->chan_alloc);
312  listelem_alloc_free(ngs->root_chan_alloc);
313  listelem_alloc_free(ngs->latnode_alloc);
314  ngram_model_free(ngs->lmset);
315 
316  ckd_free(ngs->word_chan);
317  ckd_free(ngs->word_lat_idx);
318  bitvec_free(ngs->word_active);
319  ckd_free(ngs->bp_table);
320  ckd_free(ngs->bscore_stack);
321  if (ngs->bp_table_idx != NULL)
322  ckd_free(ngs->bp_table_idx - 1);
323  ckd_free_2d(ngs->active_word_list);
324  ckd_free(ngs->last_ltrans);
325  ckd_free(ngs);
326 }
327 
328 int
330 {
331  if (frame_idx >= ngs->n_frame_alloc) {
332  ngs->n_frame_alloc *= 2;
333  ngs->bp_table_idx = ckd_realloc(ngs->bp_table_idx - 1,
334  (ngs->n_frame_alloc + 1)
335  * sizeof(*ngs->bp_table_idx));
336  if (ngs->frm_wordlist) {
337  ngs->frm_wordlist = ckd_realloc(ngs->frm_wordlist,
338  ngs->n_frame_alloc
339  * sizeof(*ngs->frm_wordlist));
340  }
341  ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
342  }
343  ngs->bp_table_idx[frame_idx] = ngs->bpidx;
344  return ngs->bpidx;
345 }
346 
347 static void
348 set_real_wid(ngram_search_t *ngs, int32 bp)
349 {
350  bptbl_t *ent, *prev;
351 
352  assert(bp != NO_BP);
353  ent = ngs->bp_table + bp;
354  if (ent->bp == NO_BP)
355  prev = NULL;
356  else
357  prev = ngs->bp_table + ent->bp;
358 
359  /* Propagate lm state for fillers, rotate it for words. */
360  if (dict_filler_word(ps_search_dict(ngs), ent->wid)) {
361  if (prev != NULL) {
362  ent->real_wid = prev->real_wid;
363  ent->prev_real_wid = prev->prev_real_wid;
364  }
365  else {
366  ent->real_wid = dict_basewid(ps_search_dict(ngs),
367  ent->wid);
368  ent->prev_real_wid = BAD_S3WID;
369  }
370  }
371  else {
372  ent->real_wid = dict_basewid(ps_search_dict(ngs), ent->wid);
373  if (prev != NULL)
374  ent->prev_real_wid = prev->real_wid;
375  else
376  ent->prev_real_wid = BAD_S3WID;
377  }
378 }
379 
380 #define NGRAM_HISTORY_LONG_WORD 2000 /* 20s */
381 
382 void
384  int32 w, int32 score, int32 path, int32 rc)
385 {
386  int32 bp;
387 
388  /* Look for an existing exit for this word in this frame. The
389  * only reason one would exist is from a different right context
390  * triphone, but of course that happens quite frequently. */
391  bp = ngs->word_lat_idx[w];
392  if (bp != NO_BP) {
393 
394  if (frame_idx - ngs->bp_table[path].frame > NGRAM_HISTORY_LONG_WORD) {
395  E_WARN("Word '%s' survived for %d frames, potential overpruning\n", dict_wordstr(ps_search_dict(ngs), w),
396  frame_idx - ngs->bp_table[path].frame);
397  }
398 
399  /* Keep only the best scoring one, we will reconstruct the
400  * others from the right context scores - usually the history
401  * is not lost. */
402  if (ngs->bp_table[bp].score WORSE_THAN score) {
403  assert(path != bp); /* Pathological. */
404  if (ngs->bp_table[bp].bp != path) {
405  int32 bplh[2], newlh[2];
406  /* But, sometimes, the history *is* lost. If we wanted to
407  * do exact language model scoring we'd have to preserve
408  * these alternate histories. */
409  E_DEBUG(2,("Updating path history %d => %d frame %d\n",
410  ngs->bp_table[bp].bp, path, frame_idx));
411  bplh[0] = ngs->bp_table[bp].bp == -1
412  ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].prev_real_wid;
413  bplh[1] = ngs->bp_table[bp].bp == -1
414  ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].real_wid;
415  newlh[0] = path == -1
416  ? -1 : ngs->bp_table[path].prev_real_wid;
417  newlh[1] = path == -1
418  ? -1 : ngs->bp_table[path].real_wid;
419  /* Actually it's worth checking how often the actual
420  * language model state changes. */
421  if (bplh[0] != newlh[0] || bplh[1] != newlh[1]) {
422  /* It's fairly rare that the actual language model
423  * state changes, but it does happen some
424  * times. */
425  E_DEBUG(1, ("Updating language model state %s,%s => %s,%s frame %d\n",
426  dict_wordstr(ps_search_dict(ngs), bplh[0]),
427  dict_wordstr(ps_search_dict(ngs), bplh[1]),
428  dict_wordstr(ps_search_dict(ngs), newlh[0]),
429  dict_wordstr(ps_search_dict(ngs), newlh[1]),
430  frame_idx));
431  set_real_wid(ngs, bp);
432  }
433  ngs->bp_table[bp].bp = path;
434  }
435  ngs->bp_table[bp].score = score;
436  }
437  /* But do keep track of scores for all right contexts, since
438  * we need them to determine the starting path scores for any
439  * successors of this word exit. */
440  if (ngs->bp_table[bp].s_idx != -1)
441  ngs->bscore_stack[ngs->bp_table[bp].s_idx + rc] = score;
442  }
443  else {
444  int32 i, rcsize;
445  bptbl_t *be;
446 
447  /* This might happen if recognition fails. */
448  if (ngs->bpidx == NO_BP) {
449  E_ERROR("No entries in backpointer table!");
450  return;
451  }
452 
453  /* Expand the backpointer tables if necessary. */
454  if (ngs->bpidx >= ngs->bp_table_size) {
455  ngs->bp_table_size *= 2;
456  ngs->bp_table = ckd_realloc(ngs->bp_table,
457  ngs->bp_table_size
458  * sizeof(*ngs->bp_table));
459  E_INFO("Resized backpointer table to %d entries\n", ngs->bp_table_size);
460  }
461  if (ngs->bss_head >= ngs->bscore_stack_size
462  - bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef)) {
463  ngs->bscore_stack_size *= 2;
464  ngs->bscore_stack = ckd_realloc(ngs->bscore_stack,
465  ngs->bscore_stack_size
466  * sizeof(*ngs->bscore_stack));
467  E_INFO("Resized score stack to %d entries\n", ngs->bscore_stack_size);
468  }
469 
470  ngs->word_lat_idx[w] = ngs->bpidx;
471  be = &(ngs->bp_table[ngs->bpidx]);
472  be->wid = w;
473  be->frame = frame_idx;
474  be->bp = path;
475  be->score = score;
476  be->s_idx = ngs->bss_head;
477  be->valid = TRUE;
478  assert(path != ngs->bpidx);
479 
480  /* DICT2PID */
481  /* Get diphone ID for final phone and number of ssids corresponding to it. */
482  be->last_phone = dict_last_phone(ps_search_dict(ngs),w);
483  if (dict_is_single_phone(ps_search_dict(ngs), w)) {
484  be->last2_phone = -1;
485  be->s_idx = -1;
486  rcsize = 0;
487  }
488  else {
489  be->last2_phone = dict_second_last_phone(ps_search_dict(ngs),w);
490  rcsize = dict2pid_rssid(ps_search_dict2pid(ngs),
491  be->last_phone, be->last2_phone)->n_ssid;
492  }
493  /* Allocate some space on the bscore_stack for all of these triphones. */
494  for (i = 0; i < rcsize; ++i)
495  ngs->bscore_stack[ngs->bss_head + i] = WORST_SCORE;
496  if (rcsize)
497  ngs->bscore_stack[ngs->bss_head + rc] = score;
498  set_real_wid(ngs, ngs->bpidx);
499 
500  ngs->bpidx++;
501  ngs->bss_head += rcsize;
502  }
503 }
504 
505 int
506 ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score, int32 *out_is_final)
507 {
508  /* End of backpointers for this frame. */
509  int end_bpidx;
510  int best_exit, bp;
511  int32 best_score;
512 
513  /* No hypothesis means no exit node! */
514  if (ngs->n_frame == 0)
515  return NO_BP;
516 
517  if (frame_idx == -1 || frame_idx >= ngs->n_frame)
518  frame_idx = ngs->n_frame - 1;
519  end_bpidx = ngs->bp_table_idx[frame_idx];
520 
521  best_score = WORST_SCORE;
522  best_exit = NO_BP;
523 
524  /* Scan back to find a frame with some backpointers in it. */
525  while (frame_idx >= 0 && ngs->bp_table_idx[frame_idx] == end_bpidx)
526  --frame_idx;
527  /* This is NOT an error, it just means there is no hypothesis yet. */
528  if (frame_idx < 0)
529  return NO_BP;
530 
531  /* Now find the entry for </s> OR the best scoring entry. */
532  assert(end_bpidx < ngs->bp_table_size);
533  for (bp = ngs->bp_table_idx[frame_idx]; bp < end_bpidx; ++bp) {
534  if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs)
535  || ngs->bp_table[bp].score BETTER_THAN best_score) {
536  best_score = ngs->bp_table[bp].score;
537  best_exit = bp;
538  }
539  if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs))
540  break;
541  }
542 
543  if (out_best_score) {
544  *out_best_score = best_score;
545  }
546  if (out_is_final) {
547  *out_is_final = (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs));
548  }
549  return best_exit;
550 }
551 
552 char const *
554 {
555  ps_search_t *base = ps_search_base(ngs);
556  char *c;
557  size_t len;
558  int bp;
559 
560  if (bpidx == NO_BP)
561  return NULL;
562 
563  bp = bpidx;
564  len = 0;
565  while (bp != NO_BP) {
566  bptbl_t *be = &ngs->bp_table[bp];
567  bp = be->bp;
568  if (dict_real_word(ps_search_dict(ngs), be->wid))
569  len += strlen(dict_basestr(ps_search_dict(ngs), be->wid)) + 1;
570  }
571 
572  ckd_free(base->hyp_str);
573  if (len == 0) {
574  base->hyp_str = NULL;
575  return base->hyp_str;
576  }
577  base->hyp_str = ckd_calloc(1, len);
578 
579  bp = bpidx;
580  c = base->hyp_str + len - 1;
581  while (bp != NO_BP) {
582  bptbl_t *be = &ngs->bp_table[bp];
583  size_t len;
584 
585  bp = be->bp;
586  if (dict_real_word(ps_search_dict(ngs), be->wid)) {
587  len = strlen(dict_basestr(ps_search_dict(ngs), be->wid));
588  c -= len;
589  memcpy(c, dict_basestr(ps_search_dict(ngs), be->wid), len);
590  if (c > base->hyp_str) {
591  --c;
592  *c = ' ';
593  }
594  }
595  }
596 
597  return base->hyp_str;
598 }
599 
600 void
602 {
603  chan_t *hmm, *thmm;
604  xwdssid_t *rssid;
605  int32 i, tmatid, ciphone;
606 
607  /* DICT2PID */
608  /* Get pointer to array of triphones for final diphone. */
609  assert(!dict_is_single_phone(ps_search_dict(ngs), w));
610  ciphone = dict_last_phone(ps_search_dict(ngs),w);
611  rssid = dict2pid_rssid(ps_search_dict2pid(ngs),
612  ciphone,
613  dict_second_last_phone(ps_search_dict(ngs),w));
614  tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
615  hmm = ngs->word_chan[w];
616  if ((hmm == NULL) || (hmm_nonmpx_ssid(&hmm->hmm) != rssid->ssid[0])) {
617  hmm = listelem_malloc(ngs->chan_alloc);
618  hmm->next = ngs->word_chan[w];
619  ngs->word_chan[w] = hmm;
620 
621  hmm->info.rc_id = 0;
622  hmm->ciphone = ciphone;
623  hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[0], tmatid);
624  E_DEBUG(3,("allocated rc_id 0 ssid %d ciphone %d lc %d word %s\n",
625  rssid->ssid[0], hmm->ciphone,
626  dict_second_last_phone(ps_search_dict(ngs),w),
627  dict_wordstr(ps_search_dict(ngs),w)));
628  }
629  for (i = 1; i < rssid->n_ssid; ++i) {
630  if ((hmm->next == NULL) || (hmm_nonmpx_ssid(&hmm->next->hmm) != rssid->ssid[i])) {
631  thmm = listelem_malloc(ngs->chan_alloc);
632  thmm->next = hmm->next;
633  hmm->next = thmm;
634  hmm = thmm;
635 
636  hmm->info.rc_id = i;
637  hmm->ciphone = ciphone;
638  hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[i], tmatid);
639  E_DEBUG(3,("allocated rc_id %d ssid %d ciphone %d lc %d word %s\n",
640  i, rssid->ssid[i], hmm->ciphone,
641  dict_second_last_phone(ps_search_dict(ngs),w),
642  dict_wordstr(ps_search_dict(ngs),w)));
643  }
644  else
645  hmm = hmm->next;
646  }
647 }
648 
649 void
651 {
652  chan_t *hmm, *thmm;
653 
654  for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
655  thmm = hmm->next;
656  hmm_deinit(&hmm->hmm);
657  listelem_free(ngs->chan_alloc, hmm);
658  }
659  ngs->word_chan[w] = NULL;
660 }
661 
662 int32
664 {
665  /* DICT2PID */
666  /* Get the mapping from right context phone ID to index in the
667  * right context table and the bscore_stack. */
668  if (pbe->last2_phone == -1) {
669  /* No right context for single phone predecessor words. */
670  return pbe->score;
671  }
672  else {
673  xwdssid_t *rssid;
674  /* Find the index for the last diphone of the previous word +
675  * the first phone of the current word. */
676  rssid = dict2pid_rssid(ps_search_dict2pid(ngs),
677  pbe->last_phone, pbe->last2_phone);
678  /* This may be WORST_SCORE, which means that there was no exit
679  * with rcphone as right context. */
680  return ngs->bscore_stack[pbe->s_idx + rssid->cimap[rcphone]];
681  }
682 }
683 
684 /*
685  * Compute acoustic and LM scores for a BPTable entry (segment).
686  */
687 void
688 ngram_compute_seg_score(ngram_search_t *ngs, bptbl_t *be, float32 lwf,
689  int32 *out_ascr, int32 *out_lscr)
690 {
691  bptbl_t *pbe;
692  int32 start_score;
693 
694  /* Start of utterance. */
695  if (be->bp == NO_BP) {
696  *out_ascr = be->score;
697  *out_lscr = 0;
698  return;
699  }
700 
701  /* Otherwise, calculate lscr and ascr. */
702  pbe = ngs->bp_table + be->bp;
703  start_score = ngram_search_exit_score(ngs, pbe,
704  dict_first_phone(ps_search_dict(ngs),be->wid));
705  assert(start_score BETTER_THAN WORST_SCORE);
706 
707  /* FIXME: These result in positive acoustic scores when filler
708  words have non-filler pronunciations. That whole business
709  is still pretty much broken but at least it doesn't
710  segfault. */
711  if (be->wid == ps_search_silence_wid(ngs)) {
712  *out_lscr = ngs->silpen;
713  }
714  else if (dict_filler_word(ps_search_dict(ngs), be->wid)) {
715  *out_lscr = ngs->fillpen;
716  }
717  else {
718  int32 n_used;
719  *out_lscr = ngram_tg_score(ngs->lmset,
720  be->real_wid,
721  pbe->real_wid,
722  pbe->prev_real_wid,
723  &n_used)>>SENSCR_SHIFT;
724  *out_lscr = *out_lscr * lwf;
725  }
726  *out_ascr = be->score - start_score - *out_lscr;
727 }
728 
729 static int
730 ngram_search_start(ps_search_t *search)
731 {
732  ngram_search_t *ngs = (ngram_search_t *)search;
733 
734  ngs->done = FALSE;
735  ngram_model_flush(ngs->lmset);
736  if (ngs->fwdtree)
737  ngram_fwdtree_start(ngs);
738  else if (ngs->fwdflat)
739  ngram_fwdflat_start(ngs);
740  else
741  return -1;
742  return 0;
743 }
744 
745 static int
746 ngram_search_step(ps_search_t *search, int frame_idx)
747 {
748  ngram_search_t *ngs = (ngram_search_t *)search;
749 
750  if (ngs->fwdtree)
751  return ngram_fwdtree_search(ngs, frame_idx);
752  else if (ngs->fwdflat)
753  return ngram_fwdflat_search(ngs, frame_idx);
754  else
755  return -1;
756 }
757 
758 void
759 dump_bptable(ngram_search_t *ngs)
760 {
761  int i;
762  E_INFO("Backpointer table (%d entries):\n", ngs->bpidx);
763  for (i = 0; i < ngs->bpidx; ++i) {
764  bptbl_t *bpe = ngs->bp_table + i;
765  int j, rcsize;
766 
767  E_INFO_NOFN("%-5d %-10s start %-3d end %-3d score %-8d bp %-3d real_wid %-5d prev_real_wid %-5d",
768  i, dict_wordstr(ps_search_dict(ngs), bpe->wid),
769  (bpe->bp == -1
770  ? 0 : ngs->bp_table[bpe->bp].frame + 1),
771  bpe->frame, bpe->score, bpe->bp,
772  bpe->real_wid, bpe->prev_real_wid);
773 
774  if (bpe->last2_phone == -1)
775  rcsize = 0;
776  else
777  rcsize = dict2pid_rssid(ps_search_dict2pid(ngs),
778  bpe->last_phone, bpe->last2_phone)->n_ssid;
779  if (rcsize) {
780  E_INFOCONT("\tbss");
781  for (j = 0; j < rcsize; ++j)
782  if (ngs->bscore_stack[bpe->s_idx + j] != WORST_SCORE)
783  E_INFOCONT(" %d", bpe->score - ngs->bscore_stack[bpe->s_idx + j]);
784  }
785  E_INFOCONT("\n");
786  }
787 }
788 
789 static int
790 ngram_search_finish(ps_search_t *search)
791 {
792  ngram_search_t *ngs = (ngram_search_t *)search;
793 
794  ngs->n_tot_frame += ngs->n_frame;
795  if (ngs->fwdtree) {
797  /* dump_bptable(ngs); */
798 
799  /* Now do fwdflat search in its entirety, if requested. */
800  if (ngs->fwdflat) {
801  int i;
802  /* Rewind the acoustic model. */
803  if (acmod_rewind(ps_search_acmod(ngs)) < 0)
804  return -1;
805  /* Now redo search. */
806  ngram_fwdflat_start(ngs);
807  i = 0;
808  while (ps_search_acmod(ngs)->n_feat_frame > 0) {
809  int nfr;
810  if ((nfr = ngram_fwdflat_search(ngs, i)) < 0)
811  return nfr;
812  acmod_advance(ps_search_acmod(ngs));
813  ++i;
814  }
816  /* And now, we should have a result... */
817  /* dump_bptable(ngs); */
818  }
819  }
820  else if (ngs->fwdflat) {
822  }
823 
824  /* Mark the current utterance as done. */
825  ngs->done = TRUE;
826  return 0;
827 }
828 
829 static ps_latlink_t *
830 ngram_search_bestpath(ps_search_t *search, int32 *out_score, int backward)
831 {
832  ngram_search_t *ngs = (ngram_search_t *)search;
833 
834  if (search->last_link == NULL) {
835  search->last_link = ps_lattice_bestpath(search->dag, ngs->lmset,
836  ngs->bestpath_fwdtree_lw_ratio,
837  ngs->ascale);
838  if (search->last_link == NULL)
839  return NULL;
840  /* Also calculate betas so we can fill in the posterior
841  * probability field in the segmentation. */
842  if (search->post == 0)
843  search->post = ps_lattice_posterior(search->dag, ngs->lmset,
844  ngs->ascale);
845  }
846  if (out_score)
847  *out_score = search->last_link->path_scr + search->dag->final_node_ascr;
848  return search->last_link;
849 }
850 
851 static char const *
852 ngram_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final)
853 {
854  ngram_search_t *ngs = (ngram_search_t *)search;
855 
856  /* Only do bestpath search if the utterance is complete. */
857  if (ngs->bestpath && ngs->done) {
858  ps_lattice_t *dag;
859  ps_latlink_t *link;
860  char const *hyp;
861  double n_speech;
862 
863  ptmr_reset(&ngs->bestpath_perf);
864  ptmr_start(&ngs->bestpath_perf);
865  if ((dag = ngram_search_lattice(search)) == NULL)
866  return NULL;
867  if ((link = ngram_search_bestpath(search, out_score, FALSE)) == NULL)
868  return NULL;
869  hyp = ps_lattice_hyp(dag, link);
870  ptmr_stop(&ngs->bestpath_perf);
871  n_speech = (double)dag->n_frames
872  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
873  E_INFO("bestpath %.2f CPU %.3f xRT\n",
874  ngs->bestpath_perf.t_cpu,
875  ngs->bestpath_perf.t_cpu / n_speech);
876  E_INFO("bestpath %.2f wall %.3f xRT\n",
877  ngs->bestpath_perf.t_elapsed,
878  ngs->bestpath_perf.t_elapsed / n_speech);
879  return hyp;
880  }
881  else {
882  int32 bpidx;
883 
884  /* fwdtree and fwdflat use same backpointer table. */
885  bpidx = ngram_search_find_exit(ngs, -1, out_score, out_is_final);
886  if (bpidx != NO_BP)
887  return ngram_search_bp_hyp(ngs, bpidx);
888  }
889 
890  return NULL;
891 }
892 
893 static void
894 ngram_search_bp2itor(ps_seg_t *seg, int bp)
895 {
896  ngram_search_t *ngs = (ngram_search_t *)seg->search;
897  bptbl_t *be, *pbe;
898 
899  be = &ngs->bp_table[bp];
900  pbe = be->bp == -1 ? NULL : &ngs->bp_table[be->bp];
901  seg->word = dict_wordstr(ps_search_dict(ngs), be->wid);
902  seg->ef = be->frame;
903  seg->sf = pbe ? pbe->frame + 1 : 0;
904  seg->prob = 0; /* Bogus value... */
905  /* Compute acoustic and LM scores for this segment. */
906  if (pbe == NULL) {
907  seg->ascr = be->score;
908  seg->lscr = 0;
909  seg->lback = 0;
910  }
911  else {
912  int32 start_score;
913 
914  /* Find ending path score of previous word. */
915  start_score = ngram_search_exit_score(ngs, pbe,
916  dict_first_phone(ps_search_dict(ngs), be->wid));
917  assert(start_score BETTER_THAN WORST_SCORE);
918  if (be->wid == ps_search_silence_wid(ngs)) {
919  seg->lscr = ngs->silpen;
920  }
921  else if (dict_filler_word(ps_search_dict(ngs), be->wid)) {
922  seg->lscr = ngs->fillpen;
923  }
924  else {
925  seg->lscr = ngram_tg_score(ngs->lmset,
926  be->real_wid,
927  pbe->real_wid,
928  pbe->prev_real_wid,
929  &seg->lback)>>SENSCR_SHIFT;
930  seg->lscr = (int32)(seg->lscr * seg->lwf);
931  }
932  seg->ascr = be->score - start_score - seg->lscr;
933  }
934 }
935 
936 static void
937 ngram_bp_seg_free(ps_seg_t *seg)
938 {
939  bptbl_seg_t *itor = (bptbl_seg_t *)seg;
940 
941  ckd_free(itor->bpidx);
942  ckd_free(itor);
943 }
944 
945 static ps_seg_t *
946 ngram_bp_seg_next(ps_seg_t *seg)
947 {
948  bptbl_seg_t *itor = (bptbl_seg_t *)seg;
949 
950  if (++itor->cur == itor->n_bpidx) {
951  ngram_bp_seg_free(seg);
952  return NULL;
953  }
954 
955  ngram_search_bp2itor(seg, itor->bpidx[itor->cur]);
956  return seg;
957 }
958 
959 static ps_segfuncs_t ngram_bp_segfuncs = {
960  /* seg_next */ ngram_bp_seg_next,
961  /* seg_free */ ngram_bp_seg_free
962 };
963 
964 static ps_seg_t *
965 ngram_search_bp_iter(ngram_search_t *ngs, int bpidx, float32 lwf)
966 {
967  bptbl_seg_t *itor;
968  int bp, cur;
969 
970  /* Calling this an "iterator" is a bit of a misnomer since we have
971  * to get the entire backtrace in order to produce it. On the
972  * other hand, all we actually need is the bptbl IDs, and we can
973  * allocate a fixed-size array of them. */
974  itor = ckd_calloc(1, sizeof(*itor));
975  itor->base.vt = &ngram_bp_segfuncs;
976  itor->base.search = ps_search_base(ngs);
977  itor->base.lwf = lwf;
978  itor->n_bpidx = 0;
979  bp = bpidx;
980  while (bp != NO_BP) {
981  bptbl_t *be = &ngs->bp_table[bp];
982  bp = be->bp;
983  ++itor->n_bpidx;
984  }
985  if (itor->n_bpidx == 0) {
986  ckd_free(itor);
987  return NULL;
988  }
989  itor->bpidx = ckd_calloc(itor->n_bpidx, sizeof(*itor->bpidx));
990  cur = itor->n_bpidx - 1;
991  bp = bpidx;
992  while (bp != NO_BP) {
993  bptbl_t *be = &ngs->bp_table[bp];
994  itor->bpidx[cur] = bp;
995  bp = be->bp;
996  --cur;
997  }
998 
999  /* Fill in relevant fields for first element. */
1000  ngram_search_bp2itor((ps_seg_t *)itor, itor->bpidx[0]);
1001 
1002  return (ps_seg_t *)itor;
1003 }
1004 
1005 static ps_seg_t *
1006 ngram_search_seg_iter(ps_search_t *search, int32 *out_score)
1007 {
1008  ngram_search_t *ngs = (ngram_search_t *)search;
1009 
1010  /* Only do bestpath search if the utterance is done. */
1011  if (ngs->bestpath && ngs->done) {
1012  ps_lattice_t *dag;
1013  ps_latlink_t *link;
1014  double n_speech;
1015  ps_seg_t *itor;
1016 
1017  ptmr_reset(&ngs->bestpath_perf);
1018  ptmr_start(&ngs->bestpath_perf);
1019  if ((dag = ngram_search_lattice(search)) == NULL)
1020  return NULL;
1021  if ((link = ngram_search_bestpath(search, out_score, TRUE)) == NULL)
1022  return NULL;
1023  itor = ps_lattice_seg_iter(dag, link,
1024  ngs->bestpath_fwdtree_lw_ratio);
1025  ptmr_stop(&ngs->bestpath_perf);
1026  n_speech = (double)dag->n_frames
1027  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
1028  E_INFO("bestpath %.2f CPU %.3f xRT\n",
1029  ngs->bestpath_perf.t_cpu,
1030  ngs->bestpath_perf.t_cpu / n_speech);
1031  E_INFO("bestpath %.2f wall %.3f xRT\n",
1032  ngs->bestpath_perf.t_elapsed,
1033  ngs->bestpath_perf.t_elapsed / n_speech);
1034  return itor;
1035  }
1036  else {
1037  int32 bpidx;
1038 
1039  /* fwdtree and fwdflat use same backpointer table. */
1040  bpidx = ngram_search_find_exit(ngs, -1, out_score, NULL);
1041  return ngram_search_bp_iter(ngs, bpidx,
1042  /* but different language weights... */
1043  (ngs->done && ngs->fwdflat)
1044  ? ngs->fwdflat_fwdtree_lw_ratio : 1.0);
1045  }
1046 
1047  return NULL;
1048 }
1049 
1050 static int32
1051 ngram_search_prob(ps_search_t *search)
1052 {
1053  ngram_search_t *ngs = (ngram_search_t *)search;
1054 
1055  /* Only do bestpath search if the utterance is done. */
1056  if (ngs->bestpath && ngs->done) {
1057  ps_lattice_t *dag;
1058  ps_latlink_t *link;
1059 
1060  if ((dag = ngram_search_lattice(search)) == NULL)
1061  return 0;
1062  if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
1063  return 0;
1064  return search->post;
1065  }
1066  else {
1067  /* FIXME: Give some kind of good estimate here, eventually. */
1068  return 0;
1069  }
1070 }
1071 
1072 static void
1073 create_dag_nodes(ngram_search_t *ngs, ps_lattice_t *dag)
1074 {
1075  bptbl_t *bp_ptr;
1076  int32 i;
1077 
1078  for (i = 0, bp_ptr = ngs->bp_table; i < ngs->bpidx; ++i, ++bp_ptr) {
1079  int32 sf, ef, wid;
1080  ps_latnode_t *node;
1081 
1082  /* Skip invalid backpointers (these result from -maxwpf pruning) */
1083  if (!bp_ptr->valid)
1084  continue;
1085 
1086  sf = (bp_ptr->bp < 0) ? 0 : ngs->bp_table[bp_ptr->bp].frame + 1;
1087  ef = bp_ptr->frame;
1088  wid = bp_ptr->wid;
1089 
1090  assert(ef < dag->n_frames);
1091  /* Skip non-final </s> entries. */
1092  if ((wid == ps_search_finish_wid(ngs)) && (ef < dag->n_frames - 1))
1093  continue;
1094 
1095  /* Skip if word not in LM */
1096  if ((!dict_filler_word(ps_search_dict(ngs), wid))
1097  && (!ngram_model_set_known_wid(ngs->lmset,
1098  dict_basewid(ps_search_dict(ngs), wid))))
1099  continue;
1100 
1101  /* See if bptbl entry <wid,sf> already in lattice */
1102  for (node = dag->nodes; node; node = node->next) {
1103  if ((node->wid == wid) && (node->sf == sf))
1104  break;
1105  }
1106 
1107  /* For the moment, store bptbl indices in node.{fef,lef} */
1108  if (node)
1109  node->lef = i;
1110  else {
1111  /* New node; link to head of list */
1112  node = listelem_malloc(dag->latnode_alloc);
1113  node->wid = wid;
1114  node->sf = sf; /* This is a frame index. */
1115  node->fef = node->lef = i; /* These are backpointer indices (argh) */
1116  node->reachable = FALSE;
1117  node->entries = NULL;
1118  node->exits = NULL;
1119 
1120  /* NOTE: This creates the list of nodes in reverse
1121  * topological order, i.e. a node always precedes its
1122  * antecedents in this list. */
1123  node->next = dag->nodes;
1124  dag->nodes = node;
1125  ++dag->n_nodes;
1126  }
1127  }
1128 }
1129 
1130 static ps_latnode_t *
1131 find_start_node(ngram_search_t *ngs, ps_lattice_t *dag)
1132 {
1133  ps_latnode_t *node;
1134 
1135  /* Find start node <s>.0 */
1136  for (node = dag->nodes; node; node = node->next) {
1137  if ((node->wid == ps_search_start_wid(ngs)) && (node->sf == 0))
1138  break;
1139  }
1140  if (!node) {
1141  /* This is probably impossible. */
1142  E_ERROR("Couldn't find <s> in first frame\n");
1143  return NULL;
1144  }
1145  return node;
1146 }
1147 
1148 static ps_latnode_t *
1149 find_end_node(ngram_search_t *ngs, ps_lattice_t *dag, float32 lwf)
1150 {
1151  ps_latnode_t *node;
1152  int32 ef, bestbp, bp, bestscore;
1153 
1154  /* Find final node </s>.last_frame; nothing can follow this node */
1155  for (node = dag->nodes; node; node = node->next) {
1156  int32 lef = ngs->bp_table[node->lef].frame;
1157  if ((node->wid == ps_search_finish_wid(ngs))
1158  && (lef == dag->n_frames - 1))
1159  break;
1160  }
1161  if (node != NULL)
1162  return node;
1163 
1164  /* It is quite likely that no </s> exited in the last frame. So,
1165  * find the node corresponding to the best exit. */
1166  /* Find the last frame containing a word exit. */
1167  for (ef = dag->n_frames - 1;
1168  ef >= 0 && ngs->bp_table_idx[ef] == ngs->bpidx;
1169  --ef);
1170  if (ef < 0) {
1171  E_ERROR("Empty backpointer table: can not build DAG.\n");
1172  return NULL;
1173  }
1174 
1175  /* Find best word exit in that frame. */
1176  bestscore = WORST_SCORE;
1177  bestbp = NO_BP;
1178  for (bp = ngs->bp_table_idx[ef]; bp < ngs->bp_table_idx[ef + 1]; ++bp) {
1179  int32 n_used, l_scr, wid, prev_wid;
1180  wid = ngs->bp_table[bp].real_wid;
1181  prev_wid = ngs->bp_table[bp].prev_real_wid;
1182  /* Always prefer </s>, of which there will only be one per frame. */
1183  if (wid == ps_search_finish_wid(ngs)) {
1184  bestbp = bp;
1185  break;
1186  }
1187  l_scr = ngram_tg_score(ngs->lmset, ps_search_finish_wid(ngs),
1188  wid, prev_wid, &n_used) >>SENSCR_SHIFT;
1189  l_scr = l_scr * lwf;
1190  if (ngs->bp_table[bp].score + l_scr BETTER_THAN bestscore) {
1191  bestscore = ngs->bp_table[bp].score + l_scr;
1192  bestbp = bp;
1193  }
1194  }
1195  if (bestbp == NO_BP) {
1196  E_ERROR("No word exits found in last frame (%d), assuming no recognition\n", ef);
1197  return NULL;
1198  }
1199  E_INFO("</s> not found in last frame, using %s.%d instead\n",
1200  dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid), ef);
1201 
1202  /* Now find the node that corresponds to it. */
1203  for (node = dag->nodes; node; node = node->next) {
1204  if (node->lef == bestbp)
1205  return node;
1206  }
1207 
1208  /* FIXME: This seems to happen a lot! */
1209  E_ERROR("Failed to find DAG node corresponding to %s\n",
1210  dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid));
1211  return NULL;
1212 }
1213 
1214 /*
1215  * Build lattice from bptable.
1216  */
1217 ps_lattice_t *
1219 {
1220  int32 i, score, ascr, lscr;
1221  ps_latnode_t *node, *from, *to;
1222  ngram_search_t *ngs;
1223  ps_lattice_t *dag;
1224  int min_endfr, nlink;
1225  float lwf;
1226 
1227  ngs = (ngram_search_t *)search;
1228  min_endfr = cmd_ln_int32_r(ps_search_config(search), "-min_endfr");
1229 
1230  /* If the best score is WORST_SCORE or worse, there is no way to
1231  * make a lattice. */
1233  return NULL;
1234 
1235  /* Check to see if a lattice has previously been created over the
1236  * same number of frames, and reuse it if so. */
1237  if (search->dag && search->dag->n_frames == ngs->n_frame)
1238  return search->dag;
1239 
1240  /* Nope, create a new one. */
1241  ps_lattice_free(search->dag);
1242  search->dag = NULL;
1243  dag = ps_lattice_init_search(search, ngs->n_frame);
1244  /* Compute these such that they agree with the fwdtree language weight. */
1245  lwf = ngs->fwdflat ? ngs->fwdflat_fwdtree_lw_ratio : 1.0;
1246  create_dag_nodes(ngs, dag);
1247  if ((dag->start = find_start_node(ngs, dag)) == NULL)
1248  goto error_out;
1249  if ((dag->end = find_end_node(ngs, dag, ngs->bestpath_fwdtree_lw_ratio)) == NULL)
1250  goto error_out;
1251  E_INFO("lattice start node %s.%d end node %s.%d\n",
1252  dict_wordstr(search->dict, dag->start->wid), dag->start->sf,
1253  dict_wordstr(search->dict, dag->end->wid), dag->end->sf);
1254 
1255  ngram_compute_seg_score(ngs, ngs->bp_table + dag->end->lef, lwf,
1256  &dag->final_node_ascr, &lscr);
1257 
1258  /*
1259  * At this point, dag->nodes is ordered such that nodes earlier in
1260  * the list can follow (in time) those later in the list, but not
1261  * vice versa (see above - also note that adjacency is purely
1262  * determined by time which is why we can make this claim). Now
1263  * create precedence links and simultanesously mark all nodes that
1264  * can reach dag->end. (All nodes are reached from dag->start
1265  * simply by definition - they were created that way).
1266  *
1267  * Note that this also means that any nodes before dag->end in the
1268  * list can be discarded, meaning that dag->end will always be
1269  * equal to dag->nodes (FIXME: except when loading from a file but
1270  * we can fix that...)
1271  */
1272  i = 0;
1273  while (dag->nodes && dag->nodes != dag->end) {
1274  ps_latnode_t *next = dag->nodes->next;
1275  listelem_free(dag->latnode_alloc, dag->nodes);
1276  dag->nodes = next;
1277  ++i;
1278  }
1279  E_INFO("Eliminated %d nodes before end node\n", i);
1280  dag->end->reachable = TRUE;
1281  nlink = 0;
1282  for (to = dag->end; to; to = to->next) {
1283  int fef, lef;
1284 
1285  /* Skip if not reachable; it will never be reachable from dag->end */
1286  if (!to->reachable)
1287  continue;
1288 
1289  /* Prune nodes with too few endpoints - heuristic
1290  borrowed from Sphinx3 */
1291  fef = ngs->bp_table[to->fef].frame;
1292  lef = ngs->bp_table[to->lef].frame;
1293  if (to != dag->end && lef - fef < min_endfr) {
1294  to->reachable = FALSE;
1295  continue;
1296  }
1297 
1298  /* Find predecessors of to : from->fef+1 <= to->sf <= from->lef+1 */
1299  for (from = to->next; from; from = from->next) {
1300  bptbl_t *from_bpe;
1301 
1302  fef = ngs->bp_table[from->fef].frame;
1303  lef = ngs->bp_table[from->lef].frame;
1304 
1305  if ((to->sf <= fef) || (to->sf > lef + 1))
1306  continue;
1307  if (lef - fef < min_endfr) {
1308  assert(!from->reachable);
1309  continue;
1310  }
1311 
1312  /* Find bptable entry for "from" that exactly precedes "to" */
1313  i = from->fef;
1314  from_bpe = ngs->bp_table + i;
1315  for (; i <= from->lef; i++, from_bpe++) {
1316  if (from_bpe->wid != from->wid)
1317  continue;
1318  if (from_bpe->frame >= to->sf - 1)
1319  break;
1320  }
1321 
1322  if ((i > from->lef) || (from_bpe->frame != to->sf - 1))
1323  continue;
1324 
1325  /* Find acoustic score from.sf->to.sf-1 with right context = to */
1326  /* This gives us from_bpe's best acoustic score. */
1327  ngram_compute_seg_score(ngs, from_bpe, lwf,
1328  &ascr, &lscr);
1329  /* Now find the exact path score for from->to, including
1330  * the appropriate final triphone. In fact this might not
1331  * exist. */
1332  score = ngram_search_exit_score(ngs, from_bpe,
1333  dict_first_phone(ps_search_dict(ngs), to->wid));
1334  /* Does not exist. Can't create a link here. */
1335  if (score == WORST_SCORE)
1336  continue;
1337  /* Adjust the arc score to match the correct triphone. */
1338  else
1339  score = ascr + (score - from_bpe->score);
1340  if (score BETTER_THAN 0) {
1341  /* Scores must be negative, or Bad Things will happen.
1342  In general, they are, except in corner cases
1343  involving filler words. We don't want to throw any
1344  links away so we'll keep these, but with some
1345  arbitrarily improbable but recognizable score. */
1346  ps_lattice_link(dag, from, to, -424242, from_bpe->frame);
1347  ++nlink;
1348  from->reachable = TRUE;
1349  }
1350  else if (score BETTER_THAN WORST_SCORE) {
1351  ps_lattice_link(dag, from, to, score, from_bpe->frame);
1352  ++nlink;
1353  from->reachable = TRUE;
1354  }
1355  }
1356  }
1357 
1358  /* There must be at least one path between dag->start and dag->end */
1359  if (!dag->start->reachable) {
1360  E_ERROR("End node of lattice isolated; unreachable\n");
1361  goto error_out;
1362  }
1363 
1364  for (node = dag->nodes; node; node = node->next) {
1365  /* Change node->{fef,lef} from bptbl indices to frames. */
1366  node->fef = ngs->bp_table[node->fef].frame;
1367  node->lef = ngs->bp_table[node->lef].frame;
1368  /* Find base wid for nodes. */
1369  node->basewid = dict_basewid(search->dict, node->wid);
1370  }
1371 
1372  /* Link nodes with alternate pronunciations at the same timepoint. */
1373  for (node = dag->nodes; node; node = node->next) {
1374  ps_latnode_t *alt;
1375  /* Scan forward to find the next alternate, then stop. */
1376  for (alt = node->next; alt && alt->sf == node->sf; alt = alt->next) {
1377  if (alt->basewid == node->basewid) {
1378  alt->alt = node->alt;
1379  node->alt = alt;
1380  break;
1381  }
1382  }
1383  }
1384  E_INFO("Lattice has %d nodes, %d links\n", dag->n_nodes, nlink);
1385 
1386  /* Minor hack: If the final node is a filler word and not </s>,
1387  * then set its base word ID to </s>, so that the language model
1388  * scores won't be screwed up. */
1389  if (dict_filler_word(ps_search_dict(ngs), dag->end->wid))
1390  dag->end->basewid = ps_search_finish_wid(ngs);
1391 
1392  /* Free nodes unreachable from dag->end and their links */
1394 
1395  /* Add silprob and fillprob to corresponding links */
1396  ps_lattice_penalize_fillers(dag, ngs->silpen, ngs->fillpen);
1397 
1398  search->dag = dag;
1399  return dag;
1400 
1401 error_out:
1402  ps_lattice_free(dag);
1403  return NULL;
1404 }
1405 
1406 void ngram_search_set_lm(ngram_model_t *lm)
1407 {
1408  default_lm = ngram_model_retain(lm);
1409 }
1410 
hmm_t hmm
Basic HMM structure.
Definition: ngram_search.h:65
Internal implementation of PocketSphinx decoder.
void ngram_fwdtree_finish(ngram_search_t *ngs)
Finish fwdtree decoding for an utterance.
int16 reachable
From.
int32 n_frame_alloc
Number of frames allocated in bp_table_idx and friends.
Definition: ngram_search.h:307
int32 wid
Word index.
Definition: ngram_search.h:113
void ngram_fwdtree_deinit(ngram_search_t *ngs)
Release memory associated with fwdtree decoding.
Base structure for search module.
int32 n_nodes
Number of nodes in this lattice.
void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
Definition: ngram_search.c:601
ps_seg_t * ps_lattice_seg_iter(ps_lattice_t *dag, ps_latlink_t *link, float32 lwf)
Get hypothesis segmentation iterator after bestpath search.
Definition: ps_lattice.c:1006
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
Definition: hmm.c:89
int acmod_rewind(acmod_t *acmod)
Rewind the current utterance, allowing it to be rescored.
Definition: acmod.c:897
int32 lef
Last end frame.
listelem_alloc_t * chan_alloc
For chan_t.
Definition: ngram_search.h:211
void ngram_fwdtree_start(ngram_search_t *ngs)
Start fwdtree decoding for an utterance.
void ngram_search_set_lm(ngram_model_t *lm)
Sets the global language model.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
void ps_lattice_penalize_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen)
Insert penalty for fillers.
Definition: ps_lattice.c:106
frame_idx_t frame
start or end frame
Definition: ngram_search.h:110
ps_latnode_t * start
Starting node.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state]...
Definition: tmat.h:107
hmm_context_t * hmmctx
HMM context.
Definition: ngram_search.h:200
ps_segfuncs_t * vt
V-table of seg methods.
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Definition: bin_mdef.h:134
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
Definition: hmm.c:111
int32 lscr
Language model score.
int32 n_words
Number of words known to search (may be less than in the dictionary)
int16 last2_phone
next-to-last phone of this word
Definition: ngram_search.h:120
#define BAD_S3WID
Dictionary word id.
Definition: s3types.h:90
int32 n_ssid
#Unique ssid in above, compressed ssid list
Definition: dict2pid.h:76
frame_idx_t n_frames
Number of frames for this utterance.
int ngram_fwdflat_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
Word graph search implementation.
bitvec_t * word_active
array of active flags for all words.
Definition: ngram_search.h:247
void ngram_fwdflat_finish(ngram_search_t *ngs)
Finish fwdflat decoding for an utterance.
ps_latnode_t * nodes
List of all nodes.
int ngram_fwdflat_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
Get the exit score for a backpointer entry with a given right context.
Definition: ngram_search.c:663
listelem_alloc_t * latnode_alloc
Node allocator for this DAG.
int32 prob
Log posterior probability.
latlink_list_t * entries
Links into this node.
POCKETSPHINX_EXPORT int32 ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset, float32 ascale)
Calculate link posterior probabilities on a word graph.
Definition: ps_lattice.c:1446
struct ps_latnode_s * alt
Node with alternate pronunciation for this word.
char const * word
Word string (pointer into dictionary hash)
int32 ** active_word_list
Array of active multi-phone words for current and next frame.
Definition: ngram_search.h:287
struct chan_s * next
first descendant of this channel; or, in the case of the last phone of a word, the next alternative r...
Definition: ngram_search.h:68
void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w, int32 score, int32 path, int32 rc)
Enter a word in the backpointer table.
Definition: ngram_search.c:383
ps_search_t * search
Search object from whence this came.
int32 final_node_ascr
Acoustic score of implicit link exiting final node.
Lexicon tree based Viterbi search.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
Record the current frame's index in the backpointer table.
Definition: ngram_search.c:329
int32 bp
Back Pointer.
Definition: ngram_search.h:114
int32 rc_id
right-context id for last phone of words
Definition: ngram_search.h:79
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
Definition: dict2pid.h:115
void ngram_fwdflat_start(ngram_search_t *ngs)
Start fwdflat decoding for an utterance.
N-Gram search module structure.
Definition: ngram_search.h:197
int ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
Definition: hmm.c:56
void ps_search_base_free(ps_search_t *search)
Free search.
void ps_lattice_delete_unreachable(ps_lattice_t *dag)
Remove nodes marked as unreachable.
Definition: ps_lattice.c:174
ps_latnode_t * end
Ending node.
frame_idx_t sf
Start frame.
int32 real_wid
wid of this or latest predecessor real word
Definition: ngram_search.h:117
int32 prev_real_wid
wid of second-last real word
Definition: ngram_search.h:118
POCKETSPHINX_EXPORT ps_latlink_t * ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, float32 ascale)
Do N-Gram based best-path search on a word graph.
Definition: ps_lattice.c:1215
ps_lattice_t * ngram_search_lattice(ps_search_t *search)
Construct a word lattice from the current hypothesis.
latlink_list_t * exits
Links out of this node.
#define WORST_SCORE
Large "bad" score.
Definition: hmm.h:84
N-Gram based multi-pass search ("FBS")
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
frame_idx_t ef
End frame.
int32 ascr
Acoustic score.
int acmod_advance(acmod_t *acmod)
Advance the frame index.
Definition: acmod.c:919
listelem_alloc_t * latnode_alloc
For latnode_t.
Definition: ngram_search.h:213
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
Definition: dict.c:413
void ngram_fwdtree_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdtree decoding.
Segmentation "iterator" for backpointer table results.
Definition: ngram_search.h:126
ps_latnode_t ** frm_wordlist
List of active words in each frame.
Definition: ngram_search.h:316
Lexical tree node data type for the first phone (root) of each dynamic HMM tree structure.
Definition: ngram_search.h:90
Lexical tree node data type.
Definition: ngram_search.h:64
int32 wid
Dictionary word id.
int16 cur
Current position in bpidx.
Definition: ngram_search.h:130
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
a structure for a dictionary.
Definition: dict.h:76
char const * ps_lattice_hyp(ps_lattice_t *dag, ps_latlink_t *link)
Get hypothesis string after bestpath search.
Definition: ps_lattice.c:830
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e.
Definition: dict.c:427
void ngram_search_free(ps_search_t *search)
Finalize the N-Gram search module.
Definition: ngram_search.c:289
Word graph structure used in bestpath/nbest search.
#define WORSE_THAN
Is one score worse than another?
Definition: hmm.h:100
int16 n_bpidx
Number of backpointer IDs.
Definition: ngram_search.h:129
int32 best_score
Best Viterbi path score.
Definition: ngram_search.h:325
Back pointer table (forward pass lattice; actually a tree)
Definition: ngram_search.h:109
cross word triphone model structure
Definition: dict2pid.h:73
int ngram_fwdtree_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
Definition: ngram_search.c:650
int32 post
Utterance posterior probability.
char * hyp_str
Current hypothesis string.
#define BETTER_THAN
Is one score better than another?
Definition: hmm.h:95
dict_t * dict
Pronunciation dictionary.
void ngram_fwdflat_deinit(ngram_search_t *ngs)
Release memory associated with fwdflat decoding.
int32 s_idx
Start of BScoreStack for various right contexts.
Definition: ngram_search.h:116
int32 fef
First end frame.
int32 n_frame
Number of frames actually present.
Definition: ngram_search.h:308
Flat lexicon based Viterbi search.
ngram_model_t * lmset
Set of language models.
Definition: ngram_search.h:199
uint8 valid
For absolute pruning.
Definition: ngram_search.h:111
int32 lback
Language model backoff.
listelem_alloc_t * root_chan_alloc
For root_chan_t.
Definition: ngram_search.h:212
int32 basewid
Dictionary base word id.
int32 ciphone
ciphone for this node
Definition: ngram_search.h:73
void ngram_fwdflat_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdflat decoding.
ps_lattice_t * ps_lattice_init_search(ps_search_t *search, int n_frame)
Construct an empty word graph with reference to a search structure.
Definition: ps_lattice.c:639
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
Definition: hmm.c:80
int32 * bpidx
Sequence of backpointer IDs.
Definition: ngram_search.h:128
chan_t ** word_chan
Channels associated with a given word (only used for right contexts, single-phone words in fwdtree se...
Definition: ngram_search.h:246
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
int ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score, int32 *out_is_final)
Find the best word exit for the current frame in the backpointer table.
Definition: ngram_search.c:506
ps_latlink_t * last_link
Final link in best path.
struct ps_latnode_s * next
Next node in DAG (no ordering implied)
int32 score
Score (best among all right contexts)
Definition: ngram_search.h:115
V-table for search algorithm.
ps_lattice_t * dag
Current hypothesis word graph.
Base structure for hypothesis segmentation iterator.
#define dict_size(d)
Packaged macro access to dictionary members.
Definition: dict.h:151
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
Definition: dict2pid.h:75
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
Definition: ps_lattice.c:665
ps_seg_t base
Base structure.
Definition: ngram_search.h:127
float32 ascale
Acoustic score scale for posterior probabilities.
Definition: ngram_search.h:333
ps_search_t * ngram_search_init(const char *name, ngram_model_t *lm, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize the N-Gram search module.
Definition: ngram_search.c:140
Acoustic model structure.
Definition: acmod.h:148
float32 lwf
Language weight factor (for second-pass searches)
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
Definition: acmod.c:412
POCKETSPHINX_EXPORT void ps_lattice_link(ps_lattice_t *dag, ps_latnode_t *from, ps_latnode_t *to, int32 score, int32 ef)
Create a directed link between "from" and "to" nodes, but if a link already exists, choose one with the best link_scr.
Definition: ps_lattice.c:65
s3ssid_t * ssid
Senone Sequence ID list for all context ciphones.
Definition: dict2pid.h:74
frame_idx_t sf
Start frame.
int16 last_phone
last phone of this word
Definition: ngram_search.h:119
char const * ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx)
Backtrace from a given backpointer index to obtain a word hypothesis.
Definition: ngram_search.c:553