PocketSphinx  5prealpha
fsg_search.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * ====================================================================
32  *
33  */
34 
35 /*
36  * fsg_search.c -- Search structures for FSM decoding.
37  *
38  * **********************************************
39  * CMU ARPA Speech Project
40  *
41  * Copyright (c) 2004 Carnegie Mellon University.
42  * ALL RIGHTS RESERVED.
43  * **********************************************
44  *
45  * HISTORY
46  *
47  * 18-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
48  * Started.
49  */
50 
51 /* System headers. */
52 #include <stdio.h>
53 #include <string.h>
54 #include <assert.h>
55 
56 /* SphinxBase headers. */
57 #include <sphinxbase/err.h>
58 #include <sphinxbase/ckd_alloc.h>
59 #include <sphinxbase/strfuncs.h>
60 #include <sphinxbase/cmd_ln.h>
61 
62 /* Local headers. */
63 #include "pocketsphinx_internal.h"
64 #include "ps_lattice_internal.h"
65 #include "fsg_search_internal.h"
66 #include "fsg_history.h"
67 #include "fsg_lextree.h"
68 
69 /* Turn this on for detailed debugging dump */
70 #define __FSG_DBG__ 0
71 #define __FSG_DBG_CHAN__ 0
72 
73 static ps_seg_t *fsg_search_seg_iter(ps_search_t *search, int32 *out_score);
74 static ps_lattice_t *fsg_search_lattice(ps_search_t *search);
75 static int fsg_search_prob(ps_search_t *search);
76 
77 static ps_searchfuncs_t fsg_funcs = {
78  /* start: */ fsg_search_start,
79  /* step: */ fsg_search_step,
80  /* finish: */ fsg_search_finish,
81  /* reinit: */ fsg_search_reinit,
82  /* free: */ fsg_search_free,
83  /* lattice: */ fsg_search_lattice,
84  /* hyp: */ fsg_search_hyp,
85  /* prob: */ fsg_search_prob,
86  /* seg_iter: */ fsg_search_seg_iter,
87 };
88 
89 static int
90 fsg_search_add_silences(fsg_search_t *fsgs, fsg_model_t *fsg)
91 {
92  dict_t *dict;
93  int32 wid;
94  int n_sil;
95 
96  dict = ps_search_dict(fsgs);
97  /*
98  * NOTE: Unlike N-Gram search, we do not use explicit start and
99  * end symbols. This is because the start and end nodes are
100  * defined in the grammar. We do add silence/filler self-loops to
101  * all states in order to allow for silence between words and at
102  * the beginning and end of utterances.
103  *
104  * This has some implications for word graph generation, namely,
105  * that there can (and usually will) be multiple start and end
106  * states in the word graph. We therefore do add explicit start
107  * and end nodes to the graph.
108  */
109  /* Add silence self-loops to all states. */
110  fsg_model_add_silence(fsg, "<sil>", -1,
111  cmd_ln_float32_r(ps_search_config(fsgs), "-silprob"));
112  n_sil = 0;
113  /* Add self-loops for all other fillers. */
114  for (wid = dict_filler_start(dict); wid < dict_filler_end(dict); ++wid) {
115  char const *word = dict_wordstr(dict, wid);
116  if (wid == dict_startwid(dict) || wid == dict_finishwid(dict))
117  continue;
118  fsg_model_add_silence(fsg, word, -1,
119  cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob"));
120  ++n_sil;
121  }
122 
123  return n_sil;
124 }
125 
126 /* Scans the dictionary and check if all words are present. */
127 static int
128 fsg_search_check_dict(fsg_search_t *fsgs, fsg_model_t *fsg)
129 {
130  dict_t *dict;
131  int i;
132 
133  dict = ps_search_dict(fsgs);
134  for (i = 0; i < fsg_model_n_word(fsg); ++i) {
135  char const *word;
136  int32 wid;
137 
138  word = fsg_model_word_str(fsg, i);
139  wid = dict_wordid(dict, word);
140  if (wid == BAD_S3WID) {
141  E_ERROR("The word '%s' is missing in the dictionary\n", word);
142  return FALSE;
143  }
144  }
145 
146  return TRUE;
147 }
148 
149 static int
150 fsg_search_add_altpron(fsg_search_t *fsgs, fsg_model_t *fsg)
151 {
152  dict_t *dict;
153  int n_alt, n_word;
154  int i;
155 
156  dict = ps_search_dict(fsgs);
157  /* Scan FSG's vocabulary for words that have alternate pronunciations. */
158  n_alt = 0;
159  n_word = fsg_model_n_word(fsg);
160  for (i = 0; i < n_word; ++i) {
161  char const *word;
162  int32 wid;
163 
164  word = fsg_model_word_str(fsg, i);
165  wid = dict_wordid(dict, word);
166  if (wid != BAD_S3WID) {
167  while ((wid = dict_nextalt(dict, wid)) != BAD_S3WID) {
168  n_alt += fsg_model_add_alt(fsg, word, dict_wordstr(dict, wid));
169  }
170  }
171  }
172 
173  E_INFO("Added %d alternate word transitions\n", n_alt);
174  return n_alt;
175 }
176 
177 ps_search_t *
178 fsg_search_init(const char *name,
179  fsg_model_t *fsg,
180  cmd_ln_t *config,
181  acmod_t *acmod,
182  dict_t *dict,
183  dict2pid_t *d2p)
184 {
185  fsg_search_t *fsgs = ckd_calloc(1, sizeof(*fsgs));
186  ps_search_init(ps_search_base(fsgs), &fsg_funcs, PS_SEARCH_TYPE_FSG, name, config, acmod, dict, d2p);
187 
188  fsgs->fsg = fsg_model_retain(fsg);
189  /* Initialize HMM context. */
190  fsgs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
191  acmod->tmat->tp, NULL, acmod->mdef->sseq);
192  if (fsgs->hmmctx == NULL) {
193  ps_search_free(ps_search_base(fsgs));
194  return NULL;
195  }
196 
197  /* Intialize the search history object */
198  fsgs->history = fsg_history_init(NULL, dict);
199  fsgs->frame = -1;
200 
201  /* Get search pruning parameters */
202  fsgs->beam_factor = 1.0f;
203  fsgs->beam = fsgs->beam_orig
204  = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"))
205  >> SENSCR_SHIFT;
206  fsgs->pbeam = fsgs->pbeam_orig
207  = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"))
208  >> SENSCR_SHIFT;
209  fsgs->wbeam = fsgs->wbeam_orig
210  = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"))
211  >> SENSCR_SHIFT;
212 
213  /* LM related weights/penalties */
214  fsgs->lw = cmd_ln_float32_r(config, "-lw");
215  fsgs->pip = (int32) (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip"))
216  * fsgs->lw)
217  >> SENSCR_SHIFT;
218  fsgs->wip = (int32) (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip"))
219  * fsgs->lw)
220  >> SENSCR_SHIFT;
221 
222  /* Acoustic score scale for posterior probabilities. */
223  fsgs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
224 
225  E_INFO("FSG(beam: %d, pbeam: %d, wbeam: %d; wip: %d, pip: %d)\n",
226  fsgs->beam_orig, fsgs->pbeam_orig, fsgs->wbeam_orig,
227  fsgs->wip, fsgs->pip);
228 
229  if (!fsg_search_check_dict(fsgs, fsg)) {
230  fsg_search_free(ps_search_base(fsgs));
231  return NULL;
232  }
233 
234  if (cmd_ln_boolean_r(config, "-fsgusefiller") &&
235  !fsg_model_has_sil(fsg))
236  fsg_search_add_silences(fsgs, fsg);
237 
238  if (cmd_ln_boolean_r(config, "-fsgusealtpron") &&
239  !fsg_model_has_alt(fsg))
240  fsg_search_add_altpron(fsgs, fsg);
241 
242  if (fsg_search_reinit(ps_search_base(fsgs),
243  ps_search_dict(fsgs),
244  ps_search_dict2pid(fsgs)) < 0)
245  {
246  ps_search_free(ps_search_base(fsgs));
247  return NULL;
248  }
249 
250  return ps_search_base(fsgs);
251 }
252 
253 void
254 fsg_search_free(ps_search_t *search)
255 {
256  fsg_search_t *fsgs = (fsg_search_t *)search;
257 
258  ps_search_base_free(search);
259  fsg_lextree_free(fsgs->lextree);
260  if (fsgs->history) {
261  fsg_history_reset(fsgs->history);
262  fsg_history_set_fsg(fsgs->history, NULL, NULL);
263  fsg_history_free(fsgs->history);
264  }
265  hmm_context_free(fsgs->hmmctx);
266  fsg_model_free(fsgs->fsg);
267  ckd_free(fsgs);
268 }
269 
270 int
271 fsg_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
272 {
273  fsg_search_t *fsgs = (fsg_search_t *)search;
274 
275  /* Free the old lextree */
276  if (fsgs->lextree)
277  fsg_lextree_free(fsgs->lextree);
278 
279  /* Free old dict2pid, dict */
280  ps_search_base_reinit(search, dict, d2p);
281 
282  /* Update the number of words (not used by this module though). */
283  search->n_words = dict_size(dict);
284 
285  /* Allocate new lextree for the given FSG */
286  fsgs->lextree = fsg_lextree_init(fsgs->fsg, dict, d2p,
287  ps_search_acmod(fsgs)->mdef,
288  fsgs->hmmctx, fsgs->wip, fsgs->pip);
289 
290  /* Inform the history module of the new fsg */
291  fsg_history_set_fsg(fsgs->history, fsgs->fsg, dict);
292 
293  return 0;
294 }
295 
296 
297 static void
298 fsg_search_sen_active(fsg_search_t *fsgs)
299 {
300  gnode_t *gn;
301  fsg_pnode_t *pnode;
302  hmm_t *hmm;
303 
304  acmod_clear_active(ps_search_acmod(fsgs));
305 
306  for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
307  pnode = (fsg_pnode_t *) gnode_ptr(gn);
308  hmm = fsg_pnode_hmmptr(pnode);
309  assert(hmm_frame(hmm) == fsgs->frame);
310  acmod_activate_hmm(ps_search_acmod(fsgs), hmm);
311  }
312 }
313 
314 
315 /*
316  * Evaluate all the active HMMs.
317  * (Executed once per frame.)
318  */
319 static void
320 fsg_search_hmm_eval(fsg_search_t *fsgs)
321 {
322  gnode_t *gn;
323  fsg_pnode_t *pnode;
324  hmm_t *hmm;
325  int32 bestscore;
326  int32 n, maxhmmpf;
327 
328  bestscore = WORST_SCORE;
329 
330  if (!fsgs->pnode_active) {
331  E_ERROR("Frame %d: No active HMM!!\n", fsgs->frame);
332  return;
333  }
334 
335  for (n = 0, gn = fsgs->pnode_active; gn; gn = gnode_next(gn), n++) {
336  int32 score;
337 
338  pnode = (fsg_pnode_t *) gnode_ptr(gn);
339  hmm = fsg_pnode_hmmptr(pnode);
340  assert(hmm_frame(hmm) == fsgs->frame);
341 
342 #if __FSG_DBG__
343  E_INFO("pnode(%08x) active @frm %5d\n", (int32) pnode,
344  fsgs->frame);
345  hmm_dump(hmm, stdout);
346 #endif
347  score = hmm_vit_eval(hmm);
348 #if __FSG_DBG_CHAN__
349  E_INFO("pnode(%08x) after eval @frm %5d\n",
350  (int32) pnode, fsgs->frame);
351  hmm_dump(hmm, stdout);
352 #endif
353 
354  if (score BETTER_THAN bestscore)
355  bestscore = score;
356  }
357 
358 #if __FSG_DBG__
359  E_INFO("[%5d] %6d HMM; bestscr: %11d\n", fsgs->frame, n, bestscore);
360 #endif
361  fsgs->n_hmm_eval += n;
362 
363  /* Adjust beams if #active HMMs larger than absolute threshold */
364  maxhmmpf = cmd_ln_int32_r(ps_search_config(fsgs), "-maxhmmpf");
365  if (maxhmmpf != -1 && n > maxhmmpf) {
366  /*
367  * Too many HMMs active; reduce the beam factor applied to the default
368  * beams, but not if the factor is already at a floor (0.1).
369  */
370  if (fsgs->beam_factor > 0.1) { /* Hack!! Hardwired constant 0.1 */
371  fsgs->beam_factor *= 0.9f; /* Hack!! Hardwired constant 0.9 */
372  fsgs->beam =
373  (int32) (fsgs->beam_orig * fsgs->beam_factor);
374  fsgs->pbeam =
375  (int32) (fsgs->pbeam_orig * fsgs->beam_factor);
376  fsgs->wbeam =
377  (int32) (fsgs->wbeam_orig * fsgs->beam_factor);
378  }
379  }
380  else {
381  fsgs->beam_factor = 1.0f;
382  fsgs->beam = fsgs->beam_orig;
383  fsgs->pbeam = fsgs->pbeam_orig;
384  fsgs->wbeam = fsgs->wbeam_orig;
385  }
386 
387  if (n > fsg_lextree_n_pnode(fsgs->lextree))
388  E_FATAL("PANIC! Frame %d: #HMM evaluated(%d) > #PNodes(%d)\n",
389  fsgs->frame, n, fsg_lextree_n_pnode(fsgs->lextree));
390 
391  fsgs->bestscore = bestscore;
392 }
393 
394 
395 static void
396 fsg_search_pnode_trans(fsg_search_t *fsgs, fsg_pnode_t * pnode)
397 {
398  fsg_pnode_t *child;
399  hmm_t *hmm;
400  int32 newscore, thresh, nf;
401 
402  assert(pnode);
403  assert(!fsg_pnode_leaf(pnode));
404 
405  nf = fsgs->frame + 1;
406  thresh = fsgs->bestscore + fsgs->beam;
407 
408  hmm = fsg_pnode_hmmptr(pnode);
409 
410  for (child = fsg_pnode_succ(pnode);
411  child; child = fsg_pnode_sibling(child)) {
412  newscore = hmm_out_score(hmm) + child->logs2prob;
413 
414  if ((newscore BETTER_THAN thresh)
415  && (newscore BETTER_THAN hmm_in_score(&child->hmm))) {
416  /* Incoming score > pruning threshold and > target's existing score */
417  if (hmm_frame(&child->hmm) < nf) {
418  /* Child node not yet activated; do so */
419  fsgs->pnode_active_next =
420  glist_add_ptr(fsgs->pnode_active_next,
421  (void *) child);
422  }
423 
424  hmm_enter(&child->hmm, newscore, hmm_out_history(hmm), nf);
425  }
426  }
427 }
428 
429 
430 static void
431 fsg_search_pnode_exit(fsg_search_t *fsgs, fsg_pnode_t * pnode)
432 {
433  hmm_t *hmm;
434  fsg_link_t *fl;
435  int32 wid;
436  fsg_pnode_ctxt_t ctxt;
437 
438  assert(pnode);
439  assert(fsg_pnode_leaf(pnode));
440 
441  hmm = fsg_pnode_hmmptr(pnode);
442  fl = fsg_pnode_fsglink(pnode);
443  assert(fl);
444 
445  wid = fsg_link_wid(fl);
446  assert(wid >= 0);
447 
448 #if __FSG_DBG__
449  E_INFO("[%5d] Exit(%08x) %10d(score) %5d(pred)\n",
450  fsgs->frame, (int32) pnode,
451  hmm_out_score(hmm), hmm_out_history(hmm));
452 #endif
453 
454  /*
455  * Check if this is filler or single phone word; these do not model right
456  * context (i.e., the exit score applies to all right contexts).
457  */
458  if (fsg_model_is_filler(fsgs->fsg, wid)
459  /* FIXME: This might be slow due to repeated calls to dict_to_id(). */
460  || (dict_is_single_phone(ps_search_dict(fsgs),
461  dict_wordid(ps_search_dict(fsgs),
462  fsg_model_word_str(fsgs->fsg, wid))))) {
463  /* Create a dummy context structure that applies to all right contexts */
464  fsg_pnode_add_all_ctxt(&ctxt);
465 
466  /* Create history table entry for this word exit */
467  fsg_history_entry_add(fsgs->history,
468  fl,
469  fsgs->frame,
470  hmm_out_score(hmm),
471  hmm_out_history(hmm),
472  pnode->ci_ext, ctxt);
473 
474  }
475  else {
476  /* Create history table entry for this word exit */
477  fsg_history_entry_add(fsgs->history,
478  fl,
479  fsgs->frame,
480  hmm_out_score(hmm),
481  hmm_out_history(hmm),
482  pnode->ci_ext, pnode->ctxt);
483  }
484 }
485 
486 
487 /*
488  * (Beam) prune the just evaluated HMMs, determine which ones remain
489  * active, which ones transition to successors, which ones exit and
490  * terminate in their respective destination FSM states.
491  * (Executed once per frame.)
492  */
493 static void
494 fsg_search_hmm_prune_prop(fsg_search_t *fsgs)
495 {
496  gnode_t *gn;
497  fsg_pnode_t *pnode;
498  hmm_t *hmm;
499  int32 thresh, word_thresh, phone_thresh;
500 
501  assert(fsgs->pnode_active_next == NULL);
502 
503  thresh = fsgs->bestscore + fsgs->beam;
504  phone_thresh = fsgs->bestscore + fsgs->pbeam;
505  word_thresh = fsgs->bestscore + fsgs->wbeam;
506 
507  for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
508  pnode = (fsg_pnode_t *) gnode_ptr(gn);
509  hmm = fsg_pnode_hmmptr(pnode);
510 
511  if (hmm_bestscore(hmm) >= thresh) {
512  /* Keep this HMM active in the next frame */
513  if (hmm_frame(hmm) == fsgs->frame) {
514  hmm_frame(hmm) = fsgs->frame + 1;
515  fsgs->pnode_active_next =
516  glist_add_ptr(fsgs->pnode_active_next,
517  (void *) pnode);
518  }
519  else {
520  assert(hmm_frame(hmm) == fsgs->frame + 1);
521  }
522 
523  if (!fsg_pnode_leaf(pnode)) {
524  if (hmm_out_score(hmm) >= phone_thresh) {
525  /* Transition out of this phone into its children */
526  fsg_search_pnode_trans(fsgs, pnode);
527  }
528  }
529  else {
530  if (hmm_out_score(hmm) >= word_thresh) {
531  /* Transition out of leaf node into destination FSG state */
532  fsg_search_pnode_exit(fsgs, pnode);
533  }
534  }
535  }
536  }
537 }
538 
539 
540 /*
541  * Propagate newly created history entries through null transitions.
542  */
543 static void
544 fsg_search_null_prop(fsg_search_t *fsgs)
545 {
546  int32 bpidx, n_entries, thresh, newscore;
547  fsg_hist_entry_t *hist_entry;
548  fsg_link_t *l;
549  int32 s;
550  fsg_model_t *fsg;
551 
552  fsg = fsgs->fsg;
553  thresh = fsgs->bestscore + fsgs->wbeam; /* Which beam really?? */
554 
555  n_entries = fsg_history_n_entries(fsgs->history);
556 
557  for (bpidx = fsgs->bpidx_start; bpidx < n_entries; bpidx++) {
558  fsg_arciter_t *itor;
559  hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
560 
561  l = fsg_hist_entry_fsglink(hist_entry);
562 
563  /* Destination FSG state for history entry */
564  s = l ? fsg_link_to_state(l) : fsg_model_start_state(fsg);
565 
566  /*
567  * Check null transitions from d to all other states. (Only need to
568  * propagate one step, since FSG contains transitive closure of null
569  * transitions.)
570  */
571  /* Add all links from from_state to dst */
572  for (itor = fsg_model_arcs(fsg, s); itor;
573  itor = fsg_arciter_next(itor)) {
574  fsg_link_t *l = fsg_arciter_get(itor);
575 
576  /* FIXME: Need to deal with tag transitions somehow. */
577  if (fsg_link_wid(l) != -1)
578  continue;
579  newscore =
580  fsg_hist_entry_score(hist_entry) +
581  (fsg_link_logs2prob(l) >> SENSCR_SHIFT);
582 
583  if (newscore >= thresh) {
584  fsg_history_entry_add(fsgs->history, l,
585  fsg_hist_entry_frame(hist_entry),
586  newscore,
587  bpidx,
588  fsg_hist_entry_lc(hist_entry),
589  fsg_hist_entry_rc(hist_entry));
590  }
591  }
592  }
593 }
594 
595 
596 /*
597  * Perform cross-word transitions; propagate each history entry created in this
598  * frame to lextree roots attached to the target FSG state for that entry.
599  */
600 static void
601 fsg_search_word_trans(fsg_search_t *fsgs)
602 {
603  int32 bpidx, n_entries;
604  fsg_hist_entry_t *hist_entry;
605  fsg_link_t *l;
606  int32 score, newscore, thresh, nf, d;
607  fsg_pnode_t *root;
608  int32 lc, rc;
609 
610  n_entries = fsg_history_n_entries(fsgs->history);
611 
612  thresh = fsgs->bestscore + fsgs->beam;
613  nf = fsgs->frame + 1;
614 
615  for (bpidx = fsgs->bpidx_start; bpidx < n_entries; bpidx++) {
616  hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
617  assert(hist_entry);
618  score = fsg_hist_entry_score(hist_entry);
619  assert(fsgs->frame == fsg_hist_entry_frame(hist_entry));
620 
621  l = fsg_hist_entry_fsglink(hist_entry);
622 
623  /* Destination state for hist_entry */
624  d = l ? fsg_link_to_state(l) : fsg_model_start_state(fsgs->
625  fsg);
626 
627  lc = fsg_hist_entry_lc(hist_entry);
628 
629  /* Transition to all root nodes attached to state d */
630  for (root = fsg_lextree_root(fsgs->lextree, d);
631  root; root = root->sibling) {
632  rc = root->ci_ext;
633 
634  if ((root->ctxt.bv[lc >> 5] & (1 << (lc & 0x001f))) &&
635  (hist_entry->rc.bv[rc >> 5] & (1 << (rc & 0x001f)))) {
636  /*
637  * Last CIphone of history entry is in left-context list supported by
638  * target root node, and
639  * first CIphone of target root node is in right context list supported
640  * by history entry;
641  * So the transition can go ahead (if new score is good enough).
642  */
643  newscore = score + root->logs2prob;
644 
645  if ((newscore BETTER_THAN thresh)
646  && (newscore BETTER_THAN hmm_in_score(&root->hmm))) {
647  if (hmm_frame(&root->hmm) < nf) {
648  /* Newly activated node; add to active list */
649  fsgs->pnode_active_next =
650  glist_add_ptr(fsgs->pnode_active_next,
651  (void *) root);
652 #if __FSG_DBG__
653  E_INFO
654  ("[%5d] WordTrans bpidx[%d] -> pnode[%08x] (activated)\n",
655  fsgs->frame, bpidx, (int32) root);
656 #endif
657  }
658  else {
659 #if __FSG_DBG__
660  E_INFO
661  ("[%5d] WordTrans bpidx[%d] -> pnode[%08x]\n",
662  fsgs->frame, bpidx, (int32) root);
663 #endif
664  }
665 
666  hmm_enter(&root->hmm, newscore, bpidx, nf);
667  }
668  }
669  }
670  }
671 }
672 
673 
674 int
675 fsg_search_step(ps_search_t *search, int frame_idx)
676 {
677  fsg_search_t *fsgs = (fsg_search_t *)search;
678  int16 const *senscr;
679  acmod_t *acmod = search->acmod;
680  gnode_t *gn;
681  fsg_pnode_t *pnode;
682  hmm_t *hmm;
683 
684  /* Activate our HMMs for the current frame if need be. */
685  if (!acmod->compallsen)
686  fsg_search_sen_active(fsgs);
687  /* Compute GMM scores for the current frame. */
688  senscr = acmod_score(acmod, &frame_idx);
689  fsgs->n_sen_eval += acmod->n_senone_active;
690  hmm_context_set_senscore(fsgs->hmmctx, senscr);
691 
692  /* Mark backpointer table for current frame. */
693  fsgs->bpidx_start = fsg_history_n_entries(fsgs->history);
694 
695  /* Evaluate all active pnodes (HMMs) */
696  fsg_search_hmm_eval(fsgs);
697 
698  /*
699  * Prune and propagate the HMMs evaluated; create history entries for
700  * word exits. The words exits are tentative, and may be pruned; make
701  * the survivors permanent via fsg_history_end_frame().
702  */
703  fsg_search_hmm_prune_prop(fsgs);
704  fsg_history_end_frame(fsgs->history);
705 
706  /*
707  * Propagate new history entries through any null transitions, creating
708  * new history entries, and then make the survivors permanent.
709  */
710  fsg_search_null_prop(fsgs);
711  fsg_history_end_frame(fsgs->history);
712 
713  /*
714  * Perform cross-word transitions; propagate each history entry across its
715  * terminating state to the root nodes of the lextree attached to the state.
716  */
717  fsg_search_word_trans(fsgs);
718 
719  /*
720  * We've now come full circle, HMM and FSG states have been updated for
721  * the next frame.
722  * Update the active lists, deactivate any currently active HMMs that
723  * did not survive into the next frame
724  */
725  for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
726  pnode = (fsg_pnode_t *) gnode_ptr(gn);
727  hmm = fsg_pnode_hmmptr(pnode);
728 
729  if (hmm_frame(hmm) == fsgs->frame) {
730  /* This HMM NOT activated for the next frame; reset it */
732  }
733  else {
734  assert(hmm_frame(hmm) == (fsgs->frame + 1));
735  }
736  }
737 
738  /* Free the currently active list */
739  glist_free(fsgs->pnode_active);
740 
741  /* Make the next-frame active list the current one */
742  fsgs->pnode_active = fsgs->pnode_active_next;
743  fsgs->pnode_active_next = NULL;
744 
745  /* End of this frame; ready for the next */
746  ++fsgs->frame;
747 
748  return 1;
749 }
750 
751 
752 /*
753  * Set all HMMs to inactive, clear active lists, initialize FSM start
754  * state to be the only active node.
755  * (Executed at the start of each utterance.)
756  */
757 int
758 fsg_search_start(ps_search_t *search)
759 {
760  fsg_search_t *fsgs = (fsg_search_t *)search;
761  int32 silcipid;
762  fsg_pnode_ctxt_t ctxt;
763 
764  /* Reset dynamic adjustment factor for beams */
765  fsgs->beam_factor = 1.0f;
766  fsgs->beam = fsgs->beam_orig;
767  fsgs->pbeam = fsgs->pbeam_orig;
768  fsgs->wbeam = fsgs->wbeam_orig;
769 
770  silcipid = bin_mdef_ciphone_id(ps_search_acmod(fsgs)->mdef, "SIL");
771 
772  /* Initialize EVERYTHING to be inactive */
773  assert(fsgs->pnode_active == NULL);
774  assert(fsgs->pnode_active_next == NULL);
775 
776  fsg_history_reset(fsgs->history);
777  fsg_history_utt_start(fsgs->history);
778  fsgs->final = FALSE;
779 
780  /* Dummy context structure that allows all right contexts to use this entry */
781  fsg_pnode_add_all_ctxt(&ctxt);
782 
783  /* Create dummy history entry leading to start state */
784  fsgs->frame = -1;
785  fsgs->bestscore = 0;
786  fsg_history_entry_add(fsgs->history,
787  NULL, -1, 0, -1, silcipid, ctxt);
788  fsgs->bpidx_start = 0;
789 
790  /* Propagate dummy history entry through NULL transitions from start state */
791  fsg_search_null_prop(fsgs);
792 
793  /* Perform word transitions from this dummy history entry */
794  fsg_search_word_trans(fsgs);
795 
796  /* Make the next-frame active list the current one */
797  fsgs->pnode_active = fsgs->pnode_active_next;
798  fsgs->pnode_active_next = NULL;
799 
800  ++fsgs->frame;
801 
802  fsgs->n_hmm_eval = 0;
803  fsgs->n_sen_eval = 0;
804 
805  return 0;
806 }
807 
808 /*
809  * Cleanup at the end of each utterance.
810  */
811 int
812 fsg_search_finish(ps_search_t *search)
813 {
814  fsg_search_t *fsgs = (fsg_search_t *)search;
815  gnode_t *gn;
816  fsg_pnode_t *pnode;
817  int32 n_hist;
818 
819  /* Deactivate all nodes in the current and next-frame active lists */
820  for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
821  pnode = (fsg_pnode_t *) gnode_ptr(gn);
823  }
824  for (gn = fsgs->pnode_active_next; gn; gn = gnode_next(gn)) {
825  pnode = (fsg_pnode_t *) gnode_ptr(gn);
827  }
828 
829  glist_free(fsgs->pnode_active);
830  fsgs->pnode_active = NULL;
831  glist_free(fsgs->pnode_active_next);
832  fsgs->pnode_active_next = NULL;
833 
834  fsgs->final = TRUE;
835 
836  n_hist = fsg_history_n_entries(fsgs->history);
837  E_INFO
838  ("%d frames, %d HMMs (%d/fr), %d senones (%d/fr), %d history entries (%d/fr)\n\n",
839  fsgs->frame, fsgs->n_hmm_eval,
840  (fsgs->frame > 0) ? fsgs->n_hmm_eval / fsgs->frame : 0,
841  fsgs->n_sen_eval,
842  (fsgs->frame > 0) ? fsgs->n_sen_eval / fsgs->frame : 0,
843  n_hist, (fsgs->frame > 0) ? n_hist / fsgs->frame : 0);
844 
845  return 0;
846 }
847 
848 static int
849 fsg_search_find_exit(fsg_search_t *fsgs, int frame_idx, int final, int32 *out_score, int32* out_is_final)
850 {
851  fsg_hist_entry_t *hist_entry = NULL;
852  fsg_model_t *fsg;
853  int bpidx, frm, last_frm, besthist;
854  int32 bestscore;
855 
856  if (out_is_final)
857  *out_is_final = FALSE;
858 
859  if (frame_idx == -1)
860  frame_idx = fsgs->frame - 1;
861  last_frm = frm = frame_idx;
862 
863  /* Scan backwards to find a word exit in frame_idx. */
864  bpidx = fsg_history_n_entries(fsgs->history) - 1;
865  while (bpidx > 0) {
866  hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
867  if (fsg_hist_entry_frame(hist_entry) <= frame_idx) {
868  frm = last_frm = fsg_hist_entry_frame(hist_entry);
869  break;
870  }
871  bpidx--;
872  }
873 
874  /* No hypothesis (yet). */
875  if (bpidx <= 0)
876  return bpidx;
877 
878  /* Now find best word exit in this frame. */
879  bestscore = INT_MIN;
880  besthist = -1;
881  fsg = fsgs->fsg;
882  while (frm == last_frm) {
883  fsg_link_t *fl;
884  int32 score;
885 
886  fl = fsg_hist_entry_fsglink(hist_entry);
887  score = fsg_hist_entry_score(hist_entry);
888 
889  if (fl == NULL)
890  break;
891 
892  /* Prefer final hypothesis */
893  if (score == bestscore && fsg_link_to_state(fl) == fsg_model_final_state(fsg)) {
894  besthist = bpidx;
895  } else if (score BETTER_THAN bestscore) {
896  /* Only enforce the final state constraint if this is a final hypothesis. */
897  if ((!final)
898  || fsg_link_to_state(fl) == fsg_model_final_state(fsg)) {
899  bestscore = score;
900  besthist = bpidx;
901  }
902  }
903 
904  --bpidx;
905  if (bpidx < 0)
906  break;
907  hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
908  frm = fsg_hist_entry_frame(hist_entry);
909  }
910 
911  /* Final state not reached. */
912  if (besthist == -1) {
913  E_ERROR("Final result does not match the grammar in frame %d\n", frame_idx);
914  return -1;
915  }
916 
917  /* This here's the one we want. */
918  if (out_score)
919  *out_score = bestscore;
920  if (out_is_final) {
921  fsg_link_t *fl;
922  hist_entry = fsg_history_entry_get(fsgs->history, besthist);
923  fl = fsg_hist_entry_fsglink(hist_entry);
924  *out_is_final = (fsg_link_to_state(fl) == fsg_model_final_state(fsg));
925  }
926  return besthist;
927 }
928 
929 /* FIXME: Mostly duplicated with ngram_search_bestpath(). */
930 static ps_latlink_t *
931 fsg_search_bestpath(ps_search_t *search, int32 *out_score, int backward)
932 {
933  fsg_search_t *fsgs = (fsg_search_t *)search;
934 
935  if (search->last_link == NULL) {
936  search->last_link = ps_lattice_bestpath(search->dag, NULL,
937  1.0, fsgs->ascale);
938  if (search->last_link == NULL)
939  return NULL;
940  /* Also calculate betas so we can fill in the posterior
941  * probability field in the segmentation. */
942  if (search->post == 0)
943  search->post = ps_lattice_posterior(search->dag, NULL, fsgs->ascale);
944  }
945  if (out_score)
946  *out_score = search->last_link->path_scr + search->dag->final_node_ascr;
947  return search->last_link;
948 }
949 
950 char const *
951 fsg_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final)
952 {
953  fsg_search_t *fsgs = (fsg_search_t *)search;
954  dict_t *dict = ps_search_dict(search);
955  char *c;
956  size_t len;
957  int bp, bpidx;
958 
959  /* Get last backpointer table index. */
960  bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, out_score, out_is_final);
961  /* No hypothesis (yet). */
962  if (bpidx <= 0) {
963  return NULL;
964  }
965 
966  /* If bestpath is enabled and the utterance is complete, then run it. */
967  if (fsgs->bestpath && fsgs->final) {
968  ps_lattice_t *dag;
969  ps_latlink_t *link;
970 
971  if ((dag = fsg_search_lattice(search)) == NULL) {
972  E_WARN("Failed to obtain the lattice while bestpath enabled\n");
973  return NULL;
974  }
975  if ((link = fsg_search_bestpath(search, out_score, FALSE)) == NULL) {
976  E_WARN("Failed to find the bestpath in a lattice\n");
977  return NULL;
978  }
979  return ps_lattice_hyp(dag, link);
980  }
981 
982  bp = bpidx;
983  len = 0;
984  while (bp > 0) {
985  fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
986  fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry);
987  char const *baseword;
988  int32 wid;
989 
990  bp = fsg_hist_entry_pred(hist_entry);
991  wid = fsg_link_wid(fl);
992  if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid))
993  continue;
994  baseword = dict_basestr(dict,
995  dict_wordid(dict,
996  fsg_model_word_str(fsgs->fsg, wid)));
997  len += strlen(baseword) + 1;
998  }
999 
1000  ckd_free(search->hyp_str);
1001  if (len == 0) {
1002  search->hyp_str = NULL;
1003  return search->hyp_str;
1004  }
1005  search->hyp_str = ckd_calloc(1, len);
1006 
1007  bp = bpidx;
1008  c = search->hyp_str + len - 1;
1009  while (bp > 0) {
1010  fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
1011  fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry);
1012  char const *baseword;
1013  int32 wid;
1014 
1015  bp = fsg_hist_entry_pred(hist_entry);
1016  wid = fsg_link_wid(fl);
1017  if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid))
1018  continue;
1019  baseword = dict_basestr(dict,
1020  dict_wordid(dict,
1021  fsg_model_word_str(fsgs->fsg, wid)));
1022  len = strlen(baseword);
1023  c -= len;
1024  memcpy(c, baseword, len);
1025  if (c > search->hyp_str) {
1026  --c;
1027  *c = ' ';
1028  }
1029  }
1030 
1031  return search->hyp_str;
1032 }
1033 
1034 static void
1035 fsg_seg_bp2itor(ps_seg_t *seg, fsg_hist_entry_t *hist_entry)
1036 {
1037  fsg_search_t *fsgs = (fsg_search_t *)seg->search;
1038  fsg_hist_entry_t *ph = NULL;
1039  int32 bp;
1040 
1041  if ((bp = fsg_hist_entry_pred(hist_entry)) >= 0)
1042  ph = fsg_history_entry_get(fsgs->history, bp);
1043  seg->word = fsg_model_word_str(fsgs->fsg, hist_entry->fsglink->wid);
1044  seg->ef = fsg_hist_entry_frame(hist_entry);
1045  seg->sf = ph ? fsg_hist_entry_frame(ph) + 1 : 0;
1046  /* This is kind of silly but it happens for null transitions. */
1047  if (seg->sf > seg->ef) seg->sf = seg->ef;
1048  seg->prob = 0; /* Bogus value... */
1049  /* "Language model" score = transition probability. */
1050  seg->lback = 1;
1051  seg->lscr = fsg_link_logs2prob(hist_entry->fsglink) >> SENSCR_SHIFT;
1052  if (ph) {
1053  /* FIXME: Not sure exactly how cross-word triphones are handled. */
1054  seg->ascr = hist_entry->score - ph->score - seg->lscr;
1055  }
1056  else
1057  seg->ascr = hist_entry->score - seg->lscr;
1058 }
1059 
1060 
1061 static void
1062 fsg_seg_free(ps_seg_t *seg)
1063 {
1064  fsg_seg_t *itor = (fsg_seg_t *)seg;
1065  ckd_free(itor->hist);
1066  ckd_free(itor);
1067 }
1068 
1069 static ps_seg_t *
1070 fsg_seg_next(ps_seg_t *seg)
1071 {
1072  fsg_seg_t *itor = (fsg_seg_t *)seg;
1073 
1074  if (++itor->cur == itor->n_hist) {
1075  fsg_seg_free(seg);
1076  return NULL;
1077  }
1078 
1079  fsg_seg_bp2itor(seg, itor->hist[itor->cur]);
1080  return seg;
1081 }
1082 
1083 static ps_segfuncs_t fsg_segfuncs = {
1084  /* seg_next */ fsg_seg_next,
1085  /* seg_free */ fsg_seg_free
1086 };
1087 
1088 static ps_seg_t *
1089 fsg_search_seg_iter(ps_search_t *search, int32 *out_score)
1090 {
1091  fsg_search_t *fsgs = (fsg_search_t *)search;
1092  fsg_seg_t *itor;
1093  int bp, bpidx, cur;
1094 
1095  bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, out_score, NULL);
1096  /* No hypothesis (yet). */
1097  if (bpidx <= 0)
1098  return NULL;
1099 
1100  /* If bestpath is enabled and the utterance is complete, then run it. */
1101  if (fsgs->bestpath && fsgs->final) {
1102  ps_lattice_t *dag;
1103  ps_latlink_t *link;
1104 
1105  if ((dag = fsg_search_lattice(search)) == NULL)
1106  return NULL;
1107  if ((link = fsg_search_bestpath(search, out_score, TRUE)) == NULL)
1108  return NULL;
1109  return ps_lattice_seg_iter(dag, link, 1.0);
1110  }
1111 
1112  /* Calling this an "iterator" is a bit of a misnomer since we have
1113  * to get the entire backtrace in order to produce it. On the
1114  * other hand, all we actually need is the bptbl IDs, and we can
1115  * allocate a fixed-size array of them. */
1116  itor = ckd_calloc(1, sizeof(*itor));
1117  itor->base.vt = &fsg_segfuncs;
1118  itor->base.search = search;
1119  itor->base.lwf = 1.0;
1120  itor->n_hist = 0;
1121  bp = bpidx;
1122  while (bp > 0) {
1123  fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
1124  bp = fsg_hist_entry_pred(hist_entry);
1125  ++itor->n_hist;
1126  }
1127  if (itor->n_hist == 0) {
1128  ckd_free(itor);
1129  return NULL;
1130  }
1131  itor->hist = ckd_calloc(itor->n_hist, sizeof(*itor->hist));
1132  cur = itor->n_hist - 1;
1133  bp = bpidx;
1134  while (bp > 0) {
1135  fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
1136  itor->hist[cur] = hist_entry;
1137  bp = fsg_hist_entry_pred(hist_entry);
1138  --cur;
1139  }
1140 
1141  /* Fill in relevant fields for first element. */
1142  fsg_seg_bp2itor((ps_seg_t *)itor, itor->hist[0]);
1143 
1144  return (ps_seg_t *)itor;
1145 }
1146 
1147 static int
1148 fsg_search_prob(ps_search_t *search)
1149 {
1150  fsg_search_t *fsgs = (fsg_search_t *)search;
1151 
1152  /* If bestpath is enabled and the utterance is complete, then run it. */
1153  if (fsgs->bestpath && fsgs->final) {
1154  ps_lattice_t *dag;
1155  ps_latlink_t *link;
1156 
1157  if ((dag = fsg_search_lattice(search)) == NULL)
1158  return 0;
1159  if ((link = fsg_search_bestpath(search, NULL, TRUE)) == NULL)
1160  return 0;
1161  return search->post;
1162  }
1163  else {
1164  /* FIXME: Give some kind of good estimate here, eventually. */
1165  return 0;
1166  }
1167 }
1168 
1169 static ps_latnode_t *
1170 find_node(ps_lattice_t *dag, fsg_model_t *fsg, int sf, int32 wid, int32 node_id)
1171 {
1172  ps_latnode_t *node;
1173 
1174  for (node = dag->nodes; node; node = node->next)
1175  if ((node->sf == sf) && (node->wid == wid) && (node->node_id == node_id))
1176  break;
1177  return node;
1178 }
1179 
1180 static ps_latnode_t *
1181 new_node(ps_lattice_t *dag, fsg_model_t *fsg, int sf, int ef, int32 wid, int32 node_id, int32 ascr)
1182 {
1183  ps_latnode_t *node;
1184 
1185  node = find_node(dag, fsg, sf, wid, node_id);
1186 
1187  if (node) {
1188  /* Update end frames. */
1189  if (node->lef == -1 || node->lef < ef)
1190  node->lef = ef;
1191  if (node->fef == -1 || node->fef > ef)
1192  node->fef = ef;
1193  /* Update best link score. */
1194  if (ascr BETTER_THAN node->info.best_exit)
1195  node->info.best_exit = ascr;
1196  }
1197  else {
1198  /* New node; link to head of list */
1199  node = listelem_malloc(dag->latnode_alloc);
1200  node->wid = wid;
1201  node->sf = sf;
1202  node->fef = node->lef = ef;
1203  node->reachable = FALSE;
1204  node->entries = NULL;
1205  node->exits = NULL;
1206  node->info.best_exit = ascr;
1207  node->node_id = node_id;
1208 
1209  node->next = dag->nodes;
1210  dag->nodes = node;
1211  ++dag->n_nodes;
1212  }
1213 
1214  return node;
1215 }
1216 
1217 static ps_latnode_t *
1218 find_start_node(fsg_search_t *fsgs, ps_lattice_t *dag)
1219 {
1220  ps_latnode_t *node;
1221  glist_t start = NULL;
1222  int nstart = 0;
1223 
1224  /* Look for all nodes starting in frame zero with some exits. */
1225  for (node = dag->nodes; node; node = node->next) {
1226  if (node->sf == 0 && node->exits) {
1227  E_INFO("Start node %s.%d:%d:%d\n",
1228  fsg_model_word_str(fsgs->fsg, node->wid),
1229  node->sf, node->fef, node->lef);
1230  start = glist_add_ptr(start, node);
1231  ++nstart;
1232  }
1233  }
1234 
1235  /* If there was more than one start node candidate, then we need
1236  * to create an artificial start node with epsilon transitions to
1237  * all of them. */
1238  if (nstart == 1) {
1239  node = gnode_ptr(start);
1240  }
1241  else {
1242  gnode_t *st;
1243  int wid;
1244 
1245  wid = fsg_model_word_add(fsgs->fsg, "<s>");
1246  if (fsgs->fsg->silwords)
1247  bitvec_set(fsgs->fsg->silwords, wid);
1248  node = new_node(dag, fsgs->fsg, 0, 0, wid, -1, 0);
1249  for (st = start; st; st = gnode_next(st))
1250  ps_lattice_link(dag, node, gnode_ptr(st), 0, 0);
1251  }
1252  glist_free(start);
1253  return node;
1254 }
1255 
1256 static ps_latnode_t *
1257 find_end_node(fsg_search_t *fsgs, ps_lattice_t *dag)
1258 {
1259  ps_latnode_t *node;
1260  glist_t end = NULL;
1261  int nend = 0;
1262 
1263  /* Look for all nodes ending in last frame with some entries. */
1264  for (node = dag->nodes; node; node = node->next) {
1265  if (node->lef == dag->n_frames - 1 && node->entries) {
1266  E_INFO("End node %s.%d:%d:%d (%d)\n",
1267  fsg_model_word_str(fsgs->fsg, node->wid),
1268  node->sf, node->fef, node->lef, node->info.best_exit);
1269  end = glist_add_ptr(end, node);
1270  ++nend;
1271  }
1272  }
1273 
1274  if (nend == 1) {
1275  node = gnode_ptr(end);
1276  }
1277  else if (nend == 0) {
1278  ps_latnode_t *last = NULL;
1279  int ef = 0;
1280 
1281  /* If there were no end node candidates, then just use the
1282  * node with the last exit frame. */
1283  for (node = dag->nodes; node; node = node->next) {
1284  if (node->lef > ef && node->entries) {
1285  last = node;
1286  ef = node->lef;
1287  }
1288  }
1289  node = last;
1290  if (node)
1291  E_INFO("End node %s.%d:%d:%d (%d)\n",
1292  fsg_model_word_str(fsgs->fsg, node->wid),
1293  node->sf, node->fef, node->lef, node->info.best_exit);
1294  }
1295  else {
1296  /* If there was more than one end node candidate, then we need
1297  * to create an artificial end node with epsilon transitions
1298  * out of all of them. */
1299  gnode_t *st;
1300  int wid;
1301  wid = fsg_model_word_add(fsgs->fsg, "</s>");
1302  if (fsgs->fsg->silwords)
1303  bitvec_set(fsgs->fsg->silwords, wid);
1304  node = new_node(dag, fsgs->fsg, fsgs->frame, fsgs->frame, wid, -1, 0);
1305  /* Use the "best" (in reality it will be the only) exit link
1306  * score from this final node as the link score. */
1307  for (st = end; st; st = gnode_next(st)) {
1308  ps_latnode_t *src = gnode_ptr(st);
1309  ps_lattice_link(dag, src, node, src->info.best_exit, fsgs->frame);
1310  }
1311  }
1312  glist_free(end);
1313  return node;
1314 }
1315 
1316 static void
1317 mark_reachable(ps_lattice_t *dag, ps_latnode_t *end)
1318 {
1319  glist_t q;
1320 
1321  /* It doesn't matter which order we do this in. */
1322  end->reachable = TRUE;
1323  q = glist_add_ptr(NULL, end);
1324  while (q) {
1325  ps_latnode_t *node = gnode_ptr(q);
1326  latlink_list_t *x;
1327 
1328  /* Pop the front of the list. */
1329  q = gnode_free(q, NULL);
1330  /* Expand all its predecessors that haven't been seen yet. */
1331  for (x = node->entries; x; x = x->next) {
1332  ps_latnode_t *next = x->link->from;
1333  if (!next->reachable) {
1334  next->reachable = TRUE;
1335  q = glist_add_ptr(q, next);
1336  }
1337  }
1338  }
1339 }
1340 
1349 static ps_lattice_t *
1350 fsg_search_lattice(ps_search_t *search)
1351 {
1352  fsg_search_t *fsgs;
1353  fsg_model_t *fsg;
1354  ps_latnode_t *node;
1355  ps_lattice_t *dag;
1356  int32 i, n;
1357 
1358  fsgs = (fsg_search_t *)search;
1359 
1360  /* Check to see if a lattice has previously been created over the
1361  * same number of frames, and reuse it if so. */
1362  if (search->dag && search->dag->n_frames == fsgs->frame)
1363  return search->dag;
1364 
1365  /* Nope, create a new one. */
1366  ps_lattice_free(search->dag);
1367  search->dag = NULL;
1368  dag = ps_lattice_init_search(search, fsgs->frame);
1369  fsg = fsgs->fsg;
1370 
1371  /*
1372  * Each history table entry represents a link in the word graph.
1373  * The set of nodes is determined by the number of unique
1374  * (word,start-frame) pairs in the history table. So we will
1375  * first find all those nodes.
1376  */
1377  n = fsg_history_n_entries(fsgs->history);
1378  for (i = 0; i < n; ++i) {
1379  fsg_hist_entry_t *fh = fsg_history_entry_get(fsgs->history, i);
1380  int32 ascr;
1381  int sf;
1382 
1383  /* Skip null transitions. */
1384  if (fh->fsglink == NULL || fh->fsglink->wid == -1)
1385  continue;
1386 
1387  /* Find the start node of this link. */
1388  if (fh->pred) {
1389  fsg_hist_entry_t *pfh = fsg_history_entry_get(fsgs->history, fh->pred);
1390  /* FIXME: We include the transition score in the lattice
1391  * link score. This is because of the practical
1392  * difficulty of obtaining it separately in bestpath or
1393  * forward-backward search, and because it is essentially
1394  * a unigram probability, so there is no need to treat it
1395  * separately from the acoustic score. However, it's not
1396  * clear that this will actually yield correct results.*/
1397  ascr = fh->score - pfh->score;
1398  sf = pfh->frame + 1;
1399  }
1400  else {
1401  ascr = fh->score;
1402  sf = 0;
1403  }
1404 
1405  /*
1406  * Note that although scores are tied to links rather than
1407  * nodes, it's possible that there are no links out of the
1408  * destination node, and thus we need to preserve its score in
1409  * case it turns out to be utterance-final.
1410  */
1411  new_node(dag, fsg, sf, fh->frame, fh->fsglink->wid, fsg_link_to_state(fh->fsglink), ascr);
1412  }
1413 
1414  /*
1415  * Now, we will create links only to nodes that actually exist.
1416  */
1417  n = fsg_history_n_entries(fsgs->history);
1418  for (i = 0; i < n; ++i) {
1419  fsg_hist_entry_t *fh = fsg_history_entry_get(fsgs->history, i);
1420  fsg_arciter_t *itor;
1421  ps_latnode_t *src, *dest;
1422  int32 ascr;
1423  int sf;
1424 
1425  /* Skip null transitions. */
1426  if (fh->fsglink == NULL || fh->fsglink->wid == -1)
1427  continue;
1428 
1429  /* Find the start node of this link and calculate its link score. */
1430  if (fh->pred) {
1431  fsg_hist_entry_t *pfh = fsg_history_entry_get(fsgs->history, fh->pred);
1432  sf = pfh->frame + 1;
1433  ascr = fh->score - pfh->score;
1434  }
1435  else {
1436  ascr = fh->score;
1437  sf = 0;
1438  }
1439  src = find_node(dag, fsg, sf, fh->fsglink->wid, fsg_link_to_state(fh->fsglink));
1440  sf = fh->frame + 1;
1441 
1442  for (itor = fsg_model_arcs(fsg, fsg_link_to_state(fh->fsglink));
1443  itor; itor = fsg_arciter_next(itor)) {
1444  fsg_link_t *link = fsg_arciter_get(itor);
1445 
1446  /* FIXME: Need to figure out what to do about tag transitions. */
1447  if (link->wid >= 0) {
1448  /*
1449  * For each non-epsilon link following this one, look for a
1450  * matching node in the lattice and link to it.
1451  */
1452  if ((dest = find_node(dag, fsg, sf, link->wid, fsg_link_to_state(link))) != NULL)
1453  ps_lattice_link(dag, src, dest, ascr, fh->frame);
1454  }
1455  else {
1456  /*
1457  * Transitive closure on nulls has already been done, so we
1458  * just need to look one link forward from them.
1459  */
1460  fsg_arciter_t *itor2;
1461 
1462  /* Add all non-null links out of j. */
1463  for (itor2 = fsg_model_arcs(fsg, fsg_link_to_state(link));
1464  itor2; itor2 = fsg_arciter_next(itor2)) {
1465  fsg_link_t *link = fsg_arciter_get(itor2);
1466 
1467  if (link->wid == -1)
1468  continue;
1469 
1470  if ((dest = find_node(dag, fsg, sf, link->wid, fsg_link_to_state(link))) != NULL) {
1471  ps_lattice_link(dag, src, dest, ascr, fh->frame);
1472  }
1473  }
1474  }
1475  }
1476  }
1477 
1478 
1479  /* Figure out which nodes are the start and end nodes. */
1480  if ((dag->start = find_start_node(fsgs, dag)) == NULL) {
1481  E_WARN("Failed to find the start node\n");
1482  goto error_out;
1483  }
1484  if ((dag->end = find_end_node(fsgs, dag)) == NULL) {
1485  E_WARN("Failed to find the end node\n");
1486  goto error_out;
1487  }
1488 
1489 
1490  E_INFO("lattice start node %s.%d end node %s.%d\n",
1491  fsg_model_word_str(fsg, dag->start->wid), dag->start->sf,
1492  fsg_model_word_str(fsg, dag->end->wid), dag->end->sf);
1493  /* FIXME: Need to calculate final_node_ascr here. */
1494 
1495  /*
1496  * Convert word IDs from FSG to dictionary.
1497  */
1498  for (node = dag->nodes; node; node = node->next) {
1499  node->wid = dict_wordid(dag->search->dict,
1500  fsg_model_word_str(fsg, node->wid));
1501  node->basewid = dict_basewid(dag->search->dict, node->wid);
1502  }
1503 
1504  /*
1505  * Now we are done, because the links in the graph are uniquely
1506  * defined by the history table. However we should remove any
1507  * nodes which are not reachable from the end node of the FSG.
1508  * Everything is reachable from the start node by definition.
1509  */
1510  mark_reachable(dag, dag->end);
1511 
1513  {
1514  int32 silpen, fillpen;
1515 
1516  silpen = (int32)(logmath_log(fsg->lmath,
1517  cmd_ln_float32_r(ps_search_config(fsgs), "-silprob"))
1518  * fsg->lw)
1519  >> SENSCR_SHIFT;
1520  fillpen = (int32)(logmath_log(fsg->lmath,
1521  cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob"))
1522  * fsg->lw)
1523  >> SENSCR_SHIFT;
1524 
1525  ps_lattice_penalize_fillers(dag, silpen, fillpen);
1526  }
1527  search->dag = dag;
1528 
1529  return dag;
1530 
1531 
1532 error_out:
1533  ps_lattice_free(dag);
1534  return NULL;
1535 
1536 }
1537 
void fsg_lextree_free(fsg_lextree_t *lextree)
Free lextrees for an FSG.
Definition: fsg_lextree.c:286
glist_t pnode_active_next
Those activated for the next frame.
Implementation of FSG search (and "FSG set") structure.
Internal implementation of PocketSphinx decoder.
int16 reachable
From.
Base structure for search module.
int32 n_nodes
Number of nodes in this lattice.
ps_seg_t * ps_lattice_seg_iter(ps_lattice_t *dag, ps_latlink_t *link, float32 lwf)
Get hypothesis segmentation iterator after bestpath search.
Definition: ps_lattice.c:1006
int16 cur
Current position in hist.
Definition: fsg_history.h:95
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
Definition: dict.c:399
int32 lef
Last end frame.
fsg_model_t * fsg
FSG model.
int32 wbeam_orig
Pruning threshold for word exit.
void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt)
Set all flags on in the given context bitvector.
Definition: fsg_lextree.c:328
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
int n_senone_active
Number of active GMMs.
Definition: acmod.h:169
acmod_t * acmod
Acoustic model.
Segmentation "iterator" for FSG history.
void ps_lattice_penalize_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen)
Insert penalty for fillers.
Definition: ps_lattice.c:106
ps_seg_t base
Base structure.
An individual HMM among the HMM search space.
float32 beam_factor
Dynamic/adaptive factor (<=1) applied to above beams to determine actual effective beams...
ps_latnode_t * start
Starting node.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state]...
Definition: tmat.h:107
ps_segfuncs_t * vt
V-table of seg methods.
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
int bin_mdef_ciphone_id(bin_mdef_t *m, const char *ciphone)
Context-independent phone lookup.
Definition: bin_mdef.c:691
struct fsg_history_s * history
For storing the Viterbi search history.
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Definition: bin_mdef.h:134
int32 lscr
Language model score.
int32 n_words
Number of words known to search (may be less than in the dictionary)
int32 wbeam
Effective beams after applying beam_factor.
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition: acmod.c:1233
float32 ascale
Acoustic score scale for posterior probabilities.
ps_search_t * search
Search (if generated by search).
#define BAD_S3WID
Dictionary word id.
Definition: s3types.h:90
frame_idx_t n_frames
Number of frames for this utterance.
Word graph search implementation.
int32 n_hmm_eval
Total HMMs evaluated this utt.
ps_latnode_t * nodes
List of all nodes.
listelem_alloc_t * latnode_alloc
Node allocator for this DAG.
int32 wip
Language weights.
int32 best_exit
Best exit score (used for final nodes only)
int32 prob
Log posterior probability.
latlink_list_t * entries
Links into this node.
POCKETSPHINX_EXPORT int32 ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset, float32 ascale)
Calculate link posterior probabilities on a word graph.
Definition: ps_lattice.c:1446
hmm_context_t * hmmctx
HMM context.
char const * word
Word string (pointer into dictionary hash)
ps_search_t * search
Search object from whence this came.
int32 final_node_ascr
Acoustic score of implicit link exiting final node.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
fsg_hist_entry_t ** hist
Sequence of history entries.
int32 hmm_vit_eval(hmm_t *hmm)
Viterbi evaluation of given HMM.
Definition: hmm.c:789
void fsg_psubtree_pnode_deactivate(fsg_pnode_t *pnode)
Mark the given pnode as inactive (for search).
Definition: fsg_lextree.c:832
uint8 compallsen
Compute all senones?
Definition: acmod.h:188
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
Definition: hmm.c:56
void ps_search_base_free(ps_search_t *search)
Free search.
void ps_lattice_delete_unreachable(ps_lattice_t *dag)
Remove nodes marked as unreachable.
Definition: ps_lattice.c:174
ps_latnode_t * end
Ending node.
frame_idx_t sf
Start frame.
POCKETSPHINX_EXPORT ps_latlink_t * ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, float32 ascale)
Do N-Gram based best-path search on a word graph.
Definition: ps_lattice.c:1215
int32 beam_orig
Global pruning threshold.
latlink_list_t * exits
Links out of this node.
#define WORST_SCORE
Large "bad" score.
Definition: hmm.h:84
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
frame_idx_t ef
End frame.
int32 ascr
Acoustic score.
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
Definition: hmm.c:201
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition: acmod.c:1217
int32 wid
Dictionary word id.
int32 node_id
Node from fsg model, used to map lattice back to model.
int32 pbeam_orig
Pruning threshold for phone transition.
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
Definition: hmm.h:227
void hmm_dump(hmm_t *h, FILE *fp)
For debugging, dump the whole HMM out.
Definition: hmm.c:116
uint8 final
Decoding is finished for this utterance.
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
a structure for a dictionary.
Definition: dict.h:76
char const * ps_lattice_hyp(ps_lattice_t *dag, ps_latlink_t *link)
Get hypothesis string after bestpath search.
Definition: ps_lattice.c:830
Word graph structure used in bestpath/nbest search.
int32 bestscore
For beam pruning.
int32 bpidx_start
First history entry index this frame.
int32 post
Utterance posterior probability.
char * hyp_str
Current hypothesis string.
fsg_lextree_t * fsg_lextree_init(fsg_model_t *fsg, dict_t *dict, dict2pid_t *d2p, bin_mdef_t *mdef, hmm_context_t *ctx, int32 wip, int32 pip)
Create, initialize, and return a new phonetic lextree for the given FSG.
Definition: fsg_lextree.c:215
#define BETTER_THAN
Is one score better than another?
Definition: hmm.h:95
dict_t * dict
Pronunciation dictionary.
int32 fef
First end frame.
uint8 bestpath
Whether to run bestpath search and confidence annotation at end.
int32 lback
Language model backoff.
int32 n_sen_eval
Total senones evaluated this utt.
int32 basewid
Dictionary base word id.
glist_t pnode_active
Those active in this frame.
ps_lattice_t * ps_lattice_init_search(ps_search_t *search, int n_frame)
Construct an empty word graph with reference to a search structure.
Definition: ps_lattice.c:639
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
Definition: hmm.c:80
int16 n_hist
Number of history entries.
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
ps_latlink_t * last_link
Final link in best path.
struct ps_latnode_s * next
Next node in DAG (no ordering implied)
V-table for search algorithm.
frame_idx_t frame
Current frame.
ps_lattice_t * dag
Current hypothesis word graph.
Base structure for hypothesis segmentation iterator.
struct fsg_lextree_s * lextree
Lextree structure for the currently active FSG.
#define dict_size(d)
Packaged macro access to dictionary members.
Definition: dict.h:151
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
Definition: ps_lattice.c:665
Acoustic model structure.
Definition: acmod.h:148
float32 lwf
Language weight factor (for second-pass searches)
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
POCKETSPHINX_EXPORT void ps_lattice_link(ps_lattice_t *dag, ps_latnode_t *from, ps_latnode_t *to, int32 score, int32 ef)
Create a directed link between "from" and "to" nodes, but if a link already exists, choose one with the best link_scr.
Definition: ps_lattice.c:65
frame_idx_t sf
Start frame.
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition: acmod.c:1126