PocketSphinx  5prealpha
allphone_search.c
1 /* ====================================================================
2  * Copyright (c) 2014 Carnegie Mellon University. All rights
3  * reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in
14  * the documentation and/or other materials provided with the
15  * distribution.
16  *
17  *
18  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
19  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
22  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * ====================================================================
31  *
32  */
33 
34 /*
35 * allphone_search.c -- Search for phonetic decoding.
36 */
37 
38 #include <stdio.h>
39 #include <string.h>
40 #include <assert.h>
41 
42 #include <sphinxbase/err.h>
43 #include <sphinxbase/ckd_alloc.h>
44 #include <sphinxbase/strfuncs.h>
45 #include <sphinxbase/pio.h>
46 #include <sphinxbase/cmd_ln.h>
47 
48 #include "pocketsphinx_internal.h"
49 #include "allphone_search.h"
50 
51 static ps_lattice_t *
52 allphone_search_lattice(ps_search_t * search)
53 {
54  //cap
55  return NULL;
56 }
57 
58 static int
59 allphone_search_prob(ps_search_t * search)
60 {
61  return 0;
62 }
63 
64 static void
65 allphone_backtrace(allphone_search_t * allphs, int32 f);
66 
67 static void
68 allphone_search_seg_free(ps_seg_t * seg)
69 {
70  ckd_free(seg);
71 }
72 
73 static void
74 allphone_search_fill_iter(ps_seg_t *seg, phseg_t *phseg)
75 {
76  seg->sf = phseg->sf;
77  seg->ef = phseg->ef;
78  seg->ascr = phseg->score;
79  seg->lscr = phseg->tscore;
80  seg->word = bin_mdef_ciphone_str(ps_search_acmod(seg->search)->mdef, phseg->ci);
81 }
82 
83 static ps_seg_t *
84 allphone_search_seg_next(ps_seg_t * seg)
85 {
86  phseg_iter_t *itor = (phseg_iter_t *) seg;
87  phseg_t *phseg;
88 
89  itor->seg = itor->seg->next;
90 
91  if (itor->seg == NULL) {
92  allphone_search_seg_free(seg);
93  return NULL;
94  }
95  phseg = gnode_ptr(itor->seg);
96  allphone_search_fill_iter(seg, phseg);
97 
98  return seg;
99 }
100 
101 static ps_segfuncs_t fsg_segfuncs = {
102  /* seg_next */ allphone_search_seg_next,
103  /* seg_free */ allphone_search_seg_free
104 };
105 
106 
107 static ps_seg_t *
108 allphone_search_seg_iter(ps_search_t * search, int32 * out_score)
109 {
110  allphone_search_t *allphs = (allphone_search_t *) search;
111  phseg_iter_t *iter;
112 
113  allphone_backtrace(allphs, allphs->frame - 1);
114  if (allphs->segments == NULL)
115  return NULL;
116 
117  iter = ckd_calloc(1, sizeof(phseg_iter_t));
118 
119  iter->base.vt = &fsg_segfuncs;
120  iter->base.search = search;
121  iter->seg = allphs->segments;
122  allphone_search_fill_iter((ps_seg_t *)iter, gnode_ptr(iter->seg));
123 
124  return (ps_seg_t *) iter;
125 }
126 
127 static ps_searchfuncs_t allphone_funcs = {
128  /* start: */ allphone_search_start,
129  /* step: */ allphone_search_step,
130  /* finish: */ allphone_search_finish,
131  /* reinit: */ allphone_search_reinit,
132  /* free: */ allphone_search_free,
133  /* lattice: */ allphone_search_lattice,
134  /* hyp: */ allphone_search_hyp,
135  /* prob: */ allphone_search_prob,
136  /* seg_iter: */ allphone_search_seg_iter,
137 };
138 
143 static phmm_t *
144 phmm_lookup(allphone_search_t * allphs, s3pid_t pid)
145 {
146  phmm_t *p;
147  bin_mdef_t *mdef;
148  phmm_t **ci_phmm;
149 
150  mdef = ((ps_search_t *) allphs)->acmod->mdef;
151  ci_phmm = allphs->ci_phmm;
152 
153  for (p = ci_phmm[bin_mdef_pid2ci(mdef, pid)]; p; p = p->next) {
154  if (mdef_pid2tmatid(mdef, p->pid) == mdef_pid2tmatid(mdef, pid))
155  if (mdef_pid2ssid(mdef, p->pid) == mdef_pid2ssid(mdef, pid))
156  return p;
157  }
158 
159  //not found
160  return NULL;
161 }
162 
163 static int32
164 phmm_link(allphone_search_t * allphs)
165 {
166  s3cipid_t ci, rc;
167  phmm_t *p, *p2;
168  int32 *rclist;
169  int32 i, n_link;
170  plink_t *l;
171  bin_mdef_t *mdef;
172  phmm_t **ci_phmm;
173 
174  mdef = ((ps_search_t *) allphs)->acmod->mdef;
175  ci_phmm = allphs->ci_phmm;
176 
177  rclist = (int32 *) ckd_calloc(mdef->n_ciphone + 1, sizeof(int32));
178 
179  /* Create successor links between PHMM nodes */
180  n_link = 0;
181  for (ci = 0; ci < mdef->n_ciphone; ci++) {
182  for (p = ci_phmm[ci]; p; p = p->next) {
183  /* Build rclist for p */
184  i = 0;
185  for (rc = 0; rc < mdef->n_ciphone; rc++) {
186  if (bitvec_is_set(p->rc, rc))
187  rclist[i++] = rc;
188  }
189  rclist[i] = BAD_S3CIPID;
190 
191  /* For each rc in rclist, transition to PHMMs for rc if left context = ci */
192  for (i = 0; IS_S3CIPID(rclist[i]); i++) {
193  for (p2 = ci_phmm[rclist[i]]; p2; p2 = p2->next) {
194  if (bitvec_is_set(p2->lc, ci)) {
195  /* transition from p to p2 */
196  l = (plink_t *) ckd_calloc(1, sizeof(*l));
197  l->phmm = p2;
198  l->next = p->succlist;
199  p->succlist = l;
200 
201  n_link++;
202  }
203  }
204  }
205  }
206  }
207 
208  ckd_free(rclist);
209 
210  return n_link;
211 }
212 
216 static int
217 phmm_build(allphone_search_t * allphs)
218 {
219  phmm_t *p, **pid2phmm;
220  bin_mdef_t *mdef;
221  int32 lrc_size;
222  uint32 *lc, *rc;
223  s3pid_t pid;
224  s3cipid_t ci;
225  s3cipid_t *filler;
226  int n_phmm, n_link;
227  int i, nphone;
228 
229  mdef = ((ps_search_t *) allphs)->acmod->mdef;
230  allphs->ci_phmm =
231  (phmm_t **) ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(phmm_t *));
232  pid2phmm =
233  (phmm_t **) ckd_calloc(bin_mdef_n_phone(mdef), sizeof(phmm_t *));
234 
235  /* For each unique ciphone/triphone entry in mdef, create a PHMM node */
236  n_phmm = 0;
237  nphone = allphs->ci_only ? bin_mdef_n_ciphone(mdef) : bin_mdef_n_phone(mdef);
238  E_INFO("Building PHMM net of %d phones\n", nphone);
239  for (pid = 0; pid < nphone; pid++) {
240  if ((p = phmm_lookup(allphs, pid)) == NULL) {
241  //not found, should be created
242  p = (phmm_t *) ckd_calloc(1, sizeof(*p));
243  hmm_init(allphs->hmmctx, &(p->hmm), FALSE,
244  mdef_pid2ssid(mdef, pid), mdef->phone[pid].tmat);
245  p->pid = pid;
246  p->ci = bin_mdef_pid2ci(mdef, pid);
247  p->succlist = NULL;
248  p->next = allphs->ci_phmm[p->ci];
249  allphs->ci_phmm[p->ci] = p;
250  n_phmm++;
251  }
252  pid2phmm[pid] = p;
253  }
254 
255  /* Fill out bitvecs of each PHMM node, alloc continuous memory chunk for context bitvectors */
256  lrc_size = bitvec_size(bin_mdef_n_ciphone(mdef));
257  lc = ckd_calloc(n_phmm * 2 * lrc_size, sizeof(bitvec_t));
258  rc = lc + (n_phmm * lrc_size);
259  for (ci = 0; ci < mdef->n_ciphone; ci++) {
260  for (p = allphs->ci_phmm[ci]; p; p = p->next) {
261  p->lc = lc;
262  lc += lrc_size;
263  p->rc = rc;
264  rc += lrc_size;
265  }
266  }
267 
268  /* Fill out lc and rc bitmaps (remember to map all fillers to each other!!) */
269  filler =
270  (s3cipid_t *) ckd_calloc(bin_mdef_n_ciphone(mdef) + 1,
271  sizeof(s3cipid_t));
272 
273  /* Connect fillers */
274  i = 0;
275  for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) {
276  p = pid2phmm[ci];
277  bitvec_set_all(p->lc, bin_mdef_n_ciphone(mdef));
278  bitvec_set_all(p->rc, bin_mdef_n_ciphone(mdef));
279  if (mdef->phone[ci].info.ci.filler) {
280  filler[i++] = ci;
281  }
282  }
283  filler[i] = BAD_S3CIPID;
284 
285 
286  /* Loop over cdphones only if ci_only is not set */
287  for (pid = bin_mdef_n_ciphone(mdef); pid < nphone;
288  pid++) {
289  p = pid2phmm[pid];
290 
291  if (mdef->phone[mdef->phone[pid].info.cd.ctx[1]].info.ci.filler) {
292  for (i = 0; IS_S3CIPID(filler[i]); i++)
293  bitvec_set(p->lc, filler[i]);
294  }
295  else
296  bitvec_set(p->lc, mdef->phone[pid].info.cd.ctx[1]);
297 
298  if (mdef->phone[mdef->phone[pid].info.cd.ctx[2]].info.ci.filler) {
299  for (i = 0; IS_S3CIPID(filler[i]); i++)
300  bitvec_set(p->rc, filler[i]);
301  }
302  else
303  bitvec_set(p->rc, mdef->phone[pid].info.cd.ctx[2]);
304  }
305  ckd_free(pid2phmm);
306  ckd_free(filler);
307 
308  /* Create links between PHMM nodes */
309  n_link = phmm_link(allphs);
310 
311  E_INFO("%d nodes, %d links\n", n_phmm, n_link);
312  return 0;
313 }
314 
315 static void
316 phmm_free(allphone_search_t * allphs)
317 {
318  s3cipid_t ci;
319  bin_mdef_t *mdef;
320 
321  if (!allphs->ci_phmm)
322  //nothing to free
323  return;
324  ckd_free(allphs->ci_phmm[0]->lc);
325  mdef = ((ps_search_t *) allphs)->acmod->mdef;
326  for (ci = 0; ci < mdef_n_ciphone(mdef); ++ci) {
327  phmm_t *p, *next;
328 
329  for (p = allphs->ci_phmm[ci]; p; p = next) {
330  plink_t *l, *lnext;
331 
332  next = p->next;
333  for (l = p->succlist; l; l = lnext) {
334  lnext = l->next;
335  ckd_free(l);
336  }
337  hmm_deinit(&(p->hmm));
338  ckd_free(p);
339  }
340  }
341  ckd_free(allphs->ci_phmm);
342 }
343 
345 static int32
346 phmm_eval_all(allphone_search_t * allphs, const int16 * senscr)
347 {
348  s3cipid_t ci;
349  phmm_t *p;
350  int32 best;
351  bin_mdef_t *mdef;
352  phmm_t **ci_phmm;
353 
354  mdef = ((ps_search_t *) allphs)->acmod->mdef;
355  ci_phmm = allphs->ci_phmm;
356 
357  best = WORST_SCORE;
358 
359  hmm_context_set_senscore(allphs->hmmctx, senscr);
360  for (ci = 0; ci < mdef->n_ciphone; ci++) {
361  for (p = ci_phmm[(unsigned) ci]; p; p = p->next) {
362  if (hmm_frame(&(p->hmm)) == allphs->frame) {
363  int32 score;
364  allphs->n_hmm_eval++;
365  score = hmm_vit_eval((hmm_t *) p);
366  if (score > best)
367  best = score;
368  }
369  }
370  }
371 
372  return best;
373 }
374 
375 static void
376 phmm_exit(allphone_search_t * allphs, int32 best)
377 {
378  s3cipid_t ci;
379  phmm_t *p;
380  int32 th, nf;
381  history_t *h;
382  blkarray_list_t *history;
383  bin_mdef_t *mdef;
384  int32 curfrm;
385  phmm_t **ci_phmm;
386  int32 *ci2lmwid;
387 
388  th = best + allphs->pbeam;
389 
390  history = allphs->history;
391  mdef = ps_search_acmod(allphs)->mdef;
392  curfrm = allphs->frame;
393  ci_phmm = allphs->ci_phmm;
394  ci2lmwid = allphs->ci2lmwid;
395 
396  nf = curfrm + 1;
397 
398  for (ci = 0; ci < mdef->n_ciphone; ci++) {
399  for (p = ci_phmm[(unsigned) ci]; p; p = p->next) {
400  if (hmm_frame(&(p->hmm)) == curfrm) {
401 
402  if (hmm_bestscore(&(p->hmm)) >= th) {
403 
404  h = (history_t *) ckd_calloc(1, sizeof(*h));
405  h->ef = curfrm;
406  h->phmm = p;
407  h->hist = hmm_out_history(&(p->hmm));
408  h->score = hmm_out_score(&(p->hmm));
409 
410  if (!allphs->lm) {
411  h->tscore = allphs->inspen;
412  }
413  else {
414  if (h->hist > 0) {
415  int32 n_used;
416  history_t *pred =
417  blkarray_list_get(history, h->hist);
418 
419  if (pred->hist > 0) {
420  history_t *pred_pred =
421  blkarray_list_get(history,
422  h->hist);
423  h->tscore =
424  ngram_tg_score(allphs->lm,
425  ci2lmwid
426  [pred_pred->phmm->ci],
427  ci2lmwid[pred->
428  phmm->ci],
429  ci2lmwid[p->ci],
430  &n_used) >>
431  SENSCR_SHIFT;
432  }
433  else {
434  h->tscore =
435  ngram_bg_score(allphs->lm,
436  ci2lmwid
437  [pred->phmm->ci],
438  ci2lmwid[p->ci],
439  &n_used) >>
440  SENSCR_SHIFT;
441  }
442  }
443  else {
444  /*
445  * This is the beginning SIL and in srch_allphone_begin()
446  * it's inscore is set to 0.
447  */
448  h->tscore = 0;
449  }
450  }
451 
452  blkarray_list_append(history, h);
453 
454  /* Mark PHMM active in next frame */
455  hmm_frame(&(p->hmm)) = nf;
456  }
457  else {
458  /* Reset state scores */
459  hmm_clear(&(p->hmm));
460  }
461  }
462  }
463  }
464 }
465 
466 static void
467 phmm_trans(allphone_search_t * allphs, int32 best,
468  int32 frame_history_start)
469 {
470  history_t *h;
471  phmm_t *from, *to;
472  plink_t *l;
473  int32 newscore, nf, curfrm;
474  int32 *ci2lmwid;
475  int32 hist_idx;
476 
477  curfrm = allphs->frame;
478  nf = curfrm + 1;
479  ci2lmwid = allphs->ci2lmwid;
480 
481  /* Transition from exited nodes to initial states of HMMs */
482  for (hist_idx = frame_history_start;
483  hist_idx < blkarray_list_n_valid(allphs->history); hist_idx++) {
484  h = blkarray_list_get(allphs->history, hist_idx);
485  from = h->phmm;
486  for (l = from->succlist; l; l = l->next) {
487  int32 tscore;
488  to = l->phmm;
489 
490  /* No LM, just use uniform (insertion penalty). */
491  if (!allphs->lm)
492  tscore = allphs->inspen;
493  /* If they are not in the LM, kill this
494  * transition. */
495  else if (ci2lmwid[to->ci] == NGRAM_INVALID_WID)
496  continue;
497  else {
498  int32 n_used;
499  if (h->hist > 0) {
500  history_t *pred =
501  blkarray_list_get(allphs->history, h->hist);
502  tscore =
503  ngram_tg_score(allphs->lm,
504  ci2lmwid[pred->phmm->ci],
505  ci2lmwid[from->ci],
506  ci2lmwid[to->ci],
507  &n_used) >> SENSCR_SHIFT;
508  }
509  else {
510  tscore = ngram_bg_score(allphs->lm,
511  ci2lmwid[from->ci],
512  ci2lmwid[to->ci],
513  &n_used) >> SENSCR_SHIFT;
514  }
515  }
516 
517  newscore = h->score + tscore;
518  if ((newscore > best + allphs->beam)
519  && (newscore > hmm_in_score(&(to->hmm)))) {
520  hmm_enter(&(to->hmm), newscore, hist_idx, nf);
521  }
522  }
523  }
524 }
525 
526 ps_search_t *
527 allphone_search_init(const char *name,
528  ngram_model_t * lm,
529  cmd_ln_t * config,
530  acmod_t * acmod, dict_t * dict, dict2pid_t * d2p)
531 {
532  int i;
533  bin_mdef_t *mdef;
534  allphone_search_t *allphs;
535  static char *lmname = "default";
536 
537  allphs = (allphone_search_t *) ckd_calloc(1, sizeof(*allphs));
538  ps_search_init(ps_search_base(allphs), &allphone_funcs, PS_SEARCH_TYPE_ALLPHONE, name, config, acmod,
539  dict, d2p);
540  mdef = acmod->mdef;
541 
542  allphs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(mdef),
543  acmod->tmat->tp, NULL, mdef->sseq);
544  if (allphs->hmmctx == NULL) {
545  ps_search_free(ps_search_base(allphs));
546  return NULL;
547  }
548 
549  allphs->ci_only = cmd_ln_boolean_r(config, "-allphone_ci");
550  allphs->lw = cmd_ln_float32_r(config, "-lw");
551 
552  phmm_build(allphs);
553 
554  if (lm) {
555  //language model is defined
556  allphs->lm = ngram_model_set_init(config, &lm, &lmname, NULL, 1);
557  if (!allphs->lm) {
558  E_ERROR
559  ("Failed to initialize ngram model set for phoneme decoding");
560  allphone_search_free((ps_search_t *) allphs);
561  return NULL;
562  }
563  allphs->ci2lmwid =
564  (int32 *) ckd_calloc(mdef->n_ciphone,
565  sizeof(*allphs->ci2lmwid));
566  for (i = 0; i < mdef->n_ciphone; i++) {
567  allphs->ci2lmwid[i] =
568  ngram_wid(allphs->lm,
569  (char *) bin_mdef_ciphone_str(mdef, i));
570  /* Map filler phones to silence if not found */
571  if (allphs->ci2lmwid[i] == NGRAM_INVALID_WID
572  && bin_mdef_ciphone_str(mdef, i))
573  allphs->ci2lmwid[i] =
574  ngram_wid(allphs->lm,
575  (char *) bin_mdef_ciphone_str(mdef,
576  mdef_silphone
577  (mdef)));
578  }
579  }
580  else {
581  E_WARN
582  ("Failed to load language model specified in -allphone, doing unconstrained phone-loop decoding\n");
583  allphs->inspen =
584  (int32) (logmath_log
585  (acmod->lmath, cmd_ln_float32_r(config, "-pip"))
586  * allphs->lw) >> SENSCR_SHIFT;
587  }
588 
589  allphs->n_tot_frame = 0;
590  allphs->frame = -1;
591  allphs->segments = NULL;
592 
593  /* Get search pruning parameters */
594  allphs->beam
595  =
596  (int32) logmath_log(acmod->lmath,
597  cmd_ln_float64_r(config, "-beam"))
598  >> SENSCR_SHIFT;
599  allphs->pbeam
600  =
601  (int32) logmath_log(acmod->lmath,
602  cmd_ln_float64_r(config, "-pbeam"))
603  >> SENSCR_SHIFT;
604 
605  /* LM related weights/penalties */
606  allphs->history = blkarray_list_init();
607 
608  /* Acoustic score scale for posterior probabilities. */
609  allphs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
610 
611  E_INFO("Allphone(beam: %d, pbeam: %d)\n", allphs->beam, allphs->pbeam);
612 
613  ptmr_init(&allphs->perf);
614 
615  return (ps_search_t *) allphs;
616 }
617 
618 int
619 allphone_search_reinit(ps_search_t * search, dict_t * dict,
620  dict2pid_t * d2p)
621 {
622  allphone_search_t *allphs = (allphone_search_t *) search;
623 
624  /* Free old dict2pid, dict */
625  ps_search_base_reinit(search, dict, d2p);
626 
627  if (!allphs->lm) {
628  E_WARN
629  ("-lm argument missing; doing unconstrained phone-loop decoding\n");
630  allphs->inspen =
631  (int32) (logmath_log
632  (search->acmod->lmath,
633  cmd_ln_float32_r(search->config,
634  "-pip")) *
635  allphs->lw) >> SENSCR_SHIFT;
636  }
637 
638  return 0;
639 }
640 
641 void
642 allphone_search_free(ps_search_t * search)
643 {
644  allphone_search_t *allphs = (allphone_search_t *) search;
645 
646 
647  double n_speech = (double)allphs->n_tot_frame
648  / cmd_ln_int32_r(ps_search_config(allphs), "-frate");
649 
650  E_INFO("TOTAL fwdflat %.2f CPU %.3f xRT\n",
651  allphs->perf.t_tot_cpu,
652  allphs->perf.t_tot_cpu / n_speech);
653  E_INFO("TOTAL fwdflat %.2f wall %.3f xRT\n",
654  allphs->perf.t_tot_elapsed,
655  allphs->perf.t_tot_elapsed / n_speech);
656 
657  ps_search_base_free(search);
658 
659  hmm_context_free(allphs->hmmctx);
660  phmm_free(allphs);
661  if (allphs->lm)
662  ngram_model_free(allphs->lm);
663  if (allphs->ci2lmwid)
664  ckd_free(allphs->ci2lmwid);
665 
666  blkarray_list_free(allphs->history);
667 
668  ckd_free(allphs);
669 }
670 
671 int
672 allphone_search_start(ps_search_t * search)
673 {
674  allphone_search_t *allphs;
675  bin_mdef_t *mdef;
676  s3cipid_t ci;
677  phmm_t *p;
678 
679  allphs = (allphone_search_t *) search;
680  mdef = search->acmod->mdef;
681 
682  /* Reset all HMMs. */
683  for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) {
684  for (p = allphs->ci_phmm[(unsigned) ci]; p; p = p->next) {
685  hmm_clear(&(p->hmm));
686  }
687  }
688 
689  allphs->n_hmm_eval = 0;
690  allphs->n_sen_eval = 0;
691 
692  /* Free history nodes, if any */
693  blkarray_list_reset(allphs->history);
694 
695  /* Initialize start state of the SILENCE PHMM */
696  allphs->frame = 0;
697  ci = bin_mdef_silphone(mdef);
698  if (NOT_S3CIPID(ci))
699  E_FATAL("Cannot find CI-phone %s\n", S3_SILENCE_CIPHONE);
700  for (p = allphs->ci_phmm[ci]; p && (p->pid != ci); p = p->next);
701  if (!p)
702  E_FATAL("Cannot find HMM for %s\n", S3_SILENCE_CIPHONE);
703  hmm_enter(&(p->hmm), 0, 0, allphs->frame);
704 
705  ptmr_reset(&allphs->perf);
706  ptmr_start(&allphs->perf);
707 
708  return 0;
709 }
710 
711 static void
712 allphone_search_sen_active(allphone_search_t * allphs)
713 {
714  acmod_t *acmod;
715  bin_mdef_t *mdef;
716  phmm_t *p;
717  int32 ci;
718 
719  acmod = ps_search_acmod(allphs);
720  mdef = acmod->mdef;
721 
722  acmod_clear_active(acmod);
723  for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++)
724  for (p = allphs->ci_phmm[ci]; p; p = p->next)
725  if (hmm_frame(&(p->hmm)) == allphs->frame)
726  acmod_activate_hmm(acmod, &(p->hmm));
727 }
728 
729 int
730 allphone_search_step(ps_search_t * search, int frame_idx)
731 {
732  int32 bestscr, frame_history_start;
733  const int16 *senscr;
734  allphone_search_t *allphs = (allphone_search_t *) search;
735  acmod_t *acmod = search->acmod;
736 
737  if (!acmod->compallsen)
738  allphone_search_sen_active(allphs);
739  senscr = acmod_score(acmod, &frame_idx);
740  allphs->n_sen_eval += acmod->n_senone_active;
741  bestscr = phmm_eval_all(allphs, senscr);
742 
743  frame_history_start = blkarray_list_n_valid(allphs->history);
744  phmm_exit(allphs, bestscr);
745  phmm_trans(allphs, bestscr, frame_history_start);
746 
747  allphs->frame++;
748 
749  return 0;
750 }
751 
752 static int32
753 ascore(allphone_search_t * allphs, history_t * h)
754 {
755  int32 score = h->score;
756 
757  if (h->hist > 0) {
758  history_t *pred = blkarray_list_get(allphs->history, h->hist);
759  score -= pred->score;
760  }
761 
762  return score - h->tscore;
763 }
764 
765 static void
766 allphone_clear_segments(allphone_search_t * allphs)
767 {
768  gnode_t *gn;
769  for (gn = allphs->segments; gn; gn = gn->next) {
770  ckd_free(gnode_ptr(gn));
771  }
772  glist_free(allphs->segments);
773  allphs->segments = NULL;
774 }
775 
776 static void
777 allphone_backtrace(allphone_search_t * allphs, int32 f)
778 {
779  int32 best, hist_idx, best_idx;
780  int32 frm, last_frm;
781  history_t *h;
782  phseg_t *s;
783 
784  /* Clear old list */
785  allphone_clear_segments(allphs);
786 
787  frm = last_frm = f;
788  /* Find the first history entry for the requested frame */
789  hist_idx = blkarray_list_n_valid(allphs->history) - 1;
790  while (hist_idx > 0) {
791  h = blkarray_list_get(allphs->history, hist_idx);
792  if (h->ef <= f) {
793  frm = last_frm = h->ef;
794  break;
795  }
796  hist_idx--;
797  }
798 
799  if (hist_idx < 0)
800  return;
801 
802  /* Find bestscore */
803  best = (int32) 0x80000000;
804  best_idx = -1;
805  while (frm == last_frm && hist_idx > 0) {
806  h = blkarray_list_get(allphs->history, hist_idx);
807  frm = h->ef;
808  if (h->score > best && frm == last_frm) {
809  best = h->score;
810  best_idx = hist_idx;
811  }
812  hist_idx--;
813  }
814 
815  if (best_idx < 0)
816  return;
817 
818  /* Backtrace */
819  while (best_idx > 0) {
820  h = blkarray_list_get(allphs->history, best_idx);
821  s = (phseg_t *) ckd_calloc(1, sizeof(phseg_t));
822  s->ci = h->phmm->ci;
823  s->sf =
824  (h->hist >
825  0) ? ((history_t *) blkarray_list_get(allphs->history,
826  h->hist))->ef + 1 : 0;
827  s->ef = h->ef;
828  s->score = ascore(allphs, h);
829  s->tscore = h->tscore;
830  allphs->segments = glist_add_ptr(allphs->segments, s);
831 
832  best_idx = h->hist;
833  }
834 
835  return;
836 }
837 
838 int
839 allphone_search_finish(ps_search_t * search)
840 {
841  allphone_search_t *allphs;
842  int32 cf, n_hist;
843 
844  allphs = (allphone_search_t *) search;
845 
846  allphs->n_tot_frame += allphs->frame;
847  n_hist = blkarray_list_n_valid(allphs->history);
848  E_INFO
849  ("%d frames, %d HMMs (%d/fr), %d senones (%d/fr), %d history entries (%d/fr)\n",
850  allphs->frame, allphs->n_hmm_eval,
851  (allphs->frame > 0) ? allphs->n_hmm_eval / allphs->frame : 0,
852  allphs->n_sen_eval,
853  (allphs->frame > 0) ? allphs->n_sen_eval / allphs->frame : 0,
854  n_hist, (allphs->frame > 0) ? n_hist / allphs->frame : 0);
855 
856  /* Now backtrace. */
857  allphone_backtrace(allphs, allphs->frame - 1);
858 
859  /* Print out some statistics. */
860  ptmr_stop(&allphs->perf);
861  /* This is the number of frames processed. */
862  cf = ps_search_acmod(allphs)->output_frame;
863  if (cf > 0) {
864  double n_speech = (double) (cf + 1)
865  / cmd_ln_int32_r(ps_search_config(allphs), "-frate");
866  E_INFO("allphone %.2f CPU %.3f xRT\n",
867  allphs->perf.t_cpu, allphs->perf.t_cpu / n_speech);
868  E_INFO("allphone %.2f wall %.3f xRT\n",
869  allphs->perf.t_elapsed, allphs->perf.t_elapsed / n_speech);
870  }
871 
872 
873  return 0;
874 }
875 
876 char const *
877 allphone_search_hyp(ps_search_t * search, int32 * out_score,
878  int32 * out_is_final)
879 {
880  allphone_search_t *allphs;
881  phseg_t *p;
882  gnode_t *gn;
883  const char *phone_str;
884  bin_mdef_t *mdef;
885  int len, hyp_idx, phone_idx;
886 
887  allphs = (allphone_search_t *) search;
888  mdef = search->acmod->mdef;
889 
890  /* Create hypothesis */
891  if (search->hyp_str)
892  ckd_free(search->hyp_str);
893  search->hyp_str = NULL;
894 
895  allphone_backtrace(allphs, allphs->frame - 1);
896  if (allphs->segments == NULL) {
897  return NULL;
898  }
899 
900  len = glist_count(allphs->segments) * 10; // maximum length of one phone with spacebar
901 
902  search->hyp_str = (char *) ckd_calloc(len, sizeof(*search->hyp_str));
903  hyp_idx = 0;
904  for (gn = allphs->segments; gn; gn = gn->next) {
905  p = gnode_ptr(gn);
906  phone_str = bin_mdef_ciphone_str(mdef, p->ci);
907  phone_idx = 0;
908  while (phone_str[phone_idx] != '\0')
909  search->hyp_str[hyp_idx++] = phone_str[phone_idx++];
910  search->hyp_str[hyp_idx++] = ' ';
911  }
912  search->hyp_str[--hyp_idx] = '\0';
913  E_INFO("Hyp: %s\n", search->hyp_str);
914  return search->hyp_str;
915 }
Internal implementation of PocketSphinx decoder.
History (paths) information at any point in allphone Viterbi search.
struct phmm_s * next
Next unique PHMM for same parent basephone.
Base structure for search module.
ptmr_t perf
Performance counter.
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
Definition: hmm.c:89
s3cipid_t ci
Parent basephone for this PHMM.
int32 n_sen_eval
Total senones evaluated this utt.
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
Definition: bin_mdef.c:737
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
int n_senone_active
Number of active GMMs.
Definition: acmod.h:169
acmod_t * acmod
Acoustic model.
An individual HMM among the HMM search space.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state]...
Definition: tmat.h:107
ps_segfuncs_t * vt
V-table of seg methods.
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Definition: bin_mdef.h:134
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
Definition: hmm.c:111
int32 lscr
Language model score.
s3pid_t pid
Phone id (temp. during init.)
int32 tmat
Transition matrix ID.
Definition: bin_mdef.h:75
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition: acmod.c:1233
int32 hist
Previous history entry.
bitvec_t * lc
Set (bit-vector) of left context phones seen for this PHMM.
int32 score
Path score for this path.
#define BAD_S3CIPID
Ci phone id.
Definition: s3types.h:64
char const * word
Word string (pointer into dictionary hash)
ps_search_t * search
Search object from whence this came.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
int32 * ci2lmwid
Mapping of CI phones to LM word IDs.
int32 inspen
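Phone insertion penalty (log of -pip scaled by the language weight); used as the transition score when no language model is loaded.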
int32 hmm_vit_eval(hmm_t *hmm)
Viterbi evaluation of given HMM.
Definition: hmm.c:789
struct mdef_entry_s::@0::@1 ci
CI phone information - attributes (just "filler" for now)
frame_idx_t ef
End frame.
uint8 compallsen
Compute all senones?
Definition: acmod.h:188
bitvec_t * rc
Set (bit-vector) of right context phones seen for this PHMM.
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
Definition: hmm.c:56
void ps_search_base_free(ps_search_t *search)
Free search.
Implementation of allphone search structure.
cmd_ln_t * config
Configuration.
Definition: acmod.h:150
int16 s3cipid_t
Size definitions for more semantically meaningful units.
Definition: s3types.h:63
#define WORST_SCORE
Large "bad" score.
Definition: hmm.h:84
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
frame_idx_t ef
End frame.
int32 ascr
Acoustic score.
int32 tscore
Transition score for this path.
hmm_context_t * hmmctx
HMM context.
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
Definition: hmm.c:201
Segment iterator over list of phseg.
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition: acmod.c:1217
#define S3_SILENCE_CIPHONE
Hard-coded silence CI phone name.
Definition: mdef.h:81
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
Definition: hmm.h:227
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
a structure for a dictionary.
Definition: dict.h:76
float32 ascale
Acoustic score scale for posterior probabilities.
int32 n_tot_frame
Total number of frames processed.
ngram_model_t * lm
Ngram model set.
Word graph structure used in bestpath/nbest search.
hmm_t hmm
Base HMM structure.
void hmm_clear(hmm_t *h)
Reset the states of the HMM to the invalid condition.
Definition: hmm.c:183
char * hyp_str
Current hypothesis string.
int32 ci_only
Use context-independent phones for decoding.
int32 n_hmm_eval
Total HMMs evaluated this utt.
struct plink_s * succlist
List of successor PHMM nodes.
Models a single unique pair.
phmm_t * phmm
PHMM ending this path.
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
Definition: hmm.c:80
int32 pbeam
Effective beams after applying beam_factor.
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
V-table for search algorithm.
mdef_entry_t * phone
All phone structures.
Definition: bin_mdef.h:133
blkarray_list_t * history
List of history nodes allocated in each frame.
Base structure for hypothesis segmentation iterator.
int32 n_ciphone
Number of base (CI) phones.
Definition: bin_mdef.h:119
cmd_ln_t * config
Configuration.
phmm_t ** ci_phmm
PHMM lists (for each CI phone)
Acoustic model structure.
Definition: acmod.h:148
Phone level segmentation information.
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
frame_idx_t sf
Start frame.
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition: acmod.c:1126
frame_idx_t frame
Current frame.