47 #include <sphinxbase/ckd_alloc.h>
48 #include <sphinxbase/listelem_alloc.h>
49 #include <sphinxbase/err.h>
59 static int ngram_search_step(
ps_search_t *search,
int frame_idx);
60 static int ngram_search_finish(
ps_search_t *search);
62 static char const *ngram_search_hyp(
ps_search_t *search, int32 *out_score, int32 *out_is_final);
63 static int32 ngram_search_prob(
ps_search_t *search);
75 ngram_search_seg_iter,
78 static ngram_model_t *default_lm;
87 n_words = ps_search_n_words(ngs);
88 words = (
char const**)ckd_calloc(n_words,
sizeof(*words));
90 for (i = 0; i < n_words; ++i)
91 words[i] = dict_wordstr(ps_search_dict(ngs), i);
92 ngram_model_set_map_words(ngs->
lmset, words, n_words);
102 config = ps_search_config(ngs);
103 acmod = ps_search_acmod(ngs);
106 ngs->beam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-beam"))>>
SENSCR_SHIFT;
107 ngs->wbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-wbeam"))>>
SENSCR_SHIFT;
108 ngs->pbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-pbeam"))>>
SENSCR_SHIFT;
109 ngs->lpbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-lpbeam"))>>
SENSCR_SHIFT;
110 ngs->lponlybeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-lponlybeam"))>>
SENSCR_SHIFT;
111 ngs->fwdflatbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-fwdflatbeam"))>>
SENSCR_SHIFT;
112 ngs->fwdflatwbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-fwdflatwbeam"))>>
SENSCR_SHIFT;
115 ngs->maxwpf = cmd_ln_int32_r(config,
"-maxwpf");
116 ngs->maxhmmpf = cmd_ln_int32_r(config,
"-maxhmmpf");
119 ngs->wip = logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-wip")) >>
SENSCR_SHIFT;
120 ngs->nwpen = logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-nwpen")) >>
SENSCR_SHIFT;
121 ngs->pip = logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-pip")) >>
SENSCR_SHIFT;
122 ngs->silpen = ngs->pip
123 + (logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-silprob"))>>
SENSCR_SHIFT);
124 ngs->fillpen = ngs->pip
125 + (logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-fillprob"))>>
SENSCR_SHIFT);
128 ngs->fwdflat_fwdtree_lw_ratio =
129 cmd_ln_float32_r(config,
"-fwdflatlw")
130 / cmd_ln_float32_r(config,
"-lw");
131 ngs->bestpath_fwdtree_lw_ratio =
132 cmd_ln_float32_r(config,
"-bestpathlw")
133 / cmd_ln_float32_r(config,
"-lw");
136 ngs->
ascale = 1.0 / cmd_ln_float32_r(config,
"-ascale");
148 static char *lmname =
"default";
153 cmd_ln_boolean_r(config,
"-fwdtree"));
155 ngs = ckd_calloc(1,
sizeof(*ngs));
156 ps_search_init(&ngs->base, &ngram_funcs, PS_SEARCH_TYPE_NGRAM, name, config, acmod, dict, d2p);
160 if (ngs->
hmmctx == NULL) {
161 ps_search_free(ps_search_base(ngs));
169 ngram_search_calc_beams(ngs);
174 ngs->word_lat_idx = ckd_calloc(
dict_size(dict),
175 sizeof(*ngs->word_lat_idx));
177 ngs->last_ltrans = ckd_calloc(
dict_size(dict),
178 sizeof(*ngs->last_ltrans));
182 ngs->bp_table_size = cmd_ln_int32_r(config,
"-latsize");
183 ngs->bp_table = ckd_calloc(ngs->bp_table_size,
184 sizeof(*ngs->bp_table));
186 ngs->bscore_stack_size = ngs->bp_table_size * 20;
187 ngs->bscore_stack = ckd_calloc(ngs->bscore_stack_size,
188 sizeof(*ngs->bscore_stack));
191 sizeof(*ngs->bp_table_idx));
198 ngs->
lmset = ngram_model_set_init(config, &lm, &lmname, NULL, 1);
202 if (ngram_wid(ngs->
lmset, S3_FINISH_WORD) ==
203 ngram_unknown_wid(ngs->
lmset))
205 E_ERROR(
"Language model/set does not contain </s>, "
206 "recognition will fail\n");
211 ngram_search_update_widmap(ngs);
214 if (cmd_ln_boolean_r(config,
"-fwdtree")) {
217 ngs->fwdtree_perf.name =
"fwdtree";
218 ptmr_init(&ngs->fwdtree_perf);
220 if (cmd_ln_boolean_r(config,
"-fwdflat")) {
223 ngs->fwdflat_perf.name =
"fwdflat";
224 ptmr_init(&ngs->fwdflat_perf);
226 if (cmd_ln_boolean_r(config,
"-bestpath")) {
227 ngs->bestpath = TRUE;
228 ngs->bestpath_perf.name =
"bestpath";
229 ptmr_init(&ngs->bestpath_perf);
251 ckd_free(ngs->word_lat_idx);
253 ckd_free(ngs->last_ltrans);
255 ngs->word_lat_idx = ckd_calloc(search->
n_words,
sizeof(*ngs->word_lat_idx));
257 ngs->last_ltrans = ckd_calloc(search->
n_words,
sizeof(*ngs->last_ltrans));
259 = ckd_calloc_2d(2, search->
n_words,
266 if (ngs->
lmset == NULL)
270 ngram_search_calc_beams(ngs);
273 ngram_search_update_widmap(ngs);
298 double n_speech = (double)ngs->n_tot_frame
299 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
301 E_INFO(
"TOTAL bestpath %.2f CPU %.3f xRT\n",
302 ngs->bestpath_perf.t_tot_cpu,
303 ngs->bestpath_perf.t_tot_cpu / n_speech);
304 E_INFO(
"TOTAL bestpath %.2f wall %.3f xRT\n",
305 ngs->bestpath_perf.t_tot_elapsed,
306 ngs->bestpath_perf.t_tot_elapsed / n_speech);
314 ngram_model_free(ngs->
lmset);
317 ckd_free(ngs->word_lat_idx);
319 ckd_free(ngs->bp_table);
320 ckd_free(ngs->bscore_stack);
321 if (ngs->bp_table_idx != NULL)
322 ckd_free(ngs->bp_table_idx - 1);
324 ckd_free(ngs->last_ltrans);
333 ngs->bp_table_idx = ckd_realloc(ngs->bp_table_idx - 1,
335 *
sizeof(*ngs->bp_table_idx));
343 ngs->bp_table_idx[frame_idx] = ngs->bpidx;
353 ent = ngs->bp_table + bp;
354 if (ent->
bp == NO_BP)
357 prev = ngs->bp_table + ent->
bp;
366 ent->
real_wid = dict_basewid(ps_search_dict(ngs),
372 ent->
real_wid = dict_basewid(ps_search_dict(ngs), ent->
wid);
380 #define NGRAM_HISTORY_LONG_WORD 2000
384 int32 w, int32 score, int32 path, int32 rc)
391 bp = ngs->word_lat_idx[w];
394 if (frame_idx - ngs->bp_table[path].
frame > NGRAM_HISTORY_LONG_WORD) {
395 E_WARN(
"Word '%s' survived for %d frames, potential overpruning\n", dict_wordstr(ps_search_dict(ngs), w),
396 frame_idx - ngs->bp_table[path].
frame);
404 if (ngs->bp_table[bp].
bp != path) {
405 int32 bplh[2], newlh[2];
409 E_DEBUG(2,(
"Updating path history %d => %d frame %d\n",
410 ngs->bp_table[bp].
bp, path, frame_idx));
411 bplh[0] = ngs->bp_table[bp].
bp == -1
413 bplh[1] = ngs->bp_table[bp].
bp == -1
414 ? -1 : ngs->bp_table[ngs->bp_table[bp].
bp].
real_wid;
415 newlh[0] = path == -1
417 newlh[1] = path == -1
418 ? -1 : ngs->bp_table[path].
real_wid;
421 if (bplh[0] != newlh[0] || bplh[1] != newlh[1]) {
425 E_DEBUG(1, (
"Updating language model state %s,%s => %s,%s frame %d\n",
426 dict_wordstr(ps_search_dict(ngs), bplh[0]),
427 dict_wordstr(ps_search_dict(ngs), bplh[1]),
428 dict_wordstr(ps_search_dict(ngs), newlh[0]),
429 dict_wordstr(ps_search_dict(ngs), newlh[1]),
431 set_real_wid(ngs, bp);
433 ngs->bp_table[bp].
bp = path;
435 ngs->bp_table[bp].
score = score;
440 if (ngs->bp_table[bp].
s_idx != -1)
441 ngs->bscore_stack[ngs->bp_table[bp].
s_idx + rc] = score;
448 if (ngs->bpidx == NO_BP) {
449 E_ERROR(
"No entries in backpointer table!");
454 if (ngs->bpidx >= ngs->bp_table_size) {
455 ngs->bp_table_size *= 2;
456 ngs->bp_table = ckd_realloc(ngs->bp_table,
458 *
sizeof(*ngs->bp_table));
459 E_INFO(
"Resized backpointer table to %d entries\n", ngs->bp_table_size);
461 if (ngs->bss_head >= ngs->bscore_stack_size
462 - bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef)) {
463 ngs->bscore_stack_size *= 2;
464 ngs->bscore_stack = ckd_realloc(ngs->bscore_stack,
465 ngs->bscore_stack_size
466 *
sizeof(*ngs->bscore_stack));
467 E_INFO(
"Resized score stack to %d entries\n", ngs->bscore_stack_size);
470 ngs->word_lat_idx[w] = ngs->bpidx;
471 be = &(ngs->bp_table[ngs->bpidx]);
473 be->
frame = frame_idx;
476 be->
s_idx = ngs->bss_head;
478 assert(path != ngs->bpidx);
482 be->
last_phone = dict_last_phone(ps_search_dict(ngs),w);
483 if (dict_is_single_phone(ps_search_dict(ngs), w)) {
489 be->
last2_phone = dict_second_last_phone(ps_search_dict(ngs),w);
494 for (i = 0; i < rcsize; ++i)
495 ngs->bscore_stack[ngs->bss_head + i] =
WORST_SCORE;
497 ngs->bscore_stack[ngs->bss_head + rc] = score;
498 set_real_wid(ngs, ngs->bpidx);
501 ngs->bss_head += rcsize;
517 if (frame_idx == -1 || frame_idx >= ngs->
n_frame)
519 end_bpidx = ngs->bp_table_idx[frame_idx];
525 while (frame_idx >= 0 && ngs->bp_table_idx[frame_idx] == end_bpidx)
532 assert(end_bpidx < ngs->bp_table_size);
533 for (bp = ngs->bp_table_idx[frame_idx]; bp < end_bpidx; ++bp) {
534 if (ngs->bp_table[bp].
wid == ps_search_finish_wid(ngs)
536 best_score = ngs->bp_table[bp].
score;
539 if (ngs->bp_table[bp].
wid == ps_search_finish_wid(ngs))
543 if (out_best_score) {
544 *out_best_score = best_score;
547 *out_is_final = (ngs->bp_table[bp].
wid == ps_search_finish_wid(ngs));
565 while (bp != NO_BP) {
566 bptbl_t *be = &ngs->bp_table[bp];
569 len += strlen(dict_basestr(ps_search_dict(ngs), be->
wid)) + 1;
577 base->
hyp_str = ckd_calloc(1, len);
581 while (bp != NO_BP) {
582 bptbl_t *be = &ngs->bp_table[bp];
587 len = strlen(dict_basestr(ps_search_dict(ngs), be->
wid));
589 memcpy(c, dict_basestr(ps_search_dict(ngs), be->
wid), len);
605 int32 i, tmatid, ciphone;
609 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
610 ciphone = dict_last_phone(ps_search_dict(ngs),w);
613 dict_second_last_phone(ps_search_dict(ngs),w));
614 tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
616 if ((hmm == NULL) || (hmm_nonmpx_ssid(&hmm->
hmm) != rssid->
ssid[0])) {
624 E_DEBUG(3,(
"allocated rc_id 0 ssid %d ciphone %d lc %d word %s\n",
626 dict_second_last_phone(ps_search_dict(ngs),w),
627 dict_wordstr(ps_search_dict(ngs),w)));
629 for (i = 1; i < rssid->
n_ssid; ++i) {
630 if ((hmm->
next == NULL) || (hmm_nonmpx_ssid(&hmm->
next->
hmm) != rssid->
ssid[i])) {
639 E_DEBUG(3,(
"allocated rc_id %d ssid %d ciphone %d lc %d word %s\n",
641 dict_second_last_phone(ps_search_dict(ngs),w),
642 dict_wordstr(ps_search_dict(ngs),w)));
654 for (hmm = ngs->
word_chan[w]; hmm; hmm = thmm) {
680 return ngs->bscore_stack[pbe->
s_idx + rssid->
cimap[rcphone]];
689 int32 *out_ascr, int32 *out_lscr)
695 if (be->
bp == NO_BP) {
696 *out_ascr = be->
score;
702 pbe = ngs->bp_table + be->
bp;
704 dict_first_phone(ps_search_dict(ngs),be->
wid));
711 if (be->
wid == ps_search_silence_wid(ngs)) {
712 *out_lscr = ngs->silpen;
715 *out_lscr = ngs->fillpen;
719 *out_lscr = ngram_tg_score(ngs->
lmset,
724 *out_lscr = *out_lscr * lwf;
726 *out_ascr = be->
score - start_score - *out_lscr;
735 ngram_model_flush(ngs->
lmset);
738 else if (ngs->fwdflat)
746 ngram_search_step(
ps_search_t *search,
int frame_idx)
752 else if (ngs->fwdflat)
762 E_INFO(
"Backpointer table (%d entries):\n", ngs->bpidx);
763 for (i = 0; i < ngs->bpidx; ++i) {
764 bptbl_t *bpe = ngs->bp_table + i;
767 E_INFO_NOFN(
"%-5d %-10s start %-3d end %-3d score %-8d bp %-3d real_wid %-5d prev_real_wid %-5d",
768 i, dict_wordstr(ps_search_dict(ngs), bpe->
wid),
770 ? 0 : ngs->bp_table[bpe->
bp].
frame + 1),
781 for (j = 0; j < rcsize; ++j)
783 E_INFOCONT(
" %d", bpe->
score - ngs->bscore_stack[bpe->
s_idx + j]);
794 ngs->n_tot_frame += ngs->
n_frame;
808 while (ps_search_acmod(ngs)->n_feat_frame > 0) {
820 else if (ngs->fwdflat) {
830 ngram_search_bestpath(
ps_search_t *search, int32 *out_score,
int backward)
836 ngs->bestpath_fwdtree_lw_ratio,
842 if (search->
post == 0)
852 ngram_search_hyp(
ps_search_t *search, int32 *out_score, int32 *out_is_final)
857 if (ngs->bestpath && ngs->done) {
863 ptmr_reset(&ngs->bestpath_perf);
864 ptmr_start(&ngs->bestpath_perf);
867 if ((link = ngram_search_bestpath(search, out_score, FALSE)) == NULL)
870 ptmr_stop(&ngs->bestpath_perf);
872 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
873 E_INFO(
"bestpath %.2f CPU %.3f xRT\n",
874 ngs->bestpath_perf.t_cpu,
875 ngs->bestpath_perf.t_cpu / n_speech);
876 E_INFO(
"bestpath %.2f wall %.3f xRT\n",
877 ngs->bestpath_perf.t_elapsed,
878 ngs->bestpath_perf.t_elapsed / n_speech);
894 ngram_search_bp2itor(
ps_seg_t *seg,
int bp)
899 be = &ngs->bp_table[bp];
900 pbe = be->
bp == -1 ? NULL : &ngs->bp_table[be->
bp];
901 seg->
word = dict_wordstr(ps_search_dict(ngs), be->
wid);
903 seg->
sf = pbe ? pbe->
frame + 1 : 0;
916 dict_first_phone(ps_search_dict(ngs), be->
wid));
918 if (be->
wid == ps_search_silence_wid(ngs)) {
919 seg->
lscr = ngs->silpen;
922 seg->
lscr = ngs->fillpen;
941 ckd_free(itor->
bpidx);
951 ngram_bp_seg_free(seg);
955 ngram_search_bp2itor(seg, itor->
bpidx[itor->
cur]);
974 itor = ckd_calloc(1,
sizeof(*itor));
975 itor->
base.
vt = &ngram_bp_segfuncs;
980 while (bp != NO_BP) {
981 bptbl_t *be = &ngs->bp_table[bp];
992 while (bp != NO_BP) {
993 bptbl_t *be = &ngs->bp_table[bp];
994 itor->
bpidx[cur] = bp;
1006 ngram_search_seg_iter(
ps_search_t *search, int32 *out_score)
1011 if (ngs->bestpath && ngs->done) {
1017 ptmr_reset(&ngs->bestpath_perf);
1018 ptmr_start(&ngs->bestpath_perf);
1021 if ((link = ngram_search_bestpath(search, out_score, TRUE)) == NULL)
1024 ngs->bestpath_fwdtree_lw_ratio);
1025 ptmr_stop(&ngs->bestpath_perf);
1027 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
1028 E_INFO(
"bestpath %.2f CPU %.3f xRT\n",
1029 ngs->bestpath_perf.t_cpu,
1030 ngs->bestpath_perf.t_cpu / n_speech);
1031 E_INFO(
"bestpath %.2f wall %.3f xRT\n",
1032 ngs->bestpath_perf.t_elapsed,
1033 ngs->bestpath_perf.t_elapsed / n_speech);
1041 return ngram_search_bp_iter(ngs, bpidx,
1043 (ngs->done && ngs->fwdflat)
1044 ? ngs->fwdflat_fwdtree_lw_ratio : 1.0);
1056 if (ngs->bestpath && ngs->done) {
1062 if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
1064 return search->
post;
1078 for (i = 0, bp_ptr = ngs->bp_table; i < ngs->bpidx; ++i, ++bp_ptr) {
1086 sf = (bp_ptr->
bp < 0) ? 0 : ngs->bp_table[bp_ptr->
bp].
frame + 1;
1090 assert(ef < dag->n_frames);
1092 if ((wid == ps_search_finish_wid(ngs)) && (ef < dag->
n_frames - 1))
1097 && (!ngram_model_set_known_wid(ngs->
lmset,
1098 dict_basewid(ps_search_dict(ngs), wid))))
1102 for (node = dag->
nodes; node; node = node->
next) {
1103 if ((node->
wid == wid) && (node->
sf == sf))
1115 node->
fef = node->
lef = i;
1136 for (node = dag->
nodes; node; node = node->
next) {
1137 if ((node->
wid == ps_search_start_wid(ngs)) && (node->
sf == 0))
1142 E_ERROR(
"Couldn't find <s> in first frame\n");
1152 int32 ef, bestbp, bp, bestscore;
1155 for (node = dag->
nodes; node; node = node->
next) {
1156 int32 lef = ngs->bp_table[node->
lef].
frame;
1157 if ((node->
wid == ps_search_finish_wid(ngs))
1168 ef >= 0 && ngs->bp_table_idx[ef] == ngs->bpidx;
1171 E_ERROR(
"Empty backpointer table: can not build DAG.\n");
1178 for (bp = ngs->bp_table_idx[ef]; bp < ngs->bp_table_idx[ef + 1]; ++bp) {
1179 int32 n_used, l_scr, wid, prev_wid;
1183 if (wid == ps_search_finish_wid(ngs)) {
1187 l_scr = ngram_tg_score(ngs->
lmset, ps_search_finish_wid(ngs),
1189 l_scr = l_scr * lwf;
1191 bestscore = ngs->bp_table[bp].
score + l_scr;
1195 if (bestbp == NO_BP) {
1196 E_ERROR(
"No word exits found in last frame (%d), assuming no recognition\n", ef);
1199 E_INFO(
"</s> not found in last frame, using %s.%d instead\n",
1200 dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].
wid), ef);
1203 for (node = dag->
nodes; node; node = node->
next) {
1204 if (node->
lef == bestbp)
1209 E_ERROR(
"Failed to find DAG node corresponding to %s\n",
1210 dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].
wid));
1220 int32 i, score, ascr, lscr;
1224 int min_endfr, nlink;
1228 min_endfr = cmd_ln_int32_r(ps_search_config(search),
"-min_endfr");
1245 lwf = ngs->fwdflat ? ngs->fwdflat_fwdtree_lw_ratio : 1.0;
1246 create_dag_nodes(ngs, dag);
1247 if ((dag->
start = find_start_node(ngs, dag)) == NULL)
1249 if ((dag->
end = find_end_node(ngs, dag, ngs->bestpath_fwdtree_lw_ratio)) == NULL)
1251 E_INFO(
"lattice start node %s.%d end node %s.%d\n",
1255 ngram_compute_seg_score(ngs, ngs->bp_table + dag->
end->
lef, lwf,
1279 E_INFO(
"Eliminated %d nodes before end node\n", i);
1282 for (to = dag->
end; to; to = to->
next) {
1291 fef = ngs->bp_table[to->
fef].
frame;
1292 lef = ngs->bp_table[to->
lef].
frame;
1293 if (to != dag->
end && lef - fef < min_endfr) {
1299 for (from = to->
next; from; from = from->
next) {
1302 fef = ngs->bp_table[from->
fef].
frame;
1303 lef = ngs->bp_table[from->
lef].
frame;
1305 if ((to->
sf <= fef) || (to->
sf > lef + 1))
1307 if (lef - fef < min_endfr) {
1314 from_bpe = ngs->bp_table + i;
1315 for (; i <= from->
lef; i++, from_bpe++) {
1316 if (from_bpe->
wid != from->
wid)
1318 if (from_bpe->
frame >= to->
sf - 1)
1322 if ((i > from->
lef) || (from_bpe->
frame != to->
sf - 1))
1327 ngram_compute_seg_score(ngs, from_bpe, lwf,
1333 dict_first_phone(ps_search_dict(ngs), to->
wid));
1335 if (score == WORST_SCORE)
1339 score = ascr + (score - from_bpe->
score);
1350 else if (score BETTER_THAN WORST_SCORE) {
1360 E_ERROR(
"End node of lattice isolated; unreachable\n");
1364 for (node = dag->
nodes; node; node = node->
next) {
1373 for (node = dag->
nodes; node; node = node->
next) {
1376 for (alt = node->
next; alt && alt->
sf == node->
sf; alt = alt->
next) {
1384 E_INFO(
"Lattice has %d nodes, %d links\n", dag->
n_nodes, nlink);
1390 dag->
end->
basewid = ps_search_finish_wid(ngs);
1408 default_lm = ngram_model_retain(lm);
hmm_t hmm
Basic HMM structure.
Internal implementation of PocketSphinx decoder.
void ngram_fwdtree_finish(ngram_search_t *ngs)
Finish fwdtree decoding for an utterance.
int32 n_frame_alloc
Number of frames allocated in bp_table_idx and friends.
void ngram_fwdtree_deinit(ngram_search_t *ngs)
Release memory associated with fwdtree decoding.
Base structure for search module.
int32 n_nodes
Number of nodes in this lattice.
void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
ps_seg_t * ps_lattice_seg_iter(ps_lattice_t *dag, ps_latlink_t *link, float32 lwf)
Get hypothesis segmentation iterator after bestpath search.
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
int acmod_rewind(acmod_t *acmod)
Rewind the current utterance, allowing it to be rescored.
listelem_alloc_t * chan_alloc
For chan_t.
void ngram_fwdtree_start(ngram_search_t *ngs)
Start fwdtree decoding for an utterance.
void ngram_search_set_lm(ngram_model_t *lm)
Sets the global language model.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
void ps_lattice_penalize_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen)
Insert penalty for fillers.
frame_idx_t frame
start or end frame
ps_latnode_t * start
Starting node.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state]...
hmm_context_t * hmmctx
HMM context.
ps_segfuncs_t * vt
V-table of seg methods.
logmath_t * lmath
Log-math computation.
uint16 ** sseq
Unique senone sequences (2D array built at load time)
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
int32 lscr
Language model score.
int32 n_words
Number of words known to search (may be less than in the dictionary)
int16 last2_phone
next-to-last phone of this word
#define BAD_S3WID
Dictionary word id.
int32 n_ssid
Number of unique ssids in the compressed ssid list above
frame_idx_t n_frames
Number of frames for this utterance.
int ngram_fwdflat_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
Word graph search implementation.
bitvec_t * word_active
array of active flags for all words.
void ngram_fwdflat_finish(ngram_search_t *ngs)
Finish fwdflat decoding for an utterance.
ps_latnode_t * nodes
List of all nodes.
int ngram_fwdflat_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
Get the exit score for a backpointer entry with a given right context.
listelem_alloc_t * latnode_alloc
Node allocator for this DAG.
int32 prob
Log posterior probability.
latlink_list_t * entries
Links into this node.
POCKETSPHINX_EXPORT int32 ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset, float32 ascale)
Calculate link posterior probabilities on a word graph.
struct ps_latnode_s * alt
Node with alternate pronunciation for this word.
char const * word
Word string (pointer into dictionary hash)
int32 ** active_word_list
Array of active multi-phone words for current and next frame.
struct chan_s * next
first descendant of this channel; or, in the case of the last phone of a word, the next alternative right context channel
void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w, int32 score, int32 path, int32 rc)
Enter a word in the backpointer table.
ps_search_t * search
Search object from whence this came.
int32 final_node_ascr
Acoustic score of implicit link exiting final node.
Lexicon tree based Viterbi search.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
Record the current frame's index in the backpointer table.
int32 rc_id
right-context id for last phone of words
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
void ngram_fwdflat_start(ngram_search_t *ngs)
Start fwdflat decoding for an utterance.
N-Gram search module structure.
int ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
void ps_search_base_free(ps_search_t *search)
Free search.
void ps_lattice_delete_unreachable(ps_lattice_t *dag)
Remove nodes marked as unreachable.
ps_latnode_t * end
Ending node.
frame_idx_t sf
Start frame.
int32 real_wid
wid of this or latest predecessor real word
int32 prev_real_wid
wid of second-last real word
POCKETSPHINX_EXPORT ps_latlink_t * ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, float32 ascale)
Do N-Gram based best-path search on a word graph.
ps_lattice_t * ngram_search_lattice(ps_search_t *search)
Construct a word lattice from the current hypothesis.
latlink_list_t * exits
Links out of this node.
#define WORST_SCORE
Large "bad" score.
N-Gram based multi-pass search ("FBS")
tmat_t * tmat
Transition matrices.
int32 ascr
Acoustic score.
int acmod_advance(acmod_t *acmod)
Advance the frame index.
listelem_alloc_t * latnode_alloc
For latnode_t.
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
void ngram_fwdtree_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdtree decoding.
Segmentation "iterator" for backpointer table results.
ps_latnode_t ** frm_wordlist
List of active words in each frame.
int32 path_scr
Best path score from root of DAG.
Lexical tree node data type for the first phone (root) of each dynamic HMM tree structure.
Lexical tree node data type.
int32 wid
Dictionary word id.
int16 cur
Current position in bpidx.
#define SENSCR_SHIFT
Shift count for senone scores.
a structure for a dictionary.
char const * ps_lattice_hyp(ps_lattice_t *dag, ps_latlink_t *link)
Get hypothesis string after bestpath search.
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e. neither a filler word nor START/FINISH.
void ngram_search_free(ps_search_t *search)
Finalize the N-Gram search module.
Word graph structure used in bestpath/nbest search.
#define WORSE_THAN
Is one score worse than another?
int16 n_bpidx
Number of backpointer IDs.
int32 best_score
Best Viterbi path score.
Back pointer table (forward pass lattice; actually a tree)
cross word triphone model structure
int ngram_fwdtree_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
Free last phone channels for all possible right contexts for word w.
int32 post
Utterance posterior probability.
char * hyp_str
Current hypothesis string.
#define BETTER_THAN
Is one score better than another?
dict_t * dict
Pronunciation dictionary.
void ngram_fwdflat_deinit(ngram_search_t *ngs)
Release memory associated with fwdflat decoding.
int32 s_idx
Start of BScoreStack for various right contexts.
int32 fef
First end frame.
int32 n_frame
Number of frames actually present.
Flat lexicon based Viterbi search.
ngram_model_t * lmset
Set of language models.
uint8 valid
For absolute pruning.
int32 lback
Language model backoff.
listelem_alloc_t * root_chan_alloc
For root_chan_t.
int32 basewid
Dictionary base word id.
int32 ciphone
ciphone for this node
void ngram_fwdflat_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdflat decoding.
ps_lattice_t * ps_lattice_init_search(ps_search_t *search, int n_frame)
Construct an empty word graph with reference to a search structure.
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
int32 * bpidx
Sequence of backpointer IDs.
chan_t ** word_chan
Channels associated with a given word (only used for right contexts, single-phone words in fwdtree se...
bin_mdef_t * mdef
Model definition.
int ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score, int32 *out_is_final)
Find the best word exit for the current frame in the backpointer table.
ps_latlink_t * last_link
Final link in best path.
struct ps_latnode_s * next
Next node in DAG (no ordering implied)
int32 score
Score (best among all right contexts)
V-table for search algorithm.
ps_lattice_t * dag
Current hypothesis word graph.
Base structure for hypothesis segmentation iterator.
#define dict_size(d)
Packaged macro access to dictionary members.
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
ps_seg_t base
Base structure.
float32 ascale
Acoustic score scale for posterior probabilities.
ps_search_t * ngram_search_init(const char *name, ngram_model_t *lm, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize the N-Gram search module.
Acoustic model structure.
float32 lwf
Language weight factor (for second-pass searches)
Building composite triphone (as well as word internal triphones) with the dictionary.
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
POCKETSPHINX_EXPORT void ps_lattice_link(ps_lattice_t *dag, ps_latnode_t *from, ps_latnode_t *to, int32 score, int32 ef)
Create a directed link between "from" and "to" nodes, but if a link already exists, choose one with the best link_scr.
s3ssid_t * ssid
Senone Sequence ID list for all context ciphones.
frame_idx_t sf
Start frame.
int16 last_phone
last phone of this word
char const * ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx)
Backtrace from a given backpointer index to obtain a word hypothesis.