50 #include <sphinxbase/prim_type.h>
51 #include <sphinxbase/err.h>
52 #include <sphinxbase/cmd_ln.h>
53 #include <sphinxbase/strfuncs.h>
54 #include <sphinxbase/byteorder.h>
55 #include <sphinxbase/feat.h>
56 #include <sphinxbase/bio.h>
59 #include "cmdln_macro.h"
61 #include "s2_semi_mgau.h"
65 #ifndef WORDS_BIGENDIAN
66 #define WORDS_BIGENDIAN 1
69 static int32 acmod_process_mfcbuf(
acmod_t *acmod);
74 char const *mdeffn, *tmatfn, *mllrfn, *hmmdir;
77 if ((mdeffn = cmd_ln_str_r(acmod->
config,
"-mdef")) == NULL) {
78 if ((hmmdir = cmd_ln_str_r(acmod->
config,
"-hmm")) == NULL)
79 E_ERROR(
"Acoustic model definition is not specified either "
80 "with -mdef option or with -hmm\n");
82 E_ERROR(
"Folder '%s' does not contain acoustic model "
83 "definition 'mdef'\n", hmmdir);
89 E_ERROR(
"Failed to read acoustic model definition from %s\n", mdeffn);
94 if ((tmatfn = cmd_ln_str_r(acmod->
config,
"-tmat")) == NULL) {
95 E_ERROR(
"No tmat file specified\n");
99 cmd_ln_float32_r(acmod->
config,
"-tmatfloor"),
103 if ((cmd_ln_str_r(acmod->
config,
"-mean") == NULL)
104 || (cmd_ln_str_r(acmod->
config,
"-var") == NULL)
105 || (cmd_ln_str_r(acmod->
config,
"-tmat") == NULL)) {
106 E_ERROR(
"No mean/var/tmat files specified\n");
110 if (cmd_ln_str_r(acmod->
config,
"-senmgau")) {
111 E_INFO(
"Using general multi-stream GMM computation\n");
112 acmod->
mgau = ms_mgau_init(acmod, acmod->
lmath, acmod->
mdef);
113 if (acmod->
mgau == NULL)
117 E_INFO(
"Attempting to use PTM computation module\n");
118 if ((acmod->
mgau = ptm_mgau_init(acmod, acmod->
mdef)) == NULL) {
119 E_INFO(
"Attempting to use semi-continuous computation module\n");
120 if ((acmod->
mgau = s2_semi_mgau_init(acmod)) == NULL) {
121 E_INFO(
"Falling back to general multi-stream GMM computation\n");
122 acmod->
mgau = ms_mgau_init(acmod, acmod->
lmath, acmod->
mdef);
123 if (acmod->
mgau == NULL)
130 if ((mllrfn = cmd_ln_str_r(acmod->
config,
"-mllr"))) {
141 acmod_init_feat(
acmod_t *acmod)
144 feat_init(cmd_ln_str_r(acmod->
config,
"-feat"),
145 cmn_type_from_str(cmd_ln_str_r(acmod->
config,
"-cmn")),
146 cmd_ln_boolean_r(acmod->
config,
"-varnorm"),
147 agc_type_from_str(cmd_ln_str_r(acmod->
config,
"-agc")),
148 1, cmd_ln_int32_r(acmod->
config,
"-ceplen"));
149 if (acmod->
fcb == NULL)
152 if (cmd_ln_str_r(acmod->
config,
"-lda")) {
153 E_INFO(
"Reading linear feature transformation from %s\n",
154 cmd_ln_str_r(acmod->
config,
"-lda"));
155 if (feat_read_lda(acmod->
fcb,
156 cmd_ln_str_r(acmod->
config,
"-lda"),
157 cmd_ln_int32_r(acmod->
config,
"-ldadim")) < 0)
161 if (cmd_ln_str_r(acmod->
config,
"-svspec")) {
163 E_INFO(
"Using subvector specification %s\n",
164 cmd_ln_str_r(acmod->
config,
"-svspec"));
165 if ((subvecs = parse_subvecs(cmd_ln_str_r(acmod->
config,
"-svspec"))) == NULL)
167 if ((feat_set_subvecs(acmod->
fcb, subvecs)) < 0)
171 if (cmd_ln_exists_r(acmod->
config,
"-agcthresh")
172 && 0 != strcmp(cmd_ln_str_r(acmod->
config,
"-agc"),
"none")) {
173 agc_set_threshold(acmod->
fcb->agc_struct,
174 cmd_ln_float32_r(acmod->
config,
"-agcthresh"));
177 if (acmod->
fcb->cmn_struct
178 && cmd_ln_exists_r(acmod->
config,
"-cmninit")) {
179 char *c, *cc, *vallist;
182 vallist = ckd_salloc(cmd_ln_str_r(acmod->
config,
"-cmninit"));
185 while (nvals < acmod->fcb->cmn_struct->veclen
186 && (cc = strchr(c,
',')) != NULL) {
188 acmod->
fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c));
192 if (nvals < acmod->fcb->cmn_struct->veclen && *c !=
'\0') {
193 acmod->
fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c));
201 acmod_fe_mismatch(
acmod_t *acmod, fe_t *fe)
204 if (cmd_ln_int32_r(acmod->
config,
"-ceplen") != fe_get_output_size(fe)) {
205 E_ERROR(
"Configured feature length %d doesn't match feature "
206 "extraction output size %d\n",
207 cmd_ln_int32_r(acmod->
config,
"-ceplen"),
208 fe_get_output_size(fe));
217 acmod_feat_mismatch(
acmod_t *acmod, feat_t *fcb)
220 if (0 != strcmp(cmd_ln_str_r(acmod->
config,
"-feat"), feat_name(fcb)))
223 if (cmd_ln_int32_r(acmod->
config,
"-ceplen") != feat_cepsize(fcb))
230 acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
234 acmod = ckd_calloc(1,
sizeof(*acmod));
235 acmod->
config = cmd_ln_retain(config);
236 acmod->
lmath = lmath;
241 if (acmod_fe_mismatch(acmod, fe))
248 acmod->
fe = fe_init_auto_r(config);
249 if (acmod->
fe == NULL)
251 if (acmod_fe_mismatch(acmod, acmod->
fe))
255 if (acmod_feat_mismatch(acmod, fcb))
262 if (acmod_init_feat(acmod) < 0)
267 if (acmod_init_am(acmod) < 0)
292 acmod->
compallsen = cmd_ln_boolean_r(config,
"-compallsen");
306 feat_free(acmod->
fcb);
308 cmd_ln_free_r(acmod->
config);
311 ckd_free_2d((
void **)acmod->
mfc_buf);
316 fclose(acmod->
mfcfh);
318 fclose(acmod->
rawfh);
320 fclose(acmod->
senfh);
326 ckd_free(acmod->rawdata);
333 ps_mgau_free(acmod->
mgau);
346 ps_mgau_transform(acmod->
mgau, mllr);
354 char nsenstr[64], logbasestr[64];
356 sprintf(nsenstr,
"%d", bin_mdef_n_sen(acmod->
mdef));
357 sprintf(logbasestr,
"%f", logmath_get_base(acmod->
lmath));
358 return bio_writehdr(logfh,
360 "mdef_file", cmd_ln_str_r(acmod->
config,
"-mdef"),
362 "logbase", logbasestr, NULL);
369 fclose(acmod->
senfh);
370 acmod->
senfh = logfh;
382 fclose(acmod->
mfcfh);
383 acmod->
mfcfh = logfh;
384 fwrite(&rv, 4, 1, acmod->
mfcfh);
392 fclose(acmod->
rawfh);
393 acmod->
rawfh = logfh;
398 acmod_grow_feat_buf(
acmod_t *acmod,
int nfr)
401 E_FATAL(
"Decoder can not process more than %d frames at once, "
419 acmod_grow_feat_buf(acmod, 128);
427 fe_start_utt(acmod->
fe);
437 acmod->rawdata_pos = 0;
453 fe_end_utt(acmod->
fe, acmod->
mfc_buf[inptr], &nfr);
458 nfr = acmod_process_mfcbuf(acmod);
460 feat_update_stats(acmod->
fcb);
465 outlen = (ftell(acmod->
mfcfh) - 4) / 4;
466 if (!WORDS_BIGENDIAN)
469 if ((rv = fseek(acmod->
mfcfh, 0, SEEK_SET)) == 0) {
470 fwrite(&outlen, 4, 1, acmod->
mfcfh);
472 fclose(acmod->
mfcfh);
476 fclose(acmod->
rawfh);
481 fclose(acmod->
senfh);
490 mfcc_t **cep,
int n_frames)
493 int32 *ptr = (int32 *)cep[0];
495 n = n_frames * feat_cepsize(acmod->
fcb);
497 if (!WORDS_BIGENDIAN) {
498 for (i = 0; i < (n *
sizeof(mfcc_t)); ++i) {
503 if (fwrite(cep[0],
sizeof(mfcc_t), n, acmod->
mfcfh) != n) {
504 E_ERROR_SYSTEM(
"Failed to write %d values to log file", n);
508 if (!WORDS_BIGENDIAN) {
509 for (i = 0; i < (n *
sizeof(mfcc_t)); ++i) {
517 acmod_process_full_cep(
acmod_t *acmod,
525 acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
531 E_FATAL(
"Batch processing can not process more than %d frames "
532 "at once, requested %d\n",
MAX_N_FRAMES, *inout_n_frames);
535 acmod->
feat_buf = feat_array_alloc(acmod->
fcb, *inout_n_frames);
541 nfr = feat_s2mfc2feat_live(acmod->
fcb, *inout_cep, inout_n_frames,
545 *inout_cep += *inout_n_frames;
552 acmod_process_full_raw(
acmod_t *acmod,
553 int16
const **inout_raw,
554 size_t *inout_n_samps)
560 if (*inout_n_samps + acmod->rawdata_pos < acmod->rawdata_size) {
561 memcpy(acmod->rawdata + acmod->rawdata_pos, *inout_raw, *inout_n_samps *
sizeof(int16));
562 acmod->rawdata_pos += *inout_n_samps;
565 fwrite(*inout_raw,
sizeof(int16), *inout_n_samps, acmod->
rawfh);
567 if (fe_process_frames(acmod->
fe, NULL, inout_n_samps, NULL, &nfr, NULL) < 0)
571 acmod->
mfc_buf = ckd_calloc_2d(nfr + 1, fe_get_output_size(acmod->
fe),
577 fe_start_utt(acmod->
fe);
578 if (fe_process_frames(acmod->
fe, inout_raw, inout_n_samps,
579 acmod->
mfc_buf, &nfr, NULL) < 0)
581 fe_end_utt(acmod->
fe, acmod->
mfc_buf[nfr], &ntail);
585 nfr = acmod_process_full_cep(acmod, &cepptr, &nfr);
594 acmod_process_mfcbuf(
acmod_t *acmod)
603 int saved_state = acmod->
state;
616 acmod->
state = saved_state;
628 int16
const **inout_raw,
629 size_t *inout_n_samps,
634 int16
const *prev_audio_inptr;
638 return acmod_process_full_raw(acmod, inout_raw, inout_n_samps);
642 if (inout_n_samps && *inout_n_samps) {
644 int32 processed_samples;
646 prev_audio_inptr = *inout_raw;
655 if (fe_process_frames(acmod->
fe, inout_raw, inout_n_samps,
656 acmod->
mfc_buf + inptr, &ncep1, &out_frameidx) < 0)
659 if (out_frameidx > 0)
662 processed_samples = *inout_raw - prev_audio_inptr;
663 if (processed_samples + acmod->rawdata_pos < acmod->rawdata_size) {
664 memcpy(acmod->rawdata + acmod->rawdata_pos, prev_audio_inptr, processed_samples *
sizeof(int16));
665 acmod->rawdata_pos += processed_samples;
669 fwrite(prev_audio_inptr,
sizeof(int16),
673 prev_audio_inptr = *inout_raw;
690 assert(inptr + ncep <= acmod->n_mfc_alloc);
691 if (fe_process_frames(acmod->
fe, inout_raw, inout_n_samps,
692 acmod->
mfc_buf + inptr, &ncep, &out_frameidx) < 0)
695 if (out_frameidx > 0)
699 processed_samples = *inout_raw - prev_audio_inptr;
700 if (processed_samples + acmod->rawdata_pos < acmod->rawdata_size) {
701 memcpy(acmod->rawdata + acmod->rawdata_pos, prev_audio_inptr, processed_samples *
sizeof(int16));
702 acmod->rawdata_pos += processed_samples;
705 fwrite(prev_audio_inptr,
sizeof(int16),
706 processed_samples, acmod->
rawfh);
708 prev_audio_inptr = *inout_raw;
715 return acmod_process_mfcbuf(acmod);
724 int32 nfeat, ncep, inptr;
729 return acmod_process_full_cep(acmod, inout_cep, inout_n_frames);
733 acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
736 orig_n_frames = ncep = nfeat = *inout_n_frames;
740 nfeat += feat_window_size(acmod->
fcb);
742 nfeat -= feat_window_size(acmod->
fcb);
749 acmod_grow_feat_buf(acmod, acmod->
n_feat_alloc + nfeat);
770 *inout_n_frames -= ncep;
780 nfeat = feat_s2mfc2feat_live(acmod->
fcb, *inout_cep,
793 *inout_n_frames -= ncep1;
798 nfeat = feat_s2mfc2feat_live(acmod->
fcb, *inout_cep,
808 *inout_n_frames -= ncep;
813 return orig_n_frames - *inout_n_frames;
838 for (i = 0; i < feat_dimension1(acmod->
fcb); ++i)
840 feat[i], feat_dimension2(acmod->
fcb, i) *
sizeof(**feat));
848 acmod_read_senfh_header(
acmod_t *acmod)
854 if (bio_readhdr(acmod->
insenfh, &name, &val, &swap) < 0)
856 for (i = 0; name[i] != NULL; ++i) {
857 if (!strcmp(name[i],
"n_sen")) {
858 if (atoi(val[i]) != bin_mdef_n_sen(acmod->
mdef)) {
859 E_ERROR(
"Number of senones in senone file (%d) does not "
860 "match mdef (%d)\n", atoi(val[i]),
861 bin_mdef_n_sen(acmod->
mdef));
866 if (!strcmp(name[i],
"logbase")) {
867 if (fabs(atof_c(val[i]) - logmath_get_base(acmod->
lmath)) > 0.001) {
868 E_ERROR(
"Logbase in senone file (%f) does not match acmod "
869 "(%f)\n", atof_c(val[i]),
870 logmath_get_base(acmod->
lmath));
876 bio_hdrarg_free(name, val);
879 bio_hdrarg_free(name, val);
893 return acmod_read_senfh_header(acmod);
901 E_ERROR(
"Circular feature buffer cannot be rewound (output frame %d, "
932 int16
const *senscr, FILE *senfh)
947 n_active2 = n_active;
948 if (fwrite(&n_active2, 2, 1, senfh) != 1)
950 if (n_active == bin_mdef_n_sen(acmod->
mdef)) {
951 if (fwrite(senscr, 2, n_active, senfh) != n_active)
956 if (fwrite(active, 1, n_active, senfh) != n_active)
958 for (i = n = 0; i < n_active; ++i) {
960 if (fwrite(senscr + n, 2, 1, senfh) != 1)
966 E_ERROR_SYSTEM(
"Failed to write frame to senone file");
974 acmod_read_scores_internal(
acmod_t *acmod)
990 if ((rv = fread(&n_active, 2, 1, senfh)) != 1)
1008 for (j = n + 1; j < sen; ++j)
1011 if ((rv = fread(acmod->
senone_scores + sen, 2, 1, senfh)) != 1)
1018 while (n < bin_mdef_n_sen(acmod->
mdef))
1024 if (ferror(senfh)) {
1025 E_ERROR_SYSTEM(
"Failed to read frame from senone file");
1049 if ((rv = acmod_read_scores_internal(acmod)) != 1)
1056 E_DEBUG(1,(
"Frame %d has %d active states\n",
1069 calc_frame_idx(
acmod_t *acmod,
int *inout_frame_idx)
1074 if (inout_frame_idx == NULL)
1076 else if (*inout_frame_idx < 0)
1077 frame_idx = acmod->
output_frame + 1 + *inout_frame_idx;
1079 frame_idx = *inout_frame_idx;
1085 calc_feat_idx(
acmod_t *acmod,
int frame_idx)
1087 int n_backfr, feat_idx;
1090 if (frame_idx < 0 || acmod->output_frame - frame_idx > n_backfr) {
1091 E_ERROR(
"Frame %d outside queue of %d frames, %d alloc (%d > %d), "
1110 int frame_idx, feat_idx;
1113 frame_idx = calc_frame_idx(acmod, inout_frame_idx);
1116 if ((feat_idx = calc_feat_idx(acmod, frame_idx)) < 0)
1119 if (inout_frame_idx)
1120 *inout_frame_idx = frame_idx;
1128 int frame_idx, feat_idx;
1131 frame_idx = calc_frame_idx(acmod, inout_frame_idx);
1137 if (inout_frame_idx)
1138 *inout_frame_idx = frame_idx;
1143 if ((feat_idx = calc_feat_idx(acmod, frame_idx)) < 0)
1152 if (acmod_read_scores_internal(acmod) < 0)
1160 ps_mgau_frame_eval(acmod->
mgau,
1169 if (inout_frame_idx)
1170 *inout_frame_idx = frame_idx;
1180 E_DEBUG(1,(
"Frame %d has %d active states\n", frame_idx,
1194 for (i = 0; i < bin_mdef_n_sen(acmod->
mdef); ++i) {
1197 *out_best_senid = i;
1206 if (*senscr < best) {
1208 *out_best_senid = i;
/*
 * Mark the senone of state i of multiplex HMM h as active in
 * (a)->senone_active_vec, skipping states whose senone sequence ID is
 * BAD_SSID.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement:
 * the internal `if` can no longer capture an `else` that follows the
 * macro call, and the call site still takes a trailing semicolon.
 */
#define MPX_BITVEC_SET(a,h,i)                                       \
    do {                                                            \
        if (hmm_mpx_ssid((h),(i)) != BAD_SSID)                      \
            bitvec_set((a)->senone_active_vec,                      \
                       hmm_mpx_senid((h),(i)));                     \
    } while (0)

/*
 * Mark the senone of state i of non-multiplex HMM h as active in
 * (a)->senone_active_vec.  Kept as a plain expression, as before.
 */
#define NONMPX_BITVEC_SET(a,h,i)                                    \
    bitvec_set((a)->senone_active_vec,                              \
               hmm_nonmpx_senid((h),(i)))
1239 if (hmm_is_mpx(hmm)) {
1240 switch (hmm_n_emit_state(hmm)) {
1242 MPX_BITVEC_SET(acmod, hmm, 4);
1243 MPX_BITVEC_SET(acmod, hmm, 3);
1245 MPX_BITVEC_SET(acmod, hmm, 2);
1246 MPX_BITVEC_SET(acmod, hmm, 1);
1247 MPX_BITVEC_SET(acmod, hmm, 0);
1250 for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
1251 MPX_BITVEC_SET(acmod, hmm, i);
1256 switch (hmm_n_emit_state(hmm)) {
1258 NONMPX_BITVEC_SET(acmod, hmm, 4);
1259 NONMPX_BITVEC_SET(acmod, hmm, 3);
1261 NONMPX_BITVEC_SET(acmod, hmm, 2);
1262 NONMPX_BITVEC_SET(acmod, hmm, 1);
1263 NONMPX_BITVEC_SET(acmod, hmm, 0);
1266 for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
1267 NONMPX_BITVEC_SET(acmod, hmm, i);
1276 int32 w, l, n, b, total_dists, total_words, extra_bits;
1279 total_dists = bin_mdef_n_sen(acmod->
mdef);
1284 total_words = total_dists / BITVEC_BITS;
1285 extra_bits = total_dists % BITVEC_BITS;
1290 for (b = 0; b < BITVEC_BITS; ++b) {
1291 if (*flagptr & (1UL << b)) {
1292 int32 sen = w * BITVEC_BITS + b;
1293 int32 delta = sen - l;
1296 while (delta > 255) {
1306 for (b = 0; b < extra_bits; ++b) {
1307 if (*flagptr & (1UL << b)) {
1308 int32 sen = w * BITVEC_BITS + b;
1309 int32 delta = sen - l;
1312 while (delta > 255) {
1322 E_DEBUG(1, (
"acmod_flags2list: %d active in frame %d\n",
1336 fe_start_stream(acmod->
fe);
1344 acmod->rawdata_size = size;
1345 if (acmod->rawdata_size > 0) {
1346 ckd_free(acmod->rawdata);
1347 acmod->rawdata = ckd_calloc(size,
sizeof(int16));
1355 *buffer = acmod->rawdata;
1358 *size = acmod->rawdata_pos;
(Sphinx 3.0 specific) A module that wraps up the code of gauden and senone because they are closely related.
FILE * insenfh
Input senone score file.
void acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size)
Retrieves the raw data collected during utterance decoding.
uint8 grow_feat
Whether to grow feat_buf.
ps_mgau_t * mgau
Model parameters.
int acmod_read_scores(acmod_t *acmod)
Read one frame of scores from senone score dump file.
uint8 * senone_active
Array of deltas to active GMMs.
long * framepos
File positions of recent frames in senone file.
Utterance started, no data yet.
int acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
Set up a senone score dump file for input.
int32 acmod_stream_offset(acmod_t *acmod)
Get the offset of the utterance start of the current stream, helpful for stream-wide timing.
int acmod_rewind(acmod_t *acmod)
Rewind the current utterance, allowing it to be rescored.
int16 * senone_scores
GMM scores for current frame.
ps_mllr_t * acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
Adapt acoustic model using a linear transform.
int acmod_process_cep(acmod_t *acmod, mfcc_t ***inout_cep, int *inout_n_frames, int full_utt)
Feed acoustic feature data into the acoustic model for scoring.
int n_senone_active
Number of active GMMs.
An individual HMM among the HMM search space.
void acmod_set_rawdata_size(acmod_t *acmod, int32 size)
Sets the limit of the raw audio data to store.
void acmod_start_stream(acmod_t *acmod)
Reset the current stream.
logmath_t * lmath
Log-math computation.
fe_t * fe
Acoustic feature computation.
frame_idx_t n_mfc_frame
Number of frames active in mfc_buf.
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Utterance ended, still buffering.
FILE * rawfh
File for writing raw audio data.
mfcc_t ** mfc_buf
Temporary buffer of acoustic features.
frame_idx_t utt_start_frame
Index of the utterance start in the stream, all timings are relative to that.
void tmat_free(tmat_t *t)
RAH, add code to remove memory allocated by tmat_init.
frame_idx_t n_feat_alloc
Number of frames allocated in feat_buf.
mfcc_t *** feat_buf
Temporary buffer of dynamic features.
tmat_t * tmat_init(char const *tmatfile, logmath_t *lmath, float64 tpfloor, int32 breport)
Initialize transition matrix.
int acmod_set_senfh(acmod_t *acmod, FILE *logfh)
Start logging senone scores to a filehandle.
#define MAX_N_FRAMES
Maximum number of frames in index, should be in sync with above.
int acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
Start logging raw audio to a filehandle.
ps_mllr_t * mllr
Speaker transformation.
Fast phonetically-tied mixture evaluation.
POCKETSPHINX_EXPORT ps_mllr_t * ps_mllr_read(char const *file)
Read a speaker-adaptive linear transform from a file.
uint8 compallsen
Compute all senones?
POCKETSPHINX_EXPORT bin_mdef_t * bin_mdef_read(cmd_ln_t *config, const char *filename)
Read a binary mdef from a file.
int acmod_process_feat(acmod_t *acmod, mfcc_t **feat)
Feed dynamic feature data into the acoustic model for scoring.
int acmod_write_senfh_header(acmod_t *acmod, FILE *logfh)
Write senone dump file header.
cmd_ln_t * config
Configuration.
frame_idx_t output_frame
Index of next frame of dynamic features.
int acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active, int16 const *senscr, FILE *senfh)
Write a frame of senone scores to a dump file.
tmat_t * tmat
Transition matrices.
int32 acmod_flags2list(acmod_t *acmod)
Build active list from the active-senone flags.
POCKETSPHINX_EXPORT int ps_mllr_free(ps_mllr_t *mllr)
Release a pointer to a linear transform.
int acmod_end_utt(acmod_t *acmod)
Mark the end of an utterance.
int acmod_advance(acmod_t *acmod)
Advance the frame index.
uint8 state
State of utterance processing.
int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
Start logging MFCCs to a filehandle.
void acmod_free(acmod_t *acmod)
Finalize an acoustic model.
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
#define SENSCR_DUMMY
Dummy senone score value for unintentionally active states.
Feature space linear transform structure.
frame_idx_t feat_outidx
Start of active frames in feat_buf.
mfcc_t ** acmod_get_frame(acmod_t *acmod, int *inout_frame_idx)
Get a frame of dynamic feature data.
feat_t * fcb
Dynamic feature computation.
int log_zero
Zero log-probability value.
FILE * senfh
File for writing senone score data.
frame_idx_t mfc_outidx
Start of active frames in mfc_buf.
frame_idx_t n_mfc_alloc
Number of frames allocated in mfc_buf.
int acmod_process_raw(acmod_t *acmod, int16 const **inout_raw, size_t *inout_n_samps, int full_utt)
TODO: Set queue length for utterance processing.
int bin_mdef_free(bin_mdef_t *m)
Release a pointer to a binary mdef.
uint8 insen_swap
Whether to swap input senone score.
int acmod_start_utt(acmod_t *acmod)
Mark the start of an utterance.
int senscr_frame
Frame index for senone_scores.
bin_mdef_t * mdef
Model definition.
int acmod_best_score(acmod_t *acmod, int *out_best_senid)
Get best score and senone index for current frame.
acmod_t * acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
Initialize an acoustic model.
frame_idx_t n_feat_frame
Number of frames active in feat_buf.
Acoustic model structures for PocketSphinx.
FILE * mfcfh
File for writing acoustic feature data.
Acoustic model structure.
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
int frame_idx
frame counter.
bitvec_t * senone_active_vec
Active GMMs in current frame.