PocketSphinx  5prealpha
pocketsphinx.h
Go to the documentation of this file.
1 /* -*- c-basic-offset:4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * ====================================================================
32  *
33  */
38 #ifndef __POCKETSPHINX_H__
39 #define __POCKETSPHINX_H__
40 
41 #ifdef __cplusplus
42 extern "C" {
43 #endif
44 #if 0
45 }
46 #endif
47 
48 /* System headers we need. */
49 #include <stdio.h>
50 
51 /* SphinxBase headers we need. */
52 #include <sphinxbase/cmd_ln.h>
53 #include <sphinxbase/logmath.h>
54 #include <sphinxbase/fe.h>
55 #include <sphinxbase/feat.h>
56 
57 /* PocketSphinx headers (not many of them!) */
58 #include <pocketsphinx_export.h>
59 #include <cmdln_macro.h>
60 #include <ps_lattice.h>
61 #include <ps_mllr.h>
62 
66 typedef struct ps_decoder_s ps_decoder_t;
67 
68 #include <ps_search.h>
69 
73 typedef struct ps_astar_s ps_nbest_t;
74 
78 typedef struct ps_seg_s ps_seg_t;
79 
84 POCKETSPHINX_EXPORT void
85 ps_default_search_args(cmd_ln_t *);
86 
97 POCKETSPHINX_EXPORT
98 ps_decoder_t *ps_init(cmd_ln_t *config);
99 
117 POCKETSPHINX_EXPORT
118 int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config);
119 
126 POCKETSPHINX_EXPORT
127 arg_t const *ps_args(void);
128 
139 POCKETSPHINX_EXPORT
140 ps_decoder_t *ps_retain(ps_decoder_t *ps);
141 
152 POCKETSPHINX_EXPORT
153 int ps_free(ps_decoder_t *ps);
154 
163 POCKETSPHINX_EXPORT
164 cmd_ln_t *ps_get_config(ps_decoder_t *ps);
165 
174 POCKETSPHINX_EXPORT
175 logmath_t *ps_get_logmath(ps_decoder_t *ps);
176 
185 POCKETSPHINX_EXPORT
186 fe_t *ps_get_fe(ps_decoder_t *ps);
187 
196 POCKETSPHINX_EXPORT
197 feat_t *ps_get_feat(ps_decoder_t *ps);
198 
210 POCKETSPHINX_EXPORT
211 ps_mllr_t *ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr);
212 
227 POCKETSPHINX_EXPORT
228 int ps_load_dict(ps_decoder_t *ps, char const *dictfile,
229  char const *fdictfile, char const *format);
230 
240 POCKETSPHINX_EXPORT
241 int ps_save_dict(ps_decoder_t *ps, char const *dictfile, char const *format);
242 
262 POCKETSPHINX_EXPORT
263 int ps_add_word(ps_decoder_t *ps,
264  char const *word,
265  char const *phones,
266  int update);
267 
279 POCKETSPHINX_EXPORT
280 char *ps_lookup_word(ps_decoder_t *ps,
281  const char *word);
282 
297 POCKETSPHINX_EXPORT
298 long ps_decode_raw(ps_decoder_t *ps, FILE *rawfh,
299  long maxsamps);
300 
308 POCKETSPHINX_EXPORT
309 int ps_decode_senscr(ps_decoder_t *ps, FILE *senfh);
310 
318 POCKETSPHINX_EXPORT
319 int ps_start_stream(ps_decoder_t *ps);
320 
331 POCKETSPHINX_EXPORT
332 int ps_start_utt(ps_decoder_t *ps);
333 
347 POCKETSPHINX_EXPORT
348 int ps_process_raw(ps_decoder_t *ps,
349  int16 const *data,
350  size_t n_samples,
351  int no_search,
352  int full_utt);
353 
367 POCKETSPHINX_EXPORT
368 int ps_process_cep(ps_decoder_t *ps,
369  mfcc_t **data,
370  int n_frames,
371  int no_search,
372  int full_utt);
373 
387 POCKETSPHINX_EXPORT
388 int ps_get_n_frames(ps_decoder_t *ps);
389 
396 POCKETSPHINX_EXPORT
397 int ps_end_utt(ps_decoder_t *ps);
398 
407 POCKETSPHINX_EXPORT
408 char const *ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score);
409 
418 POCKETSPHINX_EXPORT
419 char const *ps_get_hyp_final(ps_decoder_t *ps, int32 *out_is_final);
420 
434 POCKETSPHINX_EXPORT
435 int32 ps_get_prob(ps_decoder_t *ps);
436 
450 POCKETSPHINX_EXPORT
451 ps_lattice_t *ps_get_lattice(ps_decoder_t *ps);
452 
461 POCKETSPHINX_EXPORT
462 ps_seg_t *ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score);
463 
471 POCKETSPHINX_EXPORT
472 ps_seg_t *ps_seg_next(ps_seg_t *seg);
473 
481 POCKETSPHINX_EXPORT
482 char const *ps_seg_word(ps_seg_t *seg);
483 
495 POCKETSPHINX_EXPORT
496 void ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef);
497 
520 POCKETSPHINX_EXPORT
521 int32 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback);
522 
526 POCKETSPHINX_EXPORT
527 void ps_seg_free(ps_seg_t *seg);
528 
543 POCKETSPHINX_EXPORT
544 ps_nbest_t *ps_nbest(ps_decoder_t *ps, int sf, int ef,
545  char const *ctx1, char const *ctx2);
546 
554 POCKETSPHINX_EXPORT
555 ps_nbest_t *ps_nbest_next(ps_nbest_t *nbest);
556 
564 POCKETSPHINX_EXPORT
565 char const *ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score);
566 
574 POCKETSPHINX_EXPORT
575 ps_seg_t *ps_nbest_seg(ps_nbest_t *nbest, int32 *out_score);
576 
582 POCKETSPHINX_EXPORT
583 void ps_nbest_free(ps_nbest_t *nbest);
584 
593 POCKETSPHINX_EXPORT
594 void ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech,
595  double *out_ncpu, double *out_nwall);
596 
605 POCKETSPHINX_EXPORT
606 void ps_get_all_time(ps_decoder_t *ps, double *out_nspeech,
607  double *out_ncpu, double *out_nwall);
608 
615 POCKETSPHINX_EXPORT
616 uint8 ps_get_in_speech(ps_decoder_t *ps);
617 
618 
626 POCKETSPHINX_EXPORT
627 void ps_set_rawdata_size(ps_decoder_t *ps, int32 size);
628 
629 
638 POCKETSPHINX_EXPORT
639 void ps_get_rawdata(ps_decoder_t *ps, int16 **buffer, int32 *size);
640 
655 #ifdef __cplusplus
656 } /* extern "C" */
657 #endif
658 
659 #endif /* __POCKETSPHINX_H__ */
POCKETSPHINX_EXPORT void ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech, double *out_ncpu, double *out_nwall)
Get performance information for the current utterance.
POCKETSPHINX_EXPORT feat_t * ps_get_feat(ps_decoder_t *ps)
Get the dynamic feature computation object for this decoder.
Definition: pocketsphinx.c:471
POCKETSPHINX_EXPORT void ps_set_rawdata_size(ps_decoder_t *ps, int32 size)
Sets the limit on the amount of raw audio data stored in the decoder so that it can be retrieved later with ps_get_rawdata.
POCKETSPHINX_EXPORT ps_seg_t * ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score)
Get an iterator over the word segmentation for the best hypothesis.
User can configure several "search" objects with different grammars and language models and switch th...
POCKETSPHINX_EXPORT int ps_process_cep(ps_decoder_t *ps, mfcc_t **data, int n_frames, int no_search, int full_utt)
Decode acoustic feature data.
POCKETSPHINX_EXPORT arg_t const * ps_args(void)
Returns the argument definitions used in ps_init().
Definition: pocketsphinx.c:423
POCKETSPHINX_EXPORT int ps_load_dict(ps_decoder_t *ps, char const *dictfile, char const *fdictfile, char const *format)
Reload the pronunciation dictionary from a file.
Definition: pocketsphinx.c:741
POCKETSPHINX_EXPORT void ps_nbest_free(ps_nbest_t *nbest)
Finish N-best search early, releasing resources.
POCKETSPHINX_EXPORT ps_nbest_t * ps_nbest(ps_decoder_t *ps, int sf, int ef, char const *ctx1, char const *ctx2)
Get an iterator over the best hypotheses, optionally within a selected region of the utterance...
POCKETSPHINX_EXPORT ps_seg_t * ps_nbest_seg(ps_nbest_t *nbest, int32 *out_score)
Get the word segmentation from an N-best list iterator.
POCKETSPHINX_EXPORT void ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef)
Get inclusive start and end frames from a segmentation iterator.
POCKETSPHINX_EXPORT ps_mllr_t * ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr)
Adapt current acoustic model using a linear transform.
Definition: pocketsphinx.c:477
Word graph search.
POCKETSPHINX_EXPORT uint8 ps_get_in_speech(ps_decoder_t *ps)
Checks if the last fed audio buffer contained speech.
POCKETSPHINX_EXPORT int ps_save_dict(ps_decoder_t *ps, char const *dictfile, char const *format)
Dump the current pronunciation dictionary to a file.
Definition: pocketsphinx.c:797
POCKETSPHINX_EXPORT void ps_default_search_args(cmd_ln_t *)
Sets default grammar and language model if they are not set explicitly and are present in the default...
Definition: pocketsphinx.c:180
POCKETSPHINX_EXPORT char const * ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score)
Get the hypothesis string from an N-best list iterator.
POCKETSPHINX_EXPORT char * ps_lookup_word(ps_decoder_t *ps, const char *word)
Look up the word in the dictionary and return its phone transcription.
Definition: pocketsphinx.c:872
A* search structure.
POCKETSPHINX_EXPORT ps_lattice_t * ps_get_lattice(ps_decoder_t *ps)
Get word lattice.
POCKETSPHINX_EXPORT ps_nbest_t * ps_nbest_next(ps_nbest_t *nbest)
Move an N-best list iterator forward.
POCKETSPHINX_EXPORT long ps_decode_raw(ps_decoder_t *ps, FILE *rawfh, long maxsamps)
Decode a raw audio stream.
Definition: pocketsphinx.c:895
char const * word
Word string (pointer into dictionary hash)
POCKETSPHINX_EXPORT int32 ps_get_prob(ps_decoder_t *ps)
Get posterior probability.
POCKETSPHINX_EXPORT ps_seg_t * ps_seg_next(ps_seg_t *seg)
Get the next segment in a word segmentation.
POCKETSPHINX_EXPORT char const * ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score)
Get hypothesis string and path score.
Decoder object.
POCKETSPHINX_EXPORT int ps_free(ps_decoder_t *ps)
Finalize the decoder.
Definition: pocketsphinx.c:436
POCKETSPHINX_EXPORT void ps_seg_free(ps_seg_t *seg)
Finish iterating over a word segmentation early, freeing resources.
frame_idx_t ef
End frame.
POCKETSPHINX_EXPORT ps_decoder_t * ps_retain(ps_decoder_t *ps)
Retain a pointer to the decoder.
Definition: pocketsphinx.c:429
Model-space linear transforms for speaker adaptation.
POCKETSPHINX_EXPORT cmd_ln_t * ps_get_config(ps_decoder_t *ps)
Get the configuration object for this decoder.
Definition: pocketsphinx.c:453
Feature space linear transform structure.
Definition: acmod.h:82
POCKETSPHINX_EXPORT int ps_process_raw(ps_decoder_t *ps, int16 const *data, size_t n_samples, int no_search, int full_utt)
Decode raw audio data.
Word graph structure used in bestpath/nbest search.
POCKETSPHINX_EXPORT int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
Reinitialize the decoder with updated configuration.
Definition: pocketsphinx.c:234
POCKETSPHINX_EXPORT int ps_end_utt(ps_decoder_t *ps)
End utterance processing.
POCKETSPHINX_EXPORT int ps_add_word(ps_decoder_t *ps, char const *word, char const *phones, int update)
Add a word to the pronunciation dictionary.
Definition: pocketsphinx.c:804
POCKETSPHINX_EXPORT void ps_get_all_time(ps_decoder_t *ps, double *out_nspeech, double *out_ncpu, double *out_nwall)
Get overall performance information.
POCKETSPHINX_EXPORT logmath_t * ps_get_logmath(ps_decoder_t *ps)
Get the log-math computation object for this decoder.
Definition: pocketsphinx.c:459
POCKETSPHINX_EXPORT int32 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback)
Get language, acoustic, and posterior probabilities from a segmentation iterator. ...
POCKETSPHINX_EXPORT char const * ps_get_hyp_final(ps_decoder_t *ps, int32 *out_is_final)
Get hypothesis string and final flag.
POCKETSPHINX_EXPORT int ps_start_stream(ps_decoder_t *ps)
Start processing of the stream of speech.
Definition: pocketsphinx.c:938
POCKETSPHINX_EXPORT fe_t * ps_get_fe(ps_decoder_t *ps)
Get the feature extraction object for this decoder.
Definition: pocketsphinx.c:465
POCKETSPHINX_EXPORT int ps_start_utt(ps_decoder_t *ps)
Start utterance processing.
Definition: pocketsphinx.c:945
POCKETSPHINX_EXPORT ps_decoder_t * ps_init(cmd_ln_t *config)
Initialize the decoder from a configuration object.
Definition: pocketsphinx.c:404
Base structure for hypothesis segmentation iterator.
POCKETSPHINX_EXPORT int ps_get_n_frames(ps_decoder_t *ps)
Get the number of frames of data searched.
POCKETSPHINX_EXPORT int ps_decode_senscr(ps_decoder_t *ps, FILE *senfh)
Decode a senone score dump file.
POCKETSPHINX_EXPORT char const * ps_seg_word(ps_seg_t *seg)
Get word string from a segmentation iterator.
frame_idx_t sf
Start frame.
POCKETSPHINX_EXPORT void ps_get_rawdata(ps_decoder_t *ps, int16 **buffer, int32 *size)
Retrieves the raw data collected during utterance decoding.