Skip to content

Commit 9bbfdf3

Browse files
authored
Merge pull request #30 from ReadAlongs/release_040
Final 0.4.0 release updates
2 parents 562024d + e8bd798 commit 9bbfdf3

File tree

13 files changed

+103
-115
lines changed

13 files changed

+103
-115
lines changed

MANIFEST.in

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
include CMakeLists.txt
22
include LICENSE
3-
include README.manylinux.md
43
include README.md
54
include TODO.md
6-
include build_wheels.sh
75
include config.h.in
86
include requirements.dev.txt
97
include docs/Makefile
@@ -17,10 +15,15 @@ include docs/source/readme.js.rst
1715
include docs/source/readme.rst
1816
include docs/source/soundswallower.rst
1917
include include/soundswallower/CMakeLists.txt
20-
include include/soundswallower/*.h
21-
include js/CMakeLists.txt
18+
recursive-include include *.h
2219
include js/README.md
20+
include js/.npmignore
21+
include js/*.txt
2322
include js/*.js
23+
include js/*.ts
24+
include js/*.html
25+
include js/*.py
26+
include js/*.c
2427
include js/*.json
2528
recursive-include model *
2629
include py/CMakeLists.txt
@@ -35,23 +38,27 @@ include py/test/test_fsg.py
3538
include pyproject.toml
3639
include setup.py
3740
include src/CMakeLists.txt
38-
include src/*.c
39-
include src/*.h
40-
include src/*.y
41-
include src/*.l
41+
recursive-include src *.c
42+
recursive-include src *.h
43+
recursive-include src *.y
44+
recursive-include src *.l
4245
include tests/CMakeLists.txt
4346
include tests/*.test
4447
include tests/*.res
4548
include tests/*.c
49+
include tests/*.sh
50+
include tests/testfuncs.sh.in
51+
include tests/test_macros.h.in
4652
include tests/compare_table.pl
4753
recursive-include tests/data *
48-
include tests/test_macros.h.in
4954
exclude MANIFEST.in
5055
exclude .readthedocs.yml
5156
exclude .travis.yml
5257
exclude .gitignore
5358
recursive-exclude .github *
5459
recursive-exclude _skbuild *
60+
recursive-exclude build *
61+
recursive-exclude jsbuild *
5562
recursive-exclude * .gitignore
5663
recursive-exclude * *.py[co]
5764
recursive-exclude * *~

README.manylinux.md

Lines changed: 0 additions & 29 deletions
This file was deleted.

build_wheels.sh

Lines changed: 0 additions & 28 deletions
This file was deleted.

js/README.md

Lines changed: 61 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -124,23 +124,29 @@ await decoder.initialize();
124124

125125
The optional `loglevel` and `backtrace` options will make it a bit
126126
more verbose, so you can be sure it's actually doing something. Now
127-
we will create the world's stupidest grammar, which recognizes one
128-
sentence:
127+
we will create and enable the world's stupidest grammar, which
128+
recognizes one sentence:
129129

130130
```js
131-
let fsg = decoder.create_fsg("goforward", 0, 4, [
131+
await decoder.set_fsg("goforward", 0, 4, [
132132
{from: 0, to: 1, prob: 1.0, word: "go"},
133133
{from: 1, to: 2, prob: 1.0, word: "forward"},
134134
{from: 2, to: 3, prob: 1.0, word: "ten"},
135135
{from: 3, to: 4, prob: 1.0, word: "meters"}
136136
]);
137-
await decoder.set_fsg(fsg);
138137
```
139138

140-
You should `delete()` it, unless of course you intend to create a
141-
bunch of them and swap them in and out. It is also possible to parse
142-
a grammar in [JSGF](https://en.wikipedia.org/wiki/JSGF) format, see
143-
below for an example.
139+
If you actually want to just recognize a single sentence, in order to
140+
get time alignments (this is known as "force-alignment"), we have a
141+
better method for you:
142+
143+
```js
144+
await decoder.set_align_text("go forward ten meters");
145+
```
146+
147+
It is also possible to parse a grammar in
148+
[JSGF](https://en.wikipedia.org/wiki/JSGF) format, see below for an
149+
example.
144150

145151
Okay, let's wreck a nice beach! Record yourself saying something,
146152
preferably the sentence "go forward ten meters", using SoX, for
@@ -171,6 +177,23 @@ console.log(decoder.get_hyp());
171177
console.log(decoder.get_hypseg());
172178
```
173179

180+
If you want even more detailed segmentation (phone and HMM state
181+
level) you can use `get_alignment_json`. For more detail on this
182+
format, see [the PocketSphinx
183+
documentation](https://github.com/cmusphinx/pocketsphinx#usage) as it
184+
is borrowed from there. Since this is JSON, you can create an object
185+
from it and iterate over it:
186+
187+
```js
188+
const result = JSON.parse(await decoder.get_alignment_json());
189+
for (const word of result.w) {
190+
console.log(`word ${word.t} at ${word.b} has duration ${word.d}`);
191+
for (const phone of word.w) {
192+
console.log(`phone ${phone.t} at ${phone.b} has duration ${phone.d}`);
193+
}
194+
}
195+
```
196+
174197
Finally, if your program is long-running and you think you might make
175198
multiple recognizers, you ought to delete them, because JavaScript is
176199
awful:
@@ -210,18 +233,6 @@ await require('soundswallower')(ssjs);
210233
This is simply concatenated to the model name, so you should make sure
211234
to include the trailing slash, e.g. "model/" and not "model"!
212235

213-
Currently, it should also support any Sphinx format acoustic model, many of
214-
which are available for download at [the SourceForge
215-
page](https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/).
216-
217-
To use a module, pass the directory (or base URL) containing its files
218-
(i.e. `means`, `variances`, etc) in the `hmm` property when
219-
initializing the decoder, for example:
220-
221-
```js
222-
const decoder = ssjs.Decoder({hmm: "https://example.com/excellent-acoustic-model/"});
223-
```
224-
225236

226237
Using grammars
227238
--------------
@@ -231,7 +242,7 @@ from a JavaScript string and set it in the decoder like this (a
231242
hypothetical pizza-ordering grammar):
232243

233244
```js
234-
let fsg = decoder.parse_jsgf(`#JSGF V1.0;
245+
await decoder.set_jsgf(`#JSGF V1.0;
235246
grammar pizza;
236247
public <order> = [<greeting>] [<want>] [<quantity>] [<size>] [pizza] <toppings>;
237248
<greeting> = hi | hello | yo | howdy;
@@ -241,7 +252,6 @@ public <order> = [<greeting>] [<want>] [<quantity>] [<size>] [pizza] <toppings>;
241252
<toppings> = [with] <topping> ([and] <topping>)*;
242253
<topping> = olives | mushrooms | tomatoes | (green | hot) peppers | pineapple;
243254
`);
244-
await decoder.set_fsg(fsg);
245255
```
246256

247257
Note that all the words in the grammar must first be defined in the
@@ -257,3 +267,32 @@ the internal state.
257267
await decoder.add_word("supercalifragilisticexpialidocious",
258268
"S UW P ER K AE L IH F R AE JH IH L IH S T IH K EH K S P IY AE L IH D OW SH Y UH S");
259269
```
270+
271+
Voice activity detection / Endpointing
272+
--------------------------------------
273+
274+
This is a work in progress, but it is also possible to detect the
275+
start and end of speech in an input stream using an `Endpointer`
276+
object. This requires you to pass buffers of a specific size, which
277+
is understandably difficult since WebAudio also only wants to *give*
278+
you buffers of a specific (and entirely different) size. A better
279+
example is forthcoming but it looks a bit like this (copied directly
280+
from [the
281+
documentation](https://soundswallower.readthedocs.io/en/latest/soundswallower.js.html#Endpointer.get_in_speech)):
282+
283+
```js
284+
let prev_in_speech = ep.get_in_speech();
285+
let frame_size = ep.get_frame_size();
286+
// Presume `frame` is a Float32Array of frame_size or less
287+
let speech;
288+
if (frame.length < frame_size)
289+
speech = ep.end_stream(frame);
290+
else
291+
speech = ep.process(frame);
292+
if (speech !== null) {
293+
if (!prev_in_speech)
294+
console.log("Speech started at " + ep.get_speech_start());
295+
if (!ep.get_in_speech())
296+
console.log("Speech ended at " + ep.get_speech_end());
297+
}
298+
```

js/package.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
"description": "An even smaller speech recognizer",
55
"main": "soundswallower.js",
66
"scripts": {
7-
"test": "make && mocha test_node",
8-
"tstest": "make && npx tsc && node test_typescript",
9-
"webtest": "make && xdg-open http://localhost:8000/test_web.html && python server.py"
7+
"test": "mocha test_node",
8+
"tstest": "npx tsc && node test_typescript",
9+
"webtest": "xdg-open http://localhost:8000/test_web.html && python server.py"
1010
},
1111
"repository": {
1212
"type": "git",

requirements.dev.txt

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
scikit-build~=0.13
2-
Cython~=0.29.21
3-
pytest~=7.1.2
4-
build~=0.8.0
1+
scikit-build
2+
Cython
3+
pytest
54
numpy

tests/test_acmod.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ main(int argc, char *argv[])
3636
config_t *config;
3737
FILE *rawfh;
3838
int16 *buf;
39-
int16 const *bptr;
39+
int16 *bptr;
4040
mfcc_t **cepbuf, **cptr;
4141
size_t nread, nsamps;
4242
fe_t *fe;

tests/test_acmod_grow.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ main(int argc, char *argv[])
3737
feat_t *fcb;
3838
FILE *rawfh;
3939
int16 *buf;
40-
int16 const *bptr;
40+
int16 *bptr;
4141
size_t nread, nsamps;
4242
int nfr;
4343
int frame_counter;

tests/test_fe.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,10 @@ create_shifted(fe_t *fe, int16 *data, size_t nsamp)
125125
}
126126

127127
mfcc_t **
128-
create_full(fe_t *fe, const int16 *data, size_t nsamp)
128+
create_full(fe_t *fe, int16 *data, size_t nsamp)
129129
{
130130
mfcc_t **cepbuf;
131-
const int16 *inptr;
131+
int16 *inptr;
132132
int rv, nfr, ncep;
133133

134134
TEST_EQUAL(0, fe_start(fe));
@@ -155,10 +155,10 @@ create_full(fe_t *fe, const int16 *data, size_t nsamp)
155155
}
156156

157157
mfcc_t **
158-
create_process_frames(fe_t *fe, const int16 *data, size_t nsamp)
158+
create_process_frames(fe_t *fe, int16 *data, size_t nsamp)
159159
{
160160
mfcc_t **cepbuf;
161-
const int16 *inptr;
161+
int16 *inptr;
162162
int i, rv, nfr, ncep, frame_shift, frame_size;
163163

164164
fe_get_input_size(fe, &frame_shift, &frame_size);
@@ -195,10 +195,10 @@ create_process_frames(fe_t *fe, const int16 *data, size_t nsamp)
195195

196196

197197
mfcc_t **
198-
create_fragments(fe_t *fe, const int16 *data, size_t nsamp)
198+
create_fragments(fe_t *fe, int16 *data, size_t nsamp)
199199
{
200200
mfcc_t **cepbuf, **cepptr;
201-
const int16 *inptr;
201+
int16 *inptr;
202202
int i, rv, nfr, ncep, frame_shift, frame_size;
203203
/* Should total 1024 :) */
204204
size_t fragments[] = {

tests/test_fe_float32.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,10 @@ create_shifted(fe_t *fe, float32 *data, size_t nsamp)
127127
}
128128

129129
mfcc_t **
130-
create_full(fe_t *fe, const float32 *data, size_t nsamp)
130+
create_full(fe_t *fe, float32 *data, size_t nsamp)
131131
{
132132
mfcc_t **cepbuf;
133-
const float32 *inptr;
133+
float32 *inptr;
134134
int rv, nfr, ncep;
135135

136136
TEST_EQUAL(0, fe_start(fe));
@@ -157,10 +157,10 @@ create_full(fe_t *fe, const float32 *data, size_t nsamp)
157157
}
158158

159159
mfcc_t **
160-
create_process_frames(fe_t *fe, const float32 *data, size_t nsamp)
160+
create_process_frames(fe_t *fe, float32 *data, size_t nsamp)
161161
{
162162
mfcc_t **cepbuf;
163-
const float32 *inptr;
163+
float32 *inptr;
164164
int i, rv, nfr, ncep, frame_shift, frame_size;
165165

166166
fe_get_input_size(fe, &frame_shift, &frame_size);
@@ -197,10 +197,10 @@ create_process_frames(fe_t *fe, const float32 *data, size_t nsamp)
197197

198198

199199
mfcc_t **
200-
create_fragments(fe_t *fe, const float32 *data, size_t nsamp)
200+
create_fragments(fe_t *fe, float32 *data, size_t nsamp)
201201
{
202202
mfcc_t **cepbuf, **cepptr;
203-
const float32 *inptr;
203+
float32 *inptr;
204204
int i, rv, nfr, ncep, frame_shift, frame_size;
205205
/* Should total 1024 :) */
206206
size_t fragments[] = {
@@ -238,11 +238,11 @@ create_fragments(fe_t *fe, const float32 *data, size_t nsamp)
238238

239239

240240
mfcc_t **
241-
create_mixed_fragments(fe_t *fe, const float32 *data, const int16 *idata, size_t nsamp, int odd)
241+
create_mixed_fragments(fe_t *fe, float32 *data, int16 *idata, size_t nsamp, int odd)
242242
{
243243
mfcc_t **cepbuf, **cepptr;
244-
const float32 *inptr;
245-
const int16 *iinptr;
244+
float32 *inptr;
245+
int16 *iinptr;
246246
int i, rv, nfr, ncep, frame_shift, frame_size;
247247
/* Should total 1024 :) */
248248
size_t fragments[] = {

0 commit comments

Comments
 (0)