Skip to content

Commit ead27e7

Browse files
authored
Merge pull request #27 from ReadAlongs/merge_pocketsphinx5
Merge new features from PocketSphinx 5 (and simplify things)
2 parents f895cc2 + ba81964 commit ead27e7

File tree

208 files changed

+19655
-9336
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

208 files changed

+19655
-9336
lines changed

.github/workflows/tests.yml

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,18 @@ jobs:
88
steps:
99
- name: Checkout
1010
uses: actions/checkout@v3
11+
- name: Install
12+
run: |
13+
sudo apt-get install sox ninja-build
1114
- name: Build
1215
run: |
1316
mkdir build
14-
(cd build && cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=install ..)
15-
(cd build && make)
17+
cmake -S . -B build -GNinja -DCMAKE_BUILD_TYPE=Debug
18+
cmake --build build
1619
1720
- name: Run tests
1821
run: |
19-
(cd build && make test)
22+
CTEST_OUTPUT_ON_FAILURE=1 cmake --build build --target check
2023
pytest:
2124
runs-on: ubuntu-latest
2225
steps:
@@ -37,7 +40,10 @@ jobs:
3740
uses: actions/checkout@v3
3841
- name: Build
3942
run: |
40-
(cd js && npm install && npm run build:prod)
43+
emcmake cmake -S . -B jsbuild
44+
cmake --build jsbuild
4145
- name: Run tests
4246
run: |
43-
(cd js && npm test)
47+
(cd jsbuild && npm install --also=dev)
48+
(cd jsbuild && npm test)
49+
(cd jsbuild && npm run tstest)

CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@ include(CheckSymbolExists)
44
include(CheckLibraryExists)
55
include(TestBigEndian)
66

7-
project(soundswallower VERSION 0.3.2
7+
project(soundswallower VERSION 0.4.0
88
DESCRIPTION "An even smaller speech recognizer")
99

1010
if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
1111
include(CTest)
12-
enable_testing()
12+
add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND})
1313
endif()
1414

1515
CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
@@ -57,7 +57,7 @@ elseif(SKBUILD)
5757
add_subdirectory(py)
5858
else()
5959
# C shared library build
60-
option(BUILD_SHARED_LIBS "Build using shared libraries" ON)
60+
option(BUILD_SHARED_LIBS "Build using shared libraries" OFF)
6161
# Build the core library source (needs to go after BUILD_SHARED_LIBS
6262
# option because CMake is magically stupid)
6363
add_subdirectory(src)

LICENSE

Lines changed: 105 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -29,24 +29,108 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2929
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
3030
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3131

32-
The JavaScript audio and web worker code is from pocketsphinx.js and is:
33-
Copyright © 2013-2017 Sylvain Chevalier
34-
35-
Permission is hereby granted, free of charge, to any person obtaining
36-
a copy of this software and associated documentation files (the
37-
"Software"), to deal in the Software without restriction, including
38-
without limitation the rights to use, copy, modify, merge, publish,
39-
distribute, sublicense, and/or sell copies of the Software, and to
40-
permit persons to whom the Software is furnished to do so, subject to
41-
the following conditions:
42-
43-
The above copyright notice and this permission notice shall be
44-
included in all copies or substantial portions of the Software.
45-
46-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
47-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
48-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
49-
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
50-
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
51-
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
52-
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32+
WebRTC VAD code (in src/vad):
33+
34+
Copyright (c) 2011, The WebRTC project authors. All rights reserved.
35+
36+
Redistribution and use in source and binary forms, with or without
37+
modification, are permitted provided that the following conditions are
38+
met:
39+
40+
* Redistributions of source code must retain the above copyright
41+
notice, this list of conditions and the following disclaimer.
42+
43+
* Redistributions in binary form must reproduce the above copyright
44+
notice, this list of conditions and the following disclaimer in
45+
the documentation and/or other materials provided with the
46+
distribution.
47+
48+
* Neither the name of Google nor the names of its contributors may
49+
be used to endorse or promote products derived from this software
50+
without specific prior written permission.
51+
52+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
53+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
54+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
55+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
56+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
59+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
60+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
61+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
62+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
63+
64+
Python WebRTC VAD code and test files (in py and test/data/vad):
65+
66+
The MIT License (MIT)
67+
68+
Copyright (c) 2016 John Wiseman
69+
70+
Permission is hereby granted, free of charge, to any person obtaining a copy
71+
of this software and associated documentation files (the "Software"), to deal
72+
in the Software without restriction, including without limitation the rights
73+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
74+
copies of the Software, and to permit persons to whom the Software is
75+
furnished to do so, subject to the following conditions:
76+
77+
The above copyright notice and this permission notice shall be included in all
78+
copies or substantial portions of the Software.
79+
80+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
81+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
82+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
83+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
84+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
85+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
86+
SOFTWARE.
87+
88+
JSON parser (in src/jsmn.h):
89+
90+
Copyright (c) 2010 Serge A. Zaitsev
91+
92+
Permission is hereby granted, free of charge, to any person obtaining a copy
93+
of this software and associated documentation files (the "Software"), to deal
94+
in the Software without restriction, including without limitation the rights
95+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
96+
copies of the Software, and to permit persons to whom the Software is
97+
furnished to do so, subject to the following conditions:
98+
99+
The above copyright notice and this permission notice shall be included in
100+
all copies or substantial portions of the Software.
101+
102+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
103+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
104+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
105+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
106+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
107+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
108+
THE SOFTWARE.
109+
110+
Escaping code in JSON serialization (src/config.c):
111+
112+
Copyright (C) 2014 James McLaughlin. All rights reserved.
113+
https://github.com/udp/json-builder
114+
115+
Redistribution and use in source and binary forms, with or without
116+
modification, are permitted provided that the following conditions
117+
are met:
118+
119+
1. Redistributions of source code must retain the above copyright
120+
notice, this list of conditions and the following disclaimer.
121+
122+
2. Redistributions in binary form must reproduce the above copyright
123+
notice, this list of conditions and the following disclaimer in the
124+
documentation and/or other materials provided with the distribution.
125+
126+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
127+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
128+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
129+
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
130+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
131+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
132+
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
133+
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
134+
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
135+
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
136+
SUCH DAMAGE.

README.manylinux.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@ Building binary distributions for Linux
22
---------------------------------------
33

44
To build distributions that are compatible with all the various Linux
5-
distributions, and can therefore be uploaded to PyPI, you can use the
6-
Docker images provided by the [manylinux
5+
distributions, and can therefore be uploaded to PyPI, we now use
6+
[cibuildwheel](https://pypi.org/project/cibuildwheel/). Alternatively, you
7+
can use the Docker images provided by the [manylinux
78
project](https://github.com/pypa/manylinux).
89

910
The full sequence of commands to create Linux wheels for Python 3.7

README.md

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ useful speech technologies.
1313

1414
With that in mind the current version is limited to finite-state
1515
grammar recognition. In addition, the eternally problematic and
16-
badly-designed audio library as well as all other external
16+
badly-designed audio library as well as (almost) all other external
1717
dependencies have been removed.
1818

1919
Compiling SoundSwallower
@@ -22,12 +22,10 @@ Compiling SoundSwallower
2222
Currently SoundSwallower can be built in several different ways. To
2323
build the C shared library, run CMake in the standard way:
2424

25-
mkdir build
26-
cd build
27-
cmake ..
28-
make
29-
make test
30-
make install
25+
cmake -S . -B build
26+
cmake --build build
27+
cmake --build build --target check
28+
sudo cmake --build build --target install
3129

3230
Note that this isn't terribly useful as there is no command-line
3331
frontend. You probably want to target JavaScript or Python.
@@ -62,27 +60,54 @@ alignment for one or more input files, for example:
6260
Note that multiple input files are not particularly useful for
6361
`--align` or `--align-text` as they will simply (try to) align the
6462
same text to each file. The output results (a list of time-aligned
65-
words) can be written to a JSON file with `--output`.
63+
words) can be written to a JSON file with `--output`. To obtain
64+
phoneme-level alignments, add the `--phone-align` flag. The JSON
65+
format (which has recently changed) is the same as used in
66+
[PocketSphinx 5.0](https://github.com/cmusphinx/pocketsphinx) and is
67+
more compact than it is readable, but briefly, it consists of one
68+
dictionary (or "object" in JavaScript-ese) per line, where the `t`
69+
attribute is the recognized text and the `w` attribute contains a list
70+
of word segmentations, with start time in `b` and duration in `d` and,
71+
optionally, a list of phone segmentations in each word's own nested `w` attribute with
72+
the same format.
6673

6774
See also the [full documentation of the Python
6875
API](https://soundswallower.readthedocs.io/en/latest/soundswallower.html).
6976

7077
Compiling to JavaScript/WebAssembly
7178
-----------------------------------
7279

80+
To use the JavaScript library in your projects:
81+
82+
npm install soundswallower
83+
7384
To build the JavaScript library, use CMake with
7485
[Emscripten](https://emscripten.org/):
7586

76-
cd js
77-
emcmake cmake ..
78-
emmake make
87+
emcmake cmake -S . -B jsbuild
88+
cmake --build jsbuild
7989

80-
This will create `js/soundswallower.js` and `js/soundswallower.wasm`
81-
in the `jsbuild` directory, which you can then include in your
82-
projects. Demo applications can be seen at
90+
This will create `soundswallower.js` and `soundswallower.wasm` in the
91+
`jsbuild` directory, which you can then include in your projects. You
92+
can also use `npm link` to link it to your `node_modules` folder for
93+
development. Demo applications can be seen at
8394
https://github.com/dhdaines/alignment-demo and
8495
https://github.com/dhdaines/soundswallower-demo.
8596

97+
To run the JavaScript tests:
98+
99+
cd jsbuild
100+
npm install
101+
npm test
102+
npx tsc
103+
node test_typescript.js
104+
105+
And in the browser:
106+
107+
cd jsbuild
108+
python server.py
109+
# Navigate to http://localhost:8000/test_web.html
110+
86111
For more details on the JavaScript implementation and API, see
87112
[js/README.md](https://github.com/ReadAlongs/SoundSwallower/blob/master/js/README.md).
88113

TODO.md

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,23 @@
11
Roadmap:
22

3-
- 0.4.0: Merge PocketSphinx 5.0.0
4-
- VAD/Endpointer
5-
- Two-pass alignment (but easier)
6-
- Config updates, but smaller (no s3kr3t command-line, etc),
7-
actually we were mostly there already
8-
- JSGF correctness
9-
- JSON decoding/alignment output
3+
- 1.0: Finalize API
4+
- ES6 module (possibly separate for Node vs. Web)
5+
- Optimize JSGF compiler
106
- Support IPA dictionaries
11-
- ES6 module
12-
13-
- 1.0.0: Update API
14-
- Clearly define use cases
7+
- Improve Endpointer/VAD
8+
- Clearly define use cases and restructure API for them
159
- Live: feed data asynchronously, check results synchronously or emit events
1610
- *recording* is real-time in a separate thread
11+
- VAD/Endpointing should be done in the recording thread
12+
- Feature extraction could be done there but this is not necessary
1713
- *decoding* is not real-time, can be decomposed to microtasks
18-
- Single: pass data with promise of result, emit progress events
19-
- Remove remaining PocketSphinx API junk if any
14+
- Single/Batch: pass data with promise of result, emit progress events
15+
- Good fit for async
2016
- Easier support for observable/event type uses
2117
- async/await/promise is actually not a great fit
2218
- Support web audio formats directly
23-
- Change ownership semantics to fit use cases
2419
- Better solution for float vs. int in front-end
2520

26-
- 2.0.0: Improved modeling
27-
- DNN acoustic models?
28-
- WFST search?
21+
- 2.0: Improved modeling
22+
- WFST search
23+
- DNN acoustic models

build_wheels.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/sh
22

33
set -e
4-
VERSION=0.3.2
4+
VERSION=0.4.0
55
U=$(id -u)
66
G=$(id -g)
77

config.h.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#cmakedefine HAVE_UNISTD_H
2+
#cmakedefine HAVE_STDINT_H
23
#cmakedefine HAVE_SYS_TYPES_H
34
#cmakedefine HAVE_SYS_STAT_H
45
#cmakedefine HAVE_SNPRINTF

include/soundswallower.h

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/* -*- c-basic-offset:4; indent-tabs-mode: nil -*- */
2+
/* ====================================================================
3+
* Copyright (c) 2022 David Huggins-Daines. All rights reserved.
4+
*
5+
* Redistribution and use in source and binary forms, with or without
6+
* modification, are permitted provided that the following conditions
7+
* are met:
8+
*
9+
* 1. Redistributions of source code must retain the above copyright
10+
* notice, this list of conditions and the following disclaimer.
11+
*
12+
* 2. Redistributions in binary form must reproduce the above copyright
13+
* notice, this list of conditions and the following disclaimer in
14+
* the documentation and/or other materials provided with the
15+
* distribution.
16+
*
17+
*
18+
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESSED
19+
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20+
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21+
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR NOR ITS EMPLOYEES BE
22+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
24+
* OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25+
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26+
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
28+
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29+
* DAMAGE.
30+
*
31+
* ====================================================================
32+
*/
33+
#ifndef __SOUNDSWALLOWER_H__
34+
#define __SOUNDSWALLOWER_H__
35+
36+
#ifdef __cplusplus
37+
extern "C" {
38+
#endif
39+
#if 0
40+
}
41+
#endif
42+
43+
/* Not much here for the moment! */
44+
#include <soundswallower/decoder.h>
45+
46+
#ifdef __cplusplus
47+
} /* extern "C" */
48+
#endif
49+
50+
#endif /* __SOUNDSWALLOWER_H__ */

0 commit comments

Comments
 (0)