Skip to content

Commit c7d0ad8

Browse files
authored
Merge 7b1c22f into be58ca5
2 parents be58ca5 + 7b1c22f commit c7d0ad8

7 files changed

Lines changed: 595 additions & 7 deletions

File tree

nvdaHelper/espeak/sconscript

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ env.Append(
111111
"/DUSE_SPEECHPLAYER=1",
112112
"/DUSE_KLATT=1",
113113
"/DUSE_LIBSONIC=1",
114+
"/DSONIC_DLL",
114115
]
115116
)
116117

@@ -1034,10 +1035,13 @@ def espeak_compileDict_buildAction(
10341035
return ACTION_SUCCESS
10351036

10361037

1037-
sonicLib = env.StaticLibrary(
1038+
sonicLib = env.SharedLibrary(
10381039
target="sonic",
10391040
srcdir=sonicSrcDir.abspath,
1040-
source="sonic.c",
1041+
source=[
1042+
"sonic.c",
1043+
Dir(".").File("sonic.def"),
1044+
]
10411045
)
10421046

10431047
espeakLib = env.SharedLibrary(
@@ -1082,7 +1086,6 @@ espeakLib = env.SharedLibrary(
10821086
"tr_languages.c",
10831087
"voices.c",
10841088
"wavegen.c",
1085-
sonicLib,
10861089
# espeak OPT_SPEECHPLAYER block
10871090
"sPlayer.c",
10881091
"../speechPlayer/src/frame.cpp",
@@ -1100,7 +1103,8 @@ espeakLib = env.SharedLibrary(
11001103
# com\ttsengine.cpp
11011104
# We do not use the ASYNC compile option in espeak.
11021105
],
1103-
LIBS=["advapi32"],
1106+
LIBS=["advapi32", "sonic"],
1107+
LIBPATH='.',
11041108
)
11051109

11061110

@@ -1151,6 +1155,7 @@ for dictFileName, (langCode, inputFiles) in espeakDictionaryCompileList.items():
11511155
)
11521156

11531157
env.Install(synthDriversDir, espeakLib)
1158+
env.Install(synthDriversDir, sonicLib)
11541159

11551160
# install espeak-ng-data
11561161
targetEspeakDataDir = synthDriversDir.Dir("espeak-ng-data")

nvdaHelper/espeak/sonic.def

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
EXPORTS
2+
sonicCreateStream
3+
sonicDestroyStream
4+
sonicSetUserData
5+
sonicGetUserData
6+
sonicWriteFloatToStream
7+
sonicWriteShortToStream
8+
sonicWriteUnsignedCharToStream
9+
sonicReadFloatFromStream
10+
sonicReadShortFromStream
11+
sonicReadUnsignedCharFromStream
12+
sonicFlushStream
13+
sonicSamplesAvailable
14+
sonicGetSpeed
15+
sonicSetSpeed
16+
sonicGetPitch
17+
sonicSetPitch
18+
sonicGetRate
19+
sonicSetRate
20+
sonicGetVolume
21+
sonicSetVolume
22+
sonicGetChordPitch
23+
sonicSetChordPitch
24+
sonicGetQuality
25+
sonicSetQuality
26+
sonicGetSampleRate
27+
sonicSetSampleRate
28+
sonicGetNumChannels
29+
sonicSetNumChannels
30+
sonicChangeFloatSpeed
31+
sonicChangeShortSpeed

nvdaHelper/espeak/sonic.h

Lines changed: 306 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,306 @@
1+
// This is a modified version of the Sonic Library's header,
2+
// which added __declspec(dllimport) to each function
3+
// if SONIC_DLL is defined.
4+
// Used when compiling eSpeak dynamically linked to Sonic.
5+
6+
#ifndef SONIC_H_
7+
#define SONIC_H_
8+
9+
/* Sonic library
10+
Copyright 2010
11+
Bill Cox
12+
This file is part of the Sonic Library.
13+
14+
This file is licensed under the Apache 2.0 license.
15+
*/
16+
17+
/*
18+
The Sonic Library implements a new algorithm invented by Bill Cox for the
19+
specific purpose of speeding up speech by high factors at high quality. It
20+
generates smooth speech at speed up factors as high as 6X, possibly more. It is
21+
also capable of slowing down speech, and generates high quality results
22+
regardless of the speed up or slow down factor. For speeding up speech by 2X or
23+
more, the following equation is used:
24+
25+
newSamples = period/(speed - 1.0)
26+
scale = 1.0/newSamples;
27+
28+
where period is the current pitch period, determined using AMDF or any other
29+
pitch estimator, and speed is the speedup factor. If the current position in
30+
the input stream is pointed to by "samples", and the current output stream
31+
position is pointed to by "out", then newSamples number of samples can be
32+
generated with:
33+
34+
out[t] = (samples[t]*(newSamples - t) + samples[t + period]*t)/newSamples;
35+
36+
where t = 0 to newSamples - 1.
37+
38+
For speed factors < 2X, the PICOLA algorithm is used. The above
39+
algorithm is first used to double the speed of one pitch period. Then, enough
40+
input is directly copied from the input to the output to achieve the desired
41+
speed up factor, where 1.0 < speed < 2.0. The amount of data copied is derived:
42+
43+
speed = (2*period + length)/(period + length)
44+
speed*length + speed*period = 2*period + length
45+
length(speed - 1) = 2*period - speed*period
46+
length = period*(2 - speed)/(speed - 1)
47+
48+
For slowing down speech where 0.5 < speed < 1.0, a pitch period is inserted into
49+
the output twice, and length of input is copied from the input to the output
50+
until the output desired speed is reached. The length of data copied is:
51+
52+
length = period*(speed - 0.5)/(1 - speed)
53+
54+
For slow down factors below 0.5, no data is copied, and an algorithm
55+
similar to high speed factors is used.
56+
*/
57+
58+
/* Uncomment this to use sine-wave based overlap-add, which in theory can improve
59+
sound quality slightly, at the expense of lots of floating point math. */
60+
/* #define SONIC_USE_SIN */
61+
62+
#ifdef SONIC_DLL
63+
#ifdef SONIC_EXPORT
64+
#define SONIC_API __declspec(dllexport)
65+
#else
66+
#define SONIC_API __declspec(dllimport)
67+
#endif /* SONIC_EXPORT */
68+
#else
69+
#define SONIC_API
70+
#endif /* SONIC_DLL */
71+
72+
#ifdef __cplusplus
73+
extern "C" {
74+
#endif
75+
76+
#ifdef SONIC_INTERNAL
77+
/* The following #define's are used to change the names of the routines defined
78+
* here so that a new library (i.e. speedy) can reuse these names, and then call
79+
* the original names. We do this for two reasons: 1) we don't want to change
80+
* the original API, and 2) we want to add a shim, using the original names and
81+
* still call these routines.
82+
*
83+
* Original users of this API and the libsonic library need to do nothing. The
84+
* original behavior remains.
85+
*
86+
* A new user that adds some additional functionality above this library (a shim)
87+
* should #define SONIC_INTERNAL before including this file, undefine all these
88+
* symbols and call the sonicIntXXX functions directly.
89+
*/
90+
#define sonicCreateStream sonicIntCreateStream
91+
#define sonicDestroyStream sonicIntDestroyStream
92+
#define sonicWriteFloatToStream sonicIntWriteFloatToStream
93+
#define sonicWriteShortToStream sonicIntWriteShortToStream
94+
#define sonicWriteUnsignedCharToStream sonicIntWriteUnsignedCharToStream
95+
#define sonicReadFloatFromStream sonicIntReadFloatFromStream
96+
#define sonicReadShortFromStream sonicIntReadShortFromStream
97+
#define sonicReadUnsignedCharFromStream sonicIntReadUnsignedCharFromStream
98+
#define sonicFlushStream sonicIntFlushStream
99+
#define sonicSamplesAvailable sonicIntSamplesAvailable
100+
#define sonicGetSpeed sonicIntGetSpeed
101+
#define sonicSetSpeed sonicIntSetSpeed
102+
#define sonicGetPitch sonicIntGetPitch
103+
#define sonicSetPitch sonicIntSetPitch
104+
#define sonicGetRate sonicIntGetRate
105+
#define sonicSetRate sonicIntSetRate
106+
#define sonicGetVolume sonicIntGetVolume
107+
#define sonicSetVolume sonicIntSetVolume
108+
#define sonicGetQuality sonicIntGetQuality
109+
#define sonicSetQuality sonicIntSetQuality
110+
#define sonicGetSampleRate sonicIntGetSampleRate
111+
#define sonicSetSampleRate sonicIntSetSampleRate
112+
#define sonicGetNumChannels sonicIntGetNumChannels
113+
#define sonicGetUserData sonicIntGetUserData
114+
#define sonicSetUserData sonicIntSetUserData
115+
#define sonicSetNumChannels sonicIntSetNumChannels
116+
#define sonicChangeFloatSpeed sonicIntChangeFloatSpeed
117+
#define sonicChangeShortSpeed sonicIntChangeShortSpeed
118+
#define sonicEnableNonlinearSpeedup sonicIntEnableNonlinearSpeedup
119+
#define sonicSetDurationFeedbackStrength sonicIntSetDurationFeedbackStrength
120+
#define sonicComputeSpectrogram sonicIntComputeSpectrogram
121+
#define sonicGetSpectrogram sonicIntGetSpectrogram
122+
123+
#endif /* SONIC_INTERNAL */
124+
125+
/* This specifies the range of voice pitches we try to match.
126+
Note that if we go lower than 65, we could overflow in findPitchInRange */
127+
#ifndef SONIC_MIN_PITCH
128+
#define SONIC_MIN_PITCH 65
129+
#endif /* SONIC_MIN_PITCH */
130+
#ifndef SONIC_MAX_PITCH
131+
#define SONIC_MAX_PITCH 400
132+
#endif /* SONIC_MAX_PITCH */
133+
134+
/* These are used to down-sample some inputs to improve speed */
135+
#define SONIC_AMDF_FREQ 4000
136+
137+
struct sonicStreamStruct;
138+
typedef struct sonicStreamStruct* sonicStream;
139+
140+
/* For all of the following functions, numChannels is multiplied by numSamples
141+
to determine the actual number of values read or returned. */
142+
143+
/* Create a sonic stream. Return NULL only if we are out of memory and cannot
144+
allocate the stream. Set numChannels to 1 for mono, and 2 for stereo. */
145+
SONIC_API sonicStream sonicCreateStream(int sampleRate, int numChannels);
146+
/* Destroy the sonic stream. */
147+
SONIC_API void sonicDestroyStream(sonicStream stream);
148+
/* Attach user data to the stream. */
149+
SONIC_API void sonicSetUserData(sonicStream stream, void *userData);
150+
/* Retrieve user data attached to the stream. */
151+
SONIC_API void *sonicGetUserData(sonicStream stream);
152+
/* Use this to write floating point data to be sped up or slowed down into the stream.
153+
Values must be between -1 and 1. Return 0 if memory realloc failed,
154+
otherwise 1 */
155+
SONIC_API int sonicWriteFloatToStream(sonicStream stream, const float* samples, int numSamples);
156+
/* Use this to write 16-bit data to be sped up or slowed down into the stream.
157+
Return 0 if memory realloc failed, otherwise 1 */
158+
SONIC_API int sonicWriteShortToStream(sonicStream stream, const short* samples, int numSamples);
159+
/* Use this to write 8-bit unsigned data to be sped up or slowed down into the stream.
160+
Return 0 if memory realloc failed, otherwise 1 */
161+
SONIC_API int sonicWriteUnsignedCharToStream(sonicStream stream, const unsigned char* samples,
162+
int numSamples);
163+
/* Use this to read floating point data out of the stream. Sometimes no data
164+
will be available, and zero is returned, which is not an error condition. */
165+
SONIC_API int sonicReadFloatFromStream(sonicStream stream, float* samples,
166+
int maxSamples);
167+
/* Use this to read 16-bit data out of the stream. Sometimes no data will
168+
be available, and zero is returned, which is not an error condition. */
169+
SONIC_API int sonicReadShortFromStream(sonicStream stream, short* samples,
170+
int maxSamples);
171+
/* Use this to read 8-bit unsigned data out of the stream. Sometimes no data
172+
will be available, and zero is returned, which is not an error condition. */
173+
SONIC_API int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char* samples,
174+
int maxSamples);
175+
/* Force the sonic stream to generate output using whatever data it currently
176+
has. No extra delay will be added to the output, but flushing in the middle
177+
of words could introduce distortion. */
178+
SONIC_API int sonicFlushStream(sonicStream stream);
179+
/* Return the number of samples in the output buffer */
180+
SONIC_API int sonicSamplesAvailable(sonicStream stream);
181+
/* Get the speed of the stream. */
182+
SONIC_API float sonicGetSpeed(sonicStream stream);
183+
/* Set the speed of the stream. */
184+
SONIC_API void sonicSetSpeed(sonicStream stream, float speed);
185+
/* Get the pitch of the stream. */
186+
SONIC_API float sonicGetPitch(sonicStream stream);
187+
/* Set the pitch of the stream. */
188+
SONIC_API void sonicSetPitch(sonicStream stream, float pitch);
189+
/* Get the rate of the stream. */
190+
SONIC_API float sonicGetRate(sonicStream stream);
191+
/* Set the rate of the stream. */
192+
SONIC_API void sonicSetRate(sonicStream stream, float rate);
193+
/* Get the scaling factor of the stream. */
194+
SONIC_API float sonicGetVolume(sonicStream stream);
195+
/* Set the scaling factor of the stream. */
196+
SONIC_API void sonicSetVolume(sonicStream stream, float volume);
197+
/* Chord pitch is DEPRECATED. AFAIK, it was never used by anyone. These
198+
functions still exist to avoid breaking existing code. */
199+
/* Get the chord pitch setting. */
200+
SONIC_API int sonicGetChordPitch(sonicStream stream);
201+
/* Set chord pitch mode on or off. Default is off. See the documentation
202+
page for a description of this feature. */
203+
SONIC_API void sonicSetChordPitch(sonicStream stream, int useChordPitch);
204+
/* Get the quality setting. */
205+
SONIC_API int sonicGetQuality(sonicStream stream);
206+
/* Set the "quality". Default 0 is virtually as good as 1, but very much
207+
* faster. */
208+
SONIC_API void sonicSetQuality(sonicStream stream, int quality);
209+
/* Get the sample rate of the stream. */
210+
SONIC_API int sonicGetSampleRate(sonicStream stream);
211+
/* Set the sample rate of the stream. This will drop any samples that have not
212+
* been read. */
213+
SONIC_API void sonicSetSampleRate(sonicStream stream, int sampleRate);
214+
/* Get the number of channels. */
215+
SONIC_API int sonicGetNumChannels(sonicStream stream);
216+
/* Set the number of channels. This will drop any samples that have not been
217+
* read. */
218+
SONIC_API void sonicSetNumChannels(sonicStream stream, int numChannels);
219+
/* This is a non-stream oriented interface to just change the speed of a sound
220+
sample. It works in-place on the sample array, so there must be at least
221+
speed*numSamples available space in the array. Returns the new number of
222+
samples. */
223+
SONIC_API int sonicChangeFloatSpeed(float* samples, int numSamples, float speed,
224+
float pitch, float rate, float volume,
225+
int useChordPitch, int sampleRate, int numChannels);
226+
/* This is a non-stream oriented interface to just change the speed of a sound
227+
sample. It works in-place on the sample array, so there must be at least
228+
speed*numSamples available space in the array. Returns the new number of
229+
samples. */
230+
SONIC_API int sonicChangeShortSpeed(short* samples, int numSamples, float speed,
231+
float pitch, float rate, float volume,
232+
int useChordPitch, int sampleRate, int numChannels);
233+
234+
#ifdef SONIC_SPECTROGRAM
235+
/*
236+
This code generates high quality spectrograms from sound samples, using
237+
Time-Aliased-FFTs as described at:
238+
239+
https://github.com/waywardgeek/spectrogram
240+
241+
Basically, two adjacent pitch periods are overlap-added to create a sound
242+
sample that accurately represents the speech sound at that moment in time.
243+
This set of samples is converted to a spectral line using an FFT, and the result
244+
is saved as a single spectral line at that moment in time. The resulting
245+
spectral lines vary in resolution (it is equal to the number of samples in the
246+
pitch period), and the spacing of spectral lines also varies (proportional to
247+
the number of samples in the pitch period).
248+
249+
To generate a bitmap, linear interpolation is used to render the grayscale
250+
value at any particular point in time and frequency.
251+
*/
252+
253+
#define SONIC_MAX_SPECTRUM_FREQ 5000
254+
255+
struct sonicSpectrogramStruct;
256+
struct sonicBitmapStruct;
257+
typedef struct sonicSpectrogramStruct* sonicSpectrogram;
258+
typedef struct sonicBitmapStruct* sonicBitmap;
259+
260+
/* sonicBitmap objects represent spectrograms as grayscale bitmaps where each
261+
pixel is from 0 (black) to 255 (white). Bitmaps are rows*cols in size.
262+
Rows are indexed top to bottom and columns are indexed left to right */
263+
struct sonicBitmapStruct {
264+
unsigned char* data;
265+
int numRows;
266+
int numCols;
267+
};
268+
269+
typedef struct sonicBitmapStruct* sonicBitmap;
270+
271+
/* Enable computation of a spectrogram on the fly. */
272+
SONIC_API void sonicComputeSpectrogram(sonicStream stream);
273+
274+
/* Get the spectrogram. */
275+
SONIC_API sonicSpectrogram sonicGetSpectrogram(sonicStream stream);
276+
277+
/* Create an empty spectrogram. Called automatically if sonicComputeSpectrogram
278+
has been called. */
279+
SONIC_API sonicSpectrogram sonicCreateSpectrogram(int sampleRate);
280+
281+
/* Destroy the spectrogram. This is called automatically when calling
282+
sonicDestroyStream. */
283+
SONIC_API void sonicDestroySpectrogram(sonicSpectrogram spectrogram);
284+
285+
/* Convert the spectrogram to a bitmap. Caller must destroy bitmap when done. */
286+
SONIC_API sonicBitmap sonicConvertSpectrogramToBitmap(sonicSpectrogram spectrogram,
287+
int numRows, int numCols);
288+
289+
/* Destroy a bitmap returned by sonicConvertSpectrogramToBitmap. */
290+
SONIC_API void sonicDestroyBitmap(sonicBitmap bitmap);
291+
292+
SONIC_API int sonicWritePGM(sonicBitmap bitmap, char* fileName);
293+
294+
/* Add two pitch periods worth of samples to the spectrogram. There must be
295+
2*period samples. Time should advance one pitch period for each call to
296+
this function. */
297+
SONIC_API void sonicAddPitchPeriodToSpectrogram(sonicSpectrogram spectrogram,
298+
short* samples, int numSamples,
299+
int numChannels);
300+
#endif /* SONIC_SPECTROGRAM */
301+
302+
#ifdef __cplusplus
303+
}
304+
#endif
305+
306+
#endif /* SONIC_H_ */

0 commit comments

Comments
 (0)