|
| 1 | +// This is a modified version of the Sonic Library's header, |
| 2 | +// which adds __declspec(dllimport) to each function |
| 3 | +// if SONIC_DLL is defined (or __declspec(dllexport) if SONIC_EXPORT is defined too). |
| 4 | +// Used when compiling eSpeak dynamically linked to Sonic. |
| 5 | + |
| 6 | +#ifndef SONIC_H_ |
| 7 | +#define SONIC_H_ |
| 8 | + |
| 9 | +/* Sonic library |
| 10 | + Copyright 2010 |
| 11 | + Bill Cox |
| 12 | + This file is part of the Sonic Library. |
| 13 | +
|
| 14 | + This file is licensed under the Apache 2.0 license. |
| 15 | +*/ |
| 16 | + |
| 17 | +/* |
| 18 | +The Sonic Library implements a new algorithm invented by Bill Cox for the |
| 19 | +specific purpose of speeding up speech by high factors at high quality. It |
| 20 | +generates smooth speech at speed up factors as high as 6X, possibly more. It is |
| 21 | +also capable of slowing down speech, and generates high quality results |
| 22 | +regardless of the speed up or slow down factor. For speeding up speech by 2X or |
| 23 | +more, the following equation is used: |
| 24 | +
|
| 25 | + newSamples = period/(speed - 1.0) |
| 26 | + scale = 1.0/newSamples; |
| 27 | +
|
| 28 | +where period is the current pitch period, determined using AMDF or any other |
| 29 | +pitch estimator, and speed is the speedup factor. If the current position in |
| 30 | +the input stream is pointed to by "samples", and the current output stream |
| 31 | +position is pointed to by "out", then newSamples number of samples can be |
| 32 | +generated with: |
| 33 | +
|
| 34 | + out[t] = (samples[t]*(newSamples - t) + samples[t + period]*t)/newSamples; |
| 35 | +
|
| 36 | +where t = 0 to newSamples - 1. |
| 37 | +
|
| 38 | +For speed factors < 2X, the PICOLA algorithm is used. The above |
| 39 | +algorithm is first used to double the speed of one pitch period. Then, enough |
| 40 | +input is directly copied from the input to the output to achieve the desired |
| 41 | +speed up factor, where 1.0 < speed < 2.0. The amount of data copied is derived: |
| 42 | +
|
| 43 | + speed = (2*period + length)/(period + length) |
| 44 | + speed*length + speed*period = 2*period + length |
| 45 | + length(speed - 1) = 2*period - speed*period |
| 46 | + length = period*(2 - speed)/(speed - 1) |
| 47 | +
|
| 48 | +For slowing down speech where 0.5 < speed < 1.0, a pitch period is inserted into |
| 49 | +the output twice, and length of input is copied from the input to the output |
| 50 | +until the output desired speed is reached. The length of data copied is: |
| 51 | +
|
| 52 | + length = period*(speed - 0.5)/(1 - speed) |
| 53 | +
|
| 54 | +For slow down factors below 0.5, no data is copied, and an algorithm |
| 55 | +similar to high speed factors is used. |
| 56 | +*/ |
| 57 | + |
| 58 | +/* Uncomment this to use sin-wav based overlap add which in theory can improve |
| 59 | + sound quality slightly, at the expense of lots of floating point math. */ |
| 60 | +/* #define SONIC_USE_SIN */ |
| 61 | + |
| 62 | +#ifdef SONIC_DLL /* NOTE(review): __declspec is a toolchain-specific extension; confirm SONIC_DLL is only defined where it is supported */ |
| 63 | +#ifdef SONIC_EXPORT |
| 64 | +#define SONIC_API __declspec(dllexport) /* SONIC_DLL and SONIC_EXPORT both defined */ |
| 65 | +#else |
| 66 | +#define SONIC_API __declspec(dllimport) /* SONIC_DLL defined, SONIC_EXPORT not defined */ |
| 67 | +#endif /* SONIC_EXPORT */ |
| 68 | +#else |
| 69 | +#define SONIC_API /* SONIC_DLL not defined: expands to nothing */ |
| 70 | +#endif /* SONIC_DLL */ |
| 71 | + |
| 72 | +#ifdef __cplusplus |
| 73 | +extern "C" { |
| 74 | +#endif |
| 75 | + |
| 76 | +#ifdef SONIC_INTERNAL |
| 77 | +/* The following #define's are used to change the names of the routines defined |
| 78 | + * here so that a new library (i.e. speedy) can reuse these names, and then call |
| 79 | + * the original names. We do this for two reasons: 1) we don't want to change |
| 80 | + * the original API, and 2) we want to add a shim, using the original names and |
| 81 | + * still call these routines. |
| 82 | + * |
| 83 | + * Original users of this API and the libsonic library need to do nothing. The |
| 84 | + * original behavior remains. |
| 85 | + * |
| 86 | + * A new user that adds some additional functionality above this library (a shim) |
| 87 | + * should #define SONIC_INTERNAL before including this file, undefine all these |
| 88 | + * symbols and call the sonicIntXXX functions directly. |
| 89 | + */ |
| 90 | +#define sonicCreateStream sonicIntCreateStream |
| 91 | +#define sonicDestroyStream sonicIntDestroyStream |
| 92 | +#define sonicWriteFloatToStream sonicIntWriteFloatToStream |
| 93 | +#define sonicWriteShortToStream sonicIntWriteShortToStream |
| 94 | +#define sonicWriteUnsignedCharToStream sonicIntWriteUnsignedCharToStream |
| 95 | +#define sonicReadFloatFromStream sonicIntReadFloatFromStream |
| 96 | +#define sonicReadShortFromStream sonicIntReadShortFromStream |
| 97 | +#define sonicReadUnsignedCharFromStream sonicIntReadUnsignedCharFromStream |
| 98 | +#define sonicFlushStream sonicIntFlushStream |
| 99 | +#define sonicSamplesAvailable sonicIntSamplesAvailable |
| 100 | +#define sonicGetSpeed sonicIntGetSpeed |
| 101 | +#define sonicSetSpeed sonicIntSetSpeed |
| 102 | +#define sonicGetPitch sonicIntGetPitch |
| 103 | +#define sonicSetPitch sonicIntSetPitch |
| 104 | +#define sonicGetRate sonicIntGetRate |
| 105 | +#define sonicSetRate sonicIntSetRate |
| 106 | +#define sonicGetVolume sonicIntGetVolume |
| 107 | +#define sonicSetVolume sonicIntSetVolume |
| 108 | +#define sonicGetQuality sonicIntGetQuality |
| 109 | +#define sonicSetQuality sonicIntSetQuality |
| 110 | +#define sonicGetSampleRate sonicIntGetSampleRate |
| 111 | +#define sonicSetSampleRate sonicIntSetSampleRate |
| 112 | +#define sonicGetNumChannels sonicIntGetNumChannels |
| 113 | +#define sonicGetUserData sonicIntGetUserData |
| 114 | +#define sonicSetUserData sonicIntSetUserData |
| 115 | +#define sonicSetNumChannels sonicIntSetNumChannels |
| 116 | +#define sonicChangeFloatSpeed sonicIntChangeFloatSpeed |
| 117 | +#define sonicChangeShortSpeed sonicIntChangeShortSpeed |
| 118 | +#define sonicEnableNonlinearSpeedup sonicIntEnableNonlinearSpeedup |
| 119 | +#define sonicSetDurationFeedbackStrength sonicIntSetDurationFeedbackStrength |
| 120 | +#define sonicComputeSpectrogram sonicIntComputeSpectrogram |
| 121 | +#define sonicGetSpectrogram sonicIntGetSpectrogram |
| 122 | + |
| 123 | +#endif /* SONIC_INTERNAL */ |
| 124 | + |
| 125 | +/* This specifies the range of voice pitches we try to match. |
| 126 | + Note that if we go lower than 65, we could overflow in findPitchInRange */ |
| 127 | +#ifndef SONIC_MIN_PITCH |
| 128 | +#define SONIC_MIN_PITCH 65 |
| 129 | +#endif /* SONIC_MIN_PITCH */ |
| 130 | +#ifndef SONIC_MAX_PITCH |
| 131 | +#define SONIC_MAX_PITCH 400 |
| 132 | +#endif /* SONIC_MAX_PITCH */ |
| 133 | + |
| 134 | +/* These are used to down-sample some inputs to improve speed */ |
| 135 | +#define SONIC_AMDF_FREQ 4000 |
| 136 | + |
| 137 | +struct sonicStreamStruct; |
| 138 | +typedef struct sonicStreamStruct* sonicStream; |
| 139 | + |
| 140 | +/* For all of the following functions, numChannels is multiplied by numSamples |
| 141 | + to determine the actual number of values read or returned. */ |
| 142 | + |
| 143 | +/* Create a sonic stream. Return NULL only if we are out of memory and cannot |
| 144 | + allocate the stream. Set numChannels to 1 for mono, and 2 for stereo. */ |
| 145 | +SONIC_API sonicStream sonicCreateStream(int sampleRate, int numChannels); |
| 146 | +/* Destroy the sonic stream. */ |
| 147 | +SONIC_API void sonicDestroyStream(sonicStream stream); |
| 148 | +/* Attach user data to the stream. */ |
| 149 | +SONIC_API void sonicSetUserData(sonicStream stream, void *userData); |
| 150 | +/* Retrieve user data attached to the stream. */ |
| 151 | +SONIC_API void *sonicGetUserData(sonicStream stream); |
| 152 | +/* Use this to write floating point data to be sped up or slowed down into the stream. |
| 153 | + Values must be between -1 and 1. Return 0 if memory realloc failed, |
| 154 | + otherwise 1 */ |
| 155 | +SONIC_API int sonicWriteFloatToStream(sonicStream stream, const float* samples, int numSamples); |
| 156 | +/* Use this to write 16-bit data to be sped up or slowed down into the stream. |
| 157 | + Return 0 if memory realloc failed, otherwise 1 */ |
| 158 | +SONIC_API int sonicWriteShortToStream(sonicStream stream, const short* samples, int numSamples); |
| 159 | +/* Use this to write 8-bit unsigned data to be sped up or slowed down into the stream. |
| 160 | + Return 0 if memory realloc failed, otherwise 1 */ |
| 161 | +SONIC_API int sonicWriteUnsignedCharToStream(sonicStream stream, const unsigned char* samples, |
| 162 | + int numSamples); |
| 163 | +/* Use this to read floating point data out of the stream. Sometimes no data |
| 164 | + will be available, and zero is returned, which is not an error condition. */ |
| 165 | +SONIC_API int sonicReadFloatFromStream(sonicStream stream, float* samples, |
| 166 | + int maxSamples); |
| 167 | +/* Use this to read 16-bit data out of the stream. Sometimes no data will |
| 168 | + be available, and zero is returned, which is not an error condition. */ |
| 169 | +SONIC_API int sonicReadShortFromStream(sonicStream stream, short* samples, |
| 170 | + int maxSamples); |
| 171 | +/* Use this to read 8-bit unsigned data out of the stream. Sometimes no data |
| 172 | + will be available, and zero is returned, which is not an error condition. */ |
| 173 | +SONIC_API int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char* samples, |
| 174 | + int maxSamples); |
| 175 | +/* Force the sonic stream to generate output using whatever data it currently |
| 176 | + has. No extra delay will be added to the output, but flushing in the middle |
| 177 | + of words could introduce distortion. */ |
| 178 | +SONIC_API int sonicFlushStream(sonicStream stream); |
| 179 | +/* Return the number of samples in the output buffer */ |
| 180 | +SONIC_API int sonicSamplesAvailable(sonicStream stream); |
| 181 | +/* Get the speed of the stream. */ |
| 182 | +SONIC_API float sonicGetSpeed(sonicStream stream); |
| 183 | +/* Set the speed of the stream. */ |
| 184 | +SONIC_API void sonicSetSpeed(sonicStream stream, float speed); |
| 185 | +/* Get the pitch of the stream. */ |
| 186 | +SONIC_API float sonicGetPitch(sonicStream stream); |
| 187 | +/* Set the pitch of the stream. */ |
| 188 | +SONIC_API void sonicSetPitch(sonicStream stream, float pitch); |
| 189 | +/* Get the rate of the stream. */ |
| 190 | +SONIC_API float sonicGetRate(sonicStream stream); |
| 191 | +/* Set the rate of the stream. */ |
| 192 | +SONIC_API void sonicSetRate(sonicStream stream, float rate); |
| 193 | +/* Get the scaling factor of the stream. */ |
| 194 | +SONIC_API float sonicGetVolume(sonicStream stream); |
| 195 | +/* Set the scaling factor of the stream. */ |
| 196 | +SONIC_API void sonicSetVolume(sonicStream stream, float volume); |
| 197 | +/* Chord pitch is DEPRECATED. AFAIK, it was never used by anyone. These |
| 198 | + functions still exist to avoid breaking existing code. */ |
| 199 | +/* Get the chord pitch setting. */ |
| 200 | +SONIC_API int sonicGetChordPitch(sonicStream stream); |
| 201 | +/* Set chord pitch mode on or off. Default is off. See the documentation |
| 202 | + page for a description of this feature. */ |
| 203 | +SONIC_API void sonicSetChordPitch(sonicStream stream, int useChordPitch); |
| 204 | +/* Get the quality setting. */ |
| 205 | +SONIC_API int sonicGetQuality(sonicStream stream); |
| 206 | +/* Set the "quality". Default 0 is virtually as good as 1, but very much |
| 207 | + * faster. */ |
| 208 | +SONIC_API void sonicSetQuality(sonicStream stream, int quality); |
| 209 | +/* Get the sample rate of the stream. */ |
| 210 | +SONIC_API int sonicGetSampleRate(sonicStream stream); |
| 211 | +/* Set the sample rate of the stream. This will drop any samples that have not |
| 212 | + * been read. */ |
| 213 | +SONIC_API void sonicSetSampleRate(sonicStream stream, int sampleRate); |
| 214 | +/* Get the number of channels. */ |
| 215 | +SONIC_API int sonicGetNumChannels(sonicStream stream); |
| 216 | +/* Set the number of channels. This will drop any samples that have not been |
| 217 | + * read. */ |
| 218 | +SONIC_API void sonicSetNumChannels(sonicStream stream, int numChannels); |
| 219 | +/* This is a non-stream oriented interface to just change the speed of a sound |
| 220 | + sample. It works in-place on the sample array, so there must be at least |
| 221 | + speed*numSamples available space in the array. Returns the new number of |
| 222 | + samples. */ |
| 223 | +SONIC_API int sonicChangeFloatSpeed(float* samples, int numSamples, float speed, |
| 224 | + float pitch, float rate, float volume, |
| 225 | + int useChordPitch, int sampleRate, int numChannels); |
| 226 | +/* This is a non-stream oriented interface to just change the speed of a sound |
| 227 | + sample. It works in-place on the sample array, so there must be at least |
| 228 | + speed*numSamples available space in the array. Returns the new number of |
| 229 | + samples. */ |
| 230 | +SONIC_API int sonicChangeShortSpeed(short* samples, int numSamples, float speed, |
| 231 | + float pitch, float rate, float volume, |
| 232 | + int useChordPitch, int sampleRate, int numChannels); |
| 233 | + |
| 234 | +#ifdef SONIC_SPECTROGRAM |
| 235 | +/* |
| 236 | +This code generates high quality spectrograms from sound samples, using |
| 237 | +Time-Aliased-FFTs as described at: |
| 238 | +
|
| 239 | + https://github.com/waywardgeek/spectrogram |
| 240 | +
|
| 241 | +Basically, two adjacent pitch periods are overlap-added to create a sound |
| 242 | +sample that accurately represents the speech sound at that moment in time. |
| 243 | +This set of samples is converted to a spectral line using an FFT, and the result |
| 244 | +is saved as a single spectral line at that moment in time. The resulting |
| 245 | +spectral lines vary in resolution (it is equal to the number of samples in the |
| 246 | +pitch period), and the spacing of spectral lines also varies (proportional to |
| 247 | +the number of samples in the pitch period). |
| 248 | +
|
| 249 | +To generate a bitmap, linear interpolation is used to render the grayscale |
| 250 | +value at any particular point in time and frequency. |
| 251 | +*/ |
| 252 | + |
| 253 | +#define SONIC_MAX_SPECTRUM_FREQ 5000 |
| 254 | + |
| 255 | +struct sonicSpectrogramStruct; |
| 256 | +struct sonicBitmapStruct; |
| 257 | +typedef struct sonicSpectrogramStruct* sonicSpectrogram; |
| 258 | +/* sonicBitmap is declared exactly once: repeating a typedef is a constraint |
| 259 | + violation before C11. */ |
| 260 | +typedef struct sonicBitmapStruct* sonicBitmap; |
| 261 | + |
| 262 | +/* sonicBitmap objects represent spectrograms as grayscale bitmaps where each |
| 263 | + pixel is from 0 (black) to 255 (white). Bitmaps are rows*cols in size. |
| 264 | + Rows are indexed top to bottom and columns are indexed left to right */ |
| 265 | +struct sonicBitmapStruct { |
| 266 | + unsigned char* data; /* pixel values, numRows*numCols of them, each 0 (black) to 255 (white) */ |
| 267 | + int numRows; /* number of rows; rows are indexed top to bottom */ |
| 268 | + int numCols; /* number of columns; columns are indexed left to right */ |
| 269 | +}; |
| 270 | + |
| 271 | +/* Enable computation of a spectrogram on the fly. */ |
| 272 | +SONIC_API void sonicComputeSpectrogram(sonicStream stream); |
| 273 | + |
| 274 | +/* Get the spectrogram. */ |
| 275 | +SONIC_API sonicSpectrogram sonicGetSpectrogram(sonicStream stream); |
| 276 | + |
| 277 | +/* Create an empty spectrogram. Called automatically if sonicComputeSpectrogram |
| 278 | + has been called. */ |
| 279 | +SONIC_API sonicSpectrogram sonicCreateSpectrogram(int sampleRate); |
| 280 | + |
| 281 | +/* Destroy the spectrogram. This is called automatically when calling |
| 282 | + sonicDestroyStream. */ |
| 283 | +SONIC_API void sonicDestroySpectrogram(sonicSpectrogram spectrogram); |
| 284 | + |
| 285 | +/* Convert the spectrogram to a bitmap. Caller must destroy bitmap when done. */ |
| 286 | +SONIC_API sonicBitmap sonicConvertSpectrogramToBitmap(sonicSpectrogram spectrogram, |
| 287 | + int numRows, int numCols); |
| 288 | + |
| 289 | +/* Destroy a bitmap returned by sonicConvertSpectrogramToBitmap. */ |
| 290 | +SONIC_API void sonicDestroyBitmap(sonicBitmap bitmap); |
| 291 | + |
| 292 | +SONIC_API int sonicWritePGM(sonicBitmap bitmap, char* fileName); |
| 293 | + |
| 294 | +/* Add two pitch periods worth of samples to the spectrogram. There must be |
| 295 | + 2*period samples. Time should advance one pitch period for each call to |
| 296 | + this function. */ |
| 297 | +SONIC_API void sonicAddPitchPeriodToSpectrogram(sonicSpectrogram spectrogram, |
| 298 | + short* samples, int numSamples, |
| 299 | + int numChannels); |
| 300 | +#endif /* SONIC_SPECTROGRAM */ |
| 301 | + |
| 302 | +#ifdef __cplusplus |
| 303 | +} |
| 304 | +#endif |
| 305 | + |
| 306 | +#endif /* SONIC_H_ */ |
0 commit comments