383 lines
13 KiB
Dart
383 lines
13 KiB
Dart
import 'dart:math' as math;
|
||
import 'dart:typed_data';
|
||
import '../../utils/audio_math.dart';
|
||
import '../../utils/constants.dart';
|
||
|
||
/// Simplified LPC-10 voice codec operating at ~360 bps.
///
/// ── Super-frame structure (200 ms = 10 × 20 ms sub-frames) ─────────────
///
/// Bits per super-frame (72 bits = 9 bytes):
///   10 PARCOR reflection coefficients (k_1..k_10):  30 bits (3 bits each)
///   10 sub-frame gain values (log energy):          20 bits (2 bits each)
///   10 voiced/unvoiced flags:                       10 bits (1 bit each)
///    1 pitch period (shared across voiced
///      sub-frames):                                  6 bits (0-63 → 20-83 samples)
///    1 pitch-valid flag:                             1 bit
///      padding to byte boundary:                     5 bits
///   ────────────────────────────────────────────────────────
///   Total:                                          72 bits = 9 bytes
///
/// Voice quality is similar to HF digital radio (intelligible, robotic).
/// All analysis is done at 8 kHz with a 10th-order LPC predictor.
///
/// An instance is stateful: the encoder remembers the last raw input sample
/// (for pre-emphasis) and the decoder remembers the synthesis-filter history
/// and the last de-emphasised output, so consecutive super-frames must be fed
/// to the same instance in order.
class LpcCodec {
  // ── Configuration ─────────────────────────────────────────────────
  static const int _p = C.lpcOrder; // 10
  static const int _subN = C.lpcSubframeSamples; // 160 samples = 20 ms
  static const int _numSubs = C.lpcSubframesPerSuper; // 10
  static const int _superN = _subN * _numSubs; // 1600 samples = 200 ms
  static const double _alpha = C.preEmphasis; // 0.97

  /// Largest reflection-coefficient magnitude allowed anywhere in the codec.
  /// Keeping |k| strictly below 1 keeps the all-pole synthesis filter stable.
  static const double _kLimit = 0.999;

  /// Offset subtracted from the pitch period before it is stored in 6 bits.
  static const int _pitchOffset = 20;

  /// Size of one packed super-frame.
  static const int _frameBytes = 9;

  // Encoder persistent state: last *raw* (un-emphasised) input sample.
  double _encPrevSample = 0.0;

  // Decoder persistent state.
  // _synState[j] holds the synthesis output y[-1 - j] of the previous
  // super-frame (most recent sample first), taken before de-emphasis.
  final _synState = Float64List(_p);
  double _decPrevOut = 0.0; // de-emphasis state (last de-emphasised sample)

  // ── Public API ─────────────────────────────────────────────────────

  /// Encodes [pcmBytes] (exactly 1600 Int16 LE samples) into a 9-byte
  /// bitstream.
  ///
  /// Updates the encoder's pre-emphasis state, so frames must be passed in
  /// stream order.
  Uint8List encode(Uint8List pcmBytes) {
    assert(pcmBytes.length == _superN * 2,
        'LPC encode: expected ${_superN * 2} bytes, got ${pcmBytes.length}');

    // Convert Int16 PCM → Float64 normalised, then pre-emphasise in place.
    final signal = AudioMath.int16BytesToFloat(pcmBytes);
    _applyPreEmphasis(signal);

    // ── LPC analysis over the whole super-frame ────────────────────
    final win = AudioMath.hammingWindow(_superN);
    final windowed = Float64List(_superN);
    for (int i = 0; i < _superN; i++) {
      windowed[i] = signal[i] * win[i];
    }
    final r = AudioMath.autocorrelation(windowed, _p);
    final kc = _levinsonDurbin(r); // reflection coefficients in (-1, 1)
    final a = _parcorToLpc(kc); // direct-form LP coefficients a[1..p]

    // ── Pitch detection on the full super-frame residual ──────────
    final residual = _computeResidual(signal, a);
    final pitch =
        AudioMath.amdfPitch(residual, C.pitchMinSamples, C.pitchMaxSamples);
    final hasVoice = pitch > 0;

    // ── Per-sub-frame gain and voiced/unvoiced ─────────────────────
    final gains = List<int>.filled(_numSubs, 0);
    final vuv = List<bool>.filled(_numSubs, false);
    for (int s = 0; s < _numSubs; s++) {
      final start = s * _subN;
      final sub = Float64List.sublistView(signal, start, start + _subN);
      final e = AudioMath.energy(sub);
      gains[s] = _quantiseGain(e);
      // Voiced if energy is above floor AND overall pitch was detected.
      vuv[s] = hasVoice && e > 1e-4;
    }

    // ── Quantise PARCOR coefficients (3 bits each = 8 levels) ────
    final kQuant = kc.map(_quantiseParcor).toList();

    // ── Pack into 72-bit bitstream ─────────────────────────────────
    return _pack(kQuant, gains, vuv, hasVoice ? pitch : 0, hasVoice);
  }

  /// Decodes a 9-byte bitstream into PCM (returns 3200 bytes = 1600 Int16 LE).
  ///
  /// Updates the decoder's synthesis-filter and de-emphasis state, so frames
  /// must be passed in stream order.
  Uint8List decode(Uint8List bits) {
    assert(bits.length == 9, 'LPC decode: expected 9 bytes, got ${bits.length}');

    final (kc, gains, vuv, pitch, hasVoice) = _unpack(bits);

    // Convert PARCOR back to LP filter coefficients a[1..p].
    final a = _parcorToLpc(kc);

    final output = Float64List(_superN);

    for (int s = 0; s < _numSubs; s++) {
      final base = s * _subN;
      final gainLinear = _dequantiseGain(gains[s]);
      final voiced = vuv[s] && hasVoice;
      final excitation = _generateExcitation(_subN, voiced, pitch, gainLinear);

      // All-pole synthesis filter, the exact inverse of the analysis filter
      // e[n] = s[n] + Σ a[k]·s[n-k]:
      //   y[n] = e[n] − Σ a[k]·y[n-k]
      for (int i = 0; i < _subN; i++) {
        final n = base + i;
        double acc = excitation[i];
        for (int k = 0; k < _p; k++) {
          final past = n - k - 1;
          // History inside this super-frame lives in `output`; anything
          // older comes from the saved tail of the previous super-frame,
          // where _synState[j] == y[-1 - j].
          final y = past >= 0 ? output[past] : _synState[-past - 1];
          acc -= a[k] * y;
        }
        output[n] = acc;
      }
    }

    // Save the last P synthesis outputs (most recent first) for the next
    // call. This must happen before de-emphasis rewrites `output` in place.
    for (int k = 0; k < _p; k++) {
      _synState[k] = output[_superN - 1 - k];
    }

    // De-emphasis.
    _applyDeEmphasis(output);

    // Hard-limit to [-1, 1] so the Int16 conversion cannot overflow.
    for (int i = 0; i < _superN; i++) {
      output[i] = output[i].clamp(-1.0, 1.0);
    }

    return AudioMath.floatToInt16Bytes(output);
  }

  // ── Pre/de-emphasis ────────────────────────────────────────────────

  /// Applies y[n] = x[n] − α·x[n-1] to [s] in place.
  ///
  /// The carried state is the last *raw* input sample x[N-1], which is what
  /// the next frame's first output needs; an empty [s] leaves it unchanged.
  void _applyPreEmphasis(Float64List s) {
    double prev = _encPrevSample;
    for (int i = 0; i < s.length; i++) {
      final cur = s[i];
      s[i] = cur - _alpha * prev;
      prev = cur;
    }
    _encPrevSample = prev;
  }

  /// Applies y[n] = x[n] + α·y[n-1] to [s] in place (inverse of
  /// pre-emphasis).
  ///
  /// The carried state is the last output sample; an empty [s] leaves it
  /// unchanged.
  void _applyDeEmphasis(Float64List s) {
    double prev = _decPrevOut;
    for (int i = 0; i < s.length; i++) {
      prev = s[i] + _alpha * prev;
      s[i] = prev;
    }
    _decPrevOut = prev;
  }

  // ── Levinson-Durbin algorithm ──────────────────────────────────────

  /// Returns the [_p] reflection (PARCOR) coefficients for autocorrelation
  /// [r] (indices 0..p), using the convention A(z) = 1 + Σ a[j]·z^-j.
  ///
  /// Returns all zeros when r[0] is effectively zero (silence). Each
  /// coefficient is clamped to ±[_kLimit]; if the prediction error collapses
  /// below 1e-15 the remaining coefficients are left at zero.
  static List<double> _levinsonDurbin(Float64List r) {
    final k = List<double>.filled(_p, 0.0);
    if (r[0] < 1e-10) return k;

    // a[1..i] is the order-i predictor; a[0] is unused.
    final a = List<double>.filled(_p + 1, 0.0);
    double e = r[0];

    for (int i = 1; i <= _p; i++) {
      // k_i = -(r[i] + Σ_{j<i} a[j]·r[i-j]) / e
      double lambda = r[i];
      for (int j = 1; j < i; j++) {
        lambda += a[j] * r[i - j];
      }
      // Clamp to (-1, 1) for stability.
      final ki = (-lambda / e).clamp(-_kLimit, _kLimit);
      k[i - 1] = ki;

      // Grow the predictor from order i-1 to order i.
      _stepUp(a, i, ki);

      e *= 1.0 - ki * ki;
      if (e < 1e-15) break;
    }

    return k;
  }

  /// Performs one step-up (order [order]-1 → [order]) of the Levinson
  /// recursion on [a] in place:
  ///   a'[j] = a[j] + k·a[order-j]  for j = 1..order-1,  a'[order] = k.
  ///
  /// Entries j and order-j depend on each other's *old* values, so they are
  /// updated as a pair from saved copies rather than in a single ascending
  /// pass (which would read already-overwritten coefficients).
  static void _stepUp(List<double> a, int order, double k) {
    for (int lo = 1, hi = order - 1; lo <= hi; lo++, hi--) {
      final aLo = a[lo];
      final aHi = a[hi];
      a[lo] = aLo + k * aHi;
      a[hi] = aHi + k * aLo;
    }
    a[order] = k;
  }

  // ── PARCOR → LP coefficients ───────────────────────────────────────

  /// Converts reflection coefficients [k] (length [_p]) to direct-form LP
  /// coefficients a[1..p] using the step-up recursion, with the same sign
  /// convention as [_levinsonDurbin].
  static List<double> _parcorToLpc(List<double> k) {
    final a = List<double>.filled(_p + 1, 0.0);
    a[0] = 1.0;
    for (int i = 0; i < _p; i++) {
      _stepUp(a, i + 1, k[i]);
    }
    // Return a[1..p] (skip a[0] = 1).
    return a.sublist(1);
  }

  // ── Residual computation ───────────────────────────────────────────

  /// Applies the analysis (inverse) filter e[n] = s[n] + Σ a[k]·s[n-k-1]
  /// (k = 0..p-1) to [s]; samples before the start of [s] are taken as zero.
  static Float64List _computeResidual(Float64List s, List<double> a) {
    final res = Float64List(s.length);
    for (int n = 0; n < s.length; n++) {
      // Only the first `n` taps have history available near the start.
      final taps = n < _p ? n : _p;
      double acc = s[n];
      for (int k = 0; k < taps; k++) {
        acc += a[k] * s[n - k - 1];
      }
      res[n] = acc;
    }
    return res;
  }

  // ── Gain quantisation (2 bits = 4 levels) ─────────────────────────

  /// Maps log10([energy]) from [-6, 0] linearly onto the integers 0-3.
  ///
  /// Energies below 1e-6 map to 0.
  static int _quantiseGain(double energy) {
    if (energy < 1e-6) return 0;
    final logE = math.log(energy + 1e-10) / math.ln10;
    // Typical speech energy in float normalised to [-1,1]: logE ∈ [-6, 0].
    return ((logE + 6.0) / 6.0 * 3.0).round().clamp(0, 3);
  }

  /// Maps gain index [q] (0-3) back to 10^logE and scales it by 0.1 for use
  /// as the excitation amplitude.
  ///
  // NOTE(review): this returns a scaled *energy* (no square root) that is
  // then used as an amplitude; whether that is intended depends on what
  // AudioMath.energy returns — confirm before changing the loudness mapping.
  static double _dequantiseGain(int q) {
    final logE = q / 3.0 * 6.0 - 6.0;
    return math.pow(10.0, logE).toDouble() * 0.1;
  }

  // ── PARCOR quantisation (3 bits = 8 levels, range [-1,1]) ─────────

  /// Maps [k] in [-1, 1] uniformly onto the index range 0-7.
  static int _quantiseParcor(double k) {
    return ((k + 1.0) / 2.0 * 7.0).round().clamp(0, 7);
  }

  /// Maps index [q] (0-7) back to a reflection coefficient.
  ///
  /// The end indices would reconstruct to exactly ±1, which puts a synthesis
  /// pole on the unit circle, so the result is limited to ±[_kLimit].
  static double _dequantiseParcor(int q) {
    return ((q / 7.0) * 2.0 - 1.0).clamp(-_kLimit, _kLimit);
  }

  // ── Excitation generation ──────────────────────────────────────────

  /// Builds [n] samples of excitation.
  ///
  /// Voiced: impulses of height [gain] at 0, [pitch], 2·[pitch], ….
  /// Unvoiced (or [pitch] ≤ 0): noise from [AudioMath.whiteNoise] with [gain]
  /// clamped to [0, 1].
  static Float64List _generateExcitation(
      int n, bool voiced, int pitch, double gain) {
    if (!voiced || pitch <= 0) {
      return AudioMath.whiteNoise(n, gain.clamp(0.0, 1.0));
    }
    final ex = Float64List(n);
    for (int i = 0; i < n; i += pitch) {
      ex[i] = gain;
    }
    return ex;
  }

  // ── Bit packing ────────────────────────────────────────────────────
  //
  // Layout (72 bits, MSB first):
  //   Bits  0-29 : 10 × PARCOR (3 bits each)
  //   Bits 30-49 : 10 × gain   (2 bits each)
  //   Bits 50-59 : 10 × V/UV   (1 bit each)
  //   Bits 60-65 : pitch period (6 bits, value = pitch-20, range 0-63 → 20-83)
  //   Bit  66    : pitch-valid flag
  //   Bits 67-71 : padding (5 zeros)

  /// Serialises one super-frame into 9 bytes using the layout above.
  ///
  /// Only the low 3 bits of each [kq] entry and the low 2 bits of each
  /// [gains] entry are written. [pitch] is stored as `pitch - 20` clamped to
  /// 0-63, or 0 when [hasVoice] is false.
  static Uint8List _pack(List<int> kq, List<int> gains, List<bool> vuv,
      int pitch, bool hasVoice) {
    final out = Uint8List(_frameBytes);
    int pos = 0;

    // Appends the low `width` bits of `value`, most significant bit first.
    void put(int value, int width) {
      for (int b = width - 1; b >= 0; b--) {
        if (((value >> b) & 1) == 1) {
          out[pos >> 3] |= 0x80 >> (pos & 7);
        }
        pos++;
      }
    }

    for (int i = 0; i < _p; i++) {
      put(kq[i], 3);
    }
    for (int i = 0; i < _numSubs; i++) {
      put(gains[i], 2);
    }
    for (int i = 0; i < _numSubs; i++) {
      put(vuv[i] ? 1 : 0, 1);
    }

    final pitchEnc = hasVoice ? (pitch - _pitchOffset).clamp(0, 63) : 0;
    put(pitchEnc, 6);
    put(hasVoice ? 1 : 0, 1);

    // The remaining 5 padding bits stay 0.
    return out;
  }

  /// Parses a 9-byte super-frame produced by [_pack].
  ///
  /// Returns `(parcor, gains, vuv, pitch, hasVoice)` where `parcor` holds the
  /// de-quantised reflection coefficients, `gains` the raw 2-bit indices and
  /// `pitch` the stored value plus 20 (meaningful only when `hasVoice`).
  static (List<double>, List<int>, List<bool>, int, bool) _unpack(
      Uint8List bytes) {
    int pos = 0;

    // Reads the next `width` bits, most significant bit first.
    int take(int width) {
      int value = 0;
      for (int n = 0; n < width; n++) {
        final bit = (bytes[pos >> 3] >> (7 - (pos & 7))) & 1;
        value = (value << 1) | bit;
        pos++;
      }
      return value;
    }

    final kc = [for (int i = 0; i < _p; i++) _dequantiseParcor(take(3))];
    final gains = [for (int i = 0; i < _numSubs; i++) take(2)];
    final vuv = [for (int i = 0; i < _numSubs; i++) take(1) == 1];
    final pitch = take(6) + _pitchOffset;
    final hasVoice = take(1) == 1;

    return (kc, gains, vuv, pitch, hasVoice);
  }
}
|