Neda/call/lib/core/codec/lpc_codec.dart

383 lines
13 KiB
Dart
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import 'dart:math' as math;
import 'dart:typed_data';
import '../../utils/audio_math.dart';
import '../../utils/constants.dart';
/// Simplified LPC-10 voice codec operating at ~360 bps.
///
/// ── Super-frame structure (200 ms = 10 × 20 ms sub-frames) ─────────────
///
/// Bits per super-frame (72 bits = 9 bytes):
/// 10 PARCOR reflection coefficients (k_1..k_10): 30 bits (3 bits each)
/// 10 sub-frame gain values (log energy): 20 bits (2 bits each)
/// 10 voiced/unvoiced flags: 10 bits (1 bit each)
/// 1 pitch period (shared across voiced sub-frames): 6 bits (0-63 → 20-83 samples)
/// 1 pitch-valid flag: 1 bit
/// padding to byte boundary: 5 bits
/// ────────────────────────────────────────────────────────
/// Total: 72 bits = 9 bytes
///
/// Voice quality is similar to HF digital radio (intelligible, robotic).
/// All analysis is done at 8 kHz with a 10th-order LPC predictor.
class LpcCodec {
  // ── Configuration ─────────────────────────────────────────────────
  static const int _p = C.lpcOrder; // 10
  static const int _subN = C.lpcSubframeSamples; // 160 samples = 20 ms
  static const int _numSubs = C.lpcSubframesPerSuper; // 10
  static const int _superN = _subN * _numSubs; // 1600 samples = 200 ms
  static const double _alpha = C.preEmphasis; // 0.97

  /// Size of one encoded super-frame in bytes.
  static const int _frameBytes = 9;

  /// Value subtracted from the pitch period before it is stored in 6 bits.
  static const int _pitchBias = 20;

  /// Largest reflection-coefficient magnitude used on either side of the
  /// codec. Keeping |k| strictly below 1 keeps the synthesis filter stable.
  static const double _kMax = 0.999;

  // Encoder persistent state: last raw (un-emphasised) input sample of the
  // previous super-frame.
  double _encPrevSample = 0.0;

  // Decoder persistent state.
  // Synthesis filter memory, newest first: _synState[0] is y[-1],
  // _synState[1] is y[-2], and so on.
  final _synState = Float64List(_p);
  double _decPrevOut = 0.0; // de-emphasis state (last de-emphasised sample)

  // ── Public API ─────────────────────────────────────────────────────

  /// Encodes one super-frame of PCM into a 9-byte bitstream.
  ///
  /// [pcmBytes] must hold exactly `_superN` Int16 LE samples
  /// (`_superN * 2` bytes). Updates the encoder's pre-emphasis state.
  ///
  /// Throws an [ArgumentError] if [pcmBytes] has the wrong length.
  Uint8List encode(Uint8List pcmBytes) {
    // Validate in release builds too; an `assert` would be stripped.
    if (pcmBytes.length != _superN * 2) {
      throw ArgumentError(
          'LPC encode: expected ${_superN * 2} bytes, got ${pcmBytes.length}');
    }

    // Convert Int16 PCM → Float64 normalised.
    final signal = AudioMath.int16BytesToFloat(pcmBytes);

    // Pre-emphasis filter (in place; updates encoder state).
    _applyPreEmphasis(signal);

    // ── LPC analysis over the whole super-frame ────────────────────
    // Window the super-frame and compute autocorrelation.
    final win = AudioMath.hammingWindow(_superN);
    final windowed = Float64List(_superN);
    for (int i = 0; i < _superN; i++) {
      windowed[i] = signal[i] * win[i];
    }
    final r = AudioMath.autocorrelation(windowed, _p);
    final kc = _levinsonDurbin(r); // reflection coefficients
    final a = _parcorToLpc(kc); // LP filter coefficients

    // ── Pitch detection on the full super-frame residual ──────────
    final residual = _computeResidual(signal, a);
    final pitch =
        AudioMath.amdfPitch(residual, C.pitchMinSamples, C.pitchMaxSamples);
    final hasVoice = pitch > 0;

    // ── Per-sub-frame gain and voiced/unvoiced ─────────────────────
    final gains = List<int>.filled(_numSubs, 0);
    final vuv = List<bool>.filled(_numSubs, false);
    for (int s = 0; s < _numSubs; s++) {
      final start = s * _subN;
      final sub = Float64List.sublistView(signal, start, start + _subN);
      final e = AudioMath.energy(sub);
      gains[s] = _quantiseGain(e);
      // Voiced if energy is above floor AND overall pitch was detected.
      vuv[s] = hasVoice && e > 1e-4;
    }

    // ── Quantise PARCOR coefficients (3 bits each = 8 levels) ────
    final kQuant = <int>[for (final k in kc) _quantiseParcor(k)];

    // ── Pack into 72-bit bitstream ─────────────────────────────────
    return _pack(kQuant, gains, vuv, hasVoice ? pitch : 0, hasVoice);
  }

  /// Decodes a 9-byte bitstream into PCM.
  ///
  /// Returns the bytes produced by `AudioMath.floatToInt16Bytes` for
  /// `_superN` synthesised samples. Updates the decoder's synthesis-filter
  /// and de-emphasis state.
  ///
  /// Throws an [ArgumentError] if [bits] is not exactly 9 bytes long.
  Uint8List decode(Uint8List bits) {
    // Validate in release builds too; an `assert` would be stripped.
    if (bits.length != _frameBytes) {
      throw ArgumentError(
          'LPC decode: expected 9 bytes, got ${bits.length}');
    }

    final (kc, gains, vuv, pitch, hasVoice) = _unpack(bits);

    // Convert PARCOR back to LP filter coefficients.
    final a = _parcorToLpc(kc);

    // Synthesise each sub-frame into one contiguous super-frame buffer.
    final output = Float64List(_superN);
    for (int s = 0; s < _numSubs; s++) {
      final base = s * _subN;
      final gainLinear = _dequantiseGain(gains[s]);
      final voiced = vuv[s] && hasVoice;
      final excitation = _generateExcitation(_subN, voiced, pitch, gainLinear);

      // All-pole synthesis filter, the inverse of the analysis filter in
      // _computeResidual:  y[n] = e[n] − Σ a[k]·y[n−k−1].
      for (int i = 0; i < _subN; i++) {
        final n = base + i;
        double acc = excitation[i];
        for (int k = 0; k < _p; k++) {
          final idx = n - k - 1;
          // Samples before the start of this super-frame come from the
          // saved state, which is stored newest-first (y[-m] at m-1).
          final past = idx >= 0 ? output[idx] : _synState[-idx - 1];
          acc -= a[k] * past;
        }
        output[n] = acc;
      }
    }

    // Save the last P synthesised samples (before de-emphasis) as filter
    // memory for the next super-frame.
    for (int k = 0; k < _p; k++) {
      final idx = _superN - 1 - k;
      _synState[k] = idx >= 0 ? output[idx] : 0.0;
    }

    // De-emphasis.
    _applyDeEmphasis(output);

    // Hard-limit to [-1, 1] before conversion to Int16.
    for (int i = 0; i < _superN; i++) {
      output[i] = output[i].clamp(-1.0, 1.0);
    }
    return AudioMath.floatToInt16Bytes(output);
  }

  // ── Pre/de-emphasis ────────────────────────────────────────────────

  /// Applies `y[n] = x[n] − α·x[n−1]` to [s] in place.
  ///
  /// The filter memory is the previous *raw* input sample, carried across
  /// calls in `_encPrevSample`.
  void _applyPreEmphasis(Float64List s) {
    double prev = _encPrevSample;
    for (int i = 0; i < s.length; i++) {
      final cur = s[i];
      s[i] = cur - _alpha * prev;
      prev = cur;
    }
    // `prev` is the last raw sample; s[last] has already been overwritten
    // with the filtered value and must not be used as state.
    _encPrevSample = prev;
  }

  /// Applies `y[n] = x[n] + α·y[n−1]` to [s] in place.
  ///
  /// The filter memory is the previous output sample, carried across calls
  /// in `_decPrevOut`.
  void _applyDeEmphasis(Float64List s) {
    double prev = _decPrevOut;
    for (int i = 0; i < s.length; i++) {
      final cur = s[i] + _alpha * prev;
      s[i] = cur;
      prev = cur;
    }
    _decPrevOut = prev;
  }

  // ── Levinson-Durbin algorithm ──────────────────────────────────────

  /// Returns the `_p` reflection (PARCOR) coefficients for the
  /// autocorrelation sequence [r] (indices 0.._p are read).
  ///
  /// Uses the sign convention `A(z) = 1 + Σ a[j]·z^-j`. Each coefficient is
  /// clamped to ±`_kMax`. Returns all zeros when `r[0]` is below 1e-10;
  /// coefficients after an early exit (prediction error below 1e-15) stay 0.
  static List<double> _levinsonDurbin(Float64List r) {
    final k = List<double>.filled(_p, 0.0);
    if (r[0] < 1e-10) return k;

    final a = List<double>.filled(_p + 1, 0.0);
    final prev = List<double>.filled(_p + 1, 0.0); // previous-order scratch
    double e = r[0];
    for (int i = 1; i <= _p; i++) {
      // Reflection coefficient k_i = -(r[i] + Σ a[j]·r[i-j]) / e
      double lambda = r[i];
      for (int j = 1; j < i; j++) {
        lambda += a[j] * r[i - j];
      }
      // Clamp to (-1, 1) for stability.
      final ki = (-lambda / e).clamp(-_kMax, _kMax);
      k[i - 1] = ki;

      // Update predictor from a snapshot of the previous order:
      // a_new[j] = a[j] + k_i · a[i-j]
      prev.setRange(0, i, a);
      for (int j = 1; j < i; j++) {
        a[j] = prev[j] + ki * prev[i - j];
      }
      a[i] = ki;

      e *= 1.0 - ki * ki;
      if (e < 1e-15) break;
    }
    return k;
  }

  // ── PARCOR → LP coefficients ───────────────────────────────────────

  /// Converts reflection coefficients [k] to direct-form LP coefficients
  /// `a[1..p]` using the step-up recursion (same convention as
  /// [_levinsonDurbin]).
  static List<double> _parcorToLpc(List<double> k) {
    final a = List<double>.filled(_p + 1, 0.0);
    final prev = List<double>.filled(_p + 1, 0.0); // previous-order scratch
    a[0] = 1.0;
    for (int i = 0; i < _p; i++) {
      final order = i + 1;
      // Every a_new[j] must be computed from the previous order's values,
      // so snapshot them first; an in-place update would read entries that
      // were already overwritten in this pass.
      prev.setRange(0, order, a);
      for (int j = 1; j < order; j++) {
        a[j] = prev[j] + k[i] * prev[order - j];
      }
      a[order] = k[i];
    }
    // Return a[1..p] (skip a[0]=1)
    return a.sublist(1);
  }

  // ── Residual computation ───────────────────────────────────────────

  /// Applies the analysis (inverse) filter `e[n] = s[n] + Σ a[k]·s[n-k-1]`.
  ///
  /// Samples before the start of [s] are treated as zero.
  static Float64List _computeResidual(Float64List s, List<double> a) {
    final res = Float64List(s.length);
    for (int n = 0; n < s.length; n++) {
      double acc = s[n];
      for (int k = 0; k < _p; k++) {
        final idx = n - k - 1;
        if (idx >= 0) {
          acc += a[k] * s[idx];
        }
      }
      res[n] = acc;
    }
    return res;
  }

  // ── Gain quantisation (2 bits = 4 levels) ─────────────────────────

  /// Maps log10([energy]) from [-6, 0] onto the integers 0-3.
  ///
  /// Energies below 1e-6 map to 0.
  static int _quantiseGain(double energy) {
    if (energy < 1e-6) return 0;
    final logE = math.log(energy + 1e-10) / math.ln10;
    // Remap logE ∈ [-6, 0] to [0, 3].
    return ((logE + 6.0) / 6.0 * 3.0).round().clamp(0, 3);
  }

  /// Maps a 2-bit gain index back to an excitation gain.
  ///
  // NOTE(review): 10^logE is on the same scale as the energy passed to
  // _quantiseGain, yet the result is used as an excitation amplitude; a
  // square root may be intended — confirm against AudioMath.energy.
  static double _dequantiseGain(int q) {
    // Inverse of the index mapping in _quantiseGain.
    final logE = q / 3.0 * 6.0 - 6.0;
    return math.pow(10.0, logE).toDouble() * 0.1; // scale for excitation
  }

  // ── PARCOR quantisation (3 bits = 8 levels, range [-1,1]) ─────────

  /// Maps [k] in [-1, 1] uniformly onto the integers 0-7.
  static int _quantiseParcor(double k) {
    return ((k + 1.0) / 2.0 * 7.0).round().clamp(0, 7);
  }

  /// Maps a 3-bit index back to a reflection coefficient.
  ///
  /// The end levels (0 and 7) would reconstruct to exactly ±1, which puts
  /// synthesis-filter poles on the unit circle, so the result is limited to
  /// ±`_kMax`, the same bound the encoder applies.
  static double _dequantiseParcor(int q) {
    return ((q / 7.0) * 2.0 - 1.0).clamp(-_kMax, _kMax);
  }

  // ── Excitation generation ──────────────────────────────────────────

  /// Builds [n] samples of excitation.
  ///
  /// Voiced (and [pitch] > 0): an impulse of height [gain] every [pitch]
  /// samples, starting at sample 0. Otherwise: `AudioMath.whiteNoise` with
  /// [gain] limited to [0, 1].
  static Float64List _generateExcitation(
      int n, bool voiced, int pitch, double gain) {
    if (!voiced || pitch <= 0) {
      return AudioMath.whiteNoise(n, gain.clamp(0.0, 1.0));
    }
    final ex = Float64List(n);
    for (int i = 0; i < n; i += pitch) {
      ex[i] = gain;
    }
    return ex;
  }

  // ── Bit packing ────────────────────────────────────────────────────
  //
  // Layout (72 bits, MSB first):
  // Bits 0-29 : 10 × PARCOR (3 bits each)
  // Bits 30-49 : 10 × gain (2 bits each)
  // Bits 50-59 : 10 × V/UV (1 bit each)
  // Bits 60-65 : pitch period (6 bits, value = pitch-20, range 0-63 → 20-83)
  // Bit 66 : pitch-valid flag
  // Bits 67-71 : padding (5 zeros)

  /// Packs the quantised parameters into the 9-byte layout above.
  ///
  /// Only the low 3 bits of each [kq] entry and the low 2 bits of each
  /// [gains] entry are written. [pitch] is stored as `pitch - 20` limited to
  /// 0-63, or as 0 when [hasVoice] is false.
  static Uint8List _pack(List<int> kq, List<int> gains, List<bool> vuv,
      int pitch, bool hasVoice) {
    final out = Uint8List(_frameBytes);
    int pos = 0;

    // Appends the low `width` bits of `value`, most significant bit first.
    void put(int value, int width) {
      for (int b = width - 1; b >= 0; b--) {
        if (((value >> b) & 1) == 1) {
          out[pos >> 3] |= 0x80 >> (pos & 7);
        }
        pos++;
      }
    }

    // PARCOR coefficients (3 bits each).
    for (int i = 0; i < _p; i++) {
      put(kq[i], 3);
    }
    // Gains (2 bits each).
    for (int i = 0; i < _numSubs; i++) {
      put(gains[i], 2);
    }
    // V/UV flags.
    for (int i = 0; i < _numSubs; i++) {
      put(vuv[i] ? 1 : 0, 1);
    }
    // Pitch (6 bits, offset by 20).
    final pitchEnc = hasVoice ? (pitch - _pitchBias).clamp(0, 63) : 0;
    put(pitchEnc, 6);
    // Pitch-valid flag.
    put(hasVoice ? 1 : 0, 1);
    // Remaining bits (padding) are already 0.
    return out;
  }

  /// Unpacks a 9-byte frame produced by [_pack].
  ///
  /// Returns, in order: dequantised PARCOR coefficients, gain indices,
  /// voiced flags, pitch period in samples (0 when the pitch-valid flag is
  /// clear), and the pitch-valid flag.
  static (List<double>, List<int>, List<bool>, int, bool) _unpack(
      Uint8List bytes) {
    int pos = 0;

    // Reads the next `width` bits, most significant bit first.
    int take(int width) {
      int v = 0;
      for (int b = 0; b < width; b++) {
        v = (v << 1) | ((bytes[pos >> 3] >> (7 - (pos & 7))) & 1);
        pos++;
      }
      return v;
    }

    // Fields are read strictly in stream order.
    final kc = <double>[
      for (int i = 0; i < _p; i++) _dequantiseParcor(take(3)),
    ];
    final gains = <int>[for (int i = 0; i < _numSubs; i++) take(2)];
    final vuv = <bool>[for (int i = 0; i < _numSubs; i++) take(1) == 1];
    final pitchEnc = take(6);
    final hasVoice = take(1) == 1;

    final pitch = hasVoice ? pitchEnc + _pitchBias : 0;
    return (kc, gains, vuv, pitch, hasVoice);
  }
}