// Neda/call/lib/core/codec/lpc_codec.dart

import 'dart:math' as math;
import 'dart:typed_data';
import '../../utils/audio_math.dart';
import '../../utils/constants.dart';

/// Simplified LPC-10 voice codec operating at ~360 bps.
///
/// ── Super-frame structure (200 ms = 10 × 20 ms sub-frames) ─────────────
///
/// Bits per super-frame (72 bits = 9 bytes):
///   10 PARCOR reflection coefficients (k_1..k_10):  30 bits  (3 bits each)
///    10 sub-frame gain values (log energy):           20 bits  (2 bits each)
///    10 voiced/unvoiced flags:                        10 bits  (1 bit each)
///    1 pitch period (shared across voiced sub-frames): 6 bits  (0-63 → 20-83 samples)
///    1 pitch-valid flag:                               1 bit
///    padding to byte boundary:                         5 bits
///   ────────────────────────────────────────────────────────
///   Total:                                            72 bits  = 9 bytes
///
/// Voice quality is similar to HF digital radio (intelligible, robotic).
/// All analysis is done at 8 kHz with a 10th-order LPC predictor.
class LpcCodec {
  // ── Configuration ─────────────────────────────────────────────────
  static const int _p         = C.lpcOrder;          // 10
  static const int _subN      = C.lpcSubframeSamples; // 160 samples = 20 ms
  static const int _numSubs   = C.lpcSubframesPerSuper; // 10
  static const int _superN    = _subN * _numSubs;    // 1600 samples = 200 ms
  static const double _alpha  = C.preEmphasis;       // 0.97

  // Encoder persistent state
  double _encPrevSample = 0.0;

  // Decoder persistent state
  final _synState = Float64List(_p); // IIR filter memory (synthesis filter)
  double _decPrevOut = 0.0;          // de-emphasis state

  // ── Public API ─────────────────────────────────────────────────────

  /// Encodes one super-frame of PCM audio into the 9-byte bitstream.
  ///
  /// [pcmBytes] must contain exactly `_superN` Int16 LE samples
  /// (`_superN * 2` bytes); the length is checked by an assertion only.
  /// The buffer is converted to floats, pre-emphasised (which advances the
  /// encoder's filter state), analysed once for LPC and pitch, and then
  /// measured per sub-frame for gain and voicing.
  Uint8List encode(Uint8List pcmBytes) {
    assert(pcmBytes.length == _superN * 2,
        'LPC encode: expected ${_superN * 2} bytes, got ${pcmBytes.length}');

    // Int16 PCM → floats, pre-emphasised in place.
    final samples = AudioMath.int16BytesToFloat(pcmBytes);
    _applyPreEmphasis(samples);

    // Single LPC analysis over the Hamming-windowed super-frame.
    final window = AudioMath.hammingWindow(_superN);
    final tapered = Float64List(_superN);
    for (var n = 0; n < _superN; n++) {
      tapered[n] = samples[n] * window[n];
    }
    final reflection = _levinsonDurbin(AudioMath.autocorrelation(tapered, _p));
    final predictor = _parcorToLpc(reflection);

    // One pitch estimate for the whole super-frame, taken on the residual.
    final pitch = AudioMath.amdfPitch(
      _computeResidual(samples, predictor),
      C.pitchMinSamples,
      C.pitchMaxSamples,
    );
    final hasVoice = pitch > 0;

    // Gain and voicing decision for every sub-frame.
    final gains = List<int>.filled(_numSubs, 0);
    final vuv = List<bool>.filled(_numSubs, false);
    var offset = 0;
    for (var s = 0; s < _numSubs; s++, offset += _subN) {
      final energy = AudioMath.energy(
        Float64List.sublistView(samples, offset, offset + _subN),
      );
      gains[s] = _quantiseGain(energy);
      // Voiced only when a pitch was found AND the sub-frame is not silent.
      vuv[s] = hasVoice && energy > 1e-4;
    }

    // 3-bit code for each reflection coefficient.
    final kQuant = List<int>.generate(
      _p,
      (i) => _quantiseParcor(reflection[i]),
      growable: false,
    );

    return _pack(kQuant, gains, vuv, hasVoice ? pitch : 0, hasVoice);
  }

  /// Decodes a 9-byte bitstream into PCM (returns 3200 bytes = 1600 Int16 LE).
  ///
  /// [bits] must be exactly 9 bytes long (checked by an assertion only).
  ///
  /// The synthesis-filter memory and the de-emphasis state persist between
  /// calls, so super-frames must be decoded in stream order.
  Uint8List decode(Uint8List bits) {
    assert(bits.length == 9, 'LPC decode: expected 9 bytes, got ${bits.length}');

    final (kc, gains, vuv, pitch, hasVoice) = _unpack(bits);

    // Convert PARCOR back to LP filter coefficients (a[k] holds a_{k+1}).
    final a = _parcorToLpc(kc);

    // Synthesise each sub-frame into one contiguous super-frame buffer.
    final output = Float64List(_superN);

    for (int s = 0; s < _numSubs; s++) {
      final base = s * _subN;
      final gainLinear = _dequantiseGain(gains[s]);
      final voiced = vuv[s] && hasVoice;
      final excitation = _generateExcitation(_subN, voiced, pitch, gainLinear);

      // All-pole synthesis filter 1/A(z). The analysis filter is
      // e[n] = s[n] + Σ a[k]·s[n-k], so its inverse is
      // y[n] = e[n] − Σ a[k]·y[n-k]  (note the minus sign).
      for (int i = 0; i < _subN; i++) {
        final n = base + i;
        double acc = excitation[i];
        for (int k = 0; k < _p; k++) {
          final idx = n - k - 1;
          // Indices below zero reach into the previous super-frame, where
          // _synState[j] holds y[-1 - j].
          final past = idx >= 0 ? output[idx] : _synState[-idx - 1];
          acc -= a[k] * past;
        }
        // Keep NaN/Infinity out of the output and the filter memory so one
        // bad frame cannot poison every following frame.
        output[n] = acc.isFinite ? acc : 0.0;
      }
    }

    // Save the last P synthesised samples (most recent first) as the filter
    // memory for the next super-frame.
    for (int k = 0; k < _p; k++) {
      final idx = _superN - 1 - k;
      _synState[k] = idx >= 0 ? output[idx] : 0.0;
    }

    // De-emphasis.
    _applyDeEmphasis(output);

    // Hard-limit to [-1, 1] before the Int16 conversion.
    for (int i = 0; i < _superN; i++) {
      output[i] = output[i].clamp(-1.0, 1.0);
    }

    return AudioMath.floatToInt16Bytes(output);
  }

  // ── Pre/de-emphasis ────────────────────────────────────────────────

  /// Applies the pre-emphasis filter `y[n] = x[n] - _alpha * x[n-1]` to [s]
  /// in place.
  ///
  /// The last *unfiltered* input sample is remembered in [_encPrevSample] so
  /// the filter continues seamlessly into the next buffer. An empty buffer
  /// leaves the state untouched.
  void _applyPreEmphasis(Float64List s) {
    double prev = _encPrevSample;
    for (int i = 0; i < s.length; i++) {
      final cur = s[i];
      s[i] = cur - _alpha * prev;
      prev = cur;
    }
    // Persist the raw input sample: s[last] has already been overwritten
    // with the filtered value and would be the wrong x[n-1] next time.
    _encPrevSample = prev;
  }

  /// Applies the de-emphasis filter `y[n] = x[n] + _alpha * y[n-1]` to [s]
  /// in place.
  ///
  /// The last output sample is remembered in [_decPrevOut] so the filter
  /// continues seamlessly into the next buffer. An empty buffer leaves the
  /// state untouched instead of throwing.
  void _applyDeEmphasis(Float64List s) {
    double prev = _decPrevOut;
    for (int i = 0; i < s.length; i++) {
      final cur = s[i] + _alpha * prev;
      s[i] = cur;
      prev = cur;
    }
    // `prev` is the last output written (or the old state if s was empty).
    _decPrevOut = prev;
  }

  // ── Levinson-Durbin recursion ──────────────────────────────────────
  //
  // Takes the autocorrelation lags r[0..p] and returns the p reflection
  // (PARCOR) coefficients, each limited to ±0.999. Returns all zeros when
  // r[0] is below 1e-10; coefficients after an early stop (prediction error
  // below 1e-15) are left at zero.
  static List<double> _levinsonDurbin(Float64List r) {
    final reflection = List<double>.filled(_p, 0.0);
    if (r[0] < 1e-10) return reflection;

    final predictor = List<double>.filled(_p + 1, 0.0);
    var err = r[0];

    for (var order = 1; order <= _p; order++) {
      // Numerator of k_order: r[order] + Σ a[j]·r[order-j].
      var acc = r[order];
      for (var j = 1; j < order; j++) {
        acc += predictor[j] * r[order - j];
      }

      // Reflection coefficient, kept strictly inside (-1, 1) for stability.
      final ki = (-acc / err).clamp(-0.999, 0.999);
      reflection[order - 1] = ki;

      // Step the predictor up one order. Each mirrored pair (j, order-j) is
      // read before either entry is written, so only old values are used.
      for (var lo = 1, hi = order - 1; lo <= hi; lo++, hi--) {
        final aLo = predictor[lo];
        final aHi = predictor[hi];
        predictor[lo] = aLo + ki * aHi;
        predictor[hi] = aHi + ki * aLo;
      }
      predictor[order] = ki;

      // Remaining prediction error; stop once it is numerically exhausted.
      err *= (1.0 - ki * ki);
      if (err < 1e-15) break;
    }

    return reflection;
  }

  // ── PARCOR → LP coefficients ───────────────────────────────────────
  //
  // Converts reflection coefficients k[0..p-1] to the direct-form LP
  // coefficients a_1..a_p using the step-up recursion
  //   a_m[j] = a_{m-1}[j] + k_m · a_{m-1}[m-j],  j = 1..m-1,   a_m[m] = k_m.
  // Returns a list of length p (a_0 = 1 is implied and not included).
  static List<double> _parcorToLpc(List<double> k) {
    final a = List<double>.filled(_p + 1, 0.0);
    a[0] = 1.0;
    for (int i = 0; i < _p; i++) {
      final ki = k[i];
      final m = i + 1; // order reached after this step
      // The recursion needs the PREVIOUS order's values on the right-hand
      // side. Updating mirrored pairs (j, m-j) together reads both old
      // values before either is overwritten.
      for (int lo = 1, hi = m - 1; lo <= hi; lo++, hi--) {
        final aLo = a[lo];
        final aHi = a[hi];
        a[lo] = aLo + ki * aHi;
        a[hi] = aHi + ki * aLo;
      }
      a[m] = ki;
    }
    // Return a[1..p] (skip a[0]=1)
    return a.sublist(1);
  }

  // ── Residual computation ───────────────────────────────────────────
  //
  // Runs the analysis (inverse) filter over [s]:
  //   e[n] = s[n] + Σ_{k=1..p} a[k-1]·s[n-k]
  // Samples before the start of the buffer are treated as absent, so the
  // first p outputs use fewer taps.
  static Float64List _computeResidual(Float64List s, List<double> a) {
    final residual = Float64List(s.length);
    for (var n = 0; n < s.length; n++) {
      // Only as many taps as there is history available.
      final taps = n < _p ? n : _p;
      var sum = s[n];
      for (var lag = 1; lag <= taps; lag++) {
        sum += a[lag - 1] * s[n - lag];
      }
      residual[n] = sum;
    }
    return residual;
  }

  // ── Gain quantisation (2 bits = 4 levels) ─────────────────────────
  //
  // Maps log10(energy) onto the integer codes 0-3. Anything below 1e-6 is
  // code 0.
  static int _quantiseGain(double energy) {
    if (energy < 1e-6) {
      return 0;
    }
    // The small offset keeps the logarithm finite.
    final decades = math.log(energy + 1e-10) / math.ln10;
    // Typical speech energy in float normalised to [-1,1] spans roughly
    // [-6, 0] decades; stretch that range over [0, 3] and round.
    final level = ((decades + 6.0) / 6.0 * 3.0).round();
    return level.clamp(0, 3);
  }

  /// Maps a 2-bit gain code [q] back to an excitation gain.
  ///
  /// Inverts the linear code → log10 mapping of `_quantiseGain`
  /// (0 → -6 decades, 3 → 0 decades) and applies a fixed 0.1 scale.
  // NOTE(review): the value is 10^logE (an energy-domain quantity), not its
  // square root, yet callers use it as an amplitude — confirm this is
  // intended.
  static double _dequantiseGain(int q) {
    final decades = q / 3.0 * 6.0 - 6.0;
    final linear = math.pow(10.0, decades).toDouble();
    return linear * 0.1; // scale for excitation
  }

  // ── PARCOR quantisation (3 bits = 8 levels, range [-1,1]) ─────────
  //
  // Uniform quantiser: -1 maps to code 0, +1 to code 7, with rounding to
  // the nearest code and clamping for out-of-range input.
  static int _quantiseParcor(double k) {
    final scaled = (k + 1.0) / 2.0 * 7.0;
    return scaled.round().clamp(0, 7);
  }

  /// Maps a 3-bit PARCOR code [q] (0-7) back to a reflection coefficient.
  ///
  /// The uniform grid would place codes 0 and 7 at exactly -1 and +1, which
  /// puts poles of the synthesis filter on the unit circle. The result is
  /// therefore limited to ±0.999, the same bound the encoder applies in
  /// `_levinsonDurbin`, so every decoded coefficient lies strictly inside
  /// (-1, 1).
  static double _dequantiseParcor(int q) {
    final k = (q / 7.0) * 2.0 - 1.0;
    return k.clamp(-0.999, 0.999);
  }

  // ── Excitation generation ──────────────────────────────────────────
  //
  // Voiced (with a positive pitch): impulses of height [gain] at samples
  // 0, pitch, 2·pitch, … below [n]; every other sample is zero.
  // Otherwise: noise from AudioMath.whiteNoise with the gain limited to
  // [0, 1].
  static Float64List _generateExcitation(
      int n, bool voiced, int pitch, double gain) {
    final periodic = voiced && pitch > 0;
    if (periodic) {
      final pulses = Float64List(n);
      var at = 0;
      while (at < n) {
        pulses[at] = gain;
        at += pitch;
      }
      return pulses;
    }
    return AudioMath.whiteNoise(n, gain.clamp(0.0, 1.0));
  }

  // ── Bit packing ────────────────────────────────────────────────────
  //
  // Layout (72 bits, MSB first):
  //  Bits  0-29 : 10 × PARCOR (3 bits each)
  //  Bits 30-49 : 10 × gain   (2 bits each)
  //  Bits 50-59 : 10 × V/UV   (1 bit  each)
  //  Bits 60-65 : pitch period (6 bits, value = pitch-20, range 0-63 → 20-83)
  //  Bit     66 : pitch-valid flag
  //  Bits 67-71 : padding (5 zeros)

  /// Serialises the quantised parameters into the 9-byte super-frame.
  ///
  /// Fields are written most-significant bit first in the order PARCOR,
  /// gains, V/UV flags, pitch, pitch-valid flag. The pitch field is written
  /// as zero when [hasVoice] is false.
  static Uint8List _pack(List<int> kq, List<int> gains, List<bool> vuv,
      int pitch, bool hasVoice) {
    final out = Uint8List(9); // zero-filled, so padding bits stay 0
    var cursor = 0; // index of the next bit to write

    // Appends the low [width] bits of [value], MSB first.
    void put(int value, int width) {
      for (var shift = width - 1; shift >= 0; shift--) {
        final bit = (value >> shift) & 1;
        out[cursor >> 3] |= bit << (7 - (cursor & 7));
        cursor++;
      }
    }

    // PARCOR coefficients (3 bits each).
    for (var i = 0; i < _p; i++) {
      put(kq[i], 3);
    }

    // Gains (2 bits each).
    for (var i = 0; i < _numSubs; i++) {
      put(gains[i], 2);
    }

    // V/UV flags.
    for (var i = 0; i < _numSubs; i++) {
      put(vuv[i] ? 1 : 0, 1);
    }

    // Pitch (6 bits, offset by 20), then the pitch-valid flag.
    put(hasVoice ? (pitch - 20).clamp(0, 63) : 0, 6);
    put(hasVoice ? 1 : 0, 1);

    return out;
  }

  /// Parses a 9-byte super-frame into its fields.
  ///
  /// Returns, in order: the dequantised PARCOR coefficients, the gain codes,
  /// the V/UV flags, the pitch period (stored value + 20) and the
  /// pitch-valid flag. Bits are read most-significant first; the trailing
  /// padding bits are ignored.
  static (List<double>, List<int>, List<bool>, int, bool) _unpack(
      Uint8List bytes) {
    var cursor = 0; // index of the next bit to read

    // Reads the next [width] bits as an unsigned integer, MSB first.
    int take(int width) {
      var value = 0;
      for (var n = 0; n < width; n++, cursor++) {
        final bit = (bytes[cursor >> 3] >> (7 - (cursor & 7))) & 1;
        value = (value << 1) | bit;
      }
      return value;
    }

    // PARCOR (3 bits each), dequantised immediately.
    final kc =
        List<double>.generate(_p, (_) => _dequantiseParcor(take(3)));

    // Gains (2 bits each).
    final gains =
        List<int>.generate(_numSubs, (_) => take(2), growable: false);

    // V/UV (1 bit each).
    final vuv =
        List<bool>.generate(_numSubs, (_) => take(1) == 1, growable: false);

    // Pitch (6 bits, stored with an offset of 20).
    final pitch = take(6) + 20;

    // Pitch-valid.
    final hasVoice = take(1) == 1;

    return (kc, gains, vuv, pitch, hasVoice);
  }
}