1 """Audio enhancement for Assist."""
3 from abc
import ABC, abstractmethod
4 from dataclasses
import dataclass
7 from pymicro_vad
import MicroVad
8 from pyspeex_noise
import AudioProcessor
10 from .const
import BYTES_PER_CHUNK
12 _LOGGER = logging.getLogger(__name__)
15 @dataclass(frozen=True, slots=True)
17 """Enhanced audio chunk and metadata."""
20 """Raw PCM audio @ 16Khz with 16-bit mono samples"""
23 """Timestamp relative to start of audio stream (milliseconds)"""
25 speech_probability: float |
None
26 """Probability that audio chunk contains speech (0-1), None if unknown"""
30 """Base class for audio enhancement."""
33 self, auto_gain: int, noise_suppression: int, is_vad_enabled: bool
35 """Initialize audio enhancer."""
def enhance_chunk(self, audio: bytes, timestamp_ms: int) -> EnhancedAudioChunk:
    """Process one chunk of 16 kHz, 16-bit mono PCM audio.

    This base-class definition has no body of its own; it only fixes the
    signature that audio enhancers share.

    Args:
        audio: Raw PCM bytes of the chunk.
        timestamp_ms: Integer timestamp supplied with the chunk
            (presumably milliseconds from the start of the audio
            stream -- confirm against callers).

    Returns:
        An EnhancedAudioChunk for the given audio.
    """
46 """Audio enhancer that runs microVAD and speex."""
49 self, auto_gain: int, noise_suppression: int, is_vad_enabled: bool
51 """Initialize audio enhancer."""
52 super().
__init__(auto_gain, noise_suppression, is_vad_enabled)
67 "Initialized speex with auto_gain=%s, noise_suppression=%s",
72 self.
vadvad: MicroVad |
None =
None
75 self.
vadvad = MicroVad()
76 _LOGGER.debug(
"Initialized microVAD")
78 def enhance_chunk(self, audio: bytes, timestamp_ms: int) -> EnhancedAudioChunk:
79 """Enhance 10ms chunk of PCM audio @ 16Khz with 16-bit mono samples."""
80 speech_probability: float |
None =
None
82 assert len(audio) == BYTES_PER_CHUNK
84 if self.
vadvad
is not None:
86 speech_probability = self.
vadvad.Process10ms(audio)
94 timestamp_ms=timestamp_ms,
95 speech_probability=speech_probability,
EnhancedAudioChunk enhance_chunk(self, bytes audio, int timestamp_ms)
None __init__(self, int auto_gain, int noise_suppression, bool is_vad_enabled)
None __init__(self, int auto_gain, int noise_suppression, bool is_vad_enabled)
EnhancedAudioChunk enhance_chunk(self, bytes audio, int timestamp_ms)