485 lines
19 KiB
Python
485 lines
19 KiB
Python
import sys
|
||
import pvporcupine
|
||
import pyaudio
|
||
import struct
|
||
import os
|
||
import socket
|
||
import time
|
||
import threading
|
||
import configparser
|
||
import wave
|
||
import datetime
|
||
import logging
|
||
|
||
# Logging setup: every message goes to voicelog.txt in the current working
# directory (with timestamp and level) and is echoed bare to the console.
log_file = os.path.join(os.getcwd(), "voicelog.txt")

# The messages logged by this program are Chinese text, so the file is opened
# explicitly as UTF-8 instead of relying on the platform's locale encoding.
file_handler = logging.FileHandler(log_file, encoding='utf-8')
file_handler.setFormatter(logging.Formatter(
    fmt='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
))
logging.basicConfig(level=logging.INFO, handlers=[file_handler])

# Console handler so the same records are also shown on the terminal,
# message text only.
console = logging.StreamHandler()
console.setLevel(logging.INFO)
formatter = logging.Formatter('%(message)s')
console.setFormatter(formatter)
logging.getLogger('').addHandler(console)
|
||
|
||
# Logging helper used throughout this file in place of print().
def log_info(message):
    """Emit *message* on the root logger at INFO level."""
    logging.log(logging.INFO, message)
|
||
|
||
# Application directory: next to the executable when running as a frozen
# bundle, otherwise next to this source file (same rule as resource_path()).
base_dir = (
    os.path.dirname(os.path.abspath(sys.executable))
    if getattr(sys, 'frozen', False)
    else os.path.dirname(os.path.abspath(__file__))
)
config_file_path = os.path.join(base_dir, 'config.ini')
config = configparser.ConfigParser()
try:
    # Look beside the application first, then in the current working
    # directory. ConfigParser.read() skips files it cannot open and lets later
    # files override earlier ones, so a config.ini in the working directory
    # still takes precedence as before.
    config.read([config_file_path, 'config.ini'], encoding='utf-8')
    host = config.get('Server', 'address')
    port = config.getint('Server', 'port')
    key = config.get('Server', 'key')
    cport = config.getint('Server', 'client-port')
    detection_threshold = config.getfloat('Audio', 'detection_threshold', fallback=1)
    voice_threshold = config.getfloat('Audio', 'voice_threshold', fallback=150)
except (configparser.Error, ValueError, FileNotFoundError) as e:
    # configparser.Error covers missing sections/options and parse errors;
    # ValueError covers a non-numeric port or threshold.
    log_info(f"配置错误: {e}")
    sys.exit(1)
|
||
|
||
|
||
|
||
class UDPClientTool:
    """UDP client that connects the wake-word detector to the control server.

    Creating an instance immediately starts a daemon thread running
    start_communication(), which performs a handshake with the server, then
    runs a receive loop and a heartbeat loop until the link drops, and
    reconnects afterwards.

    Args:
        server_host: Server address (defaults to the value from config.ini).
        server_port: Server UDP port.
        client_port: Local UDP port to bind; 0 means do not bind explicitly.
        talker: Stored on the instance; not used by this class.
        waker: Object providing isOpen, open_PyAudio(), close_PyAudio() and
            record_user_speech(); driven by commands received from the server.
    """

    def __init__(self, server_host=host, server_port=port, client_port=cport, talker=None, waker=None):
        self.SERVER_HOST = server_host
        self.SERVER_PORT = server_port
        self.CLIENT_PORT = client_port
        self.client_socket = None
        self.talker = talker
        self.waker = waker
        self.HEARTBEAT_INTERVAL = 5
        self.running = True
        self.connected = False  # current link state
        self.receive_thread = None
        self.heartbeat_thread = None
        self.current_state = "等待唤醒"  # state marker
        self.first_start = True  # True until the first successful connect
        self.last_state = "初始化"  # state to restore after a reconnect
        self.last_command = None  # last message received
        self.last_command_time = 0  # time.time() of the last message
        self.last_sent_message = None  # last message passed to send_message()
        self.is_recording = False  # set while a recording is in progress

        # Start the worker thread last: it reads the attributes above, so it
        # must not run before they all exist.
        self.communication_thread = threading.Thread(target=self.start_communication, daemon=True)
        self.communication_thread.start()

    def _close_socket(self):
        """Close the current socket, if any, and forget it."""
        sock, self.client_socket = self.client_socket, None
        if sock is not None:
            sock.close()

    def _restore_waker_state(self):
        """Bring the waker back to the state recorded before the link dropped."""
        if not self.waker:
            return
        if self.first_start:
            self.waker.open_PyAudio()
            self.first_start = False
            self.last_state = "唤醒监听"
        elif self.last_state == "唤醒监听":
            # Was listening for the wake word: resume listening.
            log_info("恢复唤醒词监听状态...")
            self.waker.open_PyAudio()
        elif self.last_state == "录音中":
            # Was recording: tell the user and start the recording again.
            log_info("录音中断,请重新说话...")
            time.sleep(0.5)
            self.waker.record_user_speech(delay=0)  # no additional delay

    def connect(self):
        """Open the socket, handshake with the server and restore waker state.

        Returns:
            True when the handshake reply was received and the waker state was
            restored; False on any error. On failure the socket is closed and
            the client is marked disconnected.
        """
        try:
            self._close_socket()  # never leave a stale socket bound to the port
            self.client_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            self.client_socket.settimeout(5)
            if self.CLIENT_PORT != 0:
                self.client_socket.bind(('0.0.0.0', self.CLIENT_PORT))
            test_message = "唤醒连接"
            self.client_socket.sendto(test_message.encode('utf-8'), (self.SERVER_HOST, self.SERVER_PORT))
            data, addr = self.client_socket.recvfrom(4096)
            log_info(f"连接成功: {data.decode('utf-8')}")
            self.connected = True

            self._restore_waker_state()
            return True
        except Exception as e:
            log_info(f"连接失败: {e}")
            self.connected = False
            self._close_socket()
            return False

    def start_communication(self):
        """Connect, run the receive and heartbeat threads, reconnect on loss.

        Loops until shutdown() clears self.running. A failed connect is retried
        after 3 seconds.
        """
        while self.running:
            if self.connect():
                self.receive_thread = threading.Thread(target=self.receive_messages, daemon=True)
                self.receive_thread.start()
                self.heartbeat_thread = threading.Thread(target=self.send_heartbeat, daemon=True)
                self.heartbeat_thread.start()
                self.receive_thread.join()
                self.heartbeat_thread.join()
                self._close_socket()
                log_info("连接断开,重新尝试连接...")
            else:
                log_info("3秒后重连...")
                time.sleep(3)

    def receive_messages(self):
        """Receive server datagrams and act on the commands they carry.

        "开始唤醒" (re)opens the microphone for wake-word listening and
        "开始对话" starts a recording thread unless one is already running.
        A message identical to the previous one and arriving within one second
        is dropped. The loop ends on shutdown, on a receive error, or when
        another thread has marked the link as disconnected.
        """
        # Also watch self.connected so that a heartbeat failure ends this loop
        # and lets start_communication() reconnect.
        while self.running and self.connected:
            try:
                data, addr = self.client_socket.recvfrom(4096)
                decoded = data.decode('utf-8')
                current_time = time.time()

                if decoded != "HEARTBEAT":
                    log_info(f"收到指令: {decoded}")

                if (decoded == self.last_command and
                        current_time - self.last_command_time < 1.0):
                    continue

                self.last_command = decoded
                self.last_command_time = current_time

                if self.waker:
                    if decoded == "开始唤醒":
                        self.current_state = "等待唤醒"
                        self.last_state = "唤醒监听"
                        if self.waker.isOpen:
                            self.waker.close_PyAudio()
                            time.sleep(0.1)
                        self.waker.open_PyAudio()
                    elif decoded == "开始对话":
                        if not self.is_recording:
                            # Take the recording lock before the thread starts.
                            self.is_recording = True
                            self.current_state = "录音中"
                            self.last_state = "录音中"
                            try:
                                record_thread = threading.Thread(
                                    target=self.waker.record_user_speech,
                                    args=(1,),
                                    daemon=True
                                )
                                record_thread.start()
                                record_thread.join(timeout=0.1)
                            except Exception as e:
                                log_info(f"启动录音线程时出错: {e}")
                                self.current_state = "等待唤醒"
                                self.is_recording = False
                    elif self.is_recording and decoded != "HEARTBEAT":
                        log_info("正在录音中,忽略指令:" + decoded)
                        continue

            except socket.timeout:
                continue
            except Exception as e:
                log_info(f"接收错误: {e}")
                if self.running:  # only flag for reconnect while still running
                    self.connected = False
                break

    def send_heartbeat(self):
        """Send a HEARTBEAT datagram every HEARTBEAT_INTERVAL seconds while connected."""
        while self.connected and self.running:
            try:
                self.client_socket.sendto(b'HEARTBEAT', (self.SERVER_HOST, self.SERVER_PORT))
                time.sleep(self.HEARTBEAT_INTERVAL)
            except Exception as e:
                log_info(f"心跳发送失败: {e}")
                self.connected = False
                break

    def send_message(self, message):
        """Send *message* (UTF-8 encoded) to the server if currently connected.

        Nothing is sent while disconnected. Send errors are logged, not raised.
        """
        if self.connected and self.client_socket:
            try:
                self.client_socket.sendto(message.encode('utf-8'), (self.SERVER_HOST, self.SERVER_PORT))
                self.last_sent_message = message
                # Sending the end-of-dialogue message returns the client to
                # the waiting-for-wake-word state.
                if message == "结束对话":
                    self.current_state = "等待唤醒"
            except Exception as e:
                log_info(f"发送失败: {e}")

    def shutdown(self):
        """Stop the worker loops and close the socket."""
        self.running = False
        self.connected = False
        if self.client_socket:
            self.client_socket.close()
|
||
|
||
def resource_path(relative_path):
    """Return *relative_path* resolved against the application directory.

    The application directory is the folder of the executable when running
    as a frozen (packaged) program, and the folder of this source file when
    running in a development environment.
    """
    if getattr(sys, 'frozen', False):
        anchor = sys.executable
    else:
        anchor = os.path.abspath(__file__)
    return os.path.join(os.path.dirname(anchor), relative_path)
|
||
|
||
class WakeWordDetector:
    """Wake-word listener and speech recorder built on Porcupine and PyAudio.

    The constructor creates the Porcupine engine and a UDPClientTool bound to
    this instance. The client decides when the microphone is opened
    (open_PyAudio) and when a recording is made (record_user_speech); when the
    wake word is heard, "打开助手" is sent to the server through the client.
    """

    def __init__(self):
        # Wake-word configuration.
        keyword_paths = [resource_path('hello.ppn')]
        model_file = resource_path('porcupine_params_zh.pv')
        wake_words = ["你好"]
        sensitivities = [1.0]  # maximum sensitivity
        access_key = key

        # Create the Porcupine instance.
        self.porcupine = pvporcupine.create(
            access_key=access_key,
            keyword_paths=keyword_paths,
            model_path=model_file,
            keywords=wake_words,
            sensitivities=sensitivities
        )

        self.scodethread = None  # current wake-word detection thread
        self.p = None  # pyaudio.PyAudio instance while the microphone is open
        self.stream = None  # input stream while the microphone is open

        # Audio stream parameters.
        self.sample_rate = 16000
        self.frame_length = 512
        self.isOpen = False

        # Noise-detection parameters.
        self.detection_threshold = detection_threshold  # ambient-noise threshold
        self.voice_threshold = voice_threshold  # energy above this counts as speech
        self.silence_frames = 0  # consecutive silent frame counter
        self.min_silence_frames = 10  # minimum silent frames for ambient-noise judgement
        self.background_energy = 0  # measured background energy
        self.energy_adjustment_factor = 1.5  # multiplier applied by calibration

        # Recordings go to the current working directory.
        self.recording_dir = os.getcwd()
        self.max_silence_duration = 1.4  # seconds of silence that end a recording
        self.max_recording_duration = 30  # hard limit for one recording, in seconds
        self.silence_threshold_multiplier = 1.2  # silence threshold multiplier
        self.record_frame_counter = -1  # frames recorded so far; paces energy logging

        # Create the UDP client last: its background thread may call
        # open_PyAudio() on this object as soon as it connects, so every
        # attribute above must already exist.
        self.client_tool = UDPClientTool(waker=self)

    @staticmethod
    def _frame_energy(pcm_samples):
        """Return the mean absolute amplitude of a non-empty sequence of samples."""
        return sum(abs(sample) for sample in pcm_samples) / len(pcm_samples)

    def open_PyAudio(self):
        """Open the microphone and start a wake-word detection thread.

        An already open microphone is closed first. If opening fails, the
        attempt is repeated every 2 seconds until it succeeds, so this call
        blocks for as long as the device is unavailable.
        """
        # Retry in a loop rather than recursively so a long outage cannot
        # exhaust the call stack.
        while True:
            pa = None
            try:
                if self.isOpen:
                    self.close_PyAudio()

                pa = pyaudio.PyAudio()
                self.stream = pa.open(format=pyaudio.paInt16,
                                      channels=1,
                                      rate=self.sample_rate,
                                      input=True,
                                      frames_per_buffer=self.frame_length)
                self.p = pa
                self.isOpen = True
                log_info("开启麦克风...")
                # The detection thread runs in the background and is not joined.
                self.scodethread = threading.Thread(target=self.start_detection, daemon=True)
                self.scodethread.start()
                return
            except Exception as e:
                log_info(f"打开麦克风失败: {e}")
                self.isOpen = False
                if pa is not None:
                    # Do not leak the PyAudio instance created by this attempt.
                    try:
                        pa.terminate()
                    except Exception as cleanup_error:
                        log_info(f"关闭麦克风时出错: {cleanup_error}")
                time.sleep(2)
                log_info("尝试重新打开麦克风...")

    def calibrate_background_noise(self):
        """Measure about 2 seconds of ambient noise and derive detection_threshold.

        Sets background_energy to the mean frame energy and detection_threshold
        to that value times energy_adjustment_factor. Stops early if the
        microphone is closed or a read fails; with no samples nothing changes.
        """
        log_info("校准环境噪音水平,请保持安静...")
        energy_values = []
        # Number of frames that make up two seconds of audio.
        for _ in range(int(self.sample_rate / self.frame_length * 2)):
            if not self.isOpen:
                break
            try:
                pcm = self.stream.read(self.frame_length, exception_on_overflow=False)
                pcm_data = struct.unpack_from("h" * self.frame_length, pcm)
                energy_values.append(self._frame_energy(pcm_data))
            except Exception as e:
                log_info(f"校准环境噪音时出错: {e}")
                break

        if energy_values:
            # Average energy, scaled by the adjustment factor.
            self.background_energy = sum(energy_values) / len(energy_values)
            self.detection_threshold = self.background_energy * self.energy_adjustment_factor
            log_info(f"环境噪音水平: {self.background_energy}, 检测阈值: {self.detection_threshold}")

    def close_PyAudio(self):
        """Close the input stream and terminate PyAudio.

        The two steps are attempted independently so a failure while closing
        the stream does not prevent PyAudio from being terminated. Errors are
        logged, not raised; isOpen is always False afterwards.
        """
        stream, self.stream = getattr(self, 'stream', None), None
        pa, self.p = getattr(self, 'p', None), None
        failed = False

        try:
            if stream:
                if stream.is_active():
                    stream.stop_stream()
                stream.close()
        except Exception as e:
            failed = True
            log_info(f"关闭麦克风时出错: {e}")

        try:
            if pa:
                pa.terminate()
        except Exception as e:
            failed = True
            log_info(f"关闭麦克风时出错: {e}")

        self.isOpen = False
        if not failed:
            log_info("关闭麦克风...")

    def start_detection(self):
        """Read microphone frames and feed them to Porcupine until a wake word.

        On detection "打开助手" is sent to the server and the method returns,
        leaving the audio stream open. After three consecutive device errors
        the microphone is closed and reopened, which starts a fresh detection
        thread. The loop also ends when the microphone is closed or the stream
        it started with has been replaced.
        """
        log_info("正在等待唤醒词...")
        # Bind this loop to the stream that was current when it started, so a
        # loop left over from an earlier open_PyAudio() never reads from the
        # replacement stream alongside the new detection thread.
        stream = self.stream
        retry_count = 0
        max_retries = 3

        try:
            while True:
                if not self.isOpen:
                    log_info("\nself.isOpen=false")
                    break
                if stream is None or self.stream is not stream:
                    break

                try:
                    # Overflowed input is tolerated rather than raised: a
                    # dropped frame should not count as a device failure.
                    pcm = stream.read(self.frame_length, exception_on_overflow=False)
                    pcm_data = struct.unpack_from("h" * self.frame_length, pcm)

                    # Wake-word detection; a non-negative result means a hit.
                    result = self.porcupine.process(pcm_data)
                    if result >= 0:
                        log_info("\n检测到唤醒词!")
                        message = "打开助手"
                        self.client_tool.send_message(message)
                        return  # return only; the audio stream stays open

                    retry_count = 0

                except OSError as e:
                    if not self.isOpen or self.stream is not stream:
                        # The stream was closed or replaced by another thread;
                        # let the checks at the top of the loop end this one.
                        continue

                    retry_count += 1
                    log_info(f"\n音频设备错误: {e}, 尝试重新连接 ({retry_count}/{max_retries})...")

                    if retry_count >= max_retries:
                        log_info("重试次数过多,退出检测")
                        self.close_PyAudio()
                        time.sleep(2)
                        self.open_PyAudio()
                        break

                    time.sleep(1)
                    continue

        except KeyboardInterrupt:
            log_info("\n程序已停止。")

    def record_user_speech(self, delay=1):
        """Record one utterance from the open stream and report it to the server.

        Recording stops after max_silence_duration seconds of silence following
        speech, or after max_recording_duration seconds in total. A frame is
        speech when its mean absolute amplitude exceeds voice_threshold. If
        speech was detected the audio is saved with save_recording(). In every
        case the client's recording lock is released and "结束对话" is sent.

        Args:
            delay: Seconds to wait before recording starts.
        """
        frames = []
        is_speaking = False
        try:
            if delay > 0:
                log_info(f"将在 {delay} 秒后开始录音...")
                time.sleep(delay)
            log_info("开始录制,请说话...")

            start_time = time.time()
            consecutive_silence_frames = 0
            frame_seconds = self.frame_length / self.sample_rate

            while True:
                # Nothing reads the stream during the delay above, so its
                # buffer may have overflowed; tolerate that instead of
                # aborting the recording on the first read.
                data = self.stream.read(self.frame_length, exception_on_overflow=False)
                frames.append(data)

                # Energy of the current frame.
                pcm_data = struct.unpack_from("h" * self.frame_length, data)
                energy = self._frame_energy(pcm_data)

                # Log the energy once every ten frames.
                self.record_frame_counter += 1
                if self.record_frame_counter % 10 == 0:
                    log_info(f"当前能量: {energy:.2f}, 阈值: {self.voice_threshold:.2f}")

                if energy > self.voice_threshold:
                    is_speaking = True
                    consecutive_silence_frames = 0
                elif is_speaking:
                    # Silence only counts once speech has started.
                    consecutive_silence_frames += 1
                    silence_duration = consecutive_silence_frames * frame_seconds
                    if silence_duration >= self.max_silence_duration:
                        log_info(f"检测到{self.max_silence_duration}秒静音,录音结束")
                        break

                # Enforce the maximum recording time.
                if time.time() - start_time >= self.max_recording_duration:
                    log_info(f"达到最大录音时间{self.max_recording_duration}秒,录音结束")
                    break

        except Exception as e:
            log_info(f"录音过程中出错: {e}")
            is_speaking = False
            frames = []
        finally:
            try:
                try:
                    # Save the recording first, if there is one.
                    if is_speaking and frames:
                        self.save_recording(frames)
                    else:
                        log_info("未检测到有效语音,不保存录音")
                finally:
                    # Always release the recording lock, otherwise every later
                    # "开始对话" command would be refused.
                    self.client_tool.is_recording = False

                # Tell the server the dialogue turn is over, then give it a
                # moment to respond. The audio stream is left open; it is
                # closed and reopened when the next "开始唤醒" command arrives.
                message = "结束对话"
                self.client_tool.send_message(message)
                time.sleep(1)

            except Exception as e:
                log_info(f"清理录音资源时出错: {e}")

    def save_recording(self, frames):
        """Write *frames* to output.wav in the working directory and notify the server.

        The file is mono, 16-bit, at self.sample_rate. On success
        "录音完成:<path>" is sent to the server. Errors are logged, not raised.

        Args:
            frames: Sequence of bytes objects holding raw PCM audio.
        """
        # Fixed file name; each recording overwrites the previous one.
        filename = os.path.join(os.getcwd(), "output.wav")

        try:
            # The context manager closes the file even if writing fails.
            with wave.open(filename, 'wb') as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)  # 16-bit audio
                wf.setframerate(self.sample_rate)
                wf.writeframes(b''.join(frames))
            log_info(f"录音已保存: {filename}")

            # Report completion to the server.
            self.client_tool.send_message(f"录音完成:{filename}")
        except Exception as e:
            log_info(f"保存录音文件时出错: {e}")
|
||
|
||
if __name__ == "__main__":
    log_info("语音唤醒程序启动...")
    detector = WakeWordDetector()

    try:
        # Networking, detection and recording all run on daemon threads; the
        # main thread only idles so that Ctrl+C can be caught here.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        log_info("正在关闭程序...")
    except Exception as e:
        log_info(f"主程序异常: {e}")
    finally:
        # Release everything on every exit path. The microphone is closed and
        # the detection thread given a moment to stop before the Porcupine
        # engine it uses is deleted.
        detector.client_tool.shutdown()
        detector.close_PyAudio()
        if detector.scodethread is not None:
            detector.scodethread.join(timeout=2)
        detector.porcupine.delete()
        log_info("程序终止")
|