"""Wake-word listener and speech recorder driven by a UDP control server.

The module loads its settings from ``config.ini``, keeps a UDP session with
the server alive (``UDPClientTool``) and listens on the microphone for the
wake word, recording the user's speech on request (``WakeWordDetector``).
"""
import sys
import struct
import os
import socket
import time
import threading
import configparser
import wave
import datetime
import logging

import pvporcupine
import pyaudio

# Logging: everything goes to voicelog.txt in the current working directory.
log_file = os.path.join(os.getcwd(), "voicelog.txt")
logging.basicConfig(
    filename=log_file,
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# A console handler so log records are also shown on the console.
console = logging.StreamHandler()
console.setLevel(logging.INFO)
formatter = logging.Formatter('%(message)s')
console.setFormatter(formatter)
logging.getLogger('').addHandler(console)


def log_info(message):
    """Log *message* at INFO level (used throughout instead of print)."""
    logging.info(message)


# Directory of the application: next to the executable when frozen
# (packaged), next to this source file otherwise.
base_dir = (
    os.path.dirname(os.path.abspath(sys.executable))
    if getattr(sys, 'frozen', False)
    else os.path.dirname(os.path.abspath(__file__))
)
config_file_path = os.path.join(base_dir, 'config.ini')

config = configparser.ConfigParser()
try:
    # Read the application-directory config first, then a config.ini in the
    # current working directory; later files override earlier ones and
    # missing files are skipped by ConfigParser.read().
    _config_candidates = [config_file_path]
    _cwd_config = os.path.abspath('config.ini')
    if _cwd_config != config_file_path:
        _config_candidates.append(_cwd_config)
    config.read(_config_candidates, encoding='utf-8')

    host = config.get('Server', 'address')
    port = config.getint('Server', 'port')
    key = config.get('Server', 'key')
    cport = config.getint('Server', 'client-port')
    detection_threshold = config.getfloat('Audio', 'detection_threshold', fallback=1)
    voice_threshold = config.getfloat('Audio', 'voice_threshold', fallback=150)
except (configparser.Error, ValueError, OSError) as e:
    # configparser.Error covers missing sections/options and parse errors;
    # ValueError covers non-numeric values passed to getint()/getfloat().
    log_info(f"配置错误: {e}")
    sys.exit(1)


class UDPClientTool:
    """UDP client that keeps a session with the control server.

    A background thread connects to the server, then runs a receiver thread
    and a heartbeat thread until the link fails, and reconnects afterwards.
    Commands received from the server drive the attached ``waker``
    (a ``WakeWordDetector``), if one is given.
    """

    def __init__(self, server_host=host, server_port=port, client_port=cport, talker=None, waker=None):
        """Store the connection settings and start the communication thread.

        Args:
            server_host: Server address to send datagrams to.
            server_port: Server UDP port.
            client_port: Local port to bind; 0 means do not bind explicitly.
            talker: Stored on the instance; not used by this class.
            waker: Object providing ``open_PyAudio``, ``close_PyAudio``,
                ``record_user_speech`` and ``isOpen``; may be None.
        """
        self.SERVER_HOST = server_host
        self.SERVER_PORT = server_port
        self.CLIENT_PORT = client_port
        self.client_socket = None
        self.talker = talker
        self.waker = waker
        self.HEARTBEAT_INTERVAL = 5
        self.running = True
        self.connected = False  # current connection state
        self.receive_thread = None
        self.heartbeat_thread = None
        self.current_state = "等待唤醒"  # current state marker
        self.first_start = True  # True until the first successful connect
        self.last_state = "初始化"  # state to restore after a reconnect
        self.last_command = None  # last command handled
        self.last_command_time = 0  # timestamp of the last handled command
        self.is_recording = False  # True while a recording is in progress
        self.last_sent_message = None  # last message passed to send_message()

        # Start the worker only after every attribute it reads exists;
        # connect() runs on this thread and uses first_start / last_state.
        self.communication_thread = threading.Thread(target=self.start_communication, daemon=True)
        self.communication_thread.start()

    def connect(self):
        """Open the socket, perform the handshake and restore the waker state.

        Returns:
            True when the server answered the handshake, False otherwise.
            On failure the socket is closed and ``connected`` is reset.
        """
        sock = None
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            sock.settimeout(5)
            if self.CLIENT_PORT != 0:
                sock.bind(('0.0.0.0', self.CLIENT_PORT))
            self.client_socket = sock

            test_message = "唤醒连接"
            sock.sendto(test_message.encode('utf-8'), (self.SERVER_HOST, self.SERVER_PORT))
            data, addr = sock.recvfrom(4096)
            log_info(f"连接成功: {data.decode('utf-8')}")
            self.connected = True

            # Restore whatever the waker was doing before the link dropped.
            if self.waker:
                if self.first_start:
                    self.waker.open_PyAudio()
                    self.first_start = False
                    self.last_state = "唤醒监听"
                elif self.last_state == "唤醒监听":
                    # Was listening for the wake word: keep listening.
                    log_info("恢复唤醒词监听状态...")
                    self.waker.open_PyAudio()
                elif self.last_state == "录音中":
                    # Was recording: ask the user to speak again.
                    log_info("录音中断,请重新说话...")
                    time.sleep(0.5)
                    self.waker.record_user_speech(delay=0)  # no extra delay
            return True
        except Exception as e:
            log_info(f"连接失败: {e}")
            # Do not leave a half-open session behind: a bound socket left
            # open would make the next bind() on the same port fail.
            self.connected = False
            if sock is not None:
                sock.close()
            self.client_socket = None
            return False

    def start_communication(self):
        """Connect, run receiver and heartbeat until the link drops, repeat."""
        while self.running:
            if self.connect():
                self.connected = True
                self.receive_thread = threading.Thread(target=self.receive_messages, daemon=True)
                self.receive_thread.start()
                self.heartbeat_thread = threading.Thread(target=self.send_heartbeat, daemon=True)
                self.heartbeat_thread.start()
                self.receive_thread.join()
                self.heartbeat_thread.join()
                if self.client_socket:
                    self.client_socket.close()
                    self.client_socket = None
                log_info("连接断开,重新尝试连接...")
            else:
                log_info("3秒后重连...")
                time.sleep(3)

    def receive_messages(self):
        """Receive server datagrams and dispatch the commands they carry.

        The loop ends when the tool is shut down, when the heartbeat thread
        marks the link as lost, or when receiving fails.
        """
        # Also watch self.connected: without it a heartbeat failure would
        # leave this loop spinning on timeouts and block the reconnect.
        while self.running and self.connected:
            try:
                data, addr = self.client_socket.recvfrom(4096)
                try:
                    decoded = data.decode('utf-8')
                except UnicodeDecodeError as e:
                    # A malformed datagram is not a reason to reconnect.
                    log_info(f"接收错误: {e}")
                    continue
                current_time = time.time()

                if decoded == "HEARTBEAT":
                    continue

                log_info(f"收到指令: {decoded}")
                # Drop a command repeated within one second.
                if decoded == self.last_command and current_time - self.last_command_time < 1.0:
                    continue
                self.last_command = decoded
                self.last_command_time = current_time

                if not self.waker:
                    continue

                if decoded == "开始唤醒":
                    self.current_state = "等待唤醒"
                    self.last_state = "唤醒监听"
                    if self.waker.isOpen:
                        self.waker.close_PyAudio()
                        time.sleep(0.1)
                    self.waker.open_PyAudio()
                elif decoded == "开始对话" and not self.is_recording:
                    # Take the recording lock before the thread starts.
                    self.is_recording = True
                    self.current_state = "录音中"
                    self.last_state = "录音中"
                    try:
                        record_thread = threading.Thread(
                            target=self.waker.record_user_speech,
                            args=(1,),
                            daemon=True
                        )
                        record_thread.start()
                        record_thread.join(timeout=0.1)
                    except Exception as e:
                        log_info(f"启动录音线程时出错: {e}")
                        self.current_state = "等待唤醒"
                        self.is_recording = False
                elif self.is_recording:
                    log_info("正在录音中,忽略指令:" + decoded)
                    continue
            except socket.timeout:
                continue
            except Exception as e:
                log_info(f"接收错误: {e}")
                if self.running:
                    # Only flag a reconnect while the program is still running.
                    self.connected = False
                break

    def send_heartbeat(self):
        """Send a heartbeat datagram every HEARTBEAT_INTERVAL seconds."""
        while self.connected and self.running:
            try:
                self.client_socket.sendto(b'HEARTBEAT', (self.SERVER_HOST, self.SERVER_PORT))
                time.sleep(self.HEARTBEAT_INTERVAL)
            except Exception as e:
                log_info(f"心跳发送失败: {e}")
                self.connected = False
                break

    def send_message(self, message):
        """Send *message* to the server if connected; failures are logged.

        Sending "结束对话" also resets ``current_state`` to "等待唤醒".
        """
        # Use a local reference: the communication thread may set
        # client_socket to None while this method runs.
        sock = self.client_socket
        if self.connected and sock:
            try:
                sock.sendto(message.encode('utf-8'), (self.SERVER_HOST, self.SERVER_PORT))
                self.last_sent_message = message
                if message == "结束对话":
                    self.current_state = "等待唤醒"
            except Exception as e:
                log_info(f"发送失败: {e}")

    def shutdown(self):
        """Stop all loops and close the socket."""
        self.running = False
        self.connected = False
        if self.client_socket:
            self.client_socket.close()


def resource_path(relative_path):
    """Return the absolute path of a resource shipped with the program."""
    if getattr(sys, 'frozen', False):
        # Packaged program: resources live next to the executable.
        base_path = os.path.dirname(sys.executable)
    else:
        # Development: resources live next to this source file.
        base_path = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(base_path, relative_path)


class WakeWordDetector:
    """Microphone front end: wake-word detection and speech recording."""

    def __init__(self):
        """Create the Porcupine engine, set audio parameters, start the client."""
        # Wake-word configuration.
        keyword_paths = [resource_path('hello.ppn')]
        model_file = resource_path('porcupine_params_zh.pv')
        wake_words = ["你好"]
        sensitivities = [1.0]  # maximum sensitivity
        access_key = key

        # NOTE(review): both keyword_paths and keywords are passed, as in the
        # original code; presumably keyword_paths takes precedence — confirm
        # against the pvporcupine documentation.
        self.porcupine = pvporcupine.create(
            access_key=access_key,
            keyword_paths=keyword_paths,
            model_path=model_file,
            keywords=wake_words,
            sensitivities=sensitivities
        )

        self.scodethread = None  # current wake-word detection thread
        # Audio stream parameters.
        self.sample_rate = 16000
        self.frame_length = 512
        self.isOpen = False
        self.p = None
        self.stream = None
        # Bumped on every open/close so a detection thread started for an
        # older stream can notice it is stale and exit.
        self._stream_generation = 0
        self.record_frame_counter = 0  # frames seen by record_user_speech()

        # Noise detection parameters.
        self.detection_threshold = detection_threshold  # ambient noise threshold
        self.voice_threshold = voice_threshold  # speech detection threshold
        self.silence_frames = 0  # consecutive silent frame counter
        self.min_silence_frames = 10  # minimum silent frames for ambient noise
        self.background_energy = 0  # background noise energy
        self.energy_adjustment_factor = 1.5  # energy adjustment factor

        # Recordings are saved in the current working directory.
        self.recording_dir = os.getcwd()
        self.max_silence_duration = 1.4  # seconds of silence that end a recording
        self.max_recording_duration = 30  # maximum recording length in seconds
        self.silence_threshold_multiplier = 1.2  # silence threshold multiplier

        # Create the client last: its background thread calls open_PyAudio()
        # on this object right after connecting, so every attribute above
        # must already exist.
        self.client_tool = UDPClientTool(waker=self)

    def open_PyAudio(self):
        """Open the microphone and start a wake-word detection thread.

        An already open stream is closed first. Opening is retried every two
        seconds until it succeeds.
        """
        while True:
            if self.isOpen:
                self.close_PyAudio()
            audio = None
            try:
                audio = pyaudio.PyAudio()
                stream = audio.open(format=pyaudio.paInt16,
                                    channels=1,
                                    rate=self.sample_rate,
                                    input=True,
                                    frames_per_buffer=self.frame_length)
            except Exception as e:
                log_info(f"打开麦克风失败: {e}")
                self.isOpen = False
                if audio is not None:
                    # Release the PortAudio handle of the failed attempt.
                    try:
                        audio.terminate()
                    except Exception as term_error:
                        log_info(f"关闭麦克风时出错: {term_error}")
                time.sleep(2)
                log_info("尝试重新打开麦克风...")
                continue

            self.p = audio
            self.stream = stream
            self._stream_generation += 1
            self.isOpen = True
            log_info("开启麦克风...")
            # The detection thread runs in the background (not joined).
            self.scodethread = threading.Thread(
                target=self.start_detection,
                args=(self._stream_generation,),
                daemon=True
            )
            self.scodethread.start()
            return

    def calibrate_background_noise(self):
        """Sample about two seconds of audio and derive detection_threshold."""
        log_info("校准环境噪音水平,请保持安静...")
        energy_values = []

        # Collect roughly two seconds of ambient noise frames.
        for _ in range(int(self.sample_rate / self.frame_length * 2)):
            if not self.isOpen:
                break
            try:
                pcm = self.stream.read(self.frame_length)
                pcm_data = struct.unpack_from("h" * self.frame_length, pcm)
                energy = sum(abs(x) for x in pcm_data) / len(pcm_data)
                energy_values.append(energy)
            except Exception:
                # Stop sampling on any read/decode error; use what we have.
                break

        if energy_values:
            # Mean energy scaled by the adjustment factor.
            self.background_energy = sum(energy_values) / len(energy_values)
            self.detection_threshold = self.background_energy * self.energy_adjustment_factor
            log_info(f"环境噪音水平: {self.background_energy}, 检测阈值: {self.detection_threshold}")

    def close_PyAudio(self):
        """Stop and close the stream and terminate PyAudio; errors are logged."""
        # Mark the stream closed first so the detection loop stops using it.
        self.isOpen = False
        self._stream_generation = getattr(self, '_stream_generation', 0) + 1
        stream = getattr(self, 'stream', None)
        audio = getattr(self, 'p', None)
        self.stream = None
        self.p = None

        error = None
        try:
            if stream:
                if stream.is_active():
                    stream.stop_stream()
                stream.close()
        except Exception as e:
            error = e
        # Terminate PyAudio even when closing the stream failed.
        try:
            if audio:
                audio.terminate()
        except Exception as e:
            if error is None:
                error = e

        if error is None:
            log_info("关闭麦克风...")
        else:
            log_info(f"关闭麦克风时出错: {error}")

    def start_detection(self, generation=None):
        """Read microphone frames until the wake word is detected.

        On detection "打开助手" is sent to the server and the method returns
        without closing the audio stream.

        Args:
            generation: Stream generation this loop belongs to; defaults to
                the current one. The loop exits once the stream has been
                closed or replaced.
        """
        if generation is None:
            generation = self._stream_generation
        log_info("正在等待唤醒词...")
        retry_count = 0
        max_retries = 3

        try:
            while True:
                if not self.isOpen:
                    log_info("\nself.isOpen=false")
                    break
                stream = self.stream
                if generation != self._stream_generation or stream is None:
                    # The stream was replaced; a newer thread owns it.
                    break

                try:
                    pcm = stream.read(self.frame_length)
                    pcm_data = struct.unpack_from("h" * self.frame_length, pcm)

                    result = self.porcupine.process(pcm_data)
                    if result >= 0:
                        log_info("\n检测到唤醒词!")
                        message = "打开助手"
                        self.client_tool.send_message(message)
                        return  # return only; keep the audio stream open

                    retry_count = 0

                except OSError as e:
                    if not self.isOpen or generation != self._stream_generation:
                        # The stream was closed on purpose; not a device error.
                        break
                    retry_count += 1
                    log_info(f"\n音频设备错误: {e}, 尝试重新连接 ({retry_count}/{max_retries})...")
                    if retry_count >= max_retries:
                        log_info("重试次数过多,退出检测")
                        self.close_PyAudio()
                        time.sleep(2)
                        self.open_PyAudio()
                        break
                    time.sleep(1)
                    continue

        except KeyboardInterrupt:
            log_info("\n程序已停止。")

    def record_user_speech(self, delay=1):
        """Record speech until silence or the time limit, then save it.

        Recording stops after ``max_silence_duration`` seconds of silence
        following speech, or after ``max_recording_duration`` seconds. The
        recording is saved only if speech was detected. Afterwards the
        client's recording flag is cleared and "结束对话" is sent.

        Args:
            delay: Seconds to wait before recording starts.
        """
        # Defined before the try so the finally block can always read them.
        frames = []
        is_speaking = False
        try:
            if delay > 0:
                log_info(f"将在 {delay} 秒后开始录音...")
                time.sleep(delay)

            log_info("开始录制,请说话...")

            start_time = time.time()
            consecutive_silence_frames = 0
            frame_seconds = self.frame_length / self.sample_rate

            while True:
                # Nothing reads the stream while waiting above, so the input
                # buffer may have overflowed; do not abort the recording.
                data = self.stream.read(self.frame_length, exception_on_overflow=False)
                frames.append(data)

                # Mean absolute amplitude of the frame.
                pcm_data = struct.unpack_from("h" * self.frame_length, data)
                energy = sum(abs(x) for x in pcm_data) / len(pcm_data)

                # Log the energy every tenth frame.
                if self.record_frame_counter % 10 == 0:
                    log_info(f"当前能量: {energy:.2f}, 阈值: {self.voice_threshold:.2f}")
                self.record_frame_counter += 1

                if energy > self.voice_threshold:
                    is_speaking = True
                    consecutive_silence_frames = 0
                elif is_speaking:
                    consecutive_silence_frames += 1
                    silence_duration = consecutive_silence_frames * frame_seconds
                    if silence_duration >= self.max_silence_duration:
                        log_info(f"检测到{self.max_silence_duration}秒静音,录音结束")
                        break

                # Enforce the maximum recording time.
                if time.time() - start_time >= self.max_recording_duration:
                    log_info(f"达到最大录音时间{self.max_recording_duration}秒,录音结束")
                    break

        except Exception as e:
            log_info(f"录音过程中出错: {e}")
            is_speaking = False
            frames = []

        finally:
            try:
                # Save the recording first, if there is one.
                if is_speaking and frames:
                    self.save_recording(frames)
                else:
                    log_info("未检测到有效语音,不保存录音")

                # Release the recording lock.
                self.client_tool.is_recording = False

                # Tell the server the dialogue turn is over.
                message = "结束对话"
                self.client_tool.send_message(message)

                # Give the server time to respond.
                time.sleep(1)

                # The stream is left open here; open_PyAudio() closes and
                # recreates it when wake-word listening resumes.
            except Exception as e:
                log_info(f"清理录音资源时出错: {e}")

    def save_recording(self, frames):
        """Write *frames* to output.wav in the working directory and notify the server."""
        # Fixed file name.
        filename = os.path.join(os.getcwd(), "output.wav")

        try:
            with wave.open(filename, 'wb') as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)  # 16-bit audio
                wf.setframerate(self.sample_rate)
                wf.writeframes(b''.join(frames))
            log_info(f"录音已保存: {filename}")

            # Report the finished recording to the server.
            self.client_tool.send_message(f"录音完成:{filename}")
        except Exception as e:
            log_info(f"保存录音文件时出错: {e}")


if __name__ == "__main__":
    log_info("语音唤醒程序启动...")
    detector = WakeWordDetector()
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        log_info("正在关闭程序...")
    except Exception as e:
        log_info(f"主程序异常: {e}")
    finally:
        # Release resources on every exit path, not only on Ctrl+C.
        detector.client_tool.shutdown()
        detector.close_PyAudio()
        # Let the detection thread leave porcupine.process() before the
        # engine is deleted.
        if detector.scodethread is not None:
            detector.scodethread.join(timeout=2)
        detector.porcupine.delete()
        log_info("程序终止")