Smart_Talker/mainEx.py
2025-04-17 14:21:53 +08:00

485 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import sys
import pvporcupine
import pyaudio
import struct
import os
import socket
import time
import threading
import configparser
import wave
import datetime
import logging
# Logging setup: records go to voicelog.txt in the current working directory,
# each line carrying a timestamp and the level name.
log_file = os.path.join(os.getcwd(), "voicelog.txt")
logging.basicConfig(
    filename=log_file,
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
# Mirror the same records on the console, showing only the bare message.
formatter = logging.Formatter('%(message)s')
console = logging.StreamHandler()
console.setFormatter(formatter)
console.setLevel(logging.INFO)
logging.getLogger().addHandler(console)
# Logging helper used throughout this file in place of print().
def log_info(message):
    """Emit *message* at INFO level through the root logger."""
    logging.log(logging.INFO, message)
# Directory the program lives in: next to the executable when running as a
# frozen bundle (sys.frozen is set), next to this source file otherwise.
# This mirrors the rule used by resource_path().
if getattr(sys, 'frozen', False):
    base_dir = os.path.dirname(os.path.abspath(sys.executable))
else:
    base_dir = os.path.dirname(os.path.abspath(__file__))
config_file_path = os.path.join(base_dir, 'config.ini')
config = configparser.ConfigParser()
try:
    # ConfigParser.read() does not raise for a missing file; it returns the
    # list of files it managed to parse. Keep the historical lookup in the
    # current working directory first, then fall back to the copy that sits
    # next to the program.
    if not config.read('config.ini', encoding='utf-8'):
        config.read(config_file_path, encoding='utf-8')
    host = config.get('Server', 'address')
    port = config.getint('Server', 'port')
    key = config.get('Server', 'key')
    cport = config.getint('Server', 'client-port')
    detection_threshold = config.getfloat('Audio', 'detection_threshold', fallback=1)
    voice_threshold = config.getfloat('Audio', 'voice_threshold', fallback=150)
except (configparser.Error, ValueError, OSError) as e:
    # configparser.Error covers missing sections/options and parse errors;
    # ValueError covers non-numeric values passed to getint/getfloat as well
    # as undecodable file contents.
    log_info(f"配置错误: {e}")
    sys.exit(1)
class UDPClientTool:
    """UDP client that exchanges text commands with the server.

    Creating an instance immediately starts a daemon thread running
    start_communication(). That thread handshakes with the server, then runs
    one receive thread and one heartbeat thread, and reconnects whenever both
    of them have stopped.

    Commands received from the server ("开始唤醒", "开始对话") are forwarded to
    the ``waker`` object, which is expected to provide ``isOpen``,
    ``open_PyAudio()``, ``close_PyAudio()`` and ``record_user_speech()``.
    """

    def __init__(self, server_host=host, server_port=port, client_port=cport, talker=None, waker=None):
        """Store the connection settings and start the communication thread.

        Args:
            server_host: Server address datagrams are sent to.
            server_port: Server UDP port.
            client_port: Local port to bind; 0 means do not bind explicitly.
            talker: Stored on the instance; not used by this class.
            waker: Wake-word detector driven by the received commands.
        """
        self.SERVER_HOST = server_host
        self.SERVER_PORT = server_port
        self.CLIENT_PORT = client_port
        self.client_socket = None
        self.talker = talker
        self.waker = waker
        self.HEARTBEAT_INTERVAL = 5
        self.running = True
        self.connected = False  # current connection status
        self.receive_thread = None
        self.heartbeat_thread = None
        self.current_state = "等待唤醒"  # current state marker
        self.first_start = True  # True until the first successful connect
        self.last_state = "初始化"  # state to restore after a reconnect
        self.last_command = None  # last command handled (for de-duplication)
        self.last_command_time = 0  # time.time() of the last handled command
        self.is_recording = False  # True while a recording is in progress
        self.last_sent_message = None  # last message passed to send_message()
        # Start the worker thread only after every attribute it reads exists;
        # starting it earlier lets connect() race with the assignments above.
        self.communication_thread = threading.Thread(target=self.start_communication, daemon=True)
        self.communication_thread.start()

    def _close_socket(self):
        """Close the current socket, if any, and forget it."""
        sock, self.client_socket = self.client_socket, None
        if sock is not None:
            sock.close()

    def _restore_waker_state(self):
        """Bring the waker back to the state it had before the (re)connect."""
        if not self.waker:
            return
        if self.first_start:
            self.waker.open_PyAudio()
            self.first_start = False
            self.last_state = "唤醒监听"
        elif self.last_state == "唤醒监听":
            # We were listening for the wake word: keep listening.
            log_info("恢复唤醒词监听状态...")
            self.waker.open_PyAudio()
        elif self.last_state == "录音中":
            # We were recording: tell the user and record again.
            log_info("录音中断,请重新说话...")
            time.sleep(0.5)
            # Runs synchronously, so connect() returns only after the
            # recording has finished.
            self.waker.record_user_speech(delay=0)

    def connect(self):
        """Open the socket, handshake with the server and restore waker state.

        Sends "唤醒连接" and waits (5 s socket timeout) for any reply.

        Returns:
            True when the handshake and the state restore succeeded, False
            otherwise. On failure the socket is closed and ``connected`` is
            reset so the caller can simply retry.
        """
        try:
            self.client_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            self.client_socket.settimeout(5)
            if self.CLIENT_PORT != 0:
                self.client_socket.bind(('0.0.0.0', self.CLIENT_PORT))
            test_message = "唤醒连接"
            self.client_socket.sendto(test_message.encode('utf-8'), (self.SERVER_HOST, self.SERVER_PORT))
            data, addr = self.client_socket.recvfrom(4096)
            log_info(f"连接成功: {data.decode('utf-8')}")
            self.connected = True
            self._restore_waker_state()
            return True
        except Exception as e:
            log_info(f"连接失败: {e}")
            # Do not leave a half-initialised, possibly bound socket behind.
            self.connected = False
            self._close_socket()
            return False

    def start_communication(self):
        """Connect, run the receive/heartbeat threads, reconnect when they end."""
        while self.running:
            if self.connect():
                self.receive_thread = threading.Thread(target=self.receive_messages, daemon=True)
                self.receive_thread.start()
                self.heartbeat_thread = threading.Thread(target=self.send_heartbeat, daemon=True)
                self.heartbeat_thread.start()
                self.receive_thread.join()
                self.heartbeat_thread.join()
                self._close_socket()
                if self.running:
                    log_info("连接断开,重新尝试连接...")
            else:
                log_info("3秒后重连...")
                time.sleep(3)

    def _start_recording(self):
        """Mark the client as recording and start the recording thread."""
        # Set the flag first so a second "开始对话" cannot start another thread.
        self.is_recording = True
        self.current_state = "录音中"
        self.last_state = "录音中"
        try:
            record_thread = threading.Thread(
                target=self.waker.record_user_speech,
                args=(1,),
                daemon=True
            )
            record_thread.start()
            record_thread.join(timeout=0.1)
        except Exception as e:
            log_info(f"启动录音线程时出错: {e}")
            self.current_state = "等待唤醒"
            self.is_recording = False

    def _handle_command(self, decoded):
        """Apply one non-heartbeat command from the server to the waker."""
        if decoded == "开始唤醒":
            self.current_state = "等待唤醒"
            self.last_state = "唤醒监听"
            if self.waker.isOpen:
                self.waker.close_PyAudio()
                time.sleep(0.1)
            self.waker.open_PyAudio()
        elif decoded == "开始对话" and not self.is_recording:
            self._start_recording()
        elif self.is_recording:
            log_info("正在录音中,忽略指令:" + decoded)

    def receive_messages(self):
        """Receive datagrams and dispatch commands until the link goes down.

        Heartbeat echoes are ignored. A command identical to the previous one
        and arriving less than one second after it is dropped as a duplicate.
        The loop also ends when another thread clears ``connected`` (for
        example after a heartbeat failure), so start_communication() can
        reconnect.
        """
        while self.running and self.connected:
            try:
                data, addr = self.client_socket.recvfrom(4096)
                decoded = data.decode('utf-8')
                if decoded == "HEARTBEAT":
                    continue
                current_time = time.time()
                log_info(f"收到指令: {decoded}")
                if (decoded == self.last_command and
                        current_time - self.last_command_time < 1.0):
                    continue
                self.last_command = decoded
                self.last_command_time = current_time
                if self.waker:
                    self._handle_command(decoded)
            except socket.timeout:
                continue
            except Exception as e:
                log_info(f"接收错误: {e}")
                if self.running:  # only ask for a reconnect while still running
                    self.connected = False
                break

    def send_heartbeat(self):
        """Send a HEARTBEAT datagram every HEARTBEAT_INTERVAL seconds."""
        while self.connected and self.running:
            try:
                self.client_socket.sendto(b'HEARTBEAT', (self.SERVER_HOST, self.SERVER_PORT))
                time.sleep(self.HEARTBEAT_INTERVAL)
            except Exception as e:
                log_info(f"心跳发送失败: {e}")
                self.connected = False
                break

    def send_message(self, message):
        """Send *message* (UTF-8) to the server; a no-op while disconnected."""
        if self.connected and self.client_socket:
            try:
                self.client_socket.sendto(message.encode('utf-8'), (self.SERVER_HOST, self.SERVER_PORT))
                self.last_sent_message = message
                # After announcing the end of a dialogue, go back to waiting
                # for the wake word.
                if message == "结束对话":
                    self.current_state = "等待唤醒"
            except Exception as e:
                log_info(f"发送失败: {e}")

    def shutdown(self):
        """Stop all loops and close the socket."""
        self.running = False
        self.connected = False
        self._close_socket()
def resource_path(relative_path):
    """Return the absolute path of a bundled resource file.

    The path is resolved against the directory of the executable when the
    program runs frozen (sys.frozen is set), and against the directory of
    this source file during development.
    """
    is_frozen = getattr(sys, 'frozen', False)
    anchor = sys.executable if is_frozen else os.path.abspath(__file__)
    return os.path.join(os.path.dirname(anchor), relative_path)
class WakeWordDetector:
    """Listens on the microphone for the wake word and records user speech.

    The detector owns a Porcupine engine, a PyAudio input stream and a
    UDPClientTool. The UDP client calls back into open_PyAudio(),
    close_PyAudio() and record_user_speech() from its own threads.
    """

    def __init__(self):
        """Create the Porcupine engine, set audio parameters, start the client."""
        # Wake-word configuration.
        keyword_paths = [resource_path('hello.ppn')]
        model_file = resource_path('porcupine_params_zh.pv')
        wake_words = ["你好"]
        sensitivities = [1.0]  # maximum sensitivity
        access_key = key
        # NOTE(review): both keyword_paths and keywords are passed; presumably
        # keyword_paths takes precedence -- confirm against the pvporcupine docs.
        self.porcupine = pvporcupine.create(
            access_key=access_key,
            keyword_paths=keyword_paths,
            model_path=model_file,
            keywords=wake_words,
            sensitivities=sensitivities
        )
        self.scodethread = None  # thread running start_detection()
        # Audio stream parameters.
        self.sample_rate = 16000
        self.frame_length = 512
        self.isOpen = False
        self.p = None  # pyaudio.PyAudio handle, set by open_PyAudio()
        self.stream = None  # input stream, set by open_PyAudio()
        # Noise-detection parameters.
        self.detection_threshold = detection_threshold  # ambient-noise threshold
        self.voice_threshold = voice_threshold  # speech-detection threshold
        self.silence_frames = 0  # consecutive silent frame counter
        self.min_silence_frames = 10  # minimum silent frames for ambient noise
        self.background_energy = 0  # measured background noise energy
        self.energy_adjustment_factor = 1.5  # multiplier applied when calibrating
        # Recordings are written to the current working directory.
        self.recording_dir = os.getcwd()
        self.max_silence_duration = 1.4  # seconds of silence that end a recording
        self.max_recording_duration = 30  # hard limit for one recording, seconds
        self.silence_threshold_multiplier = 1.2  # silence threshold multiplier
        self.record_frame_counter = 0  # frames seen by record_user_speech() so far
        # Create the UDP client last: its constructor starts a thread that may
        # call open_PyAudio() right away, which needs the attributes above.
        self.client_tool = UDPClientTool(waker=self)

    def _read_frame(self):
        """Read one frame from the stream; return (raw bytes, 16-bit samples)."""
        raw = self.stream.read(self.frame_length)
        samples = struct.unpack_from("h" * self.frame_length, raw)
        return raw, samples

    @staticmethod
    def _mean_abs_amplitude(samples):
        """Return the mean absolute sample value, used as the frame energy."""
        return sum(abs(x) for x in samples) / len(samples)

    @staticmethod
    def _discard_audio(audio, stream):
        """Best-effort release of a stream/PyAudio pair after a failed open."""
        for release in (getattr(stream, 'close', None), getattr(audio, 'terminate', None)):
            if release is None:
                continue
            try:
                release()
            except Exception as e:
                log_info(f"关闭麦克风时出错: {e}")

    def open_PyAudio(self):
        """Open the microphone and start the wake-word detection thread.

        An already open microphone is closed first. When opening fails, the
        attempt is repeated every 2 seconds until it succeeds; the retry is a
        loop, so a long outage cannot exhaust the recursion limit.
        """
        while True:
            audio = None
            stream = None
            try:
                if self.isOpen:
                    self.close_PyAudio()
                audio = pyaudio.PyAudio()
                stream = audio.open(format=pyaudio.paInt16,
                                    channels=1,
                                    rate=self.sample_rate,
                                    input=True,
                                    frames_per_buffer=self.frame_length)
                self.p = audio
                self.stream = stream
                self.isOpen = True
                log_info("开启麦克风...")
                # The detection thread runs in the background; it is not joined.
                self.scodethread = threading.Thread(target=self.start_detection, daemon=True)
                self.scodethread.start()
                return
            except Exception as e:
                log_info(f"打开麦克风失败: {e}")
                self.isOpen = False
                # Release whatever this attempt managed to acquire.
                self._discard_audio(audio, stream)
                time.sleep(2)
                log_info("尝试重新打开麦克风...")

    def calibrate_background_noise(self):
        """Measure about 2 seconds of ambient noise and derive the threshold.

        Sets ``background_energy`` to the mean frame energy and
        ``detection_threshold`` to that value times
        ``energy_adjustment_factor``. Nothing is changed when no frame could
        be read.
        """
        log_info("校准环境噪音水平,请保持安静...")
        energy_values = []
        for _ in range(int(self.sample_rate / self.frame_length * 2)):
            if not self.isOpen:
                break
            try:
                _, pcm_data = self._read_frame()
                energy_values.append(self._mean_abs_amplitude(pcm_data))
            except Exception as e:
                log_info(f"校准环境噪音时出错: {e}")
                break
        if energy_values:
            self.background_energy = sum(energy_values) / len(energy_values)
            self.detection_threshold = self.background_energy * self.energy_adjustment_factor
            log_info(f"环境噪音水平: {self.background_energy}, 检测阈值: {self.detection_threshold}")

    def close_PyAudio(self):
        """Stop and close the stream and terminate PyAudio; never raises."""
        try:
            if hasattr(self, 'stream') and self.stream:
                if self.stream.is_active():
                    self.stream.stop_stream()
                self.stream.close()
            if hasattr(self, 'p') and self.p:
                self.p.terminate()
            self.isOpen = False
            log_info("关闭麦克风...")
        except Exception as e:
            log_info(f"关闭麦克风时出错: {e}")
            self.isOpen = False

    def start_detection(self):
        """Feed microphone frames to Porcupine until the wake word is heard.

        On detection "打开助手" is sent to the server and the method returns
        with the stream left open. After three consecutive OSErrors the
        microphone is closed and reopened, which starts a fresh detection
        thread, and this one ends.
        """
        log_info("正在等待唤醒词...")
        retry_count = 0
        max_retries = 3
        try:
            while True:
                if not self.isOpen:
                    log_info("\nself.isOpen=false")
                    break
                try:
                    _, pcm_data = self._read_frame()
                    result = self.porcupine.process(pcm_data)
                    if result >= 0:
                        log_info("\n检测到唤醒词!")
                        message = "打开助手"
                        self.client_tool.send_message(message)
                        return  # return only; the audio stream stays open
                    retry_count = 0
                except OSError as e:
                    retry_count += 1
                    log_info(f"\n音频设备错误: {e}, 尝试重新连接 ({retry_count}/{max_retries})...")
                    if retry_count >= max_retries:
                        log_info("重试次数过多,退出检测")
                        self.close_PyAudio()
                        time.sleep(2)
                        self.open_PyAudio()
                        break
                    time.sleep(1)
                    continue
        except KeyboardInterrupt:
            log_info("\n程序已停止。")

    def record_user_speech(self, delay=1):
        """Record from the open stream until the user stops speaking.

        Recording ends after ``max_silence_duration`` seconds of silence
        following detected speech, or after ``max_recording_duration``
        seconds. The audio is saved only if speech was detected. Afterwards
        the client's recording flag is cleared and "结束对话" is sent.

        The audio stream is deliberately left open on return.

        Args:
            delay: Seconds to wait before recording starts; 0 starts at once.
        """
        frames = []
        is_speaking = False
        try:
            if delay > 0:
                log_info(f"将在 {delay} 秒后开始录音...")
                time.sleep(delay)
            log_info("开始录制,请说话...")
            start_time = time.time()
            consecutive_silence_frames = 0
            while True:
                data, pcm_data = self._read_frame()
                frames.append(data)
                energy = self._mean_abs_amplitude(pcm_data)
                # Log the level on every 10th frame, counted across recordings.
                if self.record_frame_counter % 10 == 0:
                    log_info(f"当前能量: {energy:.2f}, 阈值: {self.voice_threshold:.2f}")
                self.record_frame_counter += 1
                if energy > self.voice_threshold:
                    is_speaking = True
                    consecutive_silence_frames = 0
                elif is_speaking:
                    consecutive_silence_frames += 1
                    silence_duration = consecutive_silence_frames * (self.frame_length / self.sample_rate)
                    if silence_duration >= self.max_silence_duration:
                        log_info(f"检测到{self.max_silence_duration}秒静音,录音结束")
                        break
                # Enforce the maximum recording time.
                if time.time() - start_time >= self.max_recording_duration:
                    log_info(f"达到最大录音时间{self.max_recording_duration}秒,录音结束")
                    break
        except Exception as e:
            log_info(f"录音过程中出错: {e}")
            is_speaking = False
            frames = []
        finally:
            try:
                # Save the recording first, if there is one.
                if is_speaking and frames:
                    self.save_recording(frames)
                else:
                    log_info("未检测到有效语音,不保存录音")
                # Allow the next "开始对话" command to start a recording.
                self.client_tool.is_recording = False
                message = "结束对话"
                self.client_tool.send_message(message)
                # Give the server a moment to respond.
                time.sleep(1)
            except Exception as e:
                log_info(f"清理录音资源时出错: {e}")

    def save_recording(self, frames):
        """Write *frames* to output.wav in the working directory and notify.

        The file is mono, 16-bit, at ``sample_rate``. On success
        "录音完成:<path>" is sent to the server; errors are logged.
        """
        # A fixed file name is used; each recording overwrites the previous one.
        filename = os.path.join(os.getcwd(), "output.wav")
        try:
            with wave.open(filename, 'wb') as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)  # 16-bit audio
                wf.setframerate(self.sample_rate)
                wf.writeframes(b''.join(frames))
            log_info(f"录音已保存: {filename}")
            self.client_tool.send_message(f"录音完成:{filename}")
        except Exception as e:
            log_info(f"保存录音文件时出错: {e}")
if __name__ == "__main__":
    # Script entry point: build the detector (which starts its own background
    # threads) and idle until interrupted.
    log_info("语音唤醒程序启动...")
    detector = None
    exit_code = 0
    try:
        # Constructed inside the try so a start-up failure is logged too.
        detector = WakeWordDetector()
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        log_info("正在关闭程序...")
    except Exception as e:
        log_info(f"主程序异常: {e}")
        exit_code = 1
    finally:
        # Release the client and the Porcupine engine on every exit path.
        if detector is not None:
            detector.client_tool.shutdown()
            detector.porcupine.delete()
        log_info("程序终止")
    sys.exit(exit_code)