Smart_Talker/mainEx.py

485 lines
19 KiB
Python
Raw Permalink Normal View History

2025-04-17 14:21:53 +08:00
import sys
import pvporcupine
import pyaudio
import struct
import os
import socket
import time
import threading
import configparser
import wave
import datetime
import logging
# Logging setup: records are written to voicelog.txt in the current working
# directory, each line carrying a timestamp and the level name.
log_file = os.path.join(os.getcwd(), "voicelog.txt")
logging.basicConfig(
    level=logging.INFO,
    filename=log_file,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# A second handler mirrors the same records on the console, showing only the
# bare message text.
formatter = logging.Formatter('%(message)s')
console = logging.StreamHandler()
console.setFormatter(formatter)
console.setLevel(logging.INFO)
logging.getLogger().addHandler(console)
def log_info(message):
    """Log ``message`` at INFO level on the root logger (used in place of print)."""
    logging.log(logging.INFO, message)
# Directory the program lives in. This mirrors resource_path(): a packaged
# (sys.frozen) program is located through sys.executable, a plain script
# through __file__. The original expression had the two branches swapped.
if getattr(sys, 'frozen', False):
    base_dir = os.path.dirname(os.path.abspath(sys.executable))
else:
    base_dir = os.path.dirname(os.path.abspath(__file__))
config_file_path = os.path.join(base_dir, 'config.ini')
config = configparser.ConfigParser()
try:
    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it did parse, so a missing file has to be detected here.
    # The working-directory config.ini is read last, so it still takes
    # precedence exactly as before when it exists.
    loaded_files = config.read([config_file_path, 'config.ini'], encoding='utf-8')
    if not loaded_files:
        raise FileNotFoundError(config_file_path)
    host = config.get('Server', 'address')
    port = config.getint('Server', 'port')
    key = config.get('Server', 'key')
    cport = config.getint('Server', 'client-port')
    detection_threshold = config.getfloat('Audio', 'detection_threshold', fallback=1)
    voice_threshold = config.getfloat('Audio', 'voice_threshold', fallback=150)
except (configparser.Error, ValueError, FileNotFoundError) as e:
    # configparser.Error covers missing sections/options and malformed files;
    # ValueError covers non-numeric values passed to getint()/getfloat().
    log_info(f"配置错误: {e}")
    sys.exit(1)
class UDPClientTool:
    """UDP client that links a wake-word detector to the control server.

    Creating an instance immediately starts a daemon thread running
    start_communication(), which connects to the server, keeps a receive loop
    and a heartbeat loop alive, and reconnects whenever they stop.
    """

    def __init__(self, server_host=host, server_port=port, client_port=cport, talker=None, waker=None):
        """Store the connection settings and start the communication thread.

        Args:
            server_host: Server address; defaults to the configured ``host``.
            server_port: Server UDP port; defaults to the configured ``port``.
            client_port: Local port to bind; ``0`` skips the explicit bind.
            talker: Stored on the instance; not used by this class.
            waker: Object driven by incoming commands. This class calls its
                ``open_PyAudio``, ``close_PyAudio`` and ``record_user_speech``
                methods and reads its ``isOpen`` attribute.
        """
        self.SERVER_HOST = server_host
        self.SERVER_PORT = server_port
        self.CLIENT_PORT = client_port
        self.client_socket = None
        self.talker = talker
        self.waker = waker
        self.HEARTBEAT_INTERVAL = 5
        self.running = True
        self.connected = False  # current connection status
        self.receive_thread = None
        self.heartbeat_thread = None
        self.current_state = "等待唤醒"  # current state marker
        self.first_start = True  # True until the first successful connect
        self.last_state = "初始化"  # state to restore after a reconnect
        self.last_command = None  # last command accepted by the receive loop
        self.last_command_time = 0  # time.time() of that command
        self.is_recording = False  # True while a recording thread is active
        self.last_sent_message = None
        # Start the worker only after every attribute above exists: the thread
        # reads first_start/last_state as soon as the server answers.
        self.communication_thread = threading.Thread(target=self.start_communication, daemon=True)
        self.communication_thread.start()

    def connect(self):
        """Open the UDP socket, perform the handshake and restore the waker.

        Returns:
            True when the server answered the handshake, False otherwise. On
            failure the socket is closed and ``client_socket`` is reset.
        """
        sock = None
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            self.client_socket = sock
            sock.settimeout(5)
            if self.CLIENT_PORT != 0:
                sock.bind(('0.0.0.0', self.CLIENT_PORT))
            test_message = "唤醒连接"
            sock.sendto(test_message.encode('utf-8'), (self.SERVER_HOST, self.SERVER_PORT))
            data, addr = sock.recvfrom(4096)
            # errors='replace' so a malformed reply cannot fail the handshake.
            log_info(f"连接成功: {data.decode('utf-8', errors='replace')}")
            self.connected = True
            # Restore the waker according to the state recorded before the drop.
            if self.waker:
                if self.first_start:
                    self.waker.open_PyAudio()
                    self.first_start = False
                    self.last_state = "唤醒监听"
                elif self.last_state == "唤醒监听":
                    # Was listening for the wake word: keep listening.
                    log_info("恢复唤醒词监听状态...")
                    self.waker.open_PyAudio()
                elif self.last_state == "录音中":
                    # Was recording: tell the user and record again. This call
                    # runs in the communication thread and blocks it until the
                    # recording ends.
                    log_info("录音中断,请重新说话...")
                    time.sleep(0.5)
                    self.waker.record_user_speech(delay=0)  # no extra delay
            return True
        except Exception as e:
            log_info(f"连接失败: {e}")
            self.connected = False
            # Release the half-initialised socket so the next attempt can bind
            # the same local port again.
            if sock is not None:
                sock.close()
            self.client_socket = None
            return False

    def start_communication(self):
        """Connect, run the receive and heartbeat loops, and reconnect until shutdown."""
        while self.running:
            if self.connect():
                self.connected = True
                self.receive_thread = threading.Thread(target=self.receive_messages, daemon=True)
                self.receive_thread.start()
                self.heartbeat_thread = threading.Thread(target=self.send_heartbeat, daemon=True)
                self.heartbeat_thread.start()
                self.receive_thread.join()
                self.heartbeat_thread.join()
                if self.client_socket:
                    self.client_socket.close()
                    self.client_socket = None
                log_info("连接断开,重新尝试连接...")
            else:
                log_info("3秒后重连...")
                time.sleep(3)

    def receive_messages(self):
        """Receive datagrams and dispatch them until the connection stops.

        The loop ends when ``running`` or ``connected`` becomes False, so a
        heartbeat failure also ends it and lets start_communication() reconnect.
        """
        while self.running and self.connected:
            try:
                data, addr = self.client_socket.recvfrom(4096)
                try:
                    decoded = data.decode('utf-8')
                except UnicodeDecodeError as e:
                    # A malformed datagram is dropped; it is not a reason to
                    # tear the connection down.
                    log_info(f"接收错误: {e}")
                    continue
                self._handle_command(decoded, time.time())
            except socket.timeout:
                continue
            except Exception as e:
                log_info(f"接收错误: {e}")
                if self.running:  # only flag a reconnect while still running
                    self.connected = False
                break

    def _handle_command(self, decoded, received_at):
        """Apply one decoded server message received at time ``received_at``."""
        if decoded != "HEARTBEAT":
            log_info(f"收到指令: {decoded}")
            # Drop a command repeated within one second of the previous one.
            if decoded == self.last_command and received_at - self.last_command_time < 1.0:
                return
            self.last_command = decoded
            self.last_command_time = received_at
        if not self.waker:
            return
        if decoded == "开始唤醒":
            self.current_state = "等待唤醒"
            self.last_state = "唤醒监听"
            if self.waker.isOpen:
                self.waker.close_PyAudio()
                time.sleep(0.1)
            self.waker.open_PyAudio()
        elif decoded == "开始对话":
            if not self.is_recording:
                # Take the recording flag before the thread starts.
                self.is_recording = True
                self.current_state = "录音中"
                self.last_state = "录音中"
                try:
                    record_thread = threading.Thread(
                        target=self.waker.record_user_speech,
                        args=(1,),
                        daemon=True
                    )
                    record_thread.start()
                    record_thread.join(timeout=0.1)
                except Exception as e:
                    log_info(f"启动录音线程时出错: {e}")
                    self.current_state = "等待唤醒"
                    self.is_recording = False
        elif self.is_recording and decoded != "HEARTBEAT":
            log_info("正在录音中,忽略指令:" + decoded)

    def send_heartbeat(self):
        """Send a HEARTBEAT datagram every HEARTBEAT_INTERVAL seconds while connected."""
        while self.connected and self.running:
            try:
                self.client_socket.sendto(b'HEARTBEAT', (self.SERVER_HOST, self.SERVER_PORT))
                time.sleep(self.HEARTBEAT_INTERVAL)
            except Exception as e:
                log_info(f"心跳发送失败: {e}")
                self.connected = False
                break

    def send_message(self, message):
        """Send ``message`` (UTF-8 encoded) to the server.

        Returns:
            True when the datagram was handed to the socket, False when there
            is no connection or the send failed.
        """
        if not (self.connected and self.client_socket):
            # Previously dropped without a trace; log it so a lost message can
            # be diagnosed.
            log_info(f"发送失败: 未连接 ({message})")
            return False
        try:
            self.client_socket.sendto(message.encode('utf-8'), (self.SERVER_HOST, self.SERVER_PORT))
            self.last_sent_message = message
            # Sending the end-of-dialogue message puts the state back to waiting.
            if message == "结束对话":
                self.current_state = "等待唤醒"
            return True
        except Exception as e:
            log_info(f"发送失败: {e}")
            return False

    def shutdown(self):
        """Stop all loops and close the socket."""
        self.running = False
        self.connected = False
        if self.client_socket:
            self.client_socket.close()
def resource_path(relative_path):
    """Return the path of a resource file located next to the program.

    A packaged program (``sys.frozen`` set) is anchored at the directory of
    ``sys.executable``; in a development environment the anchor is the
    directory of this source file.
    """
    is_packaged = getattr(sys, 'frozen', False)
    anchor = sys.executable if is_packaged else os.path.abspath(__file__)
    return os.path.join(os.path.dirname(anchor), relative_path)
class WakeWordDetector:
    """Listens on the microphone for the wake word and records speech on request.

    The instance owns a Porcupine engine, a PyAudio input stream and a
    UDPClientTool through which it reports events to the server.
    """

    def __init__(self):
        """Create the Porcupine engine, set the audio parameters, then start the UDP client."""
        # Wake-word configuration.
        keyword_paths = [resource_path('hello.ppn')]
        model_file = resource_path('porcupine_params_zh.pv')
        wake_words = ["你好"]
        sensitivities = [1.0]  # maximum sensitivity
        access_key = key
        self.porcupine = pvporcupine.create(
            access_key=access_key,
            keyword_paths=keyword_paths,
            model_path=model_file,
            keywords=wake_words,
            sensitivities=sensitivities
        )
        self.scodethread = None
        self.p = None
        self.stream = None
        # Audio stream parameters.
        self.sample_rate = 16000
        self.frame_length = 512
        self.isOpen = False
        # Noise-detection parameters.
        self.detection_threshold = detection_threshold  # ambient-noise threshold
        self.voice_threshold = voice_threshold  # speech-detection threshold
        self.silence_frames = 0  # consecutive silent frame count
        self.min_silence_frames = 10  # minimum silent frames for ambient noise
        self.background_energy = 0  # measured background energy
        self.energy_adjustment_factor = 1.5  # multiplier applied to that energy
        # Recordings go to the current working directory.
        self.recording_dir = os.getcwd()
        self.max_silence_duration = 1.4  # seconds of silence that end a recording
        self.max_recording_duration = 30  # hard limit on a recording, in seconds
        self.silence_threshold_multiplier = 1.2
        self.record_frame_counter = 0  # frames seen in the current recording
        # Created last: UDPClientTool starts a thread that may call
        # open_PyAudio() on this object, which needs the attributes above.
        self.client_tool = UDPClientTool(waker=self)

    @staticmethod
    def _mean_abs_amplitude(samples):
        """Return the mean absolute value of ``samples`` (0.0 when empty)."""
        if not samples:
            return 0.0
        return sum(abs(s) for s in samples) / len(samples)

    def open_PyAudio(self):
        """Open the microphone stream and start a wake-word detection thread.

        A stream that is already open is closed first. When opening fails the
        attempt is repeated every 2 seconds until it succeeds; this is a loop
        rather than recursion so a long outage cannot exhaust the call stack.
        """
        while True:
            audio = None
            try:
                if self.isOpen:
                    self.close_PyAudio()
                audio = pyaudio.PyAudio()
                stream = audio.open(format=pyaudio.paInt16,
                                    channels=1,
                                    rate=self.sample_rate,
                                    input=True,
                                    frames_per_buffer=self.frame_length)
                self.p = audio
                self.stream = stream
                self.isOpen = True
                log_info("开启麦克风...")
                # The thread is handed its own stream and is not joined, so it
                # keeps running in the background.
                self.scodethread = threading.Thread(target=self.start_detection,
                                                    args=(stream,), daemon=True)
                self.scodethread.start()
                return
            except Exception as e:
                log_info(f"打开麦克风失败: {e}")
                self.isOpen = False
                if audio is not None and self.p is not audio:
                    # PyAudio was created but the stream never opened:
                    # release it instead of leaking one handle per retry.
                    try:
                        audio.terminate()
                    except Exception as cleanup_error:
                        log_info(f"关闭麦克风时出错: {cleanup_error}")
                time.sleep(2)
                log_info("尝试重新打开麦克风...")

    def calibrate_background_noise(self):
        """Measure about 2 seconds of ambient noise and derive ``detection_threshold``.

        Sets ``background_energy`` to the mean frame energy and
        ``detection_threshold`` to that value times ``energy_adjustment_factor``.
        Nothing changes when no frame could be read.
        """
        log_info("校准环境噪音水平,请保持安静...")
        energy_values = []
        for _ in range(int(self.sample_rate / self.frame_length * 2)):
            if not self.isOpen:
                break
            try:
                pcm = self.stream.read(self.frame_length)
                pcm_data = struct.unpack_from("h" * self.frame_length, pcm)
                energy_values.append(self._mean_abs_amplitude(pcm_data))
            except Exception as e:
                # Stop sampling on the first read or decode problem.
                log_info(f"音频设备错误: {e}")
                break
        if energy_values:
            self.background_energy = sum(energy_values) / len(energy_values)
            self.detection_threshold = self.background_energy * self.energy_adjustment_factor
            log_info(f"环境噪音水平: {self.background_energy}, 检测阈值: {self.detection_threshold}")

    def close_PyAudio(self):
        """Stop the detection loop and release the stream and PyAudio handle."""
        # Clear the flag first so the detection loop stops at its next check.
        self.isOpen = False
        worker = self.scodethread
        if (worker is not None and worker is not threading.current_thread()
                and worker.is_alive()):
            # Give the detection thread a moment to leave its read before the
            # stream is closed underneath it.
            worker.join(timeout=1.0)
        stream = getattr(self, 'stream', None)
        audio = getattr(self, 'p', None)
        self.stream = None
        self.p = None
        closed_cleanly = True
        try:
            if stream:
                if stream.is_active():
                    stream.stop_stream()
                stream.close()
        except Exception as e:
            closed_cleanly = False
            log_info(f"关闭麦克风时出错: {e}")
        # Terminate separately so a failing stream close cannot skip it.
        try:
            if audio:
                audio.terminate()
        except Exception as e:
            closed_cleanly = False
            log_info(f"关闭麦克风时出错: {e}")
        if closed_cleanly:
            log_info("关闭麦克风...")

    def start_detection(self, stream=None):
        """Read frames from ``stream`` until the wake word is detected.

        Args:
            stream: Stream this loop owns; defaults to the current
                ``self.stream``. The loop ends as soon as ``self.stream`` is
                no longer this object, so a loop left over from a previous
                open_PyAudio() never reads a newer stream.

        On detection "打开助手" is sent to the server and the method returns
        with the stream left open. After ``max_retries`` consecutive OSErrors
        the microphone is closed and reopened.
        """
        log_info("正在等待唤醒词...")
        if stream is None:
            stream = self.stream
        retry_count = 0
        max_retries = 3
        try:
            while True:
                if not self.isOpen:
                    log_info("\nself.isOpen=false")
                    break
                if self.stream is not stream:
                    break
                try:
                    pcm = stream.read(self.frame_length)
                    pcm_data = struct.unpack_from("h" * self.frame_length, pcm)
                    result = self.porcupine.process(pcm_data)
                    if result >= 0:
                        log_info("\n检测到唤醒词!")
                        message = "打开助手"
                        self.client_tool.send_message(message)
                        return  # return only; the audio stream stays open
                    retry_count = 0
                except OSError as e:
                    if not self.isOpen or self.stream is not stream:
                        # The stream was closed or replaced on purpose.
                        break
                    retry_count += 1
                    log_info(f"\n音频设备错误: {e}, 尝试重新连接 ({retry_count}/{max_retries})...")
                    if retry_count >= max_retries:
                        log_info("重试次数过多,退出检测")
                        self.close_PyAudio()
                        time.sleep(2)
                        self.open_PyAudio()
                        break
                    time.sleep(1)
                    continue
        except KeyboardInterrupt:
            log_info("\n程序已停止。")

    def record_user_speech(self, delay=1):
        """Record from the open stream until silence or the time limit.

        Args:
            delay: Seconds to wait before recording starts; values <= 0 skip
                the wait.

        A frame counts as speech when its mean absolute amplitude exceeds
        ``voice_threshold``. Recording stops after ``max_silence_duration``
        seconds of silence following speech, or after
        ``max_recording_duration`` seconds. The audio is saved only when
        speech was detected. Afterwards the client's ``is_recording`` flag is
        cleared and "结束对话" is sent. The stream is left open.
        """
        # Bound before the try so the finally block can always read them.
        frames = []
        is_speaking = False
        try:
            if delay > 0:
                log_info(f"将在 {delay} 秒后开始录音...")
                time.sleep(delay)
            log_info("开始录制,请说话...")
            start_time = time.monotonic()
            consecutive_silence_frames = 0
            frame_seconds = self.frame_length / self.sample_rate
            self.record_frame_counter = 0
            while True:
                data = self.stream.read(self.frame_length)
                frames.append(data)
                pcm_data = struct.unpack_from("h" * self.frame_length, data)
                energy = self._mean_abs_amplitude(pcm_data)
                # Report the level on the first frame and every 10th after it.
                if self.record_frame_counter % 10 == 0:
                    log_info(f"当前能量: {energy:.2f}, 阈值: {self.voice_threshold:.2f}")
                self.record_frame_counter += 1
                if energy > self.voice_threshold:
                    is_speaking = True
                    consecutive_silence_frames = 0
                elif is_speaking:
                    consecutive_silence_frames += 1
                    silence_duration = consecutive_silence_frames * frame_seconds
                    if silence_duration >= self.max_silence_duration:
                        log_info(f"检测到{self.max_silence_duration}秒静音,录音结束")
                        break
                # Enforce the maximum recording time.
                if time.monotonic() - start_time >= self.max_recording_duration:
                    log_info(f"达到最大录音时间{self.max_recording_duration}秒,录音结束")
                    break
        except Exception as e:
            log_info(f"录音过程中出错: {e}")
            is_speaking = False
            frames = []
        finally:
            try:
                # Save the recording first, if there is one.
                if is_speaking and frames:
                    self.save_recording(frames)
                else:
                    log_info("未检测到有效语音,不保存录音")
                # Release the recording flag held by the UDP client.
                self.client_tool.is_recording = False
                message = "结束对话"
                self.client_tool.send_message(message)
                # Wait for the server to respond.
                time.sleep(1)
                # The stream is deliberately left open, as before: the old
                # cleanup here tested 'stream' in locals(), which is never
                # true in this method, so it never ran and has been removed.
            except Exception as e:
                log_info(f"清理录音资源时出错: {e}")

    def save_recording(self, frames):
        """Write ``frames`` to output.wav in the working directory and notify the server.

        The file is mono 16-bit PCM at ``sample_rate``. Failures are logged,
        not raised.
        """
        # Fixed file name: each recording overwrites the previous one.
        filename = os.path.join(os.getcwd(), "output.wav")
        try:
            # The with-block closes the file even when a write fails.
            with wave.open(filename, 'wb') as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)  # 16-bit audio
                wf.setframerate(self.sample_rate)
                wf.writeframes(b''.join(frames))
            log_info(f"录音已保存: {filename}")
            # Tell the server the recording is ready.
            self.client_tool.send_message(f"录音完成:{filename}")
        except Exception as e:
            log_info(f"保存录音文件时出错: {e}")
if __name__ == "__main__":
    # Entry point: build the detector (which starts its own background
    # threads) and idle in the main thread until interrupted.
    log_info("语音唤醒程序启动...")
    detector = WakeWordDetector()
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        log_info("正在关闭程序...")
    except Exception as e:
        log_info(f"主程序异常: {e}")
    finally:
        # Release resources on every exit path, not only on Ctrl+C. The
        # microphone is closed before the Porcupine engine is deleted so the
        # detection loop stops using the engine first.
        detector.client_tool.shutdown()
        detector.close_PyAudio()
        detector.porcupine.delete()
        log_info("程序终止")