# Smart_Talker/mainEx.py

import sys
import pvporcupine
import pyaudio
import struct
import os
import socket
import time
import threading
import configparser
import wave
import datetime
import logging

# File logging: INFO and above, timestamped, written to "voicelog.txt" in the
# current working directory.
log_file = os.path.join(os.getcwd(), "voicelog.txt")
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO,
    filename=log_file,
)

# Console logging: the same records are also sent to a stream handler on the
# root logger, formatted as the bare message only.
formatter = logging.Formatter(fmt='%(message)s')
console = logging.StreamHandler()
console.setFormatter(formatter)
console.setLevel(logging.INFO)
logging.getLogger().addHandler(console)

# Logging helper used throughout this file in place of print.
def log_info(message):
    """Emit *message* at INFO level through the root logger."""
    logging.log(logging.INFO, message)

# Application directory: next to the executable for a frozen (packaged) build,
# next to this source file otherwise. This mirrors resource_path(); the two
# branches were previously swapped.
if getattr(sys, 'frozen', False):
    base_dir = os.path.dirname(os.path.abspath(sys.executable))
else:
    base_dir = os.path.dirname(os.path.abspath(__file__))
config_file_path = os.path.join(base_dir, 'config.ini')
config = configparser.ConfigParser()
try:
    # ConfigParser.read() silently skips missing files and returns the list of
    # files it actually parsed. Later files override earlier ones, so a
    # config.ini in the working directory (the only location consulted before)
    # still takes precedence over the one in the application directory.
    loaded_files = config.read([config_file_path, 'config.ini'], encoding='utf-8')
    if not loaded_files:
        raise FileNotFoundError(
            f"config.ini not found in {base_dir} or {os.getcwd()}"
        )
    host = config.get('Server', 'address')
    port = config.getint('Server', 'port')
    key = config.get('Server', 'key')
    cport = config.getint('Server', 'client-port')
    detection_threshold = config.getfloat('Audio', 'detection_threshold', fallback=1)
    voice_threshold = config.getfloat('Audio', 'voice_threshold', fallback=150)
except (configparser.Error, ValueError, FileNotFoundError) as e:
    # configparser.Error covers missing sections/options and parse errors;
    # ValueError covers non-numeric port/threshold values.
    log_info(f"配置错误: {e}")
    sys.exit(1)


class UDPClientTool:
    """UDP link between the wake-word detector and the control server.

    A daemon thread (``communication_thread``) connects to the server, then
    runs a receive loop and a heartbeat loop until the link drops, and
    reconnects afterwards. Text commands received from the server drive the
    attached ``waker`` object.
    """

    def __init__(self, server_host=host, server_port=port, client_port=cport, talker=None, waker=None):
        """Store the endpoints and start the background communication thread.

        Args:
            server_host: Server address the datagrams are sent to.
            server_port: Server UDP port.
            client_port: Local port to bind; 0 skips the explicit bind.
            talker: Stored on the instance; not used by this class.
            waker: Object whose ``open_PyAudio``, ``close_PyAudio``,
                ``record_user_speech`` and ``isOpen`` are driven by commands.
        """
        self.SERVER_HOST = server_host
        self.SERVER_PORT = server_port
        self.CLIENT_PORT = client_port
        self.client_socket = None
        self.talker = talker
        self.waker = waker
        self.HEARTBEAT_INTERVAL = 5  # seconds slept between heartbeat datagrams
        self.running = True
        self.connected = False  # current connection state
        self.receive_thread = None
        self.heartbeat_thread = None
        self.current_state = "等待唤醒"  # state marker
        self.first_start = True  # True until the first successful connect
        self.last_state = "初始化"  # state to restore after a reconnect
        self.last_command = None  # last command processed
        self.last_command_time = 0  # timestamp of the last processed command
        self.is_recording = False  # recording-in-progress flag
        self.last_sent_message = None  # last message passed to send_message
        # Start the worker only after every attribute above exists: the thread
        # calls connect(), which reads first_start / last_state immediately.
        self.communication_thread = threading.Thread(target=self.start_communication, daemon=True)
        self.communication_thread.start()

    def connect(self):
        """Open the socket, handshake with the server and restore the waker state.

        Returns:
            True when the handshake reply was received and restoring the waker
            state raised nothing; False otherwise (the socket is closed again).
        """
        try:
            self.client_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            self.client_socket.settimeout(5)
            if self.CLIENT_PORT != 0:
                self.client_socket.bind(('0.0.0.0', self.CLIENT_PORT))
            test_message = "唤醒连接"
            self.client_socket.sendto(test_message.encode('utf-8'), (self.SERVER_HOST, self.SERVER_PORT))
            data, addr = self.client_socket.recvfrom(4096)
            log_info(f"连接成功: {data.decode('utf-8')}")
            self.connected = True

            # Restore whatever the waker was doing before the link dropped.
            if self.waker:
                if self.first_start:
                    self.waker.open_PyAudio()
                    self.first_start = False
                    self.last_state = "唤醒监听"
                elif self.last_state == "唤醒监听":
                    # Was listening for the wake word: resume listening.
                    log_info("恢复唤醒词监听状态...")
                    self.waker.open_PyAudio()
                elif self.last_state == "录音中":
                    # Was recording: tell the user and start recording again.
                    # NOTE: this call runs synchronously in the connect path.
                    log_info("录音中断，请重新说话...")
                    time.sleep(0.5)
                    self.waker.record_user_speech(delay=0)  # no extra delay

            return True
        except Exception as e:
            log_info(f"连接失败: {e}")
            # Do not leave a half-open socket or a stale "connected" flag
            # behind; the caller retries with a fresh socket.
            self.connected = False
            if self.client_socket:
                self.client_socket.close()
                self.client_socket = None
            return False

    def start_communication(self):
        """Connect, run the receive/heartbeat threads, and reconnect when they end."""
        while self.running:
            if self.connect():
                self.connected = True
                self.receive_thread = threading.Thread(target=self.receive_messages, daemon=True)
                self.receive_thread.start()
                self.heartbeat_thread = threading.Thread(target=self.send_heartbeat, daemon=True)
                self.heartbeat_thread.start()
                self.receive_thread.join()
                self.heartbeat_thread.join()
                if self.client_socket:
                    self.client_socket.close()
                    self.client_socket = None
                log_info("连接断开，重新尝试连接...")
            else:
                log_info("3秒后重连...")
                time.sleep(3)

    def receive_messages(self):
        """Receive datagrams and dispatch them until the link drops or shutdown."""
        # Also stop when the heartbeat thread has marked the link as down;
        # otherwise start_communication() would wait on this thread forever.
        while self.running and self.connected:
            try:
                data, addr = self.client_socket.recvfrom(4096)
                decoded = data.decode('utf-8')
                current_time = time.time()

                if decoded != "HEARTBEAT":
                    log_info(f"收到指令: {decoded}")

                # Drop an identical message repeated within one second.
                if (decoded == self.last_command and
                        current_time - self.last_command_time < 1.0):
                    continue

                self.last_command = decoded
                self.last_command_time = current_time

                if self.waker:
                    self._handle_command(decoded)

            except socket.timeout:
                continue
            except Exception as e:
                log_info(f"接收错误: {e}")
                if self.running:  # only trigger a reconnect while still running
                    self.connected = False
                    break

    def _handle_command(self, decoded):
        """Apply one decoded server command to the attached waker."""
        if decoded == "开始唤醒":
            self.current_state = "等待唤醒"
            self.last_state = "唤醒监听"
            if self.waker.isOpen:
                self.waker.close_PyAudio()
            time.sleep(0.1)
            self.waker.open_PyAudio()
        elif decoded == "开始对话":
            if not self.is_recording:
                # Take the recording flag before the thread starts so that a
                # repeated command cannot start a second recording.
                self.is_recording = True
                self.current_state = "录音中"
                self.last_state = "录音中"
                try:
                    record_thread = threading.Thread(
                        target=self.waker.record_user_speech,
                        args=(1,),
                        daemon=True
                    )
                    record_thread.start()
                    record_thread.join(timeout=0.1)
                except Exception as e:
                    log_info(f"启动录音线程时出错: {e}")
                    self.current_state = "等待唤醒"
                    self.is_recording = False
        elif self.is_recording and decoded != "HEARTBEAT":
            log_info("正在录音中，忽略指令：" + decoded)

    def send_heartbeat(self):
        """Send a heartbeat datagram every HEARTBEAT_INTERVAL seconds while connected."""
        while self.connected and self.running:
            try:
                self.client_socket.sendto(b'HEARTBEAT', (self.SERVER_HOST, self.SERVER_PORT))
                time.sleep(self.HEARTBEAT_INTERVAL)
            except Exception as e:
                log_info(f"心跳发送失败: {e}")
                self.connected = False
                break

    def send_message(self, message):
        """Send *message* (UTF-8) to the server; does nothing when disconnected."""
        if self.connected and self.client_socket:
            try:
                self.client_socket.sendto(message.encode('utf-8'), (self.SERVER_HOST, self.SERVER_PORT))
                self.last_sent_message = message
                # Sending the end-of-dialogue message returns the state to
                # "waiting for wake-up".
                if message == "结束对话":
                    self.current_state = "等待唤醒"
            except Exception as e:
                log_info(f"发送失败: {e}")

    def shutdown(self):
        """Stop all loops and close the socket."""
        self.running = False
        self.connected = False
        if self.client_socket:
            self.client_socket.close()

def resource_path(relative_path):
    """Return the path of a resource file shipped alongside the program.

    The base directory is the executable's directory for a packaged (frozen)
    build and this source file's directory in a development environment.
    """
    anchor = sys.executable if getattr(sys, 'frozen', False) else os.path.abspath(__file__)
    return os.path.join(os.path.dirname(anchor), relative_path)

class WakeWordDetector:
    """Wake-word detection on microphone audio plus recording of the user's speech.

    Audio is read from a PyAudio input stream in frames of ``frame_length``
    16-bit samples. Frames are passed to Porcupine for wake-word detection;
    on request, speech is recorded to a WAV file with silence-based stop.
    """

    def __init__(self):
        # Wake-word configuration.
        keyword_paths = [resource_path('hello.ppn')]
        model_file = resource_path('porcupine_params_zh.pv')
        wake_words = ["你好"]
        sensitivities = [1.0]  # sensitivity raised to the maximum
        access_key = key

        # Porcupine engine instance.
        self.porcupine = pvporcupine.create(
            access_key=access_key,
            keyword_paths=keyword_paths,
            model_path=model_file,
            keywords=wake_words,
            sensitivities=sensitivities
        )

        self.scodethread = None
        # Audio stream parameters.
        self.sample_rate = 16000
        self.frame_length = 512
        self.isOpen = False
        # Noise-detection parameters.
        self.detection_threshold = detection_threshold  # ambient-noise threshold
        self.voice_threshold = voice_threshold  # speech-detection threshold
        self.silence_frames = 0  # consecutive silent-frame counter
        self.min_silence_frames = 10  # minimum silent frames used to judge ambient noise
        self.background_energy = 0  # background-noise energy
        self.energy_adjustment_factor = 1.5  # multiplier applied to the background energy

        # Recordings go to the current working directory (no dedicated
        # "recordings" folder).
        self.recording_dir = os.getcwd()
        self.max_silence_duration = 1.4  # seconds of silence that end a recording
        self.max_recording_duration = 30  # maximum recording length in seconds
        self.silence_threshold_multiplier = 1.2  # silence-threshold multiplier

        # Create the UDP client last: its background thread calls back into
        # this object (open_PyAudio reads isOpen, sample_rate, ...), so every
        # attribute above must exist first.
        self.client_tool = UDPClientTool(waker=self)

    def _unpack_frame(self, pcm):
        """Decode one raw frame into a tuple of ``frame_length`` signed 16-bit samples."""
        return struct.unpack_from(f"{self.frame_length}h", pcm)

    @staticmethod
    def _mean_abs_energy(samples):
        """Return the mean absolute amplitude of *samples*."""
        return sum(abs(x) for x in samples) / len(samples)

    def open_PyAudio(self):
        """Open the microphone stream and start the detection thread.

        Retries every 2 seconds until it succeeds. The retry is a loop rather
        than a recursive call so that a long outage cannot exhaust the stack.
        """
        while True:
            audio = None
            try:
                if self.isOpen:
                    self.close_PyAudio()

                audio = pyaudio.PyAudio()
                self.p = audio
                self.stream = audio.open(format=pyaudio.paInt16,
                                         channels=1,
                                         rate=self.sample_rate,
                                         input=True,
                                         frames_per_buffer=self.frame_length)
                self.isOpen = True
                log_info("开启麦克风...")
                # The detection thread runs in the background; it is not joined.
                self.scodethread = threading.Thread(target=self.start_detection, daemon=True)
                self.scodethread.start()
                return
            except Exception as e:
                log_info(f"打开麦克风失败: {e}")
                self.isOpen = False
                if audio is not None:
                    # Release the PyAudio instance created by this failed
                    # attempt instead of leaking one per retry.
                    try:
                        audio.terminate()
                    except Exception as cleanup_error:
                        log_info(f"关闭麦克风时出错: {cleanup_error}")
                time.sleep(2)
                log_info("尝试重新打开麦克风...")

    def calibrate_background_noise(self):
        """Measure about 2 seconds of ambient noise and derive detection_threshold."""
        log_info("校准环境噪音水平，请保持安静...")
        energy_values = []
        # Number of frames that make up 2 seconds of audio.
        for _ in range(int(self.sample_rate / self.frame_length * 2)):
            if not self.isOpen:
                break
            try:
                pcm = self.stream.read(self.frame_length)
                energy_values.append(self._mean_abs_energy(self._unpack_frame(pcm)))
            except Exception as e:
                # Stop sampling on a read/decode failure, but let
                # KeyboardInterrupt / SystemExit propagate.
                log_info(f"音频设备错误: {e}")
                break

        if energy_values:
            # Average energy, scaled by the adjustment factor.
            self.background_energy = sum(energy_values) / len(energy_values)
            self.detection_threshold = self.background_energy * self.energy_adjustment_factor
            log_info(f"环境噪音水平: {self.background_energy}, 检测阈值: {self.detection_threshold}")

    def close_PyAudio(self):
        """Stop and close the stream, terminate PyAudio, and clear ``isOpen``."""
        try:
            if hasattr(self, 'stream') and self.stream:
                if self.stream.is_active():
                    self.stream.stop_stream()
                self.stream.close()
            if hasattr(self, 'p') and self.p:
                self.p.terminate()
            self.isOpen = False
            log_info("关闭麦克风...")
        except Exception as e:
            log_info(f"关闭麦克风时出错: {e}")
            self.isOpen = False

    def start_detection(self):
        """Read frames until the wake word is detected or the microphone closes.

        On detection, "打开助手" is sent to the server and the method returns,
        leaving the audio stream open. After ``max_retries`` consecutive
        device errors the microphone is reopened and this loop ends.
        """
        log_info("正在等待唤醒词...")
        retry_count = 0
        max_retries = 3

        try:
            while True:
                if not self.isOpen:
                    log_info("\nself.isOpen=false")
                    break

                try:
                    pcm = self.stream.read(self.frame_length)
                    pcm_data = self._unpack_frame(pcm)

                    # Wake-word detection: a non-negative result is a match.
                    result = self.porcupine.process(pcm_data)
                    if result >= 0:
                        log_info("\n检测到唤醒词!")
                        message = "打开助手"
                        self.client_tool.send_message(message)
                        return  # return only; the audio stream stays open

                    retry_count = 0

                except OSError as e:
                    retry_count += 1
                    log_info(f"\n音频设备错误: {e}, 尝试重新连接 ({retry_count}/{max_retries})...")

                    if retry_count >= max_retries:
                        log_info("重试次数过多，退出检测")
                        self.close_PyAudio()
                        time.sleep(2)
                        self.open_PyAudio()
                        break

                    time.sleep(1)
                    continue

        except KeyboardInterrupt:
            log_info("\n程序已停止。")

    def record_user_speech(self, delay=1):
        """Record from the open stream until silence or the time limit.

        Recording stops after ``max_silence_duration`` seconds of silence that
        follow detected speech, or after ``max_recording_duration`` seconds.
        The audio is saved only if speech was detected. Afterwards the
        client's recording flag is cleared and "结束对话" is sent.

        Args:
            delay: Seconds to wait before recording starts.
        """
        # Bound before the try so the finally block can always read them.
        frames = []
        is_speaking = False
        try:
            if delay > 0:
                log_info(f"将在 {delay} 秒后开始录音...")
                time.sleep(delay)
            log_info("开始录制，请说话...")

            start_time = time.time()
            consecutive_silence_frames = 0

            while True:
                data = self.stream.read(self.frame_length)
                frames.append(data)

                # Energy of the current frame.
                energy = self._mean_abs_energy(self._unpack_frame(data))

                # The counter lives on the instance and keeps counting across
                # recordings; the first frame ever recorded gets index 0.
                self.record_frame_counter = getattr(self, 'record_frame_counter', -1) + 1

                if self.record_frame_counter % 10 == 0:
                    log_info(f"当前能量: {energy:.2f}, 阈值: {self.voice_threshold:.2f}")

                if energy > self.voice_threshold:
                    is_speaking = True
                    consecutive_silence_frames = 0
                else:
                    if is_speaking:
                        consecutive_silence_frames += 1
                        silence_duration = consecutive_silence_frames * (self.frame_length / self.sample_rate)

                        if silence_duration >= self.max_silence_duration:
                            log_info(f"检测到{self.max_silence_duration}秒静音，录音结束")
                            break

                # Stop at the maximum recording time.
                if time.time() - start_time >= self.max_recording_duration:
                    log_info(f"达到最大录音时间{self.max_recording_duration}秒，录音结束")
                    break

        except Exception as e:
            log_info(f"录音过程中出错: {e}")
            is_speaking = False
            frames = []
        finally:
            try:
                # Save the recording first, if there is one.
                if is_speaking and frames:
                    self.save_recording(frames)
                else:
                    log_info("未检测到有效语音，不保存录音")

                # Clear the recording flag.
                self.client_tool.is_recording = False

                # Send the end-of-dialogue message.
                message = "结束对话"
                self.client_tool.send_message(message)

                # Wait for the server to respond.
                time.sleep(1)

                # The stream is left open here. The previous cleanup tested
                # `'stream' in locals()` / `'p' in locals()`, which is never
                # true for instance attributes, so it never ran; it has been
                # removed and the effective behaviour is unchanged.

            except Exception as e:
                log_info(f"清理录音资源时出错: {e}")

    def save_recording(self, frames):
        """Write *frames* to ``output.wav`` (mono, 16-bit) and notify the server.

        Args:
            frames: Iterable of raw PCM byte chunks in recording order.
        """
        # Fixed file name inside the configured recording directory.
        filename = os.path.join(self.recording_dir, "output.wav")

        try:
            # The context manager closes the file even if a write fails.
            with wave.open(filename, 'wb') as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)  # 16-bit audio
                wf.setframerate(self.sample_rate)
                wf.writeframes(b''.join(frames))
            log_info(f"录音已保存: {filename}")

            # Tell the server that the recording is complete.
            self.client_tool.send_message(f"录音完成:{filename}")
        except Exception as e:
            log_info(f"保存录音文件时出错: {e}")

if __name__ == "__main__":
    # Entry point: build the detector (which starts its own background
    # threads) and keep the main thread alive until interrupted.
    log_info("语音唤醒程序启动...")
    detector = WakeWordDetector()

    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        log_info("正在关闭程序...")
    except Exception as e:
        log_info(f"主程序异常: {e}")
    finally:
        # Release the UDP client and the Porcupine handle on every exit path,
        # not only on Ctrl+C.
        detector.client_tool.shutdown()
        detector.porcupine.delete()
        log_info("程序终止")