voice_chat_api.py 96 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 基于API的智能语音聊天助手
  5. - ASR: 使用本地ASR识别
  6. - LLM: 千问API (DashScope)
  7. - TTS: 阿里云语音合成API
  8. - 全部使用云端API,无需GPU,部署简单
  9. """
  10. import os
  11. import time
  12. import queue
  13. import threading
  14. import numpy as np
  15. import sounddevice as sd
  16. from pathlib import Path
  17. from http import HTTPStatus
  18. import dashscope
  19. import json
  20. # ============ 配置参数 ============
  21. # 音频参数
  22. SAMPLING_RATE = 16000 # ASR采样率
  23. CHUNK_DURATION = 0.5 # 录音块时长(秒)
  24. CHUNK_SIZE = int(SAMPLING_RATE * CHUNK_DURATION)
  25. # ALSA缓冲区设置,增加缓冲区大小减少欠载错误
  26. LATENCY = 0.1 # 音频流延迟(秒) - 增加延迟以减少ALSA欠载错误
  27. # ===== API配置说明 =====
  28. # ASR: 使用本地ASR识别
  29. #TTS: 使用阿里云智能语音交互 (NLS)
  30. # LLM: 使用千问 API (DashScope)
  31. # 阿里云 NLS 配置(用于 ASR 和 TTS)
  32. ALIYUN_APPKEY = "uE6YeyIf1q7InhxB" # NLS项目的Appkey
  33. ALIYUN_ACCESS_KEY_ID = "LTAI5tGZwbyXsaykduFMsTGE" # AccessKey ID
  34. ALIYUN_ACCESS_KEY_SECRET = "888up1AgLIKDm1rVctnG422OcCmXFt" # AccessKey Secret
  35. # 千问 API 配置(用于 LLM)
  36. QWEN_API_KEY = "sk-362d1accbdec4bacbc8d291348049ab0" # 千问API Key
  37. QWEN_MODEL = "qwen-plus" # LLM模型: qwen-turbo, qwen-plus, qwen-max
  38. # 天气查询 API 配置
  39. WEATHER_API_KEY = "738b541a5f7a" # 天气API密钥
  40. WEATHER_API_URL = "https://whyta.cn/api/tianqi" # 天气API接口地址
  41. # 唤醒词配置
  42. WAKE_WORDS = ["优宝同学","又把同学","有保同学","你好优宝"]
  43. ENABLE_WAKE_WORD = True # 是否启用唤醒词
  44. AWAKE_TIMEOUT = 60 # 唤醒后无操作自动休眠时间(秒) - 从30秒增加到60秒
  45. # 休眠词配置(说这些词会进入休眠,而不是退出程序)
  46. SLEEP_WORDS = ["拜拜", "再见", "休息吧", "睡觉吧"]
  47. SLEEP_RESPONSES = [
  48. "好的,我先休息了,有需要再叫我!",
  49. "好的,拜拜!",
  50. "那我休息了,再见!",
  51. "收到,我去休息了!"
  52. ]
  53. # 退出程序词配置(说这些词会完全退出程序)
  54. EXIT_WORDS = ["关闭程序", "退出程序", "彻底退出"]
  55. EXIT_RESPONSES = [
  56. "好的,程序即将关闭,再见!",
  57. "收到,正在退出程序!"
  58. ]
  59. # TTS音色配置
  60. # 阿里云NLS支持的音色列表(更多音色请查看文档)
  61. # 女声:zhiyuan,zhiyue,zhisha,aiqi,aijia,siqi,
  62. # 男声:aicheng,zhida,aida,
  63. # 童声:mashu,yueer
  64. TTS_VOICE = "aicheng"
  65. TTS_SPEECH_RATE = 0 # 语速: -500到500, 0为正常速度
  66. TTS_PITCH_RATE = 0 # 音调: -500到500, 0为正常音调
  67. TTS_VOLUME = 80 # 音量: 0-100
  68. # VAD参数(语音活动检测)
  69. # ⚡ 高灵敏度配置 - 支持远距离拾音
  70. SILENCE_THRESHOLD = 0.4 # 静音阈值(秒) - 优化: 从0.7降至0.4秒,更快响应
  71. VOLUME_THRESHOLD = 0.009 # 音量阈值 - 从0.008降至0.003,大幅提高灵敏度,支持远距离拾音
  72. AUDIO_GAIN = 1.3 # 音频增益倍数 - 放大远距离声音
  73. # 音频设备
  74. MIC_DEVICE = 0 # 自动选择麦克风
  75. SPEAKER_DEVICE = None # 自动选择扬声器
  76. # 缓存目录
  77. CACHE_DIR = Path.home() / ".cache" / "voice_chat_api"
  78. CACHE_DIR.mkdir(parents=True, exist_ok=True)
  79. # ============ 人脸识别配置 ============
  80. FACE_RECOGNITION_ENABLED = True # 是否启用人脸识别
  81. FACE_RECOGNITION_CONFIDENCE_THRESHOLD = 300 # 人脸检测置信度阈值
  82. FACE_RECOGNITION_DURATION_THRESHOLD = 3 # 人脸持续出现时间阈值(秒)
  83. FACE_DATABASE_PATH = "face_database.pkl" # 人脸数据库路径
  84. CAMERA_ID = 10,8 # 摄像头ID
  85. FACE_RECOGNITION_MODEL = "buffalo_l" # 人脸识别模型
  86. FORCE_CUDA = True # 是否强制使用CUDA(建议根据设备情况调整)
  87. # ============ 全局队列 ============
  88. audio_queue = queue.Queue() # 音频数据队列
  89. text_queue = queue.Queue() # 识别文本队列
  90. response_queue = queue.Queue() # AI回复队列
  91. tts_queue = queue.Queue() # TTS音频队列
  92. face_event_queue = queue.Queue() # 人脸识别事件队列
  93. # 控制标志
  94. stop_flag = threading.Event()
  95. is_speaking = threading.Event() # TTS正在播放
  96. is_listening = threading.Event() # 正在录音
  97. is_awake = threading.Event() # 唤醒状态
  98. exit_requested = threading.Event() # 请求退出
  99. sleep_requested = threading.Event() # 请求休眠
  100. asr_input_enabled = threading.Event() # ASR文本传入LLM的开关,默认开启
  101. face_recognition_running = threading.Event() # 人脸识别运行状态
  102. asr_input_enabled.set() # 默认开启
  103. # 全局人脸识别实例,用于共享资源
  104. global_face_recognition_instance = None
  105. face_recognition_camera_lock = threading.Lock() # 摄像头访问锁
  106. # ============ 人脸识别人脸识别模块 ============
  107. class FaceRecognitionModule:
  108. """人脸识别模块"""
  109. def __init__(self):
  110. """初始化人脸识别模块"""
  111. self.face_recognition = None
  112. self.camera = None
  113. self.running = False
  114. self.face_tracking = {}
  115. self.current_face = None
  116. self.current_face_start_time = None
  117. def initialize(self):
  118. """初始化人脸识别模型和摄像头"""
  119. try:
  120. # 添加项目根目录到Python路径
  121. import sys
  122. sys.path.append('/home/ubuntu')
  123. # 直接导入FaceRecognitionAPI类
  124. from face_recognition_api import FaceRecognitionAPI
  125. # 初始化人脸识别API
  126. self.face_recognition = FaceRecognitionAPI(
  127. db_path=FACE_DATABASE_PATH,
  128. model_name=FACE_RECOGNITION_MODEL,
  129. force_cuda=FORCE_CUDA
  130. )
  131. # 打开摄像头 - 尝试多个ID
  132. import cv2
  133. # 尝试的摄像头ID列表,优先级从高到低
  134. camera_ids = [10, 8]
  135. self.camera = None
  136. for cam_id in camera_ids:
  137. print(f"尝试打开摄像头 ID: {cam_id}...")
  138. self.camera = cv2.VideoCapture(cam_id)
  139. if self.camera.isOpened():
  140. print(f"✅ 成功打开摄像头 ID: {cam_id}")
  141. break
  142. else:
  143. print(f"❌ 无法打开摄像头 ID: {cam_id}")
  144. # 释放失败的摄像头资源
  145. self.camera.release()
  146. self.camera = None
  147. if not self.camera:
  148. print("❌ 所有摄像头ID都无法打开")
  149. return False
  150. # 设置摄像头分辨率
  151. self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
  152. self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
  153. # 将实例保存到全局变量,供工具调用
  154. global global_face_recognition_instance
  155. global_face_recognition_instance = self
  156. print("✅ 人脸识别模块初始化成功")
  157. return True
  158. except Exception as e:
  159. print(f"❌ 人脸识别模块初始化失败: {e}")
  160. import traceback
  161. traceback.print_exc()
  162. return False
  163. def start(self):
  164. """启动人脸识别后台线程"""
  165. if not self.face_recognition or not self.camera:
  166. if not self.initialize():
  167. return False
  168. self.running = True
  169. face_recognition_running.set()
  170. # 启动后台线程
  171. self.thread = threading.Thread(target=self._face_recognition_loop, daemon=True)
  172. self.thread.start()
  173. print("✅ 人脸识别后台线程已启动")
  174. return True
  175. def stop(self):
  176. """停止人脸识别后台线程"""
  177. self.running = False
  178. face_recognition_running.clear()
  179. if hasattr(self, 'thread') and self.thread.is_alive():
  180. self.thread.join(timeout=2)
  181. if self.camera:
  182. self.camera.release()
  183. print("✅ 人脸识别后台线程已停止")
  184. def _face_recognition_loop(self):
  185. """人脸识别后台循环"""
  186. import cv2
  187. # 创建窗口
  188. cv2.namedWindow("人脸识别", cv2.WINDOW_NORMAL)
  189. cv2.resizeWindow("人脸识别", 640, 480)
  190. while self.running and not stop_flag.is_set():
  191. try:
  192. # 获取摄像头锁
  193. with face_recognition_camera_lock:
  194. # 读取摄像头帧
  195. ret, frame = self.camera.read()
  196. if not ret:
  197. time.sleep(0.01)
  198. continue
  199. # 检测和识别人脸
  200. try:
  201. results = self.face_recognition.detect_and_recognize(frame)
  202. # 绘制人脸识别结果
  203. self._draw_recognition_results(frame, results)
  204. # 显示摄像头窗口
  205. cv2.imshow("人脸识别", frame)
  206. # 处理键盘事件,按q键退出
  207. if cv2.waitKey(1) & 0xFF == ord('q'):
  208. break
  209. # 处理识别结果
  210. self._process_recognition_results(results)
  211. except Exception as recog_e:
  212. print(f"⚠️ 人脸识别处理错误: {recog_e}")
  213. # 增加安全检查,避免vector越界
  214. import traceback
  215. traceback.print_exc()
  216. # 短暂休眠后继续
  217. time.sleep(0.5)
  218. continue
  219. # 控制检测频率,避免占用过多资源
  220. time.sleep(0.1)
  221. except Exception as e:
  222. print(f"❌ 人脸识别循环错误: {e}")
  223. import traceback
  224. traceback.print_exc()
  225. time.sleep(1)
  226. # 关闭窗口
  227. cv2.destroyWindow("人脸识别")
  228. def _process_recognition_results(self, results):
  229. """处理人脸识别结果"""
  230. current_time = time.time()
  231. detected_faces = results.get("faces", [])
  232. # 安全检查:确保detected_faces是列表
  233. if not isinstance(detected_faces, list):
  234. print("⚠️ 检测到的人脸不是列表类型,跳过处理")
  235. return
  236. # 更新人脸追踪信息
  237. updated_face_ids = set()
  238. valid_faces = []
  239. # 过滤有效人脸(置信度达标)
  240. for face_info in detected_faces:
  241. try:
  242. # 安全检查:确保face_info是字典且包含必要键
  243. if not isinstance(face_info, dict):
  244. continue
  245. # 检查置信度
  246. similarity = face_info.get("similarity", 0)
  247. if similarity < FACE_RECOGNITION_CONFIDENCE_THRESHOLD:
  248. continue
  249. # 简单的人脸ID生成(基于边界框)
  250. bbox = face_info.get("bbox", [])
  251. # 安全检查:确保边界框有效
  252. if not isinstance(bbox, (list, tuple)) or len(bbox) < 4:
  253. continue
  254. # 确保边界框坐标是整数
  255. try:
  256. bbox = [int(coord) for coord in bbox]
  257. except (ValueError, TypeError):
  258. continue
  259. face_id = f"{bbox[0]}_{bbox[1]}_{bbox[2]}_{bbox[3]}"
  260. updated_face_ids.add(face_id)
  261. # 更新人脸持续时间
  262. if face_id in self.face_tracking:
  263. # 人脸已存在,更新持续时间
  264. self.face_tracking[face_id]["end_time"] = current_time
  265. self.face_tracking[face_id]["duration"] = current_time - self.face_tracking[face_id]["start_time"]
  266. self.face_tracking[face_id]["face_info"] = face_info
  267. else:
  268. # 新人脸,初始化追踪信息
  269. self.face_tracking[face_id] = {
  270. "start_time": current_time,
  271. "end_time": current_time,
  272. "duration": 0,
  273. "face_info": face_info
  274. }
  275. # 添加到有效人脸列表
  276. valid_faces.append({
  277. "face_id": face_id,
  278. "face_info": face_info,
  279. "duration": self.face_tracking[face_id]["duration"]
  280. })
  281. except Exception as face_e:
  282. print(f"⚠️ 处理单个人脸信息错误: {face_e}")
  283. import traceback
  284. traceback.print_exc()
  285. continue
  286. # 处理有效人脸
  287. if valid_faces:
  288. # 过滤出已知人脸
  289. known_faces = []
  290. for face in valid_faces:
  291. face_info = face["face_info"]
  292. name = face_info.get("name", "Unknown")
  293. if name != "Unknown" and face["duration"] >= FACE_RECOGNITION_DURATION_THRESHOLD:
  294. known_faces.append(face)
  295. # 检查是否满足单一人脸条件
  296. if len(known_faces) == 1:
  297. # 单一人脸且停留时间达标
  298. face = known_faces[0]
  299. face_info = face["face_info"]
  300. name = face_info.get("name", "Unknown")
  301. # 已知人脸,发送个性化打招呼事件
  302. self._send_face_event({
  303. "type": "known_single_face",
  304. "name": name,
  305. "face_info": face_info
  306. })
  307. elif len(known_faces) > 1:
  308. # 多个人脸,但只处理已知的
  309. # 获取所有已知人脸的名字
  310. known_names = []
  311. for face in known_faces:
  312. name = face["face_info"].get("name", "Unknown")
  313. if name not in known_names:
  314. known_names.append(name)
  315. # 发送包含已知人脸信息的事件
  316. self._send_face_event({
  317. "type": "multiple_known_faces",
  318. "face_count": len(known_faces),
  319. "known_names": known_names
  320. })
  321. # 当只有unknown人脸时,不发送任何事件
  322. # 清理已消失的人脸
  323. try:
  324. for face_id in list(self.face_tracking.keys()):
  325. if face_id not in updated_face_ids:
  326. del self.face_tracking[face_id]
  327. except Exception as clean_e:
  328. print(f"⚠️ 清理人脸追踪信息错误: {clean_e}")
  329. # 重置追踪字典,避免持续错误
  330. self.face_tracking = {}
  331. def _draw_recognition_results(self, frame, results):
  332. """在图像上绘制人脸识别结果"""
  333. import cv2
  334. # 遍历所有检测到的人脸
  335. for face in results.get("faces", []):
  336. try:
  337. # 获取边界框
  338. bbox = face.get("bbox", [])
  339. if not isinstance(bbox, (list, tuple)) or len(bbox) < 4:
  340. continue
  341. # 确保边界框坐标是整数
  342. bbox = [int(coord) for coord in bbox]
  343. # 获取人脸信息
  344. name = face.get("name", "Unknown")
  345. confidence = face.get("confidence", 0.0)
  346. similarity = face.get("similarity", 0.0)
  347. # 绘制边界框
  348. if name != "Unknown":
  349. # 已知人脸使用绿色框
  350. box_color = (0, 255, 0)
  351. else:
  352. # 未知人脸使用蓝色框
  353. box_color = (255, 0, 0)
  354. cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), box_color, 2)
  355. # 绘制文本背景
  356. text_bg_color = (0, 0, 0) # 黑色背景
  357. text_color = (255, 255, 255) # 白色文字
  358. # 准备文本
  359. if name != "Unknown":
  360. text = f"{name} ({similarity:.1f})"
  361. else:
  362. text = f"Unknown ({confidence:.1f})"
  363. # 使用freetype库绘制文字
  364. try:
  365. # 调用face_recognition_api中的_put_text_freetype方法
  366. self.face_recognition._put_text_freetype(
  367. frame,
  368. text,
  369. (bbox[0], bbox[1] - 10),
  370. font_size=18,
  371. color=text_color
  372. )
  373. except Exception as ft_e:
  374. # 如果freetype绘制失败,使用OpenCV默认字体
  375. print(f"⚠️ freetype绘制文字失败,使用默认字体: {ft_e}")
  376. cv2.putText(
  377. frame,
  378. text,
  379. (bbox[0], bbox[1] - 10),
  380. cv2.FONT_HERSHEY_SIMPLEX,
  381. 0.6,
  382. text_color,
  383. 2
  384. )
  385. except Exception as draw_e:
  386. print(f"⚠️ 绘制人脸识别结果错误: {draw_e}")
  387. import traceback
  388. traceback.print_exc()
  389. continue
  390. def _send_face_event(self, event_data):
  391. """发送人脸事件到事件队列"""
  392. # 确保事件数据包含基本信息
  393. event = {
  394. "timestamp": time.time(),
  395. **event_data
  396. }
  397. face_event_queue.put(event)
  398. if event_data["type"] == "known_single_face":
  399. print(f"✅ 检测到已知单一人脸: {event_data['name']}")
  400. elif event_data["type"] == "unknown_single_face":
  401. print(f"✅ 检测到未知单一人脸")
  402. elif event_data["type"] == "multiple_faces":
  403. print(f"✅ 检测到多个人脸: {event_data['face_count']}人")
  404. def pause(self):
  405. """暂停人脸识别"""
  406. self.running = False
  407. face_recognition_running.clear()
  408. print("✅ 人脸识别已暂停")
  409. def resume(self):
  410. """恢复人脸识别"""
  411. self.start()
  412. # ============ 1. 音频采集模块 ============
  413. class AudioCapture:
  414. """实时音频采集"""
  415. def __init__(self):
  416. self.stream = None
  417. self.audio_buffer = []
  418. self.silence_start = None
  419. def callback(self, indata, frames, time_info, status):
  420. """音频流回调 - 流式模式"""
  421. # TTS播放时不采集音频,避免回声(完全丢弃所有数据)
  422. if is_speaking.is_set():
  423. # 清空所有状态
  424. self.audio_buffer = []
  425. self.silence_start = None
  426. is_listening.clear()
  427. return
  428. # 转换为float32格式
  429. audio_data = indata[:, 0].astype(np.float32)
  430. # 🎤 应用音频增益 - 放大远距离声音
  431. audio_data = audio_data * AUDIO_GAIN
  432. # 防止削波(限制在-1到1之间)
  433. audio_data = np.clip(audio_data, -1.0, 1.0)
  434. # 计算音量(检测是否有声音)
  435. volume = np.abs(audio_data).mean()
  436. if volume > VOLUME_THRESHOLD: # 有声音
  437. self.silence_start = None
  438. # 流式模式:立即发送音频块到队列
  439. audio_queue.put(audio_data.copy())
  440. is_listening.set()
  441. else: # 静音
  442. # 检测静音持续时间
  443. if is_listening.is_set():
  444. if self.silence_start is None:
  445. self.silence_start = time.time()
  446. elif time.time() - self.silence_start > SILENCE_THRESHOLD:
  447. # 静音超过阈值,标记说话结束
  448. self.silence_start = None
  449. is_listening.clear()
  450. def start(self):
  451. """启动音频采集"""
  452. self.stream = sd.InputStream(
  453. samplerate=SAMPLING_RATE,
  454. blocksize=CHUNK_SIZE,
  455. device=MIC_DEVICE,
  456. channels=1,
  457. dtype=np.float32,
  458. callback=self.callback,
  459. latency=LATENCY # 增加延迟以减少ALSA欠载错误
  460. )
  461. self.stream.start()
  462. def stop(self):
  463. """停止采集"""
  464. if self.stream:
  465. self.stream.stop()
  466. self.stream.close()
  467. # ============ 2. 语音唤醒检测模块 ============
  468. class WakeWordDetector:
  469. """语音唤醒检测器"""
  470. def __init__(self):
  471. self.wake_words = WAKE_WORDS
  472. self.last_active_time = None
  473. # 预计算唤醒词的拼音(用于模糊匹配)
  474. try:
  475. from pypinyin import lazy_pinyin
  476. self.wake_words_pinyin = {
  477. word: ''.join(lazy_pinyin(word))
  478. for word in self.wake_words
  479. }
  480. except ImportError:
  481. self.wake_words_pinyin = None
  482. if not ENABLE_WAKE_WORD:
  483. is_awake.set() # 如果不启用唤醒词,默认一直唤醒
  484. def check_wake_word(self, text):
  485. """检查是否包含唤醒词(支持拼音模糊匹配)"""
  486. if not text:
  487. return False
  488. # 方法1: 精确匹配(优先)
  489. for wake_word in self.wake_words:
  490. if wake_word in text:
  491. return True
  492. # 方法2: 拼音模糊匹配
  493. if self.wake_words_pinyin:
  494. try:
  495. from pypinyin import lazy_pinyin
  496. text_pinyin = ''.join(lazy_pinyin(text))
  497. for wake_word, wake_pinyin in self.wake_words_pinyin.items():
  498. # 检查拼音是否包含
  499. if wake_pinyin in text_pinyin:
  500. return True
  501. # 更模糊的匹配:计算相似度
  502. similarity = self._calculate_similarity(wake_pinyin, text_pinyin)
  503. if similarity > 0.7: # 70%相似度就认为匹配
  504. return True
  505. except Exception:
  506. pass
  507. return False
  508. def _calculate_similarity(self, s1, s2):
  509. """计算两个拼音字符串的相似度(编辑距离)"""
  510. if not s1 or not s2:
  511. return 0.0
  512. # Levenshtein距离
  513. len1, len2 = len(s1), len(s2)
  514. dp = [[0] * (len2 + 1) for _ in range(len1 + 1)]
  515. for i in range(len1 + 1):
  516. dp[i][0] = i
  517. for j in range(len2 + 1):
  518. dp[0][j] = j
  519. for i in range(1, len1 + 1):
  520. for j in range(1, len2 + 1):
  521. if s1[i-1] == s2[j-1]:
  522. dp[i][j] = dp[i-1][j-1]
  523. else:
  524. dp[i][j] = min(dp[i-1][j], dp[i][j-1], dp[i-1][j-1]) + 1
  525. distance = dp[len1][len2]
  526. max_len = max(len1, len2)
  527. similarity = 1 - (distance / max_len) if max_len > 0 else 0
  528. return similarity
  529. def check_sleep_word(self, text):
  530. """检查是否包含休眠词(支持拼音匹配)"""
  531. if not text:
  532. return False
  533. # 精确匹配
  534. for sleep_word in SLEEP_WORDS:
  535. if sleep_word in text:
  536. return True
  537. # 拼音匹配
  538. if self.wake_words_pinyin:
  539. try:
  540. from pypinyin import lazy_pinyin
  541. text_pinyin = ''.join(lazy_pinyin(text))
  542. for sleep_word in SLEEP_WORDS:
  543. sleep_pinyin = ''.join(lazy_pinyin(sleep_word))
  544. if sleep_pinyin in text_pinyin:
  545. return True
  546. except:
  547. pass
  548. return False
  549. def check_exit_word(self, text):
  550. """检查是否包含退出程序词(支持拼音匹配)"""
  551. if not text:
  552. return False
  553. # 精确匹配
  554. for exit_word in EXIT_WORDS:
  555. if exit_word in text:
  556. return True
  557. # 拼音匹配
  558. if self.wake_words_pinyin:
  559. try:
  560. from pypinyin import lazy_pinyin
  561. text_pinyin = ''.join(lazy_pinyin(text))
  562. for exit_word in EXIT_WORDS:
  563. exit_pinyin = ''.join(lazy_pinyin(exit_word))
  564. if exit_pinyin in text_pinyin:
  565. return True
  566. except:
  567. pass
  568. return False
  569. def wake_up(self):
  570. """唤醒"""
  571. is_awake.set()
  572. self.last_active_time = time.time()
  573. def sleep(self):
  574. """休眠"""
  575. is_awake.clear()
  576. self.last_active_time = None
  577. def update_activity(self):
  578. """更新活动时间"""
  579. if is_awake.is_set() and ENABLE_WAKE_WORD:
  580. self.last_active_time = time.time()
  581. def check_timeout(self):
  582. """检查是否超时需要休眠"""
  583. if ENABLE_WAKE_WORD and is_awake.is_set() and self.last_active_time:
  584. if time.time() - self.last_active_time > AWAKE_TIMEOUT:
  585. self.sleep()
  586. return True
  587. return False
  588. # ============ 3. ASR识别模块(基于外部程序)============
  589. class ExternalASR:
  590. """基于外部程序的ASR识别(使用pty方式)"""
  591. def __init__(self):
  592. try:
  593. import pty
  594. import os
  595. import re
  596. self.pty = pty
  597. self.os = os
  598. self.re = re
  599. self.pattern = self.re.compile(r"\('([^']*)',\s*'([^']*)'\)")
  600. self.demo_path = self.os.path.abspath("../audio_check_demo")
  601. if not self.os.path.exists(self.demo_path):
  602. print(f"[ASR] ✗ 外部程序不存在: {self.demo_path}")
  603. raise FileNotFoundError(f"ASR程序不存在: {self.demo_path}")
  604. self.master = None
  605. self.slave = None
  606. self.pid = None
  607. self.wake_detector = None
  608. self._start_pty_process()
  609. print("[ASR] ✓ 外部ASR程序加载成功")
  610. except Exception as e:
  611. print(f"[ASR] ✗ 初始化失败: {e}")
  612. import traceback
  613. traceback.print_exc()
  614. raise
  615. def _start_pty_process(self):
  616. """启动pty进程运行外部ASR程序"""
  617. self.master, self.slave = self.pty.openpty()
  618. self.pid = self.os.fork()
  619. if self.pid == 0:
  620. self.os.close(self.master)
  621. self.os.dup2(self.slave, 1)
  622. self.os.dup2(self.slave, 2)
  623. self.os.close(self.slave)
  624. self.os.execv(self.demo_path, [self.demo_path])
  625. else:
  626. self.os.close(self.slave)
  627. print(f"[ASR] 已启动外部ASR程序 (PID: {self.pid})")
  628. def _process_recognition_result(self, text, wake_detector):
  629. """处理识别结果"""
  630. if not text:
  631. return
  632. if wake_detector.check_exit_word(text):
  633. import random
  634. goodbye_msg = random.choice(EXIT_RESPONSES)
  635. response_queue.put(goodbye_msg)
  636. exit_requested.set()
  637. return
  638. if wake_detector.check_sleep_word(text):
  639. if is_awake.is_set():
  640. import random
  641. sleep_msg = random.choice(SLEEP_RESPONSES)
  642. response_queue.put(sleep_msg)
  643. sleep_requested.set()
  644. return
  645. if not asr_input_enabled.is_set():
  646. return
  647. if ENABLE_WAKE_WORD:
  648. if not is_awake.is_set():
  649. if wake_detector.check_wake_word(text):
  650. wake_detector.wake_up()
  651. print(f"[ASR] 识别: {text}")
  652. wake_detector.update_activity()
  653. text_queue.put(text)
  654. else:
  655. if len(text.strip()) < 2:
  656. return
  657. print(f"[ASR] 识别: {text}")
  658. wake_detector.update_activity()
  659. text_queue.put(text)
  660. else:
  661. print(f"[ASR] 识别: {text}")
  662. text_queue.put(text)
  663. def run(self, wake_detector):
  664. """ASR线程主循环"""
  665. self.wake_detector = wake_detector
  666. print("[ASR] 外部ASR识别线程已启动")
  667. try:
  668. with self.os.fdopen(self.master, 'r', encoding='utf-8', errors='ignore') as f:
  669. while not stop_flag.is_set():
  670. try:
  671. line = f.readline()
  672. if not line:
  673. break
  674. line = line.strip()
  675. if not line:
  676. continue
  677. match = self.pattern.match(line)
  678. if match:
  679. text = match.group(1)
  680. pinyin = match.group(2)
  681. print(f"[ASR] 识别文本:{text} | 拼音:{pinyin}")
  682. if text:
  683. self._process_recognition_result(text, self.wake_detector)
  684. else:
  685. print(f"【原始】:{line}")
  686. except Exception as e:
  687. print(f"[ASR] 读取错误: {e}")
  688. import traceback
  689. traceback.print_exc()
  690. break
  691. except Exception as e:
  692. print(f"[ASR] 线程错误: {e}")
  693. import traceback
  694. traceback.print_exc()
  695. finally:
  696. if self.master:
  697. try:
  698. self.os.close(self.master)
  699. except:
  700. pass
  701. if self.pid > 0:
  702. try:
  703. self.os.waitpid(self.pid, 0)
  704. except:
  705. pass
  706. # ============ 3. ASR识别模块(阿里云NLS - 已废弃)============
  707. class AliyunStreamingASR:
  708. """阿里云NLS流式语音识别(WebSocket)"""
  709. def __init__(self):
  710. print("[ASR] 初始化阿里云NLS流式识别...")
  711. try:
  712. self.appkey = ALIYUN_APPKEY
  713. self.access_key_id = ALIYUN_ACCESS_KEY_ID
  714. self.access_key_secret = ALIYUN_ACCESS_KEY_SECRET
  715. self.token = None
  716. self.token_expire_time = 0
  717. # WebSocket连接
  718. self.ws = None
  719. self.ws_connected = False
  720. self.ws_lock = threading.Lock()
  721. self.task_id = None # 当前识别会话ID
  722. self.session_started = False # 当前会话是否已发送StartTranscription
  723. # 识别结果缓存
  724. self.current_result = ""
  725. self.final_result = ""
  726. self.sentence_results = [] # 🔧 新增:存储SentenceEnd的句子结果
  727. self.wake_detector = None # 🔧 新增:保存wake_detector引用
  728. # Token管理(Token有效期180秒)
  729. self.token_create_time = 0
  730. # 获取Token
  731. self._get_token()
  732. print("[ASR] ✓ 阿里云NLS 流式ASR初始化成功")
  733. print(f"[ASR] 使用Appkey: {self.appkey}")
  734. print(f"[ASR] 模式: WebSocket 流式识别")
  735. except Exception as e:
  736. print(f"[ASR] ✗ 初始化失败: {e}")
  737. raise
  738. def _get_token(self):
  739. """获取NLS访问Token(使用阿里云OpenAPI签名)"""
  740. import requests
  741. import hmac
  742. import hashlib
  743. import base64
  744. from datetime import datetime
  745. import uuid
  746. from urllib.parse import quote
  747. try:
  748. # OpenAPI参数
  749. timestamp = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
  750. nonce = str(uuid.uuid4())
  751. # 公共请求参数
  752. params = {
  753. 'AccessKeyId': self.access_key_id,
  754. 'Action': 'CreateToken',
  755. 'Format': 'JSON',
  756. 'RegionId': 'cn-shanghai',
  757. 'SignatureMethod': 'HMAC-SHA1',
  758. 'SignatureNonce': nonce,
  759. 'SignatureVersion': '1.0',
  760. 'Timestamp': timestamp,
  761. 'Version': '2019-02-28'
  762. }
  763. # 构造签名字符串
  764. sorted_params = sorted(params.items())
  765. canonicalized_query_string = '&'.join([f"{quote(k, safe='')}={quote(str(v), safe='')}" for k, v in sorted_params])
  766. string_to_sign = f"GET&%2F&{quote(canonicalized_query_string, safe='')}"
  767. # 计算签名
  768. signature = base64.b64encode(
  769. hmac.new(
  770. (self.access_key_secret + '&').encode('utf-8'),
  771. string_to_sign.encode('utf-8'),
  772. hashlib.sha1
  773. ).digest()
  774. ).decode('utf-8')
  775. # 添加签名到参数
  776. params['Signature'] = signature
  777. # 发送请求
  778. url = "https://nls-meta.cn-shanghai.aliyuncs.com/"
  779. print("[ASR] 正在获取NLS Token...")
  780. print(f"[ASR] 使用签名认证")
  781. response = requests.get(url, params=params, timeout=10)
  782. print(f"[ASR] HTTP状态码: {response.status_code}")
  783. if response.status_code == 200:
  784. result = response.json()
  785. print(f"[ASR] CreateToken响应: {result}")
  786. # 提取Token
  787. if 'Token' in result and isinstance(result['Token'], dict):
  788. if 'Id' in result['Token']:
  789. self.token = result['Token']['Id']
  790. self.token_create_time = time.time()
  791. self.token_expire_time = result['Token'].get('ExpireTime', 0)
  792. print(f"[ASR] ✓ Token获取成功")
  793. print(f"[ASR] Token: {self.token}")
  794. print(f"[ASR] 有效期: 180秒")
  795. print(f"[ASR] Appkey: {self.appkey}")
  796. print(f"[ASR] 提示: 确保Token和Appkey来自同一账号")
  797. return
  798. print(f"[ASR] ⚠️ 无法从响应提取Token,响应: {result}")
  799. else:
  800. print(f"[ASR] Token API失败: {response.text}")
  801. except Exception as e:
  802. print(f"[ASR] ❌ Token获取异常: {e}")
  803. import traceback
  804. traceback.print_exc()
  805. # 失败提示
  806. print("\n" + "=" * 60)
  807. print("❌ Token获取失败 - 请检查:")
  808. print("=" * 60)
  809. print(f"1. AccessKey ID: {self.access_key_id}")
  810. print(f"2. AccessKey Secret: {self.access_key_secret[:5]}***")
  811. print("3. 是否已开通 NLS 服务: https://nls.console.aliyun.com/")
  812. print("4. AccessKey 是否有 NLS 权限")
  813. print("=" * 60 + "\n")
  814. self.token = None
  815. def _connect_websocket(self):
  816. """建立WebSocket连接"""
  817. import websocket
  818. import json
  819. try:
  820. # 检查Token有效期(180秒)
  821. if not self.token or (time.time() - self.token_create_time) > 170:
  822. print("[ASR] Token为空或即将过期,重新获取...")
  823. self._get_token()
  824. if not self.token:
  825. print("[ASR] Token获取失败,无法建立连接")
  826. return False
  827. # ✅ 正确方式:Token必须通过URL参数传递(不是Header!)
  828. ws_url = f"wss://nls-gateway-cn-shanghai.aliyuncs.com/ws/v1?token={self.token}"
  829. # 创建WebSocket连接(Token已在URL中,无需header)
  830. self.ws = websocket.WebSocketApp(
  831. ws_url,
  832. on_message=self._on_message,
  833. on_error=self._on_error,
  834. on_close=self._on_close,
  835. on_open=self._on_open
  836. )
  837. # 在新线程中运行WebSocket
  838. ws_thread = threading.Thread(target=self.ws.run_forever, daemon=True)
  839. ws_thread.start()
  840. # 等待连接建立
  841. timeout = 5
  842. start_time = time.time()
  843. while not self.ws_connected and time.time() - start_time < timeout:
  844. time.sleep(0.1)
  845. if self.ws_connected:
  846. print("[ASR] ✓ WebSocket连接成功")
  847. return True
  848. else:
  849. print("[ASR] ✗ WebSocket连接超时")
  850. return False
  851. except Exception as e:
  852. print(f"[ASR] WebSocket连接失败: {e}")
  853. import traceback
  854. traceback.print_exc()
  855. return False
  856. def _on_open(self, ws):
  857. """WebSocket连接打开(长连接建立成功)"""
  858. print("[ASR] ✓ WebSocket长连接已建立")
  859. self.ws_connected = True
  860. # 不在这里发送StartTranscription,等待实际识别时再发送
  861. def _send_start_transcription(self):
  862. """发送开始识别命令(每次新的识别会话调用)"""
  863. import json
  864. import uuid
  865. if not self.ws_connected or not self.ws:
  866. print("[ASR] WebSocket未连接")
  867. return False
  868. try:
  869. # 为每个新会话生成新的task_id
  870. self.task_id = uuid.uuid4().hex # 32位十六进制
  871. message_id = uuid.uuid4().hex # 32位十六进制
  872. # 清空之前的结果
  873. self.current_result = ""
  874. self.final_result = ""
  875. print(f"[ASR] 开始新识别会话 (task_id: {self.task_id})")
  876. start_msg = {
  877. "header": {
  878. "message_id": message_id,
  879. "task_id": self.task_id,
  880. "namespace": "SpeechTranscriber",
  881. "name": "StartTranscription",
  882. "appkey": self.appkey
  883. },
  884. "payload": {
  885. "format": "pcm",
  886. "sample_rate": SAMPLING_RATE,
  887. "enable_intermediate_result": True,
  888. "enable_punctuation_prediction": True,
  889. "enable_inverse_text_normalization": True,
  890. "max_sentence_silence": 400 # 🔧 从800降至400ms,更快触发识别
  891. }
  892. }
  893. with self.ws_lock:
  894. if self.ws:
  895. self.ws.send(json.dumps(start_msg))
  896. print("[ASR] ✓ StartTranscription已发送")
  897. return True
  898. except Exception as e:
  899. print(f"[ASR] 发送StartTranscription失败: {e}")
  900. return False
  901. def _on_message(self, ws, message):
  902. """接收识别结果"""
  903. import json
  904. try:
  905. result = json.loads(message)
  906. # 解析结果
  907. if 'header' in result:
  908. name = result['header'].get('name', '')
  909. status = result['header'].get('status', 0)
  910. # 🔧 添加调试: 打印所有消息类型(排除常见的)
  911. if name not in ['TranscriptionResultChanged', 'TranscriptionCompleted', 'TaskFailed', 'SentenceEnd', 'SentenceBegin']:
  912. print(f"[ASR] 收到消息: {name}, status: {status}")
  913. if name == 'TranscriptionResultChanged':
  914. # 中间结果
  915. if 'payload' in result and 'result' in result['payload']:
  916. text = result['payload']['result']
  917. self.current_result = text
  918. print(f"[ASR] 中间结果: {text}")
  919. elif name == 'SentenceEnd':
  920. # 🔧 新增:处理句子结束消息(流式识别的实时结果)
  921. if 'payload' in result and 'result' in result['payload']:
  922. text = result['payload']['result']
  923. print(f"[ASR] ✓ 句子完成: {text}")
  924. # 🔧 过滤短句和可能的回声
  925. if text and len(text.strip()) >= 2: # 至少2个字符
  926. # 检查是否在播放中(回声)
  927. if is_speaking.is_set():
  928. print(f"[ASR] ⚠️ 播放期间的识别,丢弃(可能是回声): {text}")
  929. elif self.wake_detector:
  930. self._process_recognition_result(text, self.wake_detector)
  931. else:
  932. print(f"[ASR] ⚠️ 句子太短,丢弃: {text}")
  933. elif name == 'TranscriptionCompleted':
  934. # 最终结果
  935. print(f"[ASR] 收到TranscriptionCompleted消息")
  936. print(f"[ASR] 完整消息: {result}")
  937. if 'payload' in result:
  938. payload = result['payload']
  939. print(f"[ASR] payload: {payload}")
  940. # 尝试多种可能的字段
  941. text = None
  942. if 'result' in payload:
  943. text = payload['result']
  944. elif 'text' in payload:
  945. text = payload['text']
  946. elif isinstance(payload, str):
  947. text = payload
  948. if text:
  949. self.final_result = text
  950. print(f"[ASR] ✓ 最终结果: {text}")
  951. else:
  952. print(f"[ASR] ⚠️ payload中无result字段")
  953. else:
  954. print(f"[ASR] ⚠️ 消息中无payload")
  955. elif name == 'TaskFailed':
  956. # 识别失败
  957. message = result['header'].get('message', '未知错误')
  958. print(f"[ASR] ✗ 识别失败: {message}")
  959. except Exception as e:
  960. print(f"[ASR] 消息解析错误: {e}")
  961. def _on_error(self, ws, error):
  962. """WebSocket错误"""
  963. print(f"[ASR] WebSocket错误: {error}")
  964. import traceback
  965. traceback.print_exc()
  966. self.ws_connected = False
  967. def _on_close(self, ws, close_status_code, close_msg):
  968. """WebSocket关闭"""
  969. print(f"[ASR] WebSocket已关闭")
  970. print(f"[ASR] 关闭码: {close_status_code}")
  971. print(f"[ASR] 关闭消息: {close_msg}")
  972. # 解析错误码
  973. if close_msg:
  974. try:
  975. close_msg_str = str(close_msg)
  976. if '240000002' in close_msg_str or b'240000002' in close_msg:
  977. print("[ASR] ❌ 错误: 请求参数校验失败 (240000002)")
  978. print("[ASR] 可能原因:")
  979. print(" 1. Token已过期(有效期180秒)")
  980. print(" 2. Appkey与Token不匹配")
  981. print(" 3. 参数格式错误")
  982. elif '440000004' in close_msg_str or b'440000004' in close_msg:
  983. print("[ASR] ⚠️ 错误: 连接异常 (440000004)")
  984. print("[ASR] 可能原因: 长时间连接或音频数据异常,将自动重连")
  985. except:
  986. pass
  987. self.ws_connected = False
  988. def send_audio(self, audio_data):
  989. """发送音频数据到WebSocket"""
  990. import websocket
  991. if not self.ws_connected or not self.ws:
  992. # 重新连接
  993. if not self._connect_websocket():
  994. return False
  995. try:
  996. # 将float32转为PCM (int16)
  997. audio_int16 = (audio_data * 32767).astype(np.int16)
  998. audio_bytes = audio_int16.tobytes()
  999. # 🔧 添加调试: 打印音频数据统计
  1000. duration_ms = len(audio_data) / SAMPLING_RATE * 1000
  1001. # print(f"[ASR] 发送音频: {len(audio_bytes)} bytes ({duration_ms:.0f}ms)")
  1002. # 发送二进制音频数据
  1003. with self.ws_lock:
  1004. if self.ws and self.ws_connected:
  1005. self.ws.send(audio_bytes, opcode=websocket.ABNF.OPCODE_BINARY)
  1006. return True
  1007. except Exception as e:
  1008. print(f"[ASR] 发送音频失败: {e}")
  1009. self.ws_connected = False
  1010. return False
  1011. def finish_recognition(self):
  1012. """结束当前识别会话"""
  1013. import json
  1014. import uuid
  1015. if not self.ws_connected or not self.ws:
  1016. return self.final_result
  1017. try:
  1018. # 发送结束命令(使用相同的task_id)
  1019. message_id = uuid.uuid4().hex # 32位十六进制
  1020. stop_msg = {
  1021. "header": {
  1022. "message_id": message_id, # 32位十六进制
  1023. "task_id": self.task_id, # 使用开始时的task_id(32位十六进制)
  1024. "namespace": "SpeechTranscriber",
  1025. "name": "StopTranscription",
  1026. "appkey": self.appkey
  1027. }
  1028. }
  1029. print("[ASR] 发送StopTranscription命令...")
  1030. print(f"[ASR] task_id: {self.task_id}")
  1031. print(f"[ASR] StopTranscription消息: {json.dumps(stop_msg)}")
  1032. with self.ws_lock:
  1033. if self.ws:
  1034. self.ws.send(json.dumps(stop_msg))
  1035. # 等待最终结果(WebSocket异步接收,需要足够的时间)
  1036. print("[ASR] 等待TranscriptionCompleted消息...")
  1037. print(f"[ASR] 当前final_result: '{self.final_result}'")
  1038. max_wait = 1.2 # 🔧 等待1.2秒获取识别结果
  1039. start_time = time.time()
  1040. while (time.time() - start_time) < max_wait:
  1041. if self.final_result:
  1042. print(f"[ASR] ✓ 收到最终结果!")
  1043. break
  1044. time.sleep(0.05) # 优化: 从0.1秒降至0.05秒,更快检测
  1045. result = self.final_result
  1046. # 🔧 优先使用最终结果,没有则用中间结果
  1047. if not result and self.current_result:
  1048. result = self.current_result
  1049. print(f"[ASR] 使用中间结果作为最终结果: '{result}'")
  1050. elif not result:
  1051. print(f"[ASR] ⚠️ 既无最终结果也无中间结果!")
  1052. print(f"[ASR] 返回结果: '{result}' (等待时间: {time.time()-start_time:.2f}秒)")
  1053. # 清空结果缓存
  1054. self.current_result = ""
  1055. self.final_result = ""
  1056. return result
  1057. except Exception as e:
  1058. print(f"[ASR] 结束识别失败: {e}")
  1059. return self.final_result
  1060. def run(self, wake_detector):
  1061. """ASR线程主循环 - 流式识别版本(保持WebSocket长连接)"""
  1062. print("[ASR] 流式识别线程已启动")
  1063. # 🔧 保存wake_detector引用,供_on_message使用
  1064. self.wake_detector = wake_detector
  1065. # 建立初始WebSocket连接(保持长连接)
  1066. if not self._connect_websocket():
  1067. print("[ASR] ✗ 无法建立WebSocket连接,退出")
  1068. return
  1069. recognition_active = False
  1070. empty_count = 0 # 连续空队列计数,避免过早结束识别
  1071. audio_chunk_count = 0 # 已发送音频块计数
  1072. while not stop_flag.is_set():
  1073. try:
  1074. # 检查WebSocket连接状态,如果断开则重连
  1075. if not self.ws_connected:
  1076. print("[ASR] WebSocket已断开,尝试重新连接...")
  1077. if not self._connect_websocket():
  1078. print("[ASR] 重连失败,等待下次...")
  1079. time.sleep(1)
  1080. continue
  1081. # 获取音频数据(非阻塞,使用较短超时)
  1082. try:
  1083. audio_data = audio_queue.get(timeout=0.1)
  1084. empty_count = 0 # 重置空队列计数
  1085. except queue.Empty:
  1086. # 检查是否有正在进行的识别需要结束
  1087. # 需要连续多次Empty且is_listening为False才真正结束
  1088. if recognition_active and not is_listening.is_set():
  1089. empty_count += 1
  1090. # 🔧 修复: 等待3次Empty(约0.3秒)
  1091. # 且至少发送了1个完整音频块(约0.5秒)
  1092. if empty_count >= 3 and audio_chunk_count >= 1:
  1093. print(f"[ASR] 用户停止说话,结束识别会话... (已发送{audio_chunk_count}个音频块)")
  1094. text = self.finish_recognition()
  1095. recognition_active = False
  1096. self.session_started = False # 重置会话状态
  1097. empty_count = 0
  1098. audio_chunk_count = 0 # 重置计数
  1099. # ✅ 不关闭WebSocket,保持长连接
  1100. print("[ASR] 识别会话结束,WebSocket保持连接")
  1101. if text:
  1102. self._process_recognition_result(text, wake_detector)
  1103. else:
  1104. print("[ASR] ⚠️ 未获取到最终结果")
  1105. elif empty_count >= 5 and audio_chunk_count < 1:
  1106. # 音频太短(没有完整的音频块),直接丢弃
  1107. print(f"[ASR] ⚠️ 音频过短({audio_chunk_count}块),丢弃")
  1108. recognition_active = False
  1109. self.session_started = False
  1110. empty_count = 0
  1111. audio_chunk_count = 0
  1112. continue
  1113. # 开始新的识别会话(如果还没开始)
  1114. if not recognition_active:
  1115. print("[ASR] 准备开始新识别...")
  1116. recognition_active = True
  1117. self.session_started = False # 重置会话状态
  1118. audio_chunk_count = 0 # 重置计数
  1119. # 发送音频数据
  1120. if recognition_active:
  1121. # ✅ 关键优化:只在第一次发送音频时才发送StartTranscription
  1122. if not self.session_started:
  1123. print("[ASR] 🎤 收到音频数据,发送StartTranscription...")
  1124. if self._send_start_transcription():
  1125. self.session_started = True
  1126. else:
  1127. print("[ASR] StartTranscription发送失败,跳过本次音频")
  1128. continue
  1129. # 发送音频数据
  1130. self.send_audio(audio_data)
  1131. audio_chunk_count += 1 # 增加计数
  1132. except Exception as e:
  1133. print(f"[ASR] 线程错误: {e}")
  1134. import traceback
  1135. traceback.print_exc()
  1136. recognition_active = False
  1137. self.ws_connected = False # 标记需要重连
  1138. audio_chunk_count = 0 # 重置计数
  1139. # 清理:程序退出时才关闭WebSocket
  1140. print("[ASR] 程序退出,关闭WebSocket连接...")
  1141. if self.ws:
  1142. try:
  1143. self.ws.close()
  1144. except:
  1145. pass
  1146. def _process_recognition_result(self, text, wake_detector):
  1147. """处理识别结果"""
  1148. # 在方法开头声明全局变量,避免"used prior to global declaration"错误
  1149. global latest_face_info
  1150. print(f"\n[ASR] ========== 处理识别结果 ==========")
  1151. print(f"[ASR] 原始文本: '{text}'")
  1152. print(f"[ASR] 文本长度: {len(text) if text else 0}")
  1153. print(f"[ASR] 唤醒状态: {is_awake.is_set()}")
  1154. if not text:
  1155. print("[ASR] 未识别到有效内容")
  1156. return
  1157. # 优先级1: 检查退出程序词(完全退出)
  1158. if wake_detector.check_exit_word(text):
  1159. print("[ASR] ✓ 检测到退出程序指令!")
  1160. import random
  1161. goodbye_msg = random.choice(EXIT_RESPONSES)
  1162. response_queue.put(goodbye_msg)
  1163. exit_requested.set() # 标记退出请求
  1164. return
  1165. # 优先级2: 检查休眠词(进入休眠,可再次唤醒)
  1166. if wake_detector.check_sleep_word(text):
  1167. print("[ASR] ✓ 检测到休眠指令!")
  1168. if is_awake.is_set(): # 只有唤醒状态才能休眠
  1169. import random
  1170. sleep_msg = random.choice(SLEEP_RESPONSES)
  1171. response_queue.put(sleep_msg)
  1172. sleep_requested.set() # 标记休眠请求,等待TTS播放完成
  1173. print("[ASR] 已请求休眠,将在TTS播放完成后执行")
  1174. else:
  1175. print("[ASR] 当前已是休眠状态,忽略")
  1176. return
  1177. # 检查唤醒词
  1178. if ENABLE_WAKE_WORD:
  1179. if not is_awake.is_set():
  1180. # 未唤醒,检查是否包含唤醒词
  1181. print(f"[ASR] 未唤醒状态,检查唤醒词...")
  1182. print(f"[ASR] 唤醒词列表: {WAKE_WORDS}")
  1183. print(f"[ASR] 识别文本: '{text}'")
  1184. if wake_detector.check_wake_word(text):
  1185. print(f"[ASR] ✓ 检测到唤醒词!")
  1186. wake_detector.wake_up() # 自动播放唤醒确认
  1187. # 去除唤醒词后的内容
  1188. original_text = text
  1189. for wake_word in wake_detector.wake_words:
  1190. text = text.replace(wake_word, "").strip()
  1191. # 🔧 去除前后的标点符号
  1192. text = text.strip(',。!?、;:,.!?;: ')
  1193. print(f"[ASR] 去除唤醒词后: '{text}'")
  1194. # 🔧 先停止ASR,再把文本传给大模型,附加视觉信息
  1195. print("[ASR] 停止ASR程序(准备将文本传给大模型)")
  1196. self._stop_asr_process()
  1197. text_with_visual = f"{text}{visual_context}"
  1198. print(f"[ASR] → 放入text_queue: '{text_with_visual}'")
  1199. wake_detector.update_activity()
  1200. text_queue.put(text_with_visual)
  1201. else:
  1202. # 不启用唤醒词,直接处理
  1203. print(f"[ASR] 唤醒词未启用,直接处理")
  1204. # 🔧 先停止ASR,再把文本传给大模型
  1205. print("[ASR] 停止ASR程序(准备将文本传给大模型)")
  1206. self._stop_asr_process()
  1207. text_with_visual = f"{text}{visual_context}"
  1208. print(f"[ASR] → 放入text_queue: '{text_with_visual}'")
  1209. text_queue.put(text_with_visual)
  1210. print(f"[ASR] ========== 处理完成 ==========\n")
  1211. # ============ 4. 大模型对话模块(千问API)============
  1212. # ============ 天气查询工具 ============
  1213. def query_weather(city):
  1214. """查询指定城市的天气信息"""
  1215. try:
  1216. import requests
  1217. # 构建请求URL
  1218. url = f"{WEATHER_API_URL}?key={WEATHER_API_KEY}&city={city}"
  1219. # 发送GET请求
  1220. response = requests.get(url, timeout=10)
  1221. # 解析响应
  1222. result = response.json()
  1223. # 检查响应状态
  1224. if result.get("status") == 1 and result.get("message") == "success":
  1225. data = result.get("data", {})
  1226. # 提取关键天气信息
  1227. temp_c = data.get("temp_C", "")
  1228. feels_like = data.get("FeelsLikeC", "")
  1229. humidity = data.get("humidity", "")
  1230. weather_desc = data["weatherDesc"][0]["value"] if "weatherDesc" in data and data["weatherDesc"] else ""
  1231. wind_speed = data.get("windspeedKmph", "0")
  1232. wind_dir = data.get("winddir16Point", "")
  1233. city_name = data.get("city", city)
  1234. # 转换为整数进行比较
  1235. try:
  1236. wind_speed_int = int(wind_speed)
  1237. except ValueError:
  1238. wind_speed_int = 0
  1239. if wind_speed_int < 1:
  1240. windlv="无风"
  1241. elif wind_speed_int < 5:
  1242. windlv="1"
  1243. elif wind_speed_int < 10:
  1244. windlv="2"
  1245. elif wind_speed_int < 20:
  1246. windlv="3"
  1247. elif wind_speed_int < 30:
  1248. windlv="4"
  1249. else:
  1250. windlv = "5"
  1251. # 处理风向
  1252. wind = ""
  1253. if wind_dir == "E":
  1254. wind = "东"
  1255. elif wind_dir == "NE":
  1256. wind = "东北"
  1257. elif wind_dir == "NW":
  1258. wind = "西北"
  1259. elif wind_dir == "N":
  1260. wind = "北"
  1261. elif wind_dir == "W":
  1262. wind = "西"
  1263. elif wind_dir == "SW":
  1264. wind = "西南"
  1265. elif wind_dir == "S":
  1266. wind = "南"
  1267. elif wind_dir == "SE":
  1268. wind = "东南"
  1269. # 格式化天气信息
  1270. weather_info = f"{city_name}当前天气:{weather_desc},温度{temp_c}摄氏度,体感温度{feels_like}摄氏度,湿度百分之{humidity},{wind}风{windlv}级"
  1271. return weather_info
  1272. else:
  1273. # API返回错误信息
  1274. error_msg = result.get("message", "查询失败")
  1275. return f"天气查询失败:{error_msg}"
  1276. except Exception as e:
  1277. # 捕获网络或解析错误
  1278. return f"天气查询失败:{str(e)}"
  1279. # ============ 人脸识别工具 ============
  1280. def recognize_face():
  1281. """识别人脸信息,返回识别结果"""
  1282. try:
  1283. print("[人脸识别工具] 开始识别人脸...")
  1284. # 使用全局人脸识别实例
  1285. global global_face_recognition_instance
  1286. if global_face_recognition_instance is None:
  1287. return "人脸识别失败:人脸识别模块未初始化"
  1288. # 获取全局实例
  1289. face_recognition_module = global_face_recognition_instance
  1290. # 使用已打开的摄像头进行识别
  1291. import cv2
  1292. import time
  1293. # 尝试多次识别,提高成功率
  1294. recognition_results = []
  1295. max_attempts = 3
  1296. attempts = 0
  1297. # 获取摄像头锁,避免冲突
  1298. with face_recognition_camera_lock:
  1299. while attempts < max_attempts:
  1300. # 读取摄像头帧
  1301. ret, frame = face_recognition_module.camera.read()
  1302. if not ret:
  1303. attempts += 1
  1304. time.sleep(0.1)
  1305. continue
  1306. # 检测和识别人脸
  1307. results = face_recognition_module.face_recognition.detect_and_recognize(frame)
  1308. if results["detected"] and len(results["faces"]) > 0:
  1309. recognition_results.append(results)
  1310. # 短暂休眠,避免过度占用资源
  1311. time.sleep(0.1)
  1312. attempts += 1
  1313. # 处理识别结果
  1314. if recognition_results:
  1315. # 选择检测到人脸最多的结果
  1316. best_result = max(recognition_results, key=lambda x: x["count"])
  1317. # 过滤出已知人脸
  1318. known_faces = []
  1319. for face in best_result["faces"]:
  1320. if face["name"] != "Unknown":
  1321. known_faces.append(face)
  1322. if len(known_faces) == 1:
  1323. # 单一人脸
  1324. face = known_faces[0]
  1325. name = face["name"]
  1326. confidence = face["confidence"]
  1327. similarity = face["similarity"]
  1328. return f"识别成功!这是{name},相似度{similarity:.2f},置信度{confidence:.2f}"
  1329. elif len(known_faces) > 1:
  1330. # 多个人脸,只返回已知人脸信息
  1331. known_names = [face["name"] for face in known_faces]
  1332. # 去重
  1333. known_names = list(set(known_names))
  1334. if len(known_names) == 1:
  1335. # 多个人脸但都是同一个人
  1336. return f"识别成功!检测到{len(known_faces)}张人脸,都是{known_names[0]}"
  1337. else:
  1338. # 多个人脸且是不同的人
  1339. names_str = "、".join(known_names)
  1340. return f"识别成功!检测到{len(known_faces)}张人脸,其中已知的有:{names_str}"
  1341. elif best_result["count"] == 1:
  1342. # 只有一张未知人脸
  1343. return f"识别成功!检测到人脸,但系统中没有该人脸信息"
  1344. else:
  1345. # 多张人脸但都是未知的
  1346. return "识别成功!检测到多张人脸,但系统中没有这些人脸的信息"
  1347. else:
  1348. return "识别失败:未检测到人脸"
  1349. except Exception as e:
  1350. import traceback
  1351. traceback.print_exc()
  1352. return f"人脸识别失败:{str(e)}"
  1353. class QwenChat:
  1354. """千问API对话"""
  1355. def __init__(self):
  1356. self.api_key = QWEN_API_KEY
  1357. self.model = QWEN_MODEL
  1358. # 初始化对话历史
  1359. self.conversation_history = [{
  1360. "role": "system",
  1361. "content": """你是一个智能语音聊天助手,名叫'优宝'。请用简洁、友好、口语化的语言回答用户问题。
  1362. 回答要通俗易懂,避免使用过于专业的术语。每次回答控制在80字以内,适合语音朗读。
  1363. 不要输出颜文字表情,也不要使用emoji表情。
  1364. 当用户询问天气相关问题时,你可以调用query_weather工具来获取实时天气信息。
  1365. 例如:
  1366. - 用户问:'北京今天天气怎么样?',你应该调用query_weather工具,参数city为'北京'
  1367. - 用户问:'上海的天气如何?',你应该调用query_weather工具,参数city为'上海'
  1368. - 用户问:'武汉今天热吗?',你应该调用query_weather工具,参数city为'武汉'
  1369. 当用户提出身份确认类问题时,你可以调用recognize_face工具来获取人脸信息。
  1370. 例如:
  1371. - 用户问:'我是谁?',你应该调用recognize_face工具
  1372. - 用户问:'你认识我吗?',你应该调用recognize_face工具
  1373. - 用户问:'你知道我是谁吗?',你应该调用recognize_face工具"""
  1374. }]
  1375. def chat(self, user_message):
  1376. """发送消息并获取回复"""
  1377. try:
  1378. dashscope.api_key = self.api_key
  1379. # 添加到历史
  1380. self.conversation_history.append({
  1381. "role": "user",
  1382. "content": user_message
  1383. })
  1384. # 定义工具列表
  1385. tools = [
  1386. {
  1387. "type": "function",
  1388. "function": {
  1389. "name": "query_weather",
  1390. "description": "查询指定城市的天气信息",
  1391. "parameters": {
  1392. "type": "object",
  1393. "properties": {
  1394. "city": {
  1395. "type": "string",
  1396. "description": "城市名称,例如:北京、上海、武汉"
  1397. }
  1398. },
  1399. "required": ["city"]
  1400. }
  1401. }
  1402. },
  1403. {
  1404. "type": "function",
  1405. "function": {
  1406. "name": "recognize_face",
  1407. "description": "识别人脸信息,用于身份确认",
  1408. "parameters": {
  1409. "type": "object",
  1410. "properties": {},
  1411. "required": []
  1412. }
  1413. }
  1414. }
  1415. ]
  1416. # 调用千问API
  1417. response = dashscope.Generation.call(
  1418. model=self.model,
  1419. messages=self.conversation_history,
  1420. tools=tools,
  1421. result_format='message',
  1422. stream=False,
  1423. top_p=0.8,
  1424. temperature=0.7,
  1425. )
  1426. if response.status_code == HTTPStatus.OK:
  1427. # 获取模型回复
  1428. message = response.output.choices[0].message
  1429. # 初始化assistant_message
  1430. assistant_message = ""
  1431. # 检查是否需要工具调用 - 更安全的方式
  1432. # 先检查message的类型
  1433. if isinstance(message, dict):
  1434. # 字典类型直接使用get方法
  1435. tool_calls = message.get('tool_calls', None)
  1436. else:
  1437. # 对象类型尝试获取属性
  1438. try:
  1439. tool_calls = message.tool_calls
  1440. except (AttributeError, KeyError):
  1441. tool_calls = None
  1442. if tool_calls:
  1443. # 处理工具调用
  1444. for tool_call in tool_calls:
  1445. try:
  1446. # 获取工具调用的基本信息
  1447. if isinstance(tool_call, dict):
  1448. tool_call_id = tool_call.get('id', '')
  1449. function_info = tool_call.get('function', {})
  1450. func_name = function_info.get('name', '')
  1451. arguments = function_info.get('arguments', '{}')
  1452. else:
  1453. # 对象类型
  1454. tool_call_id = getattr(tool_call, 'id', '')
  1455. function_obj = getattr(tool_call, 'function', None)
  1456. func_name = getattr(function_obj, 'name', '') if function_obj else ''
  1457. arguments = getattr(function_obj, 'arguments', '{}') if function_obj else '{}'
  1458. if func_name == "query_weather":
  1459. # 解析参数
  1460. params = json.loads(arguments)
  1461. city = params.get("city")
  1462. # 调用天气查询函数
  1463. tool_result = query_weather(city)
  1464. # 添加工具调用记录到历史
  1465. self.conversation_history.append({
  1466. "role": "assistant",
  1467. "tool_calls": [{
  1468. "id": tool_call_id,
  1469. "type": "function",
  1470. "function": {
  1471. "name": func_name,
  1472. "arguments": arguments
  1473. }
  1474. }]
  1475. })
  1476. # 添加工具执行结果到历史
  1477. self.conversation_history.append({
  1478. "role": "tool",
  1479. "name": "query_weather",
  1480. "content": tool_result,
  1481. "tool_call_id": tool_call_id
  1482. })
  1483. # 再次调用API获取最终回复
  1484. final_response = dashscope.Generation.call(
  1485. model=self.model,
  1486. messages=self.conversation_history,
  1487. result_format='message',
  1488. stream=False,
  1489. top_p=0.8,
  1490. temperature=0.7,
  1491. )
  1492. if final_response.status_code == HTTPStatus.OK:
  1493. final_message = final_response.output.choices[0].message
  1494. # 获取最终回复
  1495. if isinstance(final_message, dict):
  1496. assistant_message = final_message.get('content', '')
  1497. else:
  1498. assistant_message = getattr(final_message, 'content', '')
  1499. # 添加最终回复到历史
  1500. self.conversation_history.append({
  1501. "role": "assistant",
  1502. "content": assistant_message
  1503. })
  1504. else:
  1505. assistant_message = "抱歉,处理天气信息时出错了。"
  1506. elif func_name == "recognize_face":
  1507. # 调用人脸识别函数
  1508. tool_result = recognize_face()
  1509. # 添加工具调用记录到历史
  1510. self.conversation_history.append({
  1511. "role": "assistant",
  1512. "tool_calls": [{
  1513. "id": tool_call_id,
  1514. "type": "function",
  1515. "function": {
  1516. "name": func_name,
  1517. "arguments": arguments
  1518. }
  1519. }]
  1520. })
  1521. # 添加工具执行结果到历史
  1522. self.conversation_history.append({
  1523. "role": "tool",
  1524. "name": "recognize_face",
  1525. "content": tool_result,
  1526. "tool_call_id": tool_call_id
  1527. })
  1528. # 再次调用API获取最终回复
  1529. final_response = dashscope.Generation.call(
  1530. model=self.model,
  1531. messages=self.conversation_history,
  1532. result_format='message',
  1533. stream=False,
  1534. top_p=0.8,
  1535. temperature=0.7,
  1536. )
  1537. if final_response.status_code == HTTPStatus.OK:
  1538. final_message = final_response.output.choices[0].message
  1539. # 获取最终回复
  1540. if isinstance(final_message, dict):
  1541. assistant_message = final_message.get('content', '')
  1542. else:
  1543. assistant_message = getattr(final_message, 'content', '')
  1544. # 添加最终回复到历史
  1545. self.conversation_history.append({
  1546. "role": "assistant",
  1547. "content": assistant_message
  1548. })
  1549. else:
  1550. assistant_message = "抱歉,处理人脸识别信息时出错了。"
  1551. else:
  1552. assistant_message = "抱歉,我不支持该工具。"
  1553. except Exception as e:
  1554. print(f"[千问] 处理工具调用时出错: {e}")
  1555. assistant_message = "抱歉,处理工具调用时出错了。"
  1556. else:
  1557. # 直接回复
  1558. if isinstance(message, dict):
  1559. assistant_message = message.get('content', '')
  1560. else:
  1561. assistant_message = getattr(message, 'content', '')
  1562. # 添加到历史
  1563. self.conversation_history.append({
  1564. "role": "assistant",
  1565. "content": assistant_message
  1566. })
  1567. # 限制历史长度(保留系统提示词+最近8轮对话)
  1568. if len(self.conversation_history) > 17:
  1569. self.conversation_history = [self.conversation_history[0]] + self.conversation_history[-16:]
  1570. return assistant_message
  1571. else:
  1572. print(f"[千问] API错误: {response.code} - {response.message}")
  1573. return "抱歉,我现在无法回答。"
  1574. except Exception as e:
  1575. print(f"[千问] 错误: {e}")
  1576. import traceback
  1577. traceback.print_exc()
  1578. return "抱歉,出现了错误。"
  1579. def run(self):
  1580. """对话线程主循环"""
  1581. while not stop_flag.is_set():
  1582. try:
  1583. user_text = text_queue.get(timeout=1)
  1584. # 🔧 LLM处理时禁用ASR文本传入LLM
  1585. asr_input_enabled.clear()
  1586. print(f"[LLM] 处理: {user_text}")
  1587. response = self.chat(user_text)
  1588. print(f"[LLM] 回复: {response}")
  1589. response_queue.put(response)
  1590. # 🔧 LLM处理完成后,ASR文本传入会在TTS播放结束后0.2秒才启用
  1591. except queue.Empty:
  1592. continue
  1593. except Exception as e:
  1594. print(f"[千问] 线程错误: {e}")
  1595. import traceback
  1596. traceback.print_exc()
  1597. # ============ 5. TTS语音合成模块(阿里云NLS)============
  1598. class AliyunTTS:
  1599. """阿里云NLS语音合成"""
  1600. def __init__(self):
  1601. try:
  1602. self.appkey = ALIYUN_APPKEY
  1603. self.access_key_id = ALIYUN_ACCESS_KEY_ID
  1604. self.access_key_secret = ALIYUN_ACCESS_KEY_SECRET
  1605. self.token = None
  1606. # 获取Token
  1607. self._get_token()
  1608. except Exception as e:
  1609. print(f"[TTS] ✗ 初始化失败: {e}")
  1610. raise
  1611. def _get_token(self):
  1612. """获取NLS访问Token(使用签名认证)"""
  1613. import requests
  1614. import hmac
  1615. import hashlib
  1616. import base64
  1617. from datetime import datetime
  1618. import uuid
  1619. from urllib.parse import quote
  1620. try:
  1621. # OpenAPI参数
  1622. timestamp = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
  1623. nonce = str(uuid.uuid4())
  1624. # 公共请求参数
  1625. params = {
  1626. 'AccessKeyId': self.access_key_id,
  1627. 'Action': 'CreateToken',
  1628. 'Format': 'JSON',
  1629. 'RegionId': 'cn-shanghai',
  1630. 'SignatureMethod': 'HMAC-SHA1',
  1631. 'SignatureNonce': nonce,
  1632. 'SignatureVersion': '1.0',
  1633. 'Timestamp': timestamp,
  1634. 'Version': '2019-02-28',
  1635. }
  1636. # 构造签名字符串
  1637. sorted_params = sorted(params.items())
  1638. canonicalized_query_string = '&'.join([f"{quote(k, safe='')}={quote(str(v), safe='')}" for k, v in sorted_params])
  1639. string_to_sign = f"GET&%2F&{quote(canonicalized_query_string, safe='')}"
  1640. # 计算签名
  1641. signature = base64.b64encode(
  1642. hmac.new(
  1643. (self.access_key_secret + '&').encode('utf-8'),
  1644. string_to_sign.encode('utf-8'),
  1645. hashlib.sha1
  1646. ).digest()
  1647. ).decode('utf-8')
  1648. # 添加签名
  1649. params['Signature'] = signature
  1650. # 发送请求
  1651. url = "https://nls-meta.cn-shanghai.aliyuncs.com/"
  1652. print("[TTS] 正在获取NLS Token...")
  1653. response = requests.get(url, params=params, timeout=10)
  1654. print(f"[TTS] HTTP状态码: {response.status_code}")
  1655. if response.status_code == 200:
  1656. result = response.json()
  1657. if 'Token' in result and isinstance(result['Token'], dict):
  1658. if 'Id' in result['Token']:
  1659. self.token = result['Token']['Id']
  1660. return
  1661. except Exception as e:
  1662. print(f"[TTS] Token获取异常: {e}")
  1663. self.token = None
  def synthesize(self, text):
      """Synthesize *text* to audio with the Aliyun NLS RESTful TTS API.

      The request asks for 16 kHz PCM. A successful reply is decoded as
      16-bit integer samples and scaled to float32 by dividing by 32767.

      Args:
          text: The sentence to speak.

      Returns:
          A 1-D ``np.float32`` array of samples, or ``None`` when the
          request fails, the reply carries no audio, or an exception is
          raised along the way (the exception is printed, not propagated).
      """
      import requests

      # NLS speech-synthesis endpoint.
      url = "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/tts"

      # Voice, volume, speed and pitch come from the module-level TTS settings.
      params = {
          "appkey": self.appkey,
          "text": text,
          "format": "pcm",
          "sample_rate": 16000,
          "voice": TTS_VOICE,
          "volume": TTS_VOLUME,
          "speech_rate": TTS_SPEECH_RATE,
          "pitch_rate": TTS_PITCH_RATE,
      }

      try:
          # At most two attempts: the second one only happens after a 401,
          # once the rejected token has been discarded and fetched again, so
          # an expired token no longer costs the user the whole utterance.
          for _attempt in range(2):
              if not self.token:
                  self._get_token()

              headers = {"Content-Type": "application/json"}
              if self.token:
                  # The token travels in the X-NLS-Token header.
                  headers["X-NLS-Token"] = self.token

              response = requests.post(
                  url,
                  params=params,
                  headers=headers,
                  timeout=10,
              )

              if response.status_code != 200:
                  print(f"[TTS] API调用失败: {response.status_code}")
                  print(f"[TTS] 错误内容: {response.text}")
                  if response.status_code != 401:
                      return None
                  # Token rejected: drop it so the next pass (or the next
                  # call) requests a fresh one.
                  self.token = None
                  continue

              # NOTE(review): the NLS documentation describes failures as
              # JSON replies; guard against one arriving with status 200 so
              # its text is never decoded as audio samples — confirm.
              content_type = response.headers.get("Content-Type", "")
              if "application/json" in content_type.lower():
                  print(f"[TTS] API调用失败: {response.status_code}")
                  print(f"[TTS] 错误内容: {response.text}")
                  return None

              pcm_data = response.content
              # int16 samples are two bytes each; a dangling odd byte would
              # make np.frombuffer raise, so it is trimmed.
              usable = len(pcm_data) - (len(pcm_data) % 2)
              if usable == 0:
                  print("[TTS] 合成结果为空")
                  return None

              audio_int16 = np.frombuffer(pcm_data[:usable], dtype=np.int16)
              return audio_int16.astype(np.float32) / 32767.0

          return None
      except Exception as e:
          print(f"[TTS] 合成错误: {e}")
          import traceback
          traceback.print_exc()
          return None
  def run(self):
      """Main loop of the TTS worker thread.

      Until ``stop_flag`` is set, takes reply text from ``response_queue``,
      synthesizes it and puts the resulting audio on ``tts_queue``. Text
      whose synthesis returns ``None`` is dropped. Errors are printed and
      the loop keeps running.
      """
      while True:
          if stop_flag.is_set():
              break
          try:
              # The 1 s timeout lets the loop re-check stop_flag regularly.
              pending_text = response_queue.get(timeout=1)
              print(f"[TTS] 合成: {pending_text[:30]}...")
              waveform = self.synthesize(pending_text)
              if waveform is None:
                  continue
              tts_queue.put(waveform)
              print("[TTS] 完成")
          except queue.Empty:
              continue
          except Exception as err:
              print(f"[TTS] 线程错误: {err}")
  # ============ 6. Audio playback module ============
  class AudioPlayer:
      """Plays synthesized audio and gates capture/ASR while the speaker is on.

      While a clip plays, ``is_speaking`` is set and ``asr_input_enabled`` is
      cleared; both are restored afterwards, also when playback fails.
      """

      def __init__(self, wake_detector=None, asr=None):
          """Store the collaborators used after playback.

          Args:
              wake_detector: Object whose ``sleep()`` is called by ``run`` when
                  a sleep was requested; may be ``None``.
              asr: ASR instance. It is only stored here; no method of this
                  class uses it.
          """
          self.wake_detector = wake_detector
          self.asr = asr

      @staticmethod
      def _prepare_audio(audio_data):
          """Return *audio_data* as a mono, C-contiguous float32 array.

          Multi-dimensional input is averaged over axis 1 (channels). The
          output stream is opened with ``dtype=np.float32``, so the buffer is
          converted to that dtype instead of being passed through unchanged.
          """
          samples = np.asarray(audio_data)
          if samples.ndim > 1:
              samples = samples.mean(axis=1)
          return np.ascontiguousarray(samples, dtype=np.float32)

      @staticmethod
      def _drain_audio_queue():
          """Discard everything currently waiting in ``audio_queue``."""
          while not audio_queue.empty():
              try:
                  audio_queue.get_nowait()
              except queue.Empty:
                  # Another consumer emptied the queue between the two calls.
                  break

      def play(self, audio_data, sample_rate=16000):
          """Play one clip and block until it, plus a guard delay, has passed.

          Args:
              audio_data: Samples as an array or sequence; multi-channel input
                  is mixed down to mono.
              sample_rate: Playback rate in Hz.

          Playback errors are printed, not raised.
          """
          is_speaking.set()  # playback in progress
          asr_input_enabled.clear()  # keep ASR text away from the LLM meanwhile
          try:
              samples = self._prepare_audio(audio_data)

              # An explicit OutputStream gives control over latency and block
              # size, which reduces ALSA underrun errors.
              with sd.OutputStream(
                  samplerate=sample_rate,
                  device=SPEAKER_DEVICE,
                  channels=1,
                  dtype=np.float32,
                  latency=LATENCY,
                  blocksize=int(sample_rate * 0.1),
              ) as stream:
                  stream.write(samples)
                  # stop() is called so buffered audio is played out; the
                  # short sleep lets the last syllable finish.
                  stream.stop()
                  time.sleep(0.1)

              # Another 0.4 s (0.5 s in total) so echo and residual sound
              # have faded before capture resumes.
              time.sleep(0.4)
              is_speaking.clear()

              # Wait 0.5 s more before ASR text may reach the LLM again.
              time.sleep(0.5)
              asr_input_enabled.set()

              # Drop audio that accumulated while the clip was playing.
              self._drain_audio_queue()
          except Exception as e:
              print(f"[播放] 错误: {e}")
          finally:
              # Always restore both flags, whatever happened above.
              is_speaking.clear()
              asr_input_enabled.set()

      def run(self):
          """Main loop of the playback thread.

          Until ``stop_flag`` is set, takes clips from ``tts_queue`` and plays
          them. After a clip, a pending sleep request puts the wake detector
          to sleep, and a pending exit request sets ``stop_flag`` and ends the
          loop.
          """
          while not stop_flag.is_set():
              try:
                  audio_data = tts_queue.get(timeout=1)

                  # Before speaking: discard queued audio and leave the
                  # listening state.
                  self._drain_audio_queue()
                  is_listening.clear()

                  self.play(audio_data)

                  # Sleep was requested (the farewell has just been played).
                  if sleep_requested.is_set():
                      time.sleep(0.3)
                      if self.wake_detector:
                          self.wake_detector.sleep()
                      sleep_requested.clear()

                  # Exit was requested: give the user a moment to hear the
                  # farewell, then signal shutdown.
                  if exit_requested.is_set():
                      time.sleep(0.5)
                      stop_flag.set()
                      break
              except queue.Empty:
                  continue
              except Exception as e:
                  # Report the failure and keep the thread alive.
                  print(f"[播放] 线程错误: {e}")
  # ============ 7. Main controller ============
  # Preset personalised greeting templates, grouped by tone. Every template
  # contains a ``{name}`` placeholder that is filled in with ``str.format``.
  WELCOME_TEMPLATES = {
      # Warm, friendly tone.
      "warm": [
          "你好,{name}!欢迎回来!",
          "嗨,{name}!好久不见,很高兴见到你!",
          "{name},欢迎你!今天过得怎么样?",
          "你好啊,{name}!有什么我可以帮你的吗?",
          "欢迎回来,{name}!最近还好吗?",
      ],
      # Formal, service-desk tone.
      "professional": [
          "您好,{name}!欢迎使用智能语音助手。",
          "{name},您好!请问有什么可以为您服务的?",
          "欢迎,{name}!我已准备好为您提供帮助。",
          "您好,{name}!很高兴为您服务。",
          "{name},您好!请告诉我您的需求。",
      ],
      # Relaxed, playful tone.
      "casual": [
          "嘿,{name}!又见面啦!",
          "哇,{name}!今天你看起来气色不错!",
          "{name}!欢迎欢迎,热烈欢迎!",
          "哟,{name}!什么风把你吹来了?",
          "嗨呀,{name}!今天想聊点啥?",
      ],
  }
  class VoiceChatAssistant:
      """Top-level controller of the voice chat assistant.

      Creates the capture, wake-word, ASR, LLM, TTS and playback components
      (plus face recognition when enabled), runs each worker on its own daemon
      thread and supervises them from the calling thread.
      """

      def __init__(self):
          """Instantiate every component; nothing is started yet."""
          self.audio_capture = AudioCapture()
          self.wake_detector = WakeWordDetector()
          self.asr = ExternalASR()  # recognition is done by an external ASR program
          self.llm = QwenChat()
          self.tts = AliyunTTS()
          # The player receives the wake detector and the ASR instance.
          self.player = AudioPlayer(wake_detector=self.wake_detector, asr=self.asr)

          # Face recognition is optional.
          self.face_recognition = None
          if FACE_RECOGNITION_ENABLED:
              self.face_recognition = FaceRecognitionModule()

          self.threads = []
          self.current_face_name = None  # name of the last greeted known face
          self.face_event_processed = False  # True once a greeting was sent

      def _generate_welcome_message(self, name):
          """Return a greeting for *name* from a randomly chosen style and template."""
          import random

          style = random.choice(list(WELCOME_TEMPLATES.keys()))
          return random.choice(WELCOME_TEMPLATES[style]).format(name=name)

      def _build_group_welcome(self, known_names):
          """Return the greeting for a group in which *known_names* were recognised.

          One name gets a personalised greeting, several names are listed
          together, and an empty or missing list falls back to the generic
          group greeting so that no greeting with a blank name is produced.
          """
          names = [n for n in (known_names or []) if n]
          if not names:
              return "你们好,欢迎使用智能语音助手!"
          if len(names) == 1:
              return self._generate_welcome_message(names[0])
          name_list = "、".join(names)
          return f"你们好,{name_list}!欢迎使用智能语音助手!"

      def _greet_and_pause(self, welcome_message, pause_log):
          """Wake the assistant, queue *welcome_message* for TTS and pause face recognition.

          Args:
              welcome_message: Text placed on ``response_queue``.
              pause_log: Line printed after face recognition has been paused.
          """
          self.wake_detector.wake_up()
          print(f"[人脸识别] 智能体已唤醒,唤醒状态: {is_awake.is_set()}")

          response_queue.put(welcome_message)
          print(f"[人脸识别] 欢迎语已发送: {welcome_message}")

          self.face_event_processed = True

          # Pause face recognition for now to save resources.
          if self.face_recognition:
              self.face_recognition.pause()
              print(pause_log)

      def _face_event_handler(self):
          """Thread body that turns face events into a wake-up plus greeting.

          Reads events from ``face_event_queue`` until ``stop_flag`` is set.
          A greeting is only sent while ``face_event_processed`` is False; a
          known single face with a different name than the previous one
          resets that flag. Errors are printed and the loop continues.
          """
          while not stop_flag.is_set():
              try:
                  event = face_event_queue.get(timeout=1)
                  event_type = event["type"]

                  if event_type == "known_single_face":
                      face_name = event["name"]
                      # A different person than last time re-arms the greeting.
                      if face_name != self.current_face_name:
                          self.current_face_name = face_name
                          self.face_event_processed = False
                      if self.face_event_processed:
                          continue
                      print(f"[人脸识别] 检测到已知人脸 {face_name},准备唤醒...")
                      self._greet_and_pause(
                          self._generate_welcome_message(face_name),
                          f"[人脸识别] 检测到已知人脸 {face_name},暂停人脸识别功能",
                      )

                  elif event_type in ("unknown_single_face", "multiple_faces"):
                      # Unknown person or anonymous group: generic greeting.
                      if self.face_event_processed:
                          continue
                      print(f"[人脸识别] 检测到{event_type},准备唤醒...")
                      if event_type == "unknown_single_face":
                          welcome_message = "你好,欢迎使用智能语音助手!"
                      else:
                          welcome_message = "你们好,欢迎使用智能语音助手!"
                      self._greet_and_pause(
                          welcome_message,
                          f"[人脸识别] 检测到{event_type},暂停人脸识别功能",
                      )

                  elif event_type == "multiple_known_faces":
                      # Several faces: greet only the recognised ones by name.
                      if self.face_event_processed:
                          continue
                      known_names = event.get("known_names", []) or []
                      print(f"[人脸识别] 检测到多个人脸,已知人脸: {known_names},准备唤醒...")
                      self._greet_and_pause(
                          self._build_group_welcome(known_names),
                          "[人脸识别] 检测到多个人脸,暂停人脸识别功能",
                      )
              except queue.Empty:
                  continue
              except Exception as e:
                  print(f"❌ 人脸事件处理错误: {e}")
                  import traceback
                  traceback.print_exc()

      def start(self):
          """Start every module and block until shutdown.

          The calling thread polls every 0.5 s, checking the wake timeout when
          wake-word mode is enabled. It returns once ``stop_flag`` is set (for
          example by the player after the farewell) or on Ctrl+C, and always
          runs ``stop()`` on the way out.
          """
          # Configuration check.
          if not QWEN_API_KEY or QWEN_API_KEY.startswith("your-"):
              print("⚠️ 警告: 请配置DashScope API Key!")
              print(" 1. 访问 https://dashscope.aliyun.com/")
              print(" 2. 登录后在控制台获取API-KEY")
              print(" 3. 将API Key填入代码第36行 QWEN_API_KEY 处")

          self.audio_capture.start()

          # The face event thread is only needed when the module started.
          if self.face_recognition and self.face_recognition.start():
              face_event_thread = threading.Thread(
                  target=self._face_event_handler,
                  daemon=True,
                  name="人脸事件处理",
              )
              face_event_thread.start()
              self.threads.append(face_event_thread)

          # Worker threads.
          threads_config = [
              ("ASR识别", lambda: self.asr.run(self.wake_detector)),
              ("千问对话", self.llm.run),
              ("TTS合成", self.tts.run),
              ("音频播放", self.player.run),
          ]
          for name, target in threads_config:
              t = threading.Thread(target=target, name=name, daemon=True)
              t.start()
              self.threads.append(t)

          # Supervision loop. It watches stop_flag so that a shutdown requested
          # by a worker thread really ends the program.
          try:
              while not stop_flag.is_set():
                  time.sleep(0.5)
                  if ENABLE_WAKE_WORD:
                      self.wake_detector.check_timeout()
          except KeyboardInterrupt:
              pass
          finally:
              self.stop()

      @staticmethod
      def _drain_queue(pending):
          """Discard every item currently waiting in the queue *pending*."""
          while not pending.empty():
              try:
                  pending.get_nowait()
              except queue.Empty:
                  break

      def stop(self):
          """Stop all modules, empty the queues and wait briefly for the threads."""
          stop_flag.set()
          self.audio_capture.stop()

          if self.face_recognition:
              self.face_recognition.stop()

          for pending in (
              audio_queue,
              text_queue,
              response_queue,
              tts_queue,
              face_event_queue,
          ):
              self._drain_queue(pending)

          # Give each worker up to 2 s; a thread cannot join itself.
          current = threading.current_thread()
          for t in self.threads:
              if t is not current:
                  t.join(timeout=2)
  # ============ Entry point ============
  if __name__ == "__main__":
      # Build the assistant, then hand control to start().
      assistant = VoiceChatAssistant()
      assistant.start()