zwy 1日前
コミット
b8247725f9

+ 0 - 1
.idea/vcs.xml

@@ -2,6 +2,5 @@
 <project version="4">
   <component name="VcsDirectoryMappings">
     <mapping directory="" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/back/rvm_temp" vcs="Git" />
   </component>
 </project>

+ 3 - 2
back/video_to_greenscreen.py

@@ -120,7 +120,8 @@ class GreenScreenProcessor:
         
         # 加载本地权重
         print(f"📦 加载本地权重:{model_path}")
-        checkpoint = torch.load(model_path, map_location=device, weights_only=True)
+        # 兼容CPU和GPU,关闭weights_only避免反序列化错误
+        checkpoint = torch.load(model_path, map_location=device, weights_only=False)
         model.load_state_dict(checkpoint)
         
         self.model = model.to(device)
@@ -189,7 +190,7 @@ def main():
     parser.add_argument('input_video', type=str, help='输入视频路径')
     parser.add_argument('-o', '--output', type=str, default=None, help='输出视频路径')
     parser.add_argument('-m', '--model', type=str, default=None, help='RVM模型路径 (默认使用脚本目录下的rvm_resnet50.pth)')
-    parser.add_argument('-d', '--device', type=str, default='cuda', help='计算设备 cuda/cpu')
+    parser.add_argument('-d', '--device', type=str, default='cpu', help='计算设备 cuda/cpu')
     parser.add_argument('--max-frames', type=int, default=None, help='最大处理帧数')
     parser.add_argument('--downsample-ratio', type=float, default=0.25, help='下采样比例')
     

BIN
data/avatars/wav2lip256_avatar1/coords.pkl


ファイルの差分が大きいため隠しています
+ 0 - 91
livetalking.log


+ 48 - 79
run_train_and_cloneb.sh

@@ -48,6 +48,7 @@ AVATAR_DIR="${SCRIPT_DIR}data/avatars/${AVATAR_ID}"
 : "${TORCHDYNAMO_DISABLE:=1}"
 # RTMP 推流配置(使用 nginx-rtmp + FFmpeg)
 : "${ENABLE_RTMP:=true}"  # 是否启用 RTMP 推流(默认启用)
+: "${TRANSPORT_MODE:=webrtc}"  # 传输模式,默认 WebRTC
 : "${RTMP_PORT:=1935}"    # RTMP 服务端口
 : "${FFMPEG_PUSH:=true}"  # 是否启动本机 FFmpeg 转发
 export CUDA_VISIBLE_DEVICES AIOICE_PORT_MIN AIOICE_PORT_MAX AIOICE_BIND_IP WEBRTC_NAT_IP
@@ -73,10 +74,10 @@ echo "Current Directory: $(pwd)"
 echo
 
 # ========================================
-# Step 1: Convert video background to green screen
+# Step 1: 调用 API 转换视频背景到绿幕
 # ========================================
 echo
-echo "[Step 1/3] Converting video background to green screen..."
+echo "[Step 1/3] Calling API for video background conversion..."
 echo
 
 # 确保 back/video 目录存在
@@ -86,16 +87,17 @@ mkdir -p "${SCRIPT_DIR}back/video"
 INPUT_BASENAME=$(basename "${INPUT_VIDEO}" | sed 's/\.[^.]*$//')
 GREENSCREEN_VIDEO="${SCRIPT_DIR}back/video/${INPUT_BASENAME}_greenscreen.mp4"
 
-echo "Converting video to green screen..."
+echo "Converting video to green screen via API..."
 echo "  Input: ${INPUT_VIDEO}"
 echo "  Output: ${GREENSCREEN_VIDEO}"
+echo "  API: http://183.252.196.135:6001"
 echo
 
-# 运行绿幕转换脚本
-python back/video_to_greenscreen.py "${INPUT_VIDEO}" -o "${GREENSCREEN_VIDEO}"
+# 调用 RVM GreenScreen API,传递输出文件路径
+python back/back_api.py "${GREENSCREEN_VIDEO}"
 
 if [ $? -ne 0 ]; then
-    echo "Error: Green screen conversion failed!"
+    echo "Error: API video conversion failed!"
     cd "${SCRIPT_DIR}" || exit 1
     exit 1
 fi
@@ -145,89 +147,56 @@ echo "Avatar data saved to: data/avatars/${AVATAR_ID}"
 echo
 
 # ========================================
-# Step 3: 启动 VoxCPM2 API 服务 + 运行 app.py
+# Step 3: 测试 VoxCPM2 API TTS 服务 + 运行 app.py
 # ========================================
 echo
-echo "[Step 3/4] 检查并启动 VoxCPM2 API 服务..."
+echo "[Step 3/4] Testing VoxCPM2 API TTS service..."
 echo
 
 # VoxCPM2 API 服务配置
-VOXCPM2_API_URL="http://localhost:6003"
-VOXCPM2_API_LOG="/tmp/voxcpm2_api.log"
+VOXCPM2_API_URL="http://183.252.196.135:6003"
 
-echo "检查 VoxCPM2 API 服务状态..."
+echo "Testing VoxCPM2 API TTS service..."
 echo "  API 地址:${VOXCPM2_API_URL}"
+echo
 
-# 检查 API 服务是否已在运行
-if curl -s ${VOXCPM2_API_URL}/health > /dev/null 2>&1; then
-    echo "✅ VoxCPM2 API 服务已在运行"
-    curl -s ${VOXCPM2_API_URL}/health | python -m json.tool 2>/dev/null || true
-else
-    echo "🚀 VoxCPM2 API 服务未运行,正在启动..."
-    echo
-    
-    # 检查模型目录是否存在
-    if [ ! -d "/mnt/nvme1data/model/VoxCPM2" ]; then
-        echo "❌ 错误:VoxCPM2 模型目录不存在:/mnt/nvme1data/model/VoxCPM2"
-        echo "请先下载或复制模型文件到该目录"
-        exit 1
+# 先测试 API 服务是否可用(重试3次)
+API_READY=false
+for i in 1 2 3; do
+    if curl -s --max-time 5 ${VOXCPM2_API_URL}/health > /dev/null 2>&1; then
+        API_READY=true
+        break
     fi
-    
-    # 检查 API 脚本是否存在
-    if [ ! -f "/mnt/nvme1data/model/voxcpm2_api.py" ]; then
-        echo "❌ 错误:VoxCPM2 API 脚本不存在:/mnt/nvme1data/model/voxcpm2_api.py"
-        exit 1
-    fi
-    
-    # 在后台启动 API 服务
-    cd /mnt/nvme1data/model
-    nohup python voxcpm2_api.py > ${VOXCPM2_API_LOG} 2>&1 &
-    API_PID=$!
-    echo "✅ VoxCPM2 API 服务已启动"
-    echo "   PID: ${API_PID}"
-    echo "   日志:${VOXCPM2_API_LOG}"
+    echo "   等待 API 启动... (${i}/3)"
+    sleep 2
+done
+
+if [ "${API_READY}" != "true" ]; then
+    echo "❌ VoxCPM2 API 服务无法连接:${VOXCPM2_API_URL}"
     echo
-    
-    # 等待服务启动(最多 120 秒)
-    echo "等待模型加载(最多120秒)..."
-    START_TIME=$(date +%s)
-    
-    for i in $(seq 1 120); do
-        if curl -s ${VOXCPM2_API_URL}/health > /dev/null 2>&1; then
-            END_TIME=$(date +%s)
-            WAIT_TIME=$((END_TIME - START_TIME))
-            echo "✅ VoxCPM2 API 服务已就绪(等待 ${WAIT_TIME} 秒)"
-            echo
-            # 显示服务信息
-            curl -s ${VOXCPM2_API_URL}/health | python -m json.tool 2>/dev/null || true
-            break
-        fi
-        
-        # 每 10 秒显示一次进度
-        if [ $((i % 10)) -eq 0 ]; then
-            echo "   已等待 ${i} 秒..."
-        fi
-        
-        if [ $i -eq 120 ]; then
-            echo
-            echo "❌ VoxCPM2 API 服务启动超时(120秒)"
-            echo
-            echo "最后 50 行日志:"
-            echo "========================================"
-            tail -50 ${VOXCPM2_API_LOG}
-            echo "========================================"
-            echo
-            echo "请检查:"
-            echo "  1. 模型文件是否完整:ls -lh /mnt/nvme1data/model/VoxCPM2/"
-            echo "  2. voxcpm 包是否安装:python -c 'import voxcpm'"
-            echo "  3. GPU 是否可用:nvidia-smi"
-            echo
-            exit 1
-        fi
-        sleep 1
-    done
-    
-    cd "${SCRIPT_DIR}"
+    echo "请确保远程服务器已启动 VoxCPM2 API 服务:"
+    echo "  SSH到服务器: ssh root@183.252.196.135"
+    echo "  然后: ssh test@192.168.22.9"
+    echo "  启动服务: cd /mnt/nvme1data/model && python voxcpm2_api.py"
+    echo
+    echo "如果服务已启动,请建立 SSH 隧道:"
+    echo "  ssh -f -N -L 6003:192.168.22.9:6003 root@183.252.196.135"
+    echo
+    exit 1
+fi
+
+echo "✅ VoxCPM2 API 服务正常"
+curl -s ${VOXCPM2_API_URL}/health | python -m json.tool 2>/dev/null || true
+echo
+
+# 测试 TTS 生成
+echo "🧪 测试 TTS 音频生成..."
+python voxcpm_api.py
+
+if [ $? -ne 0 ]; then
+    echo "⚠️  TTS 测试失败,但将继续启动数字人服务"
+else
+    echo "✅ TTS 测试成功"
 fi
 
 echo

+ 3 - 3
voxcpm2_api_tts.py

@@ -34,7 +34,7 @@ class VoxCPM2APITTS:
         self.parent = parent
         
         # API 配置(中性模式:降低 CFG,不使用 prompt)
-        self.api_url = getattr(opt, 'VOXCPM2_API_URL', 'http://localhost:6003')
+        self.api_url = getattr(opt, 'VOXCPM2_API_URL', 'http://183.252.196.135:6003')
         self.ref_audio_path = getattr(opt, 'VOXCPM2_REF_WAV', 'voice_output.wav')
         self.ref_text = getattr(opt, 'VOXCPM2_REF_TEXT', '你好,买水果,卖水果,新鲜的水果。')
         # 中性模式:降低 CFG 值,减少情绪模仿
@@ -63,11 +63,11 @@ class VoxCPM2APITTS:
         self.use_pre_gen = False  # 禁用预生成音频
         
         # Image_Analysis 音频目录配置
-        self.image_analysis_dir = "/mnt/nvme1data/Digital_Human/Image_Analysis/wav/wav"
+        self.image_analysis_dir = "/Users/alien/Desktop/Digital_Human/Image_Analysis/wav/wav"
         self.played_audio_files = set()
         
         # 实时克隆音频保存目录
-        self.kelong_dir = "/mnt/nvme1data/Digital_Human/Image_Analysis/knowledge_kelong"
+        self.kelong_dir = "/Users/alien/Desktop/Digital_Human/Image_Analysis/knowledge_kelong"
         os.makedirs(self.kelong_dir, exist_ok=True)
         logger.info(f"📁 实时克隆音频保存目录:{self.kelong_dir}")
         

この差分においてかなりの量のファイルが変更されているため、一部のファイルを表示していません