|
|
@@ -14,6 +14,10 @@ except ImportError:
|
|
|
v.LooseVersion = LooseVersion
|
|
|
|
|
|
import time, random, re, os, subprocess, urllib.parse, json, traceback
|
|
|
+from selenium import webdriver
|
|
|
+from selenium.webdriver.edge.options import Options as EdgeOptions
|
|
|
+from selenium.webdriver.edge.service import Service as EdgeService
|
|
|
+from webdriver_manager.microsoft import EdgeChromiumDriverManager
|
|
|
import undetected_chromedriver as uc
|
|
|
from selenium.webdriver.common.by import By
|
|
|
from selenium.webdriver.common.action_chains import ActionChains
|
|
|
@@ -25,20 +29,85 @@ class Scraper1688:
|
|
|
self.status_callback = status_callback # 用于回调 GUI 状态
|
|
|
self.user_data_path = os.path.abspath(os.path.join(os.getcwd(), "1688_user_data"))
|
|
|
self._cleanup()
|
|
|
- options = uc.ChromeOptions()
|
|
|
- options.add_argument(f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36')
|
|
|
- options.add_argument(f"--user-data-dir={self.user_data_path}")
|
|
|
- if headless: options.add_argument('--headless=new')
|
|
|
- options.add_argument('--disable-blink-features=AutomationControlled')
|
|
|
- options.add_argument("--window-size=1920,1080")
|
|
|
+
|
|
|
+ # 1. 优先探测 Edge
|
|
|
+ edge_path = self._find_edge()
|
|
|
+ self.driver = None
|
|
|
+
|
|
|
+ if edge_path:
|
|
|
+ print(f"[*] 检测到 Edge 浏览器,尝试启动: {edge_path}")
|
|
|
+ from selenium.webdriver.edge.options import Options as EdgeOptions
|
|
|
+ from selenium.webdriver.edge.service import Service as EdgeService
|
|
|
+ from webdriver_manager.microsoft import EdgeChromiumDriverManager
|
|
|
+
|
|
|
+ options = EdgeOptions()
|
|
|
+ options.binary_location = edge_path
|
|
|
+ options.add_argument(f"--user-data-dir={self.user_data_path}")
|
|
|
+ if headless: options.add_argument('--headless=new')
|
|
|
+ options.add_argument('--disable-blink-features=AutomationControlled')
|
|
|
+ options.add_argument("--window-size=1920,1080")
|
|
|
+
|
|
|
+ # 解决权限和被占用的常见参数
|
|
|
+ options.add_argument("--no-sandbox")
|
|
|
+ options.add_argument("--disable-dev-shm-usage")
|
|
|
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
|
|
|
+ options.add_experimental_option('useAutomationExtension', False)
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 尝试自动安装驱动
|
|
|
+ driver_path = EdgeChromiumDriverManager().install()
|
|
|
+ print(f"[*] 使用 EdgeDriver: {driver_path}")
|
|
|
+ service = EdgeService(driver_path)
|
|
|
+ self.driver = webdriver.Edge(service=service, options=options)
|
|
|
+ print("[+] Edge 启动成功!")
|
|
|
+ except Exception as e:
|
|
|
+ # 捕获并打印完整错误
|
|
|
+ err_msg = traceback.format_exc()
|
|
|
+ print(f"[!] Edge 启动异常: \n{err_msg}")
|
|
|
+ print("[*] 正在尝试回退到 Chrome 模式...")
|
|
|
+
|
|
|
+ if not self.driver:
|
|
|
+ print("[×] 没有Edge?🐈...")
|
|
|
+ # print("[*] 正在初始化 Chrome (undetected-chromedriver)...")
|
|
|
+ # self._init_chrome(headless)
|
|
|
+
|
|
|
+ if self.driver:
|
|
|
+ stealth(self.driver, languages=["zh-CN", "zh"], vendor="Google Inc.", platform="Win32", fix_hairline=True)
|
|
|
+
|
|
|
+ def _find_edge(self):
|
|
|
+ """ Probe the common Windows install paths for Edge; return the first path that exists, or None if none do. """
|
|
|
+ paths = [
|
|
|
+ r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe",
|
|
|
+ r"C:\Program Files\Microsoft\Edge\Application\msedge.exe",
|
|
|
+ os.path.expandvars(r"%LOCALAPPDATA%\Microsoft\Edge\Application\msedge.exe")  # %LOCALAPPDATA% is expanded from the environment
|
|
|
+ ]
|
|
|
+ for p in paths:
|
|
|
+ if os.path.exists(p): return p  # candidates are checked in list order; the first hit wins
|
|
|
+ return None
|
|
|
+
|
|
|
+ def _init_chrome(self, headless):
|
|
|
+ """ Original Chrome (undetected-chromedriver) initialization logic; stores the started driver on self.driver. """
|
|
|
+ def create_options():  # factory: every call builds and returns a fresh ChromeOptions
|
|
|
+ opts = uc.ChromeOptions()
|
|
|
+ opts.add_argument(f"--user-data-dir={self.user_data_path}")
|
|
|
+ if headless: opts.add_argument('--headless=new')
|
|
|
+ opts.add_argument('--disable-blink-features=AutomationControlled')
|
|
|
+ opts.add_argument("--window-size=1920,1080")
|
|
|
+ return opts
|
|
|
+
|
|
|
try:
|
|
|
- self.driver = uc.Chrome(options=options, headless=headless, version_main=137)
|
|
|
- except:
|
|
|
- self.driver = uc.Chrome(options=options, headless=headless)
|
|
|
- stealth(self.driver, languages=["zh-CN", "zh"], vendor="Google Inc.", platform="Win32", fix_hairline=True)
|
|
|
+ # First attempt: request a specific major version
|
|
|
+ self.driver = uc.Chrome(options=create_options(), headless=headless, version_main=131)
|
|
|
+ except Exception as e:
|
|
|
+ print(f"[*] 指定版本 Chrome 启动失败,尝试自动匹配: {e}")
|
|
|
+ # Key point: build a brand-new options object for the retry to avoid the options-reuse error
|
|
|
+ self.driver = uc.Chrome(options=create_options(), headless=headless)
|
|
|
|
|
|
def _cleanup(self):
|
|
|
- if os.name == 'nt': subprocess.call(['taskkill', '/F', '/IM', 'chrome.exe', '/T'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
|
|
+ if os.name == 'nt':
|
|
|
+ for proc in ['chrome.exe', 'msedge.exe', 'edgedriver.exe', 'chromedriver.exe']:
|
|
|
+ subprocess.call(['taskkill', '/F', '/IM', proc, '/T'],
|
|
|
+ stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
|
|
if os.path.exists(self.user_data_path):
|
|
|
for root, _, files in os.walk(self.user_data_path):
|
|
|
for f in files:
|