|
|
@@ -15,8 +15,9 @@ except ImportError:
|
|
|
|
|
|
import time, random, re, os, subprocess, urllib.parse, json, traceback, socket
|
|
|
from selenium import webdriver
|
|
|
+from selenium.webdriver.edge.options import Options as EdgeOptions
|
|
|
+from selenium.webdriver.edge.service import Service as EdgeService
|
|
|
from selenium.webdriver.chrome.options import Options as ChromeOptions
|
|
|
-from selenium.webdriver.chrome.service import Service as ChromeService
|
|
|
import undetected_chromedriver as uc
|
|
|
from selenium.webdriver.common.by import By
|
|
|
from selenium.webdriver.common.action_chains import ActionChains
|
|
|
@@ -50,35 +51,47 @@ class Scraper1688:
|
|
|
cmd.append("--headless")
|
|
|
|
|
|
try:
|
|
|
+ # 异步启动浏览器进程
|
|
|
subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
|
|
time.sleep(3) # 等待浏览器初始化
|
|
|
|
|
|
- # 4. 关键:使用 Chrome 类接管 Edge
|
|
|
- # 这样可以解决 selenium-stealth 只支持 Chrome 类的问题
|
|
|
- opts = ChromeOptions()
|
|
|
+ # 4. 接管 Edge
|
|
|
+ opts = EdgeOptions()
|
|
|
opts.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
|
|
|
|
|
|
+ # 尝试连接
|
|
|
try:
|
|
|
- self.driver = webdriver.Chrome(options=opts)
|
|
|
- print("[+] Edge 浏览器已通过 Chrome 类接管成功!")
|
|
|
+ # 使用真正的 Edge 驱动类连接,解决“unrecognized Chrome version”报错
|
|
|
+ self.driver = webdriver.Edge(options=opts)
|
|
|
+ print("[+] Edge 浏览器已成功自动弹出并接管!")
|
|
|
except:
|
|
|
- print("[*] 尝试自动下载匹配的驱动接管...")
|
|
|
- from webdriver_manager.chrome import ChromeDriverManager
|
|
|
- service = ChromeService(ChromeDriverManager().install())
|
|
|
- self.driver = webdriver.Chrome(service=service, options=opts)
|
|
|
- print("[+] Edge 浏览器已成功接管!")
|
|
|
+ # 如果连不上,尝试使用 webdriver_manager 自动下载匹配驱动
|
|
|
+ print("[*] 尝试自动下载匹配的 EdgeDriver...")
|
|
|
+ from webdriver_manager.microsoft import EdgeChromiumDriverManager
|
|
|
+ service = EdgeService(EdgeChromiumDriverManager().install())
|
|
|
+ self.driver = webdriver.Edge(service=service, options=opts)
|
|
|
+ print("[+] Edge 浏览器已通过驱动管理接管成功!")
|
|
|
|
|
|
except Exception as e:
|
|
|
- print(f"[*] Edge 自动接管模式失败,准备回退: {e}")
|
|
|
+ print(f"[*] Edge 自动接管模式失败,准备回退到 Chrome: {e}")
|
|
|
|
|
|
- # 5. 兜底方案
|
|
|
+ # 5. 兜底方案:如果 Edge 启动或接管失败,启动 Chrome
|
|
|
if not self.driver:
|
|
|
print("[*] 正在启动 Chrome (undetected-chromedriver) 模式...")
|
|
|
self._init_chrome(headless)
|
|
|
|
|
|
if self.driver:
|
|
|
- # 此时 self.driver 无论是接管还是自启,都是 Chrome 类型,不会报错
|
|
|
- stealth(self.driver, languages=["zh-CN", "zh"], vendor="Google Inc.", platform="Win32", fix_hairline=True)
|
|
|
+ # 关键:只有在使用 Chrome 模式时才应用 stealth
|
|
|
+ # 接管模式下的 Edge 是真实的浏览器进程,本身就具备极高的隐蔽性
|
|
|
+ if "chrome" in str(type(self.driver)).lower() and "edge" not in str(type(self.driver)).lower():
|
|
|
+ stealth(self.driver, languages=["zh-CN", "zh"], vendor="Google Inc.", platform="Win32", fix_hairline=True)
|
|
|
+ else:
|
|
|
+ # 针对 Edge 的轻量级反爬补丁(避开库类型检查错误)
|
|
|
+ self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
|
|
|
+ "source": """
|
|
|
+ Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
|
|
|
+ """
|
|
|
+ })
|
|
|
|
|
|
def _find_edge(self):
|
|
|
""" 通过注册表寻找 Edge 精准安装路径 """
|
|
|
@@ -255,7 +268,7 @@ class Scraper1688:
|
|
|
except: return None
|
|
|
|
|
|
def _extract_all_methods(self):
|
|
|
- """ 列表页多方式提取 """
|
|
|
+ """ 列表页提取 """
|
|
|
results = []
|
|
|
try:
|
|
|
res = self.driver.execute_script("return JSON.stringify(window.data || window.__INITIAL_DATA__)")
|