LuTong 3 kuukautta sitten
vanhempi
commit
55b515aefb
2 muutettua tiedostoa jossa 83 lisäystä ja 12 poistoa
  1. 3 1
      bash/pack.bash
  2. 80 11
      src/scraper.py

+ 3 - 1
bash/pack.bash

@@ -1 +1,3 @@
-pyinstaller --noconsole --onedir --add-data "templates;templates" --paths . --collect-all selenium_stealth --icon="app.ico" --name "1688_Product_Scraper" --clean src/main.py
+pyinstaller --noconsole --onedir --add-data "templates;templates" --paths . --collect-all selenium_stealth --icon="app.ico" --name "1688_Product_Scraper" --clean src/main.py
+# NOTE(review): the command below rebuilds the same --name target with --noconfirm, additionally bundling app.ico and webdriver_manager; the build above looks redundant - confirm.
+pyinstaller --noconsole --onedir --noconfirm --add-data "templates;templates" --add-data "app.ico;." --paths . --collect-all selenium_stealth --collect-all webdriver_manager --icon="app.ico" --name "1688_Product_Scraper" --clean src/main.py

+ 80 - 11
src/scraper.py

@@ -14,6 +14,10 @@ except ImportError:
     v.LooseVersion = LooseVersion
 
 import time, random, re, os, subprocess, urllib.parse, json, traceback
+from selenium import webdriver
+from selenium.webdriver.edge.options import Options as EdgeOptions
+from selenium.webdriver.edge.service import Service as EdgeService
+from webdriver_manager.microsoft import EdgeChromiumDriverManager
 import undetected_chromedriver as uc 
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.action_chains import ActionChains
@@ -25,20 +29,85 @@ class Scraper1688:
         self.status_callback = status_callback # 用于回调 GUI 状态
         self.user_data_path = os.path.abspath(os.path.join(os.getcwd(), "1688_user_data"))
         self._cleanup()
-        options = uc.ChromeOptions()
-        options.add_argument(f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36')
-        options.add_argument(f"--user-data-dir={self.user_data_path}")
-        if headless: options.add_argument('--headless=new')
-        options.add_argument('--disable-blink-features=AutomationControlled')
-        options.add_argument("--window-size=1920,1080")
+        
+        # 1. 优先探测 Edge
+        edge_path = self._find_edge()
+        self.driver = None
+        
+        if edge_path:
+            print(f"[*] 检测到 Edge 浏览器,尝试启动: {edge_path}")
+            from selenium.webdriver.edge.options import Options as EdgeOptions
+            from selenium.webdriver.edge.service import Service as EdgeService
+            from webdriver_manager.microsoft import EdgeChromiumDriverManager
+            
+            options = EdgeOptions()
+            options.binary_location = edge_path
+            options.add_argument(f"--user-data-dir={self.user_data_path}")
+            if headless: options.add_argument('--headless=new')
+            options.add_argument('--disable-blink-features=AutomationControlled')
+            options.add_argument("--window-size=1920,1080")
+            
+            # 解决权限和被占用的常见参数
+            options.add_argument("--no-sandbox")
+            options.add_argument("--disable-dev-shm-usage")
+            options.add_experimental_option("excludeSwitches", ["enable-automation"])
+            options.add_experimental_option('useAutomationExtension', False)
+            
+            try:
+                # 尝试自动安装驱动
+                driver_path = EdgeChromiumDriverManager().install()
+                print(f"[*] 使用 EdgeDriver: {driver_path}")
+                service = EdgeService(driver_path)
+                self.driver = webdriver.Edge(service=service, options=options)
+                print("[+] Edge 启动成功!")
+            except Exception as e:
+                # 捕获并打印完整错误
+                err_msg = traceback.format_exc()
+                print(f"[!] Edge 启动异常: \n{err_msg}")
+                print("[*] 正在尝试回退到 Chrome 模式...")
+        
+        if not self.driver:
+            print("[×] 没有Edge?🐈...")
+            # print("[*] 正在初始化 Chrome (undetected-chromedriver)...")
+            # self._init_chrome(headless)
+
+        if self.driver:
+            stealth(self.driver, languages=["zh-CN", "zh"], vendor="Google Inc.", platform="Win32", fix_hairline=True)
+
+    def _find_edge(self):
+        """Return the first existing msedge.exe among common Windows install paths, or None if none exists."""
+        paths = [
+            r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe",  # NOTE(review): drive letter C: is hard-coded
+            r"C:\Program Files\Microsoft\Edge\Application\msedge.exe",
+            os.path.expandvars(r"%LOCALAPPDATA%\Microsoft\Edge\Application\msedge.exe")  # candidate under %LOCALAPPDATA%
+        ]
+        for p in paths:
+            if os.path.exists(p): return p  # candidates are tried in list order; first hit wins
+        return None
+
+    def _init_chrome(self, headless):  # NOTE(review): the only call visible in this diff is commented out
+        """Start Chrome through undetected-chromedriver (the previous init logic) and store it in self.driver."""
+        def create_options():  # returns a newly built ChromeOptions on every call
+            opts = uc.ChromeOptions()
+            opts.add_argument(f"--user-data-dir={self.user_data_path}")
+            if headless: opts.add_argument('--headless=new')
+            opts.add_argument('--disable-blink-features=AutomationControlled')
+            opts.add_argument("--window-size=1920,1080")
+            return opts
+
         try:
-            self.driver = uc.Chrome(options=options, headless=headless, version_main=137)
-        except:
-            self.driver = uc.Chrome(options=options, headless=headless)
-        stealth(self.driver, languages=["zh-CN", "zh"], vendor="Google Inc.", platform="Win32", fix_hairline=True)
+            # First attempt: pin the Chrome major version via version_main.
+            self.driver = uc.Chrome(options=create_options(), headless=headless, version_main=131)
+        except Exception as e:
+            print(f"[*] 指定版本 Chrome 启动失败,尝试自动匹配: {e}")
+            # Key point: build a brand-new options object for the retry to avoid the options-reuse error.
+            self.driver = uc.Chrome(options=create_options(), headless=headless)
 
     def _cleanup(self):
-        if os.name == 'nt': subprocess.call(['taskkill', '/F', '/IM', 'chrome.exe', '/T'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        if os.name == 'nt':
+            for proc in ['chrome.exe', 'msedge.exe', 'edgedriver.exe', 'chromedriver.exe']:
+                subprocess.call(['taskkill', '/F', '/IM', proc, '/T'], 
+                                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
         if os.path.exists(self.user_data_path):
             for root, _, files in os.walk(self.user_data_path):
                 for f in files: