LuTong 2 ماه پیش
والد
کامیت
68a82bb4ee
3 فایل‌های تغییر یافته به همراه 94 افزوده شده و 222 حذف شده
  1. 15 30
      src/excel_handler.py
  2. 24 75
      src/gui.py
  3. 55 117
      src/scraper.py

+ 15 - 30
src/excel_handler.py

@@ -6,9 +6,7 @@ from openpyxl import load_workbook
 def get_resource_path(relative_path):
     """ 获取资源绝对路径,兼容开发环境和 PyInstaller 打包环境 """
     if hasattr(sys, '_MEIPASS'):
-        # PyInstaller 打包后的临时解压路径
         return os.path.join(sys._MEIPASS, relative_path)
-    # 开发环境下的路径
     base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
     return os.path.join(base_dir, relative_path)
 
@@ -41,7 +39,6 @@ def get_existing_info(file_path):
 def append_to_template(products, output_path, status_callback=None):
     """
     将产品数据追加写入到指定的 Excel 文件中。
-    增加文件占用检测:如果文件被打开,则暂停任务直到关闭。
     并在第二个 Sheet 中记录商品总数。
     """
     template_path = get_resource_path(os.path.join('templates', '【进价】产品信息空表.xlsx'))
@@ -58,10 +55,9 @@ def append_to_template(products, output_path, status_callback=None):
         os.makedirs(os.path.dirname(output_path), exist_ok=True)
         wb = load_workbook(template_path)
     
-    # 1. 写入主数据 Sheet
     ws = wb.active
     
-    # 寻找起始行 (基于第 11 列“产品链接”进行判定,防止覆盖)
+    # 寻找起始行 (基于第 11 列“产品链接”进行判定)
     start_row = 3
     for r in range(3, ws.max_row + 2):
         val_link = ws.cell(row=r, column=11).value
@@ -71,11 +67,11 @@ def append_to_template(products, output_path, status_callback=None):
     else:
         start_row = ws.max_row + 1
     
-    # 获取当前已有的链接集合用于后续统计唯一商品
-    existing_links = set()
+    # 获取当前已有的链接集合用于统计
+    current_links = set()
     for r in range(3, start_row):
         link = ws.cell(row=r, column=11).value
-        if link: existing_links.add(str(link).strip())
+        if link: current_links.add(str(link).strip())
 
     for i, product in enumerate(products):
         row = start_row + i
@@ -92,33 +88,22 @@ def append_to_template(products, output_path, status_callback=None):
         ws.cell(row=row, column=11, value=product.get('link', '')) 
         ws.cell(row=row, column=12, value=product.get('supplier', ''))
         
-        link = product.get('link')
-        if link: existing_links.add(str(link).strip())
+        if product.get('link'): current_links.add(str(product['link']).strip())
 
-    # 2. 写入/更新计数 Sheet (第二个 Sheet)
-    try:
-        sheet_names = wb.sheetnames
-        if "统计状态" not in sheet_names:
-            wb.create_sheet("统计状态")
-        
-        ws_stat = wb["统计状态"]
-        ws_stat.cell(row=1, column=1, value="已解析商品总数")
-        ws_stat.cell(row=1, column=2, value=len(existing_links))
-        ws_stat.cell(row=2, column=1, value="最后更新时间")
-        ws_stat.cell(row=2, column=2, value=time.strftime("%Y-%m-%d %H:%M:%S"))
-    except Exception as e:
-        print(f"[!] 统计状态 Sheet 更新失败: {e}")
+    # 写入/更新统计 Sheet
+    if "统计状态" not in wb.sheetnames:
+        wb.create_sheet("统计状态")
+    ws_stat = wb["统计状态"]
+    ws_stat.cell(row=1, column=1, value="已解析商品总数")
+    ws_stat.cell(row=1, column=2, value=len(current_links))
+    ws_stat.cell(row=2, column=1, value="最后更新时间")
+    ws_stat.cell(row=2, column=2, value=time.strftime("%Y-%m-%d %H:%M:%S"))
 
-    # 3. 占用检测保存循环
     while True:
         try:
             wb.save(output_path)
-            if status_callback:
-                status_callback(False, "写入成功")
+            if status_callback: status_callback(False, "写入成功")
             break
         except PermissionError:
-            msg = "文件被占用,请关闭 Excel"
-            print(f"[!] {msg}: {output_path}")
-            if status_callback:
-                status_callback(True, msg)
+            if status_callback: status_callback(True, "文件被占用,请关闭 Excel")
             time.sleep(3)

+ 24 - 75
src/gui.py

@@ -13,7 +13,6 @@ from src.scraper import Scraper1688
 from src.excel_handler import append_to_template, get_existing_info
 
 def get_resource_path(relative_path):
-    """ 获取资源绝对路径,兼容开发环境和 PyInstaller 打包环境 """
     if hasattr(sys, '_MEIPASS'):
         return os.path.join(sys._MEIPASS, relative_path)
     return os.path.join(os.getcwd(), relative_path)
@@ -21,7 +20,6 @@ def get_resource_path(relative_path):
 class ScraperThread(QThread):
     progress = pyqtSignal(int)
     log = pyqtSignal(str)
-    # finished 信号增加耗时参数 (秒)
     finished = pyqtSignal(str, object, float)
 
     def __init__(self, keyword, output_path, total_count, headless=True):
@@ -35,53 +33,39 @@ class ScraperThread(QThread):
         scraper = None
         start_time = time.time()
         try:
-            # 读取已抓取的链接,实现断点续爬
             existing_links, _ = get_existing_info(self.output_path)
-            if existing_links:
-                self.log.emit(f"[*] 发现已有记录: {len(existing_links)} 条,将从新记录开始搜索...")
-
-            self.log.emit(f"<b>[*] 任务启动: {self.keyword}</b>")
-            
-            def status_cb(is_waiting, msg):
-                if is_waiting:
-                    self.log.emit(f"<font color='red' size='5'><b>!!! {msg} !!!</b></font>")
-                else:
-                    self.log.emit(f"<font color='green'><b>[√] {msg}</b></font>")
-
-            scraper = Scraper1688(headless=self.headless, status_callback=status_cb)
             
-            # 使用流式生成器抓取
-            collected_count = 0
-            
-            # 记录初始商品数,用于断点续记
             initial_p_count = 0
             if os.path.exists(self.output_path):
                 try:
                     import openpyxl
                     wb_tmp = openpyxl.load_workbook(self.output_path, data_only=True)
-                    # 关键修改:先检查 Sheet 是否存在,避免 KeyError
                     if "统计状态" in wb_tmp.sheetnames:
                         val = wb_tmp["统计状态"].cell(row=1, column=2).value
                         initial_p_count = int(val) if val is not None else 0
                     wb_tmp.close()
-                except Exception as e:
-                    print(f"[*] 读取初始商品计数失败 (可能文件尚不包含统计页): {e}")
-                    initial_p_count = 0
+                except: pass
+
+            self.log.emit(f"<b>[*] 任务启动: {self.keyword}</b>")
+            
+            def status_cb(is_waiting, msg):
+                if is_waiting: self.log.emit(f"<font color='red' size='5'><b>!!! {msg} !!!</b></font>")
+                else: self.log.emit(f"<font color='green'><b>[√] {msg}</b></font>")
 
+            scraper = Scraper1688(headless=self.headless, status_callback=status_cb, log_callback=self.log.emit)
+            
+            collected_count = 0
             product_index = initial_p_count
             
             for batch_results in scraper.search_products_yield(self.keyword, total_count=self.total_count, existing_links=existing_links):
-                # 实时写入 Excel (此时 batch_results 为 10 条或页末余数)
                 append_to_template(batch_results, self.output_path, status_callback=status_cb)
                 
-                # 计算本批次包含的独立商品数量并累加
-                unique_links_in_batch = len(set(item.get('link') for item in batch_results if item.get('link')))
-                product_index += unique_links_in_batch
+                unique_links = len(set(item.get('link') for item in batch_results if item.get('link')))
+                product_index += unique_links
                 collected_count += len(batch_results)
                 
                 self.log.emit(f"[+] 解析到第 {product_index} 个商品,新增数据已持久化: {len(batch_results)} 条,本次共计: {collected_count}")
                 
-                # 进度条基于本次任务的目标数量
                 current_task_done = product_index - initial_p_count
                 prog = int((current_task_done / self.total_count) * 100)
                 self.progress.emit(min(prog, 100))
@@ -109,17 +93,13 @@ class MainWindow(QMainWindow):
     def initUI(self):
         self.setWindowTitle("1688 产品信息实时抓取工具 v3.0")
         self.setGeometry(100, 100, 1100, 750)
-
-        # 设置窗口图标
         icon_path = get_resource_path("app.ico")
-        if os.path.exists(icon_path):
-            self.setWindowIcon(QIcon(icon_path))
+        if os.path.exists(icon_path): self.setWindowIcon(QIcon(icon_path))
 
         central_widget = QWidget()
         self.setCentralWidget(central_widget)
         main_layout = QHBoxLayout(central_widget)
 
-        # 左侧类目树
         left_widget = QWidget()
         left_layout = QVBoxLayout(left_widget)
         self.load_category_btn = QPushButton("选择类目文件")
@@ -133,7 +113,6 @@ class MainWindow(QMainWindow):
         left_layout.addWidget(self.load_category_btn)
         left_layout.addWidget(self.category_tree)
 
-        # 右侧操作区
         right_widget = QWidget()
         right_layout = QVBoxLayout(right_widget)
 
@@ -155,8 +134,6 @@ class MainWindow(QMainWindow):
 
         action_layout = QHBoxLayout()
         self.category_display = QLabel("请选择二级类目")
-        
-        # 抓取数量配置
         count_layout = QHBoxLayout()
         self.count_spin = QSpinBox()
         self.count_spin.setRange(1, 10000)
@@ -166,22 +143,10 @@ class MainWindow(QMainWindow):
         count_layout.addWidget(self.count_spin)
         
         self.search_btn = QPushButton("开始抓取")
-        self.search_btn.setEnabled(False) # 初始置灰,直到选择类目和路径
+        self.search_btn.setEnabled(False)
         self.search_btn.clicked.connect(self.start_scraping)
         self.search_btn.setMinimumHeight(50)
-        self.search_btn.setStyleSheet("""
-            QPushButton { 
-                background-color: #0078d4; 
-                color: white; 
-                font-weight: bold; 
-                font-size: 16px; 
-                border-radius: 4px;
-            }
-            QPushButton:disabled { 
-                background-color: #cccccc; 
-                color: #888888; 
-            }
-        """)
+        self.search_btn.setStyleSheet("QPushButton { background-color: #0078d4; color: white; font-weight: bold; font-size: 16px; border-radius: 4px; } QPushButton:disabled { background-color: #cccccc; color: #888888; }")
         
         action_layout.addWidget(QLabel("<font color='red'>*</font>检索类目:"))
         action_layout.addWidget(self.category_display, 1)
@@ -242,32 +207,23 @@ class MainWindow(QMainWindow):
             if self.output_base_path:
                 full_p = os.path.normpath(os.path.join(self.output_base_path, "选品", self.selected_category_1, f"{self.selected_category_2}.xlsx"))
                 self.path_display.setText(full_p)
-                self.search_btn.setEnabled(True) # 仅在路径和类目都选好时启用按钮
+                self.search_btn.setEnabled(True)
 
     def select_output_path(self):
         p = QFileDialog.getExistingDirectory(self, "选择保存根目录")
         if p: self.output_base_path = p; self.update_displays()
 
     def start_scraping(self):
-        if not self.selected_category_2 or not self.output_base_path:
-            self.log_output.append("<font color='red'>[错误] 请选择类目和输出路径</font>")
-            return
-        
+        if not self.selected_category_2 or not self.output_base_path: return
         target_dir = os.path.join(self.output_base_path, "选品", self.selected_category_1)
         file_path = os.path.normpath(os.path.join(target_dir, f"{self.selected_category_2}.xlsx"))
-        self.current_output_file = file_path # 记录当前文件用于最后打开
-        
-        # 启动抓取前不再删除旧文件,实现断点续爬功能
-
+        self.current_output_file = file_path
         self.search_btn.setEnabled(False)
-        self.count_spin.setEnabled(False) # 任务开始后也禁用数量输入
+        self.count_spin.setEnabled(False)
         self.status_label.setText("处理中……")
         self.log_output.clear()
         self.pbar.setValue(0)
-        headless = not self.show_browser_cb.isChecked()
-        total_count = self.count_spin.value()
-        
-        self.thread = ScraperThread(self.selected_category_2, file_path, total_count, headless)
+        self.thread = ScraperThread(self.selected_category_2, file_path, self.count_spin.value(), not self.show_browser_cb.isChecked())
         self.thread.log.connect(self.log_output.append)
         self.thread.progress.connect(self.pbar.setValue)
         self.thread.finished.connect(self.on_finished)
@@ -275,20 +231,13 @@ class MainWindow(QMainWindow):
 
     def on_finished(self, err, scraper, duration):
         self.search_btn.setEnabled(True)
-        self.count_spin.setEnabled(True) # 任务结束后恢复数量输入
-        if scraper: self.active_scraper = scraper
-        
+        self.count_spin.setEnabled(True)
         if not err:
             self.status_label.setText("任务完成")
-            # 自动打开目标文件
             if hasattr(self, 'current_output_file') and os.path.exists(self.current_output_file):
-                try:
-                    os.startfile(self.current_output_file)
-                    self.log_output.append(f"<font color='blue'>[系统] 已自动打开结果文件</font>")
-                except Exception as e:
-                    self.log_output.append(f"<font color='orange'>[警告] 无法自动打开文件: {e}</font>")
-        else:
-            self.status_label.setText("异常终止")
+                try: os.startfile(self.current_output_file)
+                except: pass
+        else: self.status_label.setText("异常终止")
 
 if __name__ == "__main__":
     app = QApplication(sys.argv)

+ 55 - 117
src/scraper.py

@@ -24,22 +24,15 @@ class Scraper1688:
     def __init__(self, headless=True, status_callback=None, log_callback=None):
         self.headless = headless
         self.status_callback = status_callback
-        self.log_callback = log_callback # 用于向 GUI 发送普通日志
-        # 使用全新的独立目录,避开锁定冲突
+        self.log_callback = log_callback
         self.user_data_path = os.path.abspath(os.path.join(os.getcwd(), "chrome_stable_profile"))
         self.driver = None
-        
-        # 1. 强制清理残留,解决 ConnectionResetError
         self._cleanup()
-        
-        # 2. 启动浏览器
         self._init_chrome(headless)
-        
         if self.driver:
             stealth(self.driver, languages=["zh-CN", "zh"], vendor="Google Inc.", platform="Win32", fix_hairline=True)
 
     def _find_chrome(self):
-        """ 通过注册表寻找 Chrome 精准安装路径 """
         import winreg
         reg_paths = [
             (winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe"),
@@ -54,14 +47,10 @@ class Scraper1688:
         return None
 
     def _cleanup(self):
-        """ 杀掉所有残留进程,确保端口和文件未被锁定 """
         if os.name == 'nt':
             for proc in ['chrome.exe', 'chromedriver.exe']:
-                try:
-                    subprocess.call(['taskkill', '/F', '/IM', proc, '/T'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+                try: subprocess.call(['taskkill', '/F', '/IM', proc, '/T'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
                 except: pass
-        
-        # 清理锁定文件
         if os.path.exists(self.user_data_path):
             for root, _, files in os.walk(self.user_data_path):
                 for f in files:
@@ -70,9 +59,7 @@ class Scraper1688:
                         except: pass
 
     def _init_chrome(self, headless):
-        """ 强化版启动:解决浏览器不弹出及连接重置报错 """
         chrome_path = self._find_chrome()
-        
         def create_options():
             opts = uc.ChromeOptions()
             opts.add_argument(f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36')
@@ -80,55 +67,26 @@ class Scraper1688:
             if headless: opts.add_argument('--headless=new')
             opts.add_argument('--disable-blink-features=AutomationControlled')
             opts.add_argument("--window-size=1920,1080")
-            # 兼容性全家桶
             opts.add_argument("--no-sandbox")
             opts.add_argument("--disable-dev-shm-usage")
             opts.add_argument("--remote-allow-origins=*")
-            opts.add_argument("--no-first-run")
-            opts.add_argument("--no-default-browser-check")
             return opts
-
-        print(f"[*] 正在物理启动 Chrome: {chrome_path}")
         try:
-            # 增加 use_subprocess=True,显著提升在 Win11 下的连接稳定性
-            self.driver = uc.Chrome(
-                options=create_options(), 
-                headless=headless, 
-                browser_executable_path=chrome_path,
-                use_subprocess=True
-            )
-            print("[+] Chrome 浏览器已成功弹出!")
+            self.driver = uc.Chrome(options=create_options(), headless=headless, browser_executable_path=chrome_path, use_subprocess=True)
         except Exception as e:
-            print(f"[*] 首次启动失败 ({e}),尝试自动兼容模式...")
-            try:
-                self._cleanup()
-                time.sleep(2)
-                # 兜底方案:不使用 subprocess
-                self.driver = uc.Chrome(options=create_options(), headless=headless)
-                print("[+] 自动兼容模式启动成功!")
-            except Exception as e2:
-                print(f"[致命错误] 无法启动 Chrome: {e2}")
-                raise Exception("无法拉起 Chrome,请尝试关闭杀毒软件或重新安装 Chrome。")
+            self.driver = uc.Chrome(options=create_options(), headless=headless, use_subprocess=True)
 
     def clean_url(self, url):
-        """ 极其鲁棒的 ID 提取逻辑,强制转化为详情页链接,过滤店铺页 """
         if not url: return ""
         if url.startswith("//"): url = "https:" + url
-        
-        # 1. 尝试从各种路径模式中提取纯数字商品 ID
         id_match = re.search(r'offer(?:Id|Ids)?/(\d+)\.html', url) or \
                    re.search(r'[?&](?:offerId|offerIds|id)=(\d+)', url) or \
                    re.search(r'object_id@(\d+)', url)
-        
         if id_match:
-            # 只有提取到 ID 的才被认为是商品,统一转化为标准详情页格式
             return f"https://detail.1688.com/offer/{id_match.group(1)}.html"
-        
-        # 2. 如果没提取到 ID(说明是店铺首页、广告页等),返回空以过滤掉
         return ""
 
     def check_for_captcha(self):
-        """ 检测登录、滑块、验证等状态 """
         def is_blocked():
             try:
                 url, src, title = self.driver.current_url.lower(), self.driver.page_source.lower(), self.driver.title.lower()
@@ -138,16 +96,12 @@ class Scraper1688:
                 is_punish = "punish" in url or "哎哟喂" in src or "验证码" in title
                 return is_slider or is_login or is_punish
             except: return False
-        
         if is_blocked():
             msg = "请登录验证"
             if self.status_callback: self.status_callback(True, msg)
             while is_blocked(): time.sleep(3)
             if self.status_callback: self.status_callback(False, "验证通过")
-            
-            cool_msg = "[*] 解封成功,进入 120 秒冷却期以规避风控追溯..."
-            if self.log_callback: self.log_callback(f"<font color='orange'>{cool_msg}</font>")
-            time.sleep(120) 
+            time.sleep(3)
         return True
 
     def search_products_yield(self, keyword, total_count=200, existing_links=None):
@@ -155,7 +109,6 @@ class Scraper1688:
         base_url = f"https://s.1688.com/selloffer/offer_search.htm?keywords={gbk_keyword}&n=y&netType=1%2C11%2C16"
         self.driver.get("https://www.1688.com")
         self.check_for_captcha()
-        
         all_links = existing_links if existing_links is not None else set()
         page, initial_count = 1, len(all_links)
         
@@ -163,8 +116,8 @@ class Scraper1688:
             print(f"[*] 正在处理列表页: 第 {page} 页...")
             self.driver.get(f"{base_url}&beginPage={page}&page={page}")
             self.check_for_captcha()
-            for i in range(1, 5):
-                self.driver.execute_script(f"window.scrollTo(0, document.body.scrollHeight * {i/4});")
+            for i in range(1, 6):
+                self.driver.execute_script(f"window.scrollTo(0, document.body.scrollHeight * {i/5});")
                 time.sleep(1.5)
 
             page_results = self._extract_all_methods()
@@ -173,23 +126,24 @@ class Scraper1688:
             page_batch = []
             for it in page_results:
                 clean_url = self.clean_url(it["link"])
-                # 核心改进:只处理清洗成功的商品链接
                 if clean_url and clean_url not in all_links:
                     all_links.add(clean_url)
                     print(f"  [>] 抓取详情: {clean_url}")
                     detail_results = self.scrape_detail(clean_url)
-                    if detail_results: page_batch.extend(detail_results)
-                    else: page_batch.append({
-                        "category": "", "brand": "", "name": it["name"],
-                        "color": "", "spec": "", "material": "", "price": it.get("price", ""),
-                        "moq": "", "wholesale_price": "", "link": clean_url, "supplier": ""
-                    })
+                    if detail_results:
+                        page_batch.extend(detail_results)
+                    else:
+                        page_batch.append({
+                            "category": "", "brand": "", "name": it.get("name", ""),
+                            "color": "", "spec": "", "material": "", "price": "",
+                            "moq": "", "wholesale_price": "", "link": clean_url, "supplier": ""
+                        })
                     
                     if len(page_batch) >= 10:
                         yield page_batch
                         page_batch = []
                     
-                    time.sleep(random.uniform(15, 30)) # 保持慢速,确保长效稳定
+                    time.sleep(random.uniform(15, 30))
                     if len(all_links) >= total_count + initial_count: break
             
             if page_batch: yield page_batch
@@ -200,13 +154,16 @@ class Scraper1688:
         return list(all_links)
 
     def scrape_detail(self, url):
-        """ 极其精准的详情页解析:获取 expand-view-list-wrapper 中的款式描述 + 逐条价格 """
+        """ 深度提取变体逻辑:款式描述 + 逐条价格 """
         try:
             self.driver.get(url)
             time.sleep(random.uniform(5, 10))
             self.check_for_captcha()
             model = self.driver.execute_script(
-                "return (window.context && window.context.result && window.context.result.global && window.context.result.global.globalData && window.context.result.global.globalData.model) || window.__INITIAL_DATA__ || window.iDetailData || window.iDetailConfig || null;"
+                "return (window.context && window.context.result && "
+                "window.context.result.global && window.context.result.global.globalData "
+                "&& window.context.result.global.globalData.model) || "
+                "window.__INITIAL_DATA__ || window.iDetailData || window.iDetailConfig || null;"
             )
             if not model: return None
 
@@ -221,19 +178,15 @@ class Scraper1688:
                 except: pass
                 return ""
 
-            def safe_text(by, sel):
-                try: return self.driver.find_element(by, sel).text.strip()
-                except: return ""
-
             trade = model.get("tradeModel", {}) if isinstance(model, dict) else {}
             ranges = trade.get("disPriceRanges") or trade.get("currentPrices") or []
             range_text = " / ".join([f"{r.get('beginAmount')}起 ¥{r.get('price') or r.get('discountPrice')}" for r in ranges])
 
             base_data = {
-                "category": (model.get("offerDetail", {}).get("leafCategoryName", "") if isinstance(model, dict) else "") or safe_text(By.CSS_SELECTOR, "div[class*=breadcrumb] a:last-child"),
+                "category": (model.get("offerDetail", {}).get("leafCategoryName", "") if isinstance(model, dict) else "") or self.driver.find_element(By.CSS_SELECTOR, "div[class*=breadcrumb] a:last-child").text.strip(),
                 "brand": get_attr("品牌"),
                 "name": (model.get("offerDetail", {}).get("subject", "") if isinstance(model, dict) else "") or self.driver.title.split('-')[0],
-                "spec": get_attr("尺码") or get_attr("规格") or get_attr("型号") or safe_text(By.XPATH, "//div[@id='productAttributes']//th[span='尺码' or span='规格']/following-sibling::td[1]//span[@class='field-value']"),
+                "spec": get_attr("尺码") or get_attr("规格") or get_attr("型号"),
                 "material": get_attr("材质") or get_attr("面料"),
                 "price": "", 
                 "moq": trade.get("beginAmount", ""),
@@ -244,21 +197,14 @@ class Scraper1688:
 
             variant_data_list = []
             try:
-                # 1. 核心需求:从 expand-view-list-wrapper 中提取文字和价格
                 wrappers = self.driver.find_elements(By.CLASS_NAME, "expand-view-list-wrapper")
                 if wrappers:
-                    # 寻找容器下的条目
                     items = wrappers[0].find_elements(By.CSS_SELECTOR, ".expand-view-list-item, [class*='list-item'], .sku-item")
                     for item_el in items:
                         try:
-                            # 款式描述文字 (item-label)
                             label = item_el.find_element(By.CLASS_NAME, "item-label").text.strip()
-                            # 逐条价格 (item-price-stock)
-                            price_raw = item_el.find_element(By.CLASS_NAME, "item-price-stock").text.strip()
-                            # 清洗价格,只保留数字
-                            price_clean = re.sub(r'[^\d.]', '', price_raw)
-                            if label:
-                                variant_data_list.append({"label": label, "price": price_clean})
+                            price = item_el.find_element(By.CLASS_NAME, "item-price-stock").text.strip()
+                            if label: variant_data_list.append({"label": label, "price": re.sub(r'[^\d.]', '', price)})
                         except: continue
             except: pass
 
@@ -270,51 +216,43 @@ class Scraper1688:
                     row["price"] = vd["price"]
                     results.append(row)
                 return results
-            
-            # 2. 方案 B: 如果 DOM 探测失败,回退到 JS 模型
-            sku_props = model.get("skuModel", {}).get("skuProps", []) or model.get("detailData", {}).get("skuProps", []) or []
-            main_prop = next((p for p in sku_props if any(k in p.get("prop", "") for k in ["颜色", "分类", "款式", "花色", "净含量"])), None)
-            if not main_prop and sku_props: main_prop = sku_props[0]
-            if main_prop and main_prop.get("value"):
-                results = []
-                for val in main_prop["value"]:
-                    if val.get("name"):
-                        row = base_data.copy()
-                        row["color"] = val.get("name")
-                        row["price"] = trade.get("minPrice", "")
-                        results.append(row)
-                return results
-            
-            base_data["price"] = trade.get("minPrice", "")
             return [base_data]
         except: return None
 
     def _extract_all_methods(self):
-        """ 列表页提取 """
+        """ 强化版:对标 req.py 的 JS 变量探测 """
         results = []
-        try:
-            res = self.driver.execute_script("return JSON.stringify(window.data || window.__INITIAL_DATA__)")
-            if res:
-                data = json.loads(res)
-                def find_list(obj):
-                    if isinstance(obj, list) and len(obj) > 0 and ('title' in obj[0] or 'offerId' in obj[0]): return obj
-                    if isinstance(obj, dict):
-                        for k in obj:
-                            f = find_list(obj[k])
-                            if f: return f
-                    return None
-                for o in (find_list(data) or []):
-                    link = o.get('itemUrl', o.get('url', ''))
-                    if link and "similar_search" not in link:
-                        results.append({"name": str(o.get('title', '')), "link": link})
-        except: pass
-        if not results:
-            # 引入最新版 1688 选择器,确保能抓到详情链接
-            for s in [".sm-offer-item", ".offer-card-item", ".search-offer-item", "[class*='offer-card']", ".offer-item"]:
-                for el in self.driver.find_elements(By.CSS_SELECTOR, s):
+        scripts = [
+            "return JSON.stringify(window.data || window.context?.result?.data || window.__INITIAL_DATA__)",
+            "return JSON.stringify(window.context?.result?.global?.globalData?.data || null)"
+        ]
+        for s in scripts:
+            try:
+                res = self.driver.execute_script(s)
+                if res and res != "null":
+                    data = json.loads(res)
+                    def find_list(obj):
+                        if isinstance(obj, list) and len(obj) > 0 and isinstance(obj[0], dict) and any(k in obj[0] for k in ['offerId', 'title', 'subject']): return obj
+                        if isinstance(obj, dict):
+                            for k in obj:
+                                f = find_list(obj[k])
+                                if f: return f
+                        return None
+                    for o in (find_list(data) or []):
+                        link = o.get('itemUrl', o.get('url', ''))
+                        if link and "similar_search" not in link:
+                            results.append({"name": str(o.get('title', o.get('subject', ''))), "link": link})
+                    if results: return results
+            except: continue
+        
+        selectors = [".sm-offer-item", ".offer-card-item", ".search-offer-item", "[class*='offer-card']", ".offer-item"]
+        for s in selectors:
+            elements = self.driver.find_elements(By.CSS_SELECTOR, s)
+            if len(elements) > 2:
+                for el in elements:
                     try:
                         link = el.find_element(By.TAG_NAME, "a").get_attribute("href")
-                        if link and ("offer" in link or "item" in link) and "similar_search" not in link:
+                        if link and "similar_search" not in link:
                             results.append({"name": el.text.split('\n')[0][:50], "link": link})
                     except: continue
                 if results: break