4 mēneši atpakaļ · 68a82bb4ee
--- a/src/excel_handler.py
+++ b/src/excel_handler.py
@@ -6,9 +6,7 @@ from openpyxl import load_workbook
 
				 def get_resource_path(relative_path):
			
 
				     """ 获取资源绝对路径，兼容开发环境和 PyInstaller 打包环境 """
			
 
				     if hasattr(sys, '_MEIPASS'):
			
 
				-        # PyInstaller 打包后的临时解压路径
			
 
				         return os.path.join(sys._MEIPASS, relative_path)
			
 
				-    # 开发环境下的路径
			
 
				     base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
			
 
				     return os.path.join(base_dir, relative_path)
			
 
				 
			
@@ -41,7 +39,6 @@ def get_existing_info(file_path):
 
				 def append_to_template(products, output_path, status_callback=None):
			
 
				     """
			
 
				     将产品数据追加写入到指定的 Excel 文件中。
			
 
				-    增加文件占用检测：如果文件被打开，则暂停任务直到关闭。
			
 
				     并在第二个 Sheet 中记录商品总数。
			
 
				     """
			
 
				     template_path = get_resource_path(os.path.join('templates', '【进价】产品信息空表.xlsx'))
			
@@ -58,10 +55,9 @@ def append_to_template(products, output_path, status_callback=None):
 
				         os.makedirs(os.path.dirname(output_path), exist_ok=True)
			
 
				         wb = load_workbook(template_path)
			
 
				     
			
 
				-    # 1. 写入主数据 Sheet
			
 
				     ws = wb.active
			
 
				     
			
 
				-    # 寻找起始行 (基于第 11 列“产品链接”进行判定，防止覆盖)
			
 
				+    # 寻找起始行 (基于第 11 列“产品链接”进行判定)
			
 
				     start_row = 3
			
 
				     for r in range(3, ws.max_row + 2):
			
 
				         val_link = ws.cell(row=r, column=11).value
			
@@ -71,11 +67,11 @@ def append_to_template(products, output_path, status_callback=None):
 
				     else:
			
 
				         start_row = ws.max_row + 1
			
 
				     
			
 
				-    # 获取当前已有的链接集合，用于后续统计唯一商品
			
 
				-    existing_links = set()
			
 
				+    # 获取当前已有的链接集合用于统计
			
 
				+    current_links = set()
			
 
				     for r in range(3, start_row):
			
 
				         link = ws.cell(row=r, column=11).value
			
 
				-        if link: existing_links.add(str(link).strip())
			
 
				+        if link: current_links.add(str(link).strip())
			
 
				 
			
 
				     for i, product in enumerate(products):
			
 
				         row = start_row + i
			
@@ -92,33 +88,22 @@ def append_to_template(products, output_path, status_callback=None):
 
				         ws.cell(row=row, column=11, value=product.get('link', '')) 
			
 
				         ws.cell(row=row, column=12, value=product.get('supplier', ''))
			
 
				         
			
 
				-        link = product.get('link')
			
 
				-        if link: existing_links.add(str(link).strip())
			
 
				+        if product.get('link'): current_links.add(str(product['link']).strip())
			
 
				 
			
 
				-    # 2. 写入/更新计数 Sheet (第二个 Sheet)
			
 
				-    try:
			
 
				-        sheet_names = wb.sheetnames
			
 
				-        if "统计状态" not in sheet_names:
			
 
				-            wb.create_sheet("统计状态")
			
 
				-        
			
 
				-        ws_stat = wb["统计状态"]
			
 
				-        ws_stat.cell(row=1, column=1, value="已解析商品总数")
			
 
				-        ws_stat.cell(row=1, column=2, value=len(existing_links))
			
 
				-        ws_stat.cell(row=2, column=1, value="最后更新时间")
			
 
				-        ws_stat.cell(row=2, column=2, value=time.strftime("%Y-%m-%d %H:%M:%S"))
			
 
				-    except Exception as e:
			
 
				-        print(f"[!] 统计状态 Sheet 更新失败: {e}")
			
 
				+    # 写入/更新统计 Sheet
			
 
				+    if "统计状态" not in wb.sheetnames:
			
 
				+        wb.create_sheet("统计状态")
			
 
				+    ws_stat = wb["统计状态"]
			
 
				+    ws_stat.cell(row=1, column=1, value="已解析商品总数")
			
 
				+    ws_stat.cell(row=1, column=2, value=len(current_links))
			
 
				+    ws_stat.cell(row=2, column=1, value="最后更新时间")
			
 
				+    ws_stat.cell(row=2, column=2, value=time.strftime("%Y-%m-%d %H:%M:%S"))
			
 
				 
			
 
				-    # 3. 占用检测保存循环
			
 
				     while True:
			
 
				         try:
			
 
				             wb.save(output_path)
			
 
				-            if status_callback:
			
 
				-                status_callback(False, "写入成功")
			
 
				+            if status_callback: status_callback(False, "写入成功")
			
 
				             break
			
 
				         except PermissionError:
			
 
				-            msg = "文件被占用，请关闭 Excel"
			
 
				-            print(f"[!] {msg}: {output_path}")
			
 
				-            if status_callback:
			
 
				-                status_callback(True, msg)
			
 
				+            if status_callback: status_callback(True, "文件被占用，请关闭 Excel")
			
 
				             time.sleep(3)
			
--- a/src/gui.py
+++ b/src/gui.py
@@ -13,7 +13,6 @@ from src.scraper import Scraper1688
 
				 from src.excel_handler import append_to_template, get_existing_info
			
 
				 
			
 
				 def get_resource_path(relative_path):
			
 
				-    """ 获取资源绝对路径，兼容开发环境和 PyInstaller 打包环境 """
			
 
				     if hasattr(sys, '_MEIPASS'):
			
 
				         return os.path.join(sys._MEIPASS, relative_path)
			
 
				     return os.path.join(os.getcwd(), relative_path)
			
@@ -21,7 +20,6 @@ def get_resource_path(relative_path):
 
				 class ScraperThread(QThread):
			
 
				     progress = pyqtSignal(int)
			
 
				     log = pyqtSignal(str)
			
 
				-    # finished 信号增加耗时参数 (秒)
			
 
				     finished = pyqtSignal(str, object, float)
			
 
				 
			
 
				     def __init__(self, keyword, output_path, total_count, headless=True):
			
@@ -35,53 +33,39 @@ class ScraperThread(QThread):
 
				         scraper = None
			
 
				         start_time = time.time()
			
 
				         try:
			
 
				-            # 读取已抓取的链接，实现断点续爬
			
 
				             existing_links, _ = get_existing_info(self.output_path)
			
 
				-            if existing_links:
			
 
				-                self.log.emit(f"[*] 发现已有记录: {len(existing_links)} 条，将从新记录开始搜索...")
			
 
				-
			
 
				-            self.log.emit(f"<b>[*] 任务启动: {self.keyword}</b>")
			
 
				-            
			
 
				-            def status_cb(is_waiting, msg):
			
 
				-                if is_waiting:
			
 
				-                    self.log.emit(f"<font color='red' size='5'><b>!!! {msg} !!!</b></font>")
			
 
				-                else:
			
 
				-                    self.log.emit(f"<font color='green'><b>[√] {msg}</b></font>")
			
 
				-
			
 
				-            scraper = Scraper1688(headless=self.headless, status_callback=status_cb)
			
 
				             
			
 
				-            # 使用流式生成器抓取
			
 
				-            collected_count = 0
			
 
				-            
			
 
				-            # 记录初始商品数，用于断点续记
			
 
				             initial_p_count = 0
			
 
				             if os.path.exists(self.output_path):
			
 
				                 try:
			
 
				                     import openpyxl
			
 
				                     wb_tmp = openpyxl.load_workbook(self.output_path, data_only=True)
			
 
				-                    # 关键修改：先检查 Sheet 是否存在，避免 KeyError
			
 
				                     if "统计状态" in wb_tmp.sheetnames:
			
 
				                         val = wb_tmp["统计状态"].cell(row=1, column=2).value
			
 
				                         initial_p_count = int(val) if val is not None else 0
			
 
				                     wb_tmp.close()
			
 
				-                except Exception as e:
			
 
				-                    print(f"[*] 读取初始商品计数失败 (可能文件尚不包含统计页): {e}")
			
 
				-                    initial_p_count = 0
			
 
				+                except: pass
			
 
				+
			
 
				+            self.log.emit(f"<b>[*] 任务启动: {self.keyword}</b>")
			
 
				+            
			
 
				+            def status_cb(is_waiting, msg):
			
 
				+                if is_waiting: self.log.emit(f"<font color='red' size='5'><b>!!! {msg} !!!</b></font>")
			
 
				+                else: self.log.emit(f"<font color='green'><b>[√] {msg}</b></font>")
			
 
				 
			
 
				+            scraper = Scraper1688(headless=self.headless, status_callback=status_cb, log_callback=self.log.emit)
			
 
				+            
			
 
				+            collected_count = 0
			
 
				             product_index = initial_p_count
			
 
				             
			
 
				             for batch_results in scraper.search_products_yield(self.keyword, total_count=self.total_count, existing_links=existing_links):
			
 
				-                # 实时写入 Excel (此时 batch_results 为 10 条或页末余数)
			
 
				                 append_to_template(batch_results, self.output_path, status_callback=status_cb)
			
 
				                 
			
 
				-                # 计算本批次包含的独立商品数量并累加
			
 
				-                unique_links_in_batch = len(set(item.get('link') for item in batch_results if item.get('link')))
			
 
				-                product_index += unique_links_in_batch
			
 
				+                unique_links = len(set(item.get('link') for item in batch_results if item.get('link')))
			
 
				+                product_index += unique_links
			
 
				                 collected_count += len(batch_results)
			
 
				                 
			
 
				                 self.log.emit(f"[+] 解析到第 {product_index} 个商品，新增数据已持久化: {len(batch_results)} 条，本次共计: {collected_count}")
			
 
				                 
			
 
				-                # 进度条基于本次任务的目标数量
			
 
				                 current_task_done = product_index - initial_p_count
			
 
				                 prog = int((current_task_done / self.total_count) * 100)
			
 
				                 self.progress.emit(min(prog, 100))
			
@@ -109,17 +93,13 @@ class MainWindow(QMainWindow):
 
				     def initUI(self):
			
 
				         self.setWindowTitle("1688 产品信息实时抓取工具 v3.0")
			
 
				         self.setGeometry(100, 100, 1100, 750)
			
 
				-
			
 
				-        # 设置窗口图标
			
 
				         icon_path = get_resource_path("app.ico")
			
 
				-        if os.path.exists(icon_path):
			
 
				-            self.setWindowIcon(QIcon(icon_path))
			
 
				+        if os.path.exists(icon_path): self.setWindowIcon(QIcon(icon_path))
			
 
				 
			
 
				         central_widget = QWidget()
			
 
				         self.setCentralWidget(central_widget)
			
 
				         main_layout = QHBoxLayout(central_widget)
			
 
				 
			
 
				-        # 左侧类目树
			
 
				         left_widget = QWidget()
			
 
				         left_layout = QVBoxLayout(left_widget)
			
 
				         self.load_category_btn = QPushButton("选择类目文件")
			
@@ -133,7 +113,6 @@ class MainWindow(QMainWindow):
 
				         left_layout.addWidget(self.load_category_btn)
			
 
				         left_layout.addWidget(self.category_tree)
			
 
				 
			
 
				-        # 右侧操作区
			
 
				         right_widget = QWidget()
			
 
				         right_layout = QVBoxLayout(right_widget)
			
 
				 
			
@@ -155,8 +134,6 @@ class MainWindow(QMainWindow):
 
				 
			
 
				         action_layout = QHBoxLayout()
			
 
				         self.category_display = QLabel("请选择二级类目")
			
 
				-        
			
 
				-        # 抓取数量配置
			
 
				         count_layout = QHBoxLayout()
			
 
				         self.count_spin = QSpinBox()
			
 
				         self.count_spin.setRange(1, 10000)
			
@@ -166,22 +143,10 @@ class MainWindow(QMainWindow):
 
				         count_layout.addWidget(self.count_spin)
			
 
				         
			
 
				         self.search_btn = QPushButton("开始抓取")
			
 
				-        self.search_btn.setEnabled(False) # 初始置灰，直到选择类目和路径
			
 
				+        self.search_btn.setEnabled(False)
			
 
				         self.search_btn.clicked.connect(self.start_scraping)
			
 
				         self.search_btn.setMinimumHeight(50)
			
 
				-        self.search_btn.setStyleSheet("""
			
 
				-            QPushButton { 
			
 
				-                background-color: #0078d4; 
			
 
				-                color: white; 
			
 
				-                font-weight: bold; 
			
 
				-                font-size: 16px; 
			
 
				-                border-radius: 4px;
			
 
				-            }
			
 
				-            QPushButton:disabled { 
			
 
				-                background-color: #cccccc; 
			
 
				-                color: #888888; 
			
 
				-            }
			
 
				-        """)
			
 
				+        self.search_btn.setStyleSheet("QPushButton { background-color: #0078d4; color: white; font-weight: bold; font-size: 16px; border-radius: 4px; } QPushButton:disabled { background-color: #cccccc; color: #888888; }")
			
 
				         
			
 
				         action_layout.addWidget(QLabel("<font color='red'>*</font>检索类目:"))
			
 
				         action_layout.addWidget(self.category_display, 1)
			
@@ -242,32 +207,23 @@ class MainWindow(QMainWindow):
 
				             if self.output_base_path:
			
 
				                 full_p = os.path.normpath(os.path.join(self.output_base_path, "选品", self.selected_category_1, f"{self.selected_category_2}.xlsx"))
			
 
				                 self.path_display.setText(full_p)
			
 
				-                self.search_btn.setEnabled(True) # 仅在路径和类目都选好时启用按钮
			
 
				+                self.search_btn.setEnabled(True)
			
 
				 
			
 
				     def select_output_path(self):
			
 
				         p = QFileDialog.getExistingDirectory(self, "选择保存根目录")
			
 
				         if p: self.output_base_path = p; self.update_displays()
			
 
				 
			
 
				     def start_scraping(self):
			
 
				-        if not self.selected_category_2 or not self.output_base_path:
			
 
				-            self.log_output.append("<font color='red'>[错误] 请选择类目和输出路径</font>")
			
 
				-            return
			
 
				-        
			
 
				+        if not self.selected_category_2 or not self.output_base_path: return
			
 
				         target_dir = os.path.join(self.output_base_path, "选品", self.selected_category_1)
			
 
				         file_path = os.path.normpath(os.path.join(target_dir, f"{self.selected_category_2}.xlsx"))
			
 
				-        self.current_output_file = file_path # 记录当前文件用于最后打开
			
 
				-        
			
 
				-        # 启动抓取前不再删除旧文件，实现断点续爬功能
			
 
				-
			
 
				+        self.current_output_file = file_path
			
 
				         self.search_btn.setEnabled(False)
			
 
				-        self.count_spin.setEnabled(False) # 任务开始后也禁用数量输入
			
 
				+        self.count_spin.setEnabled(False)
			
 
				         self.status_label.setText("处理中……")
			
 
				         self.log_output.clear()
			
 
				         self.pbar.setValue(0)
			
 
				-        headless = not self.show_browser_cb.isChecked()
			
 
				-        total_count = self.count_spin.value()
			
 
				-        
			
 
				-        self.thread = ScraperThread(self.selected_category_2, file_path, total_count, headless)
			
 
				+        self.thread = ScraperThread(self.selected_category_2, file_path, self.count_spin.value(), not self.show_browser_cb.isChecked())
			
 
				         self.thread.log.connect(self.log_output.append)
			
 
				         self.thread.progress.connect(self.pbar.setValue)
			
 
				         self.thread.finished.connect(self.on_finished)
			
@@ -275,20 +231,13 @@ class MainWindow(QMainWindow):
 
				 
			
 
				     def on_finished(self, err, scraper, duration):
			
 
				         self.search_btn.setEnabled(True)
			
 
				-        self.count_spin.setEnabled(True) # 任务结束后恢复数量输入
			
 
				-        if scraper: self.active_scraper = scraper
			
 
				-        
			
 
				+        self.count_spin.setEnabled(True)
			
 
				         if not err:
			
 
				             self.status_label.setText("任务完成")
			
 
				-            # 自动打开目标文件
			
 
				             if hasattr(self, 'current_output_file') and os.path.exists(self.current_output_file):
			
 
				-                try:
			
 
				-                    os.startfile(self.current_output_file)
			
 
				-                    self.log_output.append(f"<font color='blue'>[系统] 已自动打开结果文件</font>")
			
 
				-                except Exception as e:
			
 
				-                    self.log_output.append(f"<font color='orange'>[警告] 无法自动打开文件: {e}</font>")
			
 
				-        else:
			
 
				-            self.status_label.setText("异常终止")
			
 
				+                try: os.startfile(self.current_output_file)
			
 
				+                except: pass
			
 
				+        else: self.status_label.setText("异常终止")
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				     app = QApplication(sys.argv)
			
--- a/src/scraper.py
+++ b/src/scraper.py
@@ -24,22 +24,15 @@ class Scraper1688:
 
				     def __init__(self, headless=True, status_callback=None, log_callback=None):
			
 
				         self.headless = headless
			
 
				         self.status_callback = status_callback
			
 
				-        self.log_callback = log_callback # 用于向 GUI 发送普通日志
			
 
				-        # 使用全新的独立目录，避开锁定冲突
			
 
				+        self.log_callback = log_callback
			
 
				         self.user_data_path = os.path.abspath(os.path.join(os.getcwd(), "chrome_stable_profile"))
			
 
				         self.driver = None
			
 
				-        
			
 
				-        # 1. 强制清理残留，解决 ConnectionResetError
			
 
				         self._cleanup()
			
 
				-        
			
 
				-        # 2. 启动浏览器
			
 
				         self._init_chrome(headless)
			
 
				-        
			
 
				         if self.driver:
			
 
				             stealth(self.driver, languages=["zh-CN", "zh"], vendor="Google Inc.", platform="Win32", fix_hairline=True)
			
 
				 
			
 
				     def _find_chrome(self):
			
 
				-        """ 通过注册表寻找 Chrome 精准安装路径 """
			
 
				         import winreg
			
 
				         reg_paths = [
			
 
				             (winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe"),
			
@@ -54,14 +47,10 @@ class Scraper1688:
 
				         return None
			
 
				 
			
 
				     def _cleanup(self):
			
 
				-        """ 杀掉所有残留进程，确保端口和文件未被锁定 """
			
 
				         if os.name == 'nt':
			
 
				             for proc in ['chrome.exe', 'chromedriver.exe']:
			
 
				-                try:
			
 
				-                    subprocess.call(['taskkill', '/F', '/IM', proc, '/T'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
			
 
				+                try: subprocess.call(['taskkill', '/F', '/IM', proc, '/T'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
			
 
				                 except: pass
			
 
				-        
			
 
				-        # 清理锁定文件
			
 
				         if os.path.exists(self.user_data_path):
			
 
				             for root, _, files in os.walk(self.user_data_path):
			
 
				                 for f in files:
			
@@ -70,9 +59,7 @@ class Scraper1688:
 
				                         except: pass
			
 
				 
			
 
				     def _init_chrome(self, headless):
			
 
				-        """ 强化版启动：解决浏览器不弹出及连接重置报错 """
			
 
				         chrome_path = self._find_chrome()
			
 
				-        
			
 
				         def create_options():
			
 
				             opts = uc.ChromeOptions()
			
 
				             opts.add_argument(f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36')
			
@@ -80,55 +67,26 @@ class Scraper1688:
 
				             if headless: opts.add_argument('--headless=new')
			
 
				             opts.add_argument('--disable-blink-features=AutomationControlled')
			
 
				             opts.add_argument("--window-size=1920,1080")
			
 
				-            # 兼容性全家桶
			
 
				             opts.add_argument("--no-sandbox")
			
 
				             opts.add_argument("--disable-dev-shm-usage")
			
 
				             opts.add_argument("--remote-allow-origins=*")
			
 
				-            opts.add_argument("--no-first-run")
			
 
				-            opts.add_argument("--no-default-browser-check")
			
 
				             return opts
			
 
				-
			
 
				-        print(f"[*] 正在物理启动 Chrome: {chrome_path}")
			
 
				         try:
			
 
				-            # 增加 use_subprocess=True，显著提升在 Win11 下的连接稳定性
			
 
				-            self.driver = uc.Chrome(
			
 
				-                options=create_options(), 
			
 
				-                headless=headless, 
			
 
				-                browser_executable_path=chrome_path,
			
 
				-                use_subprocess=True
			
 
				-            )
			
 
				-            print("[+] Chrome 浏览器已成功弹出！")
			
 
				+            self.driver = uc.Chrome(options=create_options(), headless=headless, browser_executable_path=chrome_path, use_subprocess=True)
			
 
				         except Exception as e:
			
 
				-            print(f"[*] 首次启动失败 ({e})，尝试自动兼容模式...")
			
 
				-            try:
			
 
				-                self._cleanup()
			
 
				-                time.sleep(2)
			
 
				-                # 兜底方案：不使用 subprocess
			
 
				-                self.driver = uc.Chrome(options=create_options(), headless=headless)
			
 
				-                print("[+] 自动兼容模式启动成功！")
			
 
				-            except Exception as e2:
			
 
				-                print(f"[致命错误] 无法启动 Chrome: {e2}")
			
 
				-                raise Exception("无法拉起 Chrome，请尝试关闭杀毒软件或重新安装 Chrome。")
			
 
				+            self.driver = uc.Chrome(options=create_options(), headless=headless, use_subprocess=True)
			
 
				 
			
 
				     def clean_url(self, url):
			
 
				-        """ 极其鲁棒的 ID 提取逻辑，强制转化为详情页链接，过滤店铺页 """
			
 
				         if not url: return ""
			
 
				         if url.startswith("//"): url = "https:" + url
			
 
				-        
			
 
				-        # 1. 尝试从各种路径模式中提取纯数字商品 ID
			
 
				         id_match = re.search(r'offer(?:Id|Ids)?/(\d+)\.html', url) or \
			
 
				                    re.search(r'[?&](?:offerId|offerIds|id)=(\d+)', url) or \
			
 
				                    re.search(r'object_id@(\d+)', url)
			
 
				-        
			
 
				         if id_match:
			
 
				-            # 只有提取到 ID 的才被认为是商品，统一转化为标准详情页格式
			
 
				             return f"https://detail.1688.com/offer/{id_match.group(1)}.html"
			
 
				-        
			
 
				-        # 2. 如果没提取到 ID（说明是店铺首页、广告页等），返回空以过滤掉
			
 
				         return ""
			
 
				 
			
 
				     def check_for_captcha(self):
			
 
				-        """ 检测登录、滑块、验证等状态 """
			
 
				         def is_blocked():
			
 
				             try:
			
 
				                 url, src, title = self.driver.current_url.lower(), self.driver.page_source.lower(), self.driver.title.lower()
			
@@ -138,16 +96,12 @@ class Scraper1688:
 
				                 is_punish = "punish" in url or "哎哟喂" in src or "验证码" in title
			
 
				                 return is_slider or is_login or is_punish
			
 
				             except: return False
			
 
				-        
			
 
				         if is_blocked():
			
 
				             msg = "请登录验证"
			
 
				             if self.status_callback: self.status_callback(True, msg)
			
 
				             while is_blocked(): time.sleep(3)
			
 
				             if self.status_callback: self.status_callback(False, "验证通过")
			
 
				-            
			
 
				-            cool_msg = "[*] 解封成功，进入 120 秒冷却期以规避风控追溯..."
			
 
				-            if self.log_callback: self.log_callback(f"<font color='orange'>{cool_msg}</font>")
			
 
				-            time.sleep(120) 
			
 
				+            time.sleep(3)
			
 
				         return True
			
 
				 
			
 
				     def search_products_yield(self, keyword, total_count=200, existing_links=None):
			
@@ -155,7 +109,6 @@ class Scraper1688:
 
				         base_url = f"https://s.1688.com/selloffer/offer_search.htm?keywords={gbk_keyword}&n=y&netType=1%2C11%2C16"
			
 
				         self.driver.get("https://www.1688.com")
			
 
				         self.check_for_captcha()
			
 
				-        
			
 
				         all_links = existing_links if existing_links is not None else set()
			
 
				         page, initial_count = 1, len(all_links)
			
 
				         
			
@@ -163,8 +116,8 @@ class Scraper1688:
 
				             print(f"[*] 正在处理列表页: 第 {page} 页...")
			
 
				             self.driver.get(f"{base_url}&beginPage={page}&page={page}")
			
 
				             self.check_for_captcha()
			
 
				-            for i in range(1, 5):
			
 
				-                self.driver.execute_script(f"window.scrollTo(0, document.body.scrollHeight * {i/4});")
			
 
				+            for i in range(1, 6):
			
 
				+                self.driver.execute_script(f"window.scrollTo(0, document.body.scrollHeight * {i/5});")
			
 
				                 time.sleep(1.5)
			
 
				 
			
 
				             page_results = self._extract_all_methods()
			
@@ -173,23 +126,24 @@ class Scraper1688:
 
				             page_batch = []
			
 
				             for it in page_results:
			
 
				                 clean_url = self.clean_url(it["link"])
			
 
				-                # 核心改进：只处理清洗成功的商品链接
			
 
				                 if clean_url and clean_url not in all_links:
			
 
				                     all_links.add(clean_url)
			
 
				                     print(f"  [>] 抓取详情: {clean_url}")
			
 
				                     detail_results = self.scrape_detail(clean_url)
			
 
				-                    if detail_results: page_batch.extend(detail_results)
			
 
				-                    else: page_batch.append({
			
 
				-                        "category": "", "brand": "", "name": it["name"],
			
 
				-                        "color": "", "spec": "", "material": "", "price": it.get("price", ""),
			
 
				-                        "moq": "", "wholesale_price": "", "link": clean_url, "supplier": ""
			
 
				-                    })
			
 
				+                    if detail_results:
			
 
				+                        page_batch.extend(detail_results)
			
 
				+                    else:
			
 
				+                        page_batch.append({
			
 
				+                            "category": "", "brand": "", "name": it.get("name", ""),
			
 
				+                            "color": "", "spec": "", "material": "", "price": "",
			
 
				+                            "moq": "", "wholesale_price": "", "link": clean_url, "supplier": ""
			
 
				+                        })
			
 
				                     
			
 
				                     if len(page_batch) >= 10:
			
 
				                         yield page_batch
			
 
				                         page_batch = []
			
 
				                     
			
 
				-                    time.sleep(random.uniform(15, 30)) # 保持慢速，确保长效稳定
			
 
				+                    time.sleep(random.uniform(15, 30))
			
 
				                     if len(all_links) >= total_count + initial_count: break
			
 
				             
			
 
				             if page_batch: yield page_batch
			
@@ -200,13 +154,16 @@ class Scraper1688:
 
				         return list(all_links)
			
 
				 
			
 
				     def scrape_detail(self, url):
			
 
				-        """ 极其精准的详情页解析：获取 expand-view-list-wrapper 中的款式描述 + 逐条价格 """
			
 
				+        """ 深度提取变体逻辑：款式描述 + 逐条价格 """
			
 
				         try:
			
 
				             self.driver.get(url)
			
 
				             time.sleep(random.uniform(5, 10))
			
 
				             self.check_for_captcha()
			
 
				             model = self.driver.execute_script(
			
 
				-                "return (window.context && window.context.result && window.context.result.global && window.context.result.global.globalData && window.context.result.global.globalData.model) || window.__INITIAL_DATA__ || window.iDetailData || window.iDetailConfig || null;"
			
 
				+                "return (window.context && window.context.result && "
			
 
				+                "window.context.result.global && window.context.result.global.globalData "
			
 
				+                "&& window.context.result.global.globalData.model) || "
			
 
				+                "window.__INITIAL_DATA__ || window.iDetailData || window.iDetailConfig || null;"
			
 
				             )
			
 
				             if not model: return None
			
 
				 
			
@@ -221,19 +178,15 @@ class Scraper1688:
 
				                 except: pass
			
 
				                 return ""
			
 
				 
			
 
				-            def safe_text(by, sel):
			
 
				-                try: return self.driver.find_element(by, sel).text.strip()
			
 
				-                except: return ""
			
 
				-
			
 
				             trade = model.get("tradeModel", {}) if isinstance(model, dict) else {}
			
 
				             ranges = trade.get("disPriceRanges") or trade.get("currentPrices") or []
			
 
				             range_text = " / ".join([f"{r.get('beginAmount')}起 ¥{r.get('price') or r.get('discountPrice')}" for r in ranges])
			
 
				 
			
 
				             base_data = {
			
 
				-                "category": (model.get("offerDetail", {}).get("leafCategoryName", "") if isinstance(model, dict) else "") or safe_text(By.CSS_SELECTOR, "div[class*=breadcrumb] a:last-child"),
			
 
				+                "category": (model.get("offerDetail", {}).get("leafCategoryName", "") if isinstance(model, dict) else "") or self.driver.find_element(By.CSS_SELECTOR, "div[class*=breadcrumb] a:last-child").text.strip(),
			
 
				                 "brand": get_attr("品牌"),
			
 
				                 "name": (model.get("offerDetail", {}).get("subject", "") if isinstance(model, dict) else "") or self.driver.title.split('-')[0],
			
 
				-                "spec": get_attr("尺码") or get_attr("规格") or get_attr("型号") or safe_text(By.XPATH, "//div[@id='productAttributes']//th[span='尺码' or span='规格']/following-sibling::td[1]//span[@class='field-value']"),
			
 
				+                "spec": get_attr("尺码") or get_attr("规格") or get_attr("型号"),
			
 
				                 "material": get_attr("材质") or get_attr("面料"),
			
 
				                 "price": "", 
			
 
				                 "moq": trade.get("beginAmount", ""),
			
@@ -244,21 +197,14 @@ class Scraper1688:
 
				 
			
 
				             variant_data_list = []
			
 
				             try:
			
 
				-                # 1. 核心需求：从 expand-view-list-wrapper 中提取文字和价格
			
 
				                 wrappers = self.driver.find_elements(By.CLASS_NAME, "expand-view-list-wrapper")
			
 
				                 if wrappers:
			
 
				-                    # 寻找容器下的条目
			
 
				                     items = wrappers[0].find_elements(By.CSS_SELECTOR, ".expand-view-list-item, [class*='list-item'], .sku-item")
			
 
				                     for item_el in items:
			
 
				                         try:
			
 
				-                            # 款式描述文字 (item-label)
			
 
				                             label = item_el.find_element(By.CLASS_NAME, "item-label").text.strip()
			
 
				-                            # 逐条价格 (item-price-stock)
			
 
				-                            price_raw = item_el.find_element(By.CLASS_NAME, "item-price-stock").text.strip()
			
 
				-                            # 清洗价格，只保留数字
			
 
				-                            price_clean = re.sub(r'[^\d.]', '', price_raw)
			
 
				-                            if label:
			
 
				-                                variant_data_list.append({"label": label, "price": price_clean})
			
 
				+                            price = item_el.find_element(By.CLASS_NAME, "item-price-stock").text.strip()
			
 
				+                            if label: variant_data_list.append({"label": label, "price": re.sub(r'[^\d.]', '', price)})
			
 
				                         except: continue
			
 
				             except: pass
			
 
				 
			
@@ -270,51 +216,43 @@ class Scraper1688:
 
				                     row["price"] = vd["price"]
			
 
				                     results.append(row)
			
 
				                 return results
			
 
				-            
			
 
				-            # 2. 方案 B: 如果 DOM 探测失败，回退到 JS 模型
			
 
				-            sku_props = model.get("skuModel", {}).get("skuProps", []) or model.get("detailData", {}).get("skuProps", []) or []
			
 
				-            main_prop = next((p for p in sku_props if any(k in p.get("prop", "") for k in ["颜色", "分类", "款式", "花色", "净含量"])), None)
			
 
				-            if not main_prop and sku_props: main_prop = sku_props[0]
			
 
				-            if main_prop and main_prop.get("value"):
			
 
				-                results = []
			
 
				-                for val in main_prop["value"]:
			
 
				-                    if val.get("name"):
			
 
				-                        row = base_data.copy()
			
 
				-                        row["color"] = val.get("name")
			
 
				-                        row["price"] = trade.get("minPrice", "")
			
 
				-                        results.append(row)
			
 
				-                return results
			
 
				-            
			
 
				-            base_data["price"] = trade.get("minPrice", "")
			
 
				             return [base_data]
			
 
				         except: return None
			
 
				 
			
 
				     def _extract_all_methods(self):
			
 
				-        """ 列表页提取 """
			
 
				+        """ 强化版：对标 req.py 的 JS 变量探测 """
			
 
				         results = []
			
 
				-        try:
			
 
				-            res = self.driver.execute_script("return JSON.stringify(window.data || window.__INITIAL_DATA__)")
			
 
				-            if res:
			
 
				-                data = json.loads(res)
			
 
				-                def find_list(obj):
			
 
				-                    if isinstance(obj, list) and len(obj) > 0 and ('title' in obj[0] or 'offerId' in obj[0]): return obj
			
 
				-                    if isinstance(obj, dict):
			
 
				-                        for k in obj:
			
 
				-                            f = find_list(obj[k])
			
 
				-                            if f: return f
			
 
				-                    return None
			
 
				-                for o in (find_list(data) or []):
			
 
				-                    link = o.get('itemUrl', o.get('url', ''))
			
 
				-                    if link and "similar_search" not in link:
			
 
				-                        results.append({"name": str(o.get('title', '')), "link": link})
			
 
				-        except: pass
			
 
				-        if not results:
			
 
				-            # 引入最新版 1688 选择器，确保能抓到详情链接
			
 
				-            for s in [".sm-offer-item", ".offer-card-item", ".search-offer-item", "[class*='offer-card']", ".offer-item"]:
			
 
				-                for el in self.driver.find_elements(By.CSS_SELECTOR, s):
			
 
				+        scripts = [
			
 
				+            "return JSON.stringify(window.data || window.context?.result?.data || window.__INITIAL_DATA__)",
			
 
				+            "return JSON.stringify(window.context?.result?.global?.globalData?.data || null)"
			
 
				+        ]
			
 
				+        for s in scripts:
			
 
				+            try:
			
 
				+                res = self.driver.execute_script(s)
			
 
				+                if res and res != "null":
			
 
				+                    data = json.loads(res)
			
 
				+                    def find_list(obj):
			
 
				+                        if isinstance(obj, list) and len(obj) > 0 and isinstance(obj[0], dict) and any(k in obj[0] for k in ['offerId', 'title', 'subject']): return obj
			
 
				+                        if isinstance(obj, dict):
			
 
				+                            for k in obj:
			
 
				+                                f = find_list(obj[k])
			
 
				+                                if f: return f
			
 
				+                        return None
			
 
				+                    for o in (find_list(data) or []):
			
 
				+                        link = o.get('itemUrl', o.get('url', ''))
			
 
				+                        if link and "similar_search" not in link:
			
 
				+                            results.append({"name": str(o.get('title', o.get('subject', ''))), "link": link})
			
 
				+                    if results: return results
			
 
				+            except: continue
			
 
				+        
			
 
				+        selectors = [".sm-offer-item", ".offer-card-item", ".search-offer-item", "[class*='offer-card']", ".offer-item"]
			
 
				+        for s in selectors:
			
 
				+            elements = self.driver.find_elements(By.CSS_SELECTOR, s)
			
 
				+            if len(elements) > 2:
			
 
				+                for el in elements:
			
 
				                     try:
			
 
				                         link = el.find_element(By.TAG_NAME, "a").get_attribute("href")
			
 
				-                        if link and ("offer" in link or "item" in link) and "similar_search" not in link:
			
 
				+                        if link and "similar_search" not in link:
			
 
				                             results.append({"name": el.text.split('\n')[0][:50], "link": link})
			
 
				                     except: continue
			
 
				                 if results: break