LuTong hai 2 meses
pai
achega
6428a97b08
Modificouse 1 ficheiro con 39 adicións e 19 borrados
  1. 39 19
      src/scraper.py

+ 39 - 19
src/scraper.py

@@ -210,33 +210,53 @@ class Scraper1688:
                 "supplier": (model.get("sellerModel", {}).get("companyName", "") if isinstance(model, dict) else ""),
             }
 
+            # --- 核心逻辑订正:精准识别变体区域并拆分多行数据 ---
             variant_results = []
             try:
-                # 按照用户提供的线索,精准锁定变体容器
+                # 按照用户提供的线索,锁定核心容器
+                # 兼容 expand-view-list 和 expand-view-list-wrapper
                 wrappers = self.driver.find_elements(By.CSS_SELECTOR, ".expand-view-list, .expand-view-list-wrapper")
                 if wrappers:
-                    # 寻找每一个变体子项条目
-                    items = wrappers[0].find_elements(By.CSS_SELECTOR, ".expand-view-list-item, [class*='list-item'], .sku-item")
-                    for item_el in items:
-                        try:
-                            # 描述文字文字 (item-label) -> 颜色列
-                            label = item_el.find_element(By.CLASS_NAME, "item-label").text.strip()
-                            # 逐条对应的价格 (item-price-stock) -> 单品进价列
-                            price_raw = item_el.find_element(By.CLASS_NAME, "item-price-stock").text.strip()
-                            # 价格清洗
-                            price_clean = re.sub(r'[^\d.]', '', price_raw)
-                            
-                            if label:
+                    # 1. 尝试直接获取所有 label 和 price 的对
+                    labels = wrappers[0].find_elements(By.CLASS_NAME, "item-label")
+                    prices = wrappers[0].find_elements(By.CLASS_NAME, "item-price-stock")
+                    
+                    if labels and prices and len(labels) == len(prices):
+                        for i in range(len(labels)):
+                            l_text = labels[i].text.strip()
+                            p_text = prices[i].text.strip()
+                            # 价格清洗:只保留数字和小数点
+                            p_clean = re.sub(r'[^\d.]', '', p_text)
+                            if l_text:
                                 row = base_data.copy()
-                                row["color"] = label
-                                row["price"] = price_clean
-                                # 如果 spec 还没拿,就把款式描述填入规格
-                                if not row["spec"]: row["spec"] = label
+                                row["color"] = l_text # 款式描述 -> 颜色列
+                                row["price"] = p_clean if p_clean else p_text # 逐条价格 -> 单品进价列
+                                if not row["spec"]: row["spec"] = l_text # 规格也同步填充
                                 variant_results.append(row)
-                        except: continue
-            except: pass
+                    
+                    # 2. 如果数量对不上,尝试按照子容器逐项提取
+                    if not variant_results:
+                        items = wrappers[0].find_elements(By.CSS_SELECTOR, ".expand-view-list-item, [class*='list-item'], .sku-item")
+                        for item_el in items:
+                            try:
+                                l_el = item_el.find_element(By.CLASS_NAME, "item-label")
+                                p_el = item_el.find_element(By.CLASS_NAME, "item-price-stock")
+                                if l_el and p_el:
+                                    l_text = l_el.text.strip()
+                                    p_text = p_el.text.strip()
+                                    p_clean = re.sub(r'[^\d.]', '', p_text)
+                                    if l_text:
+                                        row = base_data.copy()
+                                        row["color"] = l_text
+                                        row["price"] = p_clean if p_clean else p_text
+                                        if not row["spec"]: row["spec"] = l_text
+                                        variant_results.append(row)
+                            except: continue
+            except Exception as e:
+                print(f"  [!] 变体区域解析异常: {e}")
 
             if variant_results:
+                print(f"  [+] 成功解析到 {len(variant_results)} 个款式变体")
                 return variant_results
             return [base_data]
         except: return None