LuTong 3 месяца назад
Родитель
Commit
f0ed327ada
1 изменённый файл: 40 добавлений и 19 удалений
  1. 40 19
      src/scraper.py

+ 40 - 19
src/scraper.py

@@ -215,34 +215,55 @@ class Scraper1688:
                 "supplier": (model.get("sellerModel", {}).get("companyName", "") if isinstance(model, dict) else ""),
             }
 
-            variant_results = []
+            # --- 核心逻辑订正:智能识别规格区域(获取款式名称和逐条价格) ---
+            variant_data_list = []
             try:
-                # 按照用户提供的 class "expand-view-list" 进行锁定
-                wrappers = self.driver.find_elements(By.CSS_SELECTOR, ".expand-view-list, .expand-view-list-wrapper")
-                if wrappers:
-                    # 寻找每一个变体条目
-                    items = wrappers[0].find_elements(By.CSS_SELECTOR, ".expand-view-list-item, [class*='list-item'], .sku-item")
+                # 按照用户提供的线索,优先尝试多种可能的 Class 名
+                selectors = [".expand-view-list", ".expand-view-list-wrapper", ".sku-wrapper", ".obj-sku"]
+                wrapper = None
+                for s in selectors:
+                    found = self.driver.find_elements(By.CSS_SELECTOR, s)
+                    if found and found[0].is_displayed():
+                        wrapper = found[0]
+                        break
+                
+                if wrapper:
+                    # 获取该容器下的所有子项条目
+                    items = wrapper.find_elements(By.CSS_SELECTOR, ".expand-view-list-item, [class*='list-item'], .sku-item, .obj-sku-item")
+                    
                     for item_el in items:
                         try:
                             # 提取款式描述文字 (item-label) -> 对应 Excel “颜色”列
-                            label = item_el.find_element(By.CLASS_NAME, "item-label").text.strip()
+                            label_el = item_el.find_elements(By.CLASS_NAME, "item-label")
                             # 提取逐条价格 (item-price-stock) -> 对应 Excel “单品进价(元)”列
-                            price_raw = item_el.find_element(By.CLASS_NAME, "item-price-stock").text.strip()
-                            # 价格清洗
-                            price_clean = re.sub(r'[^\d.]', '', price_raw)
+                            price_el = item_el.find_elements(By.CLASS_NAME, "item-price-stock")
                             
-                            if label:
-                                row = base_data.copy()
-                                row["color"] = label
-                                row["price"] = price_clean
-                                variant_results.append(row)
+                            if label_el and price_el:
+                                label_text = label_el[0].text.strip()
+                                price_text = price_el[0].text.strip()
+                                # 清洗价格,只保留数字和小数点
+                                price_clean = re.sub(r'[^\d.]', '', price_text)
+                                
+                                if label_text:
+                                    variant_data_list.append({
+                                        "label": label_text,
+                                        "price": price_clean if price_clean else price_text
+                                    })
                         except: continue
-            except: pass
+            except Exception as e:
+                print(f"  [!] DOM 变体解析异常: {e}")
 
-            if variant_results:
-                return variant_results
+            if variant_data_list:
+                print(f"  [+] 成功解析到 {len(variant_data_list)} 个款式变体")
+                results = []
+                for vd in variant_data_list:
+                    row = base_data.copy()
+                    row["color"] = vd["label"]
+                    row["price"] = vd["price"]
+                    results.append(row)
+                return results
 
-            # 方案 B: 回退到模型提取
+            # --- 方案 B:如果 DOM 探测失败,回退到 JS 模型提取 ---
             sku_props = model.get("skuModel", {}).get("skuProps", []) or model.get("detailData", {}).get("skuProps", []) or []
             main_prop = next((p for p in sku_props if any(k in p.get("prop", "") for k in ["颜色", "分类", "款式", "花色", "净含量"])), None)
             if not main_prop and sku_props: main_prop = sku_props[0]