4 months ago · 6428a97b08
--- a/src/scraper.py
+++ b/src/scraper.py
@@ -210,33 +210,53 @@ class Scraper1688:
 
				                 "supplier": (model.get("sellerModel", {}).get("companyName", "") if isinstance(model, dict) else ""),
			
 
				             }
			
 
				 
			
 
				+            # --- 核心逻辑订正：精准识别变体区域并拆分多行数据 ---
			
 
				             variant_results = []
			
 
				             try:
			
 
				-                # 按照用户提供的线索，精准锁定变体容器
			
 
				+                # 按照用户提供的线索，锁定核心容器
			
 
				+                # 兼容 expand-view-list 和 expand-view-list-wrapper
			
 
				                 wrappers = self.driver.find_elements(By.CSS_SELECTOR, ".expand-view-list, .expand-view-list-wrapper")
			
 
				                 if wrappers:
			
 
				-                    # 寻找每一个变体子项条目
			
 
				-                    items = wrappers[0].find_elements(By.CSS_SELECTOR, ".expand-view-list-item, [class*='list-item'], .sku-item")
			
 
				-                    for item_el in items:
			
 
				-                        try:
			
 
				-                            # 描述文字文字 (item-label) -> 颜色列
			
 
				-                            label = item_el.find_element(By.CLASS_NAME, "item-label").text.strip()
			
 
				-                            # 逐条对应的价格 (item-price-stock) -> 单品进价列
			
 
				-                            price_raw = item_el.find_element(By.CLASS_NAME, "item-price-stock").text.strip()
			
 
				-                            # 价格清洗
			
 
				-                            price_clean = re.sub(r'[^\d.]', '', price_raw)
			
 
				-                            
			
 
				-                            if label:
			
 
				+                    # 1. 尝试直接获取所有 label 和 price 的对
			
 
				+                    labels = wrappers[0].find_elements(By.CLASS_NAME, "item-label")
			
 
				+                    prices = wrappers[0].find_elements(By.CLASS_NAME, "item-price-stock")
			
 
				+                    
			
 
				+                    if labels and prices and len(labels) == len(prices):
			
 
				+                        for i in range(len(labels)):
			
 
				+                            l_text = labels[i].text.strip()
			
 
				+                            p_text = prices[i].text.strip()
			
 
				+                            # 价格清洗：只保留数字和小数点
			
 
				+                            p_clean = re.sub(r'[^\d.]', '', p_text)
			
 
				+                            if l_text:
			
 
				                                 row = base_data.copy()
			
 
				-                                row["color"] = label
			
 
				-                                row["price"] = price_clean
			
 
				-                                # 如果 spec 还没拿，就把款式描述填入规格
			
 
				-                                if not row["spec"]: row["spec"] = label
			
 
				+                                row["color"] = l_text # 款式描述 -> 颜色列
			
 
				+                                row["price"] = p_clean if p_clean else p_text # 逐条价格 -> 单品进价列
			
 
				+                                if not row["spec"]: row["spec"] = l_text # 规格也同步填充
			
 
				                                 variant_results.append(row)
			
 
				-                        except: continue
			
 
				-            except: pass
			
 
				+                    
			
 
				+                    # 2. 如果数量对不上，尝试按照子容器逐项提取
			
 
				+                    if not variant_results:
			
 
				+                        items = wrappers[0].find_elements(By.CSS_SELECTOR, ".expand-view-list-item, [class*='list-item'], .sku-item")
			
 
				+                        for item_el in items:
			
 
				+                            try:
			
 
				+                                l_el = item_el.find_element(By.CLASS_NAME, "item-label")
			
 
				+                                p_el = item_el.find_element(By.CLASS_NAME, "item-price-stock")
			
 
				+                                if l_el and p_el:
			
 
				+                                    l_text = l_el.text.strip()
			
 
				+                                    p_text = p_el.text.strip()
			
 
				+                                    p_clean = re.sub(r'[^\d.]', '', p_text)
			
 
				+                                    if l_text:
			
 
				+                                        row = base_data.copy()
			
 
				+                                        row["color"] = l_text
			
 
				+                                        row["price"] = p_clean if p_clean else p_text
			
 
				+                                        if not row["spec"]: row["spec"] = l_text
			
 
				+                                        variant_results.append(row)
			
 
				+                            except: continue
			
 
				+            except Exception as e:
			
 
				+                print(f"  [!] 变体区域解析异常: {e}")
			
 
				 
			
 
				             if variant_results:
			
 
				+                print(f"  [+] 成功解析到 {len(variant_results)} 个款式变体")
			
 
				                 return variant_results
			
 
				             return [base_data]
			
 
				         except: return None