|
|
@@ -210,33 +210,53 @@ class Scraper1688:
|
|
|
"supplier": (model.get("sellerModel", {}).get("companyName", "") if isinstance(model, dict) else ""),
|
|
|
}
|
|
|
|
|
|
+ # --- 核心逻辑订正:精准识别变体区域并拆分多行数据 ---
|
|
|
variant_results = []
|
|
|
try:
|
|
|
- # 按照用户提供的线索,精准锁定变体容器
|
|
|
+ # 按照用户提供的线索,锁定核心容器
|
|
|
+ # 兼容 expand-view-list 和 expand-view-list-wrapper
|
|
|
wrappers = self.driver.find_elements(By.CSS_SELECTOR, ".expand-view-list, .expand-view-list-wrapper")
|
|
|
if wrappers:
|
|
|
- # 寻找每一个变体子项条目
|
|
|
- items = wrappers[0].find_elements(By.CSS_SELECTOR, ".expand-view-list-item, [class*='list-item'], .sku-item")
|
|
|
- for item_el in items:
|
|
|
- try:
|
|
|
- # 描述文字文字 (item-label) -> 颜色列
|
|
|
- label = item_el.find_element(By.CLASS_NAME, "item-label").text.strip()
|
|
|
- # 逐条对应的价格 (item-price-stock) -> 单品进价列
|
|
|
- price_raw = item_el.find_element(By.CLASS_NAME, "item-price-stock").text.strip()
|
|
|
- # 价格清洗
|
|
|
- price_clean = re.sub(r'[^\d.]', '', price_raw)
|
|
|
-
|
|
|
- if label:
|
|
|
+ # 1. 尝试直接获取所有 label 和 price 的对
|
|
|
+ labels = wrappers[0].find_elements(By.CLASS_NAME, "item-label")
|
|
|
+ prices = wrappers[0].find_elements(By.CLASS_NAME, "item-price-stock")
|
|
|
+
|
|
|
+ if labels and prices and len(labels) == len(prices):
|
|
|
+ for i in range(len(labels)):
|
|
|
+ l_text = labels[i].text.strip()
|
|
|
+ p_text = prices[i].text.strip()
|
|
|
+ # 价格清洗:只保留数字和小数点
|
|
|
+ p_clean = re.sub(r'[^\d.]', '', p_text)
|
|
|
+ if l_text:
|
|
|
row = base_data.copy()
|
|
|
- row["color"] = label
|
|
|
- row["price"] = price_clean
|
|
|
- # 如果 spec 还没拿,就把款式描述填入规格
|
|
|
- if not row["spec"]: row["spec"] = label
|
|
|
+ row["color"] = l_text # 款式描述 -> 颜色列
|
|
|
+ row["price"] = p_clean if p_clean else p_text # 逐条价格 -> 单品进价列
|
|
|
+ if not row["spec"]: row["spec"] = l_text # 规格也同步填充
|
|
|
variant_results.append(row)
|
|
|
- except: continue
|
|
|
- except: pass
|
|
|
+
|
|
|
+ # 2. 如果数量对不上,尝试按照子容器逐项提取
|
|
|
+ if not variant_results:
|
|
|
+ items = wrappers[0].find_elements(By.CSS_SELECTOR, ".expand-view-list-item, [class*='list-item'], .sku-item")
|
|
|
+ for item_el in items:
|
|
|
+ try:
|
|
|
+ l_el = item_el.find_element(By.CLASS_NAME, "item-label")
|
|
|
+ p_el = item_el.find_element(By.CLASS_NAME, "item-price-stock")
|
|
|
+ if l_el and p_el:
|
|
|
+ l_text = l_el.text.strip()
|
|
|
+ p_text = p_el.text.strip()
|
|
|
+ p_clean = re.sub(r'[^\d.]', '', p_text)
|
|
|
+ if l_text:
|
|
|
+ row = base_data.copy()
|
|
|
+ row["color"] = l_text
|
|
|
+ row["price"] = p_clean if p_clean else p_text
|
|
|
+ if not row["spec"]: row["spec"] = l_text
|
|
|
+ variant_results.append(row)
|
|
|
+ except: continue
|
|
|
+ except Exception as e:
|
|
|
+ print(f" [!] 变体区域解析异常: {e}")
|
|
|
|
|
|
if variant_results:
|
|
|
+ print(f" [+] 成功解析到 {len(variant_results)} 个款式变体")
|
|
|
return variant_results
|
|
|
return [base_data]
|
|
|
except: return None
|