|
|
@@ -215,34 +215,55 @@ class Scraper1688:
|
|
|
"supplier": (model.get("sellerModel", {}).get("companyName", "") if isinstance(model, dict) else ""),
|
|
|
}
|
|
|
|
|
|
- variant_results = []
|
|
|
+ # --- 核心逻辑订正:智能识别规格区域(获取款式名称和逐条价格) ---
|
|
|
+ variant_data_list = []
|
|
|
try:
|
|
|
- # 按照用户提供的 class "expand-view-list" 进行锁定
|
|
|
- wrappers = self.driver.find_elements(By.CSS_SELECTOR, ".expand-view-list, .expand-view-list-wrapper")
|
|
|
- if wrappers:
|
|
|
- # 寻找每一个变体条目
|
|
|
- items = wrappers[0].find_elements(By.CSS_SELECTOR, ".expand-view-list-item, [class*='list-item'], .sku-item")
|
|
|
+ # 按照用户提供的线索,优先尝试多种可能的 Class 名
|
|
|
+ selectors = [".expand-view-list", ".expand-view-list-wrapper", ".sku-wrapper", ".obj-sku"]
|
|
|
+ wrapper = None
|
|
|
+ for s in selectors:
|
|
|
+ found = self.driver.find_elements(By.CSS_SELECTOR, s)
|
|
|
+ if found and found[0].is_displayed():
|
|
|
+ wrapper = found[0]
|
|
|
+ break
|
|
|
+
|
|
|
+ if wrapper:
|
|
|
+ # 获取该容器下的所有子项条目
|
|
|
+ items = wrapper.find_elements(By.CSS_SELECTOR, ".expand-view-list-item, [class*='list-item'], .sku-item, .obj-sku-item")
|
|
|
+
|
|
|
for item_el in items:
|
|
|
try:
|
|
|
# 提取款式描述文字 (item-label) -> 对应 Excel “颜色”列
|
|
|
- label = item_el.find_element(By.CLASS_NAME, "item-label").text.strip()
|
|
|
+ label_el = item_el.find_elements(By.CLASS_NAME, "item-label")
|
|
|
# 提取逐条价格 (item-price-stock) -> 对应 Excel “单品进价(元)”列
|
|
|
- price_raw = item_el.find_element(By.CLASS_NAME, "item-price-stock").text.strip()
|
|
|
- # 价格清洗
|
|
|
- price_clean = re.sub(r'[^\d.]', '', price_raw)
|
|
|
+ price_el = item_el.find_elements(By.CLASS_NAME, "item-price-stock")
|
|
|
|
|
|
- if label:
|
|
|
- row = base_data.copy()
|
|
|
- row["color"] = label
|
|
|
- row["price"] = price_clean
|
|
|
- variant_results.append(row)
|
|
|
+ if label_el and price_el:
|
|
|
+ label_text = label_el[0].text.strip()
|
|
|
+ price_text = price_el[0].text.strip()
|
|
|
+ # 清洗价格,只保留数字和小数点
|
|
|
+ price_clean = re.sub(r'[^\d.]', '', price_text)
|
|
|
+
|
|
|
+ if label_text:
|
|
|
+ variant_data_list.append({
|
|
|
+ "label": label_text,
|
|
|
+ "price": price_clean if price_clean else price_text
|
|
|
+ })
|
|
|
except: continue
|
|
|
- except: pass
|
|
|
+ except Exception as e:
|
|
|
+ print(f" [!] DOM 变体解析异常: {e}")
|
|
|
|
|
|
- if variant_results:
|
|
|
- return variant_results
|
|
|
+ if variant_data_list:
|
|
|
+ print(f" [+] 成功解析到 {len(variant_data_list)} 个款式变体")
|
|
|
+ results = []
|
|
|
+ for vd in variant_data_list:
|
|
|
+ row = base_data.copy()
|
|
|
+ row["color"] = vd["label"]
|
|
|
+ row["price"] = vd["price"]
|
|
|
+ results.append(row)
|
|
|
+ return results
|
|
|
|
|
|
- # 方案 B: 回退到模型提取
|
|
|
+ # --- 方案 B:如果 DOM 探测失败,回退到 JS 模型提取 ---
|
|
|
sku_props = model.get("skuModel", {}).get("skuProps", []) or model.get("detailData", {}).get("skuProps", []) or []
|
|
|
main_prop = next((p for p in sku_props if any(k in p.get("prop", "") for k in ["颜色", "分类", "款式", "花色", "净含量"])), None)
|
|
|
if not main_prop and sku_props: main_prop = sku_props[0]
|