|
|
@@ -272,7 +272,49 @@ class Scraper1688:
|
|
|
or safe_text(By.CSS_SELECTOR, "div.company-name"),
|
|
|
}
|
|
|
|
|
|
- # --- 核心逻辑:拆分规格/颜色分类 ---
|
|
|
+ # --- 核心逻辑订正:智能识别规格区域(优先使用 DOM 检查,获取款式名称和逐条价格) ---
|
|
|
+ variant_data_list = []
|
|
|
+ try:
|
|
|
+ # 尝试用户提供的特定 DOM 容器
|
|
|
+ wrappers = self.driver.find_elements(By.CLASS_NAME, "expand-view-list-wrapper")
|
|
|
+ if wrappers:
|
|
|
+ # 获取该容器下的所有子项容器(通常包含 item-label 和 item-price-stock)
|
|
|
+ # 尝试定位包含这两者的条目级容器
|
|
|
+ items = wrappers[0].find_elements(By.CSS_SELECTOR, ".expand-view-list-item, [class*='list-item'], .sku-item")
|
|
|
+
|
|
|
+ if not items:
|
|
|
+ # 如果找不到明确的子项容器,则根据 label 元素反向寻找或直接成对提取
|
|
|
+ labels = wrappers[0].find_elements(By.CLASS_NAME, "item-label")
|
|
|
+ prices = wrappers[0].find_elements(By.CLASS_NAME, "item-price-stock")
|
|
|
+ for l, p in zip(labels, prices):
|
|
|
+ variant_data_list.append({
|
|
|
+ "label": l.text.strip(),
|
|
|
+ "price": p.text.strip()
|
|
|
+ })
|
|
|
+ else:
|
|
|
+ for item_el in items:
|
|
|
+ try:
|
|
|
+ label = item_el.find_element(By.CLASS_NAME, "item-label").text.strip()
|
|
|
+ price = item_el.find_element(By.CLASS_NAME, "item-price-stock").text.strip()
|
|
|
+ if label:
|
|
|
+ variant_data_list.append({"label": label, "price": price})
|
|
|
+ except: continue
|
|
|
+ except: pass
|
|
|
+
|
|
|
+ if variant_data_list:
|
|
|
+ results = []
|
|
|
+ for vd in variant_data_list:
|
|
|
+ row = base_data.copy()
|
|
|
+ # 款式描述写入“颜色”列
|
|
|
+ row["color"] = vd["label"]
|
|
|
+ # 逐条价格写入“单品进价(元)”列 (即 price 键)
|
|
|
+ # 清洗价格,移除 ¥ 等非数字字符,只保留数字和小数点
|
|
|
+ clean_price = re.sub(r'[^\d.]', '', vd["price"])
|
|
|
+ row["price"] = clean_price if clean_price else vd["price"]
|
|
|
+ results.append(row)
|
|
|
+ return results
|
|
|
+
|
|
|
+ # --- 方案 B:如果 DOM 探测失败,回退到 JS 模型提取 ---
|
|
|
sku_props = []
|
|
|
try:
|
|
|
# 尝试多种路径获取 SKU 属性
|
|
|
@@ -282,18 +324,17 @@ class Scraper1688:
|
|
|
except: pass
|
|
|
|
|
|
# 智能寻找主维度:
|
|
|
- # 1. 优先找包含“颜色”、“分类”、“款式”、“花色”的维度
|
|
|
- # 2. 如果没有,则取第一个 SKU 维度(例如“净含量”、“规格”等)
|
|
|
+ # 1. 优先找包含“颜色”、“分类”、“款式”、“花色”、“净含量”、“规格”的维度
|
|
|
+ # 2. 如果没有,则取第一个 SKU 维度
|
|
|
main_prop = None
|
|
|
if sku_props:
|
|
|
- main_prop = next((p for p in sku_props if any(k in p.get("prop", "") for k in ["颜色", "分类", "款式", "花色"])), None)
|
|
|
+ main_prop = next((p for p in sku_props if any(k in p.get("prop", "") for k in ["颜色", "分类", "款式", "花色", "净含量", "规格"])), None)
|
|
|
if not main_prop:
|
|
|
main_prop = sku_props[0]
|
|
|
|
|
|
if main_prop and main_prop.get("value"):
|
|
|
variant_results = []
|
|
|
for val in main_prop["value"]:
|
|
|
- # 只有当该分类确实有名字时才记录
|
|
|
variant_name = val.get("name")
|
|
|
if variant_name:
|
|
|
row = base_data.copy()
|
|
|
@@ -301,8 +342,8 @@ class Scraper1688:
|
|
|
variant_results.append(row)
|
|
|
return variant_results
|
|
|
else:
|
|
|
- # 兜底:如果没有发现规格选择区,则获取单属性颜色
|
|
|
- base_data["color"] = get_attr("颜色") or get_attr("颜色分类") or ""
|
|
|
+ # 最终兜底:单属性
|
|
|
+ base_data["color"] = get_attr("颜色") or get_attr("颜色分类") or get_attr("净含量") or ""
|
|
|
return [base_data]
|
|
|
|
|
|
except Exception as e:
|