|
@@ -13,7 +13,8 @@ from src.scraper import Scraper1688
|
|
|
from src.excel_handler import append_to_template, get_existing_info
|
|
from src.excel_handler import append_to_template, get_existing_info
|
|
|
|
|
|
|
|
def get_resource_path(relative_path):
|
|
def get_resource_path(relative_path):
|
|
|
- if hasattr(sys, '_MEIPASS'): return os.path.join(sys._MEIPASS, relative_path)
|
|
|
|
|
|
|
+ if hasattr(sys, '_MEIPASS'):
|
|
|
|
|
+ return os.path.join(sys._MEIPASS, relative_path)
|
|
|
return os.path.join(os.getcwd(), relative_path)
|
|
return os.path.join(os.getcwd(), relative_path)
|
|
|
|
|
|
|
|
class ScraperThread(QThread):
|
|
class ScraperThread(QThread):
|
|
@@ -33,6 +34,7 @@ class ScraperThread(QThread):
|
|
|
start_time = time.time()
|
|
start_time = time.time()
|
|
|
try:
|
|
try:
|
|
|
existing_links, _ = get_existing_info(self.output_path)
|
|
existing_links, _ = get_existing_info(self.output_path)
|
|
|
|
|
+
|
|
|
initial_p_count = 0
|
|
initial_p_count = 0
|
|
|
if os.path.exists(self.output_path):
|
|
if os.path.exists(self.output_path):
|
|
|
try:
|
|
try:
|
|
@@ -45,20 +47,25 @@ class ScraperThread(QThread):
|
|
|
except: pass
|
|
except: pass
|
|
|
|
|
|
|
|
self.log.emit(f"<b>[*] 任务启动: {self.keyword}</b>")
|
|
self.log.emit(f"<b>[*] 任务启动: {self.keyword}</b>")
|
|
|
|
|
+
|
|
|
def status_cb(is_waiting, msg):
|
|
def status_cb(is_waiting, msg):
|
|
|
if is_waiting: self.log.emit(f"<font color='red' size='5'><b>!!! {msg} !!!</b></font>")
|
|
if is_waiting: self.log.emit(f"<font color='red' size='5'><b>!!! {msg} !!!</b></font>")
|
|
|
else: self.log.emit(f"<font color='green'><b>[√] {msg}</b></font>")
|
|
else: self.log.emit(f"<font color='green'><b>[√] {msg}</b></font>")
|
|
|
|
|
|
|
|
scraper = Scraper1688(headless=self.headless, status_callback=status_cb, log_callback=self.log.emit)
|
|
scraper = Scraper1688(headless=self.headless, status_callback=status_cb, log_callback=self.log.emit)
|
|
|
|
|
+
|
|
|
collected_count = 0
|
|
collected_count = 0
|
|
|
product_index = initial_p_count
|
|
product_index = initial_p_count
|
|
|
|
|
|
|
|
for batch_results in scraper.search_products_yield(self.keyword, total_count=self.total_count, existing_links=existing_links):
|
|
for batch_results in scraper.search_products_yield(self.keyword, total_count=self.total_count, existing_links=existing_links):
|
|
|
append_to_template(batch_results, self.output_path, status_callback=status_cb)
|
|
append_to_template(batch_results, self.output_path, status_callback=status_cb)
|
|
|
|
|
+
|
|
|
unique_links = len(set(item.get('link') for item in batch_results if item.get('link')))
|
|
unique_links = len(set(item.get('link') for item in batch_results if item.get('link')))
|
|
|
product_index += unique_links
|
|
product_index += unique_links
|
|
|
collected_count += len(batch_results)
|
|
collected_count += len(batch_results)
|
|
|
|
|
+
|
|
|
self.log.emit(f"[+] 解析到第 {product_index} 个商品,新增数据已持久化: {len(batch_results)} 条,本次共计: {collected_count}")
|
|
self.log.emit(f"[+] 解析到第 {product_index} 个商品,新增数据已持久化: {len(batch_results)} 条,本次共计: {collected_count}")
|
|
|
|
|
+
|
|
|
current_task_done = product_index - initial_p_count
|
|
current_task_done = product_index - initial_p_count
|
|
|
prog = int((current_task_done / self.total_count) * 100)
|
|
prog = int((current_task_done / self.total_count) * 100)
|
|
|
self.progress.emit(min(prog, 100))
|
|
self.progress.emit(min(prog, 100))
|
|
@@ -88,9 +95,11 @@ class MainWindow(QMainWindow):
|
|
|
self.setGeometry(100, 100, 1100, 750)
|
|
self.setGeometry(100, 100, 1100, 750)
|
|
|
icon_path = get_resource_path("app.ico")
|
|
icon_path = get_resource_path("app.ico")
|
|
|
if os.path.exists(icon_path): self.setWindowIcon(QIcon(icon_path))
|
|
if os.path.exists(icon_path): self.setWindowIcon(QIcon(icon_path))
|
|
|
|
|
+
|
|
|
central_widget = QWidget()
|
|
central_widget = QWidget()
|
|
|
self.setCentralWidget(central_widget)
|
|
self.setCentralWidget(central_widget)
|
|
|
main_layout = QHBoxLayout(central_widget)
|
|
main_layout = QHBoxLayout(central_widget)
|
|
|
|
|
+
|
|
|
left_widget = QWidget()
|
|
left_widget = QWidget()
|
|
|
left_layout = QVBoxLayout(left_widget)
|
|
left_layout = QVBoxLayout(left_widget)
|
|
|
self.load_category_btn = QPushButton("选择类目文件")
|
|
self.load_category_btn = QPushButton("选择类目文件")
|
|
@@ -103,13 +112,16 @@ class MainWindow(QMainWindow):
|
|
|
left_layout.addWidget(QLabel("<b>商品类目树</b>"))
|
|
left_layout.addWidget(QLabel("<b>商品类目树</b>"))
|
|
|
left_layout.addWidget(self.load_category_btn)
|
|
left_layout.addWidget(self.load_category_btn)
|
|
|
left_layout.addWidget(self.category_tree)
|
|
left_layout.addWidget(self.category_tree)
|
|
|
|
|
+
|
|
|
right_widget = QWidget()
|
|
right_widget = QWidget()
|
|
|
right_layout = QVBoxLayout(right_widget)
|
|
right_layout = QVBoxLayout(right_widget)
|
|
|
|
|
+
|
|
|
opt_layout = QHBoxLayout()
|
|
opt_layout = QHBoxLayout()
|
|
|
self.show_browser_cb = QCheckBox("显示浏览器界面 (手动过验证时勾选)")
|
|
self.show_browser_cb = QCheckBox("显示浏览器界面 (手动过验证时勾选)")
|
|
|
self.show_browser_cb.setChecked(True)
|
|
self.show_browser_cb.setChecked(True)
|
|
|
opt_layout.addWidget(self.show_browser_cb)
|
|
opt_layout.addWidget(self.show_browser_cb)
|
|
|
right_layout.addLayout(opt_layout)
|
|
right_layout.addLayout(opt_layout)
|
|
|
|
|
+
|
|
|
path_layout = QHBoxLayout()
|
|
path_layout = QHBoxLayout()
|
|
|
self.path_display = QLabel("未选择输出路径")
|
|
self.path_display = QLabel("未选择输出路径")
|
|
|
self.path_display.setStyleSheet("color: gray; border: 1px solid #ccc; padding: 5px;")
|
|
self.path_display.setStyleSheet("color: gray; border: 1px solid #ccc; padding: 5px;")
|
|
@@ -119,6 +131,7 @@ class MainWindow(QMainWindow):
|
|
|
path_layout.addWidget(self.path_display, 1)
|
|
path_layout.addWidget(self.path_display, 1)
|
|
|
path_layout.addWidget(self.select_path_btn)
|
|
path_layout.addWidget(self.select_path_btn)
|
|
|
right_layout.addLayout(path_layout)
|
|
right_layout.addLayout(path_layout)
|
|
|
|
|
+
|
|
|
action_layout = QHBoxLayout()
|
|
action_layout = QHBoxLayout()
|
|
|
self.category_display = QLabel("请选择二级类目")
|
|
self.category_display = QLabel("请选择二级类目")
|
|
|
count_layout = QHBoxLayout()
|
|
count_layout = QHBoxLayout()
|
|
@@ -128,16 +141,22 @@ class MainWindow(QMainWindow):
|
|
|
self.count_spin.setFixedWidth(80)
|
|
self.count_spin.setFixedWidth(80)
|
|
|
count_layout.addWidget(QLabel("抓取数量:"))
|
|
count_layout.addWidget(QLabel("抓取数量:"))
|
|
|
count_layout.addWidget(self.count_spin)
|
|
count_layout.addWidget(self.count_spin)
|
|
|
|
|
+
|
|
|
self.search_btn = QPushButton("开始抓取")
|
|
self.search_btn = QPushButton("开始抓取")
|
|
|
self.search_btn.setEnabled(False)
|
|
self.search_btn.setEnabled(False)
|
|
|
self.search_btn.clicked.connect(self.start_scraping)
|
|
self.search_btn.clicked.connect(self.start_scraping)
|
|
|
self.search_btn.setMinimumHeight(50)
|
|
self.search_btn.setMinimumHeight(50)
|
|
|
- self.search_btn.setStyleSheet("QPushButton { background-color: #0078d4; color: white; font-weight: bold; font-size: 16px; border-radius: 4px; } QPushButton:disabled { background-color: #cccccc; color: #888888; }")
|
|
|
|
|
|
|
+ self.search_btn.setStyleSheet("""
|
|
|
|
|
+ QPushButton { background-color: #0078d4; color: white; font-weight: bold; font-size: 16px; border-radius: 4px; }
|
|
|
|
|
+ QPushButton:disabled { background-color: #cccccc; color: #888888; }
|
|
|
|
|
+ """)
|
|
|
|
|
+
|
|
|
action_layout.addWidget(QLabel("<font color='red'>*</font>检索类目:"))
|
|
action_layout.addWidget(QLabel("<font color='red'>*</font>检索类目:"))
|
|
|
action_layout.addWidget(self.category_display, 1)
|
|
action_layout.addWidget(self.category_display, 1)
|
|
|
action_layout.addLayout(count_layout)
|
|
action_layout.addLayout(count_layout)
|
|
|
action_layout.addWidget(self.search_btn)
|
|
action_layout.addWidget(self.search_btn)
|
|
|
right_layout.addLayout(action_layout)
|
|
right_layout.addLayout(action_layout)
|
|
|
|
|
+
|
|
|
self.pbar = QProgressBar()
|
|
self.pbar = QProgressBar()
|
|
|
self.log_output = QTextEdit()
|
|
self.log_output = QTextEdit()
|
|
|
self.log_output.setReadOnly(True)
|
|
self.log_output.setReadOnly(True)
|
|
@@ -146,6 +165,7 @@ class MainWindow(QMainWindow):
|
|
|
right_layout.addWidget(self.pbar)
|
|
right_layout.addWidget(self.pbar)
|
|
|
self.status_label = QLabel("就绪")
|
|
self.status_label = QLabel("就绪")
|
|
|
right_layout.addWidget(self.status_label)
|
|
right_layout.addWidget(self.status_label)
|
|
|
|
|
+
|
|
|
splitter = QSplitter(Qt.Orientation.Horizontal)
|
|
splitter = QSplitter(Qt.Orientation.Horizontal)
|
|
|
splitter.addWidget(left_widget)
|
|
splitter.addWidget(left_widget)
|
|
|
splitter.addWidget(right_widget)
|
|
splitter.addWidget(right_widget)
|