This commit is contained in:
manchuwork
2025-10-04 01:19:56 +08:00
parent 6d42506d4e
commit ce414ffc51
5 changed files with 11969 additions and 11 deletions

3
cookies/.gitignore vendored
View File

@@ -1 +1,2 @@
*
*
*.json

View File

@@ -85,6 +85,10 @@ def crawl_and_save_aiqicha_details(input_csv, output_csv):
aiqicha_data = filtered_aiqicha_data
print(f'过滤后剩余 {len(aiqicha_data)} 条爱企查数据待处理')
if len(aiqicha_data) <= 0:
print("没有待处理的爱企查数据,退出")
return
# 使用爱企查详情爬虫
with AiqichaDetailCrawler() as crawler:
company_details = []
@@ -143,7 +147,7 @@ def crawl_and_save_aiqicha_details(input_csv, output_csv):
# 添加延迟,避免请求过快
time.sleep(2)
next_sleep_interval = random.uniform(5, 15)
next_sleep_interval = random.uniform(3, 15)
time.sleep(next_sleep_interval)
print(f"总共成功处理并保存了 {success_count} 条企业详情数据到 {output_csv}")

3
data/.gitignore vendored
View File

@@ -1 +1,2 @@
*
*
data.csv

11925
demo/aiqicha-datail.html Normal file

File diff suppressed because one or more lines are too long

View File

@@ -27,12 +27,12 @@ class AiqichaDetailCrawler:
else:
print("已加载Cookie")
# 使用登录管理器检测登录状态
logined = self.login_manager.check_and_login()
if logined:
print("登录成功")
else:
print("登录失败")
# # 使用登录管理器检测登录状态
# logined = self.login_manager.check_and_login()
# if logined:
# print("登录成功")
# else:
# print("登录失败")
self.browser_started = True
except Exception as e:
print(f"启动浏览器失败: {e}")
@@ -80,12 +80,39 @@ class AiqichaDetailCrawler:
print("等待页面关键元素加载...")
try:
# 等待关键元素加载,增加超时时间
self.browser.page.wait_for_selector('.addr-enter-bg-ele', timeout=15000)
try:
self.browser.page.wait_for_selector('.addr-enter-bg-ele', timeout=10000)
except Exception as e:
print(f"等待页面元素时出错: {e}")
# self.browser.page.wait_for_selector('.addr-enter-bg-ele', timeout=10000)
print("关键元素已加载")
# 使用登录管理器检测登录状态
logined = self.login_manager.check_and_login()
if logined:
print("登录成功")
else:
print("登录失败")
# 关闭指定的弹窗 (aiqicha-datail.html 792-793 行对应的元素)
try:
# 查找并点击关闭按钮
svip_modals = self.browser.page.query_selector_all('.clue-card-wrap')
for modal in svip_modals:
if modal:
# 查找.ivu-modal-close关闭按钮
close_button = modal.query_selector('a.ivu-modal-close')
if close_button:
close_button.click()
print("已关闭SVIP弹窗")
break
except Exception as e:
print(f"关闭弹窗时出错: {e}")
# 额外等待一段时间确保页面完全加载
import time
time.sleep(10)
time.sleep(3)
print("额外等待完成,页面应该已完全加载")
except Exception as e:
print(f"等待页面元素时出错: {e}")