aiqicha
This commit is contained in:
1
cookies/.gitignore
vendored
1
cookies/.gitignore
vendored
@@ -1 +1,2 @@
|
||||
*
|
||||
*.json
|
||||
@@ -85,6 +85,10 @@ def crawl_and_save_aiqicha_details(input_csv, output_csv):
|
||||
aiqicha_data = filtered_aiqicha_data
|
||||
print(f'过滤后剩余 {len(aiqicha_data)} 条爱企查数据待处理')
|
||||
|
||||
if len(aiqicha_data) <= 0:
|
||||
print("没有待处理的爱企查数据,退出")
|
||||
return
|
||||
|
||||
# 使用爱企查详情爬虫
|
||||
with AiqichaDetailCrawler() as crawler:
|
||||
company_details = []
|
||||
@@ -143,7 +147,7 @@ def crawl_and_save_aiqicha_details(input_csv, output_csv):
|
||||
# 添加延迟,避免请求过快
|
||||
|
||||
time.sleep(2)
|
||||
next_sleep_interval = random.uniform(5, 15)
|
||||
next_sleep_interval = random.uniform(3, 15)
|
||||
time.sleep(next_sleep_interval)
|
||||
|
||||
print(f"总共成功处理并保存了 {success_count} 条企业详情数据到 {output_csv}")
|
||||
|
||||
1
data/.gitignore
vendored
1
data/.gitignore
vendored
@@ -1 +1,2 @@
|
||||
*
|
||||
data.csv
|
||||
11925
demo/aiqicha-datail.html
Normal file
11925
demo/aiqicha-datail.html
Normal file
File diff suppressed because one or more lines are too long
@@ -27,12 +27,12 @@ class AiqichaDetailCrawler:
|
||||
else:
|
||||
print("已加载Cookie")
|
||||
|
||||
# 使用登录管理器检测登录状态
|
||||
logined = self.login_manager.check_and_login()
|
||||
if logined:
|
||||
print("登录成功")
|
||||
else:
|
||||
print("登录失败")
|
||||
# # 使用登录管理器检测登录状态
|
||||
# logined = self.login_manager.check_and_login()
|
||||
# if logined:
|
||||
# print("登录成功")
|
||||
# else:
|
||||
# print("登录失败")
|
||||
self.browser_started = True
|
||||
except Exception as e:
|
||||
print(f"启动浏览器失败: {e}")
|
||||
@@ -80,12 +80,39 @@ class AiqichaDetailCrawler:
|
||||
print("等待页面关键元素加载...")
|
||||
try:
|
||||
# 等待关键元素加载,增加超时时间
|
||||
self.browser.page.wait_for_selector('.addr-enter-bg-ele', timeout=15000)
|
||||
try:
|
||||
self.browser.page.wait_for_selector('.addr-enter-bg-ele', timeout=10000)
|
||||
except Exception as e:
|
||||
print(f"等待页面元素时出错: {e}")
|
||||
# self.browser.page.wait_for_selector('.addr-enter-bg-ele', timeout=10000)
|
||||
print("关键元素已加载")
|
||||
|
||||
# 使用登录管理器检测登录状态
|
||||
logined = self.login_manager.check_and_login()
|
||||
if logined:
|
||||
print("登录成功")
|
||||
else:
|
||||
print("登录失败")
|
||||
|
||||
# 关闭指定的弹窗 (aiqicha-datail.html 792-793 行对应的元素)
|
||||
try:
|
||||
# 查找并点击关闭按钮
|
||||
svip_modals = self.browser.page.query_selector_all('.clue-card-wrap')
|
||||
for modal in svip_modals:
|
||||
if modal:
|
||||
# 查找.ivu-modal-close关闭按钮
|
||||
close_button = modal.query_selector('a.ivu-modal-close')
|
||||
if close_button:
|
||||
close_button.click()
|
||||
print("已关闭SVIP弹窗")
|
||||
break
|
||||
except Exception as e:
|
||||
print(f"关闭弹窗时出错: {e}")
|
||||
|
||||
# 额外等待一段时间确保页面完全加载
|
||||
import time
|
||||
time.sleep(10)
|
||||
time.sleep(3)
|
||||
|
||||
print("额外等待完成,页面应该已完全加载")
|
||||
except Exception as e:
|
||||
print(f"等待页面元素时出错: {e}")
|
||||
|
||||
Reference in New Issue
Block a user