aiqicha
This commit is contained in:
1
cookies/.gitignore
vendored
1
cookies/.gitignore
vendored
@@ -1 +1,2 @@
|
|||||||
*
|
*
|
||||||
|
*.json
|
||||||
@@ -85,6 +85,10 @@ def crawl_and_save_aiqicha_details(input_csv, output_csv):
|
|||||||
aiqicha_data = filtered_aiqicha_data
|
aiqicha_data = filtered_aiqicha_data
|
||||||
print(f'过滤后剩余 {len(aiqicha_data)} 条爱企查数据待处理')
|
print(f'过滤后剩余 {len(aiqicha_data)} 条爱企查数据待处理')
|
||||||
|
|
||||||
|
if len(aiqicha_data) <= 0:
|
||||||
|
print("没有待处理的爱企查数据,退出")
|
||||||
|
return
|
||||||
|
|
||||||
# 使用爱企查详情爬虫
|
# 使用爱企查详情爬虫
|
||||||
with AiqichaDetailCrawler() as crawler:
|
with AiqichaDetailCrawler() as crawler:
|
||||||
company_details = []
|
company_details = []
|
||||||
@@ -143,7 +147,7 @@ def crawl_and_save_aiqicha_details(input_csv, output_csv):
|
|||||||
# 添加延迟,避免请求过快
|
# 添加延迟,避免请求过快
|
||||||
|
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
next_sleep_interval = random.uniform(5, 15)
|
next_sleep_interval = random.uniform(3, 15)
|
||||||
time.sleep(next_sleep_interval)
|
time.sleep(next_sleep_interval)
|
||||||
|
|
||||||
print(f"总共成功处理并保存了 {success_count} 条企业详情数据到 {output_csv}")
|
print(f"总共成功处理并保存了 {success_count} 条企业详情数据到 {output_csv}")
|
||||||
|
|||||||
1
data/.gitignore
vendored
1
data/.gitignore
vendored
@@ -1 +1,2 @@
|
|||||||
*
|
*
|
||||||
|
data.csv
|
||||||
11925
demo/aiqicha-datail.html
Normal file
11925
demo/aiqicha-datail.html
Normal file
File diff suppressed because one or more lines are too long
@@ -27,12 +27,12 @@ class AiqichaDetailCrawler:
|
|||||||
else:
|
else:
|
||||||
print("已加载Cookie")
|
print("已加载Cookie")
|
||||||
|
|
||||||
# 使用登录管理器检测登录状态
|
# # 使用登录管理器检测登录状态
|
||||||
logined = self.login_manager.check_and_login()
|
# logined = self.login_manager.check_and_login()
|
||||||
if logined:
|
# if logined:
|
||||||
print("登录成功")
|
# print("登录成功")
|
||||||
else:
|
# else:
|
||||||
print("登录失败")
|
# print("登录失败")
|
||||||
self.browser_started = True
|
self.browser_started = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"启动浏览器失败: {e}")
|
print(f"启动浏览器失败: {e}")
|
||||||
@@ -80,12 +80,39 @@ class AiqichaDetailCrawler:
|
|||||||
print("等待页面关键元素加载...")
|
print("等待页面关键元素加载...")
|
||||||
try:
|
try:
|
||||||
# 等待关键元素加载,增加超时时间
|
# 等待关键元素加载,增加超时时间
|
||||||
self.browser.page.wait_for_selector('.addr-enter-bg-ele', timeout=15000)
|
try:
|
||||||
|
self.browser.page.wait_for_selector('.addr-enter-bg-ele', timeout=10000)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"等待页面元素时出错: {e}")
|
||||||
|
# self.browser.page.wait_for_selector('.addr-enter-bg-ele', timeout=10000)
|
||||||
print("关键元素已加载")
|
print("关键元素已加载")
|
||||||
|
|
||||||
|
# 使用登录管理器检测登录状态
|
||||||
|
logined = self.login_manager.check_and_login()
|
||||||
|
if logined:
|
||||||
|
print("登录成功")
|
||||||
|
else:
|
||||||
|
print("登录失败")
|
||||||
|
|
||||||
|
# 关闭指定的弹窗 (aiqicha-datail.html 792-793 行对应的元素)
|
||||||
|
try:
|
||||||
|
# 查找并点击关闭按钮
|
||||||
|
svip_modals = self.browser.page.query_selector_all('.clue-card-wrap')
|
||||||
|
for modal in svip_modals:
|
||||||
|
if modal:
|
||||||
|
# 查找.ivu-modal-close关闭按钮
|
||||||
|
close_button = modal.query_selector('a.ivu-modal-close')
|
||||||
|
if close_button:
|
||||||
|
close_button.click()
|
||||||
|
print("已关闭SVIP弹窗")
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
print(f"关闭弹窗时出错: {e}")
|
||||||
|
|
||||||
# 额外等待一段时间确保页面完全加载
|
# 额外等待一段时间确保页面完全加载
|
||||||
import time
|
import time
|
||||||
time.sleep(10)
|
time.sleep(3)
|
||||||
|
|
||||||
print("额外等待完成,页面应该已完全加载")
|
print("额外等待完成,页面应该已完全加载")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"等待页面元素时出错: {e}")
|
print(f"等待页面元素时出错: {e}")
|
||||||
|
|||||||
Reference in New Issue
Block a user