修改aiqicha login逻辑, cookie文件位置

This commit is contained in:
manchuwork
2025-10-03 07:45:21 +08:00
parent ecf17dbf1d
commit dd368bb645
7 changed files with 59 additions and 17 deletions

25
.gitignore vendored Normal file
View File

@@ -0,0 +1,25 @@
.gitignore
.git/
.idea/
.vscode/
.DS_Store
.env
.env.local
.env.development.local
.env.test.local
.env.production.local
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
node_modules/
dist/
build/
.next/
.out/
.serverless/
cdk.out/
.vscode-test/
cookies/
data/

1
cookies/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
*

View File

@@ -346,7 +346,7 @@ if __name__ == '__main__':
 filter_list_with_not_match = filter_aiqicha_qcc(data_list, company_name, with_not_match=True)
 # 创建CSV工具实例
 csv_tool = CSVTool(
-csv_file_name='company_search_filter_is_none_data.csv',
+csv_file_name='data/company_search_filter_is_none_data.csv',
 headers=['company_name','title', 'web_site_type','url', 'request_url', 'create_time']
 )

1
data/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
*

View File

@@ -7,7 +7,7 @@ from tool.aiqicha_login import AiqichaLoginManager # 导入登录管理器
 from tool.aiqicha_detail_parser import AiqichaDetailParser # 导入解析器
 class AiqichaDetailCrawler:
-def __init__(self, cookie_path="aiqicha_cookies.json"):
+def __init__(self, cookie_path="cookies/aiqicha_cookies.json"):
 self.browser = WebBrowser(cookie_path)
 self.browser_started = False
 self.login_manager = None # 添加登录管理器实例
@@ -28,11 +28,11 @@ class AiqichaDetailCrawler:
 print("已加载Cookie")
 # 使用登录管理器检测登录状态
-# logined = self.login_manager.check_and_login()
+logined = self.login_manager.check_and_login()
-# if logined:
+if logined:
-# print("登录成功")
+print("登录成功")
-# else:
+else:
-# print("登录失败")
+print("登录失败")
 self.browser_started = True
 except Exception as e:
 print(f"启动浏览器失败: {e}")

View File

@@ -16,17 +16,32 @@ class AiqichaLoginManager:
 else:
 print("已加载Cookie验证登录状态...")
 # 加载cookie后访问页面验证是否真正登录
-self.browser.page.goto("https://aiqicha.baidu.com")
+# 判断当前的url地址 如果是 https://aiqicha.baidu.com/company_detail_* 地址
-# 等待页面加载完成
+if "company_detail" in self.browser.page.url:
-self.browser.page.wait_for_load_state("networkidle")
+# 等待页面加载完成
+self.browser.page.wait_for_load_state("networkidle")
+# 验证登录状态
+if not self.check_login_status():
+print("Cookie已过期或无效重新登录...")
+return self.login()
+else:
+print("Cookie有效已登录")
+return True
-# 验证登录状态
-if not self.check_login_status():
-print("Cookie已过期或无效重新登录...")
-return self.login()
 else:
-print("Cookie有效已登录")
+print("未登录或已过期,开始登录流程...")
-return True
+self.browser.page.goto("https://aiqicha.baidu.com")
+# 等待页面加载完成
+self.browser.page.wait_for_load_state("networkidle")
+# 验证登录状态
+if not self.check_login_status():
+print("Cookie已过期或无效重新登录...")
+return self.login()
+else:
+print("Cookie有效已登录")
+return True
 def check_login_status(self):
 """检测登录状态返回True表示已登录"""

View File

@@ -6,7 +6,7 @@ from tool.web_browser import WebBrowser
 class BingSearcher:
-def __init__(self, cookie_path="bing_cookies.json"):
+def __init__(self, cookie_path="cookies/bing_cookies.json"):
 self.browser = WebBrowser(cookie_path)
 self.browser_started = False