修改 aiqicha 登录逻辑,并调整 cookie 文件的存放位置
This commit is contained in:
25
.gitignore
vendored
Normal file
25
.gitignore
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
|
||||
.gitignore
|
||||
.git/
|
||||
.idea/
|
||||
.vscode/
|
||||
.DS_Store
|
||||
.env
|
||||
.env.local
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
pnpm-debug.log*
|
||||
node_modules/
|
||||
dist/
|
||||
build/
|
||||
.next/
|
||||
.out/
|
||||
.serverless/
|
||||
cdk.out/
|
||||
.vscode-test/
|
||||
cookies/
|
||||
data/
|
||||
1
cookies/.gitignore
vendored
Normal file
1
cookies/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
*
|
||||
@@ -346,7 +346,7 @@ if __name__ == '__main__':
|
||||
filter_list_with_not_match = filter_aiqicha_qcc(data_list, company_name, with_not_match=True)
|
||||
# 创建CSV工具实例
|
||||
csv_tool = CSVTool(
|
||||
csv_file_name='company_search_filter_is_none_data.csv',
|
||||
csv_file_name='data/company_search_filter_is_none_data.csv',
|
||||
headers=['company_name','title', 'web_site_type','url', 'request_url', 'create_time']
|
||||
)
|
||||
|
||||
|
||||
1
data/.gitignore
vendored
Normal file
1
data/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
*
|
||||
@@ -7,7 +7,7 @@ from tool.aiqicha_login import AiqichaLoginManager # 导入登录管理器
|
||||
from tool.aiqicha_detail_parser import AiqichaDetailParser # 导入解析器
|
||||
|
||||
class AiqichaDetailCrawler:
|
||||
def __init__(self, cookie_path="aiqicha_cookies.json"):
|
||||
def __init__(self, cookie_path="cookies/aiqicha_cookies.json"):
|
||||
self.browser = WebBrowser(cookie_path)
|
||||
self.browser_started = False
|
||||
self.login_manager = None # 添加登录管理器实例
|
||||
@@ -28,11 +28,11 @@ class AiqichaDetailCrawler:
|
||||
print("已加载Cookie")
|
||||
|
||||
# 使用登录管理器检测登录状态
|
||||
# logined = self.login_manager.check_and_login()
|
||||
# if logined:
|
||||
# print("登录成功")
|
||||
# else:
|
||||
# print("登录失败")
|
||||
logined = self.login_manager.check_and_login()
|
||||
if logined:
|
||||
print("登录成功")
|
||||
else:
|
||||
print("登录失败")
|
||||
self.browser_started = True
|
||||
except Exception as e:
|
||||
print(f"启动浏览器失败: {e}")
|
||||
|
||||
@@ -16,6 +16,21 @@ class AiqichaLoginManager:
|
||||
else:
|
||||
print("已加载Cookie,验证登录状态...")
|
||||
# 加载cookie后访问页面验证是否真正登录
|
||||
# 判断当前的url地址 如果是 https://aiqicha.baidu.com/company_detail_* 地址
|
||||
if "company_detail" in self.browser.page.url:
|
||||
# 等待页面加载完成
|
||||
self.browser.page.wait_for_load_state("networkidle")
|
||||
|
||||
# 验证登录状态
|
||||
if not self.check_login_status():
|
||||
print("Cookie已过期或无效,重新登录...")
|
||||
return self.login()
|
||||
else:
|
||||
print("Cookie有效,已登录")
|
||||
return True
|
||||
|
||||
else:
|
||||
print("未登录或已过期,开始登录流程...")
|
||||
self.browser.page.goto("https://aiqicha.baidu.com")
|
||||
# 等待页面加载完成
|
||||
self.browser.page.wait_for_load_state("networkidle")
|
||||
|
||||
@@ -6,7 +6,7 @@ from tool.web_browser import WebBrowser
|
||||
|
||||
|
||||
class BingSearcher:
|
||||
def __init__(self, cookie_path="bing_cookies.json"):
|
||||
def __init__(self, cookie_path="cookies/bing_cookies.json"):
|
||||
self.browser = WebBrowser(cookie_path)
|
||||
self.browser_started = False
|
||||
|
||||
|
||||
Reference in New Issue
Block a user