修改 aiqicha 登录逻辑,并调整 cookie 文件的存放位置
This commit is contained in:
25
.gitignore
vendored
Normal file
25
.gitignore
vendored
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
|
||||||
|
.gitignore
|
||||||
|
.git/
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
.DS_Store
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
|
.env.development.local
|
||||||
|
.env.test.local
|
||||||
|
.env.production.local
|
||||||
|
npm-debug.log*
|
||||||
|
yarn-debug.log*
|
||||||
|
yarn-error.log*
|
||||||
|
pnpm-debug.log*
|
||||||
|
node_modules/
|
||||||
|
dist/
|
||||||
|
build/
|
||||||
|
.next/
|
||||||
|
.out/
|
||||||
|
.serverless/
|
||||||
|
cdk.out/
|
||||||
|
.vscode-test/
|
||||||
|
cookies/
|
||||||
|
data/
|
||||||
1
cookies/.gitignore
vendored
Normal file
1
cookies/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
*
|
||||||
@@ -346,7 +346,7 @@ if __name__ == '__main__':
|
|||||||
filter_list_with_not_match = filter_aiqicha_qcc(data_list, company_name, with_not_match=True)
|
filter_list_with_not_match = filter_aiqicha_qcc(data_list, company_name, with_not_match=True)
|
||||||
# 创建CSV工具实例
|
# 创建CSV工具实例
|
||||||
csv_tool = CSVTool(
|
csv_tool = CSVTool(
|
||||||
csv_file_name='company_search_filter_is_none_data.csv',
|
csv_file_name='data/company_search_filter_is_none_data.csv',
|
||||||
headers=['company_name','title', 'web_site_type','url', 'request_url', 'create_time']
|
headers=['company_name','title', 'web_site_type','url', 'request_url', 'create_time']
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
1
data/.gitignore
vendored
Normal file
1
data/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
*
|
||||||
@@ -7,7 +7,7 @@ from tool.aiqicha_login import AiqichaLoginManager # 导入登录管理器
|
|||||||
from tool.aiqicha_detail_parser import AiqichaDetailParser # 导入解析器
|
from tool.aiqicha_detail_parser import AiqichaDetailParser # 导入解析器
|
||||||
|
|
||||||
class AiqichaDetailCrawler:
|
class AiqichaDetailCrawler:
|
||||||
def __init__(self, cookie_path="aiqicha_cookies.json"):
|
def __init__(self, cookie_path="cookies/aiqicha_cookies.json"):
|
||||||
self.browser = WebBrowser(cookie_path)
|
self.browser = WebBrowser(cookie_path)
|
||||||
self.browser_started = False
|
self.browser_started = False
|
||||||
self.login_manager = None # 添加登录管理器实例
|
self.login_manager = None # 添加登录管理器实例
|
||||||
@@ -28,11 +28,11 @@ class AiqichaDetailCrawler:
|
|||||||
print("已加载Cookie")
|
print("已加载Cookie")
|
||||||
|
|
||||||
# 使用登录管理器检测登录状态
|
# 使用登录管理器检测登录状态
|
||||||
# logined = self.login_manager.check_and_login()
|
logined = self.login_manager.check_and_login()
|
||||||
# if logined:
|
if logined:
|
||||||
# print("登录成功")
|
print("登录成功")
|
||||||
# else:
|
else:
|
||||||
# print("登录失败")
|
print("登录失败")
|
||||||
self.browser_started = True
|
self.browser_started = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"启动浏览器失败: {e}")
|
print(f"启动浏览器失败: {e}")
|
||||||
|
|||||||
@@ -16,17 +16,32 @@ class AiqichaLoginManager:
|
|||||||
else:
|
else:
|
||||||
print("已加载Cookie,验证登录状态...")
|
print("已加载Cookie,验证登录状态...")
|
||||||
# 加载cookie后访问页面验证是否真正登录
|
# 加载cookie后访问页面验证是否真正登录
|
||||||
self.browser.page.goto("https://aiqicha.baidu.com")
|
# 判断当前的url地址:如果已经是 https://aiqicha.baidu.com/company_detail_* 详情页,则不再跳转首页,直接在当前页面验证登录状态
|
||||||
# 等待页面加载完成
|
if "company_detail" in self.browser.page.url:
|
||||||
self.browser.page.wait_for_load_state("networkidle")
|
# 等待页面加载完成
|
||||||
|
self.browser.page.wait_for_load_state("networkidle")
|
||||||
|
|
||||||
|
# 验证登录状态
|
||||||
|
if not self.check_login_status():
|
||||||
|
print("Cookie已过期或无效,重新登录...")
|
||||||
|
return self.login()
|
||||||
|
else:
|
||||||
|
print("Cookie有效,已登录")
|
||||||
|
return True
|
||||||
|
|
||||||
# 验证登录状态
|
|
||||||
if not self.check_login_status():
|
|
||||||
print("Cookie已过期或无效,重新登录...")
|
|
||||||
return self.login()
|
|
||||||
else:
|
else:
|
||||||
print("Cookie有效,已登录")
|
print("未登录或已过期,开始登录流程...")
|
||||||
return True
|
self.browser.page.goto("https://aiqicha.baidu.com")
|
||||||
|
# 等待页面加载完成
|
||||||
|
self.browser.page.wait_for_load_state("networkidle")
|
||||||
|
|
||||||
|
# 验证登录状态
|
||||||
|
if not self.check_login_status():
|
||||||
|
print("Cookie已过期或无效,重新登录...")
|
||||||
|
return self.login()
|
||||||
|
else:
|
||||||
|
print("Cookie有效,已登录")
|
||||||
|
return True
|
||||||
|
|
||||||
def check_login_status(self):
|
def check_login_status(self):
|
||||||
"""检测登录状态,返回True表示已登录"""
|
"""检测登录状态,返回True表示已登录"""
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from tool.web_browser import WebBrowser
|
|||||||
|
|
||||||
|
|
||||||
class BingSearcher:
|
class BingSearcher:
|
||||||
def __init__(self, cookie_path="bing_cookies.json"):
|
def __init__(self, cookie_path="cookies/bing_cookies.json"):
|
||||||
self.browser = WebBrowser(cookie_path)
|
self.browser = WebBrowser(cookie_path)
|
||||||
self.browser_started = False
|
self.browser_started = False
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user