aiqicha
This commit is contained in:
86
company/qcc_crawler.py
Normal file
86
company/qcc_crawler.py
Normal file
@@ -0,0 +1,86 @@
|
||||
from playwright.sync_api import sync_playwright
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
|
||||
# Relative path of the JSON file in which the browser cookies are stored
# so that a later run can reuse them.
COOKIE_FILE = "qcc_cookies.json"
|
||||
|
||||
|
||||
def ensure_cookie_file():
    """Create the cookie file with an empty JSON list if it does not exist.

    An existing file is left untouched. A message is printed only when a
    new file was actually created.
    """
    try:
        # Mode 'x' creates the file exclusively: checking for existence and
        # creating the file happen in a single step, so an already existing
        # cookie file can never be truncated by this function.
        with open(COOKIE_FILE, 'x', encoding='utf-8') as f:
            json.dump([], f)
    except FileExistsError:
        return
    print(f"已创建新的cookie文件: {COOKIE_FILE}")
|
||||
|
||||
|
||||
def save_cookies(context):
    """Write the cookies of ``context`` to ``COOKIE_FILE`` as indented JSON.

    Args:
        context: Object whose ``cookies()`` method returns the cookies to
            persist (here the Playwright browser context).

    The file is overwritten on every call and a confirmation is printed.
    """
    with open(COOKIE_FILE, 'w') as cookie_fh:
        json.dump(context.cookies(), cookie_fh, indent=2)
    print(f"Cookies已保存到 {COOKIE_FILE}")
|
||||
|
||||
|
||||
def load_cookies(context):
    """Load cookies from ``COOKIE_FILE`` into ``context``.

    Args:
        context: Object exposing ``add_cookies(list)`` (here the Playwright
            browser context).

    Returns:
        True when the file held a non-empty cookie list that was added to
        the context; False when the list was empty or when reading,
        parsing or adding the cookies raised an exception (the error is
        printed, not propagated).
    """
    try:
        with open(COOKIE_FILE, 'r') as cookie_fh:
            stored = json.load(cookie_fh)
        # Nothing saved yet: report that a fresh login is required.
        if not stored:
            return False
        context.add_cookies(stored)
        return True
    except Exception as e:
        print(f"加载cookies失败: {str(e)}")
        return False
|
||||
|
||||
|
||||
def qcc_login(page):
    """Open qcc.com and wait for the user to log in by scanning a QR code.

    Args:
        page: Playwright page used for navigation and interaction.

    The function opens the home page, waits up to 5 s for the login
    container, switches to the QR-code login tab and then waits up to
    120 s for the URL to match ``**/usercenter**``, which is treated as a
    successful login. Exceptions raised by the page calls propagate.
    """
    home_url = "https://www.qcc.com"
    page.goto(home_url)
    page.wait_for_selector(".login-container", timeout=5000)
    page.click("text=扫码登录")

    print("请扫描页面二维码登录...")
    # The redirect to the user centre is the signal that the scan succeeded.
    page.wait_for_url("**/usercenter**", timeout=120000)
    print("登录成功!")
|
||||
|
||||
|
||||
def search_company(page, company_name):
    """Search qcc.com for a company and read the first result's summary.

    Args:
        page: Playwright page used for navigation and DOM queries.
        company_name: Search keyword. It is percent-encoded before being
            placed in the query string.

    Returns:
        A dict with the keys ``"name"``, ``"legal_rep"`` and ``"status"``,
        each holding the inner text of the first element matching
        ``.company-name``, ``.legal-person`` and ``.company-status``.

    Exceptions raised by the page calls (for example when no search result
    appears within the 10 s wait) propagate to the caller.
    """
    from urllib.parse import quote

    # Percent-encode the keyword so that characters such as '&', '#', '+',
    # '%' or spaces inside a company name cannot truncate or alter the
    # query string.
    keyword = quote(str(company_name), safe="")
    page.goto(f"https://www.qcc.com/web/search?key={keyword}")
    page.wait_for_selector(".search-result-item", timeout=10000)

    # Sample data extraction
    company_info = {
        "name": page.locator(".company-name").first.inner_text(),
        "legal_rep": page.locator(".legal-person").first.inner_text(),
        "status": page.locator(".company-status").first.inner_text(),
    }
    return company_info
|
||||
|
||||
|
||||
def main():
    """Run the crawler: restore or create a session, then query companies.

    Steps:
        1. Make sure the cookie file exists.
        2. Launch a visible Chromium window and open a page.
        3. Reuse saved cookies; if none could be loaded, perform the
           QR-code login and save the resulting cookies.
        4. Search each hard-coded company name, print its summary and
           save the cookies again. A failed lookup is reported and the
           loop continues with the next company.

    The browser context and the browser are closed even when a step
    outside the per-company error handling raises (e.g. a login timeout).
    """
    ensure_cookie_file()

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        try:
            context = browser.new_context()
            try:
                page = context.new_page()

                if not load_cookies(context):
                    qcc_login(page)
                    save_cookies(context)

                companies = ["阿里巴巴", "腾讯科技", "华为技术"]
                for company in companies:
                    try:
                        info = search_company(page, company)
                        print(f"{info['name']} | 法人:{info['legal_rep']} | 状态:{info['status']}")
                        save_cookies(context)  # refresh the saved cookies after every lookup
                        time.sleep(3)
                    except Exception as e:
                        print(f"查询 {company} 失败: {str(e)}")
            finally:
                context.close()
        finally:
            browser.close()
|
||||
|
||||
|
||||
# Start the crawler only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user