cookies
This commit is contained in:
3
.idea/.gitignore
generated
vendored
Normal file
3
.idea/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
7
.idea/MarsCodeWorkspaceAppSettings.xml
generated
Normal file
7
.idea/MarsCodeWorkspaceAppSettings.xml
generated
Normal file
@@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="com.codeverse.userSettings.MarscodeWorkspaceAppSettingsState">
|
||||
<option name="ckgOperationStatus" value="SUCCESS" />
|
||||
<option name="progress" value="1.0" />
|
||||
</component>
|
||||
</project>
|
||||
10
.idea/SearchCompany.iml
generated
Normal file
10
.idea/SearchCompany.iml
generated
Normal file
@@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.13 (SearchCompany)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
||||
6
.idea/misc.xml
generated
Normal file
6
.idea/misc.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.13 (SearchCompany)" />
|
||||
</component>
|
||||
</project>
|
||||
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/SearchCompany.iml" filepath="$PROJECT_DIR$/.idea/SearchCompany.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
6
.idea/vcs.xml
generated
Normal file
6
.idea/vcs.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
BIN
OPPO广东移动通信有限公司-OPPO广东移动通信有限公司.xlsx
Normal file
BIN
OPPO广东移动通信有限公司-OPPO广东移动通信有限公司.xlsx
Normal file
Binary file not shown.
BIN
__pycache__/config.cpython-313.pyc
Normal file
BIN
__pycache__/config.cpython-313.pyc
Normal file
Binary file not shown.
0
company/aiqicha.py
Normal file
0
company/aiqicha.py
Normal file
344
company/qcc.py
Normal file
344
company/qcc.py
Normal file
@@ -0,0 +1,344 @@
|
||||
# qcc.py
|
||||
import json
|
||||
import re
|
||||
import os
|
||||
from playwright.sync_api import sync_playwright
|
||||
import argparse
|
||||
|
||||
|
||||
def clean_text(text):
    """Normalize the whitespace of ``text``.

    Every run of whitespace (spaces, tabs, CR/LF, ...) is collapsed into a
    single space and leading/trailing whitespace is removed.

    Args:
        text: The string to clean. Falsy values (``None``, ``""``) are allowed.

    Returns:
        The cleaned string, or ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""
    # Collapse instead of deleting line breaks first: deleting them would glue
    # together tokens that were separated only by a newline or a tab.
    return re.sub(r'\s+', ' ', text).strip()
|
||||
|
||||
|
||||
class QCCParser:
    """Scrape basic registration details from a qcc.com company page.

    The parser reads the ``table.ntable`` inside ``div.cominfo-normal`` and the
    ``div.contact-info`` block of the page it is given. ``page`` must offer the
    Playwright sync ``Page`` methods used here (``wait_for_load_state`` and
    ``query_selector``).
    """

    # Trailing "关联企业 <n>" badge that follows a person's name in the table.
    _RELATED_SUFFIX = re.compile(r'\s*关联企业\s*\d+$')

    def __init__(self, page):
        self.page = page
        self.company_data = {}
        # Assigned by init_table(); defined here so the attribute always exists
        # and the getters can degrade to None instead of raising.
        self.table = None

    def init_table(self):
        """Wait for the page to settle and locate the company info table.

        Returns:
            True when the table was found (and stored in ``self.table``),
            False otherwise.
        """
        self.page.wait_for_load_state('networkidle')

        cominfo_normal = self.page.query_selector("div.cominfo-normal")
        if not cominfo_normal:
            print("未找到企业信息容器")
            return False

        self.table = cominfo_normal.query_selector("table.ntable")
        if not self.table:
            print("未找到企业信息表格")
            return False

        return True

    def _header_cells(self):
        """Return the ``td.tb`` label cells, or ``[]`` when no table is loaded."""
        if not self.table:
            return []
        return self.table.query_selector_all("td.tb")

    def _value_cell_for(self, title):
        """Return the cell right after the first label containing ``title``.

        Returns None when no label matches or the label has no following cell.
        """
        for header in self._header_cells():
            if title in clean_text(header.text_content()):
                return header.query_selector("+ td")
        return None

    def _strip_related_suffix(self, text):
        """Remove a trailing "关联企业 <n>" badge from ``text``."""
        return self._RELATED_SUFFIX.sub('', text).strip()

    def get_optimized_value(self, title):
        """Return the cleaned value belonging to the label containing ``title``.

        The text of a ``.copy-value`` element inside the value cell is
        preferred; otherwise the whole cell text is used. Returns None when
        the label or its value cell is missing.
        """
        value_cell = self._value_cell_for(title)
        if not value_cell:
            return None
        copy_value = value_cell.query_selector(".copy-value")
        return clean_text((copy_value or value_cell).text_content())

    def get_legal_representative(self):
        """Return the legal representative's name, or None when not found."""
        basic_value = (self.get_optimized_value("法定代表人") or
                       self.get_optimized_value("法人"))
        if basic_value and basic_value.strip():
            return self._strip_related_suffix(basic_value)

        # The simple lookup produced nothing usable: inspect the cell itself.
        value_cell = self._value_cell_for("法定代表人")
        if not value_cell:
            return None

        # Prefer a link whose text is neither a badge nor a "copy" button.
        for link in value_cell.query_selector_all('a[target="_blank"]'):
            name = clean_text(link.text_content())
            if name and "关联企业" not in name and "复制" not in name:
                return name

        first_link = value_cell.query_selector("a")
        if first_link:
            return self._strip_related_suffix(clean_text(first_link.text_content()))

        copy_value = value_cell.query_selector(".copy-value")
        if copy_value:
            return self._strip_related_suffix(clean_text(copy_value.text_content()))

        return self._strip_related_suffix(clean_text(value_cell.text_content()))

    def get_unified_social_credit_code(self):
        """Return the unified social credit code, or None."""
        return (self.get_optimized_value("统一社会信用代码") or
                self.get_optimized_value("信用代码"))

    def get_business_registration_no(self):
        """Return the business registration number, or None."""
        return (self.get_optimized_value("工商注册号") or
                self.get_optimized_value("注册号"))

    def get_organization_code(self):
        """Return the organization code, or None."""
        return self.get_optimized_value("组织机构代码")

    def get_taxpayer_id(self):
        """Return the taxpayer id, falling back to the unified social credit code."""
        return (self.get_optimized_value("纳税人识别号") or
                self.get_unified_social_credit_code())

    def get_insurance_number(self):
        """Return the insured headcount as "<n>人 <report link text>", or None."""
        value_cell = self._value_cell_for("参保人数")
        if not value_cell:
            return None

        number_span = value_cell.query_selector("span")
        number = clean_text(number_span.text_content()) if number_span else None
        if not number:
            return None

        report_link = value_cell.query_selector("a.m-l-r-10")
        report_year = clean_text(report_link.text_content()) if report_link else ""

        # rstrip() avoids a dangling space when no report link is present.
        return f"{number}人 {report_year}".rstrip()

    def get_phone_number(self):
        """Return the phone number from the page's contact block, or None."""
        contact_info = self.page.query_selector("div.contact-info")
        if not contact_info:
            return None

        right_part = contact_info.query_selector("div.main-part-item.right")
        if not right_part:
            return None

        # First row whose text carries the phone label.
        phone_row = None
        for row in right_part.query_selector_all("div.rline"):
            if "电话:" in clean_text(row.text_content()):
                phone_row = row
                break
        if not phone_row:
            return None

        # First copyable span that is not the label itself.
        for span in phone_row.query_selector_all("span.need-copy-field"):
            if "电话:" not in clean_text(span.text_content()):
                return clean_text(span.text_content())
        return None

    def get_approval_date(self):
        """Return the approval date, falling back to the establishment date."""
        return (self.get_optimized_value("核准日期") or
                self.get_optimized_value("成立日期"))

    def parse_company_info(self):
        """Collect all supported fields into ``self.company_data``.

        Returns:
            The populated dict, or None when the info table cannot be located.
        """
        if not self.init_table():
            return None

        self.company_data = {
            "企业名称": (self.get_optimized_value("企业名称") or
                     self.get_optimized_value("公司名称")),
            "统一社会信用代码": self.get_unified_social_credit_code(),
            "法定代表人": self.get_legal_representative(),
            "经营状态": self.get_optimized_value("登记状态"),
            "成立日期": self.get_optimized_value("成立日期"),
            "行政区划": self.get_optimized_value("行政区划"),
            "注册资本": self.get_optimized_value("注册资本"),
            "实缴资本": self.get_optimized_value("实缴资本"),
            "企业类型": self.get_optimized_value("企业类型"),
            "所属行业": self.get_optimized_value("国标行业"),
            "工商注册号": self.get_business_registration_no(),
            "组织机构代码": self.get_organization_code(),
            "纳税人识别号": self.get_taxpayer_id(),
            "纳税人资质": self.get_optimized_value("纳税人资质"),
            "营业期限": self.get_optimized_value("营业期限"),
            "核准日期": self.get_approval_date(),
            "参保人数": self.get_insurance_number(),
            "电话": self.get_phone_number(),
            "登记机关": self.get_optimized_value("登记机关"),
            "曾用名": self.get_optimized_value("曾用名"),
            "注册地址": self.get_optimized_value("注册地址"),
            "经营范围": self.get_optimized_value("经营范围"),
        }

        return self.company_data
|
||||
|
||||
|
||||
def load_cookies(context, cookie_file):
    """Load cookies from a JSON file into a browser context.

    Args:
        context: Object with an ``add_cookies(cookies)`` method (a Playwright
            ``BrowserContext`` in this script).
        cookie_file: Path of the JSON file written by ``save_cookies``.

    Returns:
        True when cookies were read and handed to ``context``; False when the
        file does not exist or cannot be read/parsed.
    """
    if not os.path.exists(cookie_file):
        return False

    try:
        with open(cookie_file, 'r', encoding='utf-8') as f:
            cookies = json.load(f)
    except (OSError, ValueError) as exc:
        # A damaged or unreadable cookie file must not abort the run; report
        # it and let the caller continue with a fresh login instead.
        # (json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.)
        print(f"cookies文件无法读取,已忽略: {exc}")
        return False

    context.add_cookies(cookies)
    print("已加载本地cookies")
    return True
|
||||
|
||||
|
||||
def save_cookies(context, cookie_file):
    """Serialize the cookies of ``context`` to ``cookie_file`` as JSON.

    Args:
        context: Object with a ``cookies()`` method returning JSON-serializable
            data (a Playwright ``BrowserContext`` in this script).
        cookie_file: Destination path; an existing file is overwritten.
    """
    serialized = json.dumps(context.cookies())
    with open(cookie_file, 'w') as out:
        out.write(serialized)
    print("已保存cookies到文件")
|
||||
|
||||
|
||||
def wait_for_login(page, cookie_file):
    """Block until the user has logged in by QR code, then persist the cookies.

    Args:
        page: Page currently showing (or about to show) the login screen; must
            offer ``wait_for_url`` and ``context`` like a Playwright ``Page``.
        cookie_file: Path the post-login cookies are written to.

    Any timeout raised by ``page.wait_for_url`` propagates to the caller.
    """
    print("检测到需要登录,请使用手机扫码登录...")
    print("登录成功后将自动跳转到目标页面")

    # Confirm we are on the login page. A predicate is used instead of the
    # glob "**/weblogin": a glob has to match the complete URL, so it never
    # matches a login URL that carries a query string and would time out.
    page.wait_for_url(lambda url: "weblogin" in url, timeout=3000)
    # Then give the user up to two minutes to scan; leaving the login URL is
    # treated as a successful login.
    page.wait_for_url(lambda url: "weblogin" not in url, timeout=120000)

    save_cookies(page.context, cookie_file)
    print("登录成功,已保存cookies")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: print a qcc.com company page's details as JSON.

    Reads the page URL, ``--headless`` and ``--cookie-file`` from the command
    line, restores saved cookies when available, performs an interactive QR
    login when the site redirects to its login page, and finally prints the
    parsed company data. Errors are reported on stdout; the browser is always
    closed.
    """
    arg_parser = argparse.ArgumentParser(description='解析企查查公司信息')
    arg_parser.add_argument('url', help='企查查公司页面URL')
    arg_parser.add_argument('--headless', action='store_true', help='无头模式运行')
    arg_parser.add_argument('--cookie-file', default='qcc_cookies.txt', help='cookies文件路径')

    args = arg_parser.parse_args()

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=args.headless)
        context = browser.new_context()
        page = context.new_page()

        try:
            if load_cookies(context, args.cookie_file):
                print("使用已保存的登录信息")

            page.goto(args.url)

            if "weblogin" in page.url:
                if args.headless:
                    # Nobody can scan a QR code in a headless browser; stop
                    # right away instead of blocking until the login timeout.
                    print("无头模式下无法扫码登录,请先去掉 --headless 运行一次以保存cookies")
                    return
                wait_for_login(page, args.cookie_file)
            else:
                print("已登录或无需登录")

            # Load the target again so parsing starts from the company page
            # (after a login the browser may have landed somewhere else).
            page.goto(args.url)

            # Named differently from the argparse parser above on purpose.
            company_parser = QCCParser(page)
            company_info = company_parser.parse_company_info()

            if company_info:
                print(json.dumps(company_info, ensure_ascii=False, indent=2))
            else:
                print("未能获取公司信息")

        except Exception as e:
            # Top-level boundary of the script: report and fall through to cleanup.
            print(f"发生错误: {e}")
        finally:
            browser.close()


if __name__ == "__main__":
    main()
|
||||
# python qcc.py "https://www.qcc.com/firm/50b0e3189f2eb2b20304b255669ce1a1.html"
|
||||
# # 首次运行需要扫码登录
|
||||
# python qcc.py "https://www.qcc.com/firm/公司URL"
|
||||
#
|
||||
# # 后续运行将自动使用已保存的登录信息
|
||||
# python qcc.py "https://www.qcc.com/firm/公司URL"
|
||||
#
|
||||
# # 指定自定义cookies文件
|
||||
# python qcc.py --cookie-file my_cookies.txt "https://www.qcc.com/firm/公司URL"
|
||||
831
company/youhou.js
Normal file
831
company/youhou.js
Normal file
@@ -0,0 +1,831 @@
|
||||
// ==UserScript==
|
||||
// @name 爱企查&企查查等
|
||||
// @namespace http://tampermonkey.net/
|
||||
// @version 0.2
|
||||
// @description 在页面右下角添加工具按钮,支持复制源码和解析公司信息
|
||||
// @author You
|
||||
// @match https://www.qcc.com/firm/*
|
||||
// @match https://aiqicha.baidu.com/company_detail_*
|
||||
// @grant none
|
||||
// ==/UserScript==
|
||||
|
||||
(function () {
|
||||
"use strict";
|
||||
|
||||
// 工具类 - 存放通用函数
|
||||
/**
 * Shared helpers: text clean-up, clipboard access and small UI overlays.
 * All members are static; helpers reference the class by name so they keep
 * working when a method is passed around detached from the class.
 */
class ToolUtils {
  /**
   * Collapse every whitespace run into one space and trim the result.
   * @param {?string} text Raw text; null/undefined/"" are accepted.
   * @returns {string} Cleaned text, "" for empty input.
   */
  static cleanText(text) {
    if (!text) return "";
    // Collapsing \s+ already covers \r, \n and \t, so one pass is enough.
    return text.replace(/\s+/g, " ").trim();
  }

  /**
   * Return the trimmed text of the first selector that matches a non-empty element.
   * @param {ParentNode} doc Node to search in.
   * @param {string[]} selectors Selectors tried in order.
   * @returns {string} The text, or "" when nothing matched.
   */
  static extractText(doc, selectors) {
    for (const selector of selectors) {
      const text = doc.querySelector(selector)?.textContent.trim();
      if (text) return text;
    }
    return "";
  }

  /**
   * Copy text through a temporary off-screen textarea.
   * Shows `successMessage` (when given) only if the copy command reported
   * success; otherwise an error toast is shown.
   * @param {string} content Text to copy.
   * @param {string} [successMessage] Toast text shown after a successful copy.
   */
  static copyToClipboard(content, successMessage) {
    const textarea = document.createElement("textarea");
    textarea.value = content;
    Object.assign(textarea.style, {
      position: "fixed",
      top: "0",
      left: "0",
      width: "1px",
      height: "1px",
      opacity: "0",
    });

    document.body.appendChild(textarea);
    let copied = false;
    try {
      textarea.select();
      // execCommand returns false when the command is unsupported or disabled.
      copied = document.execCommand("copy");
    } catch {
      copied = false;
    } finally {
      document.body.removeChild(textarea);
    }

    if (!copied) {
      ToolUtils.showAutoCloseMessage("复制失败", "error");
      return;
    }
    if (successMessage) {
      ToolUtils.showAutoCloseMessage(successMessage, "success");
    }
  }

  /**
   * Show a centered toast that fades out after two seconds (alert replacement).
   * @param {string} message Text to display.
   * @param {"info"|"success"|"error"} [type="info"] Selects the background color.
   */
  static showAutoCloseMessage(message, type = "info") {
    const backgrounds = { success: "#52c41a", error: "#f5222d" };

    const alertBox = document.createElement("div");
    alertBox.textContent = message;
    Object.assign(alertBox.style, {
      position: "fixed",
      top: "50%",
      left: "50%",
      transform: "translate(-50%, -50%)",
      // Any type other than success/error gets the info color.
      backgroundColor: backgrounds[type] || "#1890ff",
      color: "white",
      padding: "10px 20px",
      borderRadius: "4px",
      zIndex: "10001",
      boxShadow: "0 2px 8px rgba(0,0,0,0.15)",
      transition: "opacity 0.3s",
    });

    document.body.appendChild(alertBox);

    const removeIfAttached = () => {
      if (document.body.contains(alertBox)) {
        document.body.removeChild(alertBox);
      }
    };

    setTimeout(() => {
      if (!document.body.contains(alertBox)) return;
      // Fade out first, then remove once the 0.3s transition has finished.
      alertBox.style.opacity = "0";
      setTimeout(removeIfAttached, 300);
    }, 2000);
  }

  /**
   * Show `data` as pretty-printed JSON in a modal with copy and close buttons.
   * The modal stays open until the user closes it.
   * @param {*} data JSON-serializable value to display.
   */
  static showResult(data) {
    const json = JSON.stringify(data, null, 2);

    const makeButton = (label, background) => {
      const button = document.createElement("button");
      button.textContent = label;
      Object.assign(button.style, {
        marginTop: "10px",
        padding: "8px 16px",
        backgroundColor: background,
        color: "white",
        border: "none",
        borderRadius: "4px",
        cursor: "pointer",
      });
      return button;
    };

    const modal = document.createElement("div");
    Object.assign(modal.style, {
      position: "fixed",
      top: "50%",
      left: "50%",
      transform: "translate(-50%, -50%)",
      width: "600px",
      maxHeight: "80vh",
      overflowY: "auto",
      backgroundColor: "white",
      padding: "20px",
      boxShadow: "0 0 10px rgba(0,0,0,0.3)",
      zIndex: "10000",
    });

    const pre = document.createElement("pre");
    pre.textContent = json;
    pre.style.whiteSpace = "pre-wrap";
    pre.style.wordWrap = "break-word";

    const copyBtn = makeButton("复制JSON", "#52c41a");
    copyBtn.addEventListener("click", () => {
      navigator.clipboard
        .writeText(json)
        .then(() => ToolUtils.showAutoCloseMessage("已复制到剪贴板", "success"))
        .catch((err) =>
          ToolUtils.showAutoCloseMessage("复制失败: " + err, "error")
        );
    });

    const closeBtn = makeButton("关闭", "#f5222d");
    closeBtn.style.marginLeft = "10px";
    closeBtn.addEventListener("click", () => {
      document.body.removeChild(modal);
    });

    // Static markup only; the data itself is inserted via textContent above.
    modal.innerHTML = '<h2 style="margin-top: 0;">企业信息解析结果</h2>';
    modal.appendChild(pre);
    modal.appendChild(document.createElement("br"));
    modal.appendChild(copyBtn);
    modal.appendChild(closeBtn);

    document.body.appendChild(modal);
  }
}
|
||||
|
||||
// 爱企查解析类
|
||||
/**
 * Parser for aiqicha.baidu.com company pages.
 * Reads the `table.zx-detail-basic-table` element; label cells and value
 * cells are adjacent `td` siblings. Call `initTable()` (done by
 * `parseCompanyInfo()`) before using any table-based getter.
 */
class AiQiChaParser {
  constructor() {
    this.table = null;
  }

  /**
   * Locate the basic-info table and store it in `this.table`.
   * @returns {boolean} false (after alerting) when the table is missing.
   */
  initTable() {
    this.table = document.querySelector("table.zx-detail-basic-table");
    if (!this.table) {
      alert("未找到企业信息表格");
      return false;
    }
    return true;
  }

  /** All `td` cells of the table, in document order. */
  _cells() {
    return Array.from(this.table.querySelectorAll("td"));
  }

  /** Cleaned text of the cell's `.enter-bg-ele` child, else of the cell itself. */
  _valueText(valueCell) {
    const raw =
      valueCell.querySelector(".enter-bg-ele")?.textContent ||
      valueCell.textContent;
    return ToolUtils.cleanText(raw);
  }

  /**
   * Value text of the cell following the first `td` accepted by `predicate`.
   * @returns {?string} null when no cell matches or it has no next sibling.
   */
  _valueAfterFirst(predicate) {
    const valueCell = this._cells().find(predicate)?.nextElementSibling;
    return valueCell ? this._valueText(valueCell) : null;
  }

  /** First "<digits>人" occurrence in the cell text, or null. */
  _headcount(cell) {
    const rawText = cell.textContent.replace(/[\r\n\t]/g, "").trim();
    const match = rawText.match(/(\d+人)/);
    return match ? match[0] : null;
  }

  /**
   * Phone number shown in the business-info header.
   * @returns {string} The number, or a "not found" message when absent.
   */
  getPhoneNumber() {
    const phoneContainer = document.querySelector(
      "div.business-info div.telphone-lists-wrap"
    );
    if (!phoneContainer) return "未找到电话信息";

    const phoneElement = phoneContainer.querySelector("span.copy-box span");
    if (!phoneElement) return "未找到电话号码";

    return ToolUtils.cleanText(phoneElement.textContent);
  }

  /**
   * Value of the row whose label cell text equals `title` exactly.
   * @param {string} title Label text to look for.
   * @returns {?string} Cleaned value, or null when label/value cell is missing.
   */
  getOptimizedValue(title) {
    const titleCell = this._cells().find(
      (cell) => ToolUtils.cleanText(cell.textContent) === title
    );
    const valueCell = titleCell?.nextElementSibling;
    if (!valueCell) return null;

    const valueElement =
      valueCell.querySelector(".enter-bg-ele") ||
      valueCell.querySelector(".addr-enter-bg-ele") ||
      valueCell;
    return ToolUtils.cleanText(valueElement.textContent);
  }

  /** Legal representative's name, or null. */
  getLegalRepresentative() {
    const cells = this._cells();

    // Exact label followed by the rich "image + text" cell: prefer the name link.
    const exactCell = cells.find(
      (td) => ToolUtils.cleanText(td.textContent) === "法定代表人"
    );
    const richCell = exactCell?.nextElementSibling;
    if (richCell && richCell.classList.contains("image-text-content")) {
      const nameElement = richCell.querySelector(".person-name-warp a");
      return ToolUtils.cleanText((nameElement || richCell).textContent);
    }

    // Otherwise take the plain text next to the first cell mentioning the label.
    const looseCell = cells.find((td) => td.textContent.includes("法定代表人"));
    const plainCell = looseCell?.nextElementSibling;
    return plainCell ? ToolUtils.cleanText(plainCell.textContent) : null;
  }

  /** Unified social credit code, falling back to the taxpayer-id row; or null. */
  getUnifiedSocialCreditCode() {
    return (
      this._valueAfterFirst(
        (td) =>
          td.textContent.includes("统一社会信用代码") &&
          td.nextElementSibling &&
          td.nextElementSibling.classList.contains("table-regCapital-lable")
      ) ??
      this._valueAfterFirst((td) => td.textContent.includes("纳税人识别号"))
    );
  }

  /** Business registration number, or null. */
  getBusinessRegistrationNo() {
    return this._valueAfterFirst((td) =>
      ToolUtils.cleanText(td.textContent).includes("工商注册号")
    );
  }

  /** Organization code, or null. */
  getOrganizationCode() {
    const tipElement = Array.from(
      this.table.querySelectorAll(".poptip-wrap-org-no")
    ).find((el) => el.textContent.includes("组织机构代码"));

    // closest() yields null when the tooltip is not inside a td.
    const tipValueCell = tipElement?.closest("td")?.nextElementSibling;
    if (tipValueCell && tipValueCell.classList.contains("enter-bg")) {
      return this._valueText(tipValueCell);
    }

    return this._valueAfterFirst(
      (td) => ToolUtils.cleanText(td.textContent) === "组织机构代码"
    );
  }

  /** Taxpayer id, falling back to the unified social credit code row; or null. */
  getTaxpayerId() {
    return (
      this._valueAfterFirst((td) =>
        ToolUtils.cleanText(td.textContent).includes("纳税人识别号")
      ) ??
      this._valueAfterFirst((td) =>
        ToolUtils.cleanText(td.textContent).includes("统一社会信用代码")
      )
    );
  }

  /** Insured headcount as "<n>人", or null. */
  getInsuranceNumber() {
    const cells = this._cells();

    const insuranceCell = cells.find(
      (td) =>
        td.textContent.includes("参保人数") &&
        td.querySelector(".insurance-info")
    );
    if (insuranceCell) {
      const valueCell = insuranceCell.nextElementSibling;
      return valueCell ? this._headcount(valueCell) : null;
    }

    // Fallback: read the cell directly before the first cell mentioning "登记机关".
    const registrationCell = cells.find((td) =>
      td.textContent.includes("登记机关")
    );
    const previousCell = registrationCell?.previousElementSibling;
    return previousCell ? this._headcount(previousCell) : null;
  }

  /** Approval date as YYYY-MM-DD, or null when absent or in another format. */
  getApprovalDate() {
    const isDate = (value) => /^\d{4}-\d{2}-\d{2}$/.test(value);

    const tipElement = Array.from(
      this.table.querySelectorAll(".poptip-wrap-annual-date")
    ).find((el) => el.textContent.includes("核准日期"));

    // closest() yields null when the tooltip is not inside a td.
    const tipValueCell = tipElement?.closest("td")?.nextElementSibling;
    if (tipValueCell) {
      const rawValue = tipValueCell.textContent.replace(/[\r\n\t]/g, "").trim();
      if (isDate(rawValue)) return rawValue;
    }

    const titleCell = this._cells().find(
      (td) => ToolUtils.cleanText(td.textContent) === "核准日期"
    );
    const valueCell = titleCell?.nextElementSibling;
    if (valueCell) {
      const rawValue = ToolUtils.cleanText(valueCell.textContent);
      if (isDate(rawValue)) return rawValue;
    }

    return null;
  }

  /** Collect every supported field and display the result modal. */
  parseCompanyInfo() {
    if (!this.initTable()) return;

    const companyData = {
      企业名称: this.getOptimizedValue("企业名称"),
      统一社会信用代码: this.getUnifiedSocialCreditCode(),
      法定代表人: this.getLegalRepresentative(),
      电话: this.getPhoneNumber(),
      经营状态: this.getOptimizedValue("经营状态"),
      成立日期: this.getOptimizedValue("成立日期"),
      行政区划: this.getOptimizedValue("行政区划"),
      注册资本: this.getOptimizedValue("注册资本"),
      实缴资本: this.getOptimizedValue("实缴资本"),
      企业类型: this.getOptimizedValue("企业类型"),
      所属行业: this.getOptimizedValue("所属行业"),
      工商注册号: this.getBusinessRegistrationNo(),
      组织机构代码: this.getOrganizationCode(),
      纳税人识别号: this.getTaxpayerId(),
      纳税人资质: this.getOptimizedValue("纳税人资质"),
      营业期限: this.getOptimizedValue("营业期限"),
      核准日期: this.getApprovalDate(),
      参保人数: this.getInsuranceNumber(),
      登记机关: this.getOptimizedValue("登记机关"),
      曾用名: this.getOptimizedValue("曾用名"),
      注册地址: this.getOptimizedValue("注册地址"),
      经营范围: this.getOptimizedValue("经营范围"),
    };

    ToolUtils.showResult(companyData);
  }
}
|
||||
|
||||
// QCC解析类 企查查
|
||||
class QCCParser {
|
||||
constructor() {
|
||||
this.table = null;
|
||||
}
|
||||
|
||||
// 初始化表格
|
||||
initTable() {
|
||||
const cominfoNormal = document.querySelector("div.cominfo-normal");
|
||||
if (!cominfoNormal) {
|
||||
alert("未找到企业信息容器");
|
||||
return false;
|
||||
}
|
||||
|
||||
this.table = cominfoNormal.querySelector("table.ntable");
|
||||
if (!this.table) {
|
||||
alert("未找到企业信息表格");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// 获取优化后的值
|
||||
getOptimizedValue(title) {
|
||||
const headerCells = Array.from(this.table.querySelectorAll("td.tb"));
|
||||
let value = null;
|
||||
|
||||
headerCells.forEach((header) => {
|
||||
if (ToolUtils.cleanText(header.textContent).includes(title)) {
|
||||
const valueCell = header.nextElementSibling;
|
||||
if (valueCell) {
|
||||
// 尝试从copy-value类中获取值
|
||||
const copyValue = valueCell.querySelector(".copy-value");
|
||||
if (copyValue) {
|
||||
value = ToolUtils.cleanText(copyValue.textContent);
|
||||
} else {
|
||||
value = ToolUtils.cleanText(valueCell.textContent);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
// 获取法定代表人
|
||||
getLegalRepresentative() {
|
||||
// Try the existing method first
|
||||
const basicValue =
|
||||
this.getOptimizedValue("法定代表人") || this.getOptimizedValue("法人");
|
||||
if (basicValue && basicValue.trim()) {
|
||||
// Remove any "关联企业 X" text
|
||||
return basicValue.replace(/\s*关联企业\s*\d+$/, "").trim();
|
||||
}
|
||||
|
||||
// If basic extraction fails, try more specific approach
|
||||
const headerCell = Array.from(this.table.querySelectorAll("td.tb")).find(
|
||||
(cell) => ToolUtils.cleanText(cell.textContent).includes("法定代表人")
|
||||
);
|
||||
|
||||
if (!headerCell) return null;
|
||||
|
||||
const valueCell = headerCell.nextElementSibling;
|
||||
if (!valueCell) return null;
|
||||
|
||||
// Try to find the name within the complex structure
|
||||
// Look for anchor tags with target="_blank" which typically contain the legal representative's name
|
||||
const nameLinks = valueCell.querySelectorAll('a[target="_blank"]');
|
||||
for (const link of nameLinks) {
|
||||
const name = ToolUtils.cleanText(link.textContent);
|
||||
// Make sure it's not empty and doesn't contain obvious non-name text
|
||||
if (name && !name.includes("关联企业") && !name.includes("复制")) {
|
||||
return name;
|
||||
}
|
||||
}
|
||||
|
||||
// Alternative approach - look for the first anchor tag in the cell
|
||||
const firstLink = valueCell.querySelector("a");
|
||||
if (firstLink) {
|
||||
const name = ToolUtils.cleanText(firstLink.textContent);
|
||||
// Remove any trailing "关联企业 X" text
|
||||
return name.replace(/\s*关联企业\s*\d+$/, "").trim();
|
||||
}
|
||||
|
||||
// Fallback to general value extraction
|
||||
const copyValue = valueCell.querySelector(".copy-value");
|
||||
if (copyValue) {
|
||||
const name = ToolUtils.cleanText(copyValue.textContent);
|
||||
// Remove any trailing "关联企业 X" text
|
||||
return name.replace(/\s*关联企业\s*\d+$/, "").trim();
|
||||
}
|
||||
|
||||
const rawText = ToolUtils.cleanText(valueCell.textContent);
|
||||
// Remove any trailing "关联企业 X" text
|
||||
return rawText.replace(/\s*关联企业\s*\d+$/, "").trim();
|
||||
}
|
||||
|
||||
// 获取统一社会信用代码
|
||||
getUnifiedSocialCreditCode() {
|
||||
return (
|
||||
this.getOptimizedValue("统一社会信用代码") ||
|
||||
this.getOptimizedValue("信用代码")
|
||||
);
|
||||
}
|
||||
|
||||
// 获取工商注册号
|
||||
getBusinessRegistrationNo() {
|
||||
return (
|
||||
this.getOptimizedValue("工商注册号") || this.getOptimizedValue("注册号")
|
||||
);
|
||||
}
|
||||
|
||||
// 获取组织机构代码
|
||||
getOrganizationCode() {
|
||||
return this.getOptimizedValue("组织机构代码");
|
||||
}
|
||||
|
||||
// 获取纳税人识别号
|
||||
getTaxpayerId() {
|
||||
return (
|
||||
this.getOptimizedValue("纳税人识别号") ||
|
||||
this.getUnifiedSocialCreditCode()
|
||||
);
|
||||
}
|
||||
|
||||
// 获取参保人数
|
||||
getInsuranceNumber() {
|
||||
// 查找参保人数表头
|
||||
const headerCell = Array.from(this.table.querySelectorAll("td.tb")).find(
|
||||
(cell) => ToolUtils.cleanText(cell.textContent).includes("参保人数")
|
||||
);
|
||||
|
||||
if (!headerCell) return null;
|
||||
|
||||
const valueCell = headerCell.nextElementSibling;
|
||||
if (!valueCell) return null;
|
||||
|
||||
// 提取参保人数数字
|
||||
const numberSpan = valueCell.querySelector("span");
|
||||
const number = numberSpan
|
||||
? ToolUtils.cleanText(numberSpan.textContent)
|
||||
: null;
|
||||
|
||||
// 提取年报年份
|
||||
const reportLink = valueCell.querySelector("a.m-l-r-10");
|
||||
const reportYear = reportLink
|
||||
? ToolUtils.cleanText(reportLink.textContent)
|
||||
: "";
|
||||
|
||||
// 组合结果
|
||||
return number ? `${number}人 ${reportYear}` : null;
|
||||
}
|
||||
|
||||
// 获取联系电话
|
||||
getPhoneNumber() {
|
||||
// 查找联系信息容器
|
||||
const contactInfo = document.querySelector("div.contact-info");
|
||||
if (!contactInfo) return null;
|
||||
|
||||
// 查找右侧信息区域
|
||||
const rightPart = contactInfo.querySelector("div.main-part-item.right");
|
||||
if (!rightPart) return null;
|
||||
|
||||
// 查找包含电话的行
|
||||
const rows = Array.from(rightPart.querySelectorAll("div.rline"));
|
||||
const phoneRow = rows.find((row) =>
|
||||
ToolUtils.cleanText(row.textContent).includes("电话:")
|
||||
);
|
||||
|
||||
if (!phoneRow) return null;
|
||||
|
||||
// 提取电话号码
|
||||
const spans = Array.from(
|
||||
phoneRow.querySelectorAll("span.need-copy-field")
|
||||
);
|
||||
const phoneSpan = spans.find(
|
||||
(span) => !ToolUtils.cleanText(span.textContent).includes("电话:")
|
||||
);
|
||||
|
||||
return phoneSpan ? ToolUtils.cleanText(phoneSpan.textContent) : null;
|
||||
}
|
||||
|
||||
// 获取核准日期
|
||||
getApprovalDate() {
|
||||
return (
|
||||
this.getOptimizedValue("核准日期") || this.getOptimizedValue("成立日期")
|
||||
);
|
||||
}
|
||||
|
||||
// 解析公司信息主方法
|
||||
parseCompanyInfo() {
|
||||
if (!this.initTable()) return;
|
||||
|
||||
const companyData = {
|
||||
企业名称:
|
||||
this.getOptimizedValue("企业名称") ||
|
||||
this.getOptimizedValue("公司名称"),
|
||||
统一社会信用代码: this.getUnifiedSocialCreditCode(),
|
||||
法定代表人: this.getLegalRepresentative(),
|
||||
经营状态: this.getOptimizedValue("登记状态"),
|
||||
成立日期: this.getOptimizedValue("成立日期"),
|
||||
行政区划: this.getOptimizedValue("行政区划"),
|
||||
注册资本: this.getOptimizedValue("注册资本"),
|
||||
实缴资本: this.getOptimizedValue("实缴资本"),
|
||||
企业类型: this.getOptimizedValue("企业类型"),
|
||||
所属行业: this.getOptimizedValue("国标行业"),
|
||||
工商注册号: this.getBusinessRegistrationNo(),
|
||||
组织机构代码: this.getOrganizationCode(),
|
||||
纳税人识别号: this.getTaxpayerId(),
|
||||
纳税人资质: this.getOptimizedValue("纳税人资质"),
|
||||
营业期限: this.getOptimizedValue("营业期限"),
|
||||
核准日期: this.getApprovalDate(),
|
||||
参保人数: this.getInsuranceNumber(),
|
||||
电话: this.getPhoneNumber(),
|
||||
登记机关: this.getOptimizedValue("登记机关"),
|
||||
曾用名: this.getOptimizedValue("曾用名"),
|
||||
注册地址: this.getOptimizedValue("注册地址"),
|
||||
经营范围: this.getOptimizedValue("经营范围"),
|
||||
};
|
||||
|
||||
ToolUtils.showResult(companyData);
|
||||
}
|
||||
}
|
||||
|
||||
// Build the floating tool widget and attach it to document.body.
// Collapsed it is a 40px green circle showing "+"; hovering expands it into
// a panel holding two buttons ("复制源码" and "解析公司信息"). The widget can
// be dragged by pressing the mouse on the "+" sign or on the container
// background.
function createButtonContainer() {
  const container = document.createElement("div");
  container.id = "tool-container";
  Object.assign(container.style, {
    position: "fixed",
    right: "20px",
    bottom: "20px",
    zIndex: "9999",
    display: "flex",
    flexDirection: "column",
    gap: "10px",
    width: "40px",
    height: "40px",
    backgroundColor: "#4CAF50",
    borderRadius: "50%",
    transition: "all 0.3s ease",
    overflow: "hidden",
    cursor: "move",
  });

  // "+" indicator shown in the collapsed state.
  const plusSign = document.createElement("div");
  plusSign.textContent = "+";
  Object.assign(plusSign.style, {
    color: "white",
    fontSize: "24px",
    textAlign: "center",
    lineHeight: "40px",
    width: "100%",
  });
  container.appendChild(plusSign);

  // Expand on hover, collapse again when the pointer leaves.
  container.addEventListener("mouseenter", () => {
    container.style.width = "150px";
    container.style.height = "auto";
    container.style.borderRadius = "8px";
  });

  container.addEventListener("mouseleave", () => {
    container.style.width = "40px";
    container.style.height = "40px";
    container.style.borderRadius = "50%";
  });

  // Drag support: remember where inside the widget the pointer went down so
  // the widget does not jump while it follows the pointer.
  let isDragging = false;
  let offsetX, offsetY;

  container.addEventListener("mousedown", (e) => {
    // Dragging starts only from the "+" sign or the container background,
    // never from one of the buttons.
    if (e.target === plusSign || e.target === container) {
      isDragging = true;
      const rect = container.getBoundingClientRect();
      offsetX = e.clientX - rect.left;
      offsetY = e.clientY - rect.top;
      container.style.cursor = "grabbing";
      // Stop the event from bubbling and suppress the default action.
      e.stopPropagation();
      e.preventDefault();
    }
  });

  document.addEventListener("mousemove", (e) => {
    if (!isDragging) return;
    // Switch from right/bottom anchoring to explicit left/top coordinates.
    container.style.left = e.clientX - offsetX + "px";
    container.style.top = e.clientY - offsetY + "px";
    container.style.right = "auto";
    container.style.bottom = "auto";
  });

  document.addEventListener("mouseup", () => {
    if (isDragging) {
      isDragging = false;
      container.style.cursor = "move";
    }
  });

  // Create one white action button with a hover highlight.
  function createButton(text, onClick) {
    const button = document.createElement("button");
    button.textContent = text;
    Object.assign(button.style, {
      padding: "8px 12px",
      border: "none",
      borderRadius: "4px",
      backgroundColor: "white",
      color: "#333",
      cursor: "pointer",
      width: "100%",
      // The value of `transition` is CSS text, so the property must be
      // written in its CSS form; "backgroundColor" names no CSS property
      // and the hover colour change was therefore never animated.
      transition: "background-color 0.2s",
    });
    button.addEventListener(
      "mouseenter",
      () => (button.style.backgroundColor = "#f0f0f0")
    );
    button.addEventListener(
      "mouseleave",
      () => (button.style.backgroundColor = "white")
    );
    button.addEventListener("click", onClick);
    return button;
  }

  // "Copy source" button: copy the whole document's HTML.
  const copySourceButton = createButton("复制源码", () => {
    const html = document.documentElement.outerHTML;
    copyToClipboard(html, "HTML源码已复制到剪贴板");
  });

  // "Parse company info" button: choose the parser from the current host.
  const parseInfoButton = createButton("解析公司信息", () => {
    let parser;
    if (window.location.host.includes("aiqicha.baidu.com")) {
      parser = new AiQiChaParser();
    } else if (window.location.host.includes("qcc.com")) {
      parser = new QCCParser();
    } else {
      alert("不支持的网站");
      return;
    }
    parser.parseCompanyInfo();
  });

  container.appendChild(copySourceButton);
  container.appendChild(parseInfoButton);

  document.body.appendChild(container);
}
|
||||
|
||||
// 页面加载完成后创建按钮
|
||||
window.addEventListener("load", createButtonContainer);
|
||||
})();
|
||||
23
config.py
Normal file
23
config.py
Normal file
@@ -0,0 +1,23 @@
|
||||
|
||||
|
||||
import os

# Request headers for the three search engines. Supplying the cookie of a
# logged-in session for each engine is recommended.
#
# SECURITY: a session cookie is a credential and must not be committed to the
# repository. Cookies are therefore read from the environment variables
# BING_COOKIE, BAIDU_COOKIE and GOOGLE_COOKIE; an unset variable yields an
# empty cookie. The Baidu session cookie that used to be hard-coded here has
# been published with the commit and should be revoked.
bingheaders = {
    'cookie': os.environ.get('BING_COOKIE', ''),
    'referer': 'https://cn.bing.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/82.0.4051.0 Safari/537.36 Edg/82.0.425.0',
}

baiduheaders = {
    'Cookie': os.environ.get('BAIDU_COOKIE', ''),
    'Host': 'www.baidu.com',
    'referer': 'https://www.baidu.com/s',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36 Edg/139.0.0.0',
}

googleheaders = {
    'cookie': os.environ.get('GOOGLE_COOKIE', ''),
    'referer': 'https://www.google.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
}

# Crawling Google requires a proxy.
proxy = 'http://127.0.0.1:7897'
|
||||
|
||||
69
data.csv
Normal file
69
data.csv
Normal file
@@ -0,0 +1,69 @@
|
||||
杭州辉煌物业管理有限公司
|
||||
杭州辉望科技有限公司
|
||||
浙江八方电信科技集团有限公司
|
||||
中国移动通信集团浙江有限公司余杭分公司西溪八方城(自营厅)
|
||||
瑞凤九天(杭州)科技有限公司
|
||||
金码智能科技(杭州)有限公司
|
||||
OPPO广东移动通信有限公司
|
||||
杭州巨量引擎网络技术有限公司
|
||||
杭州绿城衡宇环境设计有限公司
|
||||
新疆浙疆果业有限公司
|
||||
杭州未知数品牌管理有限公司
|
||||
杭州慈山科技有限公司
|
||||
杭州扬拓体育科技有限公司
|
||||
杭州乂游网络科技有限公司
|
||||
杭州钱橙似锦科技有限公司
|
||||
杭州奥得徕贸易有限公司
|
||||
杭州伍壹荟旅游咨资询有限公司
|
||||
杭州心满意定供应链服务有限公司
|
||||
杭州麒晨科技有限公司
|
||||
杭州羊咩咩文化传媒有限公司
|
||||
杭州禾露则正生物科技有限公司
|
||||
浙江商盟支付有限公司
|
||||
天禄(杭州)科技有限公司
|
||||
如是启创(杭州)科技有限公司
|
||||
杭州音视贝科技有限公司
|
||||
杭州千骏轴承有限公司
|
||||
杭州锐擎科技有限公司
|
||||
浙江力一科技有限公司
|
||||
煜邦电力智能装备(嘉兴)有限公司
|
||||
杭州裕阳经营管理合伙企业(有限合伙)
|
||||
氧气.康复中心
|
||||
杭州云迹物联科技有限公司
|
||||
杭州着墨文化创意者限公司
|
||||
亚信科技(南京)有限公司
|
||||
杭州密尔沃智能装备有限公司
|
||||
杭州骏远电子商务有限公司
|
||||
杭州一喂智能科技有限公司
|
||||
杭州孚伦特科技有限公司
|
||||
杭州人谋天成科技有限公司
|
||||
杭州瑾馨贸易集团有限公司
|
||||
杭州琑为缘文化艺术有限公司
|
||||
浙江丝里伯睡眠科技股份有限公司
|
||||
杭州倍驰科技有限公司
|
||||
杭州心灵部落教育有限公司(灵动生活)
|
||||
杭州云印智造科技有限公司
|
||||
浙江海拓环境技术有限公司
|
||||
申能环境科技有限公司
|
||||
医贝云服(杭州)科技有限公司
|
||||
杭州甬盛通信技术有限公司
|
||||
杭州字节跳动科技有限公司
|
||||
杭州邻汇网络科技有限公司
|
||||
浙江建盛安全科技有限公司
|
||||
幻想集团·杭州运营中心
|
||||
杭州阿克莱斯设备有限公司
|
||||
浙江省现代农业促进会
|
||||
益思芯科技(杭州)有限公司
|
||||
杭州霖思网络科技有限公司
|
||||
杭州星瀚知识产权代理有限公司
|
||||
风华(杭州)信息技术有限公司
|
||||
杭州晓羽科技有限公司
|
||||
浙江根旺律师事务所
|
||||
远大住宅工业(杭州)有限公司
|
||||
浙江全应科技有限公司
|
||||
杭州塞牧文化传媒有限公司
|
||||
浙江彩屋信息技术有限公司
|
||||
杭州瑞泡特教育科技有限公司
|
||||
杭州贝享健康科技有限公司
|
||||
杭州摸象大数据科技有限公司
|
||||
杭州颐刻生物科技有限公司
|
||||
|
1
freeze.bat
Normal file
1
freeze.bat
Normal file
@@ -0,0 +1 @@
|
||||
REM Write the packages reported by "pip freeze" to requirements.txt.
pip freeze > requirements.txt
|
||||
2
install_requirements.bat
Normal file
2
install_requirements.bat
Normal file
@@ -0,0 +1,2 @@
|
||||
REM Upgrade pip first, then install the pinned dependencies. Both steps go
REM through "python.exe -m pip" so they act on the same interpreter; a bare
REM "pip" on PATH may belong to a different Python installation.
python.exe -m pip install --upgrade pip
python.exe -m pip install -r requirements.txt
|
||||
138
main.py
Normal file
138
main.py
Normal file
@@ -0,0 +1,138 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import asyncio
|
||||
import random
|
||||
|
||||
import aiohttp
|
||||
import time
|
||||
import sys
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import aiofiles
|
||||
import urllib.parse
|
||||
import argparse
|
||||
from colorama import init, Fore
|
||||
from search import Bing,Baidu
|
||||
import openpyxl
|
||||
import ssl
|
||||
|
||||
from tool.read_csv import CSVReader
|
||||
|
||||
# Timestamp taken when the module is loaded; search_one_company() reports the elapsed time relative to it.
start = time.time()
|
||||
def printascii():
    """Initialise colorama and print the ASCII-art banner in green."""
    init()
    banner = r'''
 ____                      _
/ ___|  ___  __ _ _ __ ___| |__   ___ _ __
\___ \ / _ \/ _` | '__/ __| '_ \ / _ \ '__|
 ___) |  __/ (_| | | | (__| | | |  __/ |
|____/ \___|\__,_|_|  \___|_| |_|\___|_|
'''
    # Restore the default colour after the banner.
    print(Fore.GREEN + banner + Fore.RESET)
|
||||
# 天欣安全实验室
|
||||
|
||||
|
||||
def writeExcel(titles, links, ws):
    """Append one ``(title, link)`` row to *ws* per paired title and link.

    Pairing stops at the end of the shorter sequence. *ws* only needs an
    ``append(row)`` method.
    """
    for pair in zip(titles, links):
        ws.append(pair)
|
||||
|
||||
def create_sheet_and_write(wb, engine, keywords, num, title):
    """Run one search engine and store its results in a new sheet of *wb*.

    The sheet named *title* is created first, then *engine* is called as
    ``engine(keywords, num)``. Its first two return items (titles and links)
    are written to the sheet row by row.
    """
    sheet = wb.create_sheet(title=title)
    found = engine(keywords, num)
    writeExcel(found[0], found[1], sheet)
|
||||
def excel_text2url(link_url):
    """Return an Excel ``HYPERLINK`` formula that makes *link_url* clickable.

    The URL is used both as the link target and as the displayed text. Double
    quotes inside the URL are doubled, which is how a quote is written inside
    an Excel string literal; without that a URL containing ``"`` would end
    the literal early and yield a broken formula.
    """
    escaped = str(link_url).replace('"', '""')
    return f'=HYPERLINK("{escaped}","{escaped}")'
|
||||
# Walk every worksheet and replace each value in the second column with the
# formula returned by excel_text2url.
def update_hyperlinks(wb):
    """Turn the URLs in column B of every sheet of *wb* into hyperlink formulas.

    Empty cells are left untouched.
    """
    for sheet in wb.worksheets:
        second_column = sheet.iter_rows(
            min_row=1, max_row=sheet.max_row, min_col=2, max_col=2
        )
        for row in second_column:
            for cell in row:
                if not cell.value:
                    # Nothing to convert; stop scanning this row.
                    break
                cell.value = excel_text2url(cell.value)
|
||||
|
||||
def commend():
    """Parse the command-line options and return the resulting namespace.

    Options: ``-k`` keywords (one or more), ``-p`` page count or ``start:end``
    page range (default ``'5'``), ``-m`` search engines (default ``'all'``).
    When the script is started without any argument, the banner and the help
    text are printed and the process exits.
    """
    parser = argparse.ArgumentParser(
        prog="Searcher",
        description='此工具用于对百度、必应和谷歌搜索的协程爬取--天欣安全实验室',
        usage='please read -h',
        exit_on_error=False,
    )
    parser.add_argument("-k", type=str, help="搜索的关键词", nargs='+')
    parser.add_argument(
        "-p",
        type=str,
        help="需要搜索页数,默认为5,支持范围搜索,例如搜索从第2页到第五页的参数为 2:5",
        default='5',
    )
    parser.add_argument(
        "-m",
        type=str,
        help="使用的搜索引擎:百度:bd,必应:bin,谷歌:goo 不填写默认使用全部",
        default='all',
        nargs='+',
    )
    # parser.add_argument("-t", '--task', type=int, help="设置的线程,默认为8", default=8)
    args = parser.parse_args()
    if len(sys.argv) != 1:
        return args
    # No arguments at all: show the banner and usage, then quit.
    printascii()
    parser.print_help()
    sys.exit()
|
||||
def search_company_info(company_name_arg, num):
    """Search Bing and keep only the hits that are company-registry pages.

    Args:
        company_name_arg: Search keywords; surrounding whitespace is stripped.
        num: Page count or ``"start:end"`` page range, passed on to
            ``Bing.bing_main``.

    Returns:
        A list of ``[title, url]`` pairs whose URL is accepted by
        ``filter_company_sites``.
    """
    keywords = company_name_arg.strip()
    titles, urls = Bing.bing_main(keywords, num)

    data_list = []
    # zip() pairs the two result lists directly instead of indexing both.
    for title, url in zip(titles, urls):
        print(f"必应搜索爬取结果为,title:{title}, url:{url}")
        # Reuse the shared site filter rather than a second copy of its regex.
        if filter_company_sites([url]):
            data_list.append([title, url])
    return data_list
|
||||
|
||||
# Prefixes of the company detail pages that are accepted, for example:
#   https://www.tianyancha.com/company/5226478758
#   https://aiqicha.baidu.com/company_detail_26602790857925
#   https://www.qcc.com/firm/05b449eb5cc417d0f97c14104051f5c0.html
# Dots are escaped so that only the literal host names match, and the pattern
# is compiled once instead of being parsed for every URL.
_COMPANY_URL_RE = re.compile(
    r"https://aiqicha\.baidu\.com/company_detail_"
    r"|https://www\.qcc\.com/firm/"
    r"|https://www\.tianyancha\.com/company/"
)


def filter_company_sites(urls):
    """Return the URLs in *urls* that start with a known company-page prefix.

    The input order is preserved.
    """
    return [url for url in urls if _COMPANY_URL_RE.match(url)]
|
||||
|
||||
|
||||
def search_one_company(company_name_arg, num):
    """Crawl Bing and Baidu for one company and save the hits to a workbook.

    One sheet is written per search engine, every URL is converted to a
    clickable hyperlink formula, and the workbook is saved in the current
    directory as ``<keywords>-<company name>.xlsx``.

    Args:
        company_name_arg: Search keywords, also used for the file name.
        num: Page count or ``"start:end"`` page range for both engines.
    """
    keywords = company_name_arg.strip()
    print(f"您搜索的关键词为:{keywords}")

    wb = openpyxl.Workbook()
    # Drop the sheet that openpyxl creates by default.
    wb.remove(wb['Sheet'])
    printascii()

    create_sheet_and_write(wb, Bing.bing_main, keywords, num, "必应爬取结果")
    create_sheet_and_write(wb, Baidu.baidu_main, keywords, num, "百度爬取结果")
    # Turn every URL into a hyperlink that opens on click.
    update_hyperlinks(wb)

    # Strip the characters that are not allowed in Windows file names from
    # BOTH parts of the name. Previously only the first part was cleaned, so
    # a keyword containing e.g. "|" still produced an invalid path.
    illegal_chars = r'[\\/:*?"<>|]'
    safe_keywords = re.sub(illegal_chars, "", keywords)
    safe_company = re.sub(illegal_chars, "", company_name_arg)
    wb.save(f'./{safe_keywords}-{safe_company}.xlsx')

    print(Fore.GREEN + '总任务结束!' + Fore.RESET)
    end = time.time()
    # Reset the colour afterwards so later output is not printed in red.
    print(Fore.RED + f'脚本总时间: {end - start:.2f}' + Fore.RESET)
|
||||
|
||||
if __name__ == '__main__':
    # Company names come from the first column of the headerless data.csv.
    company_names = CSVReader('data.csv').read_column(0, has_header=False)
    print("所有数据:", company_names)

    for company_name in company_names:
        # Pause 6-10 seconds (5 plus a random 1-5) before each query.
        time.sleep(5 + random.randint(1, 5))
        data_list = search_company_info(company_name + " 爱企查|企查查", '1')
        print(data_list)
        # NOTE(review): the loop stops after the first company, exactly as
        # the original counter did; confirm whether that limit is intended.
        break
|
||||
|
||||
|
||||
|
||||
20
requirements.txt
Normal file
20
requirements.txt
Normal file
@@ -0,0 +1,20 @@
|
||||
aiofiles==24.1.0
|
||||
aiohappyeyeballs==2.4.0
|
||||
aiohttp==3.10.5
|
||||
aiosignal==1.3.1
|
||||
async-timeout==4.0.3
|
||||
attrs==24.2.0
|
||||
beautifulsoup4==4.12.3
|
||||
colorama==0.4.6
|
||||
et-xmlfile==1.1.0
|
||||
frozenlist==1.4.1
|
||||
greenlet==3.2.4
|
||||
idna==3.8
|
||||
lxml==5.3.0
|
||||
multidict==6.1.0
|
||||
openpyxl==3.1.5
|
||||
playwright==1.55.0
|
||||
pyee==13.0.0
|
||||
soupsieve==2.6
|
||||
typing_extensions==4.12.2
|
||||
yarl==1.11.1
|
||||
128
search/Baidu.py
Normal file
128
search/Baidu.py
Normal file
@@ -0,0 +1,128 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import time
|
||||
import sys
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import aiofiles
|
||||
import urllib.parse
|
||||
import argparse
|
||||
from colorama import init, Fore
|
||||
import ssl
|
||||
from urllib.parse import quote
|
||||
# 添加项目根目录到 sys.path
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import config
|
||||
baiduheaders=config.baiduheaders
|
||||
|
||||
# No overall deadline; connecting and each socket read are limited to 5.5 s.
timeout = aiohttp.ClientTimeout(total=None, sock_connect=5.5, sock_read=5.5)
|
||||
#--天欣安全实验室--#
|
||||
|
||||
# First request: fetch one Baidu result page and collect the result titles
# together with Baidu's redirect links.
async def getfirstinfo(keyword, pn, session):
    """Fetch one Baidu result page, retrying up to three times.

    Args:
        keyword: Search keyword, inserted into the query string as given.
        pn: Value of Baidu's ``pn`` query parameter for this page.
        session: The ``aiohttp`` client session used for the request.

    Returns:
        ``(titles, links)`` - two lists of equal length. The links are
        Baidu redirect links that still have to be resolved. ``([], [])``
        is returned when all three attempts fail.
    """
    # NOTE(review): certificate verification is switched off for this request.
    sslcontext = ssl.create_default_context()
    sslcontext.check_hostname = False
    sslcontext.verify_mode = ssl.CERT_NONE
    url = f'https://www.baidu.com/s?wd={keyword}&pn={pn}'

    for _attempt in range(3):
        # Fresh lists for every attempt, so a failed attempt cannot leave
        # partial or duplicated rows in the final result.
        titlelist = []
        fakeurl = []
        try:
            async with session.get(url, headers=baiduheaders, ssl=sslcontext, timeout=timeout) as resp:
                html = await resp.text()
                soup = BeautifulSoup(html, 'lxml')
                for heading in soup.select('h3.t'):
                    anchor = heading.a
                    href = anchor.get('href') if anchor is not None else None
                    if not href:
                        # A heading without a link has nothing to resolve;
                        # skipping it keeps both lists aligned.
                        continue
                    h3text = heading.text.replace('\n', '').replace(',', ' ').replace('\ue636', '').strip()
                    titlelist.append(h3text)
                    fakeurl.append(href)
                return titlelist, fakeurl
        except Exception:
            # Best-effort crawl: report the failure and try again.
            print("baidu链接失败,正在重新尝试...")

    print(f"百度任务出错:{url}该url无法正常获取数据。")
    return [], []
|
||||
|
||||
# Second request: resolve a Baidu redirect link to the real site URL.
async def gettrueurl(url, printtitle, session):
    """Resolve one Baidu redirect link without following the redirect.

    Args:
        url: The Baidu redirect link.
        printtitle: Result title, used only for the progress output.
        session: The ``aiohttp`` client session used for the request.

    Returns:
        The value of the ``Location`` response header when there is one.
        Otherwise *url* joined onto ``https://www.baidu.com/``. When the
        request fails, *url* is returned unchanged.
    """
    domain = 'https://www.baidu.com/'
    try:
        async with session.get(url, headers=baiduheaders, allow_redirects=False) as resp:
            # Read the header before draining the body.
            location = resp.headers.get('Location')
            await resp.read()
    except Exception:
        # Best effort: keep the unresolved link rather than losing the row.
        return url

    # The previous check compared str(header) with None, which is always
    # true, so a missing header was returned as the literal string 'None'.
    if location:
        print(printtitle, " ", location)
        return location

    print(url + '该url无法转跳')
    url = urllib.parse.urljoin(domain, url)
    print(printtitle, " ", url)
    return url
|
||||
|
||||
|
||||
|
||||
|
||||
async def baidu_spinder(keyword, num):
    """Crawl Baidu result pages and resolve every hit to its real URL.

    Args:
        keyword: Search keyword, inserted into the request URL as given.
        num: Either a page count such as ``"5"`` or a page range
            ``"start:end"`` (1-based, inclusive).

    Returns:
        ``(titles, urls)`` collected from all requested pages.

    Raises:
        ValueError: If *num* contains more than one ``':'`` or a side of the
            range is not a number.
    """
    print(f'百度爬取任务进行中,爬取页数为{num}...')

    # Translate the page specification into Baidu result offsets (10 per page).
    if ':' not in num:
        start_page, end_page = 0, int(num) * 10
    else:
        if num.count(':') > 1:
            raise ValueError("输入中必须且只能包含一个 ':'")
        start_page, end_page = num.split(':')
        if not (start_page.isdigit() and end_page.isdigit()):
            raise ValueError("':' 两侧的值必须是数字")
        start_page = (int(start_page) - 1) * 10
        end_page = int(end_page) * 10

    # Stage 1: fetch every result page concurrently.
    async with aiohttp.ClientSession() as session:
        page_tasks = [
            asyncio.create_task(getfirstinfo(keyword, pn, session))
            for pn in range(start_page, end_page, 10)
        ]
        result = await asyncio.gather(*page_tasks)

    # Stage 2: resolve every redirect link concurrently.
    titlelist = []
    urllist = []
    async with aiohttp.ClientSession() as session:
        redirect_tasks = []
        for page_titles, page_links in result:
            titlelist += page_titles
            for printtitle, url in zip(page_titles, page_links):
                if not url.startswith(('http://', 'https://')):
                    # Relative link: make it absolute against Baidu.
                    url = urllib.parse.urljoin('http://www.baidu.com/', url)
                redirect_tasks.append(
                    asyncio.create_task(gettrueurl(url, printtitle, session))
                )
        print('标题\t URL\t')
        urllist += await asyncio.gather(*redirect_tasks)

    count = len(urllist)
    print(f"百度搜索爬取结果数量为{count}")
    print(Fore.GREEN + '百度爬取任务完成!\n' + Fore.RESET)
    return titlelist, urllist
|
||||
# await baiduwriteCSV(titlelist, urllist, keyword)
|
||||
|
||||
|
||||
def baidu_main(keyword, num):
    """Synchronous entry point: run the Baidu crawl to completion.

    *keyword* is URL-quoted before it is handed to ``baidu_spinder``.

    Returns:
        The ``(titles, urls)`` pair produced by ``baidu_spinder``.
    """
    keyword = quote(keyword)
    if sys.platform.startswith('win'):
        # Keep the selector event loop on Windows, as before.
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    # asyncio.run() creates and closes a loop of its own. Calling
    # asyncio.get_event_loop() while no loop is running is deprecated.
    return asyncio.run(baidu_spinder(keyword, num))
|
||||
|
||||
async def Baidu_main(keywords, num):
    """Coroutine entry point: await ``baidu_spinder`` and return its result.

    *keywords* is passed through unchanged (it is not URL-quoted here).
    """
    crawl_result = await baidu_spinder(keywords, num)
    return crawl_result
|
||||
90
search/Bing.py
Normal file
90
search/Bing.py
Normal file
@@ -0,0 +1,90 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import urllib.parse
|
||||
from urllib.parse import quote
|
||||
import aiohttp
|
||||
from bs4 import BeautifulSoup
|
||||
from colorama import Fore
|
||||
|
||||
# 添加项目根目录到 sys.path
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import config
|
||||
bingheaders=config.bingheaders
|
||||
proxy=config.proxy
|
||||
# No overall deadline; connecting and each socket read are limited to 5.5 s.
timeout = aiohttp.ClientTimeout(total=None, sock_connect=5.5, sock_read=5.5)
|
||||
async def getbing(url, session):
    """Fetch one Bing result page and extract the result links.

    Args:
        url: Full URL of the Bing result page.
        session: The ``aiohttp`` client session used for the request.

    Returns:
        ``(urls, titles)`` - note the order, URLs first. Both lists have
        the same length. ``([], [])`` is returned when the page cannot be
        fetched or parsed.
    """
    url_list = []
    title_list = []
    try:
        # The request itself is inside the try block, so a timeout or
        # connection error fails only this page instead of escaping into
        # the caller's asyncio.gather().
        async with session.get(url, headers=bingheaders, timeout=timeout) as resp:
            html = await resp.text()
        soup = BeautifulSoup(html, 'lxml')
        for anchor in soup.select('h2 a'):
            hurl = anchor.get('href')
            if not hurl:
                # An anchor without href used to raise and throw away the
                # whole page; skip just that anchor instead.
                continue
            htext = anchor.text.replace('\n', '').replace(',', ' ').strip()
            if not hurl.startswith(('http://', 'https://')):
                # Relative link: make it absolute against Bing.
                hurl = urllib.parse.urljoin('https://cn.bing.com/', hurl)
            print(htext, " ", hurl)
            title_list.append(htext)
            url_list.append(hurl)
    except Exception:
        # Best-effort crawl: report the page and carry on with the others.
        print(f"必应页面爬取失败,{url}该url无法正常获取数据。")
        return [], []
    return url_list, title_list
|
||||
|
||||
|
||||
async def bing_spinder(keyword, num):
    """Crawl Bing result pages concurrently.

    Args:
        keyword: Search keyword, inserted into the request URL as given.
        num: Either a page count such as ``"5"`` or a page range
            ``"start:end"`` (1-based, inclusive).

    Returns:
        ``(titles, urls)`` collected from all requested pages.

    Raises:
        ValueError: If *num* contains more than one ``':'`` or a side of the
            range is not a number.
    """
    print(f'必应爬取任务进行中,爬取页数为{num}...')
    print('标题 url')

    # Translate the page specification into result offsets (10 per page).
    if ':' not in num:
        start_page, end_page = 0, int(num) * 10
    else:
        if num.count(':') > 1:
            raise ValueError("输入中必须且只能包含一个 ':'")
        start_page, end_page = num.split(':')
        if not (start_page.isdigit() and end_page.isdigit()):
            raise ValueError("':' 两侧的值必须是数字")
        start_page = (int(start_page) - 1) * 10
        end_page = int(end_page) * 10

    async with aiohttp.ClientSession() as session:
        tasks = []
        for pn in range(start_page, end_page, 10):
            # The offset has to be part of the URL. Without it every task
            # requested the same first page and the results were duplicates.
            # NOTE(review): `first` is used as the 1-based index of the first
            # result on the page - confirm against live Bing behaviour.
            url = f'https://cn.bing.com/search?q={keyword}&first={pn + 1}&qs=n&form=QBRE&sp=-1&lq=0'
            tasks.append(asyncio.create_task(getbing(url, session)))
        result = await asyncio.gather(*tasks)

    urllist = []
    titlelist = []
    # getbing() returns (urls, titles), in that order.
    for page_urls, page_titles in result:
        urllist += page_urls
        titlelist += page_titles

    count = len(urllist)
    print(f"必应搜索爬取结果为{count}")
    print(Fore.GREEN + '必应爬取任务完成\n' + Fore.RESET)
    return titlelist, urllist
|
||||
# await bingwriteCSV(titlelist, urllist, keyword)
|
||||
|
||||
|
||||
def bing_main(keyword, num):
    """Synchronous entry point: run the Bing crawl to completion.

    *keyword* is URL-quoted before it is handed to ``bing_spinder``.

    Returns:
        The ``(titles, urls)`` pair produced by ``bing_spinder``.
    """
    keyword = quote(keyword)
    if sys.platform.startswith('win'):
        # Keep the selector event loop on Windows, as before.
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    # asyncio.run() creates and closes a loop of its own. Calling
    # asyncio.get_event_loop() while no loop is running is deprecated.
    return asyncio.run(bing_spinder(keyword, num))
|
||||
|
||||
async def Bing_main(keywords, num):
    """Coroutine entry point: await ``bing_spinder`` and return its result.

    *keywords* is passed through unchanged (it is not URL-quoted here).
    """
    crawl_result = await bing_spinder(keywords, num)
    return crawl_result
|
||||
BIN
search/__pycache__/Baidu.cpython-313.pyc
Normal file
BIN
search/__pycache__/Baidu.cpython-313.pyc
Normal file
Binary file not shown.
BIN
search/__pycache__/Bing.cpython-313.pyc
Normal file
BIN
search/__pycache__/Bing.cpython-313.pyc
Normal file
Binary file not shown.
BIN
tool/__pycache__/read_csv.cpython-313.pyc
Normal file
BIN
tool/__pycache__/read_csv.cpython-313.pyc
Normal file
Binary file not shown.
190
tool/read_csv.py
Normal file
190
tool/read_csv.py
Normal file
@@ -0,0 +1,190 @@
|
||||
import csv
|
||||
from typing import List, Dict, Union, Any, Optional
|
||||
|
||||
class CSVReader:
    """Read CSV files with or without a header row.

    Rows are returned as dictionaries. With a header the keys are the header
    names; without one they are ``col_0``, ``col_1``, ... Columns can be
    selected by header name or by zero-based index.
    """

    def __init__(self, file_path: str):
        """Remember the path; the file is opened on each read call.

        Args:
            file_path: Path of the CSV file.
        """
        self.file_path = file_path

    def read(self, has_header: bool = True, encoding: str = 'utf-8') -> List[Dict[str, Any]]:
        """Read the whole file.

        Args:
            has_header: Whether the first row holds the column names.
            encoding: Text encoding of the file.

        Returns:
            One dictionary per row.
        """
        # newline='' is what the csv module asks for: it lets the reader do
        # its own line-ending handling, so line breaks inside quoted fields
        # are kept as written.
        with open(self.file_path, 'r', encoding=encoding, newline='') as file:
            if has_header:
                return [dict(row) for row in csv.DictReader(file)]
            # No header: key every value by its column position.
            return [
                {f'col_{i}': value for i, value in enumerate(row)}
                for row in csv.reader(file)
            ]

    def read_column(self, column: Union[int, str], has_header: bool = True,
                    encoding: str = 'utf-8') -> List[Any]:
        """Read a single column.

        Args:
            column: Zero-based column index, or a header name (only with
                ``has_header=True``).
            has_header: Whether the first row holds the column names.
            encoding: Text encoding of the file.

        Returns:
            The values of the column. In headerless mode, rows that are too
            short to contain the column are skipped.

        Raises:
            ValueError: If *column* is neither an ``int`` nor, with a header,
                a ``str``.
            IndexError: If a header exists and the index is out of range.
            KeyError: If a header name does not exist.
        """
        data = self.read(has_header, encoding)

        if has_header and isinstance(column, str):
            return [row[column] for row in data]
        if not isinstance(column, int):
            raise ValueError("Invalid column parameter")

        if not has_header:
            column_name = f'col_{column}'
            return [row[column_name] for row in data if column_name in row]

        if not data:
            return []
        # Translate the index into a header name using the first row's keys.
        keys = list(data[0].keys())
        if not 0 <= column < len(keys):
            raise IndexError(f"Column index {column} out of range")
        column_name = keys[column]
        return [row[column_name] for row in data]

    def read_columns(self, columns: Dict[str, Union[int, str]], has_header: bool = True,
                     encoding: str = 'utf-8') -> List[Dict[str, Any]]:
        """Read several columns, optionally renaming them.

        Args:
            columns: Maps each output name to the source column, given as a
                zero-based index or (with a header) a header name.
            has_header: Whether the first row holds the column names.
            encoding: Text encoding of the file.

        Returns:
            One dictionary per row containing every requested output name.
            A column that cannot be resolved yields ``''``.
        """
        all_data = self.read(has_header, encoding)
        header_keys = list(all_data[0].keys()) if all_data else []

        result = []
        for row in all_data:
            new_row = {}
            for new_name, old_column in columns.items():
                if isinstance(old_column, str) and has_header:
                    # Look the value up by header name.
                    new_row[new_name] = row.get(old_column, '')
                elif isinstance(old_column, int):
                    if not has_header:
                        new_row[new_name] = row.get(f'col_{old_column}', '')
                    elif 0 <= old_column < len(header_keys):
                        new_row[new_name] = row.get(header_keys[old_column], '')
                    else:
                        new_row[new_name] = ''
                else:
                    # E.g. a header name in headerless mode. The key used to
                    # be dropped silently; emit '' so every row has the same
                    # keys.
                    new_row[new_name] = ''
            result.append(new_row)
        return result
|
||||
|
||||
# Usage example
if __name__ == "__main__":
    # Both calls below read ../data.csv in headerless mode.
    reader = CSVReader('../data.csv')

    # First column, renamed to "company_name", as a list of dictionaries.
    renamed_rows = reader.read_columns({'company_name': 0}, has_header=False)
    print("所有数据:", renamed_rows)

    # First column as a plain list of values.
    first_column = reader.read_column(0, has_header=False)
    print("所有数据:", first_column)
|
||||
# # 读取指定列(按列名)
|
||||
# names = reader.read_column('name', has_header=True)
|
||||
# print("姓名列:", names)
|
||||
#
|
||||
# # 读取指定列(按索引)
|
||||
# ages = reader.read_column(1, has_header=True)
|
||||
# print("年龄列:", ages)
|
||||
#
|
||||
# # 读取多列并重命名
|
||||
# selected_data = reader.read_columns({
|
||||
# '姓名': 'name',
|
||||
# '年龄': 1
|
||||
# }, has_header=True)
|
||||
# print("选择的数据:", selected_data)
|
||||
#
|
||||
# # 示例2: 无表头的CSV文件
|
||||
# # 假设有一个名为data_no_header.csv的文件内容如下:
|
||||
# # Alice,25,Beijing
|
||||
# # Bob,30,Shanghai
|
||||
# # Charlie,35,Guangzhou
|
||||
#
|
||||
# reader2 = CSVReader('data_no_header.csv')
|
||||
#
|
||||
# # 读取所有数据
|
||||
# all_data_no_header = reader2.read(has_header=False)
|
||||
# print("无表头所有数据:", all_data_no_header)
|
||||
#
|
||||
# # 读取指定列(按索引)
|
||||
# first_column = reader2.read_column(0, has_header=False)
|
||||
# print("第一列:", first_column)
|
||||
#
|
||||
# # 读取多列并指定名称
|
||||
# selected_data_no_header = reader2.read_columns({
|
||||
# '姓名': 0,
|
||||
# '城市': 2
|
||||
# }, has_header=False)
|
||||
# print("无表头选择的数据:", selected_data_no_header)
|
||||
BIN
~$亚信科技(南京)有限公司-亚信科技(南京)有限公司.xlsx
Normal file
BIN
~$亚信科技(南京)有限公司-亚信科技(南京)有限公司.xlsx
Normal file
Binary file not shown.
BIN
~$杭州云印智造科技有限公司-杭州云印智造科技有限公司.xlsx
Normal file
BIN
~$杭州云印智造科技有限公司-杭州云印智造科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
中国移动通信集团浙江有限公司余杭分公司西溪八方城(自营厅)-中国移动通信集团浙江有限公司余杭分公司西溪八方城(自营厅).xlsx
Normal file
BIN
中国移动通信集团浙江有限公司余杭分公司西溪八方城(自营厅)-中国移动通信集团浙江有限公司余杭分公司西溪八方城(自营厅).xlsx
Normal file
Binary file not shown.
BIN
亚信科技(南京)有限公司-亚信科技(南京)有限公司.xlsx
Normal file
BIN
亚信科技(南京)有限公司-亚信科技(南京)有限公司.xlsx
Normal file
Binary file not shown.
BIN
医贝云服(杭州)科技有限公司-医贝云服(杭州)科技有限公司.xlsx
Normal file
BIN
医贝云服(杭州)科技有限公司-医贝云服(杭州)科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
天禄(杭州)科技有限公司-天禄(杭州)科技有限公司.xlsx
Normal file
BIN
天禄(杭州)科技有限公司-天禄(杭州)科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
如是启创(杭州)科技有限公司-如是启创(杭州)科技有限公司.xlsx
Normal file
BIN
如是启创(杭州)科技有限公司-如是启创(杭州)科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
幻想集团·杭州运营中心-幻想集团·杭州运营中心.xlsx
Normal file
BIN
幻想集团·杭州运营中心-幻想集团·杭州运营中心.xlsx
Normal file
Binary file not shown.
BIN
新疆浙疆果业有限公司-新疆浙疆果业有限公司.xlsx
Normal file
BIN
新疆浙疆果业有限公司-新疆浙疆果业有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州一喂智能科技有限公司-杭州一喂智能科技有限公司.xlsx
Normal file
BIN
杭州一喂智能科技有限公司-杭州一喂智能科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州乂游网络科技有限公司-杭州乂游网络科技有限公司.xlsx
Normal file
BIN
杭州乂游网络科技有限公司-杭州乂游网络科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州云印智造科技有限公司-杭州云印智造科技有限公司.xlsx
Normal file
BIN
杭州云印智造科技有限公司-杭州云印智造科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州云迹物联科技有限公司-杭州云迹物联科技有限公司.xlsx
Normal file
BIN
杭州云迹物联科技有限公司-杭州云迹物联科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州人谋天成科技有限公司-杭州人谋天成科技有限公司.xlsx
Normal file
BIN
杭州人谋天成科技有限公司-杭州人谋天成科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州伍壹荟旅游咨资询有限公司-杭州伍壹荟旅游咨资询有限公司.xlsx
Normal file
BIN
杭州伍壹荟旅游咨资询有限公司-杭州伍壹荟旅游咨资询有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州倍驰科技有限公司-杭州倍驰科技有限公司.xlsx
Normal file
BIN
杭州倍驰科技有限公司-杭州倍驰科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州千骏轴承有限公司-杭州千骏轴承有限公司.xlsx
Normal file
BIN
杭州千骏轴承有限公司-杭州千骏轴承有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州塞牧文化传媒有限公司-杭州塞牧文化传媒有限公司.xlsx
Normal file
BIN
杭州塞牧文化传媒有限公司-杭州塞牧文化传媒有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州奥得徕贸易有限公司-杭州奥得徕贸易有限公司.xlsx
Normal file
BIN
杭州奥得徕贸易有限公司-杭州奥得徕贸易有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州字节跳动科技有限公司-杭州字节跳动科技有限公司.xlsx
Normal file
BIN
杭州字节跳动科技有限公司-杭州字节跳动科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州孚伦特科技有限公司-杭州孚伦特科技有限公司.xlsx
Normal file
BIN
杭州孚伦特科技有限公司-杭州孚伦特科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州密尔沃智能装备有限公司-杭州密尔沃智能装备有限公司.xlsx
Normal file
BIN
杭州密尔沃智能装备有限公司-杭州密尔沃智能装备有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州巨量引擎网络技术有限公司-杭州巨量引擎网络技术有限公司.xlsx
Normal file
BIN
杭州巨量引擎网络技术有限公司-杭州巨量引擎网络技术有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州心满意定供应链服务有限公司-杭州心满意定供应链服务有限公司.xlsx
Normal file
BIN
杭州心满意定供应链服务有限公司-杭州心满意定供应链服务有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州心灵部落教育有限公司(灵动生活)-杭州心灵部落教育有限公司(灵动生活).xlsx
Normal file
BIN
杭州心灵部落教育有限公司(灵动生活)-杭州心灵部落教育有限公司(灵动生活).xlsx
Normal file
Binary file not shown.
BIN
杭州慈山科技有限公司-杭州慈山科技有限公司.xlsx
Normal file
BIN
杭州慈山科技有限公司-杭州慈山科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州扬拓体育科技有限公司-杭州扬拓体育科技有限公司.xlsx
Normal file
BIN
杭州扬拓体育科技有限公司-杭州扬拓体育科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州摸象大数据科技有限公司-杭州摸象大数据科技有限公司.xlsx
Normal file
BIN
杭州摸象大数据科技有限公司-杭州摸象大数据科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州星瀚知识产权代理有限公司-杭州星瀚知识产权代理有限公司.xlsx
Normal file
BIN
杭州星瀚知识产权代理有限公司-杭州星瀚知识产权代理有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州晓羽科技有限公司-杭州晓羽科技有限公司.xlsx
Normal file
BIN
杭州晓羽科技有限公司-杭州晓羽科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州未知数品牌管理有限公司-杭州未知数品牌管理有限公司.xlsx
Normal file
BIN
杭州未知数品牌管理有限公司-杭州未知数品牌管理有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州琑为缘文化艺术有限公司-杭州琑为缘文化艺术有限公司.xlsx
Normal file
BIN
杭州琑为缘文化艺术有限公司-杭州琑为缘文化艺术有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州瑞泡特教育科技有限公司-杭州瑞泡特教育科技有限公司.xlsx
Normal file
BIN
杭州瑞泡特教育科技有限公司-杭州瑞泡特教育科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州瑾馨贸易集团有限公司-杭州瑾馨贸易集团有限公司.xlsx
Normal file
BIN
杭州瑾馨贸易集团有限公司-杭州瑾馨贸易集团有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州甬盛通信技术有限公司-杭州甬盛通信技术有限公司.xlsx
Normal file
BIN
杭州甬盛通信技术有限公司-杭州甬盛通信技术有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州着墨文化创意者限公司-杭州着墨文化创意者限公司.xlsx
Normal file
BIN
杭州着墨文化创意者限公司-杭州着墨文化创意者限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州禾露则正生物科技有限公司-杭州禾露则正生物科技有限公司.xlsx
Normal file
BIN
杭州禾露则正生物科技有限公司-杭州禾露则正生物科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州绿城衡宇环境设计有限公司-杭州绿城衡宇环境设计有限公司.xlsx
Normal file
BIN
杭州绿城衡宇环境设计有限公司-杭州绿城衡宇环境设计有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州羊咩咩文化传媒有限公司-杭州羊咩咩文化传媒有限公司.xlsx
Normal file
BIN
杭州羊咩咩文化传媒有限公司-杭州羊咩咩文化传媒有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州裕阳经营管理合伙企业(有限合伙)-杭州裕阳经营管理合伙企业(有限合伙).xlsx
Normal file
BIN
杭州裕阳经营管理合伙企业(有限合伙)-杭州裕阳经营管理合伙企业(有限合伙).xlsx
Normal file
Binary file not shown.
BIN
杭州贝享健康科技有限公司-杭州贝享健康科技有限公司.xlsx
Normal file
BIN
杭州贝享健康科技有限公司-杭州贝享健康科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州辉望科技有限公司-杭州辉望科技有限公司.xlsx
Normal file
BIN
杭州辉望科技有限公司-杭州辉望科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州辉煌物业管理有限公司-杭州辉煌物业管理有限公司.xlsx
Normal file
BIN
杭州辉煌物业管理有限公司-杭州辉煌物业管理有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州邻汇网络科技有限公司-杭州邻汇网络科技有限公司.xlsx
Normal file
BIN
杭州邻汇网络科技有限公司-杭州邻汇网络科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州钱橙似锦科技有限公司-杭州钱橙似锦科技有限公司.xlsx
Normal file
BIN
杭州钱橙似锦科技有限公司-杭州钱橙似锦科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州锐擎科技有限公司-杭州锐擎科技有限公司.xlsx
Normal file
BIN
杭州锐擎科技有限公司-杭州锐擎科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州阿克莱斯设备有限公司-杭州阿克莱斯设备有限公司.xlsx
Normal file
BIN
杭州阿克莱斯设备有限公司-杭州阿克莱斯设备有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州霖思网络科技有限公司-杭州霖思网络科技有限公司.xlsx
Normal file
BIN
杭州霖思网络科技有限公司-杭州霖思网络科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州音视贝科技有限公司-杭州音视贝科技有限公司.xlsx
Normal file
BIN
杭州音视贝科技有限公司-杭州音视贝科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州颐刻生物科技有限公司-杭州颐刻生物科技有限公司.xlsx
Normal file
BIN
杭州颐刻生物科技有限公司-杭州颐刻生物科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州骏远电子商务有限公司-杭州骏远电子商务有限公司.xlsx
Normal file
BIN
杭州骏远电子商务有限公司-杭州骏远电子商务有限公司.xlsx
Normal file
Binary file not shown.
BIN
杭州麒晨科技有限公司-杭州麒晨科技有限公司.xlsx
Normal file
BIN
杭州麒晨科技有限公司-杭州麒晨科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
氧气.康复中心-氧气.康复中心.xlsx
Normal file
BIN
氧气.康复中心-氧气.康复中心.xlsx
Normal file
Binary file not shown.
BIN
浙江丝里伯睡眠科技股份有限公司-浙江丝里伯睡眠科技股份有限公司.xlsx
Normal file
BIN
浙江丝里伯睡眠科技股份有限公司-浙江丝里伯睡眠科技股份有限公司.xlsx
Normal file
Binary file not shown.
BIN
浙江全应科技有限公司-浙江全应科技有限公司.xlsx
Normal file
BIN
浙江全应科技有限公司-浙江全应科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
浙江八方电信科技集团有限公司-浙江八方电信科技集团有限公司.xlsx
Normal file
BIN
浙江八方电信科技集团有限公司-浙江八方电信科技集团有限公司.xlsx
Normal file
Binary file not shown.
BIN
浙江力一科技有限公司-浙江力一科技有限公司.xlsx
Normal file
BIN
浙江力一科技有限公司-浙江力一科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
浙江商盟支付有限公司-浙江商盟支付有限公司.xlsx
Normal file
BIN
浙江商盟支付有限公司-浙江商盟支付有限公司.xlsx
Normal file
Binary file not shown.
BIN
浙江建盛安全科技有限公司-浙江建盛安全科技有限公司.xlsx
Normal file
BIN
浙江建盛安全科技有限公司-浙江建盛安全科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
浙江彩屋信息技术有限公司-浙江彩屋信息技术有限公司.xlsx
Normal file
BIN
浙江彩屋信息技术有限公司-浙江彩屋信息技术有限公司.xlsx
Normal file
Binary file not shown.
BIN
浙江根旺律师事务所-浙江根旺律师事务所.xlsx
Normal file
BIN
浙江根旺律师事务所-浙江根旺律师事务所.xlsx
Normal file
Binary file not shown.
BIN
浙江海拓环境技术有限公司-浙江海拓环境技术有限公司.xlsx
Normal file
BIN
浙江海拓环境技术有限公司-浙江海拓环境技术有限公司.xlsx
Normal file
Binary file not shown.
BIN
浙江省现代农业促进会-浙江省现代农业促进会.xlsx
Normal file
BIN
浙江省现代农业促进会-浙江省现代农业促进会.xlsx
Normal file
Binary file not shown.
BIN
煜邦电力智能装备(嘉兴)有限公司-煜邦电力智能装备(嘉兴)有限公司.xlsx
Normal file
BIN
煜邦电力智能装备(嘉兴)有限公司-煜邦电力智能装备(嘉兴)有限公司.xlsx
Normal file
Binary file not shown.
BIN
瑞凤九天(杭州)科技有限公司-瑞凤九天(杭州)科技有限公司.xlsx
Normal file
BIN
瑞凤九天(杭州)科技有限公司-瑞凤九天(杭州)科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
申能环境科技有限公司-申能环境科技有限公司.xlsx
Normal file
BIN
申能环境科技有限公司-申能环境科技有限公司.xlsx
Normal file
Binary file not shown.
BIN
益思芯科技(杭州)有限公司-益思芯科技(杭州)有限公司.xlsx
Normal file
BIN
益思芯科技(杭州)有限公司-益思芯科技(杭州)有限公司.xlsx
Normal file
Binary file not shown.
BIN
远大住宅工业(杭州)有限公司-远大住宅工业(杭州)有限公司.xlsx
Normal file
BIN
远大住宅工业(杭州)有限公司-远大住宅工业(杭州)有限公司.xlsx
Normal file
Binary file not shown.
BIN
金码智能科技(杭州)有限公司-金码智能科技(杭州)有限公司.xlsx
Normal file
BIN
金码智能科技(杭州)有限公司-金码智能科技(杭州)有限公司.xlsx
Normal file
Binary file not shown.
BIN
风华(杭州)信息技术有限公司-风华(杭州)信息技术有限公司.xlsx
Normal file
BIN
风华(杭州)信息技术有限公司-风华(杭州)信息技术有限公司.xlsx
Normal file
Binary file not shown.
Reference in New Issue
Block a user