bugfix 爱企查 法人 地址等

This commit is contained in:
manchuwork
2025-12-12 03:12:17 +08:00
parent a1ac43d4e1
commit 2a99a38af5
7 changed files with 10037 additions and 10 deletions

6
demo/Untitled-1.ini Normal file
View File

@@ -0,0 +1,6 @@
1760/1年
500 /2年
AI智能建档APP

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

386
demo/group_all.html Normal file

File diff suppressed because one or more lines are too long

111
script-spider-all-group.js Normal file
View File

@@ -0,0 +1,111 @@
// ==UserScript==
// @name 集团客户数据抓取工具
// @namespace http://tampermonkey.net/
// @version 1.0
// @description 自动抓取集团客户表格数据并导出为CSV文件
// @author You
// @match file:///*D:/yd-other/爱企查/demo/group_all.html*
// @grant none
// ==/UserScript==
(function() {
'use strict';
/**
 * Adds a small yellow circular button, fixed to the top-right corner of the
 * viewport, that starts the table scrape when clicked.
 */
function createYellowDotButton() {
  const dot = document.createElement('div');

  // All styling is inline so the userscript does not need to inject a stylesheet.
  Object.assign(dot.style, {
    position: 'fixed',
    right: '20px',
    top: '20px',
    width: '30px',
    height: '30px',
    backgroundColor: 'yellow',
    borderRadius: '50%', // turns the 30x30 square into a circle
    cursor: 'pointer',
    zIndex: '9999',
    boxShadow: '0 0 5px rgba(0,0,0,0.5)',
  });

  dot.title = '点击抓取表格数据';
  dot.addEventListener('click', scrapeTableData);
  document.body.appendChild(dot);
}
/**
 * Click handler: reads the Arco table rendered on the page and downloads its
 * contents as a CSV file.
 *
 * The first matched row is treated as the header row. Every following row
 * contributes one CSV record unless all of its cells are empty after trimming.
 * Header cells and data cells are both quoted, with embedded double quotes
 * doubled, so a value containing a comma or a quote cannot shift columns.
 *
 * Any error is logged to the console and reported through `alert`; nothing is
 * rethrown.
 *
 * @returns {Promise<void>} Resolves once the download has been triggered or
 *   the user has been told that no data was found.
 */
async function scrapeTableData() {
  // Quote every field and double any embedded quote characters.
  const toCsvLine = (cells) =>
    cells.map((cell) => `"${cell.replace(/"/g, '""')}"`).join(',');

  try {
    // Every table row except the "no data" placeholder row.
    const tableRows = Array.from(
      document.querySelectorAll('.arco-table-tr:not(.arco-table-empty-row)'),
    );
    if (tableRows.length <= 1) {
      // Only the header row (or nothing at all) is present.
      alert('未找到表格数据');
      return;
    }

    const headerRow = tableRows[0];
    const bodyRows = tableRows.slice(1);

    const headers = Array.from(
      headerRow.querySelectorAll('.arco-table-th .arco-table-th-item-title'),
      (cell) => cell.textContent.trim(),
    );

    const dataRows = bodyRows
      .map((row) =>
        Array.from(
          row.querySelectorAll('.arco-table-td .arco-table-cell-wrap-value'),
          (cell) => {
            // Prefer the nested typography element's text when there is one.
            const typography = cell.querySelector('.arco-typography');
            const source = typography ? typography : cell;
            return source.textContent.trim();
          },
        ),
      )
      // Drop rows whose cells are all empty.
      .filter((rowData) => rowData.some((text) => text !== ''));

    // One line per record, each terminated by a newline.
    const csvContent = [headers, ...dataRows]
      .map((cells) => `${toCsvLine(cells)}\n`)
      .join('');

    downloadCSV(csvContent, '集团客户数据.csv');
    alert(`成功抓取 ${dataRows.length} 条记录`);
  } catch (error) {
    console.error('抓取数据时发生错误:', error);
    alert('抓取数据时发生错误,请检查控制台');
  }
}
/**
 * Triggers a browser download of the given CSV text.
 *
 * A temporary hidden `<a download>` element pointing at a Blob object URL is
 * appended to the page, clicked, and removed again. The object URL is revoked
 * afterwards so repeated exports do not keep old blobs alive.
 *
 * @param {string} csvContent - CSV text to save (a UTF-8 BOM is prepended).
 * @param {string} filename - Suggested name for the downloaded file.
 */
function downloadCSV(csvContent, filename) {
  // The leading BOM lets spreadsheet applications detect the file as UTF-8.
  const blob = new Blob(['\uFEFF' + csvContent], { type: 'text/csv;charset=utf-8;' });
  const url = URL.createObjectURL(blob);

  const link = document.createElement('a');
  link.setAttribute('href', url);
  link.setAttribute('download', filename);
  link.style.visibility = 'hidden';
  document.body.appendChild(link);

  try {
    link.click();
  } finally {
    // Clean up even if click() throws, so no stray anchor stays in the DOM.
    document.body.removeChild(link);
    // Deferred by one task so the browser has picked up the blob before the
    // URL is released.
    setTimeout(() => URL.revokeObjectURL(url), 0);
  }
}
// Add the button immediately when the document has already been parsed;
// otherwise wait for DOMContentLoaded so that document.body exists.
if (document.readyState !== 'loading') {
  createYellowDotButton();
} else {
  document.addEventListener('DOMContentLoaded', createYellowDotButton);
}
})();

View File

@@ -213,21 +213,47 @@
// 获取法定代表人
getLegalRepresentative() {
const legalElements = Array.from(
this.table.querySelectorAll("td")
).filter((td) => ToolUtils.cleanText(td.textContent) === "法定代表人");
if (legalElements.length > 0) {
const valueCell = legalElements[0].nextElementSibling;
if (valueCell && valueCell.classList.contains("image-text-content")) {
// First check for 经营者 (operator) which is used for individual businesses
const operatorElements = Array.from(
this.table.querySelectorAll("td")
).filter((td) => (ToolUtils.cleanText(td.textContent) === "经营者") ||
ToolUtils.cleanText(td.textContent) === "执行事务合伙人" ||
ToolUtils.cleanText(td.textContent) === "负责人" ||
ToolUtils.cleanText(td.textContent) === "法定代表人"
);
if (operatorElements.length > 0) {
const valueCell = operatorElements[0].nextElementSibling;
if (valueCell) {
// Check if it has the image-text-content structure
if (valueCell.classList.contains("image-text-content")) {
const nameElement = valueCell.querySelector(".person-name-warp a");
if (nameElement) {
return ToolUtils.cleanText(nameElement.textContent);
}
return ToolUtils.cleanText(valueCell.textContent);
}
// Regular structure
return ToolUtils.cleanText(valueCell.textContent);
}
}
// const legalElements = Array.from(
// this.table.querySelectorAll("td")
// ).filter((td) => ToolUtils.cleanText(td.textContent) === "法定代表人");
// if (legalElements.length > 0) {
// const valueCell = legalElements[0].nextElementSibling;
// if (valueCell && valueCell.classList.contains("image-text-content")) {
// const nameElement = valueCell.querySelector(".person-name-warp a");
// if (nameElement) {
// return ToolUtils.cleanText(nameElement.textContent);
// }
// return ToolUtils.cleanText(valueCell.textContent);
// }
// }
// 备用查找方式
const titleElements = Array.from(
this.table.querySelectorAll("td")
).filter((td) => td.textContent.includes("法定代表人"));
@@ -388,6 +414,20 @@
return match ? match[0] : null;
}
// 尝试查找其他可能包含员工数的字段
const employeeElements = Array.from(
this.table.querySelectorAll("td")
).filter((td) =>
td.textContent.includes("人员规模") ||
td.textContent.includes("员工人数")
);
if (employeeElements.length > 0 && employeeElements[0].nextElementSibling) {
const valueCell = employeeElements[0].nextElementSibling;
const rawText = valueCell.textContent.replace(/[\r\n\t]/g, "").trim();
return rawText;
}
return null;
}
@@ -421,6 +461,12 @@
}
}
// 如果没有核准日期,尝试使用成立日期作为替代
const establishmentDate = this.getOptimizedValue("成立日期");
if (establishmentDate && /^\d{4}-\d{2}-\d{2}$/.test(establishmentDate)) {
return establishmentDate;
}
return null;
}
@@ -761,7 +807,36 @@ class NationalCreditParser {
// 获取住所/注册地址
getRegisteredAddress() {
return this.getOptimizedValue("住所");
// return this.getOptimizedValue("住所");
// 主要查找方式
const address = this.getOptimizedValue("注册地址");
if (address) {
return address;
}
// 备用查找方式
const alternativeElements = Array.from(
this.table.querySelectorAll("td")
).filter((td) =>
ToolUtils.cleanText(td.textContent).includes("住所") ||
ToolUtils.cleanText(td.textContent).includes("营业地址") ||
ToolUtils.cleanText(td.textContent).includes("办公地址")
);
if (alternativeElements.length > 0 && alternativeElements[0].nextElementSibling) {
const valueCell = alternativeElements[0].nextElementSibling;
const valueElement =
valueCell.querySelector(".enter-bg-ele") ||
valueCell.querySelector(".addr-enter-bg-ele") ||
valueCell;
const addressText = ToolUtils.cleanText(valueElement.textContent);
if (addressText) {
return addressText;
}
}
return null;
}
// 获取经营范围