bugfix 爱企查 法人 地址等
This commit is contained in:
6
demo/Untitled-1.ini
Normal file
6
demo/Untitled-1.ini
Normal file
@@ -0,0 +1,6 @@
|
||||
1760/1年
|
||||
500 /2年
|
||||
|
||||
|
||||
|
||||
AI智能建档APP
|
||||
3058
demo/aiqicha_bug_pages/北京莱盛高新技术有限公司杭州分公司.html
Normal file
3058
demo/aiqicha_bug_pages/北京莱盛高新技术有限公司杭州分公司.html
Normal file
File diff suppressed because one or more lines are too long
2847
demo/aiqicha_bug_pages/杭州余杭图王广告设计工作室.html
Normal file
2847
demo/aiqicha_bug_pages/杭州余杭图王广告设计工作室.html
Normal file
File diff suppressed because one or more lines are too long
3544
demo/aiqicha_bug_pages/杭州鱼米企业管理合伙企业(有限合伙).html
Normal file
3544
demo/aiqicha_bug_pages/杭州鱼米企业管理合伙企业(有限合伙).html
Normal file
File diff suppressed because one or more lines are too long
386
demo/group_all.html
Normal file
386
demo/group_all.html
Normal file
File diff suppressed because one or more lines are too long
111
script-spider-all-group.js
Normal file
111
script-spider-all-group.js
Normal file
@@ -0,0 +1,111 @@
|
||||
// ==UserScript==
|
||||
// @name 集团客户数据抓取工具
|
||||
// @namespace http://tampermonkey.net/
|
||||
// @version 1.0
|
||||
// @description 自动抓取集团客户表格数据并导出为CSV文件
|
||||
// @author You
|
||||
// @match file:///*D:/yd-other/爱企查/demo/group_all.html*
|
||||
// @grant none
|
||||
// ==/UserScript==
|
||||
|
||||
(function() {
|
||||
'use strict';
|
||||
|
||||
// Create the yellow dot button.
/**
 * Adds a small fixed-position yellow circle to the top-right corner of the
 * page. Clicking it runs `scrapeTableData`.
 */
function createYellowDotButton() {
  const dot = document.createElement('div');

  // Visual appearance: a 30px circle pinned to the top-right of the viewport.
  Object.assign(dot.style, {
    position: 'fixed',
    right: '20px',
    top: '20px',
    width: '30px',
    height: '30px',
    backgroundColor: 'yellow',
    borderRadius: '50%',
    cursor: 'pointer',
    zIndex: '9999',
    boxShadow: '0 0 5px rgba(0,0,0,0.5)',
  });
  dot.title = '点击抓取表格数据';

  dot.addEventListener('click', scrapeTableData);
  document.body.appendChild(dot);
}
|
||||
|
||||
// Scrape the table data and export it as CSV.
/**
 * Click handler: reads every non-empty `.arco-table-tr` row on the page,
 * treats the first row as the header, builds a CSV document and hands it to
 * `downloadCSV`. The user is told the outcome through `alert`.
 *
 * Every field, header cells included, is wrapped in double quotes with
 * embedded quotes doubled, so a comma or quote inside a column title can no
 * longer shift the columns of the exported file.
 *
 * @returns {Promise<void>} Resolves once the export attempt has finished;
 *   errors are logged and reported to the user rather than rethrown.
 */
async function scrapeTableData() {
  // Quote one row's fields and join them into a single CSV line.
  const toCsvLine = (fields) =>
    fields.map((field) => `"${field.replace(/"/g, '""')}"`).join(',');

  try {
    // All table rows except the "empty" placeholder row.
    const tableRows = Array.from(
      document.querySelectorAll('.arco-table-tr:not(.arco-table-empty-row)')
    );

    // Only a header row (or nothing at all) means there is no data to export.
    if (tableRows.length <= 1) {
      alert('未找到表格数据');
      return;
    }

    const [headerRow, ...bodyRows] = tableRows;

    // Header titles.
    const headers = Array.from(
      headerRow.querySelectorAll('.arco-table-th .arco-table-th-item-title'),
      (cell) => cell.textContent.trim()
    );

    // Data rows; rows whose cells are all blank are dropped.
    const dataRows = bodyRows
      .map((row) =>
        Array.from(
          row.querySelectorAll('.arco-table-td .arco-table-cell-wrap-value'),
          (cell) => {
            // Prefer the nested typography element's text when one exists.
            const typography = cell.querySelector('.arco-typography');
            return (typography || cell).textContent.trim();
          }
        )
      )
      .filter((rowData) => rowData.some((text) => text !== ''));

    // One line per row, each terminated by a newline.
    const csvContent = `${[headers, ...dataRows].map(toCsvLine).join('\n')}\n`;

    downloadCSV(csvContent, '集团客户数据.csv');

    alert(`成功抓取 ${dataRows.length} 条记录`);
  } catch (error) {
    console.error('抓取数据时发生错误:', error);
    alert('抓取数据时发生错误,请检查控制台');
  }
}
|
||||
|
||||
// Download the CSV file.
/**
 * Triggers a browser download of `csvContent` under the name `filename`.
 *
 * A UTF-8 byte-order mark is prepended to the content before it is wrapped
 * in a Blob. The temporary link is always removed from the document, and the
 * object URL is revoked shortly afterwards so the Blob is not kept alive for
 * the lifetime of the page.
 *
 * @param {string} csvContent - CSV text to save.
 * @param {string} filename - Suggested file name for the download.
 */
function downloadCSV(csvContent, filename) {
  const blob = new Blob(['\uFEFF' + csvContent], { type: 'text/csv;charset=utf-8;' });
  const url = URL.createObjectURL(blob);

  const link = document.createElement('a');
  link.setAttribute('href', url);
  link.setAttribute('download', filename);
  link.style.visibility = 'hidden';

  document.body.appendChild(link);
  try {
    link.click();
  } finally {
    document.body.removeChild(link);
    // Revoke after a delay rather than immediately, so the browser has time
    // to start reading the Blob for the download.
    setTimeout(() => URL.revokeObjectURL(url), 1000);
  }
}
|
||||
|
||||
// Create the button once the page has loaded: immediately if the document is
// already past the 'loading' state, otherwise on DOMContentLoaded.
if (document.readyState !== 'loading') {
  createYellowDotButton();
} else {
  document.addEventListener('DOMContentLoaded', createYellowDotButton);
}
|
||||
})();
|
||||
@@ -213,21 +213,47 @@
|
||||
|
||||
// 获取法定代表人
|
||||
getLegalRepresentative() {
|
||||
const legalElements = Array.from(
|
||||
this.table.querySelectorAll("td")
|
||||
).filter((td) => ToolUtils.cleanText(td.textContent) === "法定代表人");
|
||||
|
||||
if (legalElements.length > 0) {
|
||||
const valueCell = legalElements[0].nextElementSibling;
|
||||
if (valueCell && valueCell.classList.contains("image-text-content")) {
|
||||
const nameElement = valueCell.querySelector(".person-name-warp a");
|
||||
if (nameElement) {
|
||||
return ToolUtils.cleanText(nameElement.textContent);
|
||||
// First check for 经营者 (operator) which is used for individual businesses
|
||||
const operatorElements = Array.from(
|
||||
this.table.querySelectorAll("td")
|
||||
).filter((td) => (ToolUtils.cleanText(td.textContent) === "经营者") ||
|
||||
ToolUtils.cleanText(td.textContent) === "执行事务合伙人" ||
|
||||
ToolUtils.cleanText(td.textContent) === "负责人" ||
|
||||
ToolUtils.cleanText(td.textContent) === "法定代表人"
|
||||
);
|
||||
|
||||
if (operatorElements.length > 0) {
|
||||
const valueCell = operatorElements[0].nextElementSibling;
|
||||
if (valueCell) {
|
||||
// Check if it has the image-text-content structure
|
||||
if (valueCell.classList.contains("image-text-content")) {
|
||||
const nameElement = valueCell.querySelector(".person-name-warp a");
|
||||
if (nameElement) {
|
||||
return ToolUtils.cleanText(nameElement.textContent);
|
||||
}
|
||||
return ToolUtils.cleanText(valueCell.textContent);
|
||||
}
|
||||
// Regular structure
|
||||
return ToolUtils.cleanText(valueCell.textContent);
|
||||
}
|
||||
}
|
||||
// const legalElements = Array.from(
|
||||
// this.table.querySelectorAll("td")
|
||||
// ).filter((td) => ToolUtils.cleanText(td.textContent) === "法定代表人");
|
||||
|
||||
// if (legalElements.length > 0) {
|
||||
// const valueCell = legalElements[0].nextElementSibling;
|
||||
// if (valueCell && valueCell.classList.contains("image-text-content")) {
|
||||
// const nameElement = valueCell.querySelector(".person-name-warp a");
|
||||
// if (nameElement) {
|
||||
// return ToolUtils.cleanText(nameElement.textContent);
|
||||
// }
|
||||
// return ToolUtils.cleanText(valueCell.textContent);
|
||||
// }
|
||||
// }
|
||||
|
||||
// 备用查找方式
|
||||
const titleElements = Array.from(
|
||||
this.table.querySelectorAll("td")
|
||||
).filter((td) => td.textContent.includes("法定代表人"));
|
||||
@@ -388,6 +414,20 @@
|
||||
return match ? match[0] : null;
|
||||
}
|
||||
|
||||
// 尝试查找其他可能包含员工数的字段
|
||||
const employeeElements = Array.from(
|
||||
this.table.querySelectorAll("td")
|
||||
).filter((td) =>
|
||||
td.textContent.includes("人员规模") ||
|
||||
td.textContent.includes("员工人数")
|
||||
);
|
||||
|
||||
if (employeeElements.length > 0 && employeeElements[0].nextElementSibling) {
|
||||
const valueCell = employeeElements[0].nextElementSibling;
|
||||
const rawText = valueCell.textContent.replace(/[\r\n\t]/g, "").trim();
|
||||
return rawText;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -421,6 +461,12 @@
|
||||
}
|
||||
}
|
||||
|
||||
// 如果没有核准日期,尝试使用成立日期作为替代
|
||||
const establishmentDate = this.getOptimizedValue("成立日期");
|
||||
if (establishmentDate && /^\d{4}-\d{2}-\d{2}$/.test(establishmentDate)) {
|
||||
return establishmentDate;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -761,7 +807,36 @@ class NationalCreditParser {
|
||||
|
||||
// 获取住所/注册地址
|
||||
getRegisteredAddress() {
|
||||
return this.getOptimizedValue("住所");
|
||||
// return this.getOptimizedValue("住所");
|
||||
// 主要查找方式
|
||||
const address = this.getOptimizedValue("注册地址");
|
||||
if (address) {
|
||||
return address;
|
||||
}
|
||||
|
||||
// 备用查找方式
|
||||
const alternativeElements = Array.from(
|
||||
this.table.querySelectorAll("td")
|
||||
).filter((td) =>
|
||||
ToolUtils.cleanText(td.textContent).includes("住所") ||
|
||||
ToolUtils.cleanText(td.textContent).includes("营业地址") ||
|
||||
ToolUtils.cleanText(td.textContent).includes("办公地址")
|
||||
);
|
||||
|
||||
if (alternativeElements.length > 0 && alternativeElements[0].nextElementSibling) {
|
||||
const valueCell = alternativeElements[0].nextElementSibling;
|
||||
const valueElement =
|
||||
valueCell.querySelector(".enter-bg-ele") ||
|
||||
valueCell.querySelector(".addr-enter-bg-ele") ||
|
||||
valueCell;
|
||||
|
||||
const addressText = ToolUtils.cleanText(valueElement.textContent);
|
||||
if (addressText) {
|
||||
return addressText;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
// 获取经营范围
|
||||
|
||||
Reference in New Issue
Block a user