bugfix 爱企查 法人 地址等
This commit is contained in:
6
demo/Untitled-1.ini
Normal file
6
demo/Untitled-1.ini
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
1760/1年
|
||||||
|
500 /2年
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
AI智能建档APP
|
||||||
3058
demo/aiqicha_bug_pages/北京莱盛高新技术有限公司杭州分公司.html
Normal file
3058
demo/aiqicha_bug_pages/北京莱盛高新技术有限公司杭州分公司.html
Normal file
File diff suppressed because one or more lines are too long
2847
demo/aiqicha_bug_pages/杭州余杭图王广告设计工作室.html
Normal file
2847
demo/aiqicha_bug_pages/杭州余杭图王广告设计工作室.html
Normal file
File diff suppressed because one or more lines are too long
3544
demo/aiqicha_bug_pages/杭州鱼米企业管理合伙企业(有限合伙).html
Normal file
3544
demo/aiqicha_bug_pages/杭州鱼米企业管理合伙企业(有限合伙).html
Normal file
File diff suppressed because one or more lines are too long
386
demo/group_all.html
Normal file
386
demo/group_all.html
Normal file
File diff suppressed because one or more lines are too long
111
script-spider-all-group.js
Normal file
111
script-spider-all-group.js
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
// ==UserScript==
|
||||||
|
// @name 集团客户数据抓取工具
|
||||||
|
// @namespace http://tampermonkey.net/
|
||||||
|
// @version 1.0
|
||||||
|
// @description 自动抓取集团客户表格数据并导出为CSV文件
|
||||||
|
// @author You
|
||||||
|
// @match file:///*D:/yd-other/爱企查/demo/group_all.html*
|
||||||
|
// @grant none
|
||||||
|
// ==/UserScript==
|
||||||
|
|
||||||
|
(function() {
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
/**
 * Create the fixed-position yellow dot in the top-right corner of the page
 * and wire it to the table scrape.
 *
 * The dot is a plain <div>, so it is given a button role, an accessible label,
 * a tab stop and an Enter/Space key handler; without those it can only be
 * operated with a mouse.
 */
function createYellowDotButton() {
    const button = document.createElement('div');

    Object.assign(button.style, {
        position: 'fixed',
        right: '20px',
        top: '20px',
        width: '30px',
        height: '30px',
        backgroundColor: 'yellow',
        borderRadius: '50%',
        cursor: 'pointer',
        zIndex: '9999',
        boxShadow: '0 0 5px rgba(0,0,0,0.5)',
    });

    button.title = '点击抓取表格数据';
    button.setAttribute('role', 'button');
    button.setAttribute('aria-label', button.title);
    button.tabIndex = 0;

    button.addEventListener('click', scrapeTableData);
    // A <div> does not turn Enter/Space into a click the way <button> does,
    // so keyboard activation has to be handled explicitly.
    button.addEventListener('keydown', (event) => {
        if (event.key === 'Enter' || event.key === ' ') {
            event.preventDefault();
            scrapeTableData();
        }
    });

    document.body.appendChild(button);
}
|
||||||
|
|
||||||
|
/**
 * Read the rows matched by `.arco-table-tr` (excluding the empty-row
 * placeholder), treat the first one as the header row, and hand the result to
 * `downloadCSV` as CSV text.
 *
 * Every field — header cells included — is wrapped in double quotes with
 * embedded quotes doubled, so commas or quotes in a column title cannot shift
 * the columns of the exported file.
 *
 * Any error is logged to the console and reported through `alert`; nothing is
 * rethrown.
 *
 * @returns {Promise<void>} Resolves once the export attempt has finished.
 */
async function scrapeTableData() {
    // Quote a single field and double any embedded quote characters.
    const toCsvField = (text) => `"${String(text).replace(/"/g, '""')}"`;
    // Render one row of fields as a newline-terminated CSV line.
    const toCsvLine = (fields) => `${fields.map(toCsvField).join(',')}\n`;
    // Prefer the nested typography element's text when the cell has one.
    const readCellText = (cell) => {
        const typography = cell.querySelector('.arco-typography');
        return (typography ? typography : cell).textContent.trim();
    };

    try {
        const tableRows = Array.from(
            document.querySelectorAll('.arco-table-tr:not(.arco-table-empty-row)')
        );

        // A single matched row is just the header: there is nothing to export.
        if (tableRows.length <= 1) {
            alert('未找到表格数据');
            return;
        }

        const [headerRow, ...bodyRows] = tableRows;

        const headers = Array.from(
            headerRow.querySelectorAll('.arco-table-th .arco-table-th-item-title'),
            (cell) => cell.textContent.trim()
        );

        // Keep only rows in which at least one cell has text.
        const dataRows = bodyRows
            .map((row) => Array.from(
                row.querySelectorAll('.arco-table-td .arco-table-cell-wrap-value'),
                readCellText
            ))
            .filter((rowData) => rowData.some((text) => text !== ''));

        const csvContent = [headers, ...dataRows].map(toCsvLine).join('');

        downloadCSV(csvContent, '集团客户数据.csv');

        alert(`成功抓取 ${dataRows.length} 条记录`);
    } catch (error) {
        console.error('抓取数据时发生错误:', error);
        alert('抓取数据时发生错误,请检查控制台');
    }
}
|
||||||
|
|
||||||
|
/**
 * Offer `csvContent` to the user as a file download named `filename`.
 *
 * The text is wrapped in a Blob prefixed with a UTF-8 byte-order mark
 * (presumably so spreadsheet applications detect the encoding — confirm), a
 * temporary hidden <a download> link pointing at the blob is clicked, and the
 * link is removed again.
 *
 * @param {string} csvContent - CSV text to save.
 * @param {string} filename - Suggested name for the downloaded file.
 */
function downloadCSV(csvContent, filename) {
    const blob = new Blob([`\uFEFF${csvContent}`], { type: 'text/csv;charset=utf-8;' });
    const url = URL.createObjectURL(blob);

    const link = document.createElement('a');
    link.setAttribute('href', url);
    link.setAttribute('download', filename);
    link.style.visibility = 'hidden';

    document.body.appendChild(link);
    try {
        link.click();
    } finally {
        document.body.removeChild(link);
        // Release the blob so repeated exports do not accumulate in memory.
        // Revocation is deferred because the download started by click() may
        // not have read the URL yet when click() returns.
        setTimeout(() => URL.revokeObjectURL(url), 1000);
    }
}
|
||||||
|
|
||||||
|
// Add the button right away when the document is past the 'loading' state;
// otherwise wait for DOMContentLoaded before adding it.
const domStillLoading = document.readyState === 'loading';
if (!domStillLoading) {
    createYellowDotButton();
} else {
    document.addEventListener('DOMContentLoaded', createYellowDotButton);
}
|
||||||
|
})();
|
||||||
@@ -213,21 +213,47 @@
|
|||||||
|
|
||||||
// 获取法定代表人
|
// 获取法定代表人
|
||||||
getLegalRepresentative() {
|
getLegalRepresentative() {
|
||||||
const legalElements = Array.from(
|
|
||||||
this.table.querySelectorAll("td")
|
|
||||||
).filter((td) => ToolUtils.cleanText(td.textContent) === "法定代表人");
|
|
||||||
|
|
||||||
if (legalElements.length > 0) {
|
// First check for 经营者 (operator) which is used for individual businesses
|
||||||
const valueCell = legalElements[0].nextElementSibling;
|
const operatorElements = Array.from(
|
||||||
if (valueCell && valueCell.classList.contains("image-text-content")) {
|
this.table.querySelectorAll("td")
|
||||||
const nameElement = valueCell.querySelector(".person-name-warp a");
|
).filter((td) => (ToolUtils.cleanText(td.textContent) === "经营者") ||
|
||||||
if (nameElement) {
|
ToolUtils.cleanText(td.textContent) === "执行事务合伙人" ||
|
||||||
return ToolUtils.cleanText(nameElement.textContent);
|
ToolUtils.cleanText(td.textContent) === "负责人" ||
|
||||||
|
ToolUtils.cleanText(td.textContent) === "法定代表人"
|
||||||
|
);
|
||||||
|
|
||||||
|
if (operatorElements.length > 0) {
|
||||||
|
const valueCell = operatorElements[0].nextElementSibling;
|
||||||
|
if (valueCell) {
|
||||||
|
// Check if it has the image-text-content structure
|
||||||
|
if (valueCell.classList.contains("image-text-content")) {
|
||||||
|
const nameElement = valueCell.querySelector(".person-name-warp a");
|
||||||
|
if (nameElement) {
|
||||||
|
return ToolUtils.cleanText(nameElement.textContent);
|
||||||
|
}
|
||||||
|
return ToolUtils.cleanText(valueCell.textContent);
|
||||||
}
|
}
|
||||||
|
// Regular structure
|
||||||
return ToolUtils.cleanText(valueCell.textContent);
|
return ToolUtils.cleanText(valueCell.textContent);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// const legalElements = Array.from(
|
||||||
|
// this.table.querySelectorAll("td")
|
||||||
|
// ).filter((td) => ToolUtils.cleanText(td.textContent) === "法定代表人");
|
||||||
|
|
||||||
|
// if (legalElements.length > 0) {
|
||||||
|
// const valueCell = legalElements[0].nextElementSibling;
|
||||||
|
// if (valueCell && valueCell.classList.contains("image-text-content")) {
|
||||||
|
// const nameElement = valueCell.querySelector(".person-name-warp a");
|
||||||
|
// if (nameElement) {
|
||||||
|
// return ToolUtils.cleanText(nameElement.textContent);
|
||||||
|
// }
|
||||||
|
// return ToolUtils.cleanText(valueCell.textContent);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// 备用查找方式
|
||||||
const titleElements = Array.from(
|
const titleElements = Array.from(
|
||||||
this.table.querySelectorAll("td")
|
this.table.querySelectorAll("td")
|
||||||
).filter((td) => td.textContent.includes("法定代表人"));
|
).filter((td) => td.textContent.includes("法定代表人"));
|
||||||
@@ -388,6 +414,20 @@
|
|||||||
return match ? match[0] : null;
|
return match ? match[0] : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 尝试查找其他可能包含员工数的字段
|
||||||
|
const employeeElements = Array.from(
|
||||||
|
this.table.querySelectorAll("td")
|
||||||
|
).filter((td) =>
|
||||||
|
td.textContent.includes("人员规模") ||
|
||||||
|
td.textContent.includes("员工人数")
|
||||||
|
);
|
||||||
|
|
||||||
|
if (employeeElements.length > 0 && employeeElements[0].nextElementSibling) {
|
||||||
|
const valueCell = employeeElements[0].nextElementSibling;
|
||||||
|
const rawText = valueCell.textContent.replace(/[\r\n\t]/g, "").trim();
|
||||||
|
return rawText;
|
||||||
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -421,6 +461,12 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 如果没有核准日期,尝试使用成立日期作为替代
|
||||||
|
const establishmentDate = this.getOptimizedValue("成立日期");
|
||||||
|
if (establishmentDate && /^\d{4}-\d{2}-\d{2}$/.test(establishmentDate)) {
|
||||||
|
return establishmentDate;
|
||||||
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -761,7 +807,36 @@ class NationalCreditParser {
|
|||||||
|
|
||||||
// 获取住所/注册地址
|
// 获取住所/注册地址
|
||||||
getRegisteredAddress() {
|
getRegisteredAddress() {
|
||||||
return this.getOptimizedValue("住所");
|
// return this.getOptimizedValue("住所");
|
||||||
|
// 主要查找方式
|
||||||
|
const address = this.getOptimizedValue("注册地址");
|
||||||
|
if (address) {
|
||||||
|
return address;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 备用查找方式
|
||||||
|
const alternativeElements = Array.from(
|
||||||
|
this.table.querySelectorAll("td")
|
||||||
|
).filter((td) =>
|
||||||
|
ToolUtils.cleanText(td.textContent).includes("住所") ||
|
||||||
|
ToolUtils.cleanText(td.textContent).includes("营业地址") ||
|
||||||
|
ToolUtils.cleanText(td.textContent).includes("办公地址")
|
||||||
|
);
|
||||||
|
|
||||||
|
if (alternativeElements.length > 0 && alternativeElements[0].nextElementSibling) {
|
||||||
|
const valueCell = alternativeElements[0].nextElementSibling;
|
||||||
|
const valueElement =
|
||||||
|
valueCell.querySelector(".enter-bg-ele") ||
|
||||||
|
valueCell.querySelector(".addr-enter-bg-ele") ||
|
||||||
|
valueCell;
|
||||||
|
|
||||||
|
const addressText = ToolUtils.cleanText(valueElement.textContent);
|
||||||
|
if (addressText) {
|
||||||
|
return addressText;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 获取经营范围
|
// 获取经营范围
|
||||||
|
|||||||
Reference in New Issue
Block a user