aiqicha
This commit is contained in:
@@ -373,8 +373,8 @@ if __name__ == '__main__':
|
||||
# results2 = searcher.search("腾讯", 1)
|
||||
# results3 = searcher.search("百度", 1)
|
||||
|
||||
sleep_time = 5
|
||||
sleep_time += random.randint(3, 10)
|
||||
sleep_time = 3
|
||||
sleep_time += random.randint(1, 2)
|
||||
time.sleep(sleep_time)
|
||||
pass
|
||||
pass
|
||||
|
||||
@@ -41,7 +41,19 @@ def crawl_and_save_aiqicha_details(input_csv, output_csv):
|
||||
'address', # 公司地址
|
||||
'business_scope', # 经营范围
|
||||
'source_url', # 原始URL
|
||||
'create_time' # 创建时间
|
||||
'create_time' ,
|
||||
# 新增字段
|
||||
'company_type', # 企业类型
|
||||
'industry', # 所属行业
|
||||
'registration_authority', # 登记机关
|
||||
'operating_period', # 营业期限
|
||||
'actual_capital', # 实缴资本
|
||||
'taxpayer_id', # 纳税人识别号
|
||||
'organization_code', # 组织机构代码
|
||||
'approved_date', # 核准日期
|
||||
'staff_size', # 参保人数
|
||||
'phone' # 电话
|
||||
# 创建时间
|
||||
]
|
||||
|
||||
# 创建输出CSV工具实例
|
||||
@@ -99,7 +111,18 @@ def crawl_and_save_aiqicha_details(input_csv, output_csv):
|
||||
'business_status': detail.get('business_status', ''),
|
||||
'address': detail.get('address', ''),
|
||||
'business_scope': detail.get('business_scope', ''),
|
||||
'source_url': detail.get('source_url', '')
|
||||
'source_url': detail.get('source_url', ''),
|
||||
# 新增字段映射
|
||||
'company_type': detail.get('company_type', ''),
|
||||
'industry': detail.get('industry', ''),
|
||||
'registration_authority': detail.get('registration_authority', ''),
|
||||
'operating_period': detail.get('operating_period', ''),
|
||||
'actual_capital': detail.get('actual_capital', ''),
|
||||
'taxpayer_id': detail.get('taxpayer_id', ''),
|
||||
'organization_code': detail.get('organization_code', ''),
|
||||
'approved_date': detail.get('approved_date', ''),
|
||||
'staff_size': detail.get('staff_size', ''),
|
||||
'phone': detail.get('phone', '')
|
||||
}
|
||||
|
||||
# 立即保存每条数据,避免数据丢失
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -51,16 +51,22 @@ class AiqichaDetailParser:
|
||||
# 批量提取信息
|
||||
# 爱企查页面使用表格结构,需要特殊处理
|
||||
field_mapping = {
|
||||
'legal_representative': '法定代表人',
|
||||
'business_scope': '经营范围',
|
||||
'credit_code': '统一社会信用代码',
|
||||
'registered_capital': '注册资本',
|
||||
'establishment_date': '成立日期',
|
||||
'business_status': '经营状态',
|
||||
'company_type': '企业类型',
|
||||
'registration_authority': '登记机关',
|
||||
'operating_period': '营业期限',
|
||||
'address': '注册地址'
|
||||
'legal_representative': '法定代表人',
|
||||
'business_scope': '经营范围',
|
||||
'credit_code': '统一社会信用代码',
|
||||
'registered_capital': '注册资本',
|
||||
'establishment_date': '成立日期',
|
||||
'business_status': '经营状态',
|
||||
'company_type': '企业类型',
|
||||
'registration_authority': '登记机关',
|
||||
'operating_period': '营业期限',
|
||||
'address': '注册地址',
|
||||
'administrative_division': '行政区划',
|
||||
'business_registration_number': '工商注册号',
|
||||
'taxpayer_qualification': '纳税人资质',
|
||||
'approved_date': '核准日期',
|
||||
'staff_size': '参保人数',
|
||||
'former_name': '曾用名'
|
||||
}
|
||||
for field_name, field_text in field_mapping.items():
|
||||
company_info[field_name] = self._extract_field_value(field_text)
|
||||
|
||||
Reference in New Issue
Block a user