Skip to content

Commit 3b396ea

Browse files
committed
Fix "Data too long for column 'tags' at row 1, 239 characters truncated" error
1 parent 9a250df commit 3b396ea

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

page_parse/user/person.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -92,21 +92,21 @@ def get_detail(html, uid):
9292
user.description = description.encode('gbk', 'ignore').decode('gbk')
9393
elif '注册时间:' in each_str:
9494
user.register_time = each.find(attrs={'class': 'pt_detail'}).get_text().replace('\t', '').replace(
95-
'\r\n', '')
95+
'\r\n', '').replace(' ', '')
9696

9797
if '标签信息' in basic_str:
9898
basic_info = each_module.find_all(attrs={'class': 'li_1 clearfix'})
9999
for each in basic_info:
100100
if '标签:' in each.get_text():
101101
user.tags = each.find(attrs={'class': 'pt_detail'}).get_text().replace('\t', '').replace(
102-
'\n\n\n', '') .strip().replace('\r\n', ';')
102+
'\n\n\n', '') .strip().replace('\r\n', ';').replace(' ', '')
103103

104104
if '教育信息' in basic_str:
105105
basic_info = each_module.find_all(attrs={'class': 'li_1 clearfix'})
106106
for each in basic_info:
107107
if '大学:' in each.get_text():
108108
user.education_info = each.find(attrs={'class': 'pt_detail'}).get_text().replace('\r\n', ',') \
109-
.replace('\t', '').replace('\n', ';').lstrip(';').rstrip(';')
109+
.replace('\t', '').replace('\n', ';').lstrip(';').rstrip(';').replace(' ', '')
110110

111111
if '工作信息' in basic_str:
112112
basic_info = each_module.find_all(attrs={'class': 'li_1 clearfix'})
@@ -116,7 +116,7 @@ def get_detail(html, uid):
116116
jobs = each.find_all(attrs={'class': 'pt_detail'})
117117
for job in jobs:
118118
jobs_info.append(job.get_text().replace('\r\n', '').replace('\t', '').replace('\n', ''))
119-
user.work_info = ';'.join(jobs_info)
119+
user.work_info = ';'.join(jobs_info).replace(' ', '')
120120

121121
if '联系信息' in basic_str:
122122
basic_info = each_module.find_all(attrs={'class': 'li_1 clearfix'})
@@ -129,7 +129,7 @@ def get_detail(html, uid):
129129
contact_info.append('email:' + each.find(attrs={'class': 'pt_detail'}).get_text())
130130
if 'MSN:' in each.get_text():
131131
contact_info.append('msn:' + each.find(attrs={'class': 'pt_detail'}).get_text())
132-
user.contact_info = ';'.join(contact_info)
132+
user.contact_info = ';'.join(contact_info).replace(' ', '')
133133
except Exception as why:
134134
print('解析出错,具体原因为{why}'.format(why=why))
135135

0 commit comments

Comments
 (0)