利用python对csv文件进行简单的数据分析
通过爬虫爬取数据并保存到csv文件后,为了方便观察,可以对数据做一些简单的分析。下面我将对爬取到的51job招聘数据中的薪资求平均值和中位数。
1.爬取数据
下面是我借用的爬取51job代码,稍加修改
# -*- coding:utf8 -*-
# 使用 xpath 方法对 51job 进行职位爬取
import requests
import json
import re
import csv
from lxml import etree
# Module-level list, created empty here.
# NOTE(review): presumably collects the scraped job postings — confirm
# against the code that appends to it.
Recruitments = []

# Root URL of the 51job search site.
BASE_DOMAIN = 'https://search.51job.com'

# HTTP headers passed to requests.get(); only a desktop-browser User-Agent
# string is set.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
}
def parse_page(url):
# url = 'https://search.51job.com/list/120200,000000,0000,00,9,99,python,2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='
resp = requests.get(url,headers=HEADERS)
text = resp.content.decode('gbk')
tree = etree.HTML(text)
PositionAndCompany = tree.xpath("//div[@class='el']//span/a/@title")
Company = PositionAndCompany[1::2]
Position = PositionAndCompany[::2]
Workplace = tree.xpath("//div[@class='el']//span[@class='t3']/text()")
Payroll = tree.xpath("//div[@class='el']//span[@class='t4']/text()")
Releasetime = tree.xpath("//div[@class='el']//span[@class='t5']/text()")
for value in zip(Position,Company,Workplace,Payroll,Releasetime):
Position