Bootstrap

数据解析概述----bs4解析入门(爬取北京新发地市场价格信息)

1、安装:pip install beautifulsoup4(也可以写成 pip install bs4,bs4 只是指向 beautifulsoup4 的别名包)

2、bs4 API 文档:https://beautifulsoup.readthedocs.io/zh_CN/v4.4.0/#id14

例子:

# *-* coding:utf-8 *-*
'''
爬取北京新发地市场价格信息:
1、拿到页面源代码
2、使用bs4解析,拿到数据
'''
import requests
import re,os
from bs4 import BeautifulSoup

# Scrape one listing page of the Xinfadi market site and print each table row.
# Steps: download the HTML, parse it with BeautifulSoup, then extract the cells
# of every <tr class="tr_color"> inside <table class="hq_table">.
url = 'http://www.xinfadi.com.cn/marketanalysis/0/list/1.shtml'
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36"}

# Fetch the page source.
# - timeout: without it requests may block forever on an unresponsive server.
# - the `with` block releases the connection even if reading the body fails.
# - raise_for_status(): fail loudly on 4xx/5xx instead of parsing an error page.
# NOTE: passing verify=False to requests.get would disable TLS certificate
# verification; it is not needed for this plain-http URL.
with requests.get(url, headers=headers, timeout=10) as respon:
    respon.raise_for_status()
    respon_text = respon.text

# Hand the page source to BeautifulSoup; "html.parser" selects the parser
# bundled with the Python standard library.
pape = BeautifulSoup(respon_text, "html.parser")

# Locate the price table. find() returns None when nothing matches, so check
# explicitly rather than crashing with an AttributeError on the next line.
table = pape.find("table", attrs={"class": "hq_table"})
if table is None:
    raise SystemExit(
        "table.hq_table not found in the page; the site layout may have changed"
    )

trs = table.find_all("tr", attrs={"class": "tr_color"})
for tr in trs:
    tds = tr.find_all("td")
    # Print the text of every cell except the last one.
    # NOTE(review): the last cell is presumably a non-data column - confirm
    # against the live page.
    print([td.text for td in tds][:-1])
print("over")

输出:

 

 

;