1、安装 --- pip install beautifulsoup4 requests(bs4 是 beautifulsoup4 的别名包;下面的例子还需要 requests)
2、bs4 API:https://beautifulsoup.readthedocs.io/zh_CN/v4.4.0/#id14
例子:
# *-* coding:utf-8 *-*
'''
爬取北京新发地市场价格信息:
1、拿到页面源代码
2、使用bs4解析,拿到数据
'''
import requests
import re,os
from bs4 import BeautifulSoup
# Scrape one page of the Xinfadi market price list and print every data row.
# Steps: download the HTML, parse it with BeautifulSoup, then walk the rows
# of the price table.
url = 'http://www.xinfadi.com.cn/marketanalysis/0/list/1.shtml'
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36"}

# Download the page source.
# - timeout: without it requests may block forever on an unresponsive server.
# - the `with` block closes the connection even if reading the body fails.
# - (passing verify=False would disable TLS certificate verification; it is
#   not needed for this plain-http URL.)
with requests.get(url, headers=headers, timeout=10) as respon:
    # Fail loudly on 4xx/5xx instead of parsing an error page as data.
    respon.raise_for_status()
    # NOTE(review): .text relies on the encoding requests infers from the
    # response headers; if the output is garbled, confirm the page charset.
    respon_text = respon.text

# Hand the page source to BeautifulSoup; "html.parser" selects the parser
# that ships with the Python standard library.
pape = BeautifulSoup(respon_text, "html.parser")

# Locate the price table. find() returns None when nothing matches, so check
# explicitly rather than crashing with an AttributeError on the next line.
table = pape.find("table", attrs={"class": "hq_table"})
if table is None:
    raise RuntimeError(
        "no <table class=\"hq_table\"> found at %s; the page layout may have changed" % url
    )

trs = table.find_all("tr", attrs={"class": "tr_color"})
for tr in trs:
    tds = tr.find_all("td")
    # Print the text of every cell in the row except the last one.
    print([td.text for td in tds][:-1])
print("over")
输出: