目录
3.1 打印响应的内容:response.text和response.content
一、GET请求
1.1 简单使用
import requests

url = "https://www.baidu.com"
response = requests.get(url)

# Show the response body: the raw bytes decoded with decode()'s default, UTF-8.
body = response.content.decode()
print(body)

# Show the headers carried by the request that produced this response.
sent_headers = response.request.headers
print(sent_headers)
1.2 带header
import requests

url = "https://www.baidu.com"
# Request headers that make the call look like it comes from a desktop browser.
headers = {
    "Accept": "*/*",
    # Advertise only encodings that requests can always decode. "br" and
    # "zstd" (as copied from a browser) are decoded only when the optional
    # brotli / zstandard packages are installed; without them the server may
    # answer with a body that stays compressed and prints as garbage.
    "Accept-Encoding": "gzip, deflate",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0",
}
response = requests.get(url, headers=headers)
1.3 带params
1. url里面带参
# The query string (?wd=python) is written directly into the URL.
url = "https://www.baidu.com/s?wd=python"
2. get方法传参
import requests

# Query-string parameters, passed separately from the URL via `params`.
kw = {"wd": "python"}
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0"}
url = "https://www.baidu.com/s?"

response = requests.get(url, params=kw, headers=headers)
1.4 带cookie
1. 方式1
import requests

url = "https://mp.csdn.net/"
headers = {
    # Send the login cookie as a raw "Cookie" request header.
    # SECURITY: never publish a real session cookie (it contains values such
    # as UserToken that grant access to the account). Copy the Cookie header
    # from your own logged-in browser session (developer tools -> Network)
    # and paste it here as ONE line; a line break inside the quotes is a
    # SyntaxError.
    "Cookie": "<paste the Cookie request header from your own browser session here>",
}
response = requests.get(url, headers=headers)
2. 方式2
import requests
url = "https://mp.csdn.net/"
# Raw Cookie header in "name=value; name=value" form.
# SECURITY: this is a redacted sample. Never publish a real session cookie
# (values such as UserToken grant access to the account); paste the Cookie
# header from your own browser session instead. Adjacent string literals are
# concatenated, which keeps the value on one logical line.
temp = (
    "uuid_tt_dd=<your-uuid>; "
    "UserName=<your-user-name>; "
    "UserToken=<your-user-token>; "
    "__gads=ID=<ads-id>:T=<timestamp>:S=<signature>; "
    "c_pref=default; "
    "c_segment=2"
)
cookies_list = temp.split("; ")
cookies = {}
for cookie in cookies_list:
    # Split on the FIRST "=" only: values such as the __gads one contain "="
    # themselves, and taking the text after the last "=" would truncate them.
    name, separator, value = cookie.strip().partition("=")
    if separator:  # skip empty or malformed fragments that have no "="
        cookies[name] = value
print(cookies)
# Send the cookies as a dict through the `cookies` argument instead of a raw
# "Cookie" header, then print the decoded response body.
response = requests.get(url, cookies=cookies)
print(response.content.decode())
1.5 超时参数timeout
请求超过设定的超时时间仍未返回响应时,抛出异常
import requests

url = "https://gethub.com"
try:
    # Give up when the server has not answered within 1 second.
    response = requests.get(url, timeout=1)
except requests.exceptions.Timeout:
    # Timeout is the common base class of ConnectTimeout and ReadTimeout, so
    # this also covers a server that accepts the connection but then stalls
    # (catching ConnectTimeout alone would let that case crash the script).
    print("Time out.")
else:
    # Only reached when the request completed in time.
    print(response.content.decode())
1.6 忽略CA证书验证
verify=False
import requests
url = "https://gethub.com"
# verify=False skips TLS certificate verification for this request. Use it
# only for testing: the connection is no longer protected against
# interception, and urllib3 emits an InsecureRequestWarning.
response = requests.get(url, verify=False)
print(response.content.decode())
二、POST请求
POST请求的参数除了data以外,其余与GET请求完全一致
data是字典类型的body数据
response = requests.post(url, data, ...)
三、response响应
response = requests.get(url)
● response.text 获取str类型的响应内容
● response.content 获取bytes类型的响应内容
● response.url 响应的url; 有时候响应的url和请求的url并不一致
● response.status_code 响应状态码
● response.request.headers 响应对应的请求头
● response.headers 响应头
● response.request._cookies 响应对应请求的cookie; 返回cookieJar类型 (PreparedRequest对象没有公开的cookies属性)
● response.cookies 响应的cookie (经过了set-cookie动作); 返回cookieJar类型
● response.json() 自动将json字符串类型的响应内容转换为python对象 (dict or list)
3.1 打印响应的内容:response.text和response.content
import requests

url = "https://www.baidu.com"
response = requests.get(url)

# Option 1: set the charset requests should use, then read the decoded str.
response.encoding = "utf-8"
page_text = response.text
print(page_text)

# Option 2: take the raw bytes and decode them ourselves (UTF-8 by default).
page_bytes = response.content
print(page_bytes.decode())
常见的编码字符集
utf-8、gbk、gb2312、ascii、iso-8859-1
3.2 cookieJar对象转换为cookies字典
使用requests获取的response对象,具有cookies属性。该属性值是一个cookieJar类型,包含了对方服务器设置在本地的cookie
将cookieJar转成cookies字典:cookies_dict = requests.utils.dict_from_cookiejar(response.cookies)
四、代理访问网页
1. 原理
2. 获取代理服务器方式
百度搜索免费代理服务器,多试几个
3. 使用代理服务器访问百度
import requests

# Proxy table keyed by URL scheme: plain-HTTP requests go through this proxy.
# An "https" entry would be needed for the proxy to apply to HTTPS URLs too.
proxies = {"http": "http://185.162.231.112:80"}
url = "http://www.baidu.com"

response = requests.get(url, proxies=proxies)
print(response.text)
五、requests.session进行状态保持
requests模块中的session类能够自动处理发送请求、获取响应过程中产生的cookie,进而达到状态保持的目的
session实例请求一个网站之后,对方服务器设置在本地的cookie会保存在session中,下一次再使用session请求对方服务器的时候,会带上前一次的cookie
session对象发送get或post请求的参数,与requests模块发送请求的参数完全一致
# Create a Session: cookies set by the server are stored on it and sent
# automatically with later requests made through the same object.
session = requests.session()
# Session.get() accepts only `url` positionally, so every other argument
# (headers, params, cookies, timeout, ...) must be passed by keyword.
response = session.get(url, headers=headers)
# `data` is the form body; further options are keyword arguments as above.
response = session.post(url, data=data)