Bootstrap

python requests模块

目录

一、GET请求

1.1 简单使用

1.2 带header

1.3 带params

1.4 带cookie

1.5 超时参数timeout

1.6 忽略CA证书验证

二、POST请求

三、response响应

3.1 打印响应的内容:response.text和response.content

3.2 cookieJar对象转换为cookies字典

四、代理访问网页

五、requests.session进行状态保持


一、GET请求

1.1 简单使用

import requests

# Fetch the Baidu home page with a plain GET request.
url = "https://www.baidu.com"
response = requests.get(url)

# Print the response body, decoded from bytes to text.
body_text = response.content.decode()
print(body_text)

# Print the headers of the request that produced this response.
sent_headers = response.request.headers
print(sent_headers)

1.2 带header

import requests

url = "https://www.baidu.com"

# Custom request headers, sent in place of the library defaults.
headers = {
    "Accept": "*/*",
    # Advertise only the encodings requests can always decode. "br" and "zstd"
    # are decoded only when optional packages are installed; advertising them
    # without those packages can leave response.content still compressed.
    "Accept-Encoding": "gzip, deflate",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0",
}

response = requests.get(url, headers=headers)

1.3 带params

1. url里面带参

# The query string (wd=python) is written directly into the URL.
url = "https://www.baidu.com/s?wd=python"

2. get方法传参

import requests

# Base search URL; the query string is supplied separately through `params`.
url = "https://www.baidu.com/s?"

# Browser-like User-Agent sent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0",
}

# Query parameters handed to requests.get via `params`.
kw = {"wd": "python"}

response = requests.get(url, params=kw, headers=headers)

1.4 带cookie

1. 方式1

import requests

url = "https://mp.csdn.net/"

headers = {
    "Cookie" : "uuid_tt_dd=10_18803805180-1711458752220-320466; UserName=HuanBianCheng27; UserInfo=54257457f4154082bc88c34aaf74681a; UserToken=54257457f4154082bc88c34aaf74681a; UserNick=herb.dr; AU=5B3; UN=HuanBianCheng27; BT=1711466608435; p_uid=U010000; Hm_up_6bcd52f51e9b3dce32bec4a3997715ac=%7B%22islogin%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isonline%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isvip%22%3A%7B%22value%22%3A%220%22%2C%22scope%22%3A1%7D%2C%22uid_%22%3A%7B%22value%22%3A%22HuanBianCheng27%22%2C%22scope%22%3A1%7D%7D; Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=6525*1*10_18803805180-1711458752220-320466!5744*1*HuanBianCheng27; c_dl_um=-; FCNEC=%5B%5B%22AKsRol9iX1Tba3h_JtQxL85On8j5HF8Z3B8sumfv4WOWHsDl19UxB3J_ddHCopWH8NugJUPeePJAtAaIjfjLV8IaMWLv5c2bueFJlhbZRGhdkG-hOmNPGB3ubhlzYRcPi5dWNX_TPAla0m2beC8dfB3pboExmwBewA%3D%3D%22%5D%5D; c_dl_prid=1713365520364_902128; c_dl_rid=1713490976789_898249; c_dl_fref=https://blog.csdn.net/Zwq8023520/article/details/132272082; c_dl_fpage=/download/weixin_38554781/12843177; HuanBianCheng27comment_new=1704697815394; _clck=8ywswb%7C2%7Cfls%7C0%7C1546; SidecHatdocDescBoxNum=true; __gads=ID=e2b796bffae167a1:T=1711458754:RT=1715757151:S=ALNI_MZ-MMgm4eo0BLshmaZFGfRfte_bCA; __gpi=UID=00000d677567e5fc:T=1711458754:RT=1715757151:S=ALNI_MbFBcrstxA-w_iqE4S1wVrzv8ubjQ; __eoi=ID=98aceaad28052c81:T=1711458754:RT=1715757151:S=AA-AfjYUoC5oJMtfsA8pgTPN6JGp; firstDie=1; _clsk=1byoxsg%7C1715757238756%7C3%7C0%7Cq.clarity.ms%2Fcollect; dc_sid=14fd0090162d5886eee8c47bff481d8f; c_pref=default; c_ref=default; dc_session_id=11_1715770884627.106417; c_first_ref=default; c_first_page=https%3A//mp.csdn.net/%3Fspm%3D1010.2135.3001.8539; c_dsid=11_1715770884627.619418; c_segment=2; c_page_id=default; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1715646520,1715669698,1715734389,1715770885; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1715770885; 
creativeSetApiNew=%7B%22toolbarImg%22%3A%22https%3A//img-home.csdnimg.cn/images/20230921102607.png%22%2C%22publishSuccessImg%22%3A%22https%3A//img-home.csdnimg.cn/images/20240229024608.png%22%2C%22articleNum%22%3A43%2C%22type%22%3A2%2C%22oldUser%22%3Atrue%2C%22useSeven%22%3Afalse%2C%22oldFullVersion%22%3Atrue%2C%22userName%22%3A%22HuanBianCheng27%22%7D; creative_btn_mp=1; log_Id_pv=636; log_Id_view=18635; log_Id_click=734; dc_tos=sdixdg"
}

response = requests.get(url, headers=headers)

2. 方式2

import requests

url = "https://mp.csdn.net/"

temp = "uuid_tt_dd=10_18803805180-1711458752220-320466; UserName=HuanBianCheng27; UserInfo=54257457f4154082bc88c34aaf74681a; UserToken=54257457f4154082bc88c34aaf74681a; UserNick=herb.dr; AU=5B3; UN=HuanBianCheng27; BT=1711466608435; p_uid=U010000; Hm_up_6bcd52f51e9b3dce32bec4a3997715ac=%7B%22islogin%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isonline%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isvip%22%3A%7B%22value%22%3A%220%22%2C%22scope%22%3A1%7D%2C%22uid_%22%3A%7B%22value%22%3A%22HuanBianCheng27%22%2C%22scope%22%3A1%7D%7D; Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=6525*1*10_18803805180-1711458752220-320466!5744*1*HuanBianCheng27; c_dl_um=-; FCNEC=%5B%5B%22AKsRol9iX1Tba3h_JtQxL85On8j5HF8Z3B8sumfv4WOWHsDl19UxB3J_ddHCopWH8NugJUPeePJAtAaIjfjLV8IaMWLv5c2bueFJlhbZRGhdkG-hOmNPGB3ubhlzYRcPi5dWNX_TPAla0m2beC8dfB3pboExmwBewA%3D%3D%22%5D%5D; c_dl_prid=1713365520364_902128; c_dl_rid=1713490976789_898249; c_dl_fref=https://blog.csdn.net/Zwq8023520/article/details/132272082; c_dl_fpage=/download/weixin_38554781/12843177; HuanBianCheng27comment_new=1704697815394; _clck=8ywswb%7C2%7Cfls%7C0%7C1546; SidecHatdocDescBoxNum=true; __gads=ID=e2b796bffae167a1:T=1711458754:RT=1715757151:S=ALNI_MZ-MMgm4eo0BLshmaZFGfRfte_bCA; __gpi=UID=00000d677567e5fc:T=1711458754:RT=1715757151:S=ALNI_MbFBcrstxA-w_iqE4S1wVrzv8ubjQ; __eoi=ID=98aceaad28052c81:T=1711458754:RT=1715757151:S=AA-AfjYUoC5oJMtfsA8pgTPN6JGp; firstDie=1; _clsk=1byoxsg%7C1715757238756%7C3%7C0%7Cq.clarity.ms%2Fcollect; dc_sid=14fd0090162d5886eee8c47bff481d8f; c_pref=default; c_ref=default; dc_session_id=11_1715770884627.106417; c_first_ref=default; c_first_page=https%3A//mp.csdn.net/%3Fspm%3D1010.2135.3001.8539; c_dsid=11_1715770884627.619418; c_segment=2; c_page_id=default; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1715646520,1715669698,1715734389,1715770885; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1715770885; 
creativeSetApiNew=%7B%22toolbarImg%22%3A%22https%3A//img-home.csdnimg.cn/images/20230921102607.png%22%2C%22publishSuccessImg%22%3A%22https%3A//img-home.csdnimg.cn/images/20240229024608.png%22%2C%22articleNum%22%3A43%2C%22type%22%3A2%2C%22oldUser%22%3Atrue%2C%22useSeven%22%3Afalse%2C%22oldFullVersion%22%3Atrue%2C%22userName%22%3A%22HuanBianCheng27%22%7D; creative_btn_mp=1; log_Id_pv=636; log_Id_view=18635; log_Id_click=734; dc_tos=sdixdg"

# Split the raw Cookie header into "name=value" items. Splitting on ";" and
# stripping each item tolerates stray whitespace or line breaks between cookies
# and drops empty segments.
cookies_list = [item.strip() for item in temp.split(";") if item.strip()]

# Build the {name: value} dict. Only the FIRST "=" separates name from value,
# because cookie values may themselves contain "=" (e.g. "__gads=ID=...:T=...").
cookies = {}
for cookie in cookies_list:
    name, _, value = cookie.partition("=")
    cookies[name] = value
print(cookies)

# Send the parsed cookies through the dedicated `cookies` argument.
response = requests.get(url, cookies=cookies)
print(response.content.decode())

1.5 超时参数timeout

请求超过指定时间仍未返回响应时,抛出超时异常

import requests

url = "https://gethub.com"

try:
    # Give up if the server does not respond within 1 second.
    response = requests.get(url, timeout=1)
except requests.exceptions.Timeout:
    # Timeout is the common base of ConnectTimeout and ReadTimeout, so this
    # handles a slow connection as well as a slow response.
    print("Time out.")
else:
    # Runs only when the request completed without timing out.
    print(response.content.decode())

1.6 忽略CA证书验证

verify=False

import requests

url = "https://gethub.com"

# verify=False asks requests not to verify the server's certificate.
response = requests.get(url, verify=False)

# Decode the raw response bytes and print them.
page_text = response.content.decode()
print(page_text)

二、POST请求

和get请求参数,除了data以外,其他完全一致

data是字典类型的body数据

# Schematic call: `data` is the dict-typed request body; `...` presumably
# stands for the remaining arguments shared with requests.get.
response = requests.post(url, data, ...)

三、response响应

response = requests.get(url)

● response.text 获取str类型的响应内容

● response.content 获取bytes类型的响应内容

● response.url 响应的url; 有时候响应的url和请求的url并不一致

● response.status_code 响应状态码

● response.request.headers 响应对应的请求头

● response.headers 响应头

● response.request.cookies 响应对应请求的cookie; 返回cookieJar类型

● response.cookies 响应的cookie (经过了set-cookie动作); 返回cookieJar类型

● response.json() 自动将json字符串类型的响应内容转换为python对象 (dict or list)

3.1 打印响应的内容:response.text和response.content

import requests

url = "https://www.baidu.com"
response = requests.get(url)

# Option 1: set the encoding by hand, then read the str form of the body.
response.encoding = "utf-8"
print(response.text)

# Option 2: take the raw bytes and decode them.
raw_body = response.content
print(raw_body.decode())

常见的编码字符集

utf-8、gbk、gb2312、ascii、iso-8859-1

3.2 cookieJar对象转换为cookies字典

使用requests获取的response对象,具有cookies属性。该属性值是一个cookieJar类型,包含了对方服务器设置在本地的cookie

将cookieJar转成cookies字典:cookies_dict=requests.utils.dict_from_cookiejar(response.cookies)

四、代理访问网页

1. 原理

2. 获取代理服务器方式

百度搜索免费代理服务器,多试几个

3. 使用代理服务器访问百度

import requests

url = "http://www.baidu.com"

# Map each URL scheme to the proxy server that should carry it.
# Only "http" is configured; an "https" entry was left disabled:
#     "https" : "https://185.162.231.112:80"
proxies = {"http": "http://185.162.231.112:80"}

response = requests.get(url, proxies=proxies)

page_text = response.text
print(page_text)

五、requests.session进行状态保持

requests模块中的session类能够自动处理发送请求、获取响应过程中产生的cookie,进而达到状态保持的目的

session实例请求一个网站之后,对方服务器设置在本地的cookie会保存在session中,下一次再使用session请求对方服务器的时候,会带上前一次的cookie

session对象发送get或post请求的参数,与requests模块发送请求的参数完全一致

session = requests.session()  # create a session object
# Schematic calls: `...` presumably stands for further arguments.
# NOTE(review): `headers` is shown positionally here; confirm against the
# Session.get signature, which may require it as a keyword (headers=headers).
response = session.get(url, headers, ...)
response = session.post(url, data, ...)

;