Bootstrap

关于 Accept-Encoding: gzip, deflate, br 的思考

import urllib
from io import BytesIO
import urllib.request
from urllib.request import urlopen
import zlib
import gzip


def loadData(url, headers):
    """Fetch *url* and return the response body as decompressed bytes.

    The request advertises ``gzip,deflate`` support, and the body is
    decompressed according to the response's ``Content-Encoding`` header.

    Args:
        url: Address to request.
        headers: Mapping of extra request headers; ``None`` is treated as
            "no extra headers".

    Returns:
        The body as ``bytes``. It is returned unchanged when the
        ``Content-Encoding`` header is absent or names anything other than
        gzip or deflate.

    Raises:
        Nothing is caught here: errors from ``urlopen`` and from the
        decompression helpers propagate to the caller.
    """
    request = urllib.request.Request(url, headers=headers or {})
    request.add_header('Accept-encoding', 'gzip,deflate')
    # The context manager closes the connection even if read() raises.
    with urlopen(request) as response:
        content = response.read()
        encoding = response.info().get('Content-Encoding')
    # Content-coding tokens are case-insensitive, so normalise before comparing.
    encoding = (encoding or '').strip().lower()
    if encoding == 'gzip':
        return gZip(content)
    if encoding == 'deflate':
        return deflate(content)
    return content

def gZip(data):
    """Decompress gzip-formatted *data* and return the resulting bytes.

    Args:
        data: A ``bytes`` object holding a complete gzip stream.

    Returns:
        The decompressed payload as ``bytes``.
    """
    # Use a context manager so the GzipFile is closed deterministically
    # instead of being left for the garbage collector.
    with gzip.GzipFile(fileobj=BytesIO(data)) as stream:
        return stream.read()

def deflate(data):
    """Inflate a "deflate"-encoded payload and return the resulting bytes.

    A raw deflate stream (no zlib header) is attempted first; if zlib
    rejects it, the data is decompressed again as a zlib-wrapped stream.

    Args:
        data: The compressed ``bytes``.

    Returns:
        The decompressed payload as ``bytes``.

    Raises:
        zlib.error: If the data is valid in neither format.
    """
    # A negative window size tells zlib to expect a header-less raw stream.
    raw_window = -zlib.MAX_WBITS
    try:
        inflated = zlib.decompress(data, raw_window)
    except zlib.error:
        inflated = zlib.decompress(data)
    return inflated

def main():
    """Download a sample page with browser-like headers and print its HTML.

    The bytes returned by ``loadData`` are decoded as GB18030 before being
    written to standard output.
    """
    page_url = "http://www.puyang.gov.cn/shownews.asp?id=109165"
    # Header set imitating a desktop Chrome request.
    request_headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
    }
    raw_body = loadData(page_url, request_headers)
    print(raw_body.decode("gb18030"))

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

好像不太行欸@_@(注:上面的代码只请求并处理了 gzip 和 deflate 两种编码,并没有处理标题里提到的 br。)

;