截获response和request
await page.setRequestInterception(True)
page.on('request', intercept_request)
page.on('response', intercept_response)
intercept_request 和 intercept_response 相当于注册的两个回调函数:浏览器每次发出请求时会执行 intercept_request,每次获取到响应时会执行 intercept_response。
比如可以这样禁止获取图片、多媒体资源,以及发起 eventsource 和 websocket 请求:
async def intercept_request(req):
    """Filter outgoing requests.

    Requests whose ``resourceType`` is one of ``image``, ``media``,
    ``eventsource`` or ``websocket`` are aborted; every other request is
    allowed to continue unchanged.
    """
    blocked_types = ('image', 'media', 'eventsource', 'websocket')
    # Guard clause: anything that is not blocked goes straight through.
    if req.resourceType not in blocked_types:
        await req.continue_()
        return
    await req.abort()
然后每次获取到响应之后将内容打印出来(这里只打印了 fetch 和 xhr 类型 response 的内容):
async def intercept_response(res):
    """Print the body text of ``xhr`` and ``fetch`` responses.

    Responses whose originating request has any other ``resourceType``
    are ignored and their body is never read.
    """
    if res.request.resourceType not in ('xhr', 'fetch'):
        return
    body = await res.text()
    print(body)
from scrapy import signals
import pyppeteer
import asyncio
import os
import time
import json
import tkinter
from scrapy.http import HtmlResponse
from Aliexpress.ConfigDB import RedisDB,RedisPool
import logging
# Raise the pyppeteer and websockets.protocol loggers to WARNING.
pyppeteer_level = logging.WARNING
pyppeteer_logger = logging.getLogger('pyppeteer')
pyppeteer_logger.setLevel(pyppeteer_level)
logging.getLogger('websockets.protocol').setLevel(pyppeteer_level)

# Module-level Redis handle on database 0.
# redisconn=RedisDB(db=0)
redisconn = RedisPool(db=0)

# Turn off pyppeteer's DEBUG flag.
pyppeteer.DEBUG = False
def _patch_pyppeteer():
from typing import Any
from pyppeteer import connection, launcher
import websockets.client
class PatchedConnection(connection.Connection): # type: ignore
def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)
# the _ws argument is not yet connected, can simply be replaced with another
# with better defaults.
self._ws = websockets.client.connect(
self._url,
loop=self._loop,
# the followi