# python 数据采集

import pandas as pd
from time import sleep
import requests
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from concurrent.futures import ThreadPoolExecutor, as_completed
# 采集部分
'''https://ncpscxx.moa.gov.cn/#/queryDataMain/showSearchData?item=3&id=6'''

# HTTP headers passed to the requests.post call further down.
# The values imitate a desktop Chrome 122 browser on Windows.
headers = {
    # --- content negotiation ---
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "zh-CN,zh;q=0.9",
    # --- caching / connection ---
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    # The request body is sent as JSON.
    "Content-Type": "application/json",
    # --- request origin on the target site ---
    "Origin": "https://ncpscxx.moa.gov.cn",
    "Pragma": "no-cache",
    "Referer": "https://ncpscxx.moa.gov.cn/",
    # --- fetch metadata ---
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    # --- browser identification ---
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/122.0.0.0 Safari/537.36"
    ),
    "sec-ch-ua": '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
}

# JSON payload template for the list query. The paging loop further down
# overwrites 'currentPage' before every request, so the 3 here is only a
# placeholder.
# NOTE(review): the visible part of that loop never assigns 'productName',
# so it stays '苹果' for every product iterated -- confirm this is intended.
json_data = dict(
    currentPage=3,
    pageSize=10,
    # Search filters; empty strings leave a filter unset (presumably --
    # verify against the API).
    params=dict(
        keyword='',
        productName='苹果',
        itemName='',
        areaName='',
        source='',
        beginTime='2023-01-01',
        endTime='2024-12-10',
    ),
)
item_list = []
for n in [{
   'name': '苹果', 'page': 445}, {
   'name': '西红柿', 'page': 1000}, {
   'name': '玉米', 'page': 286}, ]:

    for page in range(1, n['page'] + 1):
        json_data['currentPage'] = page
        response = requests.post('https://ncpscxx.moa.gov.cn/product/datacenter/list', headers=headers, json=json_data,
                                 verify=False)
        for i in response
python 数据采集

悦读