之前打算做个微信小程序的社区,所以写了爬虫去爬取微信小程序,后面发现做微信小程序没有前途,就把原来的项目废弃了做了现在的网站观点,不过代码放着也是放着,还不如公开让大家用,所以我把代码贴出来,有需要的复制了使用就是了。
#coding:utf-8
__author__ = 'haoning'
#!/usr/bin/env python
import time
import urllib2
import datetime
import requests
import json
import random
import sys
import platform
import uuid
reload(sys)
sys.setdefaultencoding( "utf-8" )
import re
import os
import MySQLdb as mdb
from bs4 import BeautifulSoup
DB_HOST = '127.0.0.1'
DB_USER = 'root'
DB_PASS = 'root'
#init database
conn = mdb.connect(DB_HOST, DB_USER, DB_PASS, 'pybbs-springboot', charset='utf8')
conn.autocommit(False)
curr = conn.cursor()
count=0
how_many=0
base_url='http://www.wechat-cloud.com'
url=base_url+"/index.php?s=/home/article/ajax_get_list.html&category_id={category_id}&page={page}&size={size}"
user_agents = [
'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
'Opera/9.25 (Windows NT 5.1; U; en)',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, lik