Bootstrap

Python 爬取 umei 网 17 万张美女图片

下面直接上代码：

from pyquery import PyQuery as pq
import re
import pymongo
import threading
# Connect to the MongoDB server on localhost; scraped records are written to
# the `mei` collection of the `taobao` database.
client = pymongo.MongoClient(host='localhost', port=27017)
table = client['taobao']['mei']


def save_to_mongo(result):
    """Insert one document into the module-level ``table`` collection.

    Failures are reported on stdout instead of being raised, so a single
    bad record does not interrupt the caller.

    Args:
        result: The document (a dict) to store.
    """
    try:
        # Collection.insert() was deprecated in PyMongo 3 and removed in
        # PyMongo 4; insert_one() is the supported single-document call.
        table.insert_one(result)
    except Exception as exc:
        # Best-effort storage: report the record together with the cause.
        print('存储到Mongo失败', result, exc)
    else:
        print('存储到Mongo成功')

def _store_image(doc, success_message):
    """Save the image URL found in an album page document, if any."""
    image_url = doc('#ArticleId22 > p > a > img').attr('src')
    if not image_url:
        # The selector matched nothing; do not store an empty record.
        print('no image found, skipped')
        return
    save_to_mongo({'image': image_url})
    print(success_message)


def father_link():
    """Crawl listing pages 1-42 and store the image URL of every album page.

    For each album linked from a listing page, the image on the album's
    first page is saved, then the image on each follow-up page
    (``<album>_2.htm`` onwards). A follow-up page that cannot be fetched is
    reported and skipped; the remaining pages are still processed.
    """
    for i in range(1, 43):
        doc = pq(url='http://www.umei.cc/meinvtupian/rentiyishu/' + str(i) + '.htm', encoding='utf-8')
        for item in doc('.TypeList li').items():
            Son_link = item.find('a').attr('href')
            if not Son_link:
                # List entry without a link: nothing to follow.
                continue
            doc2 = pq(Son_link, encoding='utf-8')

            # First page of the album: the image lives on the album page
            # (doc2), not on the listing page (doc).
            _store_image(doc2, 'save number 1 success!')

            pager_text = doc2('body > div.wrap > div.NewPages > ul > li:nth-child(1) > a').text()
            # Take the first integer in the pager text.
            # NOTE(review): assumes the first pager item states the total
            # number of pages - confirm against the live markup.
            match = re.search(r"\d+", pager_text)
            if match is None:
                # No pager found: treat the album as single-page.
                continue
            page_num = int(match.group())

            # Follow-up pages are numbered 2..page_num inclusive.
            for page in range(2, page_num + 1):
                # Drop the trailing '.htm' and append the page suffix.
                Son_url = Son_link[:-4] + '_' + str(page) + '.htm'
                try:
                    doc3 = pq(Son_url, encoding='utf-8')
                except Exception as exc:
                    # Keep going: one bad page should not drop the rest.
                    print('failed to fetch', Son_url, exc)
                    continue
                _store_image(doc3, 'save success!')


def main():
    """Script entry point: run the crawl by calling father_link()."""
    father_link()


# Start the crawl only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
;