#!/usr/bin/env python #coding = utf-8 ''' 本爬虫是用来爬取6V电影网站上的电影资源的一个小脚本程序,爬取到的电影链接会通过网页的形式显示出来 ''' import requests import re from bs4 import BeautifulSoup as bs from queue import Queue from other import getUser_Agent import threading import sys import time headers = getUser_Agent.getUser_Agent() class Movielinks(threading.Thread): def __init__(self,que,filepath,totalcount,starttime): threading.Thread.__init__(self) self._que = que self._filepath = filepath self._totalcount = totalcount self._starttime = starttime def run(self): try: while not self._que.empty(): url = self._que.get() threading.Thread(target=self.showdetail).start() self.spider(url) except: print('error--->def run(self):') def spider(self,url): try: r = requests.get(url,headers) file = open(self._filepath,'a+') if r.status_code == 200: soup = bs(r.content.decode('gbk'),'html.parser') links = soup.find_all('td') title = re.search(r'《(.*?)》',soup.title.string) # print(title.group()) for link in links: if '下载帮助' not in str(link): hrefs = link.find_all('a') # print(link) # print('**' * 100) # mima = re.search(r'</a>(.*?)</td>',str(link)) # print(mima.group()) for href in hrefs: if 'ed2k' in href['href'] or '.torrent' in href['href'] or 'thunder' in href['href'] or '