**
第一步是爬虫代码:爬取豆瓣TOP250的数据,并将其存储到本地MySQL数据库。
**
##reptile.py
import requests
import uuid
import requests
from lxml import html
import re
import os
import pymysql
def get_douban_top250():
    """Scrape the Douban Top 250 list and append it to ``douban.txt``.

    Requests the ten list pages (25 movies per page) and, for every movie
    entry, extracts the title, release date, country, genre, director/cast
    line, one-line quote, score and rating count. One comma-separated line
    per movie is appended to ``douban.txt`` in the current directory:

        Top<rank>,name,date,country,type,cast,remark,score,count

    Raises:
        requests.RequestException: a page could not be fetched (network
            error, timeout, or an HTTP error status).
        IndexError: a movie entry has no title, score or rating count.
    """
    print("--------")
    print('正在获取豆瓣TOP250影片信息并保存至本地...')
    page_count = 10
    page_size = 25
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'}

    # Characters deleted from the extracted text. One translate() pass per
    # field replaces the long chains of single-character replace() calls.
    strip_common = str.maketrans("", "", " \n\xa0\xee\xf6\u0161\xf4\xfb\u2027")
    strip_cast = str.maketrans("", "", " \n\xa0\xee\xf6\u0161\xf4\xfb\u2027\xe5\u22ef")
    strip_remark = str.maketrans("", "", "[]'\u22ef")

    def pick(parts, position):
        # Missing segments become "" instead of raising IndexError.
        return parts[position] if position < len(parts) else ""

    rank = 1
    # Open the output once for the whole run (append mode, as before) and let
    # the context manager close it even if scraping fails half-way. The
    # platform default encoding is kept because the importer reads the file
    # back with the default encoding as well.
    with open("douban.txt", "a") as out:
        for page in range(page_count):
            url = 'https://movie.douban.com/top250?start={}&filter='.format(page * page_size)
            # A timeout avoids hanging forever; raise_for_status() surfaces
            # blocked/failed requests instead of silently parsing an error page.
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            doc = html.fromstring(response.content)
            for node in doc.xpath('//div[@class="info"]'):
                # Movie title (first "title" span).
                name = node.xpath('div[@class="hd"]/a/span[@class="title"]/text()')[0]
                # Text nodes of the first <p>: [0] is the director/cast line,
                # [1] is the "date / country / genre" line.
                detail_lines = node.xpath('div[@class="bd"]/p[1]/text()')
                cast_line = detail_lines[0] if detail_lines else ""
                info_line = detail_lines[1] if len(detail_lines) > 1 else ""
                info_parts = info_line.translate(strip_common).split("/")
                date = pick(info_parts, 0)      # release date
                country = pick(info_parts, 1)   # production country
                genre = pick(info_parts, 2)     # movie genre
                cast = cast_line.translate(strip_cast)
                # One-line quote. Kept as in the original: the list's repr with
                # brackets and quotes stripped, so a missing quote yields "".
                quote = node.xpath('div[@class="bd"]/p[@class="quote"]/span[@class="inq"]/text()')
                remark = str(quote).translate(strip_remark)
                # Score and number of ratings.
                score = node.xpath('div[@class="bd"]/div[@class="star"]/span[2]/text()')[0]
                count = node.xpath('div[@class="bd"]/div[@class="star"]/span[4]/text()')[0]

                fields = ["Top" + str(rank), name, date, country, genre, cast, remark, score, count]
                # The file is comma-separated and later split on ",", so an
                # ASCII comma inside a field would shift every following
                # column; swap it for a full-width comma.
                out.write(",".join(field.replace(",", "，") for field in fields))
                out.write("\n")
                rank += 1
# Program entry point: scrape the list into douban.txt, then load that file
# into the MySQL table `t_doubantop`.
if __name__ == "__main__":
    data_file = "douban.txt"
    # Remove the dump of a previous run. The scraper opens this file in append
    # mode, so a leftover file would duplicate every row. (The original checked
    # "...\\douban.txt", which is not the file the scraper writes.)
    if os.path.exists(data_file):
        os.remove(data_file)
    # Run the scraper.
    get_douban_top250()
    # Connect to the database.
    print("--------")
    print("开始连接数据库...")
    connect = pymysql.connect(
        user="root",
        password="",
        host="localhost",
        db="douban",
        port=3306,
        charset="utf8",  # the charset must be set explicitly
        use_unicode=True,
    )
    try:
        con = connect.cursor()
        try:
            con.execute("use douban")  # select the database
            con.execute("drop table if exists t_doubantop")
            # Columns: rank, name, date, country, type, charactor, remark,
            # score, count — all stored as text.
            sql = '''create table t_doubantop(rank_list VARCHAR(10),name VARCHAR(40) NOT NULL,date VARCHAR(20),country VARCHAR(20),
            type VARCHAR(20),charactor VARCHAR(100),remark VARCHAR(100),score VARCHAR(40),count VARCHAR(40))'''
            con.execute(sql)
            # Load the scraped file.
            print("--------")
            print("正在导入数据...")
            rows = []
            with open(data_file, "r") as dump:
                for line_no, line in enumerate(dump, start=1):
                    # Each line holds nine comma-separated fields.
                    fields = line.strip("\n").split(",")
                    if len(fields) < 9:
                        # Too few fields would raise IndexError; skip the line
                        # and report it instead of aborting the import.
                        print("第{}行字段不足,已跳过: {!r}".format(line_no, line))
                        continue
                    # Only the first nine fields are used, as in the original.
                    rows.append(fields[:9])
            con.executemany(
                "insert into t_doubantop(rank_list, name, date, country, type, charactor, remark, score, count)values(%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                rows)
            # Commit the inserted rows.
            connect.commit()
        finally:
            con.close()
    finally:
        # Always release the connection, even if the import failed.
        connect.close()
    print("--------")
    print("数据库导入成功...")
    print('任务执行完成!')
**
第二步是编写功能类,即连接数据库、为界面功能提供数据访问支持的dbhelper。界面的功能有:显示所有TOP250信息、按照排名降序输出、通过电影名查询影片简介、模糊查询电影信息。
**
爬下来的数据库内的数据如图所示
##util.py
import pymysql
# Look up a movie's remark (one-line description) by movie name
def search_description_by_name(name):
    """Return the ``remark`` values of rows whose name matches *name*.

    The match uses SQL ``LIKE``, so ``%`` and ``_`` inside *name* act as
    wildcards; without wildcards it behaves like an equality test.

    Args:
        name: Movie name (or LIKE pattern); converted with ``str()``.

    Returns:
        The result of ``cursor.fetchall()``: a sequence of one-element rows,
        each holding a ``remark`` value. Empty when nothing matches.
    """
    # Connect to the database.
    print("--------")
    print("开始连接数据库...")
    connect = pymysql.connect(
        user="root",
        password="",
        host="localhost",
        db="douban",
        port=3306,
        charset="utf8",  # the charset must be set explicitly
        use_unicode=True,
    )
    try:
        con = connect.cursor()
        try:
            con.execute("use douban")  # select the database
            # Pass the name as a bound parameter instead of formatting it into
            # the statement: the value comes from a GUI text box, and a quote
            # in it would break (or inject into) a string-built query.
            sql = "select remark from t_doubantop where name like %s"
            params = (str(name),)
            # Debug output of the statement as it will be sent to the server.
            print(con.mogrify(sql, params))
            con.execute(sql, params)
            # Read-only query: fetch everything; there is nothing to commit.
            return con.fetchall()
        finally:
            con.close()
    finally:
        # Always release the connection, even when the query fails.
        connect.close()
# Return all rows ordered by rank, descending
def DESC():
    """Return every row of ``t_doubantop`` ordered by rank, highest number first.

    Returns:
        A list of row tuples in the table's column order (rank_list, name,
        date, country, type, charactor, remark, score, count).
    """
    # Connect to the database.
    print("--------")
    print("开始连接数据库...")
    connect = pymysql.connect(
        user="root",
        password="",
        host="localhost",
        db="douban",
        port=3306,
        charset="utf8",  # the charset must be set explicitly
        use_unicode=True,
    )
    try:
        con = connect.cursor()
        try:
            con.execute("use douban")  # select the database
            # rank_list is text of the form "Top<number>", so ordering by the
            # column itself compares strings ("Top99" > "Top250"). Order by the
            # numeric part after the 3-character "Top" prefix instead.
            sql = '''select * from t_doubantop order by cast(substring(rank_list, 4) as unsigned) DESC'''
            con.execute(sql)
            # fetchall() yields a tuple of rows; callers get a list.
            return list(con.fetchall())
        finally:
            con.close()
    finally:
        # Always release the connection, even when the query fails.
        connect.close()
# Fetch every row of the Top 250 table
def search_top():
    """Return every row of ``t_doubantop``.

    No ORDER BY is applied, so rows come back in whatever order the server
    returns them.

    Returns:
        A list of row tuples in the table's column order (rank_list, name,
        date, country, type, charactor, remark, score, count).
    """
    # Connect to the database.
    print("--------")
    print("开始连接数据库...")
    connect = pymysql.connect(
        user="root",
        password="",
        host="localhost",
        db="douban",
        port=3306,
        charset="utf8",  # the charset must be set explicitly
        use_unicode=True,
    )
    try:
        con = connect.cursor()
        try:
            con.execute("use douban")  # select the database
            con.execute('''select * from t_doubantop''')
            # fetchall() yields a tuple of rows; callers get a list.
            # Read-only query, so there is nothing to commit.
            return list(con.fetchall())
        finally:
            con.close()
    finally:
        # Always release the connection, even when the query fails.
        connect.close()
第三步就是gui的界面了
import sys
from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *
from PyQt5 import QtCore
from douban import util
class Second(QDialog):
    """Dialog showing the Douban Top 250 table with search and sort controls.

    The table on the left is filled from ``util.search_top()``. The panel on
    the right offers:
      * a prefix search that highlights every matching cell,
      * an exact-match lookup that highlights the matching cells and shows
        the remark returned by ``util.search_description_by_name()``,
      * a button that reloads the table from ``util.DESC()``.
    """

    def __init__(self):
        super().__init__()
        self.initUI()

    def initUI(self):
        """Create all widgets, load the table data and wire up the buttons."""
        # Window setup
        self.resize(1000, 800)
        self.setWindowTitle('douban TOP250')
        top = util.search_top()
        # Outer layout (horizontal) and the nested grid layouts
        wlayout = QHBoxLayout()
        glayout = QGridLayout()
        searchlayout = QGridLayout()
        search_des_layout = QGridLayout()
        label1 = QLabel(self)
        label2 = QLabel(self)
        label1.setText("<strong >根据电影名查询影片简介</strong>")
        label2.setText("<strong >模糊查询,结果会单元格高亮</strong>")
        self.text_browser = QTextBrowser(self)  # shows the movie remark
        self.text_browser.resize(50, 50)
        self.MyTable = QTableWidget()
        self.MyTable.setColumnCount(9)
        self.MyTable.setHorizontalHeaderLabels(['排名', '电影名', '上映年份','国家','类型','演员表','影片简介','分数','评价人数'])
        # Header sections stretch with the widget; last column fills the rest
        self.MyTable.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
        self.MyTable.horizontalHeader().setStretchLastSection(True)
        # Load the rows. The row count follows the data, so a table with fewer
        # than 250 rows no longer raises IndexError.
        self._fill_table(top)
        self.MyTable.resizeColumnsToContents()

        # Search within the table
        self.searchLab = QLineEdit(self)
        self.searchLab.setPlaceholderText('请输入需要查询的数据')
        self.bt1 = QPushButton('点击查询', self)
        self.bt1.clicked.connect(self.showMessage)
        searchlayout.addWidget(self.bt1, 0, 1)
        searchlayout.addWidget(self.searchLab, 0, 0)
        s_widget = QWidget()
        s_widget.setLayout(searchlayout)
        glayout.addWidget(label2, 0, 0)
        glayout.addWidget(s_widget, 1, 0)

        # Look up the remark by movie name
        self.searchLab_name = QLineEdit(self)
        self.searchLab_name.setPlaceholderText('请输入需要查询的电影名')
        self.bt2 = QPushButton('点击查询', self)
        self.bt2.clicked.connect(self.search_description_by_name)
        search_des_layout.addWidget(self.bt2, 0, 1)
        search_des_layout.addWidget(self.searchLab_name, 0, 0)
        sname_widget = QWidget()
        sname_widget.setLayout(search_des_layout)
        glayout.addWidget(label1, 2, 0)
        glayout.addWidget(sname_widget, 3, 0)
        glayout.addWidget(self.text_browser, 4, 0)

        # Reload the table in descending rank order
        self.bt_desc = QPushButton('排名降序', self)
        self.bt_desc.clicked.connect(self.desc)
        glayout.addWidget(self.bt_desc, 5, 0)

        # Assemble: table on the left (stretch 8), controls on the right (2)
        gwg = QWidget()
        gwg.setLayout(glayout)
        wlayout.addWidget(self.MyTable, 8)
        wlayout.addWidget(gwg, 2)
        self.setLayout(wlayout)

    def _fill_table(self, rows):
        """Replace the table contents with *rows* (sequences of column values)."""
        column_count = self.MyTable.columnCount()
        self.MyTable.setRowCount(len(rows))
        for r, record in enumerate(rows):
            for c, value in enumerate(record[:column_count]):
                # NULL columns become empty cells instead of failing in Qt.
                text = "" if value is None else str(value)
                self.MyTable.setItem(r, c, QTableWidgetItem(text))

    def _clear_highlight(self):
        """Reset the background of every existing cell to white."""
        white = QBrush(QColor(255, 255, 255))
        for r in range(self.MyTable.rowCount()):
            for c in range(self.MyTable.columnCount()):
                cell = self.MyTable.item(r, c)
                # item() returns None for a cell that was never populated.
                if cell is not None:
                    cell.setBackground(white)

    def _highlight(self, items):
        """Give every item in *items* a green background."""
        green = QBrush(QColor(0, 255, 0))
        for item in items:
            item.setBackground(green)

    def showMessage(self):
        """Highlight every cell whose text starts with the search text.

        Shows an information box when no cell matches.
        """
        wen = self.searchLab.text()
        items = self.MyTable.findItems(wen, QtCore.Qt.MatchStartsWith)
        self._clear_highlight()
        if not items:
            QMessageBox.information(self, "提示", "您所搜索的内容不在表格中!")
            return
        self._highlight(items)

    def search_description_by_name(self):
        """Highlight cells equal to the entered name and show the movie's remark.

        Shows an information box when no cell matches exactly.
        """
        wen = self.searchLab_name.text()
        items = self.MyTable.findItems(wen, QtCore.Qt.MatchExactly)  # exact match
        self._clear_highlight()
        if not items:
            QMessageBox.information(self, "提示", "您所搜索的内容不在表格中!")
            return
        self._highlight(items)
        # Each returned row holds one remark value. Show the text itself, not
        # the repr of the row tuples (which left stray quotes and commas).
        rows = util.search_description_by_name(wen)
        description = "\n".join(str(row[0]) for row in rows if row and row[0])
        self.text_browser.setText(description)

    def desc(self):
        """Reload the table with the rows returned by ``util.DESC()``."""
        self._fill_table(util.DESC())
if __name__ == '__main__':
    # Create the Qt application, show the dialog, then run the event loop
    # and use its return value as the process exit status.
    qt_app = QApplication(sys.argv)
    main_window = Second()
    main_window.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
最后的gui界面的功能实现如下
功能1:模糊查询
搜索意大利,会出现对应单元格高亮
功能2:通过电影名查询影片描述
状况1:没有对应电影名
状况2:有电影名,成功查询到,显示到文本框中并且单元格高亮
功能3:排名降序输出表格
完结撒花!!✿✿ヽ(°▽°)ノ✿