**python3爬虫selenium爬取今日头条财经新闻**
使用selenium爬取今日头条财经版块新闻标题,并保存至本地文档。
参考文章:https://blog.csdn.net/qq_42689278/article/details/84590798
from selenium import webdriver
import time #导入时间库
# Build the name of the output text file.
def txt_name(base_dir='D:/python/workspace/toutiaonews/', now=None):
    """Return the path of the text file that the headlines are saved to.

    The file is named ``newstitle_<month>.<day>.txt`` (numbers are not
    zero-padded) and is placed inside *base_dir*.

    Args:
        base_dir: Directory that receives the file. Defaults to the
            location that was previously hard-coded in this function.
        now: A ``time.struct_time`` (or any sequence laid out the same
            way) that supplies the date. Defaults to the current local
            time.

    Returns:
        The file path as a string.
    """
    import os  # local import: the file's import block does not provide os

    if now is None:
        now = time.localtime()
    # Indexes 1 and 2 of a struct_time are the month and the day of month.
    file_name = 'newstitle_{}.{}.txt'.format(now[1], now[2])
    return os.path.join(base_dir, file_name)
# Describe the current time.
def get_time(now=None):
    """Return a sentence (in Chinese) stating a date and time to the minute.

    Args:
        now: A ``time.struct_time`` (or any sequence laid out the same
            way) to describe. Defaults to the current local time.

    Returns:
        A string of the form ``现在是<year>年<month>月<day>日<hour>时<minute>分``
        with no zero padding.
    """
    if now is None:
        now = time.localtime()
    # The first five fields are year, month, day, hour and minute.
    year, month, day, hour, minute = now[:5]
    return "现在是{}年{}月{}日{}时{}分".format(year, month, day, hour, minute)
# Record the time of the crawl in the text file.
def write_time(t_n):
    """Append one line holding the current-time sentence to a file.

    The sentence comes from ``get_time()``; the file is opened in append
    mode with UTF-8 encoding.

    Args:
        t_n: Path of the text file to append to.
    """
    stamp_line = get_time()
    with open(t_n, mode='a', encoding='utf-8') as out_file:
        out_file.write(str(stamp_line) + '\n')
# Save the headlines.
def write_info(t_n):
    """Append the text of every headline link on the current page to a file.

    Headlines are the ``<a>`` elements matched by the XPath
    ``//div[@class="title-box"]/a`` in the module-level ``brower`` driver.
    Each headline is written on its own line, UTF-8 encoded.

    Args:
        t_n: Path of the text file to append to.
    """
    # The generic find_elements(by, value) call is used because the
    # find_elements_by_xpath shortcut is absent from newer Selenium
    # releases; "xpath" is the string value of By.XPATH.
    # (Original author's open question here: how is this locator found?)
    titles = brower.find_elements("xpath", '//div[@class="title-box"]/a')
    if not titles:
        # Nothing matched: leave the file untouched.
        return
    # Open the file once for the whole batch instead of once per headline.
    with open(t_n, 'a', encoding='utf-8') as data:
        for title in titles:
            print(title.text, file=data)
# Scroll the page down to load more news.
def get_manyinfo(t_n, rounds=10, scrolls_per_round=30, pause=3):
    """Repeatedly scroll the page, save the headlines, and refresh.

    After an initial scroll to y=1000, each round scrolls to the bottom of
    the document *scrolls_per_round* times (sleeping *pause* seconds after
    each scroll), saves the headlines with ``write_info`` and refreshes the
    page. The module-level ``brower`` window is closed when the work is
    finished or when any step raises.

    Args:
        t_n: Path of the text file the headlines are appended to.
        rounds: Number of scroll/save/refresh rounds.
        scrolls_per_round: Scroll-to-bottom actions per round.
        pause: Seconds to wait after each scroll-to-bottom.
    """
    # NOTE(review): the source's indentation was lost; this assumes the
    # headlines are saved once per round, after the inner scroll loop,
    # rather than after every single scroll -- confirm.
    try:
        brower.execute_script("window.scrollTo(0,1000);")
        time.sleep(1)  # delay in seconds before the scrolling rounds start
        for _round in range(rounds):
            for _scroll in range(scrolls_per_round):
                brower.execute_script("window.scrollTo(0,document.body.scrollHeight);")
                time.sleep(pause)
            write_info(t_n)
            brower.refresh()
    finally:
        # Close the window even if scrolling or saving fails, so a failed
        # run does not leave the browser open.
        brower.close()