# Scraping Qidian (起点中文网) with Python: an object-oriented XPath crawler

# -*- coding: utf-8 -*-

import requests

from lxml import etree#抽取数据，只写了爬前页，章节页没写

'''OOP example

class Pig:  # class names start with a capital letter

    def eat(self):

        print('Hello World')

t = Pig()

t.eat()

'''

class Spider(object):
    """Small crawler for the Qidian (qidian.com) book listing.

    ``start_request`` downloads the listing page and prints one
    ``title href`` line per book. ``start_chapter`` downloads a single
    book page and, for now, only prints the parsed document root.
    """

    # Listing page scraped by start_request; also the base URL against
    # which scheme-relative or relative links are resolved.
    LIST_URL = 'https://www.qidian.com/all'

    # Seconds to wait for the server. requests applies no timeout by
    # default, so without one a stalled connection would block forever.
    REQUEST_TIMEOUT = 10

    @classmethod
    def _absolute_url(cls, url):
        """Return *url* as an absolute URL, resolved against ``LIST_URL``.

        Already-absolute URLs are returned unchanged; scheme-relative
        (``//host/path``) and relative (``/path``) links get the scheme
        and host of the listing page.
        """
        # Imported locally so the block does not depend on a new
        # file-level import.
        from urllib.parse import urljoin

        return urljoin(cls.LIST_URL, url)

    def _fetch_dom(self, url):
        """Download *url* and return the root element of the parsed HTML.

        Raises:
            requests.RequestException: on connection failure, timeout or
                an HTTP error status (4xx/5xx).
        """
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly on an error status instead of parsing an error page
        # and silently printing nothing.
        response.raise_for_status()
        html = response.content.decode('utf-8')
        return etree.HTML(html)

    def start_request(self):
        """Fetch the book listing and print each book's title and link."""
        dom = self._fetch_dom(self.LIST_URL)

        # XPath for the book titles.
        titles = dom.xpath('//div[@class="book-mid-info"]/h4/a/text()')
        # XPath for the matching book links.
        hrefs = dom.xpath('//div[@class="book-mid-info"]/h4/a/@href')

        # zip pairs every title with its link. The loop variables get
        # their own names so they no longer overwrite the lists.
        for title, href in zip(titles, hrefs):
            print(title, href)

    def start_chapter(self, url):
        """Fetch one book page and print the parsed document root.

        Args:
            url: Address of the book page. Links scraped by
                ``start_request`` may be scheme-relative (not verified
                here), so the address is made absolute before fetching.
        """
        dom = self._fetch_dom(self._absolute_url(url))
        print(dom)

# Script entry: build the crawler and print the title/link pairs found on
# the listing page.
spider = Spider()
spider.start_request()

# Example of fetching a single book page (disabled):
# spider.start_chapter('https://book.qidian.com/info/1009480992')