目录
创建类
class info_Spider(object):
def __init__(self, code=None, Lstr1=None, Lstr2=None, idx1=None, idx2=None):
    """Store the constructor arguments on the instance.

    This method was originally spelled ``_init_`` (single underscores).
    Python only treats ``__init__`` as the initializer, so the original
    never ran on instantiation and ``info_Spider(code, ...)`` raised
    ``TypeError``. Every parameter defaults to ``None`` so that existing
    argument-less ``info_Spider()`` calls keep working.

    Args:
        code: Saved as ``self.code``. ``get_info`` overwrites this
            attribute with its own ``code`` argument.
        Lstr1: Saved as ``self.Lstr1``.
        Lstr2: Saved as ``self.Lstr2``.
        idx1: Saved as ``self.idx1``.
        idx2: Saved as ``self.idx2``.

    NOTE(review): Lstr1/Lstr2 and idx1/idx2 are presumably string markers
    and indices used to slice the fetched text -- confirm against the
    methods that read them.
    """
    self.code = code
    self.Lstr1 = Lstr1
    self.Lstr2 = Lstr2
    self.idx1 = idx1
    self.idx2 = idx2

# Backward-compatible alias: code that explicitly called the old,
# misspelled ``_init_`` keeps working.
_init_ = __init__
#获取指定范围的数据块
def get_info(self,code):
self.code = code
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
chrome_driver = 'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'
#chromedriver的文件位置
#
driver = webdriver.Chrome(executable_path = chrome_driver,chrome_options=chrome_options)
url = 'http://fund.eastmoney.com/pingzhongdata/' + code + '.js?v=20190304115823'
driver.get(url)
#检查网页发现数据以json格式 存放在pre标签中 使用x_path匹配标签匹配数据
getPage_text = driver.find_element_by_xpath(\
"//pre").get_attribute("innerHTML")
print(getPage_text)
#对字符串做加工处理
getPage_text1 = re.sub("var Data_netWorthTre