Python网络爬虫:基于Selenium爬取斗鱼直播信息(二)

四、完整代码from selenium import webdriverfrom time import sleepimport json#创建一个类class Douyu():def __init__(self):self.url = 'https://www.douyu.com/directory/all'#解析数据的函数def parse(self):#强制等待两秒,等待页面数据加载完毕sleep(2)li_list = self.bro.find_elements_by_xpath('//*[@id="listAll"]/section[2]/div[2]/ul/li')#print(len(li_list))data_list = []for li in li_list:dic_data = https://www.isolves.com/it/cxkf/yy/Python/2022-03-15/{}dic_data['title'] = li.find_element_by_xpath('./div/a/div[2]/div[1]/h3').textdic_data['name'] = li.find_element_by_xpath('./div/a/div[2]/div[2]/h2/div').textdic_data['art_type'] = li.find_element_by_xpath('./div/a/div[2]/div[1]/span').textdic_data['hot'] = li.find_element_by_xpath('./div/a/div[2]/div[2]/span').textdata_list.append(dic_data)return data_list#保存数据的函数def save_data(self,data_list,i):#在当前目录下将数据存为txt文件with open('./douyu.txt','w',encoding='utf-8') as fp:for data in data_list:data = str(data)fp.write(data+'n')print("第%d页保存完成!" % i)# json文件的存法# with open('./可执行文件/可执行文件/chromedriver.exe'self.bro.get(self.url)while i <= page_num:#调用解析函数data_list = self.parse()#调用保存函数self.save_data(data_list,i)try:#定位包含“下一页”字段的按钮并点击button = self.bro.find_element_by_xpath('//span[contains(text(),"下一页")]')button.click()i += 1except:breakself.bro.quit()else:print("输入格式错误!")if __name__ == '__main__':douyu = Douyu()douyu.run()



推荐阅读