前言
Python Selenium 爬虫爬取英雄联盟官网壁纸
准备工作
源代码
（完整源代码共 56 行，如下）
# Scraper setup: download League of Legends skin wallpapers into ./皮肤/.
import os
import ssl
import time
import urllib.request

from selenium import webdriver

# Ensure the output directory for downloaded skin images exists.
if not os.path.exists('./皮肤/'):
    os.mkdir('./皮肤/')

# Disable HTTPS certificate verification so urlretrieve() can fetch the
# image CDN without a local CA bundle.
# NOTE(review): this weakens TLS security — acceptable only for a one-off scraper.
ssl._create_default_https_context = ssl._create_unverified_context

# Accumulator for hero detail-page URLs; filled by get_images().
hero = []

# Selenium 3-style driver construction (executable_path was removed in
# Selenium 4 — confirm the installed version); chromedriver binary in cwd.
browser = webdriver.Chrome(executable_path='./chromedriver')
def get_images():
    """Crawl every hero page on lol.qq.com and save each skin wallpaper.

    Uses the module-level ``browser`` (Selenium webdriver) and ``hero``
    (URL accumulator). Images are written to ``./皮肤/<title>.jpg``.
    Skins whose image cannot be downloaded are skipped, not fatal.
    """
    browser.get('https://lol.qq.com/data/info-heros.shtml')
    time.sleep(1)
    # Scroll to the bottom so the lazily-rendered hero list is populated.
    browser.execute_script('window.scrollTo(0, document.body.scrollHeight)')
    # NOTE(review): find_element_by_* was removed in Selenium 4; kept for
    # consistency with the rest of this script — confirm installed version.
    infos = browser.find_element_by_id('jSearchHeroDiv')
    links = infos.find_elements_by_css_selector('li>a')
    for link in links:
        url = link.get_attribute('href')
        # split() wraps the URL in a one-element list ([url]); later code
        # reads it back via entry[0].
        hero.append(url.split())
    for entry in hero:
        browser.get(entry[0])
        time.sleep(2)
        browser.execute_script('window.scrollTo(0, 800)')
        # Click the second tab in the skin navigation bar (the gallery view).
        browser.find_element_by_css_selector(
            '#skinNAV').find_elements_by_css_selector('li a')[1].click()
        skins = browser.find_element_by_css_selector(
            '#skinBG').find_elements_by_css_selector('li')
        for skin in skins:
            img_title = skin.get_attribute('title')
            img_url = skin.find_element_by_css_selector(
                'img').get_attribute('src')
            try:
                urllib.request.urlretrieve(img_url, './皮肤/%s.jpg' % img_title)
                print('下载成功', img_title)
            except (OSError, ValueError):
                # Best-effort download: skip this skin (network/HTTP error or
                # malformed URL) and keep crawling instead of aborting.
                pass
# Entry point: run the crawl (blocks until every download has been attempted).
get_images()
|
完成
参考文献
腾讯课堂——青灯教育