=> 需要一套能讓瀏覽器操作自動化的工具集
webdriver.PhantomJS(路徑):預設會從環境變數尋找 PhantomJS,也可以自行指定路徑。
from selenium import webdriver
# Start a PhantomJS browser, load the PChome search results for "mac",
# and save a screenshot of the page.
search_url = "http://ecshweb.pchome.com.tw/search/v3.3/?q=mac"
driver = webdriver.PhantomJS()
driver.get(search_url)
driver.save_screenshot("pchome.jpg")
True
Unix上一次全殺的指令:
# The [p] bracket expression stops grep from matching its own command line,
# so only real phantomjs PIDs are handed to kill.
kill $(ps aux | grep '[p]hantomjs' | awk '{print $2}')
import signal
# Stop the PhantomJS OS process first, then release the driver object.
phantom_process = driver.service.process
phantom_process.send_signal(signal.SIGTERM)  # kill it at the system level
driver.quit()  # release it from the program
from selenium import webdriver
import signal
# Load the PChome search page in PhantomJS and save the page source that the
# browser reports to 'pchome_selenium.html'.
driver = webdriver.PhantomJS()
try:
    driver.get("http://ecshweb.pchome.com.tw/search/v3.3/?q=mac")
    html_selenium = driver.page_source
    # Write as UTF-8 explicitly: the page may contain non-ASCII (e.g. Chinese)
    # text that the platform's default encoding cannot represent.
    with open('pchome_selenium.html', 'w', encoding='utf-8') as fileout:
        fileout.write(html_selenium)
finally:
    # Always shut PhantomJS down, even if loading or saving failed, so no
    # orphaned phantomjs process is left behind.
    driver.service.process.send_signal(signal.SIGTERM)
    driver.quit()
import requests
# Fetch the PChome search page with plain `requests` (no browser) and save
# the decoded response body to 'pchome_requests.html'.
resp = requests.get("http://ecshweb.pchome.com.tw/search/v3.3/?q=mac")
html_requests = resp.content.decode('utf8')
# Write as UTF-8 explicitly so the result does not depend on the platform's
# default encoding; the context manager closes the file even on error.
with open('pchome_requests.html', 'w', encoding='utf-8') as fileout:
    fileout.write(html_requests)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import signal
# Type a query into the PChome search box, submit it with ENTER, and take a
# screenshot before and after the search.
driver = webdriver.PhantomJS()
try:
    driver.get("http://shopping.pchome.com.tw/")
    time.sleep(5)  # give the page time to finish loading

    text_box = driver.find_element(By.XPATH, "//*[@id='keyword']")
    text_box.send_keys("macbook")  # type into the box
    text_box.clear()  # empty the box
    text_box.send_keys("mac")  # type the final query
    driver.save_screenshot("pchome_textbox.jpg")

    text_box.send_keys(Keys.ENTER)
    time.sleep(5)  # wait for the search results page
    driver.save_screenshot("pchome_search.jpg")
finally:
    # Always shut PhantomJS down, even if an element lookup failed, so no
    # orphaned phantomjs process is left behind.
    driver.service.process.send_signal(signal.SIGTERM)
    driver.quit()
試試看把爬蟲的投影片截圖下載下來吧!
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from lxml import etree
import time
import signal
# Open the slide deck, screenshot it, dump the page HTML, print the class
# attribute of every <button>, then send ARROW_DOWN and screenshot again.
driver = webdriver.PhantomJS()
url = "http://tw.pyladies.com/~marsw/crawler04.slides.html"
try:
    driver.get(url)
    time.sleep(10)  # give the slides time to render
    driver.save_screenshot("slide01.jpg")

    # Read the page source once and reuse it for both the file and the parser.
    slide_html = driver.page_source
    # Write as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    with open('slide.html', 'w', encoding='utf-8') as fileout:
        fileout.write(slide_html)

    page = etree.HTML(slide_html)
    for button_enable in page.xpath("//button/@class"):
        print(button_enable)

    # Send the key press to an <h2> element on the page.
    anywhere = driver.find_element(By.XPATH, "//h2")
    anywhere.send_keys(Keys.ARROW_DOWN)
    time.sleep(5)  # wait before taking the second screenshot
    driver.save_screenshot("slide02.jpg")
finally:
    # Always shut PhantomJS down, even if a step above failed, so no
    # orphaned phantomjs process is left behind.
    driver.service.process.send_signal(signal.SIGTERM)
    driver.quit()
navigate-left navigate-right enabled navigate-up navigate-down enabled
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import signal
# On a PChome product page, pick quantity 2 from the drop-down, click the
# button whose accesskey is 'I', and screenshot the result.
driver = webdriver.PhantomJS()
try:
    driver.get("http://24h.pchome.com.tw/prod/DGAX07-A900799PN")
    time.sleep(10)  # give the page time to finish loading

    # Drop-down list: click the <option> whose text is '2'.
    driver.find_element(By.XPATH, "//select[@class='Qty']/option[text()='2']").click()

    # Regular button.
    sbtn = driver.find_element(By.XPATH, "//button[@accesskey='I']")
    sbtn.click()
    time.sleep(10)  # wait for the page to react to the click
    driver.save_screenshot("pchome_add2cart.jpg")
finally:
    # Always shut PhantomJS down, even if an element lookup failed, so no
    # orphaned phantomjs process is left behind.
    driver.service.process.send_signal(signal.SIGTERM)
    driver.quit()
# 不等待
from selenium import webdriver
from selenium.webdriver.common.by import By
import signal
# Try to click the quantity option immediately after get(), without waiting.
# The lookup may raise NoSuchElementException if the element is not there yet.
driver = webdriver.PhantomJS()
try:
    driver.get("http://24h.pchome.com.tw/prod/DGAX07-A900799PN")
    driver.find_element(By.XPATH, "//select[@class='Qty']/option[text()='2']").click()
finally:
    # The exception still propagates, but PhantomJS is shut down first so the
    # failed lookup does not leave an orphaned phantomjs process behind.
    driver.service.process.send_signal(signal.SIGTERM)
    driver.quit()
--------------------------------------------------------------------------- NoSuchElementException Traceback (most recent call last) <ipython-input-8-675a839bd9d5> in <module>() 5 driver.get("http://24h.pchome.com.tw/prod/DGAX07-A900799PN") 6 ----> 7 driver.find_element(By.XPATH, "//select[@class='Qty']/option[text()='2']").click() 8 9 driver.service.process.send_signal(signal.SIGTERM) /usr/local/lib/python3.4/dist-packages/selenium/webdriver/remote/webdriver.py in find_element(self, by, value) 750 return self.execute(Command.FIND_ELEMENT, { 751 'using': by, --> 752 'value': value})['value'] 753 754 def find_elements(self, by=By.ID, value=None): /usr/local/lib/python3.4/dist-packages/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params) 234 response = self.command_executor.execute(driver_command, params) 235 if response: --> 236 self.error_handler.check_response(response) 237 response['value'] = self._unwrap_value( 238 response.get('value', None)) /usr/local/lib/python3.4/dist-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response) 190 elif exception_class == UnexpectedAlertPresentException and 'alert' in value: 191 raise exception_class(message, screen, stacktrace, value['alert'].get('text')) --> 192 raise exception_class(message, screen, stacktrace) 193 194 def _value_or_default(self, obj, key, default): NoSuchElementException: Message: {"errorMessage":"Unable to find element with xpath '//select[@class='Qty']/option[text()='2']'","request":{"headers":{"Accept":"application/json","Accept-Encoding":"identity","Connection":"close","Content-Length":"125","Content-Type":"application/json;charset=UTF-8","Host":"127.0.0.1:34925","User-Agent":"Python-urllib/3.4"},"httpVersion":"1.1","method":"POST","post":"{\"value\": \"//select[@class='Qty']/option[text()='2']\", \"using\": \"xpath\", \"sessionId\": 
\"e7a97100-f330-11e6-99d9-c35c87d1b9f4\"}","url":"/element","urlParsed":{"anchor":"","query":"","file":"element","directory":"/","path":"/element","relative":"/element","port":"","host":"","password":"","user":"","userInfo":"","authority":"","protocol":"","source":"/element","queryKey":{},"chunks":["element"]},"urlOriginal":"/session/e7a97100-f330-11e6-99d9-c35c87d1b9f4/element"}} Screenshot: available via screen
# 強迫等待10秒
from selenium import webdriver
from selenium.webdriver.common.by import By
import signal
import time
# Sleep a fixed 10 seconds after get(), print how long the wait took, then
# click the quantity option.
driver = webdriver.PhantomJS()
try:
    driver.get("http://24h.pchome.com.tw/prod/DGAX07-A900799PN")

    tstart = time.time()
    time.sleep(10)  # unconditional wait, regardless of when the page is ready
    tstop = time.time()
    print(tstop - tstart)

    driver.find_element(By.XPATH, "//select[@class='Qty']/option[text()='2']").click()
finally:
    # Always shut PhantomJS down, even if the element lookup failed, so no
    # orphaned phantomjs process is left behind.
    driver.service.process.send_signal(signal.SIGTERM)
    driver.quit()
10.015915870666504
# 最多等待10秒
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import signal
import time
# Wait at most 10 seconds for the quantity option to appear, print how long
# the wait actually took, then click the option.
from selenium.common.exceptions import TimeoutException

qty_option_xpath = "//select[@class='Qty']/option[text()='2']"

driver = webdriver.PhantomJS()
try:
    driver.get("http://24h.pchome.com.tw/prod/DGAX07-A900799PN")

    tstart = time.time()
    try:
        # until() returns as soon as the element is present in the DOM.
        element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, qty_option_xpath))
        )
    except TimeoutException:
        # Catch only the wait timing out; other errors (and Ctrl-C) propagate.
        print("TimeOut")
    finally:
        tstop = time.time()
        print(tstop - tstart)

    driver.find_element(By.XPATH, qty_option_xpath).click()
finally:
    # Always shut PhantomJS down, even if the wait or the click failed, so no
    # orphaned phantomjs process is left behind.
    driver.service.process.send_signal(signal.SIGTERM)
    driver.quit()
1.8321282863616943
sudo npm -g install phantomjs
sudo yum install nodejs
sudo yum install npm
sudo yum install bzip2
sudo npm -g install phantomjs
sudo yum install fontconfig
sudo apt-get install -y nodejs
sudo apt-get install npm
sudo apt-get install phantomjs
依系統而定,有時需要額外安裝fontconfig或libfontconfig。
webdriver.PhantomJS(路徑) 指定路徑,或是依照各系統設定環境變數。
Unix 解壓縮指令
sudo ln -s /xxxx/bin/phantomjs /usr/bin/phantomjs
sudo apt-get install libfontconfig1 libfreetype6 libpng12-0
curl -o /tmp/phantomjs_2.1.1_armhf.deb -sSL https://github.com/fg2it/phantomjs-on-raspberry/releases/download/v2.1.1-wheezy-jessie-armv6/phantomjs_2.1.1_armhf.deb
sudo dpkg -i /tmp/phantomjs_2.1.1_armhf.deb
sudo ln -s /usr/local/bin/phantomjs /usr/bin/phantomjs
webdriver.Chrome(路徑)指定路徑