[Python] Selenium no Google Colab - Cheatsheet
Automatizando consultas no Google Colab com Selenium
Às vezes preciso rodar um teste com o Selenium e uso o Google Colab para isso.
Compartilho abaixo o script que utilizo. Ele ainda pode ser melhorado em vários pontos, mas já é funcional e cumpre o propósito.
Quando possível, acrescentarei comentários ao código para ficar mais didático.
Fique à vontade para reutilizar ou sugerir melhorias.
Função para configurar o Selenium no Colab
def _importar_ou_instalar(modulo, pacote):
    """Import ``modulo``; if it is missing, pip-install ``pacote`` and retry.

    Args:
        modulo: Dotted module name to import (e.g. ``"selenium.webdriver"``).
        pacote: Name of the pip distribution that provides ``modulo``.

    Returns:
        The imported module object.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits with an error.
        ImportError: If the module still cannot be imported after installing.
    """
    import importlib
    import subprocess
    import sys

    try:
        return importlib.import_module(modulo)
    except ImportError:
        # ``sys.executable -m pip`` installs into the interpreter running this
        # code and is plain Python, so it works inside and outside notebooks
        # (the ``!pip`` magic is only valid in IPython/Colab cells).
        subprocess.check_call([sys.executable, "-m", "pip", "install", pacote])
        importlib.invalidate_caches()
        return importlib.import_module(modulo)


def _instalar_chromedriver_colab():
    """Install chromium-chromedriver via apt and copy the driver to /usr/bin.

    Does nothing when ``/usr/lib/chromium-browser/chromedriver`` already
    exists. The commands are best-effort: a failing command does not raise.
    """
    import os
    import subprocess

    origem = "/usr/lib/chromium-browser/chromedriver"
    if os.path.exists(origem):
        return
    subprocess.run(["apt-get", "update"], check=False)
    # ``-y`` because there is no interactive terminal to answer apt's prompt.
    subprocess.run(
        ["apt-get", "install", "-y", "chromium-chromedriver"], check=False
    )
    subprocess.run(["cp", origem, "/usr/bin"], check=False)
    print("Navegador instalado")


def iniciar_selenium(
    driver_path=None,
    undetec=None,
    mobile=None,
    proxy=None,
    downloads_path=None,
    colab=None,
    headless=None,
    remote=None,
    user_agent=None,
    opt_adv=True,
    version_main=None,
):
    """Create and return a configured Chrome WebDriver.

    Missing third-party packages (selenium, undetected-chromedriver,
    webdriver-manager, chromedriver-binary) are installed with pip on demand.

    Args:
        driver_path: Path to a chromedriver executable for the regular
            Selenium driver. When omitted, webdriver-manager downloads one.
        undetec: When truthy, return an ``undetected_chromedriver`` driver
            instead of the regular Selenium one.
        mobile: When truthy, enable Chrome mobile emulation (regular driver
            only); otherwise a 1920x1080 window is requested.
        proxy: Value for Chrome's ``--proxy-server`` switch.
        downloads_path: Default download directory (regular driver only).
        colab: With ``undetec``, install chromium-chromedriver through apt and
            use the binaries under ``/usr/bin``.
        headless: Run the browser without a visible window.
        remote: When truthy, connect to a Selenium server at
            ``http://127.0.0.1:4444/wd/hub`` instead of starting a browser.
        user_agent: Value for Chrome's ``--user-agent`` switch.
        opt_adv: When truthy, add the extra command-line switches and
            automation-related experimental options (regular driver only).
        version_main: Chrome major version passed to ``uc.Chrome`` in the
            non-Colab undetected mode. ``None`` lets undetected-chromedriver
            choose the driver version itself.

    Returns:
        A ``webdriver.Remote``, ``uc.Chrome`` or ``webdriver.Chrome`` instance,
        depending on ``remote`` and ``undetec``.

    Raises:
        subprocess.CalledProcessError: If a required ``pip install`` fails.
        ImportError: If a dependency cannot be imported even after installing.
    """
    webdriver = _importar_ou_instalar("selenium.webdriver", "selenium")
    ServiceC = _importar_ou_instalar(
        "selenium.webdriver.chrome.service", "selenium"
    ).Service
    uc = _importar_ou_instalar(
        "undetected_chromedriver.v2", "undetected-chromedriver"
    )
    ChromeDriverManager = _importar_ou_instalar(
        "webdriver_manager.chrome", "webdriver-manager"
    ).ChromeDriverManager
    # Imported only for its side effect: it adds a chromedriver binary to PATH.
    _importar_ou_instalar("chromedriver_binary", "chromedriver-binary")
    from selenium import __version__ as versao_selenium

    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument("--headless")

    if remote:
        # The browser runs in an external Selenium server (advanced usage);
        # only the headless flag is sent along.
        return webdriver.Remote(
            command_executor="http://127.0.0.1:4444/wd/hub", options=options
        )

    if undetec:
        # undetected-chromedriver gets its own, minimal option set. Proxy and
        # user agent are forwarded so they are not silently ignored; the
        # remaining Selenium-specific options are intentionally not applied.
        uc_options = uc.ChromeOptions()
        uc_options.add_argument("--log-level=3")
        if proxy:
            uc_options.add_argument(f"--proxy-server={proxy}")
        if user_agent:
            uc_options.add_argument(f"--user-agent={user_agent}")
        if colab:
            _instalar_chromedriver_colab()
            # Headless is always forced here (presumably because Colab VMs
            # have no display -- confirm if this mode is used elsewhere).
            return uc.Chrome(
                options=uc_options,
                driver_executable_path="/usr/bin/chromedriver",
                browser_executable_path="/usr/bin/chromium-browser",
                headless=True,
            )
        return uc.Chrome(
            options=uc_options,
            version_main=version_main,
            headless=bool(headless),
        )

    # Regular Selenium driver: disable features that are not needed.
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--log-level=3")
    # NOTE: ``options.binary_location`` is deliberately left unset. It must
    # point at the browser executable, not at chromedriver.

    if mobile:
        mobile_emulation = {
            "deviceMetrics": {"width": 375, "height": 812, "pixelRatio": 3.0},
            "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19",
        }
        options.add_experimental_option("mobileEmulation", mobile_emulation)
    else:
        WINDOW_SIZE = "1920,1080"
        options.add_argument(f"--window-size={WINDOW_SIZE}")

    if proxy:
        options.add_argument(f"--proxy-server={proxy}")

    prefs = {
        "profile.default_content_settings.popups": 0,
        "directory_upgrade": True,
        "credentials_enable_service": False,
        "profile": {"password_manager_enabled": False},
    }
    if downloads_path:
        prefs.update(
            {
                "download.default_directory": downloads_path,
                "plugins.plugins_disabled": ["Chrome PDF Viewer"],
                "download.prompt_for_download": False,
                "plugins.always_open_pdf_externally": True,
            }
        )

    if user_agent:
        options.add_argument(f"--user-agent={user_agent}")

    if opt_adv:
        for argumento in (
            "--ignore-ssl-errors=yes",
            "--incognito",
            "--no-default-browser-check",
            "--ignore-certificate-errors",
            "--no-first-run",
            "--disable-infobars",
            "--disable-blink-features",
            "--disable-blink-features=AutomationControlled",
            "--disable-gpu",
            "--disable-popup-blocking",
            "--disable-notifications",
        ):
            options.add_argument(argumento)
        options.add_experimental_option(
            "excludeSwitches",
            [
                "ignore-certificate-errors",
                "enable-automation",
                "safebrowsing-disable-download-protection",
                "safebrowsing-disable-auto-update",
                "disable-client-side-phishing-detection",
            ],
        )
        options.add_experimental_option("useAutomationExtension", False)

    # Preferences are applied regardless of ``opt_adv`` so that
    # ``downloads_path`` always takes effect.
    options.add_experimental_option("prefs", prefs)

    if driver_path is None:
        driver_path = ChromeDriverManager().install()
        print("CHROME_PATH ", driver_path)

    major = versao_selenium.split(".")[0]
    if major.isdigit() and int(major) >= 4:
        # Selenium 4+ takes the driver path through a Service object. The old
        # ``service_log_path`` keyword is not passed because newer Selenium 4
        # releases no longer accept it.
        service = ServiceC(executable_path=driver_path)
        return webdriver.Chrome(service=service, options=options)

    # Selenium 3 takes the driver path as the first positional argument.
    return webdriver.Chrome(driver_path, options=options)
Chamada e execução do webdriver
# Start an undetected-chromedriver session configured for Google Colab.
# Other keyword arguments accepted by iniciar_selenium and left at their
# defaults here: driver_path, mobile, proxy, downloads_path, headless,
# remote and user_agent.
driver = iniciar_selenium(undetec=True, colab=True, opt_adv=True)

# Load the page and print its title.
driver.get('https://www.tabnews.com.br')
print(driver.title)
# Example output:
# TabNews: Conteúdos para quem trabalha com Programação e Tecnologia