Executando verificação de segurança...
1

[Python] Selenium no Google Colab - Cheatsheet

Automatizando consultas no Google Colab com Selenium

Às vezes preciso rodar um teste com o Selenium e uso o Google Colab para isso.

Compartilho abaixo o script que utilizo. Ele ainda pode ser melhorado em vários pontos, mas é funcional e cumpre o propósito.

Quando possível, acrescentarei comentários ao código para ficar mais didático.

Fique à vontade para reutilizar ou sugerir melhorias.

Função para configurar o Selenium no Colab


def _pip_install(pacote):
    """Install ``pacote`` with pip for the interpreter running this code.

    ``sys.executable -m pip`` replaces the IPython-only ``!pip`` magic so the
    code is valid both inside and outside a notebook. A failed install is not
    raised here; the caller's follow-up import raises ``ImportError`` instead.
    """
    import importlib
    import subprocess
    import sys

    subprocess.run([sys.executable, "-m", "pip", "install", pacote], check=False)
    # Let the import system see the package that was just installed.
    importlib.invalidate_caches()


def _instalar_chromedriver_colab():
    """Install chromium-chromedriver through apt if it is not present yet."""
    import subprocess
    from os import path

    if not path.exists("/usr/lib/chromium-browser/chromedriver"):
        subprocess.run(["apt-get", "update"], check=False)
        # -y keeps apt from waiting on a confirmation prompt.
        subprocess.run(
            ["apt-get", "install", "-y", "chromium-chromedriver"], check=False
        )
        subprocess.run(
            ["cp", "/usr/lib/chromium-browser/chromedriver", "/usr/bin"],
            check=False,
        )

    print("Navegador instalado")


def iniciar_selenium(
    driver_path=None,
    undetec=None,
    mobile=None,
    proxy=None,
    downloads_path=None,
    colab=None,
    headless=None,
    remote=None,
    user_agent=None,
    opt_adv=True,
    version_main=90,
):
    """Create and return a Selenium WebDriver for Chrome.

    Missing packages (selenium, undetected-chromedriver, webdriver-manager,
    chromedriver-binary) are installed with pip on first use.

    Args:
        driver_path: Path to the chromedriver executable used by the regular
            Chrome driver. When None it is downloaded with webdriver-manager.
        undetec: When truthy, return an undetected-chromedriver instance.
            In that mode a fresh options object is used, so ``mobile``,
            ``proxy``, ``downloads_path``, ``user_agent`` and ``opt_adv``
            are not applied.
        mobile: When truthy, emulate a 375x812 mobile device with a fixed
            Android user agent; otherwise the window is 1920x1080.
        proxy: Value passed to Chrome's ``--proxy-server`` switch.
        downloads_path: Default download directory; also disables the
            download prompt and opens PDFs externally.
        colab: Only used together with ``undetec``. Installs
            chromium-chromedriver through apt and starts
            ``/usr/bin/chromium-browser`` in headless mode.
        headless: When truthy, run the browser headless.
        remote: When truthy, connect to a remote WebDriver server at
            ``http://127.0.0.1:4444/wd/hub``; only ``headless`` is applied.
        user_agent: Value passed to Chrome's ``--user-agent`` switch.
        opt_adv: When truthy, add the extra switches that ignore certificate
            errors and hide the automation markers.
        version_main: Chrome major version passed to undetected-chromedriver
            in the non-Colab undetected mode.

    Returns:
        The WebDriver instance that was started.
    """
    try:
        from selenium import webdriver
    except ImportError:
        _pip_install("selenium")
        from selenium import webdriver

    from selenium.webdriver.chrome.service import Service as ServiceC

    try:
        import undetected_chromedriver.v2 as uc
    except ImportError:
        _pip_install("undetected-chromedriver")
        import undetected_chromedriver.v2 as uc

    try:
        from webdriver_manager.chrome import ChromeDriverManager
    except ImportError:
        _pip_install("webdriver-manager")
        from webdriver_manager.chrome import ChromeDriverManager

    try:
        import chromedriver_binary  # noqa: F401  (adds chromedriver to PATH)
    except ImportError:
        _pip_install("chromedriver-binary")
        import chromedriver_binary  # noqa: F401

    from selenium import __version__

    options = webdriver.ChromeOptions()

    if headless:
        options.add_argument("--headless")

    if remote:
        # The browser runs in an external WebDriver server.
        return webdriver.Remote(
            command_executor="http://127.0.0.1:4444/wd/hub", options=options
        )

    if undetec:
        # undetected-chromedriver gets its own minimal options object; the
        # regular Chrome options built below are never used in this mode.
        uc_options = uc.ChromeOptions()
        uc_options.add_argument("--log-level=3")

        if colab:
            _instalar_chromedriver_colab()
            return uc.Chrome(
                options=uc_options,
                driver_executable_path="/usr/bin/chromedriver",
                browser_executable_path="/usr/bin/chromium-browser",
                headless=True,
            )

        return uc.Chrome(
            options=uc_options,
            version_main=version_main,
            headless=headless,
        )

    # Regular Chrome driver from here on.
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--log-level=3")

    # options.binary_location is deliberately left unset: it is the browser
    # binary, not the chromedriver executable.
    if driver_path is None:
        driver_path = ChromeDriverManager().install()
        print("CHROME_PATH ", driver_path)

    if mobile:
        mobile_emulation = {
            "deviceMetrics": {"width": 375, "height": 812, "pixelRatio": 3.0},
            "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19",
        }
        options.add_experimental_option("mobileEmulation", mobile_emulation)
    else:
        options.add_argument("--window-size=1920,1080")

    if proxy:
        options.add_argument(f"--proxy-server={proxy}")

    if user_agent:
        options.add_argument(f"--user-agent={user_agent}")

    prefs = {
        "profile.default_content_settings.popups": 0,
        "directory_upgrade": True,
        "credentials_enable_service": False,
        "profile": {"password_manager_enabled": False},
    }
    if downloads_path:
        prefs.update(
            {
                "download.default_directory": downloads_path,
                "plugins.plugins_disabled": ["Chrome PDF Viewer"],
                "download.prompt_for_download": False,
                "plugins.always_open_pdf_externally": True,
            }
        )
    # Applied regardless of opt_adv so downloads_path is never silently ignored.
    options.add_experimental_option("prefs", prefs)

    if opt_adv:
        for argumento in (
            "--ignore-ssl-errors=yes",
            "--incognito",
            "--no-default-browser-check",
            "--ignore-certificate-errors",
            "--no-first-run",
            "--disable-infobars",
            "--disable-blink-features",
            "--disable-blink-features=AutomationControlled",
            "--disable-gpu",
            "--disable-popup-blocking",
            "--disable-notifications",
        ):
            options.add_argument(argumento)
        options.add_experimental_option(
            "excludeSwitches",
            [
                "ignore-certificate-errors",
                "enable-automation",
                "safebrowsing-disable-download-protection",
                "safebrowsing-disable-auto-update",
                "disable-client-side-phishing-detection",
            ],
        )
        options.add_experimental_option("useAutomationExtension", False)

    if int(__version__.split(".")[0]) >= 4:
        # Selenium 4+ takes the driver path through a Service object.
        # service_log_path is not passed: it is ignored when a Service is
        # given and was removed from webdriver.Chrome in later 4.x releases.
        service = ServiceC(executable_path=driver_path)
        return webdriver.Chrome(service=service, options=options)

    # Selenium 3 takes the driver path as the first positional argument.
    return webdriver.Chrome(driver_path, options=options)

Chamada e execução do webdriver

# Start an undetected-chromedriver session set up for Google Colab. The other
# keyword arguments (driver_path, mobile, proxy, downloads_path, headless,
# remote, user_agent) are left at their defaults.
driver = iniciar_selenium(undetec=True, colab=True, opt_adv=True)

# Open the TabNews home page and print the page title.
driver.get('https://www.tabnews.com.br')
print(driver.title)
# TabNews: Conteúdos para quem trabalha com Programação e Tecnologia
Carregando publicação patrocinada...