# Source code for darc.selenium
# -*- coding: utf-8 -*-
"""Selenium Wrapper
======================
The :mod:`darc.selenium` module wraps around the :mod:`selenium`
module, and provides a simple interface for the :mod:`darc`
project.
"""
import getpass
import os
import platform
import shutil
import selenium.webdriver
import darc.typing as typing
from darc.const import DEBUG
from darc.error import UnsupportedLink, UnsupportedPlatform, UnsupportedProxy
from darc.link import Link
from darc.proxy.i2p import I2P_PORT, I2P_SELENIUM_PROXY
from darc.proxy.tor import TOR_PORT, TOR_SELENIUM_PROXY
# Location of the Google Chrome binary handed to selenium.
#
# An explicit ``CHROME_BINARY_LOCATION`` environment variable always wins.
# Without it, a per-platform default is used: the standard application
# path on macOS, or whatever ``google-chrome`` resolves to on ``PATH`` on
# Linux (which may itself be ``None``). Any other system leaves it ``None``.
BINARY_LOCATION = os.environ.get('CHROME_BINARY_LOCATION')
if BINARY_LOCATION is None:
    _system = platform.system()
    BINARY_LOCATION = (
        '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
        if _system == 'Darwin'
        else shutil.which('google-chrome') if _system == 'Linux'
        else None
    )
    del _system  # do not leave the temporary in the module namespace
def request_driver(link: Link) -> typing.Driver:
    """Get the selenium driver matching the proxy type of a link.

    The entry stored in :data:`~darc.proxy.LINK_MAP` under ``link.proxy``
    is unpacked as a pair; its second item is the driver factory, which
    is called without arguments to build the driver.

    Args:
        link: Link requesting for :class:`~selenium.webdriver.Chrome`.

    Returns:
        selenium.webdriver.Chrome: The web driver object with corresponding proxy settings.

    Raises:
        UnsupportedLink: If the driver factory registered in
            :data:`~darc.proxy.LINK_MAP` for the proxy type of
            ``link`` is :data:`None`.

    See Also:
        * :data:`darc.proxy.LINK_MAP`

    """
    # resolved at call time rather than at module import
    # (presumably to avoid a circular import -- TODO confirm)
    from darc.proxy import LINK_MAP  # pylint: disable=import-outside-toplevel

    _unused, driver_factory = LINK_MAP[link.proxy]
    if driver_factory is not None:
        return driver_factory()
    raise UnsupportedLink(link.url)
def get_options(type: str = 'null') -> typing.Options:  # pylint: disable=redefined-builtin
    """Generate options.

    Args:
        type: Proxy type for options; one of ``null``, ``tor`` or ``i2p``.

    Returns:
        selenium.webdriver.ChromeOptions: The options for the web driver :class:`~selenium.webdriver.Chrome`.

    Raises:
        UnsupportedPlatform: If :data:`~darc.selenium.BINARY_LOCATION`
            is :data:`None`, i.e. :envvar:`CHROME_BINARY_LOCATION` is
            **NOT** set and no default binary was found for the
            operating system.
        UnsupportedProxy: If the proxy type is **NOT**
            ``null``, ``tor`` or ``i2p``.

    Important:
        The function raises :exc:`UnsupportedPlatform` in cases where
        :data:`~darc.selenium.BINARY_LOCATION` is :data:`None`.

        Please provide :envvar:`CHROME_BINARY_LOCATION` when running
        :mod:`darc` in ``loader`` mode on non *macOS* and/or *Linux*
        systems.

    Note:
        Arguments are handed to the browser without going through a
        shell, so values must **NOT** be wrapped in shell quotes; the
        quotes would become part of the value itself.

    See Also:
        * :data:`darc.proxy.tor.TOR_PORT`
        * :data:`darc.proxy.i2p.I2P_PORT`

    References:
        * `Google Chrome command line switches <https://peter.sh/experiments/chromium-command-line-switches/>`__
        * Disable sandbox (``--no-sandbox``) when running as ``root`` user

          - https://crbug.com/638180
          - https://stackoverflow.com/a/50642913/7218152

        * Disable usage of ``/dev/shm``

          - http://crbug.com/715363

        * `Using Socks proxy <https://www.chromium.org/developers/design-documents/network-stack/socks-proxy>`__

    """
    _system = platform.system()

    # without a browser binary there is nothing to configure
    if BINARY_LOCATION is None:
        raise UnsupportedPlatform(f'unsupported system: {_system}')

    # initiate options
    options = selenium.webdriver.ChromeOptions()
    options.binary_location = BINARY_LOCATION

    on_linux = _system == 'Linux'

    # https://peter.sh/experiments/chromium-command-line-switches/
    # run headless unless debugging; force headless option in Docker
    # environment; the switch is decided once so it is never added twice
    if not DEBUG or (on_linux and os.path.isfile('/.dockerenv')):
        options.add_argument('--headless')

    if on_linux:
        # c.f. https://crbug.com/638180; https://stackoverflow.com/a/50642913/7218152
        # test the effective UID instead of the login name: the name is
        # read from environment variables first and may not reflect the
        # user the process is actually running as
        if os.geteuid() == 0:
            options.add_argument('--no-sandbox')

        # c.f. http://crbug.com/715363
        options.add_argument('--disable-dev-shm-usage')

    if type != 'null':
        if type == 'tor':
            port = TOR_PORT
        elif type == 'i2p':
            port = I2P_PORT
        else:
            raise UnsupportedProxy(f'unsupported proxy: {type}')

        # c.f. https://www.chromium.org/developers/design-documents/network-stack/socks-proxy
        options.add_argument(f'--proxy-server=socks5://localhost:{port}')
        # no quotes around the rules: there is no shell to strip them
        options.add_argument('--host-resolver-rules=MAP * ~NOTFOUND , EXCLUDE localhost')
    return options
def get_capabilities(type: str = 'null') -> dict:  # pylint: disable=redefined-builtin
    """Generate desired capabilities.

    Args:
        type: Proxy type for capabilities; one of ``null``, ``tor`` or ``i2p``.

    Returns:
        The desired capabilities for the web driver :class:`~selenium.webdriver.Chrome`.

    Raises:
        UnsupportedProxy: If the proxy type is **NOT**
            ``null``, ``tor`` or ``i2p``.

    See Also:
        * :data:`darc.proxy.tor.TOR_SELENIUM_PROXY`
        * :data:`darc.proxy.i2p.I2P_SELENIUM_PROXY`

    """
    # work on a copy so the dict provided by selenium is left untouched
    chrome_caps = selenium.webdriver.DesiredCapabilities.CHROME.copy()

    if type == 'tor':
        TOR_SELENIUM_PROXY.add_to_capabilities(chrome_caps)
        return chrome_caps

    if type == 'i2p':
        I2P_SELENIUM_PROXY.add_to_capabilities(chrome_caps)
        return chrome_caps

    if type != 'null':
        raise UnsupportedProxy(f'unsupported proxy: {type}')

    # ``null``: the copied defaults are returned as they are
    return chrome_caps
def i2p_driver() -> typing.Driver:
    """I2P (``.i2p``) driver.

    Builds the ``i2p`` options and capabilities and starts a
    :class:`~selenium.webdriver.Chrome` instance with them.

    Returns:
        selenium.webdriver.Chrome: The web driver object with I2P proxy settings.

    See Also:
        * :func:`darc.selenium.get_options`
        * :func:`darc.selenium.get_capabilities`

    """
    # keyword arguments are evaluated left to right: options, then capabilities
    return selenium.webdriver.Chrome(
        options=get_options('i2p'),
        desired_capabilities=get_capabilities('i2p'),
    )
def tor_driver() -> typing.Driver:
    """Tor (``.onion``) driver.

    Builds the ``tor`` options and capabilities and starts a
    :class:`~selenium.webdriver.Chrome` instance with them.

    Returns:
        selenium.webdriver.Chrome: The web driver object with Tor proxy settings.

    See Also:
        * :func:`darc.selenium.get_options`
        * :func:`darc.selenium.get_capabilities`

    """
    # keyword arguments are evaluated left to right: options, then capabilities
    return selenium.webdriver.Chrome(
        options=get_options('tor'),
        desired_capabilities=get_capabilities('tor'),
    )
def null_driver() -> typing.Driver:
    """No proxy driver.

    Builds the ``null`` options and capabilities and starts a
    :class:`~selenium.webdriver.Chrome` instance with them.

    Returns:
        selenium.webdriver.Chrome: The web driver object with no proxy settings.

    See Also:
        * :func:`darc.selenium.get_options`
        * :func:`darc.selenium.get_capabilities`

    """
    # keyword arguments are evaluated left to right: options, then capabilities
    return selenium.webdriver.Chrome(
        options=get_options('null'),
        desired_capabilities=get_capabilities('null'),
    )