Source code for darc.selenium
# -*- coding: utf-8 -*-
"""Selenium Wrapper
======================
The :mod:`darc.selenium` module wraps around the :mod:`selenium`
module, and provides some simple interface for the :mod:`darc`
project.
"""
import getpass
import platform
import shutil
import selenium
import darc.typing as typing
from darc.const import DEBUG
from darc.error import UnsupportedLink, UnsupportedPlatform, UnsupportedProxy
from darc.link import Link
from darc.proxy.i2p import I2P_PORT, I2P_SELENIUM_PROXY
from darc.proxy.tor import TOR_PORT, TOR_SELENIUM_PROXY
[docs]def request_driver(link: Link) -> typing.Driver:
"""Get selenium driver.
Args:
link: Link requesting for :class:`~selenium.webdriver.Chrome`.
Returns:
selenium.webdriver.Chrome: The web driver object with corresponding proxy settings.
Raises:
UnsupportedLink: If the proxy type of ``link``
if not specified in the :data:`~darc.proxy.LINK_MAP`.
See Also:
* :data:`darc.proxy.LINK_MAP`
"""
from darc.proxy import LINK_MAP # pylint: disable=import-outside-toplevel
_, driver = LINK_MAP[link.proxy]
if driver is None:
raise UnsupportedLink(link.url)
return driver()
[docs]def get_options(type: str = 'null') -> typing.Options: # pylint: disable=redefined-builtin
"""Generate options.
Args:
type: Proxy type for options.
Returns:
selenium.webdriver.ChromeOptions: The options for the web driver :class:`~selenium.webdriver.Chrome`.
Raises:
UnsupportedPlatform: If the operation system is **NOT**
macOS or Linux.
UnsupportedProxy: If the proxy type is **NOT**
``null``, ``tor`` or ``i2p``.
See Also:
* :data:`darc.proxy.tor.TOR_PORT`
* :data:`darc.proxy.i2p.I2P_PORT`
References:
* `Google Chrome command line switches <https://peter.sh/experiments/chromium-command-line-switches/>`__
* Disable sandbox (``--no-sandbox``) when running as ``root`` user
- https://crbug.com/638180
- https://stackoverflow.com/a/50642913/7218152
* Disable usage of ``/dev/shm``
- http://crbug.com/715363
* `Using Socks proxy <https://www.chromium.org/developers/design-documents/network-stack/socks-proxy>`__
"""
_system = platform.system()
# initiate options
options = selenium.webdriver.ChromeOptions()
# https://peter.sh/experiments/chromium-command-line-switches/
if _system == 'Darwin':
options.binary_location = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
if not DEBUG:
options.add_argument('--headless')
elif _system == 'Linux':
options.binary_location = shutil.which('google-chrome')
options.add_argument('--headless')
# c.f. https://crbug.com/638180; https://stackoverflow.com/a/50642913/7218152
if getpass.getuser() == 'root':
options.add_argument('--no-sandbox')
# c.f. http://crbug.com/715363
options.add_argument('--disable-dev-shm-usage')
else:
raise UnsupportedPlatform(f'unsupported system: {_system}')
if type != 'null':
if type == 'tor':
port = TOR_PORT
elif type == 'i2p':
port = I2P_PORT
else:
raise UnsupportedProxy(f'unsupported proxy: {type}')
# c.f. https://www.chromium.org/developers/design-documents/network-stack/socks-proxy
options.add_argument(f'--proxy-server=socks5://localhost:{port}')
options.add_argument('--host-resolver-rules="MAP * ~NOTFOUND , EXCLUDE localhost"')
return options
[docs]def get_capabilities(type: str = 'null') -> dict: # pylint: disable=redefined-builtin
"""Generate desied capabilities.
Args:
type: Proxy type for capabilities.
Returns:
The desied capabilities for the web driver :class:`~selenium.webdriver.Chrome`.
Raises:
UnsupportedProxy: If the proxy type is **NOT**
``null``, ``tor`` or ``i2p``.
See Also:
* :data:`darc.proxy.tor.TOR_SELENIUM_PROXY`
* :data:`darc.proxy.i2p.I2P_SELENIUM_PROXY`
"""
# do not modify source dict
capabilities = selenium.webdriver.DesiredCapabilities.CHROME.copy()
if type == 'null':
pass
elif type == 'tor':
TOR_SELENIUM_PROXY.add_to_capabilities(capabilities)
elif type == 'i2p':
I2P_SELENIUM_PROXY.add_to_capabilities(capabilities)
else:
raise UnsupportedProxy(f'unsupported proxy: {type}')
return capabilities
[docs]def i2p_driver() -> typing.Driver:
"""I2P (``.i2p``) driver.
Returns:
selenium.webdriver.Chrome: The web driver object with I2P proxy settings.
See Also:
* :func:`darc.selenium.get_options`
* :func:`darc.selenium.get_capabilities`
"""
options = get_options('i2p')
capabilities = get_capabilities('i2p')
# initiate driver
driver = selenium.webdriver.Chrome(options=options,
desired_capabilities=capabilities)
return driver
[docs]def tor_driver() -> typing.Driver:
"""Tor (``.onion``) driver.
Returns:
selenium.webdriver.Chrome: The web driver object with Tor proxy settings.
See Also:
* :func:`darc.selenium.get_options`
* :func:`darc.selenium.get_capabilities`
"""
options = get_options('tor')
capabilities = get_capabilities('tor')
# initiate driver
driver = selenium.webdriver.Chrome(options=options,
desired_capabilities=capabilities)
return driver
[docs]def null_driver() -> typing.Driver:
"""No proxy driver.
Returns:
selenium.webdriver.Chrome: The web driver object with no proxy settings.
See Also:
* :func:`darc.selenium.get_options`
* :func:`darc.selenium.get_capabilities`
"""
options = get_options('null')
capabilities = get_capabilities('null')
# initiate driver
driver = selenium.webdriver.Chrome(options=options,
desired_capabilities=capabilities)
return driver