Source code for darc.sites._abc

# -*- coding: utf-8 -*-
# pylint: disable=ungrouped-imports
"""Base Sites Customisation
==============================

The :mod:`darc.sites._abc` module provides the *abstract base class*
for sites customisation implementation. All sites customisation **must**
inherit from the :class:`~darc.sites._abc.BaseSite` exclusively.

Important:
    The :class:`~darc.sites._abc.BaseSite` class is **NOT** intended to
    be used directly from the :mod:`darc.sites._abc` module. Instead,
    it is recommended to import it from :mod:`darc.sites`.

"""

from typing import TYPE_CHECKING

from darc.error import LinkNoReturn

if TYPE_CHECKING:
    from typing import List, NoReturn, Optional, Union

    from requests import Response, Session
    from selenium.webdriver import Chrome as Driver

    import darc.link as darc_link  # Link
    from darc._compat import datetime


class BaseSite:
    """Abstract base class for sites customisation.

    The class exposes a :attr:`hostname` attribute and two static hooks,
    :meth:`crawler` and :meth:`loader`. Both hooks in this base class
    unconditionally raise :exc:`LinkNoReturn` for the given link.
    """

    #: Hostnames (**case insensitive**) the sites customisation is designed for.
    hostname = None  # type: Optional[List[str]]

    @staticmethod
    def crawler(timestamp: 'datetime', session: 'Session', link: 'darc_link.Link') -> 'Union[NoReturn, Response]':  # pylint: disable=unused-argument
        """Crawler hook for my site.

        Args:
            timestamp: Timestamp of the worker node reference.
            session: Session object with proxy settings.
            link: Link object to be crawled.

        Returns:
            This base implementation never returns; the return annotation
            additionally permits a response object.

        Raises:
            LinkNoReturn: This link has no return response.

        """
        # ``timestamp`` and ``session`` are intentionally unused here.
        raise LinkNoReturn(link)

    @staticmethod
    def loader(timestamp: 'datetime', driver: 'Driver', link: 'darc_link.Link') -> 'Union[NoReturn, Driver]':  # pylint: disable=unused-argument
        """Loader hook for my site.

        Args:
            timestamp: Timestamp of the worker node reference.
            driver (selenium.webdriver.Chrome): Web driver object with proxy settings.
            link: Link object to be loaded.

        Returns:
            This base implementation never returns; the return annotation
            additionally permits a web driver object.

        Raises:
            LinkNoReturn: This link has no return response.

        """
        # ``timestamp`` and ``driver`` are intentionally unused here.
        raise LinkNoReturn(link)