Source code for darc.model.web.hostname

# -*- coding: utf-8 -*-
"""Hostname Records
----------------------

The :mod:`darc.model.web.hostname` module defines the data model
representing hostnames, specifically from ``new_host`` submission.

.. seealso::

   Please refer to :func:`darc.submit.submit_new_host` for more
   information.

"""

import peewee

import darc.typing as typing
from darc._compat import cached_property
from darc.model.abc import BaseModelWeb as BaseModel
from darc.model.utils import IntEnumField, Proxy

__all__ = ['HostnameModel']


class HostnameModel(BaseModel):
    """Data model for a hostname record.

    Important:
        The *alive* of a hostname is toggled if
        :func:`~darc.crawl.crawler` successfully requested
        a URL with such hostname.

    """

    #: Hostname (c.f. :attr:`link.host <darc.link.Link.host>`).
    hostname: str = peewee.TextField()
    #: Proxy type (c.f. :attr:`link.proxy <darc.link.Link.proxy>`).
    proxy: Proxy = IntEnumField(choices=Proxy)

    #: Timestamp of first ``new_host`` submission.
    discovery: typing.Datetime = peewee.DateTimeField()
    #: Timestamp of last related submission.
    last_seen: typing.Datetime = peewee.DateTimeField()

    # NOTE(review): ``self.urls`` is not declared in this class; presumably
    # it is a back-reference to the subsidiary URL records attached by
    # another model -- confirm against the URL model definition.

    @cached_property
    def alive(self) -> bool:
        """If the hostname is still active.

        We consider the hostname as *inactive*, only if all
        subsidiary URLs are *inactive*. A hostname without any
        subsidiary URL is therefore reported as *inactive*.

        Returns:
            :data:`True` if at least one subsidiary URL is *active*.

        """
        return any(url.alive for url in self.urls)  # pylint: disable=no-member

    @cached_property
    def since(self) -> typing.Datetime:
        """The hostname is active/inactive since such timestamp.

        We consider the timestamp by the earliest timestamp
        of related subsidiary *active/inactive* URLs, i.e. only
        the URLs sharing the hostname's current :attr:`alive`
        state are taken into account.

        Returns:
            The smallest ``since`` value among the matching URLs.

        Raises:
            ValueError: If the hostname has no subsidiary URL at all,
                as there is then no timestamp to pick from.

        """
        if self.alive:
            matching = (url for url in self.urls if url.alive)  # pylint: disable=no-member
        else:
            matching = (url for url in self.urls if not url.alive)  # pylint: disable=no-member

        # Hand the iterable itself to ``min``: star-unpacking it breaks when
        # exactly one URL matches, since ``min`` would then try to iterate
        # that lone record. Comparing the timestamps directly also yields the
        # timestamp itself, as annotated, rather than the URL record.
        return min(url.since for url in matching)