# Source code for darc.model.web.url
# -*- coding: utf-8 -*-
"""URL Records
-----------------

The :mod:`darc.model.web.url` module defines the data model
representing URLs, specifically from ``requests`` and
``selenium`` submissions.

.. seealso::

   Please refer to :func:`darc.submit.submit_requests` and
   :func:`darc.submit.submit_selenium` for more information.

"""
import peewee
import darc.typing as typing
from darc.model.abc import BaseModelWeb as BaseModel
from darc.model.utils import IntEnumField, Proxy
from darc.model.web.hostname import HostnameModel
# Names exported by ``from darc.model.web.url import *``.
__all__ = ['URLModel']
class URLModel(BaseModel):
    """Data model for a requested URL.

    Each record stores the original URL, its hash, the hostname record
    it belongs to, the proxy type, and the timestamps tracking when the
    URL was submitted and since when its *alive* state has held.

    Important:
        The *alive* flag of a URL is toggled if
        :func:`~darc.crawl.crawler` successfully requested such URL and
        the status code is :attr:`~requests.Response.ok`.

    """

    #: Original URL (c.f. :attr:`link.url <darc.link.Link.url>`).
    url: str = peewee.TextField()
    #: Sha256 hash value (c.f. :attr:`Link.name <darc.link.Link.name>`).
    # NOTE(review): a hex-encoded SHA-256 digest is 64 characters long, so
    # ``max_length=256`` looks generous -- presumably intentional; it is
    # left as is because narrowing it would be a schema change.
    hash: str = peewee.CharField(max_length=256, unique=True)
    #: Hostname (c.f. :attr:`link.host <darc.link.Link.host>`).
    # The back-reference exposes the URLs of a hostname record as ``urls``.
    hostname: HostnameModel = peewee.ForeignKeyField(HostnameModel, backref='urls')
    #: Proxy type (c.f. :attr:`link.proxy <darc.link.Link.proxy>`).
    proxy: Proxy = IntEnumField(choices=Proxy)

    #: Timestamp of first submission.
    discovery: typing.Datetime = peewee.DateTimeField()
    #: Timestamp of last submission.
    last_seen: typing.Datetime = peewee.DateTimeField()

    #: If the URL is still active.
    alive: bool = peewee.BooleanField()
    #: The URL is active/inactive since this timestamp.
    since: typing.Datetime = peewee.DateTimeField()