# Source code for darc.model.web.requests
# -*- coding: utf-8 -*-
"""Crawler Records
---------------------

The :mod:`darc.model.web.requests` module defines the data models
representing records of the :obj:`crawler <darc.crawl.crawler>`,
specifically those from ``requests`` submission.

.. seealso::

   Please refer to :func:`darc.submit.submit_requests` for more
   information.

"""
import peewee
import darc.typing as typing
from darc.model.abc import BaseModelWeb as BaseModel
from darc.model.utils import JSONField
from darc.model.web.url import URLModel
# Names exported by ``from darc.model.web.requests import *``; lists every
# model class defined in this module.
__all__ = ['RequestsModel', 'RequestsHistoryModel']
class RequestsModel(BaseModel):
    """Data model for documents from ``requests`` submission.

    Each record keeps the fetched document itself (:attr:`document`)
    next to the metadata of the exchange: request method, status code,
    reason string, cookies and both header sets.

    The record points at its originating URL through the :attr:`url`
    foreign key; because that key is declared with ``backref='requests'``,
    the related :class:`~darc.model.web.url.URLModel` row exposes its
    records under the ``requests`` accessor.

    .. note::

       Fields are kept in their original declaration order; do not
       reorder them without checking the effect on the table schema.

    """

    #: Original URL (c.f. :attr:`link.url <darc.link.Link.url>`).
    url: URLModel = peewee.ForeignKeyField(URLModel, backref='requests')
    #: Timestamp of the submission.
    timestamp: typing.Datetime = peewee.DateTimeField()
    #: Request method (normally ``GET``).
    method: str = peewee.CharField()
    #: Document data as :obj:`bytes`.
    document: bytes = peewee.BlobField()
    #: Content type.
    mime_type: str = peewee.CharField()
    #: If document is HTML or miscellaneous data.
    is_html: bool = peewee.BooleanField()
    #: Status code.
    status_code: int = peewee.IntegerField()
    #: Response reason string.
    reason: str = peewee.TextField()
    #: Response cookies.
    cookies: typing.Cookies = JSONField()
    #: Session cookies.
    session: typing.Cookies = JSONField()
    #: Request headers.
    request: typing.Headers = JSONField()
    #: Response headers.
    response: typing.Headers = JSONField()
class RequestsHistoryModel(BaseModel):
    """Data model for history records from ``requests`` submission.

    Every history record belongs to exactly one :class:`RequestsModel`
    row through the :attr:`model` foreign key; because that key is
    declared with ``backref='history'``, the owning record exposes its
    history entries under the ``history`` accessor.  :attr:`index`
    numbers the entries of one owning record.

    Unlike :class:`RequestsModel`, the request URL is stored here as
    plain text (:attr:`url`) rather than as a foreign key.

    """

    #: History index number.
    # NOTE(review): presumably the position of this entry within the
    # response's redirect history -- confirm against the submitting code.
    index: int = peewee.IntegerField()
    #: Original record.
    model: RequestsModel = peewee.ForeignKeyField(RequestsModel, backref='history')

    #: Request URL.
    url: str = peewee.TextField()
    #: Timestamp of the submission.
    timestamp: typing.Datetime = peewee.DateTimeField()
    #: Request method (normally ``GET``).
    method: str = peewee.CharField()
    #: Document data as :obj:`bytes`.
    document: bytes = peewee.BlobField()
    #: Status code.
    status_code: int = peewee.IntegerField()
    #: Response reason string.
    reason: str = peewee.TextField()
    #: Response cookies.
    cookies: typing.Cookies = JSONField()
    #: Request headers.
    request: typing.Headers = JSONField()
    #: Response headers.
    response: typing.Headers = JSONField()