# Source code for darc.model.web.requests
# -*- coding: utf-8 -*-
"""Crawler Records
---------------------

The :mod:`darc.model.web.requests` module defines the data model
representing records of the :obj:`crawler <darc.crawl.crawler>`,
specifically those from ``requests`` submission.

.. seealso::

   Please refer to :func:`darc.submit.submit_requests` for more
   information.

"""
from typing import TYPE_CHECKING
from peewee import (BlobField, BooleanField, CharField, DateTimeField, ForeignKeyField,
IntegerField, TextField)
from darc.model.abc import BaseModelWeb as BaseModel
from darc.model.utils import JSONField
from darc.model.web.url import URLModel
if TYPE_CHECKING:
    # Everything in this guard exists only for static type checkers; the
    # field annotations in this module refer to these names as quoted
    # strings, so nothing here is needed at runtime.
    from typing import Any, Dict, List

    from darc._compat import datetime

    # Alias used to annotate the cookie fields: a list of string-keyed mappings.
    Cookies = List[Dict[str, Any]]
    # Alias used to annotate the header fields: string-to-string mapping.
    Headers = Dict[str, str]
# Names exported by ``from darc.model.web.requests import *``.
# NOTE(review): ``RequestsHistoryModel`` is also defined in this module but is
# not listed here -- confirm whether that omission is intentional.
__all__ = ['RequestsModel']
class RequestsModel(BaseModel):
    """Data model for documents from ``requests`` submission.

    Each record references the originating
    :class:`~darc.model.web.url.URLModel` through :attr:`url` and stores the
    request method, the response document with its metadata (content type,
    status code, reason), and the cookies and headers of the exchange.
    History entries are reachable through the :attr:`history` back reference.

    """

    #: List of redirect history, back reference from
    #: :attr:`RequestsHistoryModel.model <darc.model.web.requests.RequestsHistoryModel.model>`.
    history: 'List[RequestsHistoryModel]'

    #: Original URL (c.f. :attr:`link.url <darc.link.Link.url>`).
    url: 'URLModel' = ForeignKeyField(URLModel, backref='requests')
    #: Timestamp of the submission.
    timestamp: 'datetime' = DateTimeField()

    #: Request method (normally ``GET``).
    method: str = CharField()
    #: Document data as :obj:`bytes`.
    document: bytes = BlobField()

    #: Content type.
    mime_type: str = CharField()
    #: If document is HTML or miscellaneous data.
    is_html: bool = BooleanField()

    #: Status code.
    status_code: int = IntegerField()
    #: Response reason string.
    reason: str = TextField()

    #: Response cookies.
    cookies: 'Cookies' = JSONField()
    #: Session cookies.
    session: 'Cookies' = JSONField()

    #: Request headers.
    request: 'Headers' = JSONField()
    #: Response headers.
    response: 'Headers' = JSONField()
class RequestsHistoryModel(BaseModel):
    """Data model for history records from ``requests`` submission.

    Each record belongs to one :class:`RequestsModel` through :attr:`model`
    (exposed on that model as its ``history`` back reference) and carries an
    :attr:`index` number together with the request method, response document,
    status, cookies and headers of that history entry.

    """

    #: History index number.
    index: int = IntegerField()
    #: Original record.
    model: RequestsModel = ForeignKeyField(RequestsModel, backref='history')

    #: Request URL.
    url: str = TextField()
    #: Timestamp of the submission.
    timestamp: 'datetime' = DateTimeField()

    #: Request method (normally ``GET``).
    method: str = CharField()
    #: Document data as :obj:`bytes`.
    document: bytes = BlobField()

    #: Status code.
    status_code: int = IntegerField()
    #: Response reason string.
    reason: str = TextField()

    #: Response cookies.
    cookies: 'Cookies' = JSONField()

    #: Request headers.
    request: 'Headers' = JSONField()
    #: Response headers.
    response: 'Headers' = JSONField()