# Source code for darc.link

# -*- coding: utf-8 -*-
"""URL Utilities
===================

The :class:`~darc.link.Link` class is the key data structure
of the :mod:`darc` project; it contains all the information
required to identify a URL's proxy type, hostname, path prefix
when saving, etc.

The :mod:`~darc.link` module also provides several wrapper
functions for the :mod:`urllib.parse` module.

"""

import contextlib
import dataclasses
import functools
import hashlib
import os
import re
import urllib.parse

import darc.typing as typing
from darc.const import PATH_DB

# Probe whether a concrete ``PosixPath`` can be instantiated here: the
# constructor is called once on the current-directory marker purely for its
# side effect of raising ``NotImplementedError`` when unsupported (presumably
# on non-POSIX platforms such as Windows -- confirm against pathlib docs).
# In that case the pure, I/O-free ``PurePosixPath`` is bound to the same name.
try:
    from pathlib import PosixPath
    PosixPath(os.curdir)
except NotImplementedError:
    from pathlib import PurePosixPath as PosixPath  # type: ignore


def quote(string: typing.AnyStr, safe: typing.AnyStr = '/',  # type: ignore
          encoding: typing.Optional[str] = None, errors: typing.Optional[str] = None) -> str:
    """Wrapper function for :func:`urllib.parse.quote`.

    Args:
        string: string to be quoted
        safe: characters not to escape
        encoding: string encoding
        errors: encoding error handler

    Returns:
        The quoted string.

    Note:
        The function suppresses possible errors when calling
        :func:`urllib.parse.quote`. If any, it will return the
        original string as text. :obj:`bytes` input is decoded
        (with ``encoding`` / ``errors`` when they are usable,
        otherwise as UTF-8 with replacement characters) instead of
        being rendered as its ``b'...'`` representation.

    """
    try:
        return urllib.parse.quote(string, safe, encoding=encoding, errors=errors)  # type: ignore
    except Exception:  # deliberate best-effort: quoting must never break the caller
        pass

    if isinstance(string, (bytes, bytearray)):
        try:
            # honour the caller's codec settings first
            return string.decode(encoding or 'utf-8', errors or 'replace')
        except Exception:
            # unknown codec / handler or undecodable data: this form cannot fail
            return string.decode('utf-8', 'replace')
    return str(string)
def unquote(string: typing.AnyStr, encoding: str = 'utf-8', errors: str = 'replace') -> str:
    """Wrapper function for :func:`urllib.parse.unquote`.

    Args:
        string: string to be unquoted
        encoding: string encoding
        errors: encoding error handler

    Returns:
        The unquoted string.

    Note:
        The function suppresses possible errors when calling
        :func:`urllib.parse.unquote`. If any, it will return the
        original string as text. :obj:`bytes` input is decoded
        (with ``encoding`` / ``errors`` when they are usable,
        otherwise as UTF-8 with replacement characters) instead of
        being rendered as its ``b'...'`` representation.

    """
    try:
        return urllib.parse.unquote(string, encoding=encoding, errors=errors)  # type: ignore
    except Exception:  # deliberate best-effort: unquoting must never break the caller
        pass

    if isinstance(string, (bytes, bytearray)):
        try:
            # honour the caller's codec settings first
            return string.decode(encoding or 'utf-8', errors or 'replace')
        except Exception:
            # unknown codec / handler or undecodable data: this form cannot fail
            return string.decode('utf-8', 'replace')
    return str(string)
def urljoin(base: typing.AnyStr, url: typing.AnyStr, allow_fragments: bool = True) -> str:
    """Wrapper function for :func:`urllib.parse.urljoin`.

    Args:
        base: base URL
        url: URL to be joined
        allow_fragments: if allow fragments

    Returns:
        The joined URL.

    Note:
        The function suppresses a possible :exc:`ValueError` when calling
        :func:`urllib.parse.urljoin`. If any, it will return ``base/url``
        directly. In that fallback, :obj:`bytes` components are decoded
        as UTF-8 (with replacement characters) instead of being rendered
        as their ``b'...'`` representation.

    """
    try:
        return urllib.parse.urljoin(base, url, allow_fragments=allow_fragments)  # type: ignore
    except ValueError:
        pass

    # naive concatenation of the textual form of both components
    pieces = [
        part.decode('utf-8', 'replace') if isinstance(part, (bytes, bytearray)) else str(part)
        for part in (base, url)
    ]
    return '/'.join(pieces)
def urlparse(url: str, scheme: str = '', allow_fragments: bool = True) -> urllib.parse.ParseResult:
    """Parse a URL with :func:`urllib.parse.urlparse`, tolerating malformed input.

    Args:
        url: URL to be parsed
        scheme: URL scheme
        allow_fragments: if allow fragments

    Returns:
        The parse result.

    Note:
        A :exc:`ValueError` raised by :func:`urllib.parse.urlparse` is
        swallowed; in that case the whole ``url`` is placed in the ``path``
        field, i.e. the function returns
        ``urllib.parse.ParseResult(scheme=scheme, netloc='', path=url,
        params='', query='', fragment='')``.

    """
    try:
        result = urllib.parse.urlparse(url, scheme, allow_fragments=allow_fragments)
    except ValueError:
        # unparsable URL: keep it verbatim as the path component
        result = urllib.parse.ParseResult(
            scheme=scheme,
            netloc='',
            path=url,
            params='',
            query='',
            fragment='',
        )
    return result
def urlsplit(url: str, scheme: str = '', allow_fragments: bool = True) -> urllib.parse.SplitResult:
    """Split a URL with :func:`urllib.parse.urlsplit`, tolerating malformed input.

    Args:
        url: URL to be split
        scheme: URL scheme
        allow_fragments: if allow fragments

    Returns:
        The split result.

    Note:
        A :exc:`ValueError` raised by :func:`urllib.parse.urlsplit` is
        swallowed; in that case the whole ``url`` is placed in the ``path``
        field, i.e. the function returns
        ``urllib.parse.SplitResult(scheme=scheme, netloc='', path=url,
        query='', fragment='')``.

    """
    try:
        result = urllib.parse.urlsplit(url, scheme, allow_fragments=allow_fragments)
    except ValueError:
        # unsplittable URL: keep it verbatim as the path component
        result = urllib.parse.SplitResult(
            scheme=scheme,
            netloc='',
            path=url,
            query='',
            fragment='',
        )
    return result