Source code for darc.link

# -*- coding: utf-8 -*-
# pylint: disable=unsubscriptable-object,ungrouped-imports,line-too-long
"""URL Utilities
===================

The :class:`~darc.link.Link` class is the key data structure
of the :mod:`darc` project, it contains all information
required to identify a URL's proxy type, hostname, path prefix
when saving, etc.

The :mod:`~darc.link` module also provides several wrapper
functions for the :mod:`urllib.parse` module.

"""

import contextlib
import dataclasses
import functools
import hashlib
import os
import re
import urllib.parse as urllib_parse
from typing import TYPE_CHECKING

from darc.const import PATH_DB

try:
    from pathlib import PosixPath
    PosixPath(os.curdir)
except NotImplementedError:
    from pathlib import PurePosixPath as PosixPath  # type: ignore[misc]

if TYPE_CHECKING:
    from typing import Any, AnyStr, Dict, Optional, Union
    from urllib.parse import ParseResult

    _Str = Union[bytes, str]


def quote(string: str, safe: '_Str' = '/', encoding: 'Optional[str]' = None, errors: 'Optional[str]' = None) -> str:
    """Forgiving wrapper around :func:`urllib.parse.quote`.

    Args:
        string: string to be quoted
        safe: characters not to escape
        encoding: string encoding, forwarded to :func:`urllib.parse.quote`
        errors: encoding error handler, forwarded to :func:`urllib.parse.quote`

    Returns:
        The quoted string.

    Note:
        Any :exc:`Exception` raised by :func:`urllib.parse.quote` is
        swallowed; in that case ``str(string)`` is returned instead.

    """
    try:
        quoted = urllib_parse.quote(string, safe, encoding=encoding, errors=errors)
    except Exception:  # pylint: disable=broad-except
        # best-effort by design: hand back the textual form of the input
        return str(string)
    return quoted
def unquote(string: str, encoding: str = 'utf-8', errors: str = 'replace') -> str:
    """Forgiving wrapper around :func:`urllib.parse.unquote`.

    Args:
        string: string to be unquoted
        encoding: string encoding, forwarded to :func:`urllib.parse.unquote`
        errors: encoding error handler, forwarded to :func:`urllib.parse.unquote`

    Returns:
        The unquoted string.

    Note:
        Any :exc:`Exception` raised by :func:`urllib.parse.unquote` is
        swallowed; in that case ``str(string)`` is returned instead.

    """
    try:
        decoded = urllib_parse.unquote(string, encoding=encoding, errors=errors)
    except Exception:  # pylint: disable=broad-except
        # best-effort by design: hand back the textual form of the input
        return str(string)
    return decoded
def urljoin(base: 'AnyStr', url: 'AnyStr', allow_fragments: bool = True) -> 'AnyStr':
    """Forgiving wrapper around :func:`urllib.parse.urljoin`.

    Args:
        base: base URL
        url: URL to be joined onto ``base``
        allow_fragments: forwarded to :func:`urllib.parse.urljoin`

    Returns:
        The joined URL.

    Note:
        Only :exc:`ValueError` from :func:`urllib.parse.urljoin` is
        swallowed. When that happens the two parts are simply glued
        together as ``base/url`` (as :obj:`bytes` if ``base`` is
        :obj:`bytes`, otherwise as :obj:`str`).

    """
    try:
        return urllib_parse.urljoin(base, url, allow_fragments=allow_fragments)
    except ValueError:
        # fall through to the naive concatenation below
        pass

    if not isinstance(base, bytes):
        return f'{base}/{url}'
    return b'%s/%s' % (base, url)
def urlparse(url: str, scheme: str = '', allow_fragments: bool = True) -> 'ParseResult':
    """Forgiving wrapper around :func:`urllib.parse.urlparse`.

    Args:
        url: URL to be parsed
        scheme: default URL scheme, forwarded to :func:`urllib.parse.urlparse`
        allow_fragments: forwarded to :func:`urllib.parse.urlparse`

    Returns:
        The parse result.

    Note:
        Only :exc:`ValueError` from :func:`urllib.parse.urlparse` is
        swallowed. When that happens the whole ``url`` is treated as
        the path, i.e. the result is
        ``urllib.parse.ParseResult(scheme=scheme, netloc='', path=url,
        params='', query='', fragment='')``.

    """
    try:
        result = urllib_parse.urlparse(url, scheme, allow_fragments=allow_fragments)
    except ValueError:
        # fields in order: scheme, netloc, path, params, query, fragment
        result = urllib_parse.ParseResult(scheme, '', url, '', '', '')
    return result
def urlsplit(url: str, scheme: str = '', allow_fragments: bool = True) -> urllib_parse.SplitResult:
    """Forgiving wrapper around :func:`urllib.parse.urlsplit`.

    Args:
        url: URL to be split
        scheme: default URL scheme, forwarded to :func:`urllib.parse.urlsplit`
        allow_fragments: forwarded to :func:`urllib.parse.urlsplit`

    Returns:
        The split result.

    Note:
        Only :exc:`ValueError` from :func:`urllib.parse.urlsplit` is
        swallowed. When that happens the whole ``url`` is treated as
        the path, i.e. the result is
        ``urllib.parse.SplitResult(scheme=scheme, netloc='', path=url,
        query='', fragment='')``.

    """
    try:
        result = urllib_parse.urlsplit(url, scheme, allow_fragments=allow_fragments)
    except ValueError:
        # fields in order: scheme, netloc, path, query, fragment
        result = urllib_parse.SplitResult(scheme, '', url, '', '')
    return result