LucaCappelletti94/tinycrawler

View on GitHub
tinycrawler/proxy/proxy.py

Summary

Maintainability
A
0 mins
Test Coverage
from typing import Dict, Callable
from time import time
from ..eta import Eta
from ..domains_eta import DomainsEta
from ..robots import Robots


class Proxy:
    """Bookkeeping wrapper around a single proxy.

    Holds the proxy mapping built from the raw proxy description, counts
    how often the proxy was used and how often it succeeded, and delegates
    timing decisions to an ``Eta`` (for the proxy itself) and a
    ``DomainsEta`` (for the domains reached through it).
    """

    # Pairs of (key looked up in data["support"], scheme used in the
    # resulting mapping). Order matters only in that later pairs overwrite
    # earlier ones sharing the same scheme (with an identical value).
    # NOTE(review): SOCKS support is exposed under the "https" scheme with
    # an "https://" URL; confirm this is what the HTTP client expects
    # rather than a "socks4://" / "socks5://" URL.
    _SUPPORT_TO_SCHEME = (
        ("http", "http"),
        ("https", "https"),
        ("socks4", "https"),
        ("socks5", "https"),
    )

    def __init__(self, data: Dict, proxy_timeout: float, domains_timeout: float, custom_domains_timeout: Callable[[str], float], follow_robots_txt: bool, robots: "Robots"):
        """Create the proxy wrapper.

        Args:
            data: raw proxy description with the keys ``"ip"``, ``"port"``
                and ``"support"``, or ``None``.
            proxy_timeout: forwarded to ``Eta``.
            domains_timeout: forwarded to ``DomainsEta``.
            custom_domains_timeout: forwarded to ``DomainsEta``.
            follow_robots_txt: forwarded to ``DomainsEta``.
            robots: forwarded to ``DomainsEta``.
        """
        self._data = self._data_to_working_proxy(data)
        self._eta = Eta(proxy_timeout)
        self._domains_eta = DomainsEta(
            domains_timeout, custom_domains_timeout, follow_robots_txt, robots)
        self._usages = 0
        self._successes = 0

    def _data_to_working_proxy(self, data: Dict):
        """Build the ``{scheme: "scheme://ip:port"}`` mapping for ``data``.

        Only schemes whose entry in ``data["support"]`` is truthy are
        included. Returns ``None`` when ``data`` is ``None``.
        """
        if data is None:
            return None
        support = data["support"]
        return {
            scheme: "{protocol}://{ip}:{port}".format(
                protocol=scheme, ip=data["ip"], port=data["port"])
            for key, scheme in self._SUPPORT_TO_SCHEME
            if support[key]
        }

    def data(self):
        """Return the proxy mapping built at construction (may be ``None``)."""
        return self._data

    def failure_rate(self) -> float:
        """Return the fraction of recorded usages that did not succeed.

        The value is ``1 - successes / usages``. A proxy that has not been
        used yet has no observed failures, so ``0.0`` is returned instead
        of dividing by zero.
        """
        if self._usages == 0:
            return 0.0
        return 1 - self._successes / self._usages

    def used(self, success: bool, url: str):
        """Record one usage of the proxy for ``url``.

        Increments the usage counter, adds ``success`` to the success
        counter and notifies both the domains eta (with ``url``) and the
        proxy eta.
        """
        self._usages += 1
        self._successes += success
        self._domains_eta.add(url)
        self._eta.add()

    def unripe(self):
        """Return whatever ``DomainsEta.unripe()`` reports for this proxy."""
        return self._domains_eta.unripe()

    def wait_for(self, value):
        """Wait on the domains eta for ``value``, then on the proxy eta.

        ``value`` is forwarded unchanged to ``DomainsEta.wait_for``.
        """
        self._domains_eta.wait_for(value)
        self._eta.wait_for()

    def is_local(self):
        """Return ``False``: this class never represents the local machine."""
        return False