Nekmo/proxy-db

View on GitHub
proxy_db/proxies.py

Summary

Maintainability
A
0 mins
Test Coverage
import six
from sqlalchemy import exists, func

from proxy_db.exceptions import NoProvidersAvailable, UnsupportedEngine
from proxy_db.models import Proxy, ProviderRequest, create_session
from proxy_db.providers import PROVIDERS, ManualProxy


class NONE:
    """Sentinel class meaning "no matching value was found".

    The class object itself is used as the marker (it is compared with
    ``is``), which keeps it distinguishable from a legitimate ``None``.
    """


class ListingStrategy(object):
    """Decide which proxy a query should hand out next.

    A strategy contributes filter criteria and an ordering to a query and
    returns the first matching row.

    :param filters: optional iterable of extra filter criteria.
    :param order_by: optional ordering criterion; when ``None`` the newest
        proxies (``Proxy.created_at`` descending) come first.
    :param no_repeat: when true, proxies already returned by :meth:`next`
        are excluded from later queries.
    """

    def __init__(self, filters=None, order_by=None, no_repeat=True):
        self.filters = filters
        self.order_by = order_by
        self.no_repeat = no_repeat
        # Proxies already handed out by next(); only filled when no_repeat is set.
        self._proxies = set()

    def get_default_filters(self):
        """Return the criteria this strategy always applies (none here)."""
        return []

    def get_order_by(self, query):
        """Return the ordering criterion, defaulting to newest-first."""
        if self.order_by is None:
            return Proxy.created_at.desc()
        return self.order_by

    def get_filters(self, query):
        """Build the full list of filter criteria for ``query``.

        The list is made of the user filters, then the default filters and,
        when ``no_repeat`` is enabled, an exclusion of already returned ids.
        """
        criteria = list(self.filters) if self.filters else []
        criteria += self.get_default_filters()
        if self.no_repeat:
            already_returned = self._returned_proxies()
            criteria.append(~Proxy.id.in_(already_returned))
        return criteria

    def _returned_proxies(self):
        """Return the ids of the proxies already handed out."""
        return [returned.id for returned in self._proxies]

    def get_query(self, query):
        """Return ``query`` with this strategy's filters and ordering applied."""
        filtered = query.filter(*self.get_filters(query))
        ordering = self.get_order_by(query)
        return filtered.order_by(ordering)

    def next(self, query):
        """Return the first proxy matching the strategy, or ``None``.

        A returned proxy is remembered when ``no_repeat`` is enabled and has
        its ``_set_providers()`` hook called before being handed back.
        """
        proxy = self.get_query(query).first()
        if proxy is None:
            return None
        if self.no_repeat:
            self._proxies.add(proxy)
        proxy._set_providers()
        return proxy


class VotesListingStrategy(ListingStrategy):
    """Listing strategy that returns the most voted proxies first.

    :param filters: optional iterable of extra filter criteria.
    :param min_votes: proxies whose ``votes`` value is below this number
        are filtered out.
    """

    def __init__(self, filters=None, min_votes=0):
        super().__init__(filters=filters, order_by=Proxy.votes.desc())
        self.min_votes = min_votes

    def get_default_filters(self):
        """Return the single criterion enforcing the ``min_votes`` floor."""
        votes_floor = Proxy.votes >= self.min_votes
        return [votes_floor]


class RandomListingStrategy(ListingStrategy):
    """Listing strategy that returns proxies in random order.

    The random ordering expression depends on the database engine bound to
    the query's session.

    :param filters: optional iterable of extra filter criteria.
    """

    def __init__(self, filters=None):
        super().__init__(filters, None)

    def get_order_by(self, query):
        """Return the engine-specific random ordering expression.

        :param query: query whose session bind determines the engine name.
        :raises UnsupportedEngine: when the engine name is not one of
            sqlite, postgresql, mysql or oracle.
        """
        engine_name = query.session.get_bind().name
        if engine_name in ('sqlite', 'postgresql'):
            return func.random()
        if engine_name == 'mysql':
            return func.rand()
        if engine_name == 'oracle':
            # Build a real SQL function expression instead of returning a
            # plain string: order_by() treats bare strings as label
            # references, which cannot be resolved for this expression.
            return func.dbms_random.value()
        raise UnsupportedEngine(
            '{engine_name} engine does not support random ordering.'.format(engine_name=engine_name)
        )


class ProxiesList(object):
    def __init__(self, country=None, provider=None, protocol=None, strategy=None):
        if isinstance(country, six.string_types):
            country = country.upper()
        self.request_options = dict(
            country=country,
            protocol=protocol,
        )
        provider_name = provider
        if provider is not None and isinstance(provider, str):
            provider = next(iter(filter(lambda x: x.name == provider, PROVIDERS)), NONE)
        if provider is NONE:
            manual_provider_exists = create_session().query(
                exists().where(ProviderRequest.provider == provider_name)
            ).scalar()
            assert manual_provider_exists is True, "Invalid provider name."
            provider = ManualProxy(provider_name)
        self.provider = provider
        if strategy and isinstance(strategy, type):
            # Is a class without initialize. Instance now.
            strategy = strategy()
        self.strategy = strategy or VotesListingStrategy()

    def available_providers(self):
        providers = PROVIDERS
        if self.provider:
            providers = [self.provider]
        return filter(lambda x: x.is_available(), providers)

    def find_db_proxy(self):
        query = create_session().query(Proxy).join(Proxy.provider_requests).filter(
            ProviderRequest.provider.in_([x.name for x in self.available_providers()]),
        )
        country = self.request_options['country']
        protocol = self.request_options['protocol']
        if country:
            query = query.filter(Proxy.country == country)
        if protocol:
            query = query.filter(Proxy.protocol == protocol)
        return self.strategy.next(query)

    def find_provider(self):
        for provider in self.available_providers():
            req = provider.request(**self.request_options)
            if req.requires_update():
                return provider
        raise NoProvidersAvailable

    def reload_provider(self):
        provider = self.find_provider()
        provider.request(**self.request_options).now()

    def reload_provider_without_error(self):
        try:
            self.reload_provider()
        except NoProvidersAvailable:
            pass

    def __iter__(self):
        self._proxies = set()
        return self

    def try_get_proxy(self, retry=True):
        proxy = self.find_db_proxy()
        if proxy:
            return proxy
        elif retry:
            self.reload_provider_without_error()
        if retry:
            return self.try_get_proxy(retry=False)
        else:
            raise StopIteration

    def __next__(self):
        return self.try_get_proxy()

    def next(self):
        return self.__next__()