Swall0w/arxiv-bot

View on GitHub
scripts/main.py

Summary

Maintainability
C
1 day
Test Coverage
from arxivbot.twitter import get_oauth
from tweepy.streaming import StreamListener, Stream
from datetime import timedelta
import configparser
from arxivbot.utils import (find_pattern_url_on_text,
                            usual_url_pattern,
                            extract_arxiv_data)
from arxivbot.github import create_github_issue
import traceback


class AbstractedlyListener(StreamListener):
    """Stream listener that files a GitHub issue for each linked arXiv paper.

    Args:
        github_user: Stored on the instance and passed unchanged as the first
            argument to ``create_github_issue`` for every issue created.
    """

    def __init__(self, github_user):
        super(AbstractedlyListener, self).__init__()
        self.github_user = github_user

    def on_status(self, status):
        """Create GitHub issues for the URLs found in an incoming status.

        A status is processed only when its author's screen name is
        'Swall0wTech' and its text contains at least one URL matching
        ``usual_url_pattern()``. Each entry returned by
        ``extract_arxiv_data`` becomes one issue whose body is the joined
        authors, the abstract and the URL, separated by blank lines.

        Ordinary errors are printed as a traceback and swallowed so that one
        bad status does not propagate out of the callback.
        ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught.

        Args:
            status: Incoming status object; ``created_at`` is shifted in
                place, and ``text`` and ``author.screen_name`` are read.
        """
        # NOTE(review): presumably converts a UTC timestamp to JST (UTC+9);
        # confirm. The shifted value is not used again within this method.
        status.created_at += timedelta(hours=9)
        urls = find_pattern_url_on_text(status.text, usual_url_pattern())
        if status.author.screen_name != 'Swall0wTech' or not urls:
            return

        try:
            for issue in extract_arxiv_data(urls):
                title = issue['title']
                body = "{}\n\n{}\n\n{}".format(
                    ', '.join(issue['authors']), issue['abstract'], issue['url'])
                labels = []
                is_created, content = create_github_issue(
                    self.github_user, title, body, labels)
                print('OK' if is_created else 'Bad')
                print(content)
        except Exception:
            # Best effort: report the failure and keep going. A bare
            # ``except:`` here would also swallow KeyboardInterrupt and
            # SystemExit.
            print(traceback.format_exc())

    def _run(self):
        """Connect to the streaming endpoint and read it until stopped.

        Loops while ``self.running`` is true:

        * A non-200 response is reported to ``self.listener.on_error``; the
          loop stops if that returns ``False``, otherwise it sleeps for
          ``self.retry_time`` and doubles it (capped at
          ``self.retry_time_cap``). Status 420 first raises the delay to at
          least ``self.retry_420_start``.
        * A 200 response resets the counters, calls
          ``self.listener.on_connect()`` and hands the response to
          ``self._read_loop``.
        * Timeouts are reported to ``self.listener.on_timeout`` and retried
          after ``self.snooze_time``, which grows by
          ``self.snooze_time_step`` up to ``self.snooze_time_cap``.
        * Any other exception ends the loop and is re-raised after cleanup.

        NOTE(review): this method reads attributes (``host``, ``url``,
        ``running``, ``session``, ``listener``, ...) that this class's
        ``__init__`` never sets. It looks like a connection loop meant for a
        stream object rather than a listener; confirm whether it is reachable.

        Raises:
            Exception: The fatal exception that ended the loop, after it has
                been passed to ``self.listener.on_exception``.
        """
        # Function-scope imports: the module header imports neither name, so
        # without these the first use would fail with NameError.
        import ssl
        from time import sleep

        # Authenticate
        url = "https://%s%s" % (self.host, self.url)
        # Connect and process the stream
        error_counter = 0
        resp = None
        exception = None
        while self.running:
            if self.retry_count is not None:
                if error_counter > self.retry_count:
                    # quit if error count greater than retry count
                    break
            try:
                auth = self.auth.apply_auth()
                resp = self.session.request('POST',
                                            url,
                                            data=self.body,
                                            timeout=self.timeout,
                                            stream=True,
                                            auth=auth,
                                            verify=self.verify)
                if resp.status_code != 200:
                    if self.listener.on_error(resp.status_code) is False:
                        break
                    error_counter += 1
                    if resp.status_code == 420:
                        self.retry_time = max(self.retry_420_start,
                                              self.retry_time)
                    sleep(self.retry_time)
                    self.retry_time = min(self.retry_time * 2,
                                          self.retry_time_cap)
                else:
                    error_counter = 0
                    self.retry_time = self.retry_time_start
                    self.snooze_time = self.snooze_time_step
                    self.listener.on_connect()
                    self._read_loop(resp)

            # NOTE(review): ``Timeout`` is not imported anywhere in the
            # visible file (presumably requests.exceptions.Timeout); it must
            # be brought into scope or this clause raises NameError.
            except (Timeout, ssl.SSLError) as exc:
                # This is still necessary, as a SSLError can actually be
                # thrown when using Requests
                # If it's not time out treat it like any other exception
                if isinstance(exc, ssl.SSLError):
                    if not (exc.args and 'timed out' in str(exc.args[0])):
                        exception = exc
                        break
                if self.listener.on_timeout() is False:
                    break
                if self.running is False:
                    break
                sleep(self.snooze_time)
                self.snooze_time = min(self.snooze_time + self.snooze_time_step,
                                       self.snooze_time_cap)
            except Exception as exc:
                exception = exc
                # any other exception is fatal, so kill loop
                break

        # cleanup
        self.running = False
        if resp:
            resp.close()

        self.new_session()

        if exception:
            # call a handler first so that the exception can be logged.
            self.listener.on_exception(exception)
            # Re-raise the saved exception explicitly: a bare ``raise`` here
            # is outside any ``except`` block and would raise RuntimeError
            # ("No active exception to reraise") on Python 3.
            raise exception


def main(reconnect_delay=5.0):
    """Read credentials from ``./user.conf`` and run the stream listener forever.

    The ``[twitter]`` section supplies ``consumer_key``, ``consumer_secret``,
    ``access_key`` and ``access_secret``; the ``[github]`` section supplies
    ``repo_owner``, ``repo_name``, ``username`` and ``password``.

    The user stream is restarted whenever it returns or fails with an
    ordinary exception; each failure is counted and printed with its
    traceback. ``KeyboardInterrupt`` and ``SystemExit`` are not caught, so
    the process can be stopped normally.

    Args:
        reconnect_delay: Seconds to wait before each reconnect attempt, so a
            persistent failure does not become a busy loop.

    Raises:
        configparser.Error: If a required section or option is missing.
    """
    # Local import keeps this function self-contained; the module header
    # does not import ``time``.
    import time

    inifile = configparser.ConfigParser()
    inifile.read('./user.conf')

    twitter_keys = ('consumer_key', 'consumer_secret',
                    'access_key', 'access_secret')
    init = {key: inifile.get('twitter', key) for key in twitter_keys}

    github_keys = ('repo_owner', 'repo_name', 'username', 'password')
    github_user = {key: inifile.get('github', key) for key in github_keys}

    auth = get_oauth(init)
    stream = Stream(auth, AbstractedlyListener(github_user=github_user), secure=True)
    error = 0
    while True:
        try:
            stream.userstream()
        except Exception:
            # ``except Exception`` rather than a bare ``except:`` so that
            # Ctrl-C and sys.exit() still terminate the bot.
            error += 1
            print(error, traceback.format_exc())
        # Pause before reconnecting, whether the stream failed or returned.
        time.sleep(reconnect_delay)


# Start the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()