masschallenge/django-accelerator

View on GitHub
accelerator/twitter_handle_cleanup/utils.py

Summary

Maintainability
C
1 day
Test Coverage
F
18%
# This file and the logic within it were used in the sibling
# organization_cleanup.py, expert_profile_cleanup.py and
# entrepreneur_profile_cleanup.py files to turn as many "bad"
# twitter handles as possible into valid ones. See ticket below.
# Ticket reference: https://masschallenge.atlassian.net/browse/AC-6406

import re
import csv

from django.db.models import (
    F,
    Func,
    Value
)


# Name of the CSV file that write_to_csv() appends its rows to. It is a bare
# file name, so open() resolves it relative to the current working directory.
TWITTER_HANDLE_CSV_FILE_NAME = 'twitter_handle_cleanup.csv'


def remove_leading_slashes(obj):
    """Drop one leading "/" from ``obj.twitter_handle``, if present.

    When the handle starts with a slash, a row of
    [class name, id, old handle, new handle] is passed to ``write_to_csv``
    and the object is saved with the shortened handle. Otherwise the
    object is left untouched and nothing is logged.
    """
    original_handle = str(obj.twitter_handle)
    if not original_handle.startswith("/"):
        return

    # Only the first character is removed; any further slashes stay.
    cleaned_handle = original_handle[1:]
    write_to_csv([
        obj.__class__.__name__,
        obj.id,
        original_handle,
        cleaned_handle,
    ])
    obj.twitter_handle = cleaned_handle
    obj.save()


def remove_trailing_slashes(obj):
    """Drop one trailing "/" from ``obj.twitter_handle``, if present.

    When the handle ends with a slash, a row of
    [class name, id, old handle, new handle] is passed to ``write_to_csv``
    and the object is saved with the shortened handle. Otherwise the
    object is left untouched and nothing is logged.
    """
    original_handle = str(obj.twitter_handle)
    if not original_handle.endswith("/"):
        return

    # Only the last character is removed; any further slashes stay.
    cleaned_handle = original_handle[:-1]
    write_to_csv([
        obj.__class__.__name__,
        obj.id,
        original_handle,
        cleaned_handle,
    ])
    obj.twitter_handle = cleaned_handle
    obj.save()


def remove_trailing_and_leading_whitespace(obj):
    """Strip surrounding whitespace from ``obj.twitter_handle`` and save.

    The object is saved only when stripping actually changes the handle.
    No row is written to the cleanup CSV by this function.

    A handle of ``None`` is left alone: converting it with ``str()`` would
    otherwise store the literal text "None" as the twitter handle.
    """
    current_handle = obj.twitter_handle
    if current_handle is None:
        return

    stripped_handle = str(current_handle).strip()
    if stripped_handle != current_handle:
        obj.twitter_handle = stripped_handle
        obj.save()


def remove_leading_hashtag_on_valid_twitter_handles(Model):
    """Remove "#" from every ``Model`` twitter handle that starts with one.

    Rows are selected by a leading "#", but the database-side ``replace``
    removes every "#" in the handle, not just the first character. Each
    affected row is logged through ``write_to_csv`` as
    [model name, id, old handle, new handle] before the update runs.

    Args:
        Model: Django model class with a ``twitter_handle`` field.
    """
    profiles = Model.objects.filter(twitter_handle__iregex="^#")

    for profile_id, old_handle in profiles.values_list("id", "twitter_handle"):
        # Log exactly what the UPDATE below will store (all "#" removed),
        # so the CSV can be trusted as a record of the change.
        new_handle = old_handle.replace("#", "")
        write_to_csv([Model.__name__, profile_id, old_handle, new_handle])

    profiles.update(twitter_handle=Func(
                    F('twitter_handle'),
                    Value("#"),
                    Value(""),
                    function="replace"))


def remove_twitter_url_prefix_from_handles(Model):
    """Strip known twitter URL prefixes from ``Model`` twitter handles.

    For each prefix, in the order listed, the rows whose handle starts
    with it are logged through ``write_to_csv`` as
    [model name, id, old handle, new handle] and then rewritten with a
    database-side ``replace``. Both the logged value and the update remove
    every occurrence of the prefix text, not only the leading one.

    Args:
        Model: Django model class with a ``twitter_handle`` field.
    """
    url_prefixes = (
        'maapit or https://twitter.com/',
        '@https://twitter.com/',
        'https://twitter.com/#!/',
        'Http://twitter.com/',
        'http://twitter.com/',
        'http://www.twitter.com/',
        'https://mobile.twitter.com/',
        'https://twitter.com/',
        'https://ww.twitter.com/',
        'https://www.Twitter.com/',
        'https://www.twitter.com/',
        'https;//twitter.com/',
        'htttp://www.twitter.com/',
        'www.Twitter.com/',
        'www.twitter.com/',
        'Twitter.com/',
        'twitter.com/',
    )

    for prefix in url_prefixes:
        matching = Model.objects.filter(twitter_handle__startswith=prefix)

        for profile_id, old_handle in matching.values_list(
                "id", "twitter_handle"):
            write_to_csv([
                Model.__name__,
                profile_id,
                old_handle,
                old_handle.replace(prefix, ""),
            ])

        # NOTE(review): a ValueError here is silently ignored, yet the CSV
        # rows for this prefix have already been written by this point.
        try:
            matching.update(
                twitter_handle=Func(
                    F('twitter_handle'),
                    Value(prefix),
                    Value(""),
                    function="replace"))
        except ValueError:
            pass


def remove_not_available_abbreviation_from_twitter_handles(Model):
    """Blank every ``Model`` twitter handle containing "n/a".

    The match is a case-insensitive substring test anywhere in the
    handle. Matching rows are logged with an empty new handle first, then
    updated to the empty string.
    """
    not_available = Model.objects.filter(twitter_handle__icontains="n/a")
    # Log first: after the update these rows no longer match the filter.
    create_list_to_write_to_csv(not_available, Model)
    not_available.update(twitter_handle="")


def turn_handles_with_incomplete_handles_to_empty_string(Model):
    """Blank every ``Model`` twitter handle equal to "https://twitter.".

    Matching rows are logged with an empty new handle first, then updated
    to the empty string.
    """
    incomplete = Model.objects.filter(twitter_handle="https://twitter.")
    # Log first: after the update these rows no longer match the filter.
    create_list_to_write_to_csv(incomplete, Model)
    incomplete.update(twitter_handle="")


def remove_hashbang_from_twitter_handles(Model):
    """Strip a leading "#!/" from every ``Model`` twitter handle that has one.

    Each affected row is first logged through ``write_to_csv`` as
    [model name, id, old handle, new handle]; all matching rows are then
    rewritten with a single database-side UPDATE.

    Args:
        Model: Django model class with a ``twitter_handle`` field.
    """
    # Imported here so the function carries its own dependency.
    from django.db.models.functions import Substr

    profiles = Model.objects.filter(twitter_handle__iregex="^#!/")

    for profile_id, old_handle in profiles.values_list("id", "twitter_handle"):
        write_to_csv([Model.__name__, profile_id, old_handle, old_handle[3:]])

    # Slicing str(F('twitter_handle')) happens in Python and yields the
    # constant 'witter_handle)', which would overwrite every matching row.
    # Substr runs in the database; positions are 1-indexed, so starting at
    # 4 drops exactly the three characters "#!/".
    profiles.update(twitter_handle=Substr('twitter_handle', 4))


def write_to_csv(data_list, file_name=None):
    """Append ``data_list`` as one fully quoted row to a CSV file.

    Args:
        data_list: iterable of values forming the row; every field is
            quoted (``csv.QUOTE_ALL``).
        file_name: path of the file to append to. Defaults to
            ``TWITTER_HANDLE_CSV_FILE_NAME`` when omitted.
    """
    if file_name is None:
        file_name = TWITTER_HANDLE_CSV_FILE_NAME

    # newline='' leaves line endings to the csv module (otherwise platforms
    # that translate "\n" end up with blank lines between rows). An explicit
    # encoding keeps non-ASCII handles writable whatever the locale.
    with open(file_name, 'a', newline='', encoding='utf-8') as csv_file:
        csv.writer(csv_file, quoting=csv.QUOTE_ALL).writerow(data_list)


def create_list_to_write_to_csv(profiles, Model, new_handle=""):
    """Log one CSV row per profile in ``profiles``.

    Each row is [model name, id, current twitter handle, new_handle] and
    is passed to ``write_to_csv``.

    Args:
        profiles: queryset providing ``values_list("id", "twitter_handle")``.
        Model: class whose ``__name__`` is written in the first column.
        new_handle: value recorded as the replacement handle.
    """
    for id_and_handle in profiles.values_list("id", "twitter_handle"):
        row = [Model.__name__] + list(id_and_handle) + [new_handle]
        write_to_csv(row)