src/ej_clusters/models/cluster.py from ejplatform/ej-server

src/ej_clusters/models/cluster.py
Summary

Maintainability

0 mins
Test Coverage

Issues
from boogie import models
from django.contrib.auth import get_user_model
from django.db.models import Subquery
from django.urls import reverse
from django.utils.translation import gettext_lazy as _
from model_utils.models import TimeStampedModel
from sidekick import delegate_to, lazy, import_later, placeholder as this

from ej_conversations.models import Comment
from .cluster_queryset import ClusterManager
from .stereotype_vote import StereotypeVote

pd = import_later("pandas")
np = import_later("numpy")
clusterization_pipeline = import_later("..math:clusterization_pipeline", package=__package__)


class Cluster(TimeStampedModel):
    """
    Represents an opinion group.
    """

    clusterization = models.ForeignKey("Clusterization", on_delete=models.CASCADE, related_name="clusters")
    name = models.CharField(_("Name"), max_length=64)
    description = models.TextField(
        _("Description"), blank=True, help_text=_("How was this cluster conceived?")
    )
    users = models.ManyToManyField(get_user_model(), related_name="clusters", blank=True)
    stereotypes = models.ManyToManyField("Stereotype", related_name="clusters")
    conversation = delegate_to("clusterization")
    comments = delegate_to("clusterization")
    objects = ClusterManager()

    @property
    def votes(self):
        return self.clusterization.votes.filter(author__in=self.users.all())

    @property
    def stereotype_votes(self):
        return self.clusterization.stereotype_votes.filter(author__in=self.stereotypes.all())

    n_votes = lazy(this.votes.count())
    n_users = lazy(this.users.count())
    n_stereotypes = lazy(this.stereotypes.count())
    n_stereotype_votes = lazy(this.n_stereotype_votes.count())

    def __str__(self):
        msg = _('{name} ("{conversation}" conversation, {n} users)')
        return msg.format(name=self.name, conversation=self.conversation, n=self.users.count())

    def get_absolute_url(self):
        args = {
            "conversation": self.conversation,
            "cluster": self,
            "board_slug": self.conversation.board.slug,
        }
        return reverse("boards:cluster-detail", kwargs=args)

    def mean_stereotype(self):
        """
        Return the mean stereotype for cluster.
        """
        stereotypes = self.stereotypes.all()
        votes = StereotypeVote.objects.filter(author__in=Subquery(stereotypes.values("id"))).values_list(
            "comment", "choice"
        )
        df = pd.DataFrame(list(votes), columns=["comment", "choice"])
        if len(df) == 0:
            return pd.DataFrame([], columns=["choice"])
        else:
            return df.pivot_table("choice", index="comment", aggfunc="mean")

    def comments_statistics_summary_dataframe(self, normalization=1.0):
        """
        Like comments.statistics_summary_dataframe(), but restricts votes to
        users in the current clusters.
        """
        kwargs = dict(normalization=normalization, votes=self.votes)
        return self.comments.statistics_summary_dataframe(**kwargs)

    def separate_comments(self, sort=True):
        """
        Separate comments into a pair for comments that cluster agrees to and
        comments that cluster disagree.
        """
        tol = 1e-6
        table = self.votes.votes_table()

        n_agree = (table > 0).sum()
        n_disagree = (table < 0).sum()
        total = n_agree + n_disagree + (table == 0).sum() + tol

        d_agree = dict(((n_agree[n_agree >= n_disagree] + tol) / total).dropna().items())
        d_disagree = dict(((n_disagree[n_disagree > n_agree] + tol) / total).dropna().items())

        agree = []
        disagree = []
        for comment in Comment.objects.filter(id__in=d_agree):
            # It would accept 0% agreement since we test sfor n_agree >= n_disagree
            # We must prevent cases with 0 agrees (>= 0 disagrees) to enter in
            # the calculation
            n_agree = d_agree[comment.id]
            if n_agree:
                comment.agree = n_agree
                agree.append(comment)

        for comment in Comment.objects.filter(id__in=d_disagree):
            comment.disagree = d_disagree[comment.id]
            disagree.append(comment)

        if sort:
            agree.sort(key=lambda c: c.agree, reverse=True)
            disagree.sort(key=lambda c: c.disagree, reverse=True)

        return agree, disagree