DeveloperCAP/MLCAT

View on GitHub
lib/util/graph.py

Summary

Maintainability
A
0 mins
Test Coverage
import json
from lib.util.read import lines_per_n

"""
This module has utility functions for handling graphs and for retriving auxiliary graph properties.
"""

def get_current_leaf_nodes(list1, list2):
    """
    This function eliminates the non-leaf message-ids from the list of leaf message ids.

    :param list1: List containing all nodes
    :param list2: Reference list
    :return: List without non-leaf nodes
    """
    s = set(list2)
    list3 = [msg_id for msg_id in list1 if str(msg_id) not in s]
    return list3


def get_leaf_nodes(src_file, dest_file):
    """
    This function is used to compute the message-ids of leaf nodes in the thread graph.

    :param src_file: Source file containing message-ids.
    :param dest_file: Destination file(csv) to which message-ids of leaf nodes be stored.
    :return: List of message-ids of leaf nodes
    """
    leaf_msgs = []  # Keeps track of all those message ids that are leaf nodes
    msg_ref_map = {}  # Map between message id of each mail to its references list

    with open(src_file, 'r') as fil:
        for chunk in lines_per_n(fil, 9):

            jfile = json.loads(chunk)

            leaf_msgs.append(jfile['Message-ID'])
            msg_ref_map[jfile['Message-ID']] = str(jfile['References'])

            if not (jfile['References'] == None):
                leaf_msgs = get_current_leaf_nodes(leaf_msgs, jfile['References'].split(','))

        fil.close()

    with open(dest_file, 'w') as csv_file:
        for msg_id in leaf_msgs:
            csv_file.write("{0};{1}\n".format(msg_id, msg_ref_map[msg_id]))
    csv_file.close()
    return leaf_msgs