DeveloperCAP/MLCAT

View on GitHub

Showing 109 of 109 total issues

Identical blocks of code found in 2 locations. Consider refactoring.
Open

if jfile['In-Reply-To']:
parent_id = jfile['In-Reply-To']
if parent_id and parent_id < msg_id:
edges.add((parent_id, msg_id))
Severity: Major
Found in lib/analysis/thread/graph/edge_list.py and 1 other location - About 1 hr to fix
lib/analysis/thread/graph/edge_list.py on lines 66..69

Function generate_edge_list has a Cognitive Complexity of 14 (exceeds 5 allowed). Consider refactoring.
Open

def generate_edge_list(nodelist_filename='graph_nodes.csv', edgelist_filename='graph_edges.csv', json_filename='clean_data.json'):
"""
This function generates a list of nodes and edges in the graphs from the JSON file and saves it as a CSV file.
 
:param nodelist_filename: csv file to store the graph nodes.
Severity: Minor
Found in lib/analysis/thread/graph/edge_list.py - About 1 hr to fix

Function get_lone_author_threads has a Cognitive Complexity of 14 (exceeds 5 allowed). Consider refactoring.
Open

def get_lone_author_threads(save_file=None, nodelist_filename='graph_nodes.csv', edgelist_filename='graph_edges.csv'):
"""
This function returns the UID of all the nodes that belong to a thread that has only one author
 
:param save_file: If True, the list of UIDs of nodes are saved to a text file
Severity: Minor
Found in lib/util/read.py - About 1 hr to fix

Consider simplifying this complex logical expression.
Open

if len(trunc_date) > 30 and trunc_date[14] == ':':
datetime_obj = datetime.datetime.strptime(trunc_date, "%a, %b %d %H:%M:%S %Y %z")
elif len(trunc_date) == 25 or len(trunc_date) == 26:
datetime_obj = datetime.datetime.strptime(trunc_date, "%d %b %Y %H:%M:%S %z")
elif trunc_date[3] == ',' and (len(trunc_date) == 28 or len(trunc_date) == 29) and '+' not in trunc_date and '-' not in trunc_date:
Severity: Critical
Found in lib/util/read.py - About 1 hr to fix

    Consider simplifying this complex logical expression.
    Open

    if len(trunc_date) > 30 and trunc_date[14] == ':':
    datetime_obj = datetime.datetime.strptime(trunc_date, "%a, %b %d %H:%M:%S %Y %z")
    elif len(trunc_date) == 25 or len(trunc_date) == 26:
    datetime_obj = datetime.datetime.strptime(trunc_date, "%d %b %Y %H:%M:%S %z")
    elif trunc_date[3] == ',' and (len(trunc_date) == 28 or len(trunc_date) == 29) and '+' not in trunc_date and '-' not in trunc_date:
    Severity: Critical
    Found in lib/util/read.py - About 1 hr to fix

      Function generate_node_labels has a Cognitive Complexity of 13 (exceeds 5 allowed). Consider refactoring.
      Open

      def generate_node_labels(nodelist_filename='graph_nodes.txt', edgelist_filename='graph_edges.txt', json_filename='clean_data.json'):
      """
       
      This function generates a list of nodes and edges in the graphs from the JSON file and saves it as a TXT file.
       
       
      Severity: Minor
      Found in lib/analysis/thread/graph/edge_list.py - About 1 hr to fix

      Identical blocks of code found in 2 locations. Consider refactoring.
      Open

      if month in {'Jan', 'Mar', 'May', 'Jul', 'Aug', 'Oct', 'Dec'}:
      max_day = 31
      elif month == 'Feb':
      max_day = 28
      else:
      Severity: Major
      Found in driver_thread_analysis.py and 1 other location - About 1 hr to fix
      driver_author_analysis.py on lines 41..46

      Identical blocks of code found in 2 locations. Consider refactoring.
      Open

      if month in {'Jan', 'Mar', 'May', 'Jul', 'Aug', 'Oct', 'Dec'}:
      max_day = 31
      elif month == 'Feb':
      max_day = 28
      else:
      Severity: Major
      Found in driver_author_analysis.py and 1 other location - About 1 hr to fix
      driver_thread_analysis.py on lines 35..40

      Identical blocks of code found in 2 locations. Consider refactoring.
      Open

      msg_tokens = [x.lower() for x in re.sub('\W+', ' ', msg_body).split() if 2 < len(x) < 30]
      Severity: Major
      Found in lib/input/mbox/keyword_digest.py and 1 other location - About 1 hr to fix
      lib/input/mbox/keyword_clustering.py on lines 139..139

      Identical blocks of code found in 2 locations. Consider refactoring.
      Open

      msg_tokens = [x.lower() for x in re.sub('\W+', ' ', msg_body).split() if 2 < len(x) < 30]
      Severity: Major
      Found in lib/input/mbox/keyword_clustering.py and 1 other location - About 1 hr to fix
      lib/input/mbox/keyword_digest.py on lines 143..143

      Identical blocks of code found in 3 locations. Consider refactoring.
      Open

      with open(output_file, 'w') as json_file:
      for json_obj in write_to_file:
      json.dump(json_obj, json_file, indent=1)
      json_file.write("\n")
      Severity: Major
      Found in lib/input/check_headers.py and 2 other locations - About 1 hr to fix
      lib/input/check_headers.py on lines 182..185
      lib/input/check_headers.py on lines 232..235

      Identical blocks of code found in 3 locations. Consider refactoring.
      Open

      with open(output_file, 'w') as json_file:
      for json_obj in write_to_file:
      json.dump(json_obj, json_file, indent=1)
      json_file.write("\n")
      Severity: Major
      Found in lib/input/check_headers.py and 2 other locations - About 1 hr to fix
      lib/input/check_headers.py on lines 150..153
      lib/input/check_headers.py on lines 182..185

      Identical blocks of code found in 2 locations. Consider refactoring.
      Open

      msg_tokens = [wnl.lemmatize(x) for x in msg_tokens if not x.isdigit() and x not in from_addr]
      Severity: Major
      Found in lib/input/mbox/keyword_digest.py and 1 other location - About 1 hr to fix
      lib/input/mbox/keyword_clustering.py on lines 143..143

      Identical blocks of code found in 2 locations. Consider refactoring.
      Open

      msg_tokens = [wnl.lemmatize(x) for x in msg_tokens if not x.isdigit() and x not in from_addr]
      Severity: Major
      Found in lib/input/mbox/keyword_clustering.py and 1 other location - About 1 hr to fix
      lib/input/mbox/keyword_digest.py on lines 147..147

      Identical blocks of code found in 3 locations. Consider refactoring.
      Open

      with open(output_file, 'w') as json_file:
      for json_obj in write_to_file:
      json.dump(json_obj, json_file, indent=1)
      json_file.write("\n")
      Severity: Major
      Found in lib/input/check_headers.py and 2 other locations - About 1 hr to fix
      lib/input/check_headers.py on lines 150..153
      lib/input/check_headers.py on lines 232..235

      Function vertex_clustering has 26 lines of code (exceeds 25 allowed). Consider refactoring.
      Open

      def vertex_clustering(json_filename, nodelist_filename, edgelist_filename, foldername, time_limit=None, ignore_lat=False):
      """
      This function performs vertex clustering on the dataset passed in the parameters and saves the dendrogram resulting
      from the vertex clustering as a PDF along with the visualization of the vertex cluster itself. It is recommended to
limit these graphs to 200 authors as the visualization becomes incomprehensible beyond that.
      Severity: Minor
      Found in lib/analysis/author/community.py - About 1 hr to fix

        Function conversation_refresh_times has 8 arguments (exceeds 4 allowed). Consider refactoring.
        Open

        def conversation_refresh_times(headers_filename, nodelist_filename, edgelist_filename, foldername, time_ubound = None, time_lbound = None, plot=False, ignore_lat = False):
        Severity: Major
        Found in lib/analysis/author/time_statistics.py - About 1 hr to fix

          Function add_to_multigraph has a Cognitive Complexity of 9 (exceeds 5 allowed). Consider refactoring.
          Open

          def add_to_multigraph(graph_obj, discussion_graph, json_data, nbunch, label_prefix=''):
          """
          Add multiple edges to the MultiDiGraph object recursively.
           
:param graph_obj: Object for a directed graph with multiple edges.
          Severity: Minor
          Found in lib/analysis/author/graph/interaction.py - About 55 mins to fix

          Identical blocks of code found in 2 locations. Consider refactoring.
          Open

          for num in range(len(keywords_list)):
          keywords_list[num] = " ".join(keywords_list[num])
          Severity: Minor
          Found in lib/input/mbox/keyword_clustering.py and 1 other location - About 55 mins to fix
          lib/input/mbox/keyword_digest.py on lines 160..161

          Identical blocks of code found in 2 locations. Consider refactoring.
          Open

          for num in range(len(keywords_list)):
          keywords_list[num] = " ".join(keywords_list[num])
          Severity: Minor
          Found in lib/input/mbox/keyword_digest.py and 1 other location - About 55 mins to fix
          lib/input/mbox/keyword_clustering.py on lines 152..153
          Severity
          Category
          Status
          Source
          Language