BLKSerene/Wordless

View on GitHub
wordless/wl_figs/wl_figs_stats.py

Summary

Maintainability
A
0 mins
Test Coverage
# ----------------------------------------------------------------------
# Wordless: Figures - Statistics
# Copyright (C) 2018-2024  Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from PyQt5.QtCore import QCoreApplication

from wordless.wl_figs import wl_figs
from wordless.wl_nlp import wl_texts
from wordless.wl_utils import wl_sorting

_tr = QCoreApplication.translate

def wl_fig_stats(main, stat_files_items, tab):
    fig_settings = main.settings_custom[tab]['fig_settings']

    # Tokens / Keywords
    if stat_files_items and isinstance(list(stat_files_items.keys())[0], str):
        stat_files_items = {
            item.display_text(): stat_files
            for item, stat_files in stat_files_items.items()
        }
    # N-grams
    elif stat_files_items and isinstance(list(stat_files_items.keys())[0][0], str):
        stat_files_items = {
            ' '.join(wl_texts.to_display_texts(item)): stat_files
            for item, stat_files in stat_files_items.items()
        }
    # Collocations / Colligations
    else:
        if fig_settings['graph_type'] == _tr('wl_figs_stats', 'Network graph'):
            stat_files_items = {
                (' '.join(wl_texts.to_display_texts(node)), collocate.display_text()): stat_files
                for (node, collocate), stat_files in stat_files_items.items()
            }
        else:
            stat_files_items = {
                ' '.join(wl_texts.to_display_texts(node)) + ', ' + collocate.display_text(): stat_files
                for (node, collocate), stat_files in stat_files_items.items()
            }

    file_names_selected = [*main.wl_file_area.get_selected_file_names(), _tr('wl_figs_stats', 'Total')]
    col_sort_by_file = file_names_selected.index(fig_settings['sort_by_file'])

    if fig_settings['use_data'] == _tr('wl_figs_stats', 'p-value'):
        stat_files_items = wl_sorting.sorted_freq_files_items(
            stat_files_items,
            sort_by_col = col_sort_by_file,
            reverse = True
        )
    else:
        stat_files_items = wl_sorting.sorted_freq_files_items(
            stat_files_items,
            sort_by_col = col_sort_by_file
        )

    # Line Chart
    if fig_settings['graph_type'] == _tr('wl_figs_stats', 'Line chart'):
        if tab == 'wordlist_generator':
            label_x = _tr('wl_figs_stats', 'Token')
        elif tab == 'ngram_generator':
            label_x = _tr('wl_figs_stats', 'N-gram')
        elif tab in ['collocation_extractor', 'colligation_extractor']:
            label_x = _tr('wl_figs_stats', 'Collocate')
        elif tab == 'keyword_extractor':
            label_x = _tr('wl_figs_stats', 'Keyword')

        wl_figs.generate_line_chart(
            main, stat_files_items,
            fig_settings = fig_settings,
            file_names_selected = file_names_selected,
            label_x = label_x
        )
    else:
        stat_file_items = [
            (item, stat_files[col_sort_by_file])
            for item, stat_files in stat_files_items
        ]

        # Word Cloud
        if fig_settings['graph_type'] == _tr('wl_figs_stats', 'Word cloud'):
            wl_figs.generate_word_cloud(main, stat_file_items, fig_settings = fig_settings)
        # Network Graph
        elif fig_settings['graph_type'] == _tr('wl_figs_stats', 'Network graph'):
            wl_figs.generate_network_graph(main, stat_file_items, fig_settings = fig_settings)