BLKSerene/Wordless

View on GitHub
wordless/wl_settings/wl_settings_sentence_tokenization.py

Summary

Maintainability
A
0 mins
Test Coverage
# ----------------------------------------------------------------------
# Wordless: Settings - Sentence Tokenization
# Copyright (C) 2018-2024  Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

import copy

from PyQt5.QtCore import pyqtSignal
from PyQt5.QtGui import QStandardItem
from PyQt5.QtWidgets import (
    QGroupBox,
    QLabel,
    QPushButton,
    QTextEdit
)

from wordless.wl_nlp import wl_nlp_utils, wl_sentence_tokenization
from wordless.wl_settings import wl_settings
from wordless.wl_utils import wl_conversion, wl_threading
from wordless.wl_widgets import (
    wl_boxes,
    wl_item_delegates,
    wl_layouts,
    wl_tables
)

class Wl_Settings_Sentence_Tokenization(wl_settings.Wl_Settings_Node):
    def __init__(self, main):
        super().__init__(main)

        self.settings_global = self.main.settings_global['sentence_tokenizers']
        self.settings_default = self.main.settings_default['sentence_tokenization']
        self.settings_custom = self.main.settings_custom['sentence_tokenization']

        # Sentence Tokenizer Settings
        self.group_box_sentence_tokenizer_settings = QGroupBox(self.tr('Sentence Tokenizer Settings'), self)

        self.table_sentence_tokenizers = wl_tables.Wl_Table(
            self,
            headers = [
                self.tr('Language'),
                self.tr('Sentence Tokenizers')
            ],
            editable = True
        )

        self.table_sentence_tokenizers.setFixedHeight(370)
        self.table_sentence_tokenizers.verticalHeader().setHidden(True)
        self.table_sentence_tokenizers.model().setRowCount(len(self.settings_global))

        self.table_sentence_tokenizers.disable_updates()

        for i, lang in enumerate(self.settings_global):
            self.table_sentence_tokenizers.model().setItem(i, 0, QStandardItem(wl_conversion.to_lang_text(self.main, lang)))
            self.table_sentence_tokenizers.model().setItem(i, 1, QStandardItem())

            self.table_sentence_tokenizers.setItemDelegateForRow(i, wl_item_delegates.Wl_Item_Delegate_Combo_Box(
                parent = self.table_sentence_tokenizers,
                items = list(wl_nlp_utils.to_lang_util_texts(
                    self.main,
                    util_type = 'sentence_tokenizers',
                    util_codes = self.settings_global[lang]
                )),
                col = 1
            ))

        self.table_sentence_tokenizers.enable_updates()

        self.group_box_sentence_tokenizer_settings.setLayout(wl_layouts.Wl_Layout())
        self.group_box_sentence_tokenizer_settings.layout().addWidget(self.table_sentence_tokenizers, 0, 0)

        # Preview
        self.group_box_preview = QGroupBox(self.tr('Preview'), self)

        self.label_preview_lang = QLabel(self.tr('Select language:'), self)
        self.combo_box_preview_lang = wl_boxes.Wl_Combo_Box(self)
        self.button_show_preview = QPushButton(self.tr('Show preview'), self)
        self.text_edit_preview_samples = QTextEdit(self)
        self.text_edit_preview_results = QTextEdit(self)

        self.combo_box_preview_lang.addItems(wl_conversion.to_lang_texts(self.main, self.settings_global))

        self.button_show_preview.setMinimumWidth(140)
        self.text_edit_preview_samples.setAcceptRichText(False)
        self.text_edit_preview_results.setReadOnly(True)

        self.combo_box_preview_lang.currentTextChanged.connect(self.preview_changed)
        self.button_show_preview.clicked.connect(self.preview_results_changed)
        self.text_edit_preview_samples.textChanged.connect(self.preview_changed)
        self.text_edit_preview_results.textChanged.connect(self.preview_changed)

        layout_preview_settings = wl_layouts.Wl_Layout()
        layout_preview_settings.addWidget(self.label_preview_lang, 0, 0)
        layout_preview_settings.addWidget(self.combo_box_preview_lang, 0, 1)
        layout_preview_settings.addWidget(self.button_show_preview, 0, 3)

        layout_preview_settings.setColumnStretch(2, 1)

        self.group_box_preview.setLayout(wl_layouts.Wl_Layout())
        self.group_box_preview.layout().addLayout(layout_preview_settings, 0, 0, 1, 2)
        self.group_box_preview.layout().addWidget(self.text_edit_preview_samples, 1, 0)
        self.group_box_preview.layout().addWidget(self.text_edit_preview_results, 1, 1)

        self.setLayout(wl_layouts.Wl_Layout())
        self.layout().addWidget(self.group_box_sentence_tokenizer_settings, 0, 0)
        self.layout().addWidget(self.group_box_preview, 1, 0)

        self.layout().setContentsMargins(6, 4, 6, 4)
        self.layout().setRowStretch(1, 1)

    def preview_changed(self):
        self.settings_custom['preview']['preview_lang'] = wl_conversion.to_lang_code(self.main, self.combo_box_preview_lang.currentText())
        self.settings_custom['preview']['preview_samples'] = self.text_edit_preview_samples.toPlainText()
        self.settings_custom['preview']['preview_results'] = self.text_edit_preview_results.toPlainText()

        if self.settings_custom['preview']['preview_samples'].strip():
            self.button_show_preview.setEnabled(True)
        else:
            self.button_show_preview.setEnabled(False)

    def preview_results_changed(self):
        if self.combo_box_preview_lang.isEnabled():
            row = list(self.settings_global.keys()).index(self.settings_custom['preview']['preview_lang'])

            self.table_sentence_tokenizers.itemDelegateForRow(row).set_enabled(False)
            self.combo_box_preview_lang.setEnabled(False)
            self.button_show_preview.setEnabled(False)
            self.text_edit_preview_samples.setEnabled(False)

            self.button_show_preview.setText(self.tr('Processing...'))

            sentence_tokenizer = wl_nlp_utils.to_lang_util_code(
                self.main,
                util_type = 'sentence_tokenizers',
                util_text = self.table_sentence_tokenizers.model().item(row, 1).text()
            )

            if wl_nlp_utils.check_models(
                self.main,
                langs = [self.settings_custom['preview']['preview_lang']],
                lang_utils = [[sentence_tokenizer]]
            ):
                worker_preview_sentence_tokenizer = Wl_Worker_Preview_Sentence_Tokenizer(
                    self.main,
                    update_gui = self.update_gui,
                    sentence_tokenizer = sentence_tokenizer
                )

                self.thread_preview_sentence_tokenizer = wl_threading.Wl_Thread_No_Progress(worker_preview_sentence_tokenizer)
                self.thread_preview_sentence_tokenizer.start_worker()
            else:
                self.update_gui_err()

    def update_gui(self, preview_results):
        self.text_edit_preview_results.setPlainText('\n'.join(preview_results))

        self.update_gui_err()

    def update_gui_err(self):
        self.button_show_preview.setText(self.tr('Show preview'))

        row = list(self.settings_global.keys()).index(self.settings_custom['preview']['preview_lang'])

        self.table_sentence_tokenizers.itemDelegateForRow(row).set_enabled(True)
        self.combo_box_preview_lang.setEnabled(True)
        self.button_show_preview.setEnabled(True)
        self.text_edit_preview_samples.setEnabled(True)

    def load_settings(self, defaults = False):
        if defaults:
            settings = copy.deepcopy(self.settings_default)
        else:
            settings = copy.deepcopy(self.settings_custom)

        self.table_sentence_tokenizers.disable_updates()

        for i, lang in enumerate(settings['sentence_tokenizer_settings']):
            self.table_sentence_tokenizers.model().item(i, 1).setText(wl_nlp_utils.to_lang_util_text(
                self.main,
                util_type = 'sentence_tokenizers',
                util_code = settings['sentence_tokenizer_settings'][lang]
            ))

        self.table_sentence_tokenizers.enable_updates()

        if not defaults:
            self.combo_box_preview_lang.blockSignals(True)
            self.text_edit_preview_samples.blockSignals(True)

            self.combo_box_preview_lang.setCurrentText(wl_conversion.to_lang_text(self.main, settings['preview']['preview_lang']))
            self.text_edit_preview_samples.setText(settings['preview']['preview_samples'])
            self.text_edit_preview_results.setText(settings['preview']['preview_results'])

            self.combo_box_preview_lang.blockSignals(False)
            self.text_edit_preview_samples.blockSignals(False)

        self.preview_changed()

    def apply_settings(self):
        for i, lang in enumerate(self.settings_custom['sentence_tokenizer_settings']):
            self.settings_custom['sentence_tokenizer_settings'][lang] = wl_nlp_utils.to_lang_util_code(
                self.main,
                util_type = 'sentence_tokenizers',
                util_text = self.table_sentence_tokenizers.model().item(i, 1).text()
            )

        return True

class Wl_Worker_Preview_Sentence_Tokenizer(wl_threading.Wl_Worker_No_Progress):
    worker_done = pyqtSignal(list)

    def run(self):
        preview_lang = self.main.settings_custom['sentence_tokenization']['preview']['preview_lang']
        preview_samples = self.main.settings_custom['sentence_tokenization']['preview']['preview_samples']

        preview_results = wl_sentence_tokenization.wl_sentence_tokenize(
            self.main,
            text = preview_samples.strip(),
            lang = preview_lang,
            sentence_tokenizer = self.sentence_tokenizer
        )

        self.worker_done.emit(preview_results)