wordless/wl_nlp/wl_token_processing.py
Avoid deeply nested control flow statements. Open
Open
for token in sentence_seg:
if token.tag is not None:
token.tag = token.tag.lower()
if token.lemma is not None:
Avoid deeply nested control flow statements. Open
Open
for sentence_seg in sentence:
for i, token in enumerate(sentence_seg):
if token.isupper():
sentence_seg[i] = wl_texts.Wl_Token('')
# Title Case
Avoid deeply nested control flow statements. Open
Open
for sentence_seg in sentence:
for i, token in enumerate(sentence_seg):
if token.istitle():
sentence_seg[i] = wl_texts.Wl_Token('')
else:
Avoid deeply nested control flow statements. Open
Open
for sentence_seg in sentence:
for i, token in enumerate(sentence_seg):
# Convert to strings to ignore tags and punctuation marks, if any, when checking for stop words
if token.lower() in stop_words:
sentence_seg[i] = wl_texts.Wl_Token('')
Avoid deeply nested control flow statements. Open
Open
for sentence_seg in sentence:
for i, token in enumerate(sentence_seg):
if token.islower():
sentence_seg[i] = wl_texts.Wl_Token('')
# Uppercase
Avoid deeply nested control flow statements. Open
Open
for i, token in enumerate(sentence_seg):
if wl_checks_tokens.is_num(token):
sentence_seg[i] = wl_texts.Wl_Token('')
# Replace token texts with lemmas
Avoid deeply nested control flow statements. Open
Open
if i == 0 and j == 0 and k == 0:
tokens = []
for l, token in enumerate(sentence_seg):
# Do not remove the first token and set it to an empty token instead if it is a punctuation mark
Avoid deeply nested control flow statements. Open
Open
for sentence_seg in sentence:
for i, token in enumerate(sentence_seg):
# Convert to strings to ignore tags and punctuation marks, if any, when checking for stop words
if str(token) in stop_words:
sentence_seg[i] = wl_texts.Wl_Token('')
Avoid deeply nested control flow statements. Open
Open
for i, token in enumerate(sentence_seg):
if wl_checks_tokens.is_word_alphabetic(token):
sentence_seg[i] = wl_texts.Wl_Token('')
# Numerals
Avoid deeply nested control flow statements. Open
Open
for token in sentence_seg:
if wl_checks_tokens.is_punc(token.head):
token.head = None
Consider simplifying this complex logical expression. Open
Open
if (
token_settings.get('apply_lemmatization', False)
or search_settings['match_inflected_forms']
or (
search_settings['context_settings']['incl']['incl']
Function `wl_process_tokens_concordancer` has 5 arguments (exceeds 4 allowed). Consider refactoring. Open
def wl_process_tokens_concordancer(main, text, token_settings, search_settings, preserve_blank_lines = False):