Showing 4,191 of 4,191 total issues
Wrong hanging indentation before block (add 4 spaces). Open
and ipa[pos : pos + i] in _PHONETIC_FEATURES
- Read up
- Exclude checks
TODO and ipa[pos : pos + i] in _PHONETIC_FEATURES ^ |
Too many lines in module (1006/1000) Open
# Copyright 2014-2020 by Christopher C. Little.
- Read up
- Exclude checks
Used when a module has too many lines, reducing its readability.
Wrong hanging indentation before block (add 4 spaces). Open
doc_split: str = '\n\n',
- Read up
- Exclude checks
TODO doc_split: str = '\n\n', ^ |
Wrong hanging indentation before block (add 4 spaces). Open
word_transform: Optional[Callable[[str], str]] = None,
- Read up
- Exclude checks
TODO word_transform: Optional[Callable[[str], str]] = None, ^ |
Similar lines in 2 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.distance.blocklevenshtein:40 ==abydos.distance.dameraulevenshtein:43 def __init__( self, cost: Tuple[float, float, float, float] = (1, 1, 1, 1), normalizer: Callable[[List[float]], float] = max, **kwargs: Any ): """Initialize BlockLevenshtein instance.
Parameters
**kwargs Arbitrary keyword arguments
.. versionadded:: 0.4.0
Similar lines in 2 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.stemmer.snowballdanish:40 ==abydos.stemmer.snowballswedish:39 'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 't', 'v', 'y',
Similar lines in 2 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.distance.jarowinkler:247 ==abydos.distance._strcmp95:265 )
return weight
if __name__ == '__main__':
    import doctest
    doctest.testmod()
Wrong hanging indentation before block (add 4 spaces). Open
corpus_text: str = '',
- Read up
- Exclude checks
TODO corpus_text: str = '', ^ |
Similar lines in 2 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.distance.needlemanwunsch:149 ==abydos.distance.smithwaterman:64 self.gapcost = gapcost self.simfunc = cast( Callable[[str, str], float], NeedlemanWunsch.simmatrix if simfunc is None else simfunc, ) # type: Callable[[str, str], float]
def sim_score(self, src: str, tar: str) -> float: """Return the Needleman-Wunsch score of two strings.
Parameters
src : str Source string for comparison tar : str Target string for comparison
Returns
float Needleman-Wunsch score
Examples
cmp = NeedlemanWunsch() cmp.sim_score('cat', 'hat') 2.0 cmp.sim_score('Niall', 'Neil') 1.0 cmp.sim_score('aluminum', 'Catalan') -1.0 cmp.sim_score('ATCG', 'TAGC') 0.0
.. versionadded:: 0.1.0 .. versionchanged:: 0.3.6 Encapsulated in class
d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float)
Similar lines in 3 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.distance.euclidean:40 ==abydos.distance.manhattan:40 ==abydos.distance.minkowski:49 alphabet: Optional[ Union[TCounter[str], Sequence[str], Set[str], int] ] = 0, tokenizer: Optional[Tokenizer] = None, intersection_type: str = 'crisp', **kwargs: Any ) -> None: """Initialize Euclidean instance.
Parameters
alphabet : collection or int
The values or size of the alphabet
tokenizer : Tokenizer
A tokenizer instance from the :py:mod:abydos.tokenizer
package
intersection_type : str
Specifies the intersection type, and set type as a result:
See :ref:intersection_type <intersection_type>
description in
:py:class:_TokenDistance
for details.
**kwargs
Arbitrary keyword arguments
Other Parameters
qval : int
The length of each q-gram. Using this parameter and tokenizer=None
will cause the instance to use the QGram tokenizer with this
q value.
metric : _Distance
A string distance measure class for use in the soft
and
fuzzy
variants.
threshold : float
A threshold value, similarities above which are counted as
members of the intersection for the fuzzy
variant.
.. versionadded:: 0.4.0
Similar lines in 2 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.phonetic.haase:234 ==abydos.phonetic.koelner:143 elif word[i] == 'B': sdx += '1' elif word[i] == 'P': if _before(word, i, {'H'}): sdx += '3' else: sdx += '1' elif word[i] in {'D', 'T'}: if _before(word, i, {'C', 'S', 'Z'}): sdx += '8' else: sdx += '2' elif word[i] in {'F', 'V', 'W'}: sdx += '3' elif word[i] in {'G', 'K', 'Q'}: sdx += '4' elif word[i] == 'C': if _after(word, i, {'S', 'Z'}): sdx += '8' elif i == 0: if _before(
Similar lines in 3 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.fingerprint.count:37 ==abydos.fingerprint.occurrence:36 ==abydos.fingerprint.occurrencehalved:36 def __init__( self, n_bits: int = 16, most_common: Tuple[str, ...] = MOST_COMMON_LETTERS_CG, ) -> None: """Initialize Count instance.
Parameters
n_bits : int Number of bits in the fingerprint returned most_common : list The most common tokens in the target language, ordered by frequency
.. versionadded:: 0.4.0
Similar lines in 2 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.fingerprint.occurrence:141 ==abydos.fingerprint.occurrencehalved:153 if n_bits > 0: fingerprint <<= n_bits
return fingerprint
if __name__ == '__main__':
    import doctest
    doctest.testmod()
Unused argument 'kwargs' Open
self,
- Read up
- Exclude checks
Used when a function or method argument is not used.
Wrong hanging indentation before block (add 4 spaces). Open
self,
- Read up
- Exclude checks
TODO self, ^ |
Wrong hanging indentation before block (add 4 spaces). Open
nltk_tokenizer: Optional[object] = None,
- Read up
- Exclude checks
TODO nltk_tokenizer: Optional[object] = None, ^ |
Wrong hanging indentation before block (add 4 spaces). Open
scaler: Optional[Union[str, Callable[[float], float]]] = None,
- Read up
- Exclude checks
TODO scaler: Optional[Union[str, Callable[[float], float]]] = None, ^ |
Wrong hanging indentation before block (add 4 spaces). Open
corpus: Optional[TCounter[Optional[str]]] = None,
- Read up
- Exclude checks
TODO corpus: Optional[TCounter[Optional[str]]] = None, ^ |
Similar lines in 2 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.phonetic.lein:63 ==abydos.phonetic.refinedsoundex:72 def encode_alpha(self, word: str) -> str: """Return the alphabetic LEIN code for a word.
Parameters
word : str The word to transform
Returns
str The alphabetic LEIN code
Examples
pe = LEIN() pe.encode_alpha('Christopher') 'CLKT' pe.encode_alpha('Niall') 'NL' pe.encode_alpha('Smith') 'SNT' pe.encode_alpha('Schmidt') 'SKNT'
.. versionadded:: 0.4.0
code = self.encode(word).rstrip('0')
return code[:1] + code[1:].translate(self._alphabetic)
def encode(self, word: str) -> str:
"""Return the LEIN code for a word.
Parameters
----------
word : str
The word to transform
Returns
-------
str
The LEIN code
Examples
--------
>>> pe = LEIN()
>>> pe.encode('Christopher')
'C351'
>>> pe.encode('Niall')
'N300'
>>> pe.encode('Smith')
'S210'
>>> pe.encode('Schmidt')
'S521'
.. versionadded:: 0.3.0
.. versionchanged:: 0.3.6
Encapsulated in class
# uppercase, normalize, decompose, and filter non-A-Z out word = unicodenormalize('NFKD', word.upper()) word = ''.join(c for c in word if c in self.uc_set)
Similar lines in 2 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.phonetic.pshpsoundexfirst:203 ==abydos.phonetic.pshpsoundexlast:239 code = code.replace('0', '') # rule 1
if self.maxlength != -1: if len(code) < self.maxlength: code += '0' * (self.maxlength - len(code)) else: code = code[: self.maxlength]
return code
if __name__ == '__main__':
    import doctest
    doctest.testmod()