Showing 4,191 of 4,191 total issues
Wrong hanging indentation before block (add 4 spaces). Open
feat2: int,
- Read upRead up
- Exclude checks
TODO feat2: int, ^ |
Wrong hanging indentation before block (add 4 spaces). Open
word_transform: Optional[Callable[[str], str]] = None,
- Read upRead up
- Exclude checks
TODO word_transform: Optional[Callable[[str], str]] = None, ^ |
Unnecessary elif
after return
Open
if corpus is None:
- Read upRead up
- Exclude checks
Used in order to highlight an unnecessary block of code following an if containing a return statement. As such, it will warn when it encounters an else following a chain of ifs, all of them containing a return statement.
Similar lines in 2 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read upRead up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.phonetic.lein:61 ==abydos.phonetic.phonix:190 self._zero_pad = zero_pad
def encode_alpha(self, word: str) -> str: ```Return the alphabetic LEIN code for a word.
Parameters
word : str The word to transform
Returns
str The alphabetic LEIN code
Examples
>>> pe = LEIN()
>>> pe.encode_alpha('Christopher')
'CLKT'
>>> pe.encode_alpha('Niall')
'NL'
>>> pe.encode_alpha('Smith')
'SNT'
>>> pe.encode_alpha('Schmidt')
'SKNT'
.. versionadded:: 0.4.0
code = self.encode(word).rstrip('0')
return code[:1] + code[1:].translate(self._alphabetic)
def encode(self, word: str) -> str:
```Return the LEIN code for a word.
Parameters
----------
word : str
The word to transform
Returns
-------
str
The LEIN code
Examples
--------
>>> pe = LEIN()
>>> pe.encode('Christopher')
'C351'
>>> pe.encode('Niall')
'N300'
>>> pe.encode('Smith')
'S210'
>>> pe.encode('Schmidt')
'S521'
.. versionadded:: 0.3.0
.. versionchanged:: 0.3.6
Encapsulated in class
# uppercase, normalize, decompose, and filter non-A-Z out
Similar lines in 2 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read upRead up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.distance.levenshtein:323 ==abydos.distance.phoneticeditdistance:253 dmat = cast( np.ndarray, self.alignment_matrix(src, tar, backtrace=False) )
if int(dmat[srclen, tarlen]) == dmat[srclen, tarlen]: return int(dmat[srclen, tarlen]) else: return cast(float, dmat[srclen, tarlen])
def dist(self, src: str, tar: str) -> float: ```Return the normalized phonetic edit distance between two strings.
The edit distance is normalized by dividing the edit distance
(calculated by either of the two supported methods) by the
greater of the number of characters in src times the cost of a delete
and the number of characters in tar times the cost of an insert.
For the case in which all operations have :math:`cost = 1`, this is
equivalent to the greater of the length of the two strings src & tar.
Parameters
src : str Source string for comparison tar : str Target string for comparison
Returns
float The normalized Levenshtein distance between src & tar
Examples
>>> cmp = PhoneticEditDistance()
>>> round(cmp.dist('cat', 'hat'), 12)
0.059139784946
>>> round(cmp.dist('Niall', 'Neil'), 12)
0.232258064516
>>> cmp.dist('aluminum', 'Catalan')
0.3084677419354839
>>> cmp.dist('ATCG', 'TAGC')
0.2983870967741935
.. versionadded:: 0.4.1
Similar lines in 2 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read upRead up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.distance.prefix:32 ==abydos.distance.suffix:32 def sim(self, src: str, tar: str) -> float: ```Return the prefix similarity of two strings.
Prefix similarity is the ratio of the length of the shorter term that exactly matches the longer term to the length of the shorter term, beginning at the start of both terms.
Parameters
src : str Source string for comparison tar : str Target string for comparison
Returns
float Prefix similarity
Examples
>>> cmp = Prefix()
>>> cmp.sim('cat', 'hat')
0.0
>>> cmp.sim('Niall', 'Neil')
0.25
>>> cmp.sim('aluminum', 'Catalan')
0.0
>>> cmp.sim('ATCG', 'TAGC')
0.0
.. versionadded:: 0.1.0
.. versionchanged:: 0.3.6
Encapsulated in class
if src == tar:
return 1.0
if not src or not tar:
return 0.0
min_word, max_word = (src, tar) if len(src) < len(tar) else (tar, src)
min_len = len(min_word)
for i in range(min_len, 0, -1):
Similar lines in 3 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read upRead up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.distance.gotoh:67 ==abydos.distance.needlemanwunsch:150 ==abydos.distance.smithwaterman:65 self.simfunc = cast( Callable[[str, str], float], NeedlemanWunsch.simmatrix if simfunc is None else simfunc, ) # type: Callable[[str, str], float]
def sim_score(self, src: str, tar: str) -> float: ```Return the Gotoh score of two strings.
Parameters
src : str Source string for comparison tar : str Target string for comparison
Returns
float Gotoh score
Examples
>>> cmp = Gotoh()
>>> cmp.sim_score('cat', 'hat')
2.0
>>> cmp.sim_score('Niall', 'Neil')
1.0
>>> round(cmp.sim_score('aluminum', 'Catalan'), 12)
-0.4
>>> cmp.sim_score('cat', 'hat')
2.0
.. versionadded:: 0.1.0
.. versionchanged:: 0.3.6
Encapsulated in class
d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float)
Similar lines in 2 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read upRead up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.phonetic.pshpsoundexfirst:41 ==abydos.phonetic.pshpsoundexlast:41 trans = dict( zip( (ord(_) for _ in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'), '01230120022455012523010202', ) )
alphabetic = dict(zip((ord(_) for _ in '12345'), 'PKTLN'))
def __init__(self, max_length: int = 4, german: bool = False) -> None: ```Initialize PSHPSoundexFirst instance.
Parameters
max_length : int The length of the code returned (defaults to 4) german : bool Set to True if the name is German (different rules apply)
.. versionadded:: 0.4.0
self._max_length = max_length
self._german = german
Similar lines in 2 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read upRead up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.distance.blocklevenshtein:40 ==abydos.distance.dameraulevenshtein:43 def __init__( self, cost: Tuple[float, float, float, float] = (1, 1, 1, 1), normalizer: Callable[[List[float]], float] = max, **kwargs: Any ): ```Initialize BlockLevenshtein instance.
Parameters
**kwargs Arbitrary keyword arguments
.. versionadded:: 0.4.0
Similar lines in 2 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read upRead up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.tokenizer.legalipy:136 ==abydos.tokenizer.sonoripy:101 if not self._ordered_tokens: self._ordered_tokens = [self._string]
self._scale_and_counterize() return self
if __name__ == '__main__': import doctest
doctest.testmod()
Similar lines in 5 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read upRead up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.distance.ncdbwtrle:80 ==abydos.distance.ncdbz2:103 ==abydos.distance.ncdlzma:102 ==abydos.distance.ncdlzss:90 ==abydos.distance.ncdrle:80 return ( min(len(concatcomp), len(concatcomp2)) - min(len(srccomp), len(tarcomp)) ) / max(len(srccomp), len(tarcomp))
if __name__ == '__main__': import doctest
doctest.testmod()
Similar lines in 2 files Open
# Copyright 2014-2020 by Christopher C. Little.
- Read upRead up
- Exclude checks
Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.distance.chebyshev:21 ==abydos.distance.unknown_f:22 from typing import ( Any, Counter as TCounter, NoReturn, Optional, Sequence, Set, Union, )
Wrong hanging indentation before block (add 4 spaces). Open
self, scaler: Optional[Union[str, Callable[[float], float]]] = None,
- Read upRead up
- Exclude checks
TODO self, scaler: Optional[Union[str, Callable[[float], float]]] = None, ^ |
Wrong hanging indentation before block (add 4 spaces). Open
and 'nltk.tokenize' in nltk_tokenizer.__module__
- Read upRead up
- Exclude checks
TODO and 'nltk.tokenize' in nltk_tokenizer.__module__ ^ |
Wrong hanging indentation before block (add 4 spaces). Open
consonants: Optional[Set[str]] = None,
- Read upRead up
- Exclude checks
TODO consonants: Optional[Set[str]] = None, ^ |
Wrong hanging indentation before block (add 4 spaces). Open
self, scaler: Optional[Union[str, Callable[[float], float]]] = None,
- Read upRead up
- Exclude checks
TODO self, scaler: Optional[Union[str, Callable[[float], float]]] = None, ^ |
Wrong hanging indentation before block (add 4 spaces). Open
weights: Optional[
- Read upRead up
- Exclude checks
TODO weights: Optional[ ^ |
Wrong hanging indentation before block (add 4 spaces). Open
and token[0] not in self._vowels
- Read upRead up
- Exclude checks
TODO and token[0] not in self._vowels ^ |
Wrong hanging indentation before block (add 4 spaces). Open
nltk_tokenizer: Optional[object] = None,
- Read upRead up
- Exclude checks
TODO nltk_tokenizer: Optional[object] = None, ^ |