chrislit/abydos

View on GitHub

Showing 4,191 of 4,191 total issues

Wrong hanging indentation before block (add 4 spaces).
Open

                    (
Severity: Info
Found in abydos/tokenizer/_saps.py by pylint

TODO ( ^ |

Wrong hanging indentation before block (add 4 spaces).
Open

    feat2: int,
Severity: Info
Found in abydos/phones/_phones.py by pylint

TODO feat2: int, ^ |

Wrong hanging indentation before block (add 4 spaces).
Open

        word_transform: Optional[Callable[[str], str]] = None,
Severity: Info
Found in abydos/corpus/_unigram_corpus.py by pylint

TODO word_transform: Optional[Callable[[str], str]] = None, ^ |

Unnecessary elif after return
Open

        if corpus is None:
Severity: Info
Found in abydos/corpus/_ngram_corpus.py by pylint

Used in order to highlight an unnecessary block of code following an if containing a return statement. As such, it will warn when it encounters an else following a chain of ifs, all of them containing a return statement.

Similar lines in 2 files
Open

# Copyright 2014-2020 by Christopher C. Little.
Severity: Info
Found in abydos/compression/_rle.py by pylint

Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.phonetic.lein:61 ==abydos.phonetic.phonix:190 self.zeropad = zero_pad

def encode_alpha(self, word: str) -> str: ```Return the alphabetic LEIN code for a word.

Parameters


word : str The word to transform

Returns


str The alphabetic LEIN code

Examples


pe = LEIN() pe.encode_alpha('Christopher') 'CLKT' pe.encode_alpha('Niall') 'NL' pe.encode_alpha('Smith') 'SNT' pe.encode_alpha('Schmidt') 'SKNT'

.. versionadded:: 0.4.0

code = self.encode(word).rstrip('0')
 return code[:1] + code[1:].translate(self._alphabetic)

 def encode(self, word: str) -> str:
 ```Return the LEIN code for a word.

 Parameters
 ----------
 word : str
 The word to transform

 Returns
 -------
 str
 The LEIN code

 Examples
 --------
 >>> pe = LEIN()
 >>> pe.encode('Christopher')
 'C351'
 >>> pe.encode('Niall')
 'N300'
 >>> pe.encode('Smith')
 'S210'
 >>> pe.encode('Schmidt')
 'S521'


 .. versionadded:: 0.3.0
 .. versionchanged:: 0.3.6
 Encapsulated in class

# uppercase, normalize, decompose, and filter non-A-Z out

Similar lines in 2 files
Open

# Copyright 2014-2020 by Christopher C. Little.
Severity: Info
Found in abydos/compression/_rle.py by pylint

Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.distance.levenshtein:323 ==abydos.distance.phoneticeditdistance:253 dmat = cast( np.ndarray, self.alignment_matrix(src, tar, backtrace=False) )

if int(dmat[srclen, tarlen]) == dmat[srclen, tarlen]: return int(dmat[srclen, tarlen]) else: return cast(float, dmat[srclen, tarlen])

def dist(self, src: str, tar: str) -> float: ```Return the normalized phonetic edit distance between two strings.

The edit distance is normalized by dividing the edit distance (calculated by either of the two supported methods) by the greater of the number of characters in src times the cost of a delete and the number of characters in tar times the cost of an insert. For the case in which all operations have :math:`cost = 1`, this is equivalent to the greater of the length of the two strings src & tar.

Parameters


src : str Source string for comparison tar : str Target string for comparison

Returns


float The normalized Levenshtein distance between src & tar

Examples


cmp = PhoneticEditDistance() round(cmp.dist('cat', 'hat'), 12) 0.059139784946 round(cmp.dist('Niall', 'Neil'), 12) 0.232258064516 cmp.dist('aluminum', 'Catalan') 0.3084677419354839 cmp.dist('ATCG', 'TAGC') 0.2983870967741935

.. versionadded:: 0.4.1

Similar lines in 2 files
Open

# Copyright 2014-2020 by Christopher C. Little.
Severity: Info
Found in abydos/compression/_rle.py by pylint

Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.distance.prefix:32 ==abydos.distance.suffix:32 def sim(self, src: str, tar: str) -> float: ```Return the prefix similarity of two strings.

Prefix similarity is the ratio of the length of the shorter term that exactly matches the longer term to the length of the shorter term, beginning at the start of both terms.

Parameters


src : str Source string for comparison tar : str Target string for comparison

Returns


float Prefix similarity

Examples


cmp = Prefix() cmp.sim('cat', 'hat') 0.0 cmp.sim('Niall', 'Neil') 0.25 cmp.sim('aluminum', 'Catalan') 0.0 cmp.sim('ATCG', 'TAGC') 0.0

.. versionadded:: 0.1.0 .. versionchanged:: 0.3.6 Encapsulated in class

if src == tar:
 return 1.0
 if not src or not tar:
 return 0.0
 min_word, max_word = (src, tar) if len(src) < len(tar) else (tar, src)
 min_len = len(min_word)
 for i in range(min_len, 0, -1):

Similar lines in 3 files
Open

# Copyright 2014-2020 by Christopher C. Little.
Severity: Info
Found in abydos/compression/_rle.py by pylint

Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.distance.gotoh:67 ==abydos.distance.needlemanwunsch:150 ==abydos.distance.smithwaterman:65 self.simfunc = cast( Callable[[str, str], float], NeedlemanWunsch.simmatrix if simfunc is None else simfunc, ) # type: Callable[[str, str], float]

def sim_score(self, src: str, tar: str) -> float: ```Return the Gotoh score of two strings.

Parameters


src : str Source string for comparison tar : str Target string for comparison

Returns


float Gotoh score

Examples


cmp = Gotoh() cmp.sim_score('cat', 'hat') 2.0 cmp.sim_score('Niall', 'Neil') 1.0 round(cmp.sim_score('aluminum', 'Catalan'), 12) -0.4 cmp.sim_score('cat', 'hat') 2.0

.. versionadded:: 0.1.0 .. versionchanged:: 0.3.6 Encapsulated in class

d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float)

Similar lines in 2 files
Open

# Copyright 2014-2020 by Christopher C. Little.
Severity: Info
Found in abydos/compression/_rle.py by pylint

Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.phonetic.pshpsoundexfirst:41 ==abydos.phonetic.pshpsoundexlast:41 trans = dict( zip( (ord(_) for _ in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'), '01230120022455012523010202', ) )

alphabetic = dict(zip((ord(_) for _ in '12345'), 'PKTLN'))

def __init__(self, max_length: int = 4, german: bool = False) -> None: ```Initialize PSHPSoundexFirst instance.

Parameters


max_length : int The length of the code returned (defaults to 4) german : bool Set to True if the name is German (different rules apply)

.. versionadded:: 0.4.0

self._max_length = max_length
 self._german = german

Similar lines in 2 files
Open

# Copyright 2014-2020 by Christopher C. Little.
Severity: Info
Found in abydos/compression/_rle.py by pylint

Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.distance.blocklevenshtein:40 ==abydos.distance.dameraulevenshtein:43 def __init__( self, cost: Tuple[float, float, float, float] = (1, 1, 1, 1), normalizer: Callable[[List[float]], float] = max, **kwargs: Any ): ```Initialize BlockLevenshtein instance.

Parameters


**kwargs Arbitrary keyword arguments

.. versionadded:: 0.4.0

Similar lines in 2 files
Open

# Copyright 2014-2020 by Christopher C. Little.
Severity: Info
Found in abydos/compression/_rle.py by pylint

Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.tokenizer.legalipy:136 ==abydos.tokenizer.sonoripy:101 if not self.orderedtokens: self.orderedtokens = [self._string]

self.scaleand_counterize() return self

if __name__ == '__main__': import doctest

doctest.testmod()

Similar lines in 5 files
Open

# Copyright 2014-2020 by Christopher C. Little.
Severity: Info
Found in abydos/compression/_rle.py by pylint

Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.distance.ncdbwtrle:80 ==abydos.distance.ncdbz2:103 ==abydos.distance.ncdlzma:102 ==abydos.distance.ncdlzss:90 ==abydos.distance.ncdrle:80 return ( min(len(concatcomp), len(concatcomp2)) - min(len(srccomp), len(tarcomp)) ) / max(len(srccomp), len(tarcomp))

if __name__ == '__main__': import doctest

doctest.testmod()

Similar lines in 2 files
Open

# Copyright 2014-2020 by Christopher C. Little.
Severity: Info
Found in abydos/compression/_rle.py by pylint

Indicates that a set of similar lines has been detected among multiple files. This usually means that the code should be refactored to avoid this duplication. ==abydos.distance.chebyshev:21 ==abydos.distance.unknown_f:22 from typing import ( Any, Counter as TCounter, NoReturn, Optional, Sequence, Set, Union, )

Wrong hanging indentation before block (add 4 spaces).
Open

        self, scaler: Optional[Union[str, Callable[[float], float]]] = None,
Severity: Info
Found in abydos/tokenizer/_character.py by pylint

TODO self, scaler: Optional[Union[str, Callable[[float], float]]] = None, ^ |

Wrong hanging indentation before block (add 4 spaces).
Open

            and 'nltk.tokenize' in nltk_tokenizer.__module__
Severity: Info
Found in abydos/tokenizer/_nltk.py by pylint

TODO and 'nltk.tokenize' in nltk_tokenizer.__module__ ^ |

Wrong hanging indentation before block (add 4 spaces).
Open

        consonants: Optional[Set[str]] = None,
Severity: Info
Found in abydos/tokenizer/_cv_cluster.py by pylint

TODO consonants: Optional[Set[str]] = None, ^ |

Wrong hanging indentation before block (add 4 spaces).
Open

        self, scaler: Optional[Union[str, Callable[[float], float]]] = None,
Severity: Info
Found in abydos/tokenizer/_saps.py by pylint

TODO self, scaler: Optional[Union[str, Callable[[float], float]]] = None, ^ |

Wrong hanging indentation before block (add 4 spaces).
Open

    weights: Optional[
Severity: Info
Found in abydos/phones/_phones.py by pylint

TODO weights: Optional[ ^ |

Wrong hanging indentation before block (add 4 spaces).
Open

                and token[0] not in self._vowels
Severity: Info
Found in abydos/tokenizer/_cv_cluster.py by pylint

TODO and token[0] not in self._vowels ^ |

Wrong hanging indentation before block (add 4 spaces).
Open

        nltk_tokenizer: Optional[object] = None,
Severity: Info
Found in abydos/tokenizer/_nltk.py by pylint

TODO nltk_tokenizer: Optional[object] = None, ^ |

Severity
Category
Status
Source
Language