codicefiscale/codicefiscale.py from fabiocaccamo/python-codicefiscale

codicefiscale/codicefiscale.py
Summary

Maintainability

4 hrs
Test Coverage

Issues
from __future__ import annotations

import re
import string
from datetime import datetime, timedelta
from itertools import combinations
from typing import Any, Literal
from re import Pattern

from dateutil import parser as date_parser
from slugify import slugify

from codicefiscale.data import get_indexed_data

# Lowercase ASCII letters treated as consonants / vowels when encoding names.
_CONSONANTS: list[str] = [*"bcdfghjklmnpqrstvwxyz"]
_VOWELS: list[str] = [*"aeiou"]

# Month letters: index 0 is the letter for January, index 11 for December.
_MONTHS: list[str] = [*"ABCDEHLMPRST"]

# Check-character (CIN) lookup table: character -> (value used when the
# character sits at an even 1-based position, value used at an odd one).
# A digit and the letter of the same rank ("0"/"A", "1"/"B", ...) share the
# same pair, and the even-position value is the rank itself.
_CIN: dict[str, tuple[int, int]] = {
    char: (rank, odd_value)
    for alphabet in (string.digits, string.ascii_uppercase)
    for rank, (char, odd_value) in enumerate(
        zip(
            alphabet,
            # fmt: off
            (
                1, 0, 5, 7, 9, 13, 15, 17, 19, 21,  # 0-9 and A-J
                2, 4, 18, 20, 11, 3, 6, 8, 12, 14,  # K-T
                16, 10, 22, 25, 24, 23,  # U-Z
            ),
            # fmt: on
        )
    )
}

# Check letter for each possible remainder (0-25) of the CIN total.
_CIN_REMAINDERS: list[str] = [*string.ascii_uppercase]

# Omocodia: digit -> letter replacements applied to the numeric characters of
# a code in order to derive its alternative ("omocode") forms.
_OMOCODIA: dict[str, str] = {
    "0": "L",
    "1": "M",
    "2": "N",
    "3": "P",
    "4": "Q",
    "5": "R",
    "6": "S",
    "7": "T",
    "8": "U",
    "9": "V",
}
_OMOCODIA_DIGITS: str = "".join(_OMOCODIA)
_OMOCODIA_LETTERS: str = "".join(_OMOCODIA.values())
# str.translate() tables: digits -> letters (encode), letters -> digits (decode).
_OMOCODIA_ENCODE_TRANS: dict[int, int] = str.maketrans(
    _OMOCODIA_DIGITS, _OMOCODIA_LETTERS
)
_OMOCODIA_DECODE_TRANS: dict[int, int] = str.maketrans(
    _OMOCODIA_LETTERS, _OMOCODIA_DIGITS
)
# 0-based positions of the code that may be substituted (year, day and the
# last three birthplace characters), rightmost first.
_OMOCODIA_SUBS_INDEXES: list[int] = list(reversed([6, 7, 9, 10, 12, 13, 14]))
# Every subset of the positions above, smallest subsets first. The leading
# empty subset means "no substitution". Built with a comprehension so that no
# loop variable is left behind in the module namespace.
_OMOCODIA_SUBS_INDEXES_COMBINATIONS: list[list[int]] = [
    [],
    *(
        list(combo)
        for combo_size in range(1, len(_OMOCODIA_SUBS_INDEXES) + 1)
        for combo in combinations(_OMOCODIA_SUBS_INDEXES, combo_size)
    ),
]


# Birthplace lookup tables, loaded once at import time. This module reads the
# "municipalities" and "countries" indexes by slugified name and the "codes"
# index by upper-cased code; every entry is a list of candidate records that
# carry at least the "code", "date_created" and "date_deleted" keys.
_DATA: dict[str, dict[str, list[dict[str, Any]]]] = get_indexed_data()

# Syntax of a codice fiscale, matched case-insensitively against the whole
# string: 3 letters (lastname), 3 letters (firstname), 2 characters (year),
# 1 month letter, 2 characters (day), 1 letter + 3 characters (birthplace)
# and 1 letter (check character). Year, day and the last three birthplace
# characters accept letters as well as digits; decode() maps them back to
# digits through _OMOCODIA_DECODE_TRANS.
CODICEFISCALE_RE: Pattern[str] = re.compile(
    r"^"
    r"(?P<lastname>[a-z]{3})"
    r"(?P<firstname>[a-z]{3})"
    r"(?P<birthdate>(?P<birthdate_year>[a-z\d]{2})(?P<birthdate_month>[abcdehlmprst]{1})(?P<birthdate_day>[a-z\d]{2}))"  # noqa: B950, E501
    r"(?P<birthplace>[a-z]{1}[a-z\d]{3})"
    r"(?P<cin>[a-z]{1})$",
    re.IGNORECASE,
)


def _get_consonants(s: str) -> list[str]:
    """
    Return the characters of ``s`` that are listed in ``_CONSONANTS``.

    Order and repetitions are preserved. The comparison is case-sensitive and
    ``_CONSONANTS`` holds lowercase letters only.
    """
    return [letter for letter in s if letter in _CONSONANTS]


def _get_vowels(s: str) -> list[str]:
    """
    Return the characters of ``s`` that are listed in ``_VOWELS``.

    Order and repetitions are preserved. The comparison is case-sensitive and
    ``_VOWELS`` holds lowercase letters only.
    """
    return [letter for letter in s if letter in _VOWELS]


def _get_consonants_and_vowels(
    consonants: list[str],
    vowels: list[str],
) -> str:
    """
    Build a three-character, upper-cased name code.

    Consonants are used first, then vowels, and any missing character is
    filled with "X".
    """
    padding = ["X", "X", "X"]
    candidates = consonants[:3] + vowels[:3] + padding
    return "".join(candidates[:3]).upper()


def _get_date(
    date: datetime | str | None,
    separator: str = "-",
) -> datetime | None:
    if not date:
        return None
    if isinstance(date, datetime):
        date = date.replace(tzinfo=None)
        return date
    date_slug = slugify(date)
    date_parts = date_slug.split("-")[:3]
    date_parser_options = (
        {
            "yearfirst": True,
        }
        if len(date_parts[0]) == 4
        else {
            "dayfirst": True,
        }
    )
    try:
        date_obj = date_parser.parse(
            date_slug,
            parserinfo=date_parser.parserinfo(**date_parser_options),
        )
        date_obj = date_obj.replace(tzinfo=None)
        return date_obj
    except ValueError:
        return None


def _get_birthplace(
    birthplace: str,
    birthdate: datetime | str | None = None,
) -> dict[str, dict[str, Any]] | None:
    """
    Find the birthplace record matching a name or a code.

    :param birthplace: Municipality name, country name or birthplace code
    :type birthplace: string
    :param birthdate: Optional date used to choose among several records
    :type birthdate: datetime or string

    :returns: A copy of the matching record, or None when nothing matches
    :rtype: dict or None
    """
    # NOTE(review): the records come from lists of dict[str, Any] (see the
    # annotation of _DATA), so the declared return type looks wider than what
    # is actually returned - confirm and narrow it.
    slug = slugify(birthplace)
    # lookup priority: municipality name, then country name, then code
    by_code = _DATA["codes"].get(slug.upper())
    by_country = _DATA["countries"].get(slug, by_code)
    candidates = _DATA["municipalities"].get(slug, by_country)
    if not candidates:
        return None

    when = _get_date(birthdate)
    if not when:
        # no usable date: take the first record
        return candidates[0].copy()

    # pick the record whose [date_created, date_deleted] range contains the
    # date; a missing bound is treated as open-ended
    for candidate in candidates:
        active_from = _get_date(candidate["date_created"]) or datetime.min
        active_until = _get_date(candidate["date_deleted"]) or datetime.max
        if active_from <= when <= active_until:
            return candidate.copy()

    return _get_birthplace_fallback(candidates, when)


def _get_birthplace_fallback(
    birthplaces_options: list[dict[str, Any]],
    birthdate_date: datetime,
) -> dict[str, dict[str, Any]] | None:
    # avoid wrong birthplace code error when birthdate falls in
    # missing date-range in the data-source even if birthplace code is valid
    if len(birthplaces_options) > 1:
        for index in range(len(birthplaces_options) - 1):
            birthplace_option = birthplaces_options[index]
            birthplace_option_next = birthplaces_options[(index + 1)]
            date_deleted = _get_date(birthplace_option["date_deleted"])
            date_created = _get_date(birthplace_option_next["date_created"])
            if date_deleted and date_created:
                if birthdate_date >= date_deleted and date_deleted <= date_created:
                    # the birthdate is in between a deleted munipality and a created one
                    # if the deleted one has a very short active time delta,
                    # it means that probably the deleted_at value is wrong
                    date_created = _get_date(birthplace_option["date_created"])
                    date_deleted = _get_date(birthplace_option["date_deleted"])
                    if date_created and date_deleted:
                        date_delta = date_deleted - date_created
                        if date_delta <= timedelta(days=1):
                            return birthplace_option.copy()
                    return birthplace_option_next.copy()

    return None


def _get_omocode(
    code: str,
    subs: list[int],
    trans: dict[int, int],
) -> str:
    """
    Translate the characters at the given positions and recompute the CIN.

    Only the first 15 characters of ``code`` are used: each position listed
    in ``subs`` is passed through ``trans`` and the check character computed
    by ``encode_cin`` is appended to the result.
    """
    chars = list(code[:15])
    for position in subs:
        chars[position] = chars[position].translate(trans)
    body = "".join(chars)
    return body + encode_cin(body)


def _get_omocodes(code: str) -> list[str]:
    """
    Return every omocode variant of ``code``, one per positions combination.

    The code is first brought back to its root form by decoding all the
    substitutable positions, then re-encoded once for each entry of
    ``_OMOCODIA_SUBS_INDEXES_COMBINATIONS`` (in that order).
    """
    root_code = _get_omocode(
        code,
        subs=_OMOCODIA_SUBS_INDEXES,
        trans=_OMOCODIA_DECODE_TRANS,
    )
    return [
        _get_omocode(root_code, subs=positions, trans=_OMOCODIA_ENCODE_TRANS)
        for positions in _OMOCODIA_SUBS_INDEXES_COMBINATIONS
    ]


def encode_lastname(lastname: str) -> str:
    """
    Build the lastname part of the italian fiscal code.

    The slugified lastname contributes its consonants first, then its vowels;
    the result is padded with "X" up to three characters.

    :param lastname: The lastname
    :type lastname: string

    :returns: The three-character lastname code
    :rtype: string
    """
    slug = slugify(lastname)
    return _get_consonants_and_vowels(_get_consonants(slug), _get_vowels(slug))


def encode_firstname(firstname: str) -> str:
    """
    Build the firstname part of the italian fiscal code.

    Same scheme as the lastname, except that the second consonant is skipped
    when the slugified firstname has more than three consonants.

    :param firstname: The firstname
    :type firstname: string

    :returns: The three-character firstname code
    :rtype: string
    """
    slug = slugify(firstname)
    consonants = _get_consonants(slug)
    if len(consonants) > 3:
        # keep the first consonant, drop the second, keep the rest
        consonants = consonants[:1] + consonants[2:]
    return _get_consonants_and_vowels(consonants, _get_vowels(slug))


def encode_birthdate(
    birthdate: datetime | str | None,
    gender: Literal["m", "M", "f", "F"],
) -> str:
    """
    Encodes birthdate to the code used in italian fiscal code.

    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param gender: The gender, 'M' or 'F'
    :type gender: string

    :returns: The code used in italian fiscal code
    :rtype: string
    """
    if not birthdate:
        raise ValueError("[codicefiscale] 'birthdate' argument cant be None")
    date = _get_date(birthdate)
    if not date:
        raise ValueError("[codicefiscale] 'date' argument cant be None")

    if not gender:
        raise ValueError("[codicefiscale] 'gender' argument cant be None")
    gender_code = gender.upper()
    if gender_code not in ("M", "F"):
        raise ValueError("[codicefiscale] 'gender' argument must be 'M' or 'F'")

    year_code = str(date.year)[2:]
    month_code = _MONTHS[date.month - 1]
    day_code = str(date.day + (40 if gender_code == "F" else 0)).zfill(2).upper()
    date_code = f"{year_code}{month_code}{day_code}"
    return date_code


def encode_birthplace(
    birthplace: str,
    birthdate: datetime | str | None = None,
) -> str | None:
    """
    Encodes birthplace to the code used in italian fiscal code.

    :param birthplace: The birthplace
    :type birthplace: string

    :returns: The code used in italian fiscal code
    :rtype: string
    """
    if not birthplace:
        raise ValueError("[codicefiscale] 'birthplace' argument cant be None")

    birthplace_without_province = re.split(r",|\(", birthplace)[0]
    birthplace_data = _get_birthplace(
        birthplace,
        birthdate,
    ) or _get_birthplace(
        birthplace_without_province,
        birthdate,
    )

    if not birthplace_data:
        raise ValueError(
            "[codicefiscale] 'birthplace' / 'birthdate' arguments "
            f"({birthplace!r} / {birthdate!r}) not mapped to code"
        )

    birthplace_code = str(birthplace_data["code"])
    return birthplace_code


def encode_cin(code: str) -> str:
    """
    Compute the check character (CIN) of an italian fiscal code.

    :param code: The code, 15 characters long (or 16, in which case the last
        character is ignored)
    :type code: string

    :returns: The check character
    :rtype: string

    :raises ValueError: if the code is empty or its length is not 15 or 16
    """
    if not code:
        raise ValueError("[codicefiscale] 'code' argument cant be None")

    code_len = len(code)
    if code_len not in (15, 16):
        raise ValueError(
            f"[codicefiscale] 'code' length must be 15 or 16, not: {code_len}"
        )

    # each _CIN entry is (even-position value, odd-position value), where
    # positions are counted from 1
    cin_tot = sum(
        _CIN[char][position % 2]
        for position, char in enumerate(code[:15], start=1)
    )
    return _CIN_REMAINDERS[cin_tot % 26]


def encode(
    lastname: str,
    firstname: str,
    gender: Literal["m", "M", "f", "F"],
    birthdate: datetime | str | None,
    birthplace: str,
) -> str:
    """
    Build the italian fiscal code of a person.

    :param lastname: The lastname
    :type lastname: string
    :param firstname: The firstname
    :type firstname: string
    :param gender: The gender, 'M' or 'F'
    :type gender: string
    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param birthplace: The birthplace
    :type birthplace: string

    :returns: The italian fiscal code
    :rtype: string

    :raises ValueError: if an argument cannot be encoded or if the resulting
        code does not decode successfully
    """
    parts = (
        encode_lastname(lastname),
        encode_firstname(firstname),
        encode_birthdate(birthdate, gender),
        encode_birthplace(birthplace, birthdate),
    )
    partial_code = "".join(parts)
    code = partial_code + encode_cin(partial_code)

    # decoding is used as validation: it raises ValueError on an invalid code
    decode(code)
    return code


def decode_raw(code: str) -> dict[str, str]:
    """
    Split a fiscal code into its raw, still encoded, parts.

    The code is normalized first: slugified, stripped of "-" and upper-cased.

    :param code: The code
    :type code: string

    :returns: The normalized code under "code", plus one entry for each named
        group of ``CODICEFISCALE_RE``
    :rtype: dict

    :raises ValueError: if the normalized code does not match the syntax
    """
    code = slugify(code).replace("-", "").upper()

    match = CODICEFISCALE_RE.match(code)
    if match is None:
        raise ValueError(f"[codicefiscale] invalid syntax: {code}")

    data = {"code": code}
    for field in (
        "lastname",
        "firstname",
        "birthdate",
        "birthdate_year",
        "birthdate_month",
        "birthdate_day",
        "birthplace",
        "cin",
    ):
        data[field] = match[field]
    return data


def decode(code: str) -> dict[str, Any]:
    """
    Decode an italian fiscal code and validate it.

    :param code: The code
    :type code: string

    :returns: A dict with the normalized "code", its "omocodes", the "gender",
        the "birthdate", the "birthplace" record and the "raw" parts
    :rtype: dict

    :raises ValueError: if the syntax, the date, the birthplace code or the
        check character is not valid
    """
    raw = decode_raw(code)
    code = raw["code"]

    # year and day may carry omocodia letters in place of digits
    year_suffix = int(raw["birthdate_year"].translate(_OMOCODIA_DECODE_TRANS))
    month = _MONTHS.index(raw["birthdate_month"]) + 1
    day = int(raw["birthdate_day"].translate(_OMOCODIA_DECODE_TRANS))

    # a day above 40 marks a female: the real day is 40 less
    gender = "F" if day > 40 else "M"
    if gender == "F":
        day -= 40

    # only two year digits are stored: start from the current century and
    # step back one century when that would give a year in the future
    this_year = datetime.now().year
    year = (this_year // 100) * 100 + year_suffix
    if year > this_year:
        year -= 100

    birthdate_str = f"{year}/{month}/{day}"
    birthdate = _get_date(birthdate_str, separator="/")
    if not birthdate:
        raise ValueError(f"[codicefiscale] invalid date: {birthdate_str}")

    # the leading birthplace letter is kept as is, the rest is decoded
    place_chars = raw["birthplace"]
    birthplace_code = place_chars[0] + place_chars[1:].translate(
        _OMOCODIA_DECODE_TRANS
    )
    birthplace = _get_birthplace(birthplace_code, birthdate)
    if not birthplace:
        raise ValueError(
            "[codicefiscale] wrong birthplace code: "
            f"{birthplace_code!r} / birthdate: {birthdate.isoformat()!r}."
        )

    found_cin = raw["cin"]
    expected_cin = encode_cin(code)
    if found_cin != expected_cin:
        raise ValueError(
            "[codicefiscale] wrong CIN (Control Internal Number): "
            f"expected {expected_cin!r}, found {found_cin!r}"
        )

    return {
        "code": code,
        "omocodes": _get_omocodes(code),
        "gender": gender,
        "birthdate": birthdate,
        "birthplace": birthplace,
        "raw": raw,
    }


def is_omocode(code: str) -> bool:
    """
    Determines whether the specified code is omocode or not.

    :param code: The code
    :type code: string

    :returns: True if the specified code is omocode, False otherwise.
    :rtype: boolean

    :raises ValueError: if the code is not valid
    """
    data = decode(code)
    # The omocodes are generated from the normalized code (separators
    # removed, upper-cased), so the normalized code must be compared, not the
    # caller's raw input. The first entry is the code with no substitution
    # applied, hence not an omocode.
    return data["code"] in data["omocodes"][1:]


def is_valid(code: str) -> bool:
    """
    Tell whether the specified code decodes without errors.

    :param code: The code
    :type code: string

    :returns: True if ``decode`` accepts the code, False if it raises
        ValueError.
    :rtype: boolean
    """
    try:
        decode(code)
    except ValueError:
        return False
    return True