codicefiscale/codicefiscale.py
from __future__ import annotations
import re
import string
from datetime import datetime, timedelta
from itertools import combinations
from typing import Any, Literal
from re import Pattern
from dateutil import parser as date_parser
from slugify import slugify
from codicefiscale.data import get_indexed_data
_CONSONANTS: list[str] = list("bcdfghjklmnpqrstvwxyz")
_VOWELS: list[str] = list("aeiou")
_MONTHS: list[str] = list("ABCDEHLMPRST")
_CIN: dict[str, tuple[int, int]] = {
"0": (0, 1),
"1": (1, 0),
"2": (2, 5),
"3": (3, 7),
"4": (4, 9),
"5": (5, 13),
"6": (6, 15),
"7": (7, 17),
"8": (8, 19),
"9": (9, 21),
"A": (0, 1),
"B": (1, 0),
"C": (2, 5),
"D": (3, 7),
"E": (4, 9),
"F": (5, 13),
"G": (6, 15),
"H": (7, 17),
"I": (8, 19),
"J": (9, 21),
"K": (10, 2),
"L": (11, 4),
"M": (12, 18),
"N": (13, 20),
"O": (14, 11),
"P": (15, 3),
"Q": (16, 6),
"R": (17, 8),
"S": (18, 12),
"T": (19, 14),
"U": (20, 16),
"V": (21, 10),
"W": (22, 22),
"X": (23, 25),
"Y": (24, 24),
"Z": (25, 23),
}
_CIN_REMAINDERS: list[str] = list(string.ascii_uppercase)
_OMOCODIA: dict[str, str] = {
"0": "L",
"1": "M",
"2": "N",
"3": "P",
"4": "Q",
"5": "R",
"6": "S",
"7": "T",
"8": "U",
"9": "V",
}
_OMOCODIA_DIGITS: str = "".join(list(_OMOCODIA))
_OMOCODIA_LETTERS: str = "".join([_OMOCODIA[digit] for digit in _OMOCODIA])
_OMOCODIA_ENCODE_TRANS: dict[int, int] = "".maketrans(
_OMOCODIA_DIGITS, _OMOCODIA_LETTERS
)
_OMOCODIA_DECODE_TRANS: dict[int, int] = "".maketrans(
_OMOCODIA_LETTERS, _OMOCODIA_DIGITS
)
_OMOCODIA_SUBS_INDEXES: list[int] = list(reversed([6, 7, 9, 10, 12, 13, 14]))
_OMOCODIA_SUBS_INDEXES_COMBINATIONS: list[list[int]] = [[]]
for combo_size in range(1, len(_OMOCODIA_SUBS_INDEXES) + 1):
for combo in combinations(_OMOCODIA_SUBS_INDEXES, combo_size):
_OMOCODIA_SUBS_INDEXES_COMBINATIONS.append(list(combo))
# Birthplace lookup tables, loaded once when the module is imported.
# The code in this module reads three indexes from it: "municipalities" and
# "countries" (both queried with a slugified name) and "codes" (queried with
# an upper-cased code). Each index entry is a list of candidate records.
_DATA: dict[str, dict[str, list[dict[str, Any]]]] = get_indexed_data()
# Case-insensitive pattern matching a whole 16-character fiscal code.
# Named groups, in order of appearance:
#   lastname   - 3 letters
#   firstname  - 3 letters
#   birthdate  - 5 characters, also split into birthdate_year (2 letters or
#                digits), birthdate_month (one of the month letters) and
#                birthdate_day (2 letters or digits)
#   birthplace - 1 letter followed by 3 letters or digits
#   cin        - 1 letter
# Year, day and birthplace accept letters as well as digits; decode()
# translates omocodia letters found there back to digits.
CODICEFISCALE_RE: Pattern[str] = re.compile(
r"^"
r"(?P<lastname>[a-z]{3})"
r"(?P<firstname>[a-z]{3})"
r"(?P<birthdate>(?P<birthdate_year>[a-z\d]{2})(?P<birthdate_month>[abcdehlmprst]{1})(?P<birthdate_day>[a-z\d]{2}))" # noqa: B950, E501
r"(?P<birthplace>[a-z]{1}[a-z\d]{3})"
r"(?P<cin>[a-z]{1})$",
re.IGNORECASE,
)
def _get_consonants(s: str) -> list[str]:
    """
    Collect the consonants of a string, keeping their original order.

    Matching is done against the lowercase ``_CONSONANTS`` table, so
    uppercase letters are not picked up.

    :param s: The string to scan
    :type s: string
    :returns: The consonants found, one item per occurrence
    :rtype: list
    """
    return list(filter(_CONSONANTS.__contains__, s))
def _get_vowels(s: str) -> list[str]:
    """
    Collect the vowels of a string, keeping their original order.

    Matching is done against the lowercase ``_VOWELS`` table, so uppercase
    letters are not picked up.

    :param s: The string to scan
    :type s: string
    :returns: The vowels found, one item per occurrence
    :rtype: list
    """
    return list(filter(_VOWELS.__contains__, s))
def _get_consonants_and_vowels(
consonants: list[str],
vowels: list[str],
) -> str:
return "".join(list(consonants[:3] + vowels[:3] + (["X"] * 3))[:3]).upper()
def _get_date(
date: datetime | str | None,
separator: str = "-",
) -> datetime | None:
if not date:
return None
if isinstance(date, datetime):
date = date.replace(tzinfo=None)
return date
date_slug = slugify(date)
date_parts = date_slug.split("-")[:3]
date_parser_options = (
{
"yearfirst": True,
}
if len(date_parts[0]) == 4
else {
"dayfirst": True,
}
)
try:
date_obj = date_parser.parse(
date_slug,
parserinfo=date_parser.parserinfo(**date_parser_options),
)
date_obj = date_obj.replace(tzinfo=None)
return date_obj
except ValueError:
return None
def _get_birthplace(
    birthplace: str,
    birthdate: datetime | str | None = None,
) -> dict[str, dict[str, Any]] | None:
    """
    Look up the birthplace record matching a name or code and a birthdate.

    The slugified value is searched among municipalities first, then among
    countries, and finally (upper-cased) among codes.

    :param birthplace: The birthplace name or code
    :type birthplace: string
    :param birthdate: The birthdate used to pick among several records
    :type birthdate: datetime or string
    :returns: A shallow copy of the selected record, or None if not found
    :rtype: dict or None
    """
    # NOTE(review): records are read as flat mappings below (e.g.
    # option["date_created"]), so the declared return type looks over-nested
    # - confirm before relying on it.
    slug = slugify(birthplace)
    lookups = (
        (_DATA["municipalities"], slug),
        (_DATA["countries"], slug),
        (_DATA["codes"], slug.upper()),
    )
    candidates = next(
        (index[key] for index, key in lookups if key in index),
        None,
    )
    if not candidates:
        return None
    reference_date = _get_date(birthdate)
    if not reference_date:
        # no usable birthdate: the first record is taken
        return candidates[0].copy()
    # pick the record that was active (created before / deleted after)
    # on the birthdate; missing bounds are treated as open-ended
    for candidate in candidates:
        active_from = _get_date(candidate["date_created"]) or datetime.min
        active_until = _get_date(candidate["date_deleted"]) or datetime.max
        if active_from <= reference_date <= active_until:
            return candidate.copy()
    return _get_birthplace_fallback(candidates, reference_date)
def _get_birthplace_fallback(
    birthplaces_options: list[dict[str, Any]],
    birthdate_date: datetime,
) -> dict[str, dict[str, Any]] | None:
    """
    Pick a birthplace record when no record's date range covers the birthdate.

    Avoids a "wrong birthplace code" error when the birthdate falls in a
    date range missing from the data source even though the code is valid.

    :param birthplaces_options: The candidate records, in data-source order
    :type birthplaces_options: list
    :param birthdate_date: The birthdate to place among the records
    :type birthdate_date: datetime
    :returns: A shallow copy of the selected record, or None
    :rtype: dict or None
    """
    # walk consecutive pairs of records (nothing to do with fewer than two)
    for current, following in zip(birthplaces_options, birthplaces_options[1:]):
        deleted_at = _get_date(current["date_deleted"])
        next_created_at = _get_date(following["date_created"])
        if not (deleted_at and next_created_at):
            continue
        # NOTE(review): the second comparison involves the two record dates,
        # not the birthdate, so any birthdate on/after deleted_at qualifies
        # once the records are in chronological order - confirm intended.
        if birthdate_date < deleted_at or deleted_at > next_created_at:
            continue
        # the birthdate is in between a deleted municipality and a created
        # one: if the deleted one has been active for a very short time,
        # its deletion date is probably wrong, so it is preferred
        created_at = _get_date(current["date_created"])
        if created_at and deleted_at - created_at <= timedelta(days=1):
            return current.copy()
        return following.copy()
    return None
def _get_omocode(
    code: str,
    subs: list[int],
    trans: dict[int, int],
) -> str:
    """
    Apply a translation table to selected positions and recompute the CIN.

    :param code: The fiscal code (only its first 15 characters are used)
    :type code: string
    :param subs: The positions to translate
    :type subs: list
    :param trans: The ``str.translate`` table applied at those positions
    :type trans: dict
    :returns: The 15 translated characters followed by their check character
    :rtype: string
    """
    chars = list(code[:15])
    for position in subs:
        chars[position] = chars[position].translate(trans)
    body = "".join(chars)
    return body + encode_cin(body)
def _get_omocodes(code: str) -> list[str]:
    """
    List every omocode variant of a fiscal code.

    The code is first brought back to its plain form (all omocodia letters
    decoded), then one variant is generated per combination of positions.
    The first item, built from the empty combination, is the plain form.

    :param code: The code
    :type code: string
    :returns: The plain code followed by all its omocode variants
    :rtype: list
    """
    plain_code = _get_omocode(
        code,
        subs=_OMOCODIA_SUBS_INDEXES,
        trans=_OMOCODIA_DECODE_TRANS,
    )

    def with_letters_at(positions: list[int]) -> str:
        # variant having digits replaced by letters at the given positions
        return _get_omocode(plain_code, subs=positions, trans=_OMOCODIA_ENCODE_TRANS)

    return list(map(with_letters_at, _OMOCODIA_SUBS_INDEXES_COMBINATIONS))
def encode_lastname(lastname: str) -> str:
    """
    Encode lastname to the 3-letter code used in italian fiscal code.

    The lastname is slugified, then its consonants followed by its vowels
    are used (padding with "X" when needed).

    :param lastname: The lastname
    :type lastname: string
    :returns: The code used in italian fiscal code
    :rtype: string
    """
    slug = slugify(lastname)
    return _get_consonants_and_vowels(_get_consonants(slug), _get_vowels(slug))
def encode_firstname(firstname: str) -> str:
    """
    Encode firstname to the 3-letter code used in italian fiscal code.

    Works like the lastname encoding, except that when the firstname has
    more than three consonants the second one is skipped.

    :param firstname: The firstname
    :type firstname: string
    :returns: The code used in italian fiscal code
    :rtype: string
    """
    slug = slugify(firstname)
    consonants = _get_consonants(slug)
    if len(consonants) >= 4:
        # keep the first consonant, drop the second, keep the others
        consonants = consonants[:1] + consonants[2:]
    return _get_consonants_and_vowels(consonants, _get_vowels(slug))
def encode_birthdate(
    birthdate: datetime | str | None,
    gender: Literal["m", "M", "f", "F"],
) -> str:
    """
    Encode birthdate and gender to the code used in italian fiscal code.

    The code is made of the last two digits of the year, the month letter
    and the two-digit day, the latter increased by 40 for females.

    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param gender: The gender, 'M' or 'F' (case-insensitive)
    :type gender: string
    :returns: The code used in italian fiscal code
    :rtype: string
    :raises ValueError: if birthdate is missing or not parsable, or if
        gender is missing or not 'M' / 'F'
    """
    if not birthdate:
        raise ValueError("[codicefiscale] 'birthdate' argument cant be None")
    date = _get_date(birthdate)
    if not date:
        raise ValueError("[codicefiscale] 'date' argument cant be None")
    if not gender:
        raise ValueError("[codicefiscale] 'gender' argument cant be None")
    day_offsets = {"M": 0, "F": 40}
    gender_code = gender.upper()
    if gender_code not in day_offsets:
        raise ValueError("[codicefiscale] 'gender' argument must be 'M' or 'F'")
    year_part = str(date.year)[2:]
    month_part = _MONTHS[date.month - 1]
    day_part = f"{date.day + day_offsets[gender_code]:02d}"
    return year_part + month_part + day_part
def encode_birthplace(
    birthplace: str,
    birthdate: datetime | str | None = None,
) -> str | None:
    """
    Encode birthplace to the code used in italian fiscal code.

    The birthplace is looked up as given; if nothing is found, the lookup is
    repeated with the part preceding the first "," or "(" only.

    :param birthplace: The birthplace
    :type birthplace: string
    :param birthdate: The birthdate, used to select among birthplace records
    :type birthdate: datetime or string
    :returns: The code used in italian fiscal code
    :rtype: string
    :raises ValueError: if birthplace is missing or cannot be mapped to a code
    """
    if not birthplace:
        raise ValueError("[codicefiscale] 'birthplace' argument cant be None")
    birthplace_name = re.split(r",|\(", birthplace)[0]
    birthplace_data = _get_birthplace(birthplace, birthdate)
    if not birthplace_data:
        birthplace_data = _get_birthplace(birthplace_name, birthdate)
    if not birthplace_data:
        raise ValueError(
            "[codicefiscale] 'birthplace' / 'birthdate' arguments "
            f"({birthplace!r} / {birthdate!r}) not mapped to code"
        )
    return str(birthplace_data["code"])
def encode_cin(code: str) -> str:
    """
    Encodes cin to the code used in italian fiscal code.

    The check character is computed on the first 15 characters only, so a
    full 16-character code can be passed as well. ASCII letters are accepted
    in either case.

    :param code: The code (15 or 16 characters)
    :type code: string
    :returns: The code used in italian fiscal code
    :rtype: string
    :raises ValueError: if code is empty or its length is not 15 or 16
    :raises KeyError: if one of the first 15 characters is neither an ASCII
        letter nor a digit
    """
    if not code:
        raise ValueError("[codicefiscale] 'code' argument cant be None")
    code_len = len(code)
    if code_len not in (15, 16):
        raise ValueError(
            f"[codicefiscale] 'code' length must be 15 or 16, not: {code_len}"
        )
    cin_tot = 0
    for position, char in enumerate(code[:15]):
        # the weights table is keyed by uppercase letters: normalize ASCII
        # input char by char (non-ASCII input is left alone and rejected by
        # the lookup, as it has always been)
        symbol = char.upper() if char.isascii() else char
        # positions are counted from 1: odd ones use the second weight of
        # the pair, even ones the first
        cin_tot += _CIN[symbol][(position + 1) % 2]
    return _CIN_REMAINDERS[cin_tot % 26]
def encode(
    lastname: str,
    firstname: str,
    gender: Literal["m", "M", "f", "F"],
    birthdate: datetime | str | None,
    birthplace: str,
) -> str:
    """
    Encode personal data to the italian fiscal code.

    :param lastname: The lastname
    :type lastname: string
    :param firstname: The firstname
    :type firstname: string
    :param gender: The gender, 'M' or 'F'
    :type gender: string
    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param birthplace: The birthplace
    :type birthplace: string
    :returns: The italian fiscal code
    :rtype: string
    :raises ValueError: if an argument cannot be encoded or the resulting
        code does not decode successfully
    """
    code_without_cin = "".join(
        str(part)
        for part in (
            encode_lastname(lastname),
            encode_firstname(firstname),
            encode_birthdate(birthdate, gender),
            encode_birthplace(birthplace, birthdate),
        )
    )
    code = code_without_cin + encode_cin(code_without_cin)
    # decoding acts as validation: it raises ValueError on an invalid code
    decode(code)
    return code
def decode_raw(code: str) -> dict[str, str]:
    """
    Split a fiscal code into its raw parts, without interpreting them.

    The code is slugified, stripped of "-" and upper-cased before matching.

    :param code: The code
    :type code: string
    :returns: The normalized code ("code") plus one item per named group of
        ``CODICEFISCALE_RE``
    :rtype: dict
    :raises ValueError: if the normalized code does not match the pattern
    """
    normalized = slugify(code).replace("-", "").upper()
    match = CODICEFISCALE_RE.match(normalized)
    if match is None:
        raise ValueError(f"[codicefiscale] invalid syntax: {normalized}")
    # groupdict() yields lastname, firstname, birthdate, birthdate_year,
    # birthdate_month, birthdate_day, birthplace and cin, in pattern order
    return {"code": normalized, **match.groupdict()}
def decode(code: str) -> dict[str, Any]:
    """
    Decode the italian fiscal code.

    :param code: The code
    :type code: string
    :returns: The normalized code, its omocodes, the gender, the birthdate,
        the birthplace record and the raw parts of the code
    :rtype: dict
    :raises ValueError: if the syntax, the date, the birthplace or the check
        character of the code is not valid
    """
    raw = decode_raw(code)
    code = raw["code"]

    def as_number(chunk: str) -> int:
        # omocodia letters are mapped back to the digits they stand for
        return int(chunk.translate(_OMOCODIA_DECODE_TRANS))

    year_suffix = as_number(raw["birthdate_year"])
    month = _MONTHS.index(raw["birthdate_month"]) + 1
    day = as_number(raw["birthdate_day"])

    # a day above 40 marks a female holder
    gender = "F" if day > 40 else "M"
    if gender == "F":
        day -= 40

    # only two digits of the year are stored: assume the current century
    # and step back one century if that gives a year in the future
    this_year = datetime.now().year
    year = this_year - this_year % 100 + year_suffix
    if year > this_year:
        year -= 100

    birthdate_str = f"{year}/{month}/{day}"
    birthdate = _get_date(birthdate_str, separator="/")
    if not birthdate:
        raise ValueError(f"[codicefiscale] invalid date: {birthdate_str}")

    # the leading letter of the birthplace is never an omocodia replacement
    place_field = raw["birthplace"]
    birthplace_code = place_field[:1] + place_field[1:].translate(
        _OMOCODIA_DECODE_TRANS
    )
    birthplace = _get_birthplace(birthplace_code, birthdate)
    if not birthplace:
        raise ValueError(
            "[codicefiscale] wrong birthplace code: "
            f"{birthplace_code!r} / birthdate: {birthdate.isoformat()!r}."
        )

    found_cin = raw["cin"]
    expected_cin = encode_cin(code)
    if found_cin != expected_cin:
        raise ValueError(
            "[codicefiscale] wrong CIN (Control Internal Number): "
            f"expected {expected_cin!r}, found {found_cin!r}"
        )

    return {
        "code": code,
        "omocodes": _get_omocodes(code),
        "gender": gender,
        "birthdate": birthdate,
        "birthplace": birthplace,
        "raw": raw,
    }
def is_omocode(code: str) -> bool:
    """
    Determines whether the specified code is omocode or not.

    The check is done on the normalized (upper-cased, separator-free) form
    of the code, the same one ``decode`` works on, so the result does not
    depend on how the input is written.

    :param code: The code
    :type code: string
    :returns: True if the specified code is omocode, False otherwise.
    :rtype: boolean
    :raises ValueError: if the code is not valid
    """
    data = decode(code)
    # the first item is the plain form of the code (no substitutions):
    # only the remaining ones are omocodes
    return data["code"] in data["omocodes"][1:]
def is_valid(code: str) -> bool:
    """
    Determines whether the specified code is valid.

    A code is valid when ``decode`` accepts it without raising ValueError.

    :param code: The code
    :type code: string
    :returns: True if the specified code is valid, False otherwise.
    :rtype: boolean
    """
    try:
        decode(code)
    except ValueError:
        return False
    return True