# codicefiscale/codicefiscale.py
from __future__ import annotations
import re
import string
from datetime import datetime, timedelta
from itertools import combinations
from typing import Any, Literal, Pattern
from dateutil import parser as date_parser
from slugify import slugify
from codicefiscale.data import get_indexed_data
# Lowercase alphabets used to split a slugified name into vowels and consonants.
_VOWELS: list[str] = ["a", "e", "i", "o", "u"]
_CONSONANTS: list[str] = [
    letter for letter in string.ascii_lowercase if letter not in _VOWELS
]
# Month letters of the fiscal code, indexed by month number minus one.
_MONTHS: list[str] = ["A", "B", "C", "D", "E", "H", "L", "M", "P", "R", "S", "T"]
# Control-character (CIN) weights: every character maps to the pair
# (value used at an even position, value used at an odd position), with
# positions counted from 1. Digits reuse the weights of the first ten letters.
_CIN: dict[str, tuple[int, int]] = {
    char: (even_value, odd_value)
    for alphabet in (string.digits, string.ascii_uppercase)
    for even_value, (char, odd_value) in enumerate(
        zip(
            alphabet,
            (
                1, 0, 5, 7, 9, 13, 15, 17, 19, 21, 2, 4, 18,
                20, 11, 3, 6, 8, 12, 14, 16, 10, 22, 25, 24, 23,
            ),
        )
    )
}
# Control letter for each possible remainder of the weight total modulo 26.
_CIN_REMAINDERS: list[str] = [*string.ascii_uppercase]
# Omocodia substitution table: each digit and the letter that replaces it.
_OMOCODIA: dict[str, str] = dict(zip("0123456789", "LMNPQRSTUV"))
_OMOCODIA_DIGITS: str = "".join(_OMOCODIA.keys())
_OMOCODIA_LETTERS: str = "".join(_OMOCODIA.values())
# Translation tables for str.translate: digits -> letters and letters -> digits.
_OMOCODIA_ENCODE_TRANS: dict[int, int] = str.maketrans(
    _OMOCODIA_DIGITS,
    _OMOCODIA_LETTERS,
)
_OMOCODIA_DECODE_TRANS: dict[int, int] = str.maketrans(
    _OMOCODIA_LETTERS,
    _OMOCODIA_DIGITS,
)
# Positions of the code whose digit can be swapped for a letter, listed from
# the rightmost position to the leftmost one.
_OMOCODIA_SUBS_INDEXES: list[int] = [14, 13, 12, 10, 9, 7, 6]
# Every subset of those positions, ordered by size and starting with the
# empty subset (no substitution at all). Built with a comprehension so that
# no loop variable is left behind in the module namespace.
_OMOCODIA_SUBS_INDEXES_COMBINATIONS: list[list[int]] = [
    list(positions)
    for subset_size in range(len(_OMOCODIA_SUBS_INDEXES) + 1)
    for positions in combinations(_OMOCODIA_SUBS_INDEXES, subset_size)
]
# Birthplace lookup tables, loaded once when the module is imported. This
# module reads the "municipalities", "countries" and "codes" indexes, each
# mapping a key to a list of birthplace records.
_DATA: dict[str, dict[str, list[dict[str, Any]]]] = get_indexed_data()
# Layout of a 16-character fiscal code, matched case-insensitively:
# lastname (3) + firstname (3) + birthdate (year 2, month 1, day 2)
# + birthplace (4) + control character (1).
CODICEFISCALE_RE: Pattern[str] = re.compile(
    r"^"
    r"(?P<lastname>[a-z]{3})"
    r"(?P<firstname>[a-z]{3})"
    r"(?P<birthdate>"
    r"(?P<birthdate_year>[a-z\d]{2})"
    r"(?P<birthdate_month>[abcdehlmprst]{1})"
    r"(?P<birthdate_day>[a-z\d]{2})"
    r")"
    r"(?P<birthplace>[a-z]{1}[a-z\d]{3})"
    r"(?P<cin>[a-z]{1})"
    r"$",
    flags=re.IGNORECASE,
)
def _get_consonants(s: str) -> list[str]:
    """Return, in their original order, the characters of ``s`` found in ``_CONSONANTS``."""
    return list(filter(lambda letter: letter in _CONSONANTS, s))
def _get_vowels(s: str) -> list[str]:
    """Return, in their original order, the characters of ``s`` found in ``_VOWELS``."""
    return list(filter(lambda letter: letter in _VOWELS, s))
def _get_consonants_and_vowels(
    consonants: list[str],
    vowels: list[str],
) -> str:
    """
    Build a three-letter code from the given consonants and vowels.

    Consonants are used first, then vowels; missing letters are padded
    with "X". The result is upper-cased.

    :param consonants: The consonants, in order
    :type consonants: list
    :param vowels: The vowels, in order
    :type vowels: list
    :returns: The three-letter code
    :rtype: string
    """
    candidates = [*consonants[:3], *vowels[:3], "X", "X", "X"]
    return "".join(candidates[:3]).upper()
def _get_date(
    date: datetime | str | None,
    separator: str = "-",
) -> datetime | None:
    """
    Convert a datetime or a date string to a naive datetime.

    :param date: The date, as datetime or string
    :type date: datetime or string
    :param separator: Not used by the parsing (the string is slugified and
        always split on "-"); kept so existing callers keep working
    :type separator: string
    :returns: The datetime without timezone info, or None when the value is
        empty or cannot be parsed
    :rtype: datetime or None
    """
    if not date:
        return None
    if isinstance(date, datetime):
        return date.replace(tzinfo=None)
    date_slug = slugify(date)
    # a four-character first part is read as a year, otherwise the string
    # is parsed day-first
    first_part = date_slug.split("-", 1)[0]
    if len(first_part) == 4:
        parser_options = {"yearfirst": True}
    else:
        parser_options = {"dayfirst": True}
    try:
        date_obj = date_parser.parse(
            date_slug,
            parserinfo=date_parser.parserinfo(**parser_options),
        )
    except (ValueError, OverflowError):
        # dateutil reports unparsable strings with ValueError and values too
        # large to handle with OverflowError; both mean "not a usable date"
        return None
    return date_obj.replace(tzinfo=None)
def _get_birthplace(
    birthplace: str,
    birthdate: datetime | str | None = None,
) -> dict[str, dict[str, Any]] | None:
    """
    Look up the birthplace record matching a name or code.

    The slugified value is searched among municipalities, then countries,
    then (upper-cased) among codes. When a birthdate is given, the record
    whose created / deleted dates include it is preferred.

    :param birthplace: The birthplace name or code
    :type birthplace: string
    :param birthdate: The birthdate used to pick among several records
    :type birthdate: datetime or string
    :returns: A copy of the matching record, or None when nothing matches
    :rtype: dict or None
    """
    slug = slugify(birthplace)
    municipalities = _DATA["municipalities"]
    countries = _DATA["countries"]
    codes = _DATA["codes"]
    if slug in municipalities:
        candidates = municipalities[slug]
    elif slug in countries:
        candidates = countries[slug]
    else:
        candidates = codes.get(slug.upper())
    if not candidates:
        return None

    when = _get_date(birthdate)
    if not when:
        return candidates[0].copy()

    # pick the record that already existed and was not yet deleted at birthdate
    for candidate in candidates:
        active_from = _get_date(candidate["date_created"]) or datetime.min
        active_until = _get_date(candidate["date_deleted"]) or datetime.max
        if active_from <= when <= active_until:
            return candidate.copy()
    return _get_birthplace_fallback(candidates, when)
def _get_birthplace_fallback(
    birthplaces_options: list[dict[str, Any]],
    birthdate_date: datetime,
) -> dict[str, dict[str, Any]] | None:
    """
    Pick a birthplace record when the birthdate falls in a date gap.

    Avoids a wrong-birthplace error when the birthdate lies in a date range
    missing from the data source even though the birthplace code is valid:
    each record is compared with the one following it, and the birthdate must
    lie between the deletion of the former and the creation of the latter.

    :param birthplaces_options: The candidate records, in data-source order
    :type birthplaces_options: list
    :param birthdate_date: The birthdate
    :type birthdate_date: datetime
    :returns: A copy of the chosen record, or None when no gap matches
    :rtype: dict or None
    """
    for current, following in zip(birthplaces_options, birthplaces_options[1:]):
        current_deleted = _get_date(current["date_deleted"])
        following_created = _get_date(following["date_created"])
        if not (current_deleted and following_created):
            continue
        # the birthdate must sit inside the gap between the two records
        if not current_deleted <= birthdate_date <= following_created:
            continue
        # if the deleted record was active for a very short time, its
        # deletion date is probably wrong: prefer that record
        current_created = _get_date(current["date_created"])
        if current_created and current_deleted - current_created <= timedelta(days=1):
            return current.copy()
        return following.copy()
    return None
def _get_omocode(
    code: str,
    subs: list[int],
    trans: dict[int, int],
) -> str:
    """
    Apply a translation table to selected positions of a code.

    Only the first 15 characters are kept; the control character is
    recomputed and appended.

    :param code: The code
    :type code: string
    :param subs: The positions whose character gets translated
    :type subs: list
    :param trans: The translation table passed to ``str.translate``
    :type trans: dict
    :returns: The 16-character code
    :rtype: string
    """
    chars = list(code[:15])
    for position in subs:
        chars[position] = chars[position].translate(trans)
    body = "".join(chars)
    return body + encode_cin(body)
def _get_omocodes(code: str) -> list[str]:
    """
    List every omocode variant of a code.

    The code is first brought back to its plain form (letters decoded to
    digits at all substitutable positions), then re-encoded once per
    combination of positions. The first item is the plain code itself.

    :param code: The code
    :type code: string
    :returns: The variants, in the order of the position combinations
    :rtype: list
    """
    plain_code = _get_omocode(
        code,
        subs=_OMOCODIA_SUBS_INDEXES,
        trans=_OMOCODIA_DECODE_TRANS,
    )
    return [
        _get_omocode(plain_code, subs=positions, trans=_OMOCODIA_ENCODE_TRANS)
        for positions in _OMOCODIA_SUBS_INDEXES_COMBINATIONS
    ]
def encode_lastname(lastname: str) -> str:
    """
    Encode the lastname as the three-letter code of the italian fiscal code.

    :param lastname: The lastname
    :type lastname: string
    :returns: The code used in italian fiscal code
    :rtype: string
    """
    slug = slugify(lastname)
    return _get_consonants_and_vowels(_get_consonants(slug), _get_vowels(slug))
def encode_firstname(firstname: str) -> str:
    """
    Encode the firstname as the three-letter code of the italian fiscal code.

    :param firstname: The firstname
    :type firstname: string
    :returns: The code used in italian fiscal code
    :rtype: string
    """
    slug = slugify(firstname)
    consonants = _get_consonants(slug)
    if len(consonants) > 3:
        # with more than three consonants the second one is skipped
        consonants = [consonants[0], *consonants[2:]]
    return _get_consonants_and_vowels(consonants, _get_vowels(slug))
def encode_birthdate(
    birthdate: datetime | str | None,
    gender: Literal["m", "M", "f", "F"],
) -> str:
    """
    Encodes birthdate to the code used in italian fiscal code.

    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param gender: The gender, 'M' or 'F'
    :type gender: string
    :returns: The code used in italian fiscal code
    :rtype: string
    :raises ValueError: If the birthdate is empty or cannot be parsed, or
        if the gender is empty or not 'M' / 'F'
    """
    if not birthdate:
        raise ValueError("[codicefiscale] 'birthdate' argument cant be None")
    date = _get_date(birthdate)
    if not date:
        # raised when the given birthdate could not be parsed
        raise ValueError("[codicefiscale] 'date' argument cant be None")
    if not gender:
        raise ValueError("[codicefiscale] 'gender' argument cant be None")
    gender_code = gender.upper()
    if gender_code not in ("M", "F"):
        raise ValueError("[codicefiscale] 'gender' argument must be 'M' or 'F'")
    # always two digits, also for years with fewer than four digits
    year_code = f"{date.year % 100:02d}"
    month_code = _MONTHS[date.month - 1]
    # the day is increased by 40 for females
    day_number = date.day + (40 if gender_code == "F" else 0)
    return f"{year_code}{month_code}{day_number:02d}"
def encode_birthplace(
    birthplace: str,
    birthdate: datetime | str | None = None,
) -> str | None:
    """
    Encodes birthplace to the code used in italian fiscal code.

    The full value is looked up first; if nothing matches, the lookup is
    retried with only the text before the first "," or "(".

    :param birthplace: The birthplace
    :type birthplace: string
    :param birthdate: The birthdate, used to pick the right record
    :type birthdate: datetime or string
    :returns: The code used in italian fiscal code
    :rtype: string
    :raises ValueError: If the birthplace is empty or cannot be mapped
    """
    if not birthplace:
        raise ValueError("[codicefiscale] 'birthplace' argument cant be None")
    name_only = re.split(r",|\(", birthplace)[0]
    birthplace_data = _get_birthplace(birthplace, birthdate)
    if not birthplace_data:
        birthplace_data = _get_birthplace(name_only, birthdate)
    if not birthplace_data:
        raise ValueError(
            "[codicefiscale] 'birthplace' / 'birthdate' arguments "
            f"({birthplace!r} / {birthdate!r}) not mapped to code"
        )
    return str(birthplace_data["code"])
def encode_cin(code: str) -> str:
    """
    Encodes cin to the code used in italian fiscal code.

    Only the first 15 characters are used, so a full 16-character code can
    be passed to recompute its control character. Letters are accepted in
    any case.

    :param code: The code
    :type code: string
    :returns: The code used in italian fiscal code
    :rtype: string
    :raises ValueError: If the code is empty, its length is not 15 or 16,
        or it contains a character that is not a letter or a digit
    """
    if not code:
        raise ValueError("[codicefiscale] 'code' argument cant be None")
    code_len = len(code)
    if code_len not in (15, 16):
        raise ValueError(
            f"[codicefiscale] 'code' length must be 15 or 16, not: {code_len}"
        )
    cin_tot = 0
    for position, char in enumerate(code[:15], start=1):
        # upper-case each character on its own so the code length never changes
        values = _CIN.get(char.upper())
        if values is None:
            raise ValueError(f"[codicefiscale] invalid character in 'code': {char!r}")
        even_value, odd_value = values
        # positions are counted from 1: odd and even ones use different weights
        cin_tot += odd_value if position % 2 else even_value
    return _CIN_REMAINDERS[cin_tot % 26]
def encode(
    lastname: str,
    firstname: str,
    gender: Literal["m", "M", "f", "F"],
    birthdate: datetime | str | None,
    birthplace: str,
) -> str:
    """
    Encodes the italian fiscal code.

    :param lastname: The lastname
    :type lastname: string
    :param firstname: The firstname
    :type firstname: string
    :param gender: The gender, 'M' or 'F'
    :type gender: string
    :param birthdate: The birthdate
    :type birthdate: datetime or string
    :param birthplace: The birthplace
    :type birthplace: string
    :returns: The italian fiscal code
    :rtype: string
    :raises ValueError: If an argument cannot be encoded or the resulting
        code does not decode correctly
    """
    parts = (
        encode_lastname(lastname),
        encode_firstname(firstname),
        encode_birthdate(birthdate, gender),
        encode_birthplace(birthplace, birthdate),
    )
    partial_code = "".join(parts)
    full_code = partial_code + encode_cin(partial_code)
    # decoding the result doubles as validation: it raises ValueError
    # when the generated code is not valid
    decode(full_code)
    return full_code
def decode_raw(code: str) -> dict[str, str]:
    """
    Decodes the raw data associated to the code.

    The code is slugified, stripped of "-" and upper-cased before being
    split into its parts.

    :param code: The code
    :type code: string
    :returns: The raw data associated to the code.
    :rtype: dict
    :raises ValueError: If the code does not have the fiscal code syntax
    """
    code = slugify(code).replace("-", "").upper()
    match = CODICEFISCALE_RE.match(code)
    if not match:
        raise ValueError(f"[codicefiscale] invalid syntax: {code}")
    field_names = (
        "lastname",
        "firstname",
        "birthdate",
        "birthdate_year",
        "birthdate_month",
        "birthdate_day",
        "birthplace",
        "cin",
    )
    return {"code": code, **{name: match[name] for name in field_names}}
def decode(code: str) -> dict[str, Any]:
    """
    Decodes the italian fiscal code.

    :param code: The code
    :type code: string
    :returns: The data associated to the code and some additional info:
        "code", "omocodes", "gender", "birthdate", "birthplace" and "raw".
    :rtype: dict
    :raises ValueError: If the syntax, the birthdate, the birthplace code
        or the control character is not valid
    """
    raw = decode_raw(code)
    code = raw["code"]

    # year and day may contain omocodia letters: bring them back to digits
    year_suffix = int(raw["birthdate_year"].translate(_OMOCODIA_DECODE_TRANS))
    birthdate_month = _MONTHS.index(raw["birthdate_month"]) + 1
    birthdate_day = int(raw["birthdate_day"].translate(_OMOCODIA_DECODE_TRANS))

    # days above 40 mark a female, the real day being the value minus 40
    gender = "F" if birthdate_day > 40 else "M"
    if gender == "F":
        birthdate_day -= 40

    # the code holds only two year digits: assume the current century and
    # step back one century when that would give a year in the future
    current_year = datetime.now().year
    century_prefix = str(current_year)[0:-2]
    birthdate_year = int(f"{century_prefix}{str(year_suffix).zfill(2)}")
    if birthdate_year > current_year:
        birthdate_year -= 100

    birthdate_str = f"{birthdate_year}/{birthdate_month}/{birthdate_day}"
    birthdate = _get_date(birthdate_str, separator="/")
    if not birthdate:
        raise ValueError(f"[codicefiscale] invalid date: {birthdate_str}")

    # the first birthplace character is kept as is, the other three may
    # contain omocodia letters
    birthplace_raw = raw["birthplace"]
    birthplace_code = birthplace_raw[0] + birthplace_raw[1:].translate(
        _OMOCODIA_DECODE_TRANS
    )
    birthplace = _get_birthplace(birthplace_code, birthdate)
    if not birthplace:
        raise ValueError(
            "[codicefiscale] wrong birthplace code: "
            f"{birthplace_code!r} / birthdate: {birthdate.isoformat()!r}."
        )

    cin = raw["cin"]
    cin_check = encode_cin(code)
    if cin != cin_check:
        raise ValueError(
            "[codicefiscale] wrong CIN (Control Internal Number): "
            f"expected {cin_check!r}, found {cin!r}"
        )

    return {
        "code": code,
        "omocodes": _get_omocodes(code),
        "gender": gender,
        "birthdate": birthdate,
        "birthplace": birthplace,
        "raw": raw,
    }
def is_omocode(code: str) -> bool:
    """
    Determines whether the specified code is omocode or not.

    The comparison uses the normalized code returned by ``decode``, so the
    result does not depend on how the input was written.

    :param code: The code
    :type code: string
    :returns: True if the specified code is omocode, False otherwise.
    :rtype: boolean
    :raises ValueError: If the code is not valid
    """
    data = decode(code)
    # the first item of "omocodes" is the plain code (no substitution):
    # only the remaining ones are actual omocodes
    return data["code"] in data["omocodes"][1:]
def is_valid(code: str) -> bool:
    """
    Determines whether the specified code is valid.

    A code is valid when decoding it does not raise ValueError.

    :param code: The code
    :type code: string
    :returns: True if the specified code is valid, False otherwise.
    :rtype: boolean
    """
    try:
        decode(code)
    except ValueError:
        return False
    return True