timeseriesx/base/timestamp_series.py
import collections
import copy
import datetime as dt
import numbers
import warnings
from collections.abc import Iterable
import numpy as np
import pandas as pd
from pint import Quantity
from pint_pandas import PintArray, PintType
from timeseriesx.base.base_time_series import BaseTimeSeries
from timeseriesx.mixins.frequency import FrequencyMixin
from timeseriesx.mixins.time_zone import TimeZoneMixin
from timeseriesx.mixins.unit import UnitMixin
from timeseriesx.validation.timestamp_index import (
index_is_datetime,
index_is_sorted,
)
class TimestampMismatchWarning(RuntimeWarning):
    """
    Warning category for implicit handling of mismatching timestamps.

    Issued through `warnings.warn` when series with differing timestamp
    indexes are combined and the missing values are auto-filled.
    """
class TimestampSeries(UnitMixin, TimeZoneMixin, FrequencyMixin, BaseTimeSeries):
    """
    Time series whose values are indexed by timestamps.

    The data is held in a pandas `Series` (attribute `_series`); the `freq`,
    `unit` and `time_zone` constructor arguments are forwarded to the
    `UnitMixin`, `TimeZoneMixin`, `FrequencyMixin` and `BaseTimeSeries`
    initialisers.
    """
@staticmethod
def create_null_timeseries(start, end, freq, unit=None, time_zone='infer'):
    """
    create a `TimestampSeries`-object from `start` to `end` with NaN-values

    :param str/datetime.datetime/pandas.Timestamp start: the start timestamp of the
        series (included)
    :param str/datetime.datetime/pandas.Timestamp end: the end timestamp of the
        series (included)
    :param str/datetime.timedelta/pandas.Offset/pandas.Timedelta freq:
        the frequency of the timestamp series, `pandas offset aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
        supported
    :param str/pint.Unit unit: the unit of the series's values, many string
        representations of common units are supported, such as `m`, `s`, `kg`
        and many more
    :param str/tzinfo time_zone: the name of the time zone, (see `IANA <https://www.iana.org/time-zones>`_)
        or a tzinfo-object, pass `'infer'` if you want the time zone to be derived
        from `start` and `end`
    :return: a new TimestampSeries-object
    :rtype: TimestampSeries
    """
    # `np.nan` is the canonical spelling; the `np.NaN` alias was removed in
    # NumPy 2.0 and raises AttributeError there.
    return TimestampSeries.create_constant_timeseries(
        start, end, np.nan, freq, unit, time_zone=time_zone)
@staticmethod
def create_constant_timeseries(start, end, value, freq, unit=None,
                               time_zone='infer'):
    """
    Build a `TimestampSeries` spanning `start` to `end` in which every
    element holds the same `value`.

    :param str/datetime.datetime/pandas.Timestamp start: the start timestamp of the
        series (included)
    :param str/datetime.datetime/pandas.Timestamp end: the end timestamp of the
        series (included)
    :param int/float value: the constant value for each element
    :param str/datetime.timedelta/pandas.Offset/pandas.Timedelta freq:
        the frequency of the timestamp series, `pandas offset aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
        supported
    :param str/pint.Unit unit: the unit of the series's values, many string
        representations of common units are supported, such as `m`, `s`, `kg`
        and many more
    :param str/tzinfo time_zone: the name of the time zone, (see `IANA <https://www.iana.org/time-zones>`_)
        or a tzinfo-object, pass `'infer'` if you want the time zone to be derived
        from `start` and `end`
    :return: a new TimestampSeries-object
    :rtype: TimestampSeries
    """
    timestamps = pd.date_range(start, end, freq=freq)
    # one copy of the value per generated timestamp
    constant_values = [value for _ in range(len(timestamps))]
    constant_series = pd.Series(constant_values, index=timestamps)
    return TimestampSeries.create_from_pd_series(
        constant_series, freq=freq, unit=unit, time_zone=time_zone)
@staticmethod
def create_from_lists(timestamps, values, freq='infer', unit=None,
                      time_zone='infer'):
    """
    create a `TimestampSeries`-object from a list of timestamps and values matched
    by their index

    :param list timestamps: the timestamps of the series
    :param list values: the values of the series
    :param str/datetime.timedelta/pandas.Offset/pandas.Timedelta freq:
        the frequency of the timestamp series, `pandas offset aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
        supported, pass `'infer'` if you want the frequency to be derived by the
        timestamps
    :param str/pint.Unit unit: the unit of the series's values, many string
        representations of common units are supported, such as `m`, `s`, `kg`
        and many more
    :param str/tzinfo time_zone: the name of the time zone, (see `IANA <https://www.iana.org/time-zones>`_)
        or a tzinfo-object, pass `'infer'` if you want the time zone to be derived
        by the timestamps
    :return: a new TimestampSeries-object
    :rtype: TimestampSeries
    :raises ValueError: if `timestamps` and `values` differ in length
    """
    if len(timestamps) != len(values):
        # the previous message read "do not not match" (double negative)
        raise ValueError('lengths of timestamps and values do not match')
    tuples = list(zip(timestamps, values))
    return TimestampSeries.create_from_tuples(tuples, freq=freq, unit=unit,
                                              time_zone=time_zone)
@staticmethod
def create_from_tuples(tuples, freq='infer', unit=None, time_zone='infer'):
    """
    Build a `TimestampSeries` from (timestamp, value) pairs.

    The pairs are collected into a dict keyed by timestamp, so a timestamp
    that occurs more than once keeps only its last value.

    :param list tuples: list of tuples holding timestamp and value
    :param str/datetime.timedelta/pandas.Offset/pandas.Timedelta freq:
        the frequency of the timestamp series, `pandas offset aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
        supported, pass `'infer'` if you want the frequency to be derived by the
        timestamps
    :param str/pint.Unit unit: the unit of the series's values, many string
        representations of common units are supported, such as `m`, `s`, `kg`
        and many more
    :param str/tzinfo time_zone: the name of the time zone, (see `IANA <https://www.iana.org/time-zones>`_)
        or a tzinfo-object, pass `'infer'` if you want the time zone to be derived
        by the timestamps
    :return: a new TimestampSeries-object
    :rtype: TimestampSeries
    """
    value_by_timestamp = {}
    for timestamp, value in tuples:
        value_by_timestamp[timestamp] = value
    return TimestampSeries.create_from_dict(
        value_by_timestamp, freq=freq, unit=unit, time_zone=time_zone)
@staticmethod
def create_from_dict(dictionary, freq='infer', unit=None, time_zone='infer'):
    """
    Build a `TimestampSeries` from a dict that maps timestamps to values.

    :param dict dictionary: dict with timestamps as keys and timeseries-values as
        dict-values
    :param str/datetime.timedelta/pandas.Offset/pandas.Timedelta freq:
        the frequency of the timestamp series, `pandas offset aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
        supported, pass `'infer'` if you want the frequency to be derived by the
        timestamps
    :param str/pint.Unit unit: the unit of the series's values, many string
        representations of common units are supported, such as `m`, `s`, `kg`
        and many more
    :param str/tzinfo time_zone: the name of the time zone, (see `IANA <https://www.iana.org/time-zones>`_)
        or a tzinfo-object, pass `'infer'` if you want the time zone to be derived
        by the timestamps
    :return: a new TimestampSeries-object
    :rtype: TimestampSeries
    """
    # the dict keys become the index of the pandas series
    return TimestampSeries.create_from_pd_series(
        pd.Series(dictionary), freq=freq, unit=unit, time_zone=time_zone)
@staticmethod
def create_from_pd_series(series, freq='infer', unit=None, time_zone='infer'):
    """
    Wrap a pandas `Series` with `DatetimeIndex` in a `TimestampSeries`.

    :param pandas.Series series: a pandas series-object with `DatetimeIndex`
    :param str/datetime.timedelta/pandas.Offset/pandas.Timedelta freq:
        the frequency of the timestamp series, `pandas offset aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
        supported, pass `'infer'` if you want the frequency to be derived by the
        timestamps
    :param str/pint.Unit unit: the unit of the series's values, many string
        representations of common units are supported, such as `m`, `s`, `kg`
        and many more
    :param str/tzinfo time_zone: the name of the time zone, (see `IANA <https://www.iana.org/time-zones>`_)
        or a tzinfo-object, pass `'infer'` if you want the time zone to be derived
        by the timestamps
    :return: a new TimestampSeries-object
    :rtype: TimestampSeries
    """
    timestamp_series = TimestampSeries(
        series, freq=freq, unit=unit, time_zone=time_zone)
    return timestamp_series
# ------------------------------ constructor ----------------------------- #
def __init__(self, series, freq=None, unit=None, time_zone=None):
    """
    Store `series`, validate its index and initialise the parent classes.

    :param series: a pandas series-object with `DatetimeIndex`
    :param str/datetime.timedelta/pandas.Offset/pandas.Timedelta freq:
        the frequency of the timestamp series or None, `pandas offset aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
        supported, pass `'infer'` if you want the frequency to be derived by the
        timestamps
    :param str/pint.Unit unit: the unit of the series's values or None, many string
        representations of common units are supported, such as `'m'`, `'s'`, `'kg'`
        and many more
    :param str/tzinfo time_zone: the name of the time zone or None (see `IANA <https://www.iana.org/time-zones>`_)
        or a tzinfo-object, pass `'infer'` if you want the time zone to be derived
        by the timestamps
    """
    self._series = series
    # the index is checked before the parent initialisers run
    # NOTE(review): the mixins presumably read `self._series` - confirm
    self._validate()
    super().__init__(freq=freq, unit=unit, time_zone=time_zone)
# ------------------------------ properties ------------------------------ #
@property
def values(self):
    """
    The values of the series as a plain list.

    Elements exposing a `magnitude` attribute are reduced to that
    magnitude; all other elements are returned unchanged.
    """
    # would like to use self._series.pint.magnitude.tolist() here, but
    # it is not updated, when updating self._series, to be reported @pint_array
    return [getattr(entry, 'magnitude', entry)
            for entry in self._series.values]
@property
def timestamps(self):
    """
    The index of the series as a list, converted via `to_pydatetime`.
    """
    index_as_datetimes = self._series.index.to_pydatetime()
    return index_as_datetimes.tolist()
@property
def first(self):
    """
    The first (timestamp, value) pair of the series.

    :raises ValueError: if the series is empty
    """
    if not self.empty:
        return self.timestamps[0], self.values[0]
    raise ValueError('empty series')
@property
def last(self):
    """
    The last (timestamp, value) pair of the series.

    :raises ValueError: if the series is empty
    """
    if not self.empty:
        return self.timestamps[-1], self.values[-1]
    raise ValueError('empty series')
@property
def start(self):
    """
    The first timestamp of the series, converted via `to_pydatetime`.

    :raises ValueError: if the series is empty
    """
    if self.empty:
        raise ValueError('empty series')
    first_index_entry = self._series.index[0]
    return first_index_entry.to_pydatetime()
@property
def end(self):
    """
    The last timestamp of the series, converted via `to_pydatetime`.

    :raises ValueError: if the series is empty
    """
    if self.empty:
        raise ValueError('empty series')
    last_index_entry = self._series.index[-1]
    return last_index_entry.to_pydatetime()
@property
def time_range(self):
    """
    The (start, end) timestamps of the series as a tuple.

    :raises ValueError: if the series is empty (raised by `start`)
    """
    range_start = self.start
    range_end = self.end
    return range_start, range_end
# ---------------------------- functionality ----------------------------- #
def map(self, func, dimensionless=True):
    """
    apply a custom function to each value of the series

    :param function func: a function mapping a scalar to another scalar
    :param bool dimensionless: if set to True, the mapping function takes
        an argument of type Number (no unit, dimensionless). The resulting
        timestamp series will keep the original unit. If set to False,
        the mapping function takes an argument of type pint.Quantity.
        The resulting timestamp series will have the unit of the mapped
        values. Mapping values of one series to different units results in
        an error. Mapping with dimensionless=False will result in a loop
        and therefore perform slower.
    :return: the series with mapped values
    :rtype: TimestampSeries
    :raises ValueError: if `dimensionless` is False and the mapped values
        do not all share the same unit
    """
    if self.empty:
        return self

    # series without a pint dtype: plain element-wise mapping
    if not isinstance(self._series.dtype, PintType):
        self._series = self._series.apply(func)
        return self

    if dimensionless:
        # map the bare magnitudes and re-attach the unchanged unit
        mapped_values = self._get_magnitude_series().apply(func).values
        self._series = pd.Series(PintArray(mapped_values, dtype=self.unit),
                                 index=self._series.index)
        return self

    mapped_values = [func(quantity) for quantity in self._series.values]
    mapped_unit = mapped_values[0].u
    if any(quantity.u != mapped_unit for quantity in mapped_values):
        raise ValueError("the mapped values do not have the same unit")
    magnitudes = [quantity.magnitude for quantity in mapped_values]
    self._series = pd.Series(PintArray(magnitudes, dtype=mapped_unit),
                             index=self._series.index)
    # keep the stored unit in sync with the new dtype, as the arithmetic
    # helpers of this class do after changing the underlying series
    self._unit = self._series.pint.u
    return self
def round(self, decimals):
    """
    Round every value of the series in place.

    :param decimals: no of decimal places to round to
    :return: the series with rounded values
    :rtype: TimestampSeries
    """
    if not isinstance(self._series.dtype, PintType):
        self._series = self._series.round(decimals)
        return self

    # ToDo: feature request at pint-pandas
    # pint-backed series: round the magnitudes and rebuild with the same unit
    magnitudes = self._get_magnitude_series().values
    rounded = PintArray(magnitudes.round(decimals), dtype=self.unit)
    self._series = pd.Series(rounded, index=self._series.index)
    return self
def append(self, value):
    """
    append a new value to a series with frequency

    The new value is placed one frequency step after the current last
    timestamp.

    :param float/int value: the value to append
    :return: the series with the new appended value
    :rtype: TimestampSeries
    :raises ValueError: if the series has no frequency or is empty
    """
    if not self.freq:
        raise ValueError('cannot append to series without freq')
    if self.empty:
        raise ValueError('cannot append to empty series, '
                         'use __setitem__: ts[timestamp] = value instead')
    values = [value]
    if self.unit:
        values = PintArray(values, dtype=self.unit)
    next_timestamp = self._series.index.shift(periods=1, freq=self.freq)[-1]
    # `Series.append` was removed in pandas 2.0; `pd.concat` is the
    # supported equivalent and is also available in older versions
    self._series = pd.concat(
        [self._series, pd.Series(values, index=[next_timestamp])])
    return self
def prepend(self, value):
    """
    prepend a new value to a series with frequency

    The new value is placed one frequency step before the current first
    timestamp.

    :param float/int value: the value to prepend
    :return: the series with the new prepended value
    :rtype: TimestampSeries
    :raises ValueError: if the series has no frequency or is empty
    """
    if not self.freq:
        raise ValueError('cannot prepend to series without freq')
    if self.empty:
        raise ValueError('cannot prepend to empty series, '
                         'use __setitem__: ts[timestamp] = value instead')
    values = [value]
    if self.unit:
        values = PintArray(values, dtype=self.unit)
    # shifting by the negated frequency moves the index one step back
    previous_timestamp = self._series.index.shift(
        periods=1, freq=-self.freq)[0]
    # `Series.append` was removed in pandas 2.0; `pd.concat` is the
    # supported equivalent and is also available in older versions
    self._series = pd.concat(
        [pd.Series(values, index=[previous_timestamp]), self._series])
    return self
def join(self, other_ts, fit=True):
    """
    Placeholder: not implemented for `TimestampSeries`.

    :raises NotImplementedError: always
    """
    raise NotImplementedError
# --------------------------------- cast --------------------------------- #
def as_tuples(self):
    """
    Return the series as a list of (timestamp, value) tuples.
    """
    pairs = zip(self.timestamps, self.values)
    return list(pairs)
def as_dict(self, ordered=False):
    """
    Return the series as a mapping built by pandas' `Series.to_dict`.

    :param bool ordered: if truthy a `collections.OrderedDict` is returned,
        otherwise a plain `dict`
    """
    if ordered:
        mapping_type = collections.OrderedDict
    else:
        mapping_type = dict
    return self.as_pd_series().to_dict(into=mapping_type)
def as_time_period_series(self, align_left=True):
    """
    Placeholder: not implemented for `TimestampSeries`.

    :raises NotImplementedError: always
    """
    raise NotImplementedError
# ---------------------------- magic methods ---------------------------- #
def __str__(self):
    """
    Human-readable form: a header line with time zone, frequency and unit,
    followed by the string form of the underlying pandas series.
    """
    time_zone_text = str(self._time_zone)
    # a missing frequency has no `freqstr` and is shown as empty string
    freq_text = getattr(self._freq, 'freqstr', '')
    unit_text = str(self._unit or None)
    return (f"Time zone: {time_zone_text}, "
            f"Freq: {freq_text}, "
            f"Unit: {unit_text}\n"
            f"{str(self._series)}")
def __repr__(self):
    """
    Constructor-like representation listing values, index, frequency,
    unit and time zone of the series.
    """
    klass = self.__class__.__name__
    values = self.values
    index = repr(self._series.index)
    # unset attributes are rendered as a bare None, set ones are quoted
    tz = f"'{self.time_zone}'" if self.time_zone else None
    freq = f"'{self.freq.freqstr}'" if self.freq else None
    unit = f"'{self.unit or ''}'"
    return (f"{klass}(series=Series(PintArray({values}, dtype={unit}), "
            f"index={index}), "
            f"freq={freq}, unit={unit}, time_zone={tz})")
def __eq__(self, other):
    """
    Compare two `TimestampSeries` by timestamps and values.

    Series with a unit are compared by their values converted to base
    units; series with a time zone are compared by their timestamps
    converted to UTC. Anything that is not a `TimestampSeries` compares
    unequal.
    """
    if not isinstance(other, TimestampSeries):
        return False

    def comparable_values(ts):
        # values in base units if a unit is set, plain values otherwise
        plain_values = ts.values
        if ts.unit:
            return list(ts._series.pint.to_base_units().values)
        return plain_values

    def comparable_timestamps(ts):
        # timestamps in UTC if a time zone is set; the conversion is done
        # on a deep copy so that `ts` itself is left untouched
        plain_timestamps = ts.timestamps
        if ts.time_zone:
            utc_copy = copy.deepcopy(ts)
            utc_copy.convert_time_zone('UTC')
            return utc_copy.timestamps
        return plain_timestamps

    own_values = comparable_values(self)
    their_values = comparable_values(other)
    own_timestamps = comparable_timestamps(self)
    their_timestamps = comparable_timestamps(other)
    return own_timestamps == their_timestamps and own_values == their_values
def __getitem__(self, item):
    """
    Look up values by key.

    A slice or any iterable key yields a deep copy of this object whose
    underlying series is replaced by the selection. Any other key yields
    the result of indexing the underlying series directly; a naive
    `datetime` key is localized to the series' time zone first, if one
    is set.
    """
    if not isinstance(item, (slice, Iterable)):
        is_naive_datetime = (isinstance(item, dt.datetime)
                             and item.tzinfo is None)
        if is_naive_datetime and self.time_zone is not None:
            item = self.time_zone.localize(item)
        return self._series[item]

    selection = copy.deepcopy(self)
    selection._series = selection._series[item]
    return selection
def __setitem__(self, key, value):
    """
    Placeholder: item assignment is not implemented.

    :raises NotImplementedError: always
    """
    raise NotImplementedError
# ---------------------------- calculations ------------------------------ #
def _basic_calc(self, operation, other, *args, **kwargs):
    """
    Dispatch an arithmetic operation to the helper matching `other`'s type.

    :param str operation: name of the pandas `Series` method to apply
    :param other: a `TimestampSeries`, a pandas `Series`, a sequence /
        numpy array / `PintArray`, or anything else (treated as scalar)
    :param args: accepted but not forwarded
    :param kwargs: forwarded only for `TimestampSeries` and pandas
        `Series` operands
    :return: the result of the selected helper
    """
    if isinstance(other, TimestampSeries):
        return self._basic_calc_time_series(operation, other, **kwargs)
    if isinstance(other, pd.Series):
        return self._basic_calc_pd_series(operation, other, **kwargs)
    collection_types = (collections.abc.Sequence, np.ndarray, PintArray)
    if isinstance(other, collection_types):
        return self._basic_calc_collection(operation, other)
    return self._basic_calc_scalar(operation, other)
def _basic_calc_time_series(self, operation, other, **kwargs):
    """
    Apply `operation` between this series and another `TimestampSeries`.

    :param str operation: name of the pandas `Series` method to apply
    :param TimestampSeries other: the second operand
    :param kwargs: forwarded to the pandas method
    :return: a new series holding the result, in this series' time zone
    :rtype: TimestampSeries
    :raises ValueError: if frequencies or units of the operands differ
    """
    # validate first so that no deep copy is made for invalid operands
    if self.freq != other.freq:
        raise ValueError("The time series have different frequencies")
    if not self.unit == other.unit:
        raise ValueError("The time series have different units")
    if not self._series.index.equals(other._series.index):
        warnings.warn("timestamps do not match, values are auto-filled",
                      category=TimestampMismatchWarning)
    tmp_series = copy.deepcopy(self)
    tmp_series._series = getattr(tmp_series._series, operation)(
        other._series, **kwargs)
    # enforce resulting TimestampSeries' time zone to be equal to initial
    # TimestampSeries (self)
    tmp_series.convert_time_zone(self.time_zone)
    # the operation may change the unit (e.g. a product); keep the stored
    # unit in sync with the result dtype like the other calc helpers do
    if isinstance(tmp_series._series.dtype, PintType):
        tmp_series._unit = tmp_series._series.pint.u
    return tmp_series
def _basic_calc_pd_series(self, operation, other, **kwargs):
    """
    Apply `operation` between this series and a pandas `Series`.

    :param str operation: name of the pandas `Series` method to apply
    :param pandas.Series other: the second operand, must have a
        `DatetimeIndex` and numeric values only
    :param kwargs: forwarded to the pandas method
    :return: a new series holding the result, in this series' time zone
    :rtype: TimestampSeries
    :raises ValueError: if `other` has no `DatetimeIndex` or holds
        non-numeric values
    """
    # validate first so that no deep copy is made for invalid operands
    if not isinstance(other.index, pd.DatetimeIndex):
        raise ValueError("The series has no proper DatetimeIndex")
    if not all(isinstance(element, numbers.Number) for element in other):
        raise ValueError("sequence contains non-numeric values")
    if not self._series.index.equals(other.index):
        # same category as for TimestampSeries operands; it subclasses
        # RuntimeWarning, so existing RuntimeWarning filters still match
        warnings.warn("timestamps do not match, values are auto-filled",
                      category=TimestampMismatchWarning)
    tmp_series = copy.deepcopy(self)
    tmp_series._series = getattr(tmp_series._series, operation)(other, **kwargs)
    # enforce resulting TimestampSeries' time zone to be equal to initial
    # TimestampSeries (self)
    tmp_series.convert_time_zone(self.time_zone)
    if isinstance(tmp_series._series.dtype, PintType):
        tmp_series._unit = tmp_series._series.pint.u
    return tmp_series
def _basic_calc_collection(self, operation, other):
    """
    Apply `operation` between this series and a collection of values.

    :param str operation: name of the pandas `Series` method to apply
    :param other: collection of numbers or `pint.Quantity` objects with
        the same length as this series
    :return: a new series holding the result
    :rtype: TimestampSeries
    :raises ValueError: if the lengths differ or a non-numeric element
        is found
    """
    result = copy.deepcopy(self)
    if len(other) != len(self):
        raise ValueError("sequence has different length")
    for element in other:
        if not isinstance(element, (numbers.Number, Quantity)):
            raise ValueError("sequence contains non-numeric values")
    calculate = getattr(result._series, operation)
    result._series = calculate(other)
    # adopt the unit of the result if it is pint-backed
    if isinstance(result._series.dtype, PintType):
        result._unit = result._series.pint.u
    return result
def _basic_calc_scalar(self, operation, other):
    """
    Apply `operation` between this series and a single value.

    :param str operation: name of the pandas `Series` method to apply
    :param other: a number or a `pint.Quantity`
    :return: a new series holding the result
    :rtype: TimestampSeries
    :raises ValueError: if `other` is neither a number nor a `Quantity`
    """
    result = copy.deepcopy(self)
    if not isinstance(other, (numbers.Number, Quantity)):
        raise ValueError('value is not numeric')
    calculate = getattr(result._series, operation)
    result._series = calculate(other)
    # only a Quantity operand triggers a refresh of the stored unit
    if isinstance(other, Quantity):
        result._unit = result._series.pint.u
    return result
# ---------------------------- validation ------------------------------- #
def validate_all(self):
    """
    Run this class's own index validation, then `_validate_all` of the
    parent classes.
    """
    self._validate()
    super()._validate_all()
def _validate(self):
    """
    Check the underlying series with the `index_is_datetime` and
    `index_is_sorted` validators.
    """
    # NOTE(review): both validators presumably raise on failure - confirm
    index_is_datetime(self._series)
    index_is_sorted(self._series)