# src/new_fave/measurements/vowel_measurement.py
"""
This module contains classes to represent vowel measurements and their
aggregations at different levels.
```{mermaid}
classDiagram
direction LR
class VowelMeasurement~list~{
.vowel_class
}
class VowelClass~list~{
.vowel_system
}
class VowelClassCollection~dict~{
.corpus
}
class SpeakerCollection~dict~
SpeakerCollection --o VowelClassCollection
VowelClassCollection --o VowelClass
VowelClass --o VowelMeasurement
```
When a class has a numpy array for an attribute, its
type is annotated using [nptyping](https://pypi.org/project/nptyping/)
to provide the expected dimensions. For example:
```
cand_param (NDArray[Shape["Param, Formant, Cand"], Float])
```
This indicates that `cand_param` is a three-dimensional array.
The length of the first dimension is `"Param"` (the number of DCT parameters),
the length of the second is `"Formant"` (the number of formants),
and the length of the third is `"Cand"` (the number of candidates).
"""
import fasttrackpy
from fasttrackpy import CandidateTracks, OneTrack
from aligned_textgrid import AlignedTextGrid, SequenceInterval
from fave_measurement_point.heuristic import Heuristic
from fave_measurement_point.formants import FormantArray
from new_fave.utils.textgrid import get_textgrid
from new_fave.speaker.speaker import Speaker
from new_fave.measurements.calcs import mahalanobis, \
mahal_log_prob,\
param_to_cov,\
cov_to_icov,\
clear_cached_properties
from new_fave.measurements.decorators import MahalWrap,\
MahalCacheWrap,\
get_wrapped,\
set_prop
from collections import defaultdict
import numpy as np
from typing import Literal, ClassVar
import polars as pl
import scipy.stats as stats
from scipy.fft import idst, idct
from joblib import Parallel, delayed, cpu_count
from collections.abc import Sequence, Iterable
from dataclasses import dataclass, field
from nptyping import NDArray, Shape, Float
from functools import lru_cache, cached_property
NCPU = cpu_count()
import warnings
def blank():
    """Build an empty `VowelClass`.

    Passed as the `default_factory` of `VowelClassCollection`, so looking
    up a vowel label that is not present yields a new, empty vowel class.
    """
    empty_vowel_class = VowelClass()
    return empty_vowel_class
def blank_list():
    """Build a new, empty list (used in this module as a `defaultdict` factory)."""
    return list()
# Module-level empty list, used as the default `track_list` argument of
# `VowelClassCollection.__init__`.
EMPTY_LIST = blank_list()
class PropertySetter:
    """
    Mixin that dynamically creates the properties needed for
    calculating log-probabilities from the properties decorated
    with either `MahalWrap` or `MahalCacheWrap`.
    """

    def _make_attrs(self):
        """Register the derived property families on this object.

        For each wrapper type, the names of the wrapped properties on
        `VowelMeasurement` are looked up with `get_wrapped`. From every
        `cand_*` name the following derived names are built and handed
        to `set_prop`, in this order:

        - `winner_*` (factory `"winner_factory"`)
        - `winner_*_mean` (factory `"mean_factory"`)
        - `winner_*_icov` (factory `"icov_factory"`)
        - `cand_*_logprob_speaker_byvclass` (factory `"speaker_byvclass"`)
        - `cand_*_logprob_speaker_global` (factory `"speaker_global"`)
        """
        for wrapper in (MahalWrap, MahalCacheWrap):
            cand_names = get_wrapped(VowelMeasurement, wrapper)
            winner_names = [
                name.replace("cand_", "winner_") for name in cand_names
            ]
            set_prop(self, cand_names, winner_names, wrapper, "winner_factory")

            # Summary statistics are derived from the winner properties.
            for suffix, factory in (
                ("_mean", "mean_factory"),
                ("_icov", "icov_factory"),
            ):
                targets = [name + suffix for name in winner_names]
                set_prop(self, winner_names, targets, wrapper, factory)

            # Log-probabilities are derived from the candidate properties;
            # the factory name is the same as the scope name.
            for scope in ("speaker_byvclass", "speaker_global"):
                targets = [f"{name}_logprob_{scope}" for name in cand_names]
                set_prop(self, cand_names, targets, wrapper, scope)
@dataclass
class VowelMeasurement(Sequence, PropertySetter):
    """A class used to represent a vowel measurement.

    ## Intended Usage

    Certain properties of a `VowelMeasurement` instance
    are set by its membership within a [](`~new_fave.VowelClass`)
    and that [](`~new_fave.VowelClass`)'s membership
    in a [](`~new_fave.VowelClassCollection`). These memberships
    are best managed by passing a list of `VowelMeasurement`s to
    [](`~new_fave.SpeakerCollection`).

    ```{.python}
    vowel_measurements = [VowelMeasurement(t) for t in fasttrack_tracks]
    speakers = SpeakerCollection(vowel_measurements)
    ```

    Args:
        track (fasttrackpy.CandidateTracks):
            A fasttrackpy.CandidateTracks object
        heuristic (Heuristic, optional):
            A point measurement Heuristic to use.
            Defaults to Heuristic().

    Attributes:
        track (fasttrackpy.CandidateTracks):
            an object of CandidateTracks class
        candidates (list):
            list of candidates for the track
        heuristic (Heuristic, optional):
            an object of Heuristic class (default is Heuristic())
        vowel_class (VowelClass):
            The containing VowelClass object, or `None` if this
            measurement has not been placed in a vowel class yet.
        formant_array (FormantArray):
            A FormantArray object
        file_name (str):
            name of the file of the track
        group (str):
            TierGroup of the track
        id (str):
            id of the track
        interval (aligned_textgrid.SequenceInterval):
            interval of the track
        label (str):
            label of the track
        n_formants (int):
            number of formants in the track
        optimized (int):
            The number of optimization iterations the
            vowel measurement has been through.
        winner (fasttrackpy.OneTrack):
            The winning formant track
        winner_index (int):
            The index of the winning formant track
        cand_param (NDArray[Shape["Param, Formant, Cand"], Float]):
            An array of the candidate DCT parameters.
        cand_maxformant (NDArray[Shape["1, Cand"], Float]):
            An array of the candidate maximum formants.
        cand_error (NDArray[Shape["Cand"], Float]):
            An array of the candidate smoothing error.
        cand_error_logprob_vm (NDArray[Shape["Cand"], Float]):
            Conversion of the smooth error to log probabilities. The candidate with
            the lowest error = log(1), and the candidate with the largest
            error = log(0).
        cand_param_(mahal/logprob)_speaker_byvclass (NDArray[Shape["Cand"], Float]):
            The mahalanobis distance (`mahal`) or associated log probability (`logprob`)
            for each candidate relative to the VowelClass for this speaker.
            These are calculated by drawing the relevant mean and covariance matrix from
            `vm.vowel_class`
        cand_param_(mahal/logprob)_speaker_global (NDArray[Shape["Cand"], Float]):
            The mahalanobis distance (`mahal`) or associated log probability (`logprob`)
            for each candidate relative to *all* vowel measurements for this speaker.
            These are calculated by drawing the relevant mean and covariance matrix from
            `vm.vowel_class.vowel_system`
        cand_param_(mahal/logprob)_corpus_byvclass (NDArray[Shape["Cand"], Float]):
            The mahalanobis distance (`mahal`) or associated log probability (`logprob`)
            for each candidate relative to this vowel class across all speakers.
            These are calculated by drawing the relevant mean and covariance matrix from
            `vm.vowel_class.vowel_system.corpus`
        point_measure (pl.DataFrame):
            A polars dataframe of the point measurement for this vowel.
        vm_context (pl.DataFrame):
            A polars dataframe of contextual information for the vowel measurement.
    """
    track: CandidateTracks
    # NOTE: this default `Heuristic()` is created once, when the class is
    # defined, and is shared by every instance that does not pass its own.
    heuristic: Heuristic = field(default = Heuristic())

    def __post_init__(
            self
        ):
        """Copy the track's metadata onto the measurement and set up state."""
        super().__init__()
        self.candidates = self.track.candidates
        self.n_formants = self.track.n_formants
        self._winner = self.track.winner
        self.interval = self.track.interval
        self.group = self.track.group
        self.id = self.track.id
        self.file_name = self.track.file_name
        self._label = None
        self._expanded_formants = None
        self._optimized = 0
        # Initialised here so `vowel_class` can be read before the
        # measurement has been placed in a VowelClass.
        self._vclass = None
        self._make_attrs()

    def __getitem__(self, i):
        """Return candidate track(s) by index or slice."""
        return self.candidates[i]

    def __len__(self):
        """Return the number of candidate tracks."""
        return len(self.candidates)

    def __repr__(self):
        out = (
            "VowelMeasurement: {"
            f"label: {self.label}, "
            f"samples: {self.winner.formants.shape[1]}, "
            f"optimized: {self.optimized}"
            "}"
        )
        return out

    @property
    def label(self) -> str:
        """The interval's label.

        When the cached label is unset or differs from the interval's
        current label, the new label is copied onto every candidate and
        onto the track before being returned.
        """
        if (not self._label) or (self._label != self.interval.label):
            for cand in self.candidates:
                cand.label = self.interval.label
            self.track.label = self.interval.label
            self._label = self.interval.label
        return self.interval.label

    @label.setter
    def label(self, x: str):
        self.interval.label = x

    @property
    def formant_array(self) -> FormantArray:
        """A `FormantArray` built from the winner's smoothed formants."""
        return FormantArray(
            self.winner.smoothed_formants,
            self.winner.time_domain,
            offset = self.track.window_length
        )

    @property
    def vowel_class(self):
        """The containing `VowelClass`, or `None` if none has been set."""
        # Returned as-is rather than via a truthiness test: VowelClass is a
        # sized Sequence, so an empty one is falsy and would be reported
        # as missing.
        return self._vclass

    @vowel_class.setter
    def vowel_class(self, vclass: 'VowelClass'):
        self._vclass = vclass

    @property
    def winner(self) -> OneTrack:
        """The currently winning candidate track."""
        return self._winner

    @winner.setter
    def winner(self, idx):
        """Select the candidate at index `idx` as the new winner.

        Clears cached properties on this measurement and, when present,
        on the containing vowel system and vowel class, then increments
        the `optimized` counter.
        """
        self._winner = self.candidates[idx]
        self._reset_winners()
        vclass = self.vowel_class
        if vclass is not None:
            # `getattr` with a default also covers a vowel class whose
            # vowel system has never been assigned.
            vsystem = getattr(vclass, "vowel_system", None)
            if vsystem is not None:
                vsystem._reset_winners()
            vclass._reset_winners()
        self._expanded_formants = None
        self._optimized += 1

    def _reset_winners(self):
        """Clear this measurement's cached properties."""
        clear_cached_properties(self)

    @property
    def optimized(self) -> int:
        """How many times a new winner has been assigned."""
        return self._optimized

    @property
    def winner_index(self) -> int:
        """Index of the current winner within `candidates`."""
        return self.candidates.index(self.winner)

    @property
    def expanded_formants(
            self
        ) -> NDArray[Shape['N, Formant, Cand'], Float]:
        """Inverse DCT (20 points) of `cand_param` along its first axis.

        The result is memoised in `_expanded_formants`, which is cleared
        whenever a new winner is assigned.
        """
        if self._expanded_formants is not None:
            return self._expanded_formants
        self._expanded_formants = np.apply_along_axis(
            lambda x: idct(x.T, n = 20, orthogonalize=True, norm = "forward"),
            0,
            self.cand_param
        )
        return self._expanded_formants

    @property
    @MahalCacheWrap
    def cand_param(
            self
        ) -> NDArray[Shape["Param, Formant, Cand"], Float]:
        """Transposed array of every candidate's `parameters`."""
        params = np.array(
            [
                x.parameters
                for x in self.candidates
            ]
        ).T
        return params

    @property
    @MahalCacheWrap
    def cand_bparam(
            self
        ) -> NDArray[Shape["Param, Formant, Cand"], Float]:
        """Transposed array of every candidate's `bandwidth_parameters`."""
        params = np.array([
            x.bandwidth_parameters
            for x in self.candidates
        ]).T
        return params

    @property
    @MahalCacheWrap
    def cand_maxformant(
            self
        ) -> NDArray[Shape["1, Cand"], Float]:
        """A single-row array of every candidate's `maximum_formant`."""
        mf = np.array([[
            c.maximum_formant
            for c in self.candidates
        ]])
        return mf

    @property
    def cand_error(
            self
        ) -> NDArray[Shape["Cand"], Float]:
        """Array of every candidate's `smooth_error` (warnings suppressed)."""
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            return np.array([
                c.smooth_error
                for c in self.candidates
            ])

    @property
    def cand_error_logprob_vm(
            self
        ) -> NDArray[Shape["Cand"], Float]:
        """Smoothing error rescaled to log-probabilities.

        Errors are shifted so the minimum is 0, divided by the resulting
        maximum, subtracted from 1 and logged: the smallest error maps to
        log(1) and the largest to log(0). Numeric warnings are suppressed.
        """
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            err_norm = self.cand_error - np.nanmin(self.cand_error)
            err_surv = 1 - (err_norm/np.nanmax(err_norm))
            err_log_prob = np.log(err_surv)
        return err_log_prob

    @property
    def point_measure(
            self
        ) -> pl.DataFrame:
        """Point measurement of the winner, chosen by `self.heuristic`."""
        winner_slice = self.heuristic.apply_heuristic(
            self.label,
            formants=self.formant_array
        )
        point_dict = {
            f"F{i+1}": winner_slice.formants[i]
            for i in range(winner_slice.formants.size)
        }
        point_dict["max_formant"] = self.winner.maximum_formant
        point_dict["smooth_error"] = self.winner.smooth_error
        point_dict["time"] = winner_slice.time
        point_dict["rel_time"] = winner_slice.rel_time
        point_dict["prop_time"] = winner_slice.prop_time
        point_dict["id"] = self.winner.id
        point_dict["label"] = self.winner.label
        point_dict["file_name"] = self.winner.file_name
        point_dict["group"] = self.winner.group
        return pl.DataFrame(point_dict)

    @cached_property
    def vm_context(
            self
        ) -> pl.DataFrame:
        """One-row DataFrame of contextual information for this vowel.

        `context` is `"initial"`, `"final"`, `"coextensive"` or
        `"internal"`, depending on whether the preceding and/or following
        segment label is `"#"`.
        """
        optimized = self.optimized
        vm_id = self.winner.id
        interval = self.winner.interval
        word = interval.within.label
        dur = interval.end - interval.start
        pre_word = interval.within.prev.label
        fol_word = interval.within.fol.label
        pre_seg = interval.prev.label
        fol_seg = interval.fol.label
        abs_pre_seg = interval.get_tierwise(-1).label
        abs_fol_seg = interval.get_tierwise(1).label

        stress = ""
        if hasattr(self.track.interval, "stress"):
            stress = self.track.interval.stress

        context = "internal"
        if pre_seg == "#" and fol_seg != "#":
            context = "initial"
        if pre_seg != "#" and fol_seg == "#":
            context = "final"
        if pre_seg == "#" and fol_seg == "#":
            context = "coextensive"

        df = pl.DataFrame({
            "optimized": optimized,
            "id": vm_id,
            "word": word,
            "stress": stress,
            "dur": dur,
            "pre_word": pre_word,
            "fol_word": fol_word,
            "pre_seg": pre_seg,
            "fol_seg": fol_seg,
            "abs_pre_seg": abs_pre_seg,
            "abs_fol_seg": abs_fol_seg,
            "context": context
        })
        return df

    def to_tracks_df(self) -> pl.DataFrame:
        """Return a DataFrame of the formant tracks

        Returns:
            (pl.DataFrame):
                A dataframe with formant track data.
        """
        df = self.winner.to_df()
        df = df.with_columns(
            # speaker_num is the leading integer of `id`, plus one.
            speaker_num = (
                pl.col("id")
                .str.extract(r"^(\d+)-")
                .str.to_integer() + 1
            )
        )
        df = df.join(self.vm_context, on = "id")
        return df

    def to_param_df(
            self,
            output: Literal["param", "log_param"] = "log_param"
        ) -> pl.DataFrame:
        """Return DataFrame of formant DCT parameters.

        Args:
            output (Literal["param", "log_param"], optional):
                Passed on to the winner's `to_df()`.
                Defaults to "log_param".

        Returns:
            (pl.DataFrame):
                A DataFrame of formant DCT parameters
        """
        df = self.winner.to_df(output=output)
        df = df.with_columns(
            max_formant = self.winner.maximum_formant,
            # speaker_num is the leading integer of `id`, plus one.
            speaker_num = (
                pl.col("id")
                .str.extract(r"^(\d+)-")
                .str.to_integer() + 1
            )
        )
        df = df.join(self.vm_context, on = "id")
        return df

    def to_point_df(self) -> pl.DataFrame:
        """Return a DataFrame of point measurements

        Returns:
            (pl.DataFrame):
                A DataFrame of vowel point measures.
        """
        df = self.point_measure
        df = df.with_columns(
            # speaker_num is the leading integer of `id`, plus one.
            speaker_num = (
                pl.col("id")
                .str.extract(r"^(\d+)-")
                .str.to_integer() + 1
            ),
            point_heuristic = pl.lit(self.heuristic.heuristic)
        )
        df = df.join(self.vm_context, on = "id")
        return df
@dataclass
class VowelClass(Sequence, PropertySetter):
    """A class used to represent a vowel class.

    ## Intended Usage

    `VowelClass` subclasses [](`collections.abc.Sequence`), so
    it is indexable. While it can be created on its own, it is
    best to leave this up to either [](`~new_fave.VowelClassCollection`)
    or [](`~new_fave.SpeakerCollection`).

    ```{.python}
    vowel_measurements = [VowelMeasurement(t) for t in fasttrack_tracks]
    vowel_class = VowelClass("ay", vowel_measurements)
    ```

    Args:
        label (str):
            The vowel class label
        vowel_measurements (list[VowelMeasurement]):
            A list of VowelMeasurements

    Attributes:
        label (str):
            label of the vowel class
        vowel_measurements (list[VowelMeasurement]):
            A list of VowelMeasurements
        vowel_system (VowelClassCollection):
            The containing vowel system, or `None` if this vowel class
            has not been placed in one.
        winners (list[OneTrack]):
            A list of winner [](`~fasttrackpy.OneTrack`)s from the vowel class
        winner_param (NDArray[Shape["Param, Formant, N"], Float]):
            An np.array of winner DCT parameters from the vowel class
        winner_param_mean (NDArray[Shape["ParamFormant, 1"], Float]):
            Mean of winner DCT parameters
        winner_param_cov (NDArray[Shape["ParamFormant, ParamFormant"], Float]):
            Covariance of winner DCT parameters
        winner_param_icov (NDArray[Shape["ParamFormant, ParamFormant"], Float]):
            Inverse covariance of winner DCT parameters
    """
    label: str = field(default="")
    vowel_measurements: list[VowelMeasurement] = field(default_factory=list)
    containing_class: ClassVar[type] = VowelMeasurement
    scope: ClassVar[str] = "speaker_byvclass"

    def __post_init__(self):
        """Create dynamic properties and claim the member measurements."""
        super().__init__()
        # Initialised here so `vowel_system` can be read on a vowel class
        # that was created on its own, outside a VowelClassCollection.
        self._vowel_system = None
        self._make_attrs()
        self._winners = [x.winner for x in self.vowel_measurements]
        # Each measurement gets a back-reference to this vowel class.
        for t in self.vowel_measurements:
            t.vowel_class = self

    def __getitem__(self, i):
        """Return vowel measurement(s) by index or slice."""
        return self.vowel_measurements[i]

    def __len__(self):
        """Return the number of vowel measurements in this class."""
        return len(self.vowel_measurements)

    def __repr__(self):
        out = (
            "VowelClass: {"
            f"label: {self.label}, "
            f"len: {len(self)}"
            "}"
        )
        return out

    def _reset_winners(self):
        """Clear this vowel class's cached properties."""
        clear_cached_properties(self)

    @property
    def vowel_system(self):
        """The containing `VowelClassCollection`, or `None` if unset."""
        return self._vowel_system

    @vowel_system.setter
    def vowel_system(self, vowel_system: 'VowelClassCollection'):
        self._vowel_system = vowel_system

    @cached_property
    def winners(self):
        """The winner track of every vowel measurement in this class."""
        return [x.winner for x in self.vowel_measurements]

    def to_param_df(
            self,
            output: Literal["param", "log_param"] = "log_param"
        ) -> pl.DataFrame:
        """Return DataFrame of formant DCT parameters.

        Args:
            output (Literal["param", "log_param"], optional):
                Passed on to each measurement's `to_param_df()`.
                Defaults to "log_param".

        Returns:
            (pl.DataFrame):
                A DataFrame of formant DCT parameters
        """
        df = pl.concat(
            [x.to_param_df(output=output) for x in self.vowel_measurements]
        )
        return df

    def to_tracks_df(
            self
        ) -> pl.DataFrame:
        """Return DataFrame of formant tracks.

        Returns:
            (pl.DataFrame):
                A DataFrame of formant tracks
        """
        df = pl.concat(
            [x.to_tracks_df() for x in self.vowel_measurements]
        )
        return df

    def to_point_df(self) -> pl.DataFrame:
        """Return a DataFrame of point measurements

        Returns:
            (pl.DataFrame):
                A DataFrame of vowel point measures.
        """
        df = pl.concat(
            [x.to_point_df() for x in self.vowel_measurements]
        )
        return df
class VowelClassCollection(defaultdict, PropertySetter):
    """
    A class for an entire vowel system.

    ## Intended Usage

    It is a subclass of `defaultdict`, so it can be
    keyed by vowel class label.

    ```{.python}
    vowel_measurements = [VowelMeasurement(t) for t in fasttrack_tracks]
    vowel_system = VowelClassCollection(vowel_measurements)
    ```

    Args:
        track_list (list[VowelMeasurement]):
            A list of `VowelMeasurement`s.

    Attributes:
        winners (list[OneTrack]):
            All winner tracks from the entire vowel system.
        vowel_measurements (list[VowelMeasurement]):
            All `VowelMeasurement` objects within this vowel system
        textgrid (AlignedTextGrid):
            The `AlignedTextGrid` associated with this vowel system.
        winner_expanded_formants (NDArray[Shape["20, FormantN"], Float]):
            A cached property that returns the expanded formants for the winners.
        winner_param (NDArray[Shape["Param, Formant, N"], Float]):
            An array of all parameters from all winners across the
            vowel system.
        winner_maxformant (NDArray[Shape["1, N"], Float]):
            An array of the maximum formants of all winners across
            the vowel system
        winner_param_mean (NDArray[Shape["1, FormantParam"], Float]):
            The mean of all DCT parameters across all formants for the winners
            in this vowel system.
        winner_param_cov (NDArray[Shape["FormantParam, FormantParam"], Float]):
            The covariance of all parameters across all formants for the winners
            in this vowel system
        winner_param_icov (NDArray[Shape["FormantParam, FormantParam"], Float]):
            The inverse of `winner_param_cov`.
        winner_maxformant_mean (float):
            The mean maximum formant across all winners in this vowel system.
        winner_maxformant_cov (NDArray[Shape["1, 1"], Float]):
            The covariance of the maximum formant across all winners
            in this vowel system.
        winner_maxformant_icov (NDArray[Shape["1, 1"], Float]):
            The inverse of `winner_maxformant_cov`
    """
    containing_class = VowelClass
    scope = "speaker_global"

    def __init__(self, track_list: list[VowelMeasurement] = EMPTY_LIST):
        """Group `track_list` by label into `VowelClass`es keyed by label."""
        # Missing keys are filled with an empty VowelClass via `blank`.
        super().__init__(blank)
        # Never store the shared module-level default on the instance;
        # give each default-constructed collection its own list.
        if track_list is EMPTY_LIST:
            track_list = []
        self.track_list = track_list
        self.tracks_dict = defaultdict(blank_list)
        if isinstance(self.track_list, Iterable):
            self._make_tracks_dict()
        self._dictify()
        self._vowel_system()
        self._file_name = None
        self._corpus = None
        self._make_attrs()

    def __setitem__(self, __key, __value) -> None:
        super().__setitem__(__key, __value)

    def _reset_winners(self):
        """Clear this vowel system's cached properties."""
        clear_cached_properties(self)

    def _make_tracks_dict(self):
        """Group the vowel measurements in `track_list` by their label."""
        for v in self.track_list:
            self.tracks_dict[v.label].append(v)

    def _dictify(self):
        """Create one `VowelClass` per label in `tracks_dict`."""
        for v in self.tracks_dict:
            self[v] = VowelClass(
                v,
                self.tracks_dict[v]
            )

    def _vowel_system(self):
        """Point every created vowel class back at this vowel system."""
        for v in self.tracks_dict:
            self[v].vowel_system = self

    @property
    def corpus(self):
        """The containing corpus, or `None` if it has not been set."""
        return self._corpus

    @corpus.setter
    def corpus(self, corp):
        self._corpus = corp

    @cached_property
    def winners(
            self
        ) -> list[OneTrack]:
        """The winner tracks of every vowel class, flattened into one list."""
        return [
            x
            for vc in self.values()
            for x in vc.winners
        ]

    @property
    def vowel_measurements(
            self
        ) -> list[VowelMeasurement]:
        """The vowel measurements of every vowel class, flattened."""
        return [
            x
            for vc in self.values()
            for x in vc.vowel_measurements
        ]

    @cached_property
    def textgrid(
            self
        ) -> AlignedTextGrid:
        """The textgrid looked up from the first measurement's interval."""
        return get_textgrid(self.vowel_measurements[0].interval)

    @property
    def file_name(
            self
        ) -> str:
        """The file name of the first measurement's winner (memoised)."""
        if self._file_name:
            return self._file_name
        self._file_name = self.vowel_measurements[0].winner.file_name
        return self._file_name

    @cached_property
    def winner_expanded_formants(
            self
        ) -> NDArray[Shape["20, FormantN"], Float]:
        """Horizontally stacked expanded formants of every winner."""
        formants = np.hstack(
            [
                x.expanded_formants[:, :, x.winner_index]
                for x in self.vowel_measurements
            ]
        )
        return formants

    def to_tracks_df(self) -> pl.DataFrame:
        """Return a DataFrame of the formant tracks

        Returns:
            (pl.DataFrame):
                A dataframe with formant track data.
        """
        df = pl.concat(
            [x.to_tracks_df() for x in self.values()]
        )
        return df

    def to_param_df(
            self,
            output: Literal["param", "log_param"] = "log_param"
        ) -> pl.DataFrame:
        """Return DataFrame of formant DCT parameters.

        Args:
            output (Literal["param", "log_param"], optional):
                Passed on to each vowel class's `to_param_df()`.
                Defaults to "log_param".

        Returns:
            (pl.DataFrame):
                A DataFrame of formant DCT parameters
        """
        df = pl.concat(
            [x.to_param_df(output = output) for x in self.values()]
        )
        return df

    def to_point_df(self) -> pl.DataFrame:
        """Return a DataFrame of point measurements

        Returns:
            (pl.DataFrame):
                A DataFrame of vowel point measures.
        """
        df = pl.concat(
            [x.to_point_df() for x in self.values()]
        )
        return df
class SpeakerCollection(defaultdict, PropertySetter):
    """
    A class to represent the vowel system of all
    speakers in a TextGrid.

    ## Intended usage

    It is a subclass of `defaultdict`,
    and can be keyed by the `(file_name, group_name)` tuple.

    ```{.python}
    vowel_measurements = [VowelMeasurement(t) for t in fasttrack_tracks]
    speakers = SpeakerCollection(vowel_measurements)
    ```

    Args:
        track_list (list[VowelMeasurement]):
            A list of `VowelMeasurement`s.
    """
    __hash__ = object.__hash__
    containing_class = VowelClassCollection

    def __init__(self, track_list: list[VowelMeasurement] = []):
        """Group `track_list` into one `VowelClassCollection` per speaker."""
        # No default factory is given, so missing keys raise KeyError.
        super().__init__()
        # Never store an empty list passed in (including the shared
        # default) on the instance; use a fresh one instead.
        if isinstance(track_list, list) and not track_list:
            track_list = []
        self.track_list = track_list
        self.speakers_dict = defaultdict(blank_list)
        self._make_tracks_dict()
        self._dictify()
        self._speaker = None
        self._associate_corpus()
        self._make_attrs()

    def __setitem__(self, __key, __value) -> None:
        """Store a vowel system and re-associate every one with this corpus."""
        super().__setitem__(__key, __value)
        self._associate_corpus()

    def _make_tracks_dict(self):
        """Group the measurements by their `(file_name, group)` pair."""
        for v in self.track_list:
            file_speaker = (v.file_name, v.group)
            self.speakers_dict[file_speaker].append(v)

    def _dictify(self):
        """Create one `VowelClassCollection` per `(file_name, group)` key."""
        for fs in self.speakers_dict:
            self[fs] = VowelClassCollection(
                self.speakers_dict[fs]
            )

    def _associate_corpus(self):
        """Set this collection as the `corpus` of every vowel system."""
        for speaker in self.values():
            speaker.corpus = self

    def _reset_winners(self):
        """Clear this collection's cached properties."""
        clear_cached_properties(self)

    @cached_property
    def vowel_dict(
            self
        ) -> defaultdict[str, list[VowelMeasurement]]:
        """Vowel measurements pooled across speakers, keyed by vowel label."""
        out = defaultdict(blank_list)
        for speaker in self.values():
            for vowel in speaker:
                out[vowel] += speaker[vowel]
        return out

    @cached_property
    def vowel_winners(
            self
        ) -> defaultdict[str, list[OneTrack]]:
        """Winner tracks pooled across speakers, keyed by vowel label."""
        out = defaultdict(blank_list)
        for vowel in self.vowel_dict:
            out[vowel] += [x.winner for x in self.vowel_dict[vowel]]
        return out

    @cached_property
    def winner_param(
            self
        ) -> defaultdict[str, NDArray[Shape["Param, Formant, N"], Float]]:
        """Transposed array of winner `parameters`, keyed by vowel label."""
        out = defaultdict(blank_list)
        for vowel in self.vowel_winners:
            params = np.array(
                [
                    x.parameters
                    for x in self.vowel_winners[vowel]
                ]
            ).T
            out[vowel] = params
        return out

    @cached_property
    def winner_param_mean(
            self
        ) -> defaultdict[str, NDArray[Shape["FormantParam, 1"], Float]]:
        """Column vector of mean winner parameters, keyed by vowel label."""
        out = defaultdict(lambda: np.array([]))
        for vowel in self.winner_param:
            N = len(self.vowel_dict[vowel])
            # Flatten the parameter axes to rows, one column per token.
            winner_mean = self.winner_param[vowel].reshape(-1, N).mean(axis = 1)
            winner_mean = winner_mean[:, np.newaxis]
            out[vowel] = winner_mean
        return out

    @property
    def winner_param_cov(
            self
        ) -> defaultdict[str, NDArray[Shape["FormantParam, FormantParam"], Float]]:
        """`param_to_cov` of the winner parameters, keyed by vowel label.

        This is not cached: every access recomputes all of the matrices.
        """
        out = defaultdict(lambda: np.array([]))
        for vowel in self.winner_param:
            param_cov = param_to_cov(self.winner_param[vowel])
            out[vowel] = param_cov
        return out

    @cached_property
    def winner_param_icov(
            self
        ) -> defaultdict[str, NDArray[Shape["FormantParam, FormantParam"], Float]]:
        """`cov_to_icov` of `winner_param_cov`, keyed by vowel label."""
        out = defaultdict(lambda: np.array([]))
        # Read the uncached covariance property exactly once; reading it
        # inside the loop recomputes every matrix for every vowel.
        for vowel, param_cov in self.winner_param_cov.items():
            out[vowel] = cov_to_icov(param_cov)
        return out

    @property
    def speaker(self):
        """The associated `Speaker` object, or `None` if it has not been set."""
        return self._speaker

    @speaker.setter
    def speaker(self, speaker: Speaker):
        self._speaker = speaker

    def _join_speaker_df(self, df: pl.DataFrame) -> pl.DataFrame:
        """Left-join `self.speaker.df` onto `df` when that is possible.

        The join happens only when a speaker is set and its DataFrame
        has both `file_name` and `speaker_num` columns; otherwise `df`
        is returned unchanged.
        """
        if not self.speaker:
            return df
        join_cols = ["file_name", "speaker_num"]
        if not all(col in self.speaker.df.columns for col in join_cols):
            return df
        return df.join(
            self.speaker.df,
            on = join_cols,
            how = "left"
        )

    def to_tracks_df(self) -> pl.DataFrame:
        """
        This will return a data frame of formant
        tracks for all speakers.

        Returns:
            (pl.DataFrame): A dataframe of formant tracks for all speakers.
        """
        df = pl.concat(
            [x.to_tracks_df() for x in self.values()]
        )
        return self._join_speaker_df(df)

    def to_param_df(
            self,
            output: Literal['param', 'log_param'] = "log_param"
        ) -> pl.DataFrame:
        """
        This will return a dataframe of the DCT parameters for all speakers.
        If `output` is passed `param`, it will be the DCT parameters in the
        original Hz. If passed `log_param`, it will be the DCT parameters
        over log(Hz).

        Args:
            output (Literal['param', 'log_param'], optional):
                Which set of DCT parameters to return. Defaults to "log_param".

        Returns:
            (pl.DataFrame): A DataFrame of DCT parameters for all speakers.
        """
        df = pl.concat(
            [x.to_param_df(output = output) for x in self.values()]
        )
        return self._join_speaker_df(df)

    def to_point_df(self) -> pl.DataFrame:
        """
        This will return a DataFrame of point measurements
        for all speakers

        Returns:
            (pl.DataFrame): A DataFrame of vowel point measurements.
        """
        df = pl.concat(
            [x.to_point_df() for x in self.values()]
        )
        return self._join_speaker_df(df)