Feature Extraction Benchmarks¶
This walkthrough serves as a benchmark for comparing functime
with tsfresh
feature extraction functions. We begin the analysis by evaluating the speed of feature extraction across time series of four different sizes: 10K, 100K, 1M, and 9M. Next, we assess the speed in a group-by and aggregation context, comparing the performance of functime with polars against tsfresh with pandas.
%%capture
# Install the third-party packages this notebook imports.
# The %%capture cell magic above keeps pip's output out of the notebook.
%pip install perfplot
%pip install pandas
%pip install tsfresh
%pip install functime
from typing import Callable
import pandas as pd
import perfplot
import polars as pl
from tsfresh.feature_extraction import feature_calculators as tsfresh
from functime import feature_extractors as fe
# Polars display settings for the result tables rendered later in the notebook:
# show up to 100 rows, allow string cells up to 60 characters, and hide the
# dtype line under the column names.
pl.Config.set_tbl_rows(100)
pl.Config.set_fmt_str_lengths(60)
pl.Config.set_tbl_hide_column_data_types(True)
1. Setup for the comparison¶
We are using the M4 dataset. We create a pd.DataFrame
and pl.DataFrame
and we define a list of tuples with the following structure:
(
<functime_function>
,
<tsfresh_function>
,
<functime_parameters>
,
<tsfresh_parameters>
)
# Glob pattern (relative to the notebook) for the M4 training parquet files.
_M4_DATASET = "../../data/m4_1d_train.parquet/*/*"

# pandas copy of the data: melt the wide table into long format, keep only the
# "value" column, discard missing values and renumber the rows from zero.
# NOTE(review): unlike the polars pipeline below, this one never drops "V1" —
# confirm that both frames end up holding the same values.
DF_PANDAS = (
    pd.read_parquet(_M4_DATASET)
    .melt()
    .drop(columns="variable")
    .dropna()
    .reset_index(drop=True)
)

# polars copy of the data: drop the "V1" column, melt to long format, keep only
# the "value" column and discard nulls.
DF_PL_EAGER = (
    pl.read_parquet(_M4_DATASET)
    .drop("V1")
    .melt()
    .drop("variable")
    .drop_nulls()
)
# Benchmark registry: one tuple per feature, laid out as
#   (functime function, tsfresh function, functime kwargs, tsfresh kwargs).
# The two kwargs dicts are unpacked into the respective function calls.
# Entries that are commented out are excluded from every benchmark run.
FUNC_PARAMS_BENCH = [
    (fe.absolute_energy, tsfresh.abs_energy, {}, {}),
    (fe.absolute_maximum, tsfresh.absolute_maximum, {}, {}),
    (fe.absolute_sum_of_changes, tsfresh.absolute_sum_of_changes, {}, {}),
    (
        fe.lempel_ziv_complexity,
        tsfresh.lempel_ziv_complexity,
        # Threshold is the half-range of the "value" column.
        {"threshold": (pl.col("value").max() - pl.col("value").min()) / 2},
        {"bins": 2},
    ),
    (
        fe.approximate_entropy,
        tsfresh.approximate_entropy,
        {"run_length": 2, "filtering_level": 0.5},
        {"m": 2, "r": 0.5},
    ),
    # (fe.augmented_dickey_fuller, tsfresh.augmented_dickey_fuller, "param")
    (fe.autocorrelation, tsfresh.autocorrelation, {"n_lags": 4}, {"lag": 4}),
    (
        fe.autoregressive_coefficients,
        tsfresh.ar_coefficient,
        {"n_lags": 4},
        # One request dict per coefficient (0..4), all inside a single "param"
        # list. The comprehension must live inside the list: a dict
        # comprehension keyed on the constant "param" keeps only its last
        # iteration, i.e. a single coefficient.
        {"param": [{"coeff": i, "k": 4} for i in range(5)]},
    ),
    (fe.benford_correlation, tsfresh.benford_correlation, {}, {}),
    (fe.binned_entropy, tsfresh.binned_entropy, {"bin_count": 10}, {"max_bins": 10}),
    (fe.c3, tsfresh.c3, {"n_lags": 10}, {"lag": 10}),
    (
        fe.change_quantiles,
        tsfresh.change_quantiles,
        {"q_low": 0.1, "q_high": 0.9, "is_abs": True},
        {"ql": 0.1, "qh": 0.9, "isabs": True, "f_agg": "mean"},
    ),
    (fe.cid_ce, tsfresh.cid_ce, {"normalize": True}, {"normalize": True}),
    (fe.count_above, tsfresh.count_above, {"threshold": 0.0}, {"t": 0.0}),
    (fe.count_above_mean, tsfresh.count_above_mean, {}, {}),
    (fe.count_below, tsfresh.count_below, {"threshold": 0.0}, {"t": 0.0}),
    (fe.count_below_mean, tsfresh.count_below_mean, {}, {}),
    # (fe.cwt_coefficients, tsfresh.cwt_coefficients, {"widths": (1, 2, 3), "n_coefficients": 2},{"param": {"widths": (1, 2, 3), "coeff": 2, "w": 1}}),
    (
        fe.energy_ratios,
        tsfresh.energy_ratio_by_chunks,
        {"n_chunks": 6},
        {"param": [{"num_segments": 6, "segment_focus": i} for i in range(6)]},
    ),
    (fe.first_location_of_maximum, tsfresh.first_location_of_maximum, {}, {}),
    (fe.first_location_of_minimum, tsfresh.first_location_of_minimum, {}, {}),
    # (fe.fourier_entropy, tsfresh.fourier_entropy, {"n_bins": 10}, {"bins": 10}),
    # (fe.friedrich_coefficients, tsfresh.friedrich_coefficients, {"polynomial_order": 3, "n_quantiles": 30}, {"params": [{"m": 3, "r": 30}]}),
    (fe.has_duplicate, tsfresh.has_duplicate, {}, {}),
    (fe.has_duplicate_max, tsfresh.has_duplicate_max, {}, {}),
    (fe.has_duplicate_min, tsfresh.has_duplicate_min, {}, {}),
    (
        fe.index_mass_quantile,
        tsfresh.index_mass_quantile,
        {"q": 0.5},
        {"param": [{"q": 0.5}]},
    ),
    (
        fe.large_standard_deviation,
        tsfresh.large_standard_deviation,
        {"ratio": 0.25},
        {"r": 0.25},
    ),
    (fe.last_location_of_maximum, tsfresh.last_location_of_maximum, {}, {}),
    (fe.last_location_of_minimum, tsfresh.last_location_of_minimum, {}, {}),
    # (fe.lempel_ziv_complexity, tsfresh.lempel_ziv_complexity, {"n_bins": 5}, {"bins": 5}),
    (
        fe.linear_trend,
        tsfresh.linear_trend,
        {},
        {
            "param": [
                {"attr": "pvalue"},
                {"attr": "rvalue"},
                {"attr": "intercept"},
                {"attr": "slope"},
                {"attr": "stderr"},
            ]
        },
    ),
    (fe.longest_streak_above_mean, tsfresh.longest_strike_above_mean, {}, {}),
    (fe.longest_streak_below_mean, tsfresh.longest_strike_below_mean, {}, {}),
    (fe.mean_abs_change, tsfresh.mean_abs_change, {}, {}),
    (fe.mean_change, tsfresh.mean_change, {}, {}),
    (
        fe.mean_n_absolute_max,
        tsfresh.mean_n_absolute_max,
        {"n_maxima": 20},
        {"number_of_maxima": 20},
    ),
    (
        fe.mean_second_derivative_central,
        tsfresh.mean_second_derivative_central,
        {},
        {},
    ),
    (
        fe.number_crossings,
        tsfresh.number_crossing_m,
        {"crossing_value": 0.0},
        {"m": 0.0},
    ),
    (fe.number_cwt_peaks, tsfresh.number_cwt_peaks, {"max_width": 5}, {"n": 5}),
    (fe.number_peaks, tsfresh.number_peaks, {"support": 5}, {"n": 5}),
    # (fe.partial_autocorrelation, tsfresh.partial_autocorrelation, "param"),
    (
        fe.percent_reoccurring_values,
        tsfresh.percentage_of_reoccurring_values_to_all_values,
        {},
        {},
    ),
    (
        fe.percent_reoccurring_points,
        tsfresh.percentage_of_reoccurring_datapoints_to_all_datapoints,
        {},
        {},
    ),
    (
        fe.permutation_entropy,
        tsfresh.permutation_entropy,
        {"tau": 1, "n_dims": 3},
        {"tau": 1, "dimension": 3},
    ),
    (
        fe.range_count,
        tsfresh.range_count,
        {"lower": 0, "upper": 9, "closed": "none"},
        {"min": 0, "max": 9},
    ),
    (fe.ratio_beyond_r_sigma, tsfresh.ratio_beyond_r_sigma, {"ratio": 2}, {"r": 2}),
    (
        fe.ratio_n_unique_to_length,
        tsfresh.ratio_value_number_to_time_series_length,
        {},
        {},
    ),
    (fe.root_mean_square, tsfresh.root_mean_square, {}, {}),
    (fe.sample_entropy, tsfresh.sample_entropy, {}, {}),
    (
        fe.spkt_welch_density,
        tsfresh.spkt_welch_density,
        {"n_coeffs": 10},
        {"param": [{"coeff": i} for i in range(10)]},
    ),
    (fe.sum_reoccurring_points, tsfresh.sum_of_reoccurring_data_points, {}, {}),
    (fe.sum_reoccurring_values, tsfresh.sum_of_reoccurring_values, {}, {}),
    (
        fe.symmetry_looking,
        tsfresh.symmetry_looking,
        {"ratio": 0.25},
        {"param": [{"r": 0.25}]},
    ),
    (
        fe.time_reversal_asymmetry_statistic,
        tsfresh.time_reversal_asymmetry_statistic,
        {"n_lags": 3},
        {"lag": 3},
    ),
    (fe.variation_coefficient, tsfresh.variation_coefficient, {}, {}),
    (fe.var_gt_std, tsfresh.variance_larger_than_standard_deviation, {}, {}),
]
2. Benchmark core functions¶
Benchmark the core functions on time series of length 10_000, 100_000, 1_000_000 and 9_000_000. Exceptions: approximate_entropy
is only run at 10_000, while number_cwt_peaks
, sample_entropy
and lempel_ziv_complexity
are only run at 10_000 and 100_000. all_benchmarks()
iterates through the elements of the FUNC_PARAMS_BENCH
list and invokes benchmark()
for each function.
def benchmark(
    f_feat: Callable, ts_feat: Callable, f_params: dict, ts_params: dict, is_expr: bool
):
    """Time one functime feature against its tsfresh counterpart with perfplot.

    Args:
        f_feat: functime feature function; its ``__name__`` selects the input sizes.
        ts_feat: tsfresh feature function, called on the pandas "value" column.
        f_params: Keyword arguments unpacked into ``f_feat``.
        ts_params: Keyword arguments unpacked into ``ts_feat``.
        is_expr: When true, ``f_feat`` receives ``pl.col("value")`` and the
            resulting expression is evaluated through ``DataFrame.select``;
            otherwise ``f_feat`` receives the "value" column directly.

    Returns:
        Whatever ``perfplot.bench`` returns for the two kernels.
    """
    feature_name = f_feat.__name__

    # Some features only run on the smaller inputs
    # (presumably because they are too slow on the larger ones — TODO confirm).
    sizes = [10_000, 100_000, 1_000_000, 9_000_000]
    if feature_name == "approximate_entropy":
        sizes = [10_000]
    elif feature_name in ("number_cwt_peaks", "sample_entropy", "lempel_ziv_complexity"):
        sizes = [10_000, 100_000]

    def run_functime(pl_frame, _pd_frame):
        # functime kernel: only the polars frame is used.
        if is_expr:
            return pl_frame.select(f_feat(pl.col("value"), **f_params))
        return f_feat(pl_frame["value"], **f_params)

    def run_tsfresh(_pl_frame, pd_frame):
        # tsfresh kernel: only the pandas frame is used.
        return ts_feat(pd_frame["value"], **ts_params)

    return perfplot.bench(
        # Both libraries are fed the first n rows of their own copy of the data.
        setup=lambda n: (DF_PL_EAGER.head(n), DF_PANDAS.head(n)),
        kernels=[run_functime, run_tsfresh],
        n_range=sizes,
        equality_check=False,
        labels=["functime", "tsfresh"],
    )
def all_benchmarks(params: list[tuple], is_expr: bool) -> pl.DataFrame:
    """Benchmark every feature in ``params`` and gather the timings in one table.

    Args:
        params: Tuples laid out as (functime function, tsfresh function,
            functime kwargs, tsfresh kwargs).
        is_expr: Forwarded to ``benchmark``; selects whether the functime
            function is exercised as a ``pl.Expr`` or on a ``pl.Series``.

    Returns:
        A ``pl.DataFrame`` with one row per (feature, n) holding both timings in
        milliseconds, their absolute and relative difference, and the speedup
        (tsfresh time divided by functime time). Features whose benchmark raises
        ``ValueError``, ``ImportError`` or ``TypeError`` are reported on stdout
        and left out of the table.
    """
    bench_df = pl.DataFrame(
        schema={
            "Feature name": pl.Utf8,
            "n": pl.Int64,
            "functime (ms)": pl.Float64,
            "tfresh (ms)": pl.Float64,
            "diff (ms)": pl.Float64,
            "diff %": pl.Float64,
            "speedup": pl.Float64,
        }
    )
    # Name the API that was actually benchmarked in the TypeError message.
    api_name = "pl.Expr" if is_expr else "pl.Series"

    for f_feat, ts_feat, f_params, ts_params in params:
        feature_name = f_feat.__name__
        print(f"Feature: {feature_name}")
        try:
            bench = benchmark(
                f_feat=f_feat,
                ts_feat=ts_feat,
                f_params=f_params,
                ts_params=ts_params,
                is_expr=is_expr,
            )
            # Row 0 holds the functime kernel, row 1 the tsfresh kernel (seconds).
            functime_s = bench.timings_s[0]
            tsfresh_s = bench.timings_s[1]
            feature_rows = pl.DataFrame(
                {
                    "Feature name": [feature_name] * len(bench.n_range),
                    "n": bench.n_range,
                    "functime (ms)": functime_s * 1_000,
                    "tfresh (ms)": tsfresh_s * 1_000,
                    "diff (ms)": (functime_s - tsfresh_s) * 1_000,
                    "diff %": 100 * (functime_s - tsfresh_s) / tsfresh_s,
                    "speedup": tsfresh_s / functime_s,
                }
            )
            # Newest results go on top of the accumulated table.
            bench_df = pl.concat([feature_rows, bench_df])
        except ValueError:
            print(f"Failed to compute feature {feature_name}")
        except ImportError:
            print(f"Failed to import feature {feature_name}")
        except TypeError:
            print(f"Feature {feature_name} not implemented for {api_name}")
    return bench_df
3. Run benchmarks¶
# Code to prettify benchmark results
def table_prettifier(df: pl.DataFrame, n: int):
    """Build the display table for the benchmark rows measured at size ``n``.

    Keeps the rows whose "n" equals ``n``, drops the "n" column, sorts by
    "speedup" (largest first), rounds every column except "Feature name"
    according to its magnitude, and finally renders "speedup" as text
    prefixed with "x ".
    """
    numeric = pl.exclude("Feature name")
    magnitude = numeric.abs()

    # Smaller values keep more decimals; the last two branches both round to
    # one decimal.
    rounded = (
        pl.when(magnitude < 0.1)
        .then(numeric.round(4))
        .when(magnitude < 1)
        .then(numeric.round(2))
        .when(magnitude < 30)
        .then(numeric.round(1))
        .otherwise(numeric.round(1))
    )

    rows_for_n = df.filter(pl.col("n") == n).drop("n")
    ranked = rows_for_n.sort("speedup", descending=True)
    formatted = ranked.with_columns(rounded)
    return formatted.with_columns(speedup="x " + pl.col("speedup").cast(pl.Utf8))
%%capture
# Run the whole registry twice: once with functime called on pl.Expr
# (is_expr=True) and once with functime called on pl.Series (is_expr=False).
bench_expr = all_benchmarks(params=FUNC_PARAMS_BENCH, is_expr=True)
bench_series = all_benchmarks(params=FUNC_PARAMS_BENCH, is_expr=False)
# pl.Expr benchmarks: one prettified table per input size
df_expr_10k = table_prettifier(bench_expr, n=10_000)
df_expr_100k = table_prettifier(bench_expr, n=100_000)
df_expr_1m = table_prettifier(bench_expr, n=1_000_000)
df_expr_9m = table_prettifier(bench_expr, n=9_000_000)
# pl.Series benchmarks: one prettified table per input size
df_series_10k = table_prettifier(bench_series, n=10_000)
df_series_100k = table_prettifier(bench_series, n=100_000)
df_series_1m = table_prettifier(bench_series, n=1_000_000)
df_series_9m = table_prettifier(bench_series, n=9_000_000)
4. Benchmark results¶
Display 8 tables:
- For pl.Series: 10k, 100k, 1M and 9M rows
- For pl.Expr: 10k, 100k, 1M and 9M rows
Each table contains the execution time (ms) for tsfresh and functime, the difference, the difference in % and the speedup:
4.1 Results for pl.Expr
¶
10k expr¶
df_expr_10k
100k expr¶
df_expr_100k
1M expr¶
df_expr_1m
9M expr¶
df_expr_9m
4.2 Results for pl.Series
¶
10k series¶
df_series_10k
100k series¶
df_series_100k
1M series¶
df_series_1m
9M series¶
df_series_9m
5. Benchmark Group by / Aggregation
context¶
Benchmark combining functime's feature extraction and polars' Group by / Aggregation
context.
# Path (relative to the notebook) of the S&P 500 parquet file.
_SP500_DATASET = "../../data/sp500.parquet"
# Load the same file once per library so each benchmark uses its native frame type.
SP500_PANDAS = pd.read_parquet(_SP500_DATASET)
SP500_PL_EAGER = pl.read_parquet(_SP500_DATASET)
# Display the pandas frame.
SP500_PANDAS
We want to compare tsfresh
using pandas' groupby
with functime
using polars' groupby
such as:
%%timeit
# tsfresh through a pandas groupby: number_peaks with n=5 on each ticker's prices.
SP500_PANDAS.groupby(by="ticker")["price"].agg(tsfresh.number_peaks, n=5)
%%timeit
# functime through a polars group_by: number_peaks with support=5 on each ticker's prices.
# NOTE(review): the `.ts` expression namespace is presumably registered by importing functime — confirm.
SP500_PL_EAGER.group_by(pl.col("ticker")).agg(
pl.col("price").ts.number_peaks(support=5)
)
If we examine the previous benchmark, we can see that the number_peaks
operation is approximately 2.5 times faster when using functime
compared to tsfresh
.
In the groupby
context, it's 10 times faster!
def benchmark_groupby_context(
    f_feat: Callable, ts_feat: Callable, f_params: dict, ts_params: dict
):
    """Time one feature per ticker: functime + polars group_by vs tsfresh + pandas groupby.

    Args:
        f_feat: functime feature function, applied to ``pl.col("price")``.
        ts_feat: tsfresh feature function, passed to the pandas ``agg`` call.
        f_params: Keyword arguments unpacked into ``f_feat``. For
            ``lempel_ziv_complexity`` they are replaced by a threshold equal to
            the half-range of the "price" column.
        ts_params: Keyword arguments forwarded through the pandas ``agg`` call.

    Returns:
        Whatever ``perfplot.bench`` returns for the two kernels, measured at
        the single size ``n=1``.
    """
    if f_feat.__name__ == "lempel_ziv_complexity":
        # The threshold has to reference the column used in this dataset.
        price = pl.col("price")
        f_params = {"threshold": (price.max() - price.min()) / 2}

    def polars_groupby(pl_frame, _pd_frame):
        # functime + polars groupby
        return pl_frame.group_by(pl.col("ticker")).agg(
            f_feat(pl.col("price"), **f_params)
        )

    def pandas_groupby(_pl_frame, pd_frame):
        # tsfresh + pandas groupby
        return pd_frame.groupby("ticker")["price"].agg(ts_feat, **ts_params)

    return perfplot.bench(
        # The full frames are used on every run; the size argument is ignored.
        setup=lambda _n: (SP500_PL_EAGER, SP500_PANDAS),
        kernels=[polars_groupby, pandas_groupby],
        n_range=[1],
        equality_check=False,
        labels=["functime", "tsfresh"],
    )
def all_benchmarks_groupby(params: list[tuple]) -> pl.DataFrame:
    """Run the group-by benchmark for every feature in ``params``.

    Args:
        params: Tuples laid out as (functime function, tsfresh function,
            functime kwargs, tsfresh kwargs).

    Returns:
        A ``pl.DataFrame`` with one row per (feature, n) holding both timings in
        milliseconds, their absolute and relative difference, and the speedup
        (tsfresh time divided by functime time). Features whose benchmark raises
        ``ValueError``, ``ImportError`` or ``TypeError`` are reported on stdout
        and left out of the table.
    """
    bench_df = pl.DataFrame(
        schema={
            "Feature name": pl.Utf8,
            "n": pl.Int64,
            "functime + pl groupby (ms)": pl.Float64,
            "tfresh + pd groupby (ms)": pl.Float64,
            "diff (ms)": pl.Float64,
            "diff %": pl.Float64,
            "speedup": pl.Float64,
        }
    )
    for f_feat, ts_feat, f_params, ts_params in params:
        feature_name = f_feat.__name__
        print(f"Feature: {feature_name}")
        try:
            bench = benchmark_groupby_context(
                f_feat=f_feat, ts_feat=ts_feat, f_params=f_params, ts_params=ts_params
            )
            # Row 0 holds the functime kernel, row 1 the tsfresh kernel (seconds).
            functime_s = bench.timings_s[0]
            tsfresh_s = bench.timings_s[1]
            feature_rows = pl.DataFrame(
                {
                    "Feature name": [feature_name] * len(bench.n_range),
                    "n": bench.n_range,
                    "functime + pl groupby (ms)": functime_s * 1_000,
                    "tfresh + pd groupby (ms)": tsfresh_s * 1_000,
                    "diff (ms)": (functime_s - tsfresh_s) * 1_000,
                    "diff %": 100 * (functime_s - tsfresh_s) / tsfresh_s,
                    "speedup": tsfresh_s / functime_s,
                }
            )
            # Newest results go on top of the accumulated table.
            bench_df = pl.concat([feature_rows, bench_df])
        except ValueError:
            print(f"Failed to compute feature {feature_name}")
        except ImportError:
            print(f"Failed to import feature {feature_name}")
        except TypeError:
            print(f"Feature {feature_name} not implemented for pl.Expr")
    return bench_df
%%capture
# Run the group-by benchmark over the whole registry, then prettify the rows
# recorded at n=1 (the only size benchmark_groupby_context measures).
bench_groupby = all_benchmarks_groupby(params=FUNC_PARAMS_BENCH)
df_groupby = table_prettifier(df=bench_groupby, n=1)
S&P500 groupby¶
df_groupby