# Modified work:
# -----------------------------------------------------------------------------
# Copyright (c) 2020 Kota Yuhara (@wakamezake)
# -----------------------------------------------------------------------------
# Original work of aggregation:
# https://github.com/pfnet-research/xfeat/blob/master/xfeat/helper.py
# -----------------------------------------------------------------------------
# MIT License
#
# Copyright (c) 2020 Preferred Networks, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# -----------------------------------------------------------------------------
from inspect import isroutine
from types import FunctionType, LambdaType
from typing import Callable, List, Tuple, Union
import pandas as pd
from pandas.core.common import get_callable_name
def _is_lambda_function(obj):
"""
Example:
>>> import numpy as np
>>> def custom_function(x): return np.sum(x)
>>> _is_lambda_function(lambda x: np.sum(x))
True
>>> _is_lambda_function(np.sum)
False
>>> _is_lambda_function(custom_function)
False
"""
# It's worth noting that types.LambdaType is an alias for types.FunctionType
return isinstance(obj, LambdaType) and obj.__name__ == "<lambda>"
def aggregation(
    input_df: pd.DataFrame,
    group_key: str,
    group_values: List[str],
    agg_methods: List[Union[str, Callable]],
) -> Tuple[pd.DataFrame, List[str]]:
    """
    Aggregate values after grouping table rows by a given key.

    For every combination of aggregation method and value column, the column
    is aggregated per ``group_key`` group and the per-group result is joined
    back onto a copy of ``input_df`` as a new column named
    ``agg_{method}_{column}_by_{group_key}``.

    Args:
        input_df:
            Input data frame. It is not modified; a copy is returned.
        group_key:
            Used to determine the groups for the groupby.
        group_values:
            Used to aggregate values for the groupby.
        agg_methods:
            List of function or function names,
            e.g. ['mean', 'max', 'min', numpy.mean].
            Do not use a lambda function because the name attribute of the
            lambda function cannot generate a unique string of column names
            in <lambda>.

    Returns:
        Tuple of output dataframe and new column names. The new column names
        are ordered method-major (all columns for the first method, then all
        columns for the second method, ...).

    Raises:
        ValueError: If an aggregation method is a lambda function, if it is
            neither a string nor a routine, or if a generated column name is
            duplicated or already present in ``input_df``.
    """
    # Validate every method and resolve its display name before doing any
    # aggregation work, so an invalid request fails fast.
    method_names = []
    for agg_method in agg_methods:
        if _is_lambda_function(agg_method):
            raise ValueError('Not supported lambda function.')
        if isinstance(agg_method, str):
            method_names.append(agg_method)
        elif isroutine(agg_method):
            # isroutine() covers plain Python functions as well as builtins,
            # bound methods and method descriptors.
            method_names.append(get_callable_name(agg_method))
        else:
            raise ValueError('Supported types are: {} or {}.'
                             ' Got {} instead.'.format(str, Callable, type(agg_method)))

    # Plan the output columns. A name collision would make merge() append
    # "_x"/"_y" suffixes, so the returned names would not match the columns
    # actually present in the result; reject such requests explicitly.
    taken = set(input_df.columns)
    plan = []
    for agg_method, method_name in zip(agg_methods, method_names):
        for col in group_values:
            new_col = "agg_{}_{}_by_{}".format(method_name, col, group_key)
            if new_col in taken:
                raise ValueError(
                    "Generated column name '{}' is duplicated or already "
                    "exists in the input data frame.".format(new_col)
                )
            taken.add(new_col)
            plan.append((agg_method, col, new_col))

    new_df = input_df.copy()
    new_cols = [new_col for _, _, new_col in plan]
    if not plan:
        return new_df, new_cols

    # Group once and reuse the grouping for every (method, column) pair.
    grouped = input_df.groupby(group_key)
    agg_frames = []
    for agg_method, col, new_col in plan:
        df_agg = grouped[[col]].agg(agg_method)
        df_agg.columns = [new_col]
        agg_frames.append(df_agg)

    # Every aggregated frame is indexed by the group keys, so they can be
    # placed side by side and joined onto the rows with a single merge.
    all_aggs = pd.concat(agg_frames, axis=1)
    new_df = new_df.merge(
        all_aggs, how="left", right_index=True, left_on=group_key
    )
    return new_df, new_cols