From 5d4c2bf4fc75715fb079f3b5c7d33e91cdad3202 Mon Sep 17 00:00:00 2001 From: ishan Date: Sat, 16 Nov 2024 18:49:13 +0530 Subject: [PATCH] added countDistinct aggregation --- src/danfojs-base/aggregators/groupby.ts | 13 ++++++++++++- src/danfojs-base/core/series.ts | 5 +++++ src/danfojs-base/shared/types.ts | 2 ++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/danfojs-base/aggregators/groupby.ts b/src/danfojs-base/aggregators/groupby.ts index 63671914..c933698f 100644 --- a/src/danfojs-base/aggregators/groupby.ts +++ b/src/danfojs-base/aggregators/groupby.ts @@ -231,7 +231,7 @@ export default class Groupby { private arithemetic(operation: {[key: string] : Array | string} | string): { [key: string ]: {} } { const opsName = [ "mean", "sum", "count", "mode", "std", "var", "cumsum", "cumprod", - "cummax", "cummin", "median" , "min", "max"]; + "cummax", "cummin", "median" , "min", "max", "countdistinct"]; if (typeof operation === "string" ) { if (!opsName.includes(operation)) { throw new Error(`group operation: ${operation} is not valid`) @@ -377,6 +377,9 @@ export default class Groupby { return sum }, 1) break; + case "countdistinct": + data.push(new Set(colVal).size); + break; } return data } @@ -516,6 +519,14 @@ export default class Groupby { return this.operations("min") } + /** + * Obtain the number of distinct values in each column for each group + * @returns DataFrame + */ + countDistinct(): DataFrame{ + return this.operations("countdistinct") + } + /** * Obtain a specific group * @param keys Array diff --git a/src/danfojs-base/core/series.ts b/src/danfojs-base/core/series.ts index e42f169d..60092d49 100644 --- a/src/danfojs-base/core/series.ts +++ b/src/danfojs-base/core/series.ts @@ -1210,6 +1210,11 @@ export default class Series extends NDframe implements SeriesInterface { return this.cumOps("min", ops); } + countDistinct(options?: { inplace?: boolean; }): Series | void { + const ops = { inplace: false, ...options } + return this.cumOps("countdistinct", 
ops); + } + /** * Returns cumulative maximum over a Series diff --git a/src/danfojs-base/shared/types.ts b/src/danfojs-base/shared/types.ts index d89be170..617b2fcc 100644 --- a/src/danfojs-base/shared/types.ts +++ b/src/danfojs-base/shared/types.ts @@ -154,6 +154,7 @@ export interface SeriesInterface extends NDframeInterface { cumMin(options?: { inplace?: boolean }): Series | void cumMax(options?: { inplace?: boolean }): Series | void cumProd(options?: { inplace?: boolean }): Series | void + countDistinct(options?: { inplace?: boolean }): Series | void lt(other: Series | number | Array | boolean[]): Series gt(other: Series | number | Array | boolean[]): Series le(other: Series | number | Array | boolean[]): Series @@ -233,6 +234,7 @@ export interface DataFrameInterface extends NDframeInterface { cumMin(options?: { axis?: 0 | 1 }): DataFrame | void cumMax(options?: { axis?: 0 | 1 }): DataFrame | void cumProd(options?: { axis?: 0 | 1 }): DataFrame | void + countDistinct(options?: { axis?: 0 | 1 }): DataFrame | void copy(): DataFrame resetIndex(options: { inplace?: boolean }): DataFrame | void setIndex(