From e08dfc02f79aedef9f39fc2899c8045da502d947 Mon Sep 17 00:00:00 2001 From: Jeff Moe Date: Thu, 19 Sep 2024 19:26:23 -0600 Subject: [PATCH] Count unique values per column --- hsparse/parse_csv_contacts.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/hsparse/parse_csv_contacts.py b/hsparse/parse_csv_contacts.py index 80f11ed..93d9481 100644 --- a/hsparse/parse_csv_contacts.py +++ b/hsparse/parse_csv_contacts.py @@ -78,11 +78,15 @@ def csv_non_empty(CSV): col: df[col].count() for col in df.columns if not df[col].isnull().all() } - sorted_columns = sorted(non_empty_columns.items(), key=lambda x: x[1], reverse=True) + unique_counts = {col: df[col].nunique() for col in non_empty_columns.keys()} + sorted_columns = sorted(unique_counts.items(), key=lambda x: x[1], reverse=True) + + print("Column\tNon-empty values\tUnique values") if sorted_columns: - for col, count in sorted_columns: - print(f"{col}: {count}") + for col, unique_count in sorted_columns: + count = non_empty_columns[col] + print(f"{col}\t{count}\t{unique_count}") else: print("No non-empty values found.")