111 lines
2.4 KiB
Python
111 lines
2.4 KiB
Python
# MIT License
|
|
# Copyright (c) 2024 Jeff Moe
|
|
|
|
""" Read CSV contacts file exported from hubspot."""
|
|
|
|
import argparse
|
|
import csv
|
|
import pandas as pd
|
|
|
|
|
|
def parse_args(argv=None):
    """Parse the command-line options of the contacts CSV tool.

    Args:
        argv: Optional sequence of argument strings to parse instead of the
            process command line. With the default ``None`` argparse reads
            ``sys.argv[1:]``, so callers that pass nothing behave as before.

    Returns:
        An ``argparse.Namespace`` holding ``csv_file`` (str) and the boolean
        switches ``dump``, ``empty``, ``fields`` and ``non_empty``.

    Raises:
        SystemExit: raised by argparse on invalid arguments or ``-h``.
    """
    parser = argparse.ArgumentParser(description="Parse Hubspot Contacts CSV Export")
    parser.add_argument("csv_file", help="Contacts CSV File", type=str)

    # Every report is a plain on/off switch, so declare them from one table
    # instead of repeating the same add_argument call four times.
    switches = (
        ("-d", "--dump", "Dump CSV contents"),
        ("-e", "--empty", "List empty columns"),
        ("-f", "--fields", "Fields from CSV header"),
        ("-n", "--non_empty", "List number of non-empty values for each column"),
    )
    for short_opt, long_opt, help_text in switches:
        parser.add_argument(short_opt, long_opt, help=help_text, action="store_true")

    return parser.parse_args(argv)
|
|
|
|
|
|
def csv_dump(CSV):
    """Print the full contents of the CSV to stdout, 1000 rows at a time.

    Args:
        CSV: CSV source, passed unchanged to ``pandas.read_csv``.

    Each chunk is rendered with ``DataFrame.to_string()``, so the column
    header line is printed again at the top of every chunk.
    """
    # The chunked reader holds the input open; the ``with`` block releases it
    # even if printing fails part-way through the file.
    with pd.read_csv(CSV, low_memory=False, chunksize=1000) as reader:
        for chunk in reader:
            print(chunk.to_string())
|
|
|
|
|
|
def csv_empty(CSV):
    """Report the columns of the CSV in which every value is missing.

    Args:
        CSV: CSV source, passed unchanged to ``pandas.read_csv``; the first
            row is used as the header.

    Prints an "Empty columns:" heading followed by one column name per line,
    or a single "No empty columns found." line when every column holds at
    least one value.
    """
    frame = pd.read_csv(CSV, low_memory=False, header=0)

    # One boolean per column: True when the whole column is null.
    all_missing = frame.isna().all(axis=0)
    blank = [name for name, is_blank in all_missing.items() if is_blank]

    if not blank:
        print("No empty columns found.")
        return

    print("Empty columns:")
    print("\n".join(blank))
|
|
|
|
|
|
def csv_fields(CSV):
    """Print the column names from the CSV header, one per line.

    Args:
        CSV: CSV source, passed unchanged to ``pandas.read_csv``; the first
            row is used as the header.
    """
    # nrows=0 makes pandas stop after the header row, so a large export is
    # not parsed in full just to list its field names.
    header = pd.read_csv(CSV, header=0, nrows=0)
    print("\n".join(header.columns))
|
|
|
|
|
|
def csv_non_empty(CSV):
    """Print non-empty and unique value counts for each populated column.

    Args:
        CSV: CSV source, passed unchanged to ``pandas.read_csv``; the first
            row is used as the header.

    Columns with no values at all are skipped. The remaining columns are
    listed as tab-separated ``name / non-empty count / unique count`` rows,
    ordered by unique count and then non-empty count, both descending. The
    heading line is always printed, even when no column qualifies.
    """
    frame = pd.read_csv(CSV, low_memory=False, header=0)

    # Gather (name, non-null count, distinct count) for populated columns.
    stats = []
    for name in frame.columns:
        series = frame[name]
        filled = series.count()
        if not filled:
            continue
        stats.append((name, filled, series.nunique()))

    # Stable sort: ties keep the column order of the file.
    stats.sort(key=lambda row: (row[2], row[1]), reverse=True)

    print("Column\tNon-empty values\tUnique values")

    if not stats:
        print("No non-empty values found.")
        return

    for name, filled, distinct in stats:
        print(f"{name}\t{filled}\t{distinct}")
|
|
|
|
|
|
def main():
    """Parse the command line and run each report that was requested.

    Reports run in a fixed order (dump, empty, fields, non_empty), and more
    than one may be selected in a single invocation. With no switch given,
    nothing is printed.
    """
    options = parse_args()
    source = options.csv_file

    # Ordered (switch, handler) pairs; the order fixes the output sequence.
    reports = (
        (options.dump, csv_dump),
        (options.empty, csv_empty),
        (options.fields, csv_fields),
        (options.non_empty, csv_non_empty),
    )
    for requested, report in reports:
        if requested:
            report(source)
|
|
|
|
|
|
# Run the CLI only when executed as a script, so importing this module
# has no side effects.
if __name__ == "__main__":
    main()
|