Compare commits

..

No commits in common. "main" and "v0.0.1" have entirely different histories.
main ... v0.0.1

8 changed files with 12 additions and 1890 deletions

2
.gitignore vendored
View file

@ -10,7 +10,6 @@ log
tmp
venv
venv.coverage
*.csv
*.db
*.doctrees
*.env
@ -21,7 +20,6 @@ venv.coverage
*.pyd
*.pyo
*.swp
*.txt
*.egg-info
_build
_version.py

View file

@ -1,17 +0,0 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": true
}
]
}

View file

@ -1,4 +1 @@
v0.2.1 Script to extract useful fields from CSV.
v0.2.0 Functions for finding useful fields in CSV.
v0.1.0 Setup scripts.
v0.0.1 Hubspot Parse.

View file

@ -1,33 +1,2 @@
# Hubspot Parse
Scripts for parsing Hubspot data with a goal towards migrations.
# Install
Clone the repository and install the dependencies with Poetry:
```
git clone https://code.libre.is/libre/hsparse
cd hsparse/
python -m venv venv
source venv/bin/activate
pip install poetry
poetry install
```
# Usage
```
$ hsparse-csv-contacts -h
usage: hsparse-csv-contacts [-h] [-d] [-e] [-f] [-n] csv_file
Parse Hubspot Contacts CSV Export
positional arguments:
csv_file Contacts CSV File
options:
-h, --help show this help message and exit
-d, --dump Dump CSV contents
-e, --empty List empty columns
-f, --fields Fields from CSV header
-n, --non_empty List number of non-empty values for each column
```

View file

@ -1,71 +0,0 @@
# MIT License
# Copyright (c) 2024 Jeff Moe
"""Read CSV and extract selected columns and write to a new CSV."""
import csv
import argparse
def parse_args():
    """Build and parse the command line for the column extractor.

    Returns the argparse namespace with three positional string
    arguments: headers_file, input_csv, output_csv.
    """
    cli = argparse.ArgumentParser(description="Extract CSV Columns, Output CSV")
    for arg_name, arg_help in (
        ("headers_file", "Headers File"),
        ("input_csv", "Input CSV File"),
        ("output_csv", "Output CSV File"),
    ):
        cli.add_argument(arg_name, help=arg_help, type=str)
    return cli.parse_args()
def read_good_headers(filename):
    """Return the list of 'good' header names, one per line of *filename*.

    Each line is stripped of surrounding whitespace (including the newline).
    """
    with open(filename, "r") as handle:
        return [entry.strip() for entry in handle]
def filter_csv(input_csv, output_csv, good_headers):
    """Copy *input_csv* to *output_csv*, keeping only the *good_headers* columns.

    The surviving columns keep the order they have in the input file, not the
    order of *good_headers*.

    Raises:
        ValueError: if the input file is empty (no header row) — the original
            code crashed with a TypeError in that case, because
            ``DictReader.fieldnames`` is ``None``.
    """
    # Set for O(1) membership tests instead of scanning the list per column.
    wanted = set(good_headers)
    with open(input_csv, mode="r", newline="", encoding="utf-8") as infile:
        reader = csv.DictReader(infile)
        if reader.fieldnames is None:
            # Empty file: fail with a clear message instead of a TypeError.
            raise ValueError(f"{input_csv}: no header row found")
        # Keep only the requested columns, preserving input order.
        filtered_fieldnames = [
            field for field in reader.fieldnames if field in wanted
        ]
        with open(output_csv, mode="w", newline="", encoding="utf-8") as outfile:
            writer = csv.DictWriter(outfile, fieldnames=filtered_fieldnames)
            # Header line first, then one filtered row per input row.
            writer.writeheader()
            for row in reader:
                writer.writerow({key: row[key] for key in filtered_fieldnames})
def main():
    """CLI entry point: read the header allow-list, then write the filtered CSV."""
    args = parse_args()
    # Step 1: the list of columns to keep.
    keep = read_good_headers(args.headers_file)
    # Step 2: copy only those columns into the new file.
    filter_csv(args.input_csv, args.output_csv, keep)
    print(f"Filtered CSV has been written to {args.output_csv}")


if __name__ == "__main__":
    main()

View file

@ -1,110 +1,14 @@
# MIT License
# Copyright (c) 2024 Jeff Moe
# NOTE(review): this shebang is not on line 1, so it has no effect here.
#!/usr/bin/env python3
''' Read CSV contacts file exported from hubspot.'''
# NOTE(review): the duplicated module docstring above/below and the
# hard-coded CSV constant look like leftovers from an older revision
# (CSV is only referenced by stray code inside parse_args) -- confirm.
""" Read CSV contacts file exported from hubspot."""
import argparse
import csv
import pandas as pd
CSV="all-contacts.csv"
def parse_args():
    """Parse the command line for the Hubspot-contacts CSV inspector.

    Returns the argparse namespace with the positional ``csv_file`` and the
    boolean flags ``dump``, ``empty``, ``fields``, ``non_empty``.

    Bug fixed: the previous body also printed "Parsing" and unconditionally
    opened the hard-coded "all-contacts.csv" while *parsing arguments* —
    leftover code from an earlier revision that crashed whenever that file
    was absent and ignored the user-supplied csv_file. Argument parsing now
    has no I/O side effects, matching the documented CLI.
    """
    parser = argparse.ArgumentParser(description="Parse Hubspot Contacts CSV Export")
    parser.add_argument("csv_file", help="Contacts CSV File", type=str)
    parser.add_argument(
        "-d",
        "--dump",
        help="Dump CSV contents",
        action="store_true",
    )
    parser.add_argument(
        "-e",
        "--empty",
        help="List empty columns",
        action="store_true",
    )
    parser.add_argument(
        "-f",
        "--fields",
        help="Fields from CSV header",
        action="store_true",
    )
    parser.add_argument(
        "-n",
        "--non_empty",
        help="List number of non-empty values for each column",
        action="store_true",
    )
    args = parser.parse_args()
    return args
def csv_dump(CSV):
    """Print the entire contents of *CSV*, streamed in 1000-row chunks."""
    for piece in pd.read_csv(CSV, low_memory=False, chunksize=1000):
        print(piece.to_string())
def csv_empty(CSV):
    """Report the columns of *CSV* that contain no values at all."""
    frame = pd.read_csv(CSV, low_memory=False, header=0)
    blanks = [name for name in frame.columns if frame[name].isnull().all()]
    if not blanks:
        print("No empty columns found.")
        return
    print("Empty columns:")
    print("\n".join(blanks))
def csv_fields(CSV):
    """Print each column name from the CSV header, one per line."""
    frame = pd.read_csv(CSV, low_memory=False, header=0)
    for name in frame.columns:
        print(name)
def csv_non_empty(CSV):
    """Print non-empty and unique value counts per column, busiest first.

    Columns are ranked descending by (unique values, non-empty values);
    all-empty columns are skipped entirely.
    """
    frame = pd.read_csv(CSV, low_memory=False, header=0)
    populated = {}
    distinct = {}
    for name in frame.columns:
        series = frame[name]
        if series.isnull().all():
            continue  # all-NaN columns are excluded from the report
        populated[name] = series.count()
        distinct[name] = series.nunique()
    ranking = sorted(
        distinct.items(),
        key=lambda item: (item[1], populated[item[0]]),
        reverse=True,
    )
    print("Column\tNon-empty values\tUnique values")
    if not ranking:
        print("No non-empty values found.")
        return
    for name, n_unique in ranking:
        print(f"{name}\t{populated[name]}\t{n_unique}")
def main():
    """Run each report the user requested against the CSV named on the CLI."""
    options = parse_args()
    path = options.csv_file
    # Flags are independent; any combination of reports may run.
    for requested, report in (
        (options.dump, csv_dump),
        (options.empty, csv_empty),
        (options.fields, csv_fields),
        (options.non_empty, csv_non_empty),
    ):
        if requested:
            report(path)


if __name__ == "__main__":
    main()

1658
poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -22,20 +22,20 @@ packages = [
{ include = "hsparse" },
]
readme = "README.md"
version = "0.2.1"
version = "0.0.1"
[tool.poetry.dependencies]
python = "^3.10"
setuptools_scm = "*"
pandas = "^2.2.2"
[build-system]
requires = ["poetry-core", "setuptools_scm"]
build-backend = "poetry.core.masonry.api"
[tool.poetry.scripts]
hsparse-csv-contacts = "hsparse.parse_csv_contacts:main"
hsparse-extract-columns = "hsparse.extract_columns_to_csv:main"
hsparse = "hsparse.main:parse_csv_contacts"
[tool.poetry.urls]
homepage = "https://libre.is/libre/hsparse"