Compare commits

..

No commits in common. "main" and "v0.2.0" have entirely different histories.
main ... v0.2.0

6 changed files with 8 additions and 97 deletions

2
.gitignore vendored
View file

@ -10,7 +10,6 @@ log
tmp tmp
venv venv
venv.coverage venv.coverage
*.csv
*.db *.db
*.doctrees *.doctrees
*.env *.env
@ -21,7 +20,6 @@ venv.coverage
*.pyd *.pyd
*.pyo *.pyo
*.swp *.swp
*.txt
*.egg-info *.egg-info
_build _build
_version.py _version.py

View file

@ -1,4 +1,3 @@
v0.2.1 Script to extract useful fields from CSV.
v0.2.0 Functions for finding useful fields in CSV. v0.2.0 Functions for finding useful fields in CSV.
v0.1.0 Setup scripts. v0.1.0 Setup scripts.
v0.0.1 Hubspot Parse. v0.0.1 Hubspot Parse.

View file

@ -13,21 +13,11 @@ pip install poetry
poetry install poetry install
``` ```
# Usage # Run
Thusly.
Example:
``` ```
$ hsparse-csv-contacts -h hsparse-csv-contacts --csv ~/all-contacts.csv
usage: hsparse-csv-contacts [-h] [-d] [-e] [-f] [-n] csv_file
Parse Hubspot Contacts CSV Export
positional arguments:
csv_file Contacts CSV File
options:
-h, --help show this help message and exit
-d, --dump Dump CSV contents
-e, --empty List empty columns
-f, --fields Fields from CSV header
-n, --non_empty List number of non-empty values for each column
``` ```

View file

@ -1,71 +0,0 @@
# MIT License
# Copyright (c) 2024 Jeff Moe
"""Read a CSV, extract selected columns, and write them to a new CSV."""
import csv
import argparse
def parse_args(argv=None):
    """Parse the command-line arguments of the column-extraction script.

    Args:
        argv: Optional sequence of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so
            existing zero-argument callers behave exactly as before.

    Returns:
        argparse.Namespace with the string attributes ``headers_file``,
        ``input_csv`` and ``output_csv``.
    """
    parser = argparse.ArgumentParser(description="Extract CSV Columns, Output CSV")
    parser.add_argument("headers_file", help="Headers File", type=str)
    parser.add_argument("input_csv", help="Input CSV File", type=str)
    parser.add_argument("output_csv", help="Output CSV File", type=str)
    return parser.parse_args(argv)
def read_good_headers(filename):
    """Read the list of 'good' header names from a text file.

    Each line of the file is one header name. Surrounding whitespace is
    stripped, and lines that are empty after stripping are skipped so a
    trailing blank line cannot turn into an empty-string header.

    Args:
        filename: Path of the headers file, read as UTF-8 (the same
            encoding ``filter_csv`` uses for the CSV files).

    Returns:
        List of header names in file order.
    """
    with open(filename, "r", encoding="utf-8") as file:
        # Iterate the file lazily instead of materializing it with readlines().
        stripped = (line.strip() for line in file)
        return [header for header in stripped if header]
def filter_csv(input_csv, output_csv, good_headers):
    """Copy only the 'good' columns of ``input_csv`` into ``output_csv``.

    Output columns keep the order they have in the input CSV header, not
    the order of ``good_headers``. Names in ``good_headers`` that do not
    occur in the input header are ignored.

    Args:
        input_csv: Path of the CSV file to read (UTF-8).
        output_csv: Path of the CSV file to write (UTF-8, overwritten).
        good_headers: Iterable of column names to keep.
    """
    # A set gives constant-time membership tests for wide CSV files.
    wanted = set(good_headers)

    with open(input_csv, mode="r", newline="", encoding="utf-8") as infile:
        reader = csv.DictReader(infile)

        # fieldnames is None for an empty input file; treat that as
        # "no columns" instead of failing while iterating None.
        filtered_fieldnames = [
            field for field in (reader.fieldnames or []) if field in wanted
        ]

        with open(output_csv, mode="w", newline="", encoding="utf-8") as outfile:
            # extrasaction="ignore" makes the writer drop every key that is
            # not a kept column (including DictReader's None overflow key),
            # so rows need no per-row filtering.
            writer = csv.DictWriter(
                outfile,
                fieldnames=filtered_fieldnames,
                extrasaction="ignore",
            )
            # Write the header line (column names) first.
            writer.writeheader()
            writer.writerows(reader)
def main():
    """Command-line entry point: filter a CSV down to the listed columns.

    Parses the command line, loads the wanted header names from the
    headers file, writes the filtered CSV, and prints a confirmation
    naming the output path.
    """
    args = parse_args()
    output_csv = args.output_csv

    # Load the wanted column names, then filter the input CSV with them.
    filter_csv(args.input_csv, output_csv, read_good_headers(args.headers_file))

    print(f"Filtered CSV has been written to {output_csv}")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View file

@ -73,11 +73,7 @@ def csv_non_empty(CSV):
} }
unique_counts = {col: df[col].nunique() for col in non_empty_columns.keys()} unique_counts = {col: df[col].nunique() for col in non_empty_columns.keys()}
sorted_columns = sorted( sorted_columns = sorted(unique_counts.items(), key=lambda x: x[1], reverse=True)
unique_counts.items(),
key=lambda x: (unique_counts[x[0]], non_empty_columns[x[0]]),
reverse=True,
)
print("Column\tNon-empty values\tUnique values") print("Column\tNon-empty values\tUnique values")

View file

@ -22,7 +22,7 @@ packages = [
{ include = "hsparse" }, { include = "hsparse" },
] ]
readme = "README.md" readme = "README.md"
version = "0.2.1" version = "0.2.0"
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = "^3.10" python = "^3.10"
@ -35,7 +35,6 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry.scripts] [tool.poetry.scripts]
hsparse-csv-contacts = "hsparse.parse_csv_contacts:main" hsparse-csv-contacts = "hsparse.parse_csv_contacts:main"
hsparse-extract-columns = "hsparse.extract_columns_to_csv:main"
[tool.poetry.urls] [tool.poetry.urls]
homepage = "https://libre.is/libre/hsparse" homepage = "https://libre.is/libre/hsparse"