Compare commits
No commits in common. "main" and "v0.2.0" have entirely different histories.
.gitignore (2 changes, vendored)
@@ -10,7 +10,6 @@ log
tmp
venv
venv.coverage
*.csv
*.db
*.doctrees
*.env
@@ -21,7 +20,6 @@ venv.coverage
*.pyd
*.pyo
*.swp
*.txt
*.egg-info
_build
_version.py
@@ -1,4 +1,3 @@
v0.2.1 Script to extract useful fields from CSV.
v0.2.0 Functions for finding useful fields in CSV.
v0.1.0 Setup scripts.
v0.0.1 Hubspot Parse.
README.md (22 changes)
@@ -13,21 +13,11 @@ pip install poetry
poetry install
```

# Usage
# Run
Thusly.

Example:

```
$ hsparse-csv-contacts -h
usage: hsparse-csv-contacts [-h] [-d] [-e] [-f] [-n] csv_file

Parse Hubspot Contacts CSV Export

positional arguments:
  csv_file         Contacts CSV File

options:
  -h, --help       show this help message and exit
  -d, --dump       Dump CSV contents
  -e, --empty      List empty columns
  -f, --fields     Fields from CSV header
  -n, --non_empty  List number of non-empty values for each column
hsparse-csv-contacts --csv ~/all-contacts.csv
```
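The -e/--empty and -n/--non_empty listings above are per-column summaries. As a rough orientation only (not the project's implementation), here is a minimal pandas sketch of what those flags describe, assuming pandas as used by csv_non_empty further down and an illustrative file name:

```
# Sketch only: per-column non-empty and unique counts in the spirit of
# the -e/--empty and -n/--non_empty listings; "all-contacts.csv" is an
# illustrative path, and the columns are whatever the export contains.
import pandas as pd

df = pd.read_csv("all-contacts.csv", dtype=str)

non_empty = df.notna().sum()        # non-empty cells per column
empty_cols = [c for c in df.columns if non_empty[c] == 0]

print("Empty columns:", empty_cols)
for col in df.columns:
    print(f"{col}\t{non_empty[col]}\t{df[col].nunique()}")
```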
@@ -1,71 +0,0 @@
# MIT License
# Copyright (c) 2024 Jeff Moe
"""Read CSV and extract selected columns and write to a new CSV."""

import csv
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description="Extract CSV Columns, Output CSV")

    parser.add_argument("headers_file", help="Headers File", type=str)
    parser.add_argument("input_csv", help="Input CSV File", type=str)
    parser.add_argument("output_csv", help="Output CSV File", type=str)

    args = parser.parse_args()
    return args


def read_good_headers(filename):
    """Reads and returns the list of 'good' headers from a given file."""
    with open(filename, "r") as file:
        return [line.strip() for line in file.readlines()]


def filter_csv(input_csv, output_csv, good_headers):
    """Filters an input CSV based on the provided good headers and writes to output CSV."""

    # Read the original CSV
    with open(input_csv, mode="r", newline="", encoding="utf-8") as infile:
        reader = csv.DictReader(infile)

        # Get only the required fieldnames ('good' headers)
        filtered_fieldnames = [
            field for field in reader.fieldnames if field in good_headers
        ]

        # Write to output CSV
        with open(output_csv, mode="w", newline="", encoding="utf-8") as outfile:
            writer = csv.DictWriter(outfile, fieldnames=filtered_fieldnames)

            # Write the header line (column names) first
            writer.writeheader()

            for row in reader:
                filtered_row = {
                    key: value
                    for key, value in row.items()
                    if key in filtered_fieldnames
                }
                writer.writerow(filtered_row)


def main():
    args = parse_args()

    headers_file = args.headers_file
    input_csv = args.input_csv
    output_csv = args.output_csv

    # Step 1: Read the list of good headers
    good_headers = read_good_headers(headers_file)

    # Step 2: Filter the CSV based on these headers and write to a new file
    filter_csv(input_csv, output_csv, good_headers)

    print(f"Filtered CSV has been written to {output_csv}")


if __name__ == "__main__":
    main()
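The deleted module above is presumably the one main registers as hsparse-extract-columns = "hsparse.extract_columns_to_csv:main" in pyproject.toml (see the scripts hunk below). Its headers_file is a plain-text list, one wanted column name per line, and the output keeps the input CSV's column order rather than the headers file's. A hedged end-to-end sketch, assuming the entry point is installed from main; file names, columns, and values are invented:

```
# Hypothetical run of the extractor via its console script; everything
# here (file names, columns, values) is made up for illustration.
import csv
import subprocess

with open("good_headers.txt", "w", encoding="utf-8") as f:
    f.write("First Name\nEmail\n")  # one wanted column per line

with open("all-contacts.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["First Name", "Email", "Notes"])
    writer.writeheader()
    writer.writerow({"First Name": "Ada", "Email": "ada@example.com", "Notes": ""})

# Assumes `poetry install` on main put hsparse-extract-columns on PATH.
subprocess.run(
    ["hsparse-extract-columns", "good_headers.txt", "all-contacts.csv", "trimmed.csv"],
    check=True,
)

with open("trimmed.csv", newline="", encoding="utf-8") as f:
    print(f.read())  # header row is "First Name,Email"
```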
@@ -73,11 +73,7 @@ def csv_non_empty(CSV):
    }

    unique_counts = {col: df[col].nunique() for col in non_empty_columns.keys()}
    sorted_columns = sorted(
        unique_counts.items(),
        key=lambda x: (unique_counts[x[0]], non_empty_columns[x[0]]),
        reverse=True,
    )
    sorted_columns = sorted(unique_counts.items(), key=lambda x: x[1], reverse=True)

    print("Column\tNon-empty values\tUnique values")
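Reading the hunk counts, the multi-line sorted() call appears to be on the main side and the one-liner on the v0.2.0 side; the only behavioural difference is how columns that tie on unique-value count are ordered (main breaks ties by non-empty count, v0.2.0 leaves ties in insertion order). A toy comparison with invented column names and counts:

```
# Invented per-column counts purely to show the tie-breaking difference.
unique_counts = {"City": 3, "Email": 3, "Phone": 1}
non_empty_columns = {"City": 4, "Email": 10, "Phone": 1}

# Two-key sort: unique count first, non-empty count as tie-breaker (descending).
with_tiebreak = sorted(
    unique_counts.items(),
    key=lambda x: (unique_counts[x[0]], non_empty_columns[x[0]]),
    reverse=True,
)

# Single-key sort: unique count only; ties keep their insertion order.
simple = sorted(unique_counts.items(), key=lambda x: x[1], reverse=True)

print(with_tiebreak)  # [('Email', 3), ('City', 3), ('Phone', 1)]
print(simple)         # [('City', 3), ('Email', 3), ('Phone', 1)]
```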
@@ -22,7 +22,7 @@ packages = [
    { include = "hsparse" },
]
readme = "README.md"
version = "0.2.1"
version = "0.2.0"

[tool.poetry.dependencies]
python = "^3.10"
@@ -35,7 +35,6 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
hsparse-csv-contacts = "hsparse.parse_csv_contacts:main"
hsparse-extract-columns = "hsparse.extract_columns_to_csv:main"

[tool.poetry.urls]
homepage = "https://libre.is/libre/hsparse"
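Each entry in [tool.poetry.scripts] becomes a console_scripts entry point mapping a command name to a module:function path. A small sketch for checking what actually got registered in the current environment, assuming the package is installed (importlib.metadata selection by group needs Python 3.10+, which matches the python = "^3.10" constraint):

```
# List the hsparse console scripts registered in this environment.
from importlib.metadata import entry_points

for ep in entry_points(group="console_scripts"):
    if ep.name.startswith("hsparse"):
        print(ep.name, "->", ep.value)

# On main this should show both commands (illustrative output):
#   hsparse-csv-contacts -> hsparse.parse_csv_contacts:main
#   hsparse-extract-columns -> hsparse.extract_columns_to_csv:main
```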