Command line options for extract

This commit is contained in:
Jeff Moe 2024-09-21 12:42:51 -06:00
parent 14c7c604fe
commit 78752392eb

View file

@ -1,40 +1,75 @@
# MIT License
# Copyright (c) 2024 Jeff Moe
""" Read CSV and extract selected columns and write to new CVS"""
import argparse
import csv
def parse_args(argv=None):
    """Parse the command-line arguments of the CSV column extractor.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default), argparse reads the process command line.

    Returns:
        An ``argparse.Namespace`` with the string attributes
        ``headers_file``, ``input_csv`` and ``output_csv``.
    """
    parser = argparse.ArgumentParser(description="Extract CSV Columns, Output CSV")
    parser.add_argument("headers_file", help="Headers File", type=str)
    parser.add_argument("input_csv", help="Input CSV File", type=str)
    parser.add_argument("output_csv", help="Output CSV File", type=str)
    return parser.parse_args(argv)
def read_good_headers(filename):
    """Read and return the list of 'good' headers from a given file.

    The file is expected to hold one header name per line. Surrounding
    whitespace is stripped from every line, and lines that are empty after
    stripping are skipped so that a stray blank line does not turn into an
    empty-string header (which would match an unnamed CSV column).

    Args:
        filename: Path of the text file listing the headers to keep.

    Returns:
        A list of header names, in file order.
    """
    # Same encoding as the CSV files, so header names compare equal
    # regardless of the platform's default locale encoding.
    with open(filename, "r", encoding="utf-8") as file:
        stripped = (line.strip() for line in file)
        return [header for header in stripped if header]
def filter_csv(input_csv, output_csv, good_headers):
    """Filter an input CSV down to the 'good' headers and write a new CSV.

    Columns are written in the order they appear in the input file, not in
    the order of ``good_headers``. Headers listed in ``good_headers`` that
    do not exist in the input are silently absent from the output.

    Args:
        input_csv: Path of the CSV file to read (UTF-8).
        output_csv: Path of the CSV file to write (UTF-8, overwritten).
        good_headers: Iterable of column names to keep.
    """
    # Build the lookup once; membership is tested for every input column.
    allowed = set(good_headers)

    # Read the original CSV
    with open(input_csv, mode="r", newline="", encoding="utf-8") as infile:
        reader = csv.DictReader(infile)

        # fieldnames is None when the input has no header row (empty file);
        # treat that as "no columns" instead of failing on iteration.
        filtered_fieldnames = [
            field for field in (reader.fieldnames or []) if field in allowed
        ]

        # Write to output CSV
        with open(output_csv, mode="w", newline="", encoding="utf-8") as outfile:
            # extrasaction="ignore" makes the writer drop every key that is
            # not a selected column, so rows need no per-row filtering.
            writer = csv.DictWriter(
                outfile,
                fieldnames=filtered_fieldnames,
                extrasaction="ignore",
            )

            # Write the header line (column names) first
            writer.writeheader()
            for row in reader:
                writer.writerow(row)
def main():
    """Run the extractor: parse arguments, load headers, filter the CSV.

    Reads the headers file, input CSV path and output CSV path from the
    command line, writes the filtered CSV, and prints a confirmation
    naming the output file.
    """
    args = parse_args()

    # Step 1: Read the list of good headers
    good_headers = read_good_headers(args.headers_file)

    # Step 2: Filter the CSV based on these headers and write to a new file
    filter_csv(args.input_csv, args.output_csv, good_headers)

    print(f"Filtered CSV has been written to {args.output_csv}")
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()