# MIT License
# Copyright (c) 2024 Jeff Moe
"""Read a CSV, extract selected columns, and write them to a new CSV."""

import argparse
import csv


def parse_args(argv=None):
    """Parse the command-line arguments.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with the attributes ``headers_file``,
        ``input_csv`` and ``output_csv``.
    """
    parser = argparse.ArgumentParser(description="Extract CSV Columns, Output CSV")

    parser.add_argument("headers_file", help="Headers File", type=str)
    parser.add_argument("input_csv", help="Input CSV File", type=str)
    parser.add_argument("output_csv", help="Output CSV File", type=str)

    return parser.parse_args(argv)


def read_good_headers(filename):
    """Read the list of 'good' headers from a file, one header per line.

    Surrounding whitespace is stripped from every line and blank lines are
    skipped, so a trailing newline or an empty separator line never produces
    an empty header name.

    Args:
        filename: Path of the text file listing the headers to keep.

    Returns:
        A list of header names in file order.
    """
    # "utf-8-sig" decodes plain UTF-8 and also drops a leading byte-order
    # mark, which would otherwise stick to the first header name.
    with open(filename, "r", encoding="utf-8-sig") as file:
        stripped_lines = (line.strip() for line in file)
        return [header for header in stripped_lines if header]


def filter_csv(input_csv, output_csv, good_headers):
    """Copy only the columns named in *good_headers* from one CSV to another.

    The output keeps the column order of the input file. Headers listed in
    *good_headers* that do not exist in the input are ignored. An input file
    without a header row produces an output with an empty header line
    instead of raising.

    Args:
        input_csv: Path of the CSV file to read (UTF-8, optional BOM).
        output_csv: Path of the CSV file to write (UTF-8).
        good_headers: Iterable of column names to keep.
    """
    # Build the lookup set once; membership is tested for every input column.
    wanted = frozenset(good_headers)

    with open(input_csv, mode="r", newline="", encoding="utf-8-sig") as infile:
        reader = csv.DictReader(infile)

        # DictReader.fieldnames is None when the input file is empty.
        filtered_fieldnames = [
            field for field in (reader.fieldnames or []) if field in wanted
        ]

        with open(output_csv, mode="w", newline="", encoding="utf-8") as outfile:
            # extrasaction="ignore" makes the writer drop every key that is
            # not in fieldnames, including the None key DictReader uses for
            # surplus values on over-long rows.
            writer = csv.DictWriter(
                outfile,
                fieldnames=filtered_fieldnames,
                extrasaction="ignore",
            )

            # Write the header line (column names) first
            writer.writeheader()
            writer.writerows(reader)


def main(argv=None):
    """Run the command-line tool: read headers, filter the CSV, report.

    Args:
        argv: Optional list of argument strings, passed to ``parse_args``.
    """
    args = parse_args(argv)

    # Step 1: Read the list of good headers
    good_headers = read_good_headers(args.headers_file)

    # Step 2: Filter the CSV based on these headers and write to a new file
    filter_csv(args.input_csv, args.output_csv, good_headers)

    print(f"Filtered CSV has been written to {args.output_csv}")


if __name__ == "__main__":
    main()