Command line options for extract

2024-09-21 12:42:51 -06:00 · 2024-09-21 12:42:51 -06:00 · 78752392eb
parent 14c7c604fe
commit 78752392eb
1 changed file with 55 additions and 20 deletions
--- a/hsparse/extract_columns_to_csv.py
+++ b/hsparse/extract_columns_to_csv.py
@@ -1,40 +1,75 @@
 # MIT License
 # Copyright (c) 2024 Jeff Moe
 """ Read CSV and extract selected columns and write to new CVS"""
 import csv
 import argparse
 def parse_args():
    parser = argparse.ArgumentParser(description="Extract CSV Columns, Output CSV")
    parser.add_argument("headers_file", help="Headers File", type=str)
    parser.add_argument("input_csv", help="Input CSV File", type=str)
    parser.add_argument("output_csv", help="Output CSV File", type=str)
    args = parser.parse_args()
    return args
 def read_good_headers(filename):
    """Reads and returns the list of 'good' headers from a given file."""
-    with open(filename, 'r') as file:
+    with open(filename, "r") as file:
        return [line.strip() for line in file.readlines()]
-def filter_csv(input_file, output_file, good_headers):
+
 def filter_csv(input_csv, output_csv, good_headers):
    """Filters an input CSV based on the provided good headers and writes to output CSV."""
    # Read the original CSV
-    with open(input_file, mode='r', newline='', encoding='utf-8') as infile:
+    with open(input_csv, mode="r", newline="", encoding="utf-8") as infile:
        reader = csv.DictReader(infile)
        # Get only the required fieldnames ('good' headers)
-        filtered_fieldnames = [field for field in reader.fieldnames if field in good_headers]
+        filtered_fieldnames = [
            field for field in reader.fieldnames if field in good_headers
        ]
        # Write to output CSV
-        with open(output_file, mode='w', newline='', encoding='utf-8') as outfile:
+        with open(output_csv, mode="w", newline="", encoding="utf-8") as outfile:
            writer = csv.DictWriter(outfile, fieldnames=filtered_fieldnames)
            # Write the header line (column names) first
            writer.writeheader()
            for row in reader:
-                filtered_row = {key: value for key, value in row.items() if key in filtered_fieldnames}
+                filtered_row = {
                    key: value
                    for key, value in row.items()
                    if key in filtered_fieldnames
                }
                writer.writerow(filtered_row)
-if __name__ == "__main__":
+
-    input_file = 'tmp/mycontacts.csv'
+def main():
-    good_headers_file = 'tmp/good_headers.txt'
+    args = parse_args()
-    output_file = 'tmp/final.csv'
+
    headers_file = args.headers_file
    input_csv = args.input_csv
    output_csv = args.output_csv
    # input_file = "tmp/mycontacts.csv"
    # good_headers_file = "tmp/good_headers.txt"
    # output_file = "tmp/final.csv"
    # Step 1: Read the list of good headers
-    good_headers = read_good_headers(good_headers_file)
+    good_headers = read_good_headers(headers_file)
    # Step 2: Filter the CSV based on these headers and write to a new file
-    filter_csv(input_file, output_file, good_headers)
+    filter_csv(input_csv, output_csv, good_headers)
-print(f"Filtered CSV has been written to {output_file}")
+    print(f"Filtered CSV has been written to {output_csv}")
 if __name__ == "__main__":
    main()