Command line options for extract

2024-09-21 12:42:51 -06:00 · 2024-09-21 12:42:51 -06:00 · 78752392eb
parent 14c7c604fe
commit 78752392eb
1 changed file with 55 additions and 20 deletions
--- a/hsparse/extract_columns_to_csv.py
+++ b/hsparse/extract_columns_to_csv.py
@@ -1,40 +1,75 @@
+# MIT License
+# Copyright (c) 2024 Jeff Moe
+""" Read CSV and extract selected columns and write to new CSV"""
+
 import csv
+import argparse
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="Extract CSV Columns, Output CSV")
+
+    parser.add_argument("headers_file", help="Headers File", type=str)
+    parser.add_argument("input_csv", help="Input CSV File", type=str)
+    parser.add_argument("output_csv", help="Output CSV File", type=str)
+
+    args = parser.parse_args()
+    return args
+

 def read_good_headers(filename):
    """Reads and returns the list of 'good' headers from a given file."""
-    with open(filename, 'r') as file:
+    with open(filename, "r") as file:
        return [line.strip() for line in file.readlines()]

-def filter_csv(input_file, output_file, good_headers):
+
+def filter_csv(input_csv, output_csv, good_headers):
    """Filters an input CSV based on the provided good headers and writes to output CSV."""
-    
+
    # Read the original CSV
-    with open(input_file, mode='r', newline='', encoding='utf-8') as infile:
+    with open(input_csv, mode="r", newline="", encoding="utf-8") as infile:
        reader = csv.DictReader(infile)
-        
+
        # Get only the required fieldnames ('good' headers)
-        filtered_fieldnames = [field for field in reader.fieldnames if field in good_headers]
-        
+        filtered_fieldnames = [
+            field for field in reader.fieldnames if field in good_headers
+        ]
+
        # Write to output CSV
-        with open(output_file, mode='w', newline='', encoding='utf-8') as outfile:
+        with open(output_csv, mode="w", newline="", encoding="utf-8") as outfile:
            writer = csv.DictWriter(outfile, fieldnames=filtered_fieldnames)
-            
+
            # Write the header line (column names) first
            writer.writeheader()
-            
+
            for row in reader:
-                filtered_row = {key: value for key, value in row.items() if key in filtered_fieldnames}
+                filtered_row = {
+                    key: value
+                    for key, value in row.items()
+                    if key in filtered_fieldnames
+                }
                writer.writerow(filtered_row)

-if __name__ == "__main__":
-    input_file = 'tmp/mycontacts.csv'
-    good_headers_file = 'tmp/good_headers.txt'
-    output_file = 'tmp/final.csv'
+
+def main():
+    args = parse_args()
+
+    headers_file = args.headers_file
+    input_csv = args.input_csv
+    output_csv = args.output_csv
+
+    # input_file = "tmp/mycontacts.csv"
+    # good_headers_file = "tmp/good_headers.txt"
+    # output_file = "tmp/final.csv"

    # Step 1: Read the list of good headers
-    good_headers = read_good_headers(good_headers_file)
-    
-    # Step 2: Filter the CSV based on these headers and write to a new file
-    filter_csv(input_file, output_file, good_headers)
+    good_headers = read_good_headers(headers_file)

-print(f"Filtered CSV has been written to {output_file}")
+    # Step 2: Filter the CSV based on these headers and write to a new file
+    filter_csv(input_csv, output_csv, good_headers)
+
+    print(f"Filtered CSV has been written to {output_csv}")
+
+
+if __name__ == "__main__":
+    main()