# MIT License
# Copyright (c) 2024 Jeff Moe
"""Read a CSV, extract selected columns, and write them to a new CSV."""

import argparse
import csv


def parse_args():
    """Parse the command-line arguments.

    Returns:
        The argparse namespace with ``headers_file``, ``input_csv`` and
        ``output_csv`` attributes, all strings.
    """
    parser = argparse.ArgumentParser(description="Extract CSV Columns, Output CSV")
    parser.add_argument("headers_file", help="Headers File", type=str)
    parser.add_argument("input_csv", help="Input CSV File", type=str)
    parser.add_argument("output_csv", help="Output CSV File", type=str)
    return parser.parse_args()


def read_good_headers(filename):
    """Read the list of 'good' headers from a file, one header per line.

    Surrounding whitespace is stripped from each line. Blank lines are
    skipped so that a trailing newline in the headers file does not select a
    CSV column whose name is the empty string.

    Args:
        filename: Path of the text file listing the headers to keep.

    Returns:
        The header names, in file order.
    """
    # Same encoding as the CSV files so non-ASCII header names compare equal.
    with open(filename, "r", encoding="utf-8") as file:
        stripped_lines = (line.strip() for line in file)
        return [header for header in stripped_lines if header]


def filter_csv(input_csv, output_csv, good_headers):
    """Copy only the columns named in ``good_headers`` to a new CSV.

    Columns keep the order they have in the input file. An input file with
    no header row is treated as having no columns instead of raising.

    Args:
        input_csv: Path of the CSV file to read.
        output_csv: Path of the CSV file to write (overwritten if present).
        good_headers: Iterable of column names to keep.
    """
    # Build the lookup once; membership is tested for every input column.
    wanted = set(good_headers)

    with open(input_csv, mode="r", newline="", encoding="utf-8") as infile:
        reader = csv.DictReader(infile)

        # DictReader.fieldnames is None when the input file is empty.
        filtered_fieldnames = [
            field for field in (reader.fieldnames or []) if field in wanted
        ]

        with open(output_csv, mode="w", newline="", encoding="utf-8") as outfile:
            # extrasaction="ignore" drops every key that is not in
            # filtered_fieldnames, including the None key DictReader uses
            # for surplus fields on over-long rows.
            writer = csv.DictWriter(
                outfile,
                fieldnames=filtered_fieldnames,
                extrasaction="ignore",
            )

            # Write the header line (column names) first.
            writer.writeheader()
            writer.writerows(reader)


def main():
    """Run the command-line tool: read headers, filter the CSV, report."""
    args = parse_args()

    # Step 1: Read the list of good headers.
    good_headers = read_good_headers(args.headers_file)

    # Step 2: Filter the CSV based on these headers and write to a new file.
    filter_csv(args.input_csv, args.output_csv, good_headers)

    print(f"Filtered CSV has been written to {args.output_csv}")


if __name__ == "__main__":
    main()