Draft script to select columns and write CSV

This commit is contained in:
Jeff Moe 2024-09-21 12:30:23 -06:00
parent 9a2662a22c
commit 01117d4ee5

View file

@@ -0,0 +1,40 @@
import csv
def read_good_headers(filename):
    """Read the list of 'good' headers from a text file.

    Each line of the file is treated as one header name. Surrounding
    whitespace is stripped, and lines that are empty after stripping are
    skipped so that blank or trailing lines do not yield an empty-string
    header.

    Args:
        filename: Path of the text file listing the headers to keep.

    Returns:
        A list of header names, in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 (the same encoding the CSV files are opened
    # with) instead of relying on the platform's default encoding.
    with open(filename, 'r', encoding='utf-8') as file:
        stripped_lines = (line.strip() for line in file)
        return [header for header in stripped_lines if header]
def filter_csv(input_file, output_file, good_headers):
    """Copy a CSV file, keeping only the columns named in ``good_headers``.

    The first row of ``input_file`` is read as the header row. Columns whose
    name appears in ``good_headers`` are written to ``output_file`` in the
    order they occur in the input; all other columns are dropped. Names in
    ``good_headers`` that do not occur in the input are ignored.

    Args:
        input_file: Path of the CSV file to read (opened as UTF-8).
        output_file: Path of the CSV file to write (opened as UTF-8,
            overwritten if it exists).
        good_headers: Iterable of column names to keep.

    Raises:
        OSError: If either file cannot be opened.
    """
    # Build the lookup once so each column check is a constant-time set test.
    wanted = frozenset(good_headers)

    with open(input_file, mode='r', newline='', encoding='utf-8') as infile:
        reader = csv.DictReader(infile)

        # DictReader.fieldnames is None when the input has no header row
        # (for example an empty file); treat that as "no columns" instead of
        # failing while iterating over None.
        source_fields = reader.fieldnames or []
        filtered_fieldnames = [field for field in source_fields if field in wanted]

        with open(output_file, mode='w', newline='', encoding='utf-8') as outfile:
            # extrasaction='ignore' makes the writer skip every key that is
            # not in filtered_fieldnames, so rows can be passed through
            # without building a filtered copy of each one.
            writer = csv.DictWriter(
                outfile,
                fieldnames=filtered_fieldnames,
                extrasaction='ignore',
            )
            # Write the header line (column names) first.
            writer.writeheader()
            writer.writerows(reader)
if __name__ == "__main__":
    # Fixed locations used by this draft: the contacts export to read, the
    # text file naming the columns to keep, and the CSV to produce.
    contacts_csv = 'tmp/mycontacts.csv'
    headers_list = 'tmp/good_headers.txt'
    output_file = 'tmp/final.csv'

    # Load the wanted column names, then write the reduced copy of the CSV.
    filter_csv(contacts_csv, output_file, read_good_headers(headers_list))

    print(f"Filtered CSV has been written to {output_file}")