"""Copy a CSV file, keeping only the columns named in a 'good headers' file.

Run as a script, it reads ``tmp/mycontacts.csv`` and ``tmp/good_headers.txt``
and writes the filtered result to ``tmp/final.csv``.
"""

import csv


def read_good_headers(filename):
    """Return the list of 'good' header names stored one per line in a file.

    Args:
        filename: Path to a UTF-8 text file holding one header name per line.

    Returns:
        The header names in file order, with surrounding whitespace stripped.
        Blank lines are skipped so that a stray empty line cannot select a
        CSV column whose name is the empty string.
    """
    # Same encoding as the CSV files, so non-ASCII header names compare equal.
    with open(filename, 'r', encoding='utf-8') as file:
        stripped = (line.strip() for line in file)
        return [header for header in stripped if header]


def filter_csv(input_file, output_file, good_headers):
    """Write a copy of a CSV that keeps only the columns in ``good_headers``.

    Columns keep the order they have in the input file. Names in
    ``good_headers`` that do not occur in the input are ignored.

    Args:
        input_file: Path of the UTF-8 CSV to read; its first row is the header.
        output_file: Path of the UTF-8 CSV to write (overwritten if present).
        good_headers: Iterable of column names to keep.
    """
    # Build the lookup once; membership is tested for every input column.
    wanted = set(good_headers)

    with open(input_file, mode='r', newline='', encoding='utf-8') as infile:
        reader = csv.DictReader(infile)

        # ``fieldnames`` is None when the input file is empty.
        filtered_fieldnames = [
            field for field in (reader.fieldnames or []) if field in wanted
        ]

        with open(output_file, mode='w', newline='', encoding='utf-8') as outfile:
            # extrasaction='ignore' makes the writer drop every key that is not
            # in ``fieldnames``, so rows need no per-row filtering.
            writer = csv.DictWriter(
                outfile,
                fieldnames=filtered_fieldnames,
                extrasaction='ignore',
            )
            writer.writeheader()
            writer.writerows(reader)


def main():
    """Filter ``tmp/mycontacts.csv`` down to the headers in ``tmp/good_headers.txt``."""
    input_file = 'tmp/mycontacts.csv'
    good_headers_file = 'tmp/good_headers.txt'
    output_file = 'tmp/final.csv'

    # Step 1: Read the list of good headers
    good_headers = read_good_headers(good_headers_file)

    # Step 2: Filter the CSV based on these headers and write to a new file
    filter_csv(input_file, output_file, good_headers)

    # Reported here, inside main(), so that importing this module has no side
    # effects and never touches the script-only ``output_file`` name.
    print(f"Filtered CSV has been written to {output_file}")


if __name__ == "__main__":
    main()