Command line options for extract
This commit is contained in:
parent
14c7c604fe
commit
78752392eb
|
@ -1,40 +1,75 @@
|
||||||
|
# MIT License
|
||||||
|
# Copyright (c) 2024 Jeff Moe
|
||||||
|
"""Read a CSV, extract selected columns, and write them to a new CSV."""
|
||||||
|
|
||||||
import csv
|
import csv
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args(argv=None):
    """Parse the command line options for the CSV column extractor.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which
            is the behavior of the original zero-argument call.

    Returns:
        An ``argparse.Namespace`` with the string attributes
        ``headers_file``, ``input_csv`` and ``output_csv``.
    """
    parser = argparse.ArgumentParser(description="Extract CSV Columns, Output CSV")

    parser.add_argument("headers_file", help="Headers File", type=str)
    parser.add_argument("input_csv", help="Input CSV File", type=str)
    parser.add_argument("output_csv", help="Output CSV File", type=str)

    # Passing argv through lets callers (and tests) supply their own arguments.
    return parser.parse_args(argv)
|
||||||
|
|
||||||
|
|
||||||
def read_good_headers(filename):
    """Read the list of 'good' headers from a text file, one header per line.

    Args:
        filename: Path of the headers file.

    Returns:
        A list of header names in file order, with surrounding whitespace
        stripped. Lines that are empty after stripping are skipped so that a
        blank or trailing line does not become an empty-string header.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Decode as UTF-8 explicitly, matching how the CSV files are opened,
    # instead of relying on the platform default encoding.
    with open(filename, "r", encoding="utf-8") as file:
        stripped_lines = (line.strip() for line in file)
        return [header for header in stripped_lines if header]
|
||||||
|
|
||||||
def filter_csv(input_csv, output_csv, good_headers):
    """Copy only the 'good' columns of an input CSV into an output CSV.

    The output keeps the column order of the input file; headers listed in
    ``good_headers`` that do not occur in the input are ignored.

    Args:
        input_csv: Path of the CSV file to read (UTF-8, first row is the header).
        output_csv: Path of the CSV file to write (UTF-8, overwritten).
        good_headers: Iterable of column names to keep.

    Raises:
        OSError: If either file cannot be opened.
    """
    # A set gives constant-time membership tests for every input column.
    wanted = set(good_headers)

    # Read the original CSV
    with open(input_csv, mode="r", newline="", encoding="utf-8") as infile:
        reader = csv.DictReader(infile)

        # Get only the required fieldnames ('good' headers).
        # reader.fieldnames is None when the input file is empty; treat that
        # as "no columns" instead of failing on iteration over None.
        filtered_fieldnames = [
            field for field in (reader.fieldnames or []) if field in wanted
        ]

        # Write to output CSV
        with open(output_csv, mode="w", newline="", encoding="utf-8") as outfile:
            # extrasaction="ignore" makes the writer drop every key that is
            # not in filtered_fieldnames, so rows need no manual filtering.
            writer = csv.DictWriter(
                outfile,
                fieldnames=filtered_fieldnames,
                extrasaction="ignore",
            )

            # Write the header line (column names) first
            writer.writeheader()
            writer.writerows(reader)
|
||||||
|
|
||||||
def main():
    """Run the extractor: parse the options, load the headers, filter the CSV.

    Reads the headers file, input CSV and output CSV paths from the command
    line, writes the filtered CSV, and prints a confirmation message.
    """
    args = parse_args()

    # Step 1: Read the list of good headers
    good_headers = read_good_headers(args.headers_file)

    # Step 2: Filter the CSV based on these headers and write to a new file
    filter_csv(args.input_csv, args.output_csv, good_headers)

    print(f"Filtered CSV has been written to {args.output_csv}")


if __name__ == "__main__":
    main()
|
||||||
|
|
Loading…
Reference in a new issue