You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
The idea was to convert the Freecam and Tool archive spreadsheet into a markdown file.
The code snippet:
import re
import os
import argparse
import pandas as pd
# File extensions (written without the leading dot) accepted for the input file.
input_formats_supported = [
    "csv",
    "xlsx",
]
# File extensions (written without the leading dot) accepted for the output file.
output_formats_supported = [
    "md",
    "html",
]
def read_file(file_name):
    """
    Read a CSV or Excel spreadsheet and return it as a pandas dataframe.

    The loader is chosen from the file extension. The extension is compared
    case-insensitively, so names such as ``DATA.CSV`` are accepted.

    Args:
        file_name: Path (string or path-like object) of the ``.csv`` or
            ``.xlsx`` file to load.

    Returns:
        The dataframe produced by ``pd.read_csv`` or ``pd.read_excel``.

    Raises:
        FileNotFoundError: If ``file_name`` does not exist.
        ValueError: If the extension is neither ``.csv`` nor ``.xlsx``.
            Both are subclasses of ``Exception``, so callers that catch
            ``Exception`` keep working.
    """
    if not os.path.exists(file_name):
        raise FileNotFoundError('Input File does not exist.')

    # Normalise once so that the extension checks ignore letter case and
    # path-like objects are handled the same way as plain strings.
    lowered_name = os.fspath(file_name).lower()
    if lowered_name.endswith('.csv'):
        return pd.read_csv(file_name)
    if lowered_name.endswith('.xlsx'):
        return pd.read_excel(file_name)
    raise ValueError('Input File type not supported')
def process_df(df):
    """
    Clean a dataframe so that it can be rendered as a single-line-per-row table.

    This function is written to parse and clean Nico's Freecam-Tools
    Spreadsheet. Change it to parse and clean your own data.

    NaN values are replaced with empty strings and new-line characters inside
    string cells are replaced with a single space. Non-string values (numbers,
    booleans, ...) are left untouched, so columns that are not text no longer
    raise ``AttributeError`` or get turned into NaN.

    Args:
        df: The dataframe to clean. It is modified in place.

    Returns:
        The same dataframe object that was passed in, after cleaning.
    """
    def _flatten(value):
        # Only real strings can contain new-lines; pass anything else through.
        return value.replace('\n', ' ') if isinstance(value, str) else value

    # Replace NaN values with empty strings
    df.fillna('', inplace=True)

    # Replace new-line characters with whitespaces, but only in columns that
    # can hold text; purely numeric columns have nothing to replace.
    for col in df.columns:
        column = df[col]
        if column.dtype == object or pd.api.types.is_string_dtype(column.dtype):
            df[col] = column.map(_flatten)
    return df
def process_markdown_string(string):
    """
    Compact a markdown table string.

    Two clean-ups are applied:

    * Runs of hyphens are collapsed to a single hyphen, but only on table
      separator rows (lines made up solely of ``|``, ``:``, ``-`` and
      whitespace). Hyphens inside cell contents, such as ``a--b``, are kept.
    * Every run of whitespace other than new-line and carriage return is
      collapsed to a single space.

    Args:
        string: The markdown text to clean.

    Returns:
        The cleaned markdown text.
    """
    # Removing unnecessary hyphens used to create the headers. A row whose
    # only content is separator characters is treated as a header rule;
    # every other row is data and must keep its hyphens.
    compacted_lines = []
    for line in string.split("\n"):
        if "-" in line and re.fullmatch(r"[|:\-\s]+", line):
            line = re.sub(r"-+", "-", line)
        compacted_lines.append(line)
    string = "\n".join(compacted_lines)

    # Cleaning whitespaces except newline and carriage return
    string = re.sub(r"[^\S\r\n]+", " ", string)
    return string
def save_file(df, file_name):
    """
    Render the dataframe as markdown or HTML and write it to ``file_name``.

    The format is picked from the file name suffix: ``.md`` produces a
    cleaned markdown table, ``.html`` produces a centre-justified HTML table.
    The rendered text is also printed before it is written.

    Raises:
        Exception: If ``file_name`` ends with neither ``.md`` nor ``.html``.
    """
    wants_markdown = file_name.endswith('.md')

    # Reject unknown suffixes before anything is rendered or written.
    if not wants_markdown and not file_name.endswith('.html'):
        raise Exception('Output File type not supported')

    if wants_markdown:
        rendered = process_markdown_string(df.to_markdown(index=False))
    else:
        rendered = df.to_html(index=False, justify='center')

    print("The final String.... \n\n" + rendered)

    with open(file_name, "w", encoding="utf-8", errors="xmlcharrefreplace") as sink:
        sink.write(rendered)
def read_and_convert(input_path, output_path):
    """
    Run the whole conversion: load ``input_path``, clean the resulting
    dataframe and save it to ``output_path``.
    """
    cleaned = process_df(read_file(input_path))
    save_file(cleaned, output_path)
if __name__ == "__main__":
    # Command-line entry point: two positional arguments, input then output.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "input_file",
        help="Path of the input file to convert. Supported formats are: " + ", ".join(input_formats_supported),
    )
    parser.add_argument(
        "output_file",
        help="Path of the output file. Supported formats are: " + ", ".join(output_formats_supported),
    )
    args = parser.parse_args()
    input_path, output_path = args.input_file, args.output_file

    # The extension is whatever follows the last dot in each path.
    input_file_ext = input_path.rpartition(".")[2]
    output_file_ext = output_path.rpartition(".")[2]

    # Validate both extensions before any file is read or written.
    if input_file_ext not in input_formats_supported:
        raise Exception("Input file format not supported. Only the following formats are supported: " + ", ".join(input_formats_supported))
    if output_file_ext not in output_formats_supported:
        raise Exception("Output file format not supported. Only the following formats are supported: " + ", ".join(output_formats_supported))

    read_and_convert(input_path, output_path)
I didn't create a new PR since I wasn't sure where (or even if) the code should be placed in the repo.
The text was updated successfully, but these errors were encountered:
The idea was to convert the Freecam and Tool archive spreadsheet into a markdown file.
The code snippet:
I didn't create a new PR since I wasn't sure where (or even if) the code should be placed in the repo.
The text was updated successfully, but these errors were encountered: