Coding

How Python helps outside of work: ods data to excel

I hate copying and pasting, so might as well automate it

9:43:24 PM Trying to complete my research paper.. …


The script below helped me with some research data for my Research Module (wish me good luck!)

If you have any question, do ask in the comment section. See you!

This allows for transforming my .ods file to .xlsx
//Note --> You will have to tweak this to your needs. This is a personalised solution

[sourcecode lang=”python”]
#USAGE python getdata.py

from pyexcel_ods import get_data
import os , re, json, openpyxl, logging, typing, sys

logging.basicConfig(level=logging.DEBUG, format="%(asctime)s – %(levelname)s – %(message)s")

def get_rows_xy(sheet: typing.List ):
”’
argument:
sheet : a multidimensional list
Gets all the xy data
returns : list
”’
start = False #when true, starts appending to the data list
data = []
for sub_list in sheet:
if len(sub_list) == 0:
break
if start:
data.append(sub_list[:2])
if "XYDATA" in sub_list: #searches for XYDATA string in a sublist
start = True
return comma_to_dot(data)

def comma_to_dot(rows: typing.List ):
”’
transform a German format number to US format e.g. 100, 36 to 100.36
arguments : rows -> list
returns: data -> transformed rows (all values are numbers with "," replaced with ".")
”’
data = []
pattern = re.compile(r"(\d+),(\d+)")
for sub_row in rows:
temp = []
for value in sub_row:
if type(value) == int: #my data was weird.
#u = float(value) / 1000
#u = str(u)
#print(str(value) + " = " + u)
value = float(value) / 1000
value = str(value)
update = re.sub(pattern.pattern, r"\1.\2", str(value))
update = float(update)
temp.append(update)
data.append(temp)
return data

def add_rows_to_sheet(rows: typing.List, sheet):
”’
Adds data to worksheet
arguments :
rows: list of data
sheet: worksheet
returns :
worksheet (updated with rows)
”’
for row in range(len( rows )):
sub_row = rows[row]
for col in range(len(sub_row) ):
sheet.cell(row=row + 1, column=col + 1, value= sub_row[col] )
return sheet

def ods_to_excel(filepath, target)->None:
”’
Generates an excel workbook with ods data
argument:
filepath –> a valid filepath to the directory containing ods documents
target -> a valid filepath to the directory for the generated excel sheets
returns:
None
”’
filename = os.path.basename(filepath)
filename = filename.split(".")[0]
data = get_data(filepath)
json_data = json.dumps(data)
json_data = json.loads(json_data)
workbook_path = os.path.join( target, filename + ".xlsx" )
if not os.path.exists(workbook_path):
wb = openpyxl.Workbook()
else:
wb = openpyxl.load_workbook(workbook_path)
for sheetname in json_data:
ws = wb.active
ws.title = sheetname[:21]
data_update = get_rows_xy(json_data[sheetname])
ws = add_rows_to_sheet(data_update, ws)
logging.info("Adding data to " + sheetname )
wb.save(workbook_path)
logging.info("Complete")

if __name__ == "__main__":
if len(sys.argv) == 2:
path = sys.argv[1]
filepath = os.path.join(path, filename)
if os.path.exists(filepath):
ods_to_excel(filepath)<span data-mce-type="bookmark" style="display: inline-block; width: 0px; overflow: hidden; line-height: 0;" class="mce_SELRES_start"></span>
[/sourcecode]

This on the other hand, allows me to transform all ods files in a directory and save the newly generated excel workbooks in a given target path

[sourcecode lang=”python”]

#USAGE : python pode.py <source path> <target path>
import getdata, os, sys

def task(source , target ):
for filename in os.listdir(source):
if(filename.endswith(".ods")):
filepath = os.path.join(source, filename)
getdata.ods_to_excel(filepath, target)

if __name__ == "__main__":
if len( sys.argv ) == 3:
source = os.path.abspath(sys.argv[1])
target = os.path.abspath(sys.argv[2])
if not os.path.exists(target):
os.mkdir(target)
if os.path.exists(source):
task(source, target)
else:
print("The source directory is invalid")
else:
print("Requires source directory and target directory")<span data-mce-type="bookmark" style="display: inline-block; width: 0px; overflow: hidden; line-height: 0;" class="mce_SELRES_start"></span>

[/sourcecode]

Newer Versions <–

 

error: Content is protected !!