Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Parses one or more XLS files, each containing arbitrary number of sheets, and filters all rows for a pattern
#!/usr/bin/python3
#-*- coding: utf-8 -*-
"""
Parses one or more XLS files, each containing arbitrary number of sheets, and filters all rows for a
pattern as determined by filter_lines=... below. Prints matching names.
Typical invocation
python3 ./xlsfilter.py *201{6,7,8}.{1,2}.xlsx | vi -
Supposed to help find interesting spectroscopic data on a shared network drive. Can be adapted for
any similar task in the future.
"""
## Import common moduli
import sys, os, time, re
from pathlib import Path
import pandas as pd
import collections as co
matching_names, matching_dates, matching_xls = [], [], []
errors = []
duplicities = []
for arg in sys.argv[1:]:
#print('loading file {:}', arg);
sheet_to_df_map = co.OrderedDict()
xls = pd.ExcelFile(arg)
skipped_sheet_names = []
for sheet_name in xls.sheet_names:
if re.search('\d\d\d\d', sheet_name):
sheet_to_df_map[sheet_name] = xls.parse(sheet_name)
else:
skipped_sheet_names += [sheet_name]
#print('Note: in file "{:}" parsed sheets: {:} '.format(arg, sheet_to_df_map.keys()))
#print('Note: in file "{:}" ignored sheets: {:} '.format(arg, skipped_sheet_names))
for key,df in list(sheet_to_df_map.items())[::-1]:
try:
filter_lines = (df['Filtr (%)']==0.1) & (df['Acqt. (s)']==1) & (df['x']==-25000)
new_names = list(df[filter_lines]['Name'].values)
for nn in new_names:
if nn in matching_names: duplicities.append(nn)
matching_names += new_names
matching_dates += [key[:]] * len(new_names)
matching_xls += [arg[:]] * len(new_names)
except KeyError as e:
errors.append("ERROR: could not parse or filter {:} in {:}:\n\t\t {:}".format(key,arg,e))
#print(e)
matching_triplets = [n+'\t'+d+'\t'+a for (n,d,a) in zip(matching_names, matching_dates, matching_xls)]
#matching_triplets.sort()
for mp in matching_triplets:
print(mp)
for err in errors:
print(err)
if duplicities:
print('WARNING: found {:} duplicities in above printed names: {:}'.format(len(duplicities),duplicities))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment