Last active
May 16, 2019 03:05
-
-
Save mr-eyes/02b3113102d45ba9bf3d35ae4bc20834 to your computer and use it in GitHub Desktop.
Merge multiple <key:value> tables files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
if len(sys.argv) < 3: | |
sys.exit("run: python filter_table.py <file> <threshold>") | |
else: | |
file_path = sys.argv[1] | |
threshold = int(sys.argv[2]) | |
with open(file_path, 'r') as f: | |
print(next(f).strip()) | |
for line in f: | |
values = line.strip().split("\t")[2:] | |
values = list(map(int, values)) | |
_max = max(values) | |
if _max > threshold: | |
print(line.strip()) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sqlite3 as sql | |
import glob | |
import sys | |
import os | |
class Merger: | |
DELIMITER = " " | |
def __init__(self, wildcart): | |
""" | |
create a database connection to a database that resides in the memory | |
""" | |
if len(wildcart) == 1: | |
if not os.path.isdir(wildcart[0]): | |
print(f"'{wildcart[0]}' directory is not found", file = sys.stderr) | |
sys.exit(1) | |
self.files_paths = sorted(glob.glob(wildcart[0] + "/*")) | |
else: | |
self.files_paths = wildcart | |
try: | |
self.conn = sql.connect(':memory:') | |
except sql.Error as e: | |
print(e) | |
exit() | |
self.conn.execute('''CREATE TABLE IF NOT EXISTS combined | |
(ID INTEGER PRIMARY KEY, | |
key TEXT DEFAULT "" NOT NULL UNIQUE);''') | |
for file in self.files_paths: | |
file = os.path.basename(file) | |
self.conn.execute(f"ALTER TABLE combined ADD COLUMN '{file}' INT DEFAULT 0 NOT NULL;") | |
self.conn.commit() | |
def parse_files(self): | |
for file in self.files_paths: | |
with open(file, 'r') as f: | |
for line in f: | |
line = line.strip().split(self.DELIMITER) | |
where = line[0] | |
value = line[1] | |
key = os.path.basename(file) | |
self.conn.execute(f"INSERT OR IGNORE INTO combined (key) values ('{where}');") | |
self.conn.execute(f"UPDATE combined SET '{key}'={value} WHERE key='{where}'") | |
self.conn.commit() | |
def export_to_tsv(self): | |
curs = self.conn.execute('select * from combined') | |
col_names = list(map(lambda x: x[0], curs.description)) | |
rows = curs.fetchall() # collect the data | |
print("\t".join(col_names)) | |
for row in rows: | |
row = list(map(str, row)) | |
print("\t".join(row)) | |
def main(wildcart): | |
parser = Merger(wildcart) | |
parser.DELIMITER = " " # Optional, delimiter is " " by default. | |
parser.parse_files() | |
parser.export_to_tsv() | |
if __name__ == "__main__": | |
if len(sys.argv) < 2 or sys.argv[1] in ["-h", "--help"]: | |
print(f"Examples:", file=sys.stderr) | |
print(f"python merge_tables.py <dir_path>|<wildcart_expression>", file=sys.stderr) | |
print(f"python merge_tables.py files", file=sys.stderr) | |
print(f"python merge_tables.py files/*txt", file=sys.stderr) | |
sys.exit(1) | |
elif len(sys.argv) == 2: | |
wildcart = [sys.argv[1]] | |
elif len(sys.argv) > 2: | |
wildcart = sys.argv[1:] | |
main(wildcart) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment