Skip to content

Instantly share code, notes, and snippets.

@oshikrororo
Created April 10, 2023 10:54
Show Gist options
  • Save oshikrororo/48d6bd3f4a8cf3b3aa8a2ae716762414 to your computer and use it in GitHub Desktop.
Save oshikrororo/48d6bd3f4a8cf3b3aa8a2ae716762414 to your computer and use it in GitHub Desktop.
import gzip
def proteome_to_dict(proteome_lines):
    """Group the lines of a flat-file proteome into one text record per entry.

    An entry starts at every line that begins with ``"ID"``; the second
    whitespace-separated field of that line is used as the entry name.  The
    header line and every following line up to (but not including) the next
    header make up the entry's text.

    Args:
        proteome_lines: Iterable of lines, given either as ``bytes`` (what a
            file opened in binary mode yields, e.g. ``gzip.open`` with its
            default mode) or as ``str``.  The iterable is only read; it is
            never modified.

    Returns:
        A dict mapping each entry name to the entry's full text, with every
        line terminated by exactly one newline character.  Lines that appear
        before the first header are ignored.  If the same name occurs more
        than once, the later entry replaces the earlier one.

    Raises:
        IndexError: If a header line has no name field after ``"ID"``.
    """
    proteome = {}
    name = None   # name of the entry being collected; None before any header
    record = []   # lines of the current entry, joined once when it is closed

    for line in proteome_lines:
        # Decode real bytes instead of slicing their repr(), which would leave
        # literal escape sequences (e.g. a backslash followed by "t") in the text.
        if isinstance(line, (bytes, bytearray)):
            line = line.decode('utf-8', errors='replace')
        # Normalise the line ending so LF, CRLF and a missing final newline
        # all produce the same record text.
        text = line.rstrip('\r\n') + '\n'

        if text.startswith('ID'):
            # A header closes the previous entry (if any) and opens a new one.
            if name is not None:
                proteome[name] = ''.join(record)
            name = text.split()[1]
            record = []

        if name is not None:
            record.append(text)

    # Store the last entry explicitly rather than relying on a sentinel line
    # appended to the caller's list.
    if name is not None:
        proteome[name] = ''.join(record)
    return proteome
def proteins_in_both(proteome1, proteome2):
    """Print and return the entries of ``proteome1`` that also occur in ``proteome2``.

    Entry names are expected to look like ``PREFIX_SUFFIX``.  The suffix is
    taken from the first key of ``proteome2`` (the text between its first and
    second underscore) and is assumed to be shared by all of its keys.  An
    entry of ``proteome1`` counts as present in both when ``proteome2`` holds
    a non-empty value under ``<prefix of the entry>_<that suffix>``.

    Args:
        proteome1: Mapping of entry name to entry text; its names are the
            ones reported.
        proteome2: Mapping of entry name to entry text that is searched for
            counterparts.

    Returns:
        A list with the names from ``proteome1`` that have a counterpart in
        ``proteome2``, in iteration order of ``proteome1``.  Each name is also
        printed on its own line.  The list is empty when either mapping is
        empty.
    """
    # Derive the suffix from the first key only; an empty mapping or a key
    # without an underscore yields an empty suffix instead of raising.
    first_key = next(iter(proteome2), '')
    pieces = first_key.split('_')
    suffix = pieces[1] if len(pieces) > 1 else ''

    shared = []
    for protein in proteome1:
        counterpart = f"{protein.split('_')[0]}_{suffix}"
        # .get() treats both a missing key and an empty record as "no match".
        if proteome2.get(counterpart):
            print(protein)
            shared.append(protein)
    return shared
# Load the E. coli proteome: gzip.open defaults to binary mode, so the file
# yields raw bytes lines, which are collected into a list for the parser.
with gzip.open('ecoli_reviewed_proteome.gz') as file:
    ecoli = list(file)
ecoli_proteome = proteome_to_dict(ecoli)

# Load the B. subtilis proteome the same way.
with gzip.open('bacsu_reviewed_proteome.gz') as file:
    bacsu = list(file)
bacsu_proteome = proteome_to_dict(bacsu)

# Report every E. coli entry whose counterpart exists in B. subtilis.
proteins_in_both(ecoli_proteome, bacsu_proteome)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment