Skip to content

Instantly share code, notes, and snippets.

@lazear
Created May 18, 2023 15:26
Show Gist options
  • Save lazear/fbdcd2de7b4d6e9505acac8acb087263 to your computer and use it in GitHub Desktop.
Save lazear/fbdcd2de7b4d6e9505acac8acb087263 to your computer and use it in GitHub Desktop.
import requests
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
# Query template for the dimensions.ai timeline endpoint. The ``{engine}``
# placeholder is filled in with str.format(); the search text pairs the quoted
# engine name with the quoted word "proteomics".
URL = (
    'https://app.dimensions.ai/viz/data/publication/timeline-source-published.json'
    '?search_mode=content'
    '&search_text="{engine}" AND "proteomics"'
    '&search_type=kws'
    '&search_field=full_search'
)
def get(engine: str) -> pd.DataFrame:
    """Fetch the dimensions.ai timeline for one search term as a DataFrame.

    The engine name is substituted into ``URL`` and the JSON response is
    converted into a small table (presumably publication counts per year;
    the response schema is not visible here).

    Args:
        engine: Search term substituted for ``{engine}`` in ``URL``.

    Returns:
        A DataFrame whose data column (labelled ``0``) holds the flattened
        ``data`` array of the response, indexed by the ``id`` of each item in
        the response's second dimension, plus an ``engine`` column set to
        *engine*.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond in time.
    """
    # Local import: only this function needs it.
    from urllib.parse import quote

    # Percent-encode the name so characters such as '&', '#' or '+' cannot
    # change the structure or meaning of the query string.
    url = URL.format(engine=quote(engine, safe=''))
    # requests has no default timeout; bound the wait so a stalled connection
    # cannot hang the script forever.
    response = requests.get(url, timeout=30)
    # Fail with a clear HTTP error instead of a confusing JSON/key error later.
    response.raise_for_status()
    data = response.json()

    pubs = np.array(data['data']).flatten()
    years = [x['id'] for x in data['dimensions'][1]['items']]
    # Progress/debug output, kept from the original script.
    print(pubs)
    print(years)

    df = pd.DataFrame(data=pubs, index=years)
    df['engine'] = engine
    return df
# Search engines plotted under the label 'Open source and free'.
free = [
    "Comet",
    "Morpheus",
    "X!Tandem",
    "OMSSA",
    "MyriMatch",
    "IdentiPy",
]

# Search engines plotted under the label 'Closed source or non-free'.
prop = [
    "MaxQuant",
    "Proteome Discoverer",
    "Mascot",
    "Sequest",
    "ProteinPilot",
    "Byonic",
    "MSFragger",
    "PEAKS DB",
    "MS-GF",
    "MS Amanda",
    "Spectrum Mill",
    "pFind",
    "CHIMERYS",
]
# Last year included in the figures; later years are dropped (presumably
# because they were still incomplete when the script was written). The same
# value selects the heatmap column used to order the engines, so it is kept
# in one place.
LAST_FULL_YEAR = 2022

# Fetch one timeline per engine and tag each with its licensing category.
dfs = []
for engines, is_free in ((free, True), (prop, False)):
    for engine in engines:
        df = get(engine)
        df["free"] = is_free
        dfs.append(df)

# Long-format table with columns: year, pubs, engine, free.
df = pd.concat(dfs).reset_index().rename({0: 'pubs', 'index': 'year'}, axis=1)
df = df[df.year <= LAST_FULL_YEAR]

fig, (ax0, ax1) = plt.subplots(nrows=1, ncols=2)

# Left panel: engine x year heatmap, engines ordered by their latest count.
t = (
    df.pivot_table(index='year', columns='engine', values='pubs')
    .T.sort_values(by=LAST_FULL_YEAR, ascending=False)
)
# '.0f' instead of 'd': the pivot table is float whenever the aggregation or a
# missing engine/year cell (NaN) promotes the dtype, and 'd' raises on floats.
sns.heatmap(t, annot=True, fmt='.0f', norm=LogNorm(), cmap='mako', ax=ax0)

# Right panel: yearly totals for each licensing category.
p = df.pivot_table(index='year', columns='free', values='pubs', aggfunc='sum')
ax1.plot(p.index, p[False], label='Closed source or non-free')
ax1.plot(p.index, p[True], label='Open source and free')
ax1.legend()

fig.suptitle("Publications mentioning DDA search engine (dimensions.ai)")
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment