Skip to content

Instantly share code, notes, and snippets.

View larssono's full-sized avatar

Larsson Omberg larssono

View GitHub Profile
@larssono
larssono / scrape_diseases.py
Created April 29, 2021 06:58
Scrape diseases and conditions
from bs4 import BeautifulSoup
from urllib.request import urlopen, HTTPError
import string
import time
import random
url = 'https://www.mayoclinic.org/diseases-conditions/index?letter=%s'
output = 'diseases.txt'
fp = open(output,'w')
import synapseclient
import synapsebridgehelpers as sb
FILTER_QUERY = "SELECT distinct healthCode FROM syn7841519 where substudyMemberships like '%Cirrhosis_pilot%'"
ORIG_PROJECT = 'syn7838471'
DESTINATION_PROJECT = 'syn18589808'
TABLE_MAPPING = {'syn7841519': 'syn18632063'}
syn = synapseclient.login()
import pandas as pd
import bridgeclient
import synapseclient
from multiprocessing.dummy import Pool
from synapseutils.monitor import with_progress_bar
mp = Pool(4)
bridge = bridgeclient.bridgeConnector(<email>, <password>, study='journey-pro')
syn = synapseclient.login()
@larssono
larssono / determineMD5.py
Created June 1, 2017 15:27
Get MD5 for a large number of files in Synapse
import synapseclient
import pandas as pd
from multiprocessing.dummy import Pool
def with_progress_bar(func, totalCalls, prefix = '', postfix='', isBytes=False):
"""Adds a progress bar to calls to func
:param func: Function being wrapped with progress Bar
:param totalCalls: total number of items/bytes when completed
:param prefix: String printed before progress bar
import synapseclient
syn=synapseclient.Synapse()
syn.login()
def determineNewSampleCount():
"""
"""
table = syn.tableQuery('SELECT * FROM syn3281840')
df = table.asDataFrame()
import sys
import synapseclient
syn=synapseclient.Synapse()
syn.login(silent=True)
md5 = sys.argv[1]
files = syn.restGET('/entity/md5/%s' % md5)
for f in files['results']:
import pandas as pd
import json
graph = json.load(open('graph.json'))
df = pd.DataFrame(graph['vertices'])
df.to_csv('vertices.csv')

## Load graph into memory

    g = new TinkerGraph()
    g.loadGraphSON('/Users/lom/Downloads/graph.json')

## Find all activities

    g.V('concreteType', 'activity').name
    nActivities = g.V('concreteType', 'activity').count()

@larssono
larssono / getGitPermalink.py
Created May 16, 2016 22:27 — forked from kdaily/getGitPermalink.py
Get permalink to git commit for a specific path.
ids = [i['file.id'] for i in syn.chunkedQuery('select id from file where parentId=="syn5521815"')]
for i in ids:
e = syn.get(i, downloadFile=False)
e.properties.fileNameOverride = e.name
syn.store(e, forceVersion=False)