Skip to content

Instantly share code, notes, and snippets.

@yjzhang
yjzhang / neo4j_cypher_cheatsheet.md
Created October 19, 2022 01:23 — forked from DaniSancas/neo4j_cypher_cheatsheet.md
Neo4j's Cypher queries cheatsheet

Neo4j Tutorial

Fundamentals

Store any kind of data using the following graph concepts:

  • Node: Graph data records
  • Relationship: Connect nodes (has direction and a type)
  • Property: Stores data as key-value pairs on nodes and relationships
  • Label: Groups nodes into sets (optional; a node can have several labels, while relationships are grouped by their type instead)
# https://ftp.ncbi.nlm.nih.gov/pubmed/baseline/
import requests
from selectolax.parser import HTMLParser
base_url = 'https://ftp.ncbi.nlm.nih.gov/pubmed/baseline/'
r = requests.get(base_url)
tree = HTMLParser(r.content)
for node in tree.css('a'):
@yjzhang
yjzhang / node2vec.py
Created September 29, 2022 21:26
very basic node2vec implementation
import functools
#from numba import jit
import numpy as np
from scipy import sparse
def random_walks(adj_list, r, l, p=1, q=1, verbose=False):
"""
Biased random walk starting from node i.
@yjzhang
yjzhang / find_all_shortest_paths.py
Created September 29, 2022 21:23
Find all shortest paths between two nodes in a graph using BFS.
def find_all_shortest_paths(dic_node, n1, n2, step_threshold):
return_paths = []
visit_queue = [[n1]]
# this can be updated to get all shortest paths for all visited nodes
# for the shortest path from N1 to N2, all intermediate paths are also shortest paths between their respective nodes.
visited_nodes_prev = set()
visited_nodes = set()
cur_distance = 0
while len(visit_queue):
cur_path = visit_queue.pop(0)
@yjzhang
yjzhang / pagerank_sparse.py
Created September 15, 2022 02:02
Implementation of PageRank in Python using sparse matrices
# Using a sparse matrix imported from...
import numpy as np
from scipy import sparse, io
def pagerank(adjacency, probs=None, n_iters=20, resid=0.85, modify_matrix=True):
"""
Args:
adjacency - sparse matrix
import subprocess
import numpy as np
from scipy import sparse, io
threshold = 1000
folders = ['M7_5', 'M8_3', 'X5.3.4', 'X5_2', 'X6.1']
for f in folders:
print(f)
#!/usr/bin/env python3
import sys
import textstat
path = 'scripted_test_outputs/000_output.txt'
if len(sys.argv) > 1:
path = sys.argv[1]
import os
import subprocess
filenames = os.listdir('.')
for filename in filenames:
name = filename.split('_')[0]
if not os.path.exists(name):
os.makedirs(name)
os.rename(filename, os.path.join(name, filename))
if filename.endswith('zip'):
# makes an svg bingo board
import svgwrite
def text_to_font_size(text):
if isinstance(text, str):
total_text = text
else:
total_text = ''.join(text)
if len(total_text) < 10:
return 18
@yjzhang
yjzhang / cocluster_heatmap.py
Created June 13, 2019 22:56
spectral coclustering heatmap
import numpy as np
from sklearn.cluster.bicluster import SpectralCoclustering
spec = SpectralCoclustering(18)
cluster_counts_subset = np.vstack([cluster_counts[:31, :], cluster_counts[32:,:]])
spec.fit(cluster_counts + 0.0001)
row_labels = spec.row_labels_
column_labels = spec.column_labels_
row_order = np.argsort(row_labels)