Skip to content

Instantly share code, notes, and snippets.

View Gabryxx7's full-sized avatar
:electron:

Gabriele Marini, Ph.D Gabryxx7

:electron:
View GitHub Profile
@Gabryxx7
Gabryxx7 / dblp_data.py
Created September 21, 2020 02:12
Extract DBLP data to a formatted yml file
import dblplib as dblp
import oyaml as yaml
def getAuthor(name):
    """Return a display-ready author name.

    All digit characters are removed from ``name`` and surrounding
    whitespace is trimmed (presumably to drop numeric disambiguation
    suffixes such as "Jane Doe 0001" -- confirm against the data source).
    If the original name contains the substring ``'arini'``, the cleaned
    name is wrapped in ``<ins><strong>...</strong></ins>`` so that it is
    highlighted in the rendered output.

    Args:
        name: The raw author name string.

    Returns:
        The cleaned name, HTML-highlighted when it contains ``'arini'``.
    """
    # Clean BEFORE wrapping: if the markup is added first, the outer
    # strip() cannot reach the whitespace left behind by removed digits,
    # and it ends up inside the tags ("<strong>Name </strong>").
    cleaned = ''.join(ch for ch in name if not ch.isdigit()).strip()
    if 'arini' in name:
        return '<ins><strong>' + cleaned + '</strong></ins>'
    return cleaned
# do a simple author search for Gabriele Marini
authors = dblp.search('Gabriele Marini')
@Gabryxx7
Gabryxx7 / github_readmes.py
Created September 21, 2020 02:11
Download all GitHub repos README.md with embedded images
from github import Github
import markdown
from markdown.treeprocessors import Treeprocessor
from markdown.extensions import Extension
import requests
import oyaml
import re
from xml.etree import ElementTree
import os
from urllib.parse import urlparse
@Gabryxx7
Gabryxx7 / instagram-to-yml.py
Last active February 1, 2022 13:36
Format all instagram photos metadata from an Instagram data export
import json
import os
from datetime import datetime
import oyaml as yaml
from geopy.geocoders import Nominatim
from geopy.adapters import AioHTTPAdapter
import requests #to make TMDB API calls
import urllib.parse
class InstaPhoto:
@Gabryxx7
Gabryxx7 / canvas_groups.js
Created September 21, 2020 02:08
Extract groups from Canvas LMS (run in the Chrome console on the Canvas groups page)
var count = 0;
var data =""
function download(filename, text) {
var element = document.createElement('a');
element.setAttribute('href', 'data:text/csv;charset=utf8,' + encodeURIComponent(text));
element.setAttribute('download', filename);
element.style.display = 'none';
document.body.appendChild(element);
element.click();
document.body.removeChild(element);
@Gabryxx7
Gabryxx7 / repos_downloader.py
Created September 21, 2020 01:58
Project Repository Downloader
import os
import re
import oyaml
from git import Repo
from github import Github
from datetime import datetime, timedelta
URL_REGEX = r"""(?i)\b((?:https?:(?:/{1,3}|[a-z0-9%])|[a-z0-9.\-]+[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg
@Gabryxx7
Gabryxx7 / plant_data_scraping.py
Created August 30, 2020 01:14
Scraping plant C-values from the Plant DNA C-values Database
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
import re
import pandas as pd
import os
import time
import csv