Skip to content

Instantly share code, notes, and snippets.

View eseiver's full-sized avatar

E. Seiver eseiver

View GitHub Profile
@eseiver
eseiver / vod.py
Last active February 10, 2023 02:10
Pywikibot script for updating criticalrole.miraheze.org when a new VOD is released on YouTube
View vod.py
#!/usr/bin/python3
import sys
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
# Resolve the time zone once, at import time. ZoneInfo raises
# ZoneInfoNotFoundError when no data for the key can be located (no system
# time zone database and no `tzdata` package installed).
try:
    TIMEZONE = ZoneInfo("America/Los_Angeles")
except ZoneInfoNotFoundError:
    print(
        "\nTimezone info not found. Please run `pip install tzdata` and try again.\n",
        file=sys.stderr,
    )
    # Exit with a non-zero status so shells and schedulers see this as a
    # failure; a bare sys.exit() would report success (status 0).
    sys.exit(1)
"""
View Getting editor info on PLOS articles.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@eseiver
eseiver / plos_dois.py
Last active November 14, 2017 19:23
How to get every PLOS article DOI using PLOS's search API
View plos_dois.py
""" This small script will return a list of every PLOS article DOI.
It uses regular expressions to return only DOIs to full articles.
For more information on text and data-mining PLOS content, see http://api.plos.org/text-and-data-mining/
"""
import requests
def get_all_solr_dois():
"""
Get every article published by PLOS, up to 500,000, as indexed by Solr on api.plos.org.
URL includes regex to exclude sub-DOIs and image DOIs.
@eseiver
eseiver / file_rename.py
Last active October 18, 2017 23:02
For renaming annotation articles in existing PLOS XML corpus directory
View file_rename.py
"""
This renames annotation article files in an existing local copy of corpusdir to fit the new naming scheme.
See https://github.com/PLOS/allofplos/issues/28
"""
import os
import re
from plos_corpus import listdir_nohidden, corpusdir
from plos_regex import validate_file
@eseiver
eseiver / plos_pmc.py
Last active August 28, 2017 22:03
Get all PLOS PMCIDs from PMC using Entrez
View plos_pmc.py
import requests
import time
def get_all_pmc_dois(retstart=0, retmax=80000, count=None):
"""Query the entrez database to get a comprehensive list of all PMCIDs associated with all PLOS journals,
individually included in the search url.
See https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch for more info on search parameters
:return: the full list of PMCIDs in PMC for PLOS articles
"""
pmc_allplos_query_url = ('https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pmc&term='
@eseiver
eseiver / string_diff.py
Created August 22, 2017 20:44
Check the difference between two strings
View string_diff.py
# With thanks to https://stackoverflow.com/questions/17904097/python-difference-between-two-strings
# Describes difflib-detected differences in easy-to-understand English
import difflib
cases = [('apple', 'apples'), ('banaana', 'bananas')]
for a, b in cases:
print('{} => {}'.format(a, b))
for i, s in enumerate(difflib.ndiff(a, b)):
@eseiver
eseiver / gdrive_download.py
Created August 11, 2017 02:37
Functions for downloading files from Google Drive
View gdrive_download.py
# With thanks to https://stackoverflow.com/questions/42190691/python-how-to-find-google-drive-filetype & @egh
import requests
from tqdm import tqdm
def download_file_from_google_drive(id, destination):
""" General method for downloading from Google Drive.
Doesn't require using API or having credentials
:param id: Google Drive id for file (constant even if filename changes)
:param destination: directory where to download the file
@eseiver
eseiver / Itunes_timecode.applescript
Last active August 2, 2017 03:06
An AppleScript that returns the timecode position of the current iTunes track. Handy if transcribing or taking notes on an audio file!
View Itunes_timecode.applescript
tell application "iTunes"
set elapsed_seconds to player position
set this_elapsed_minutes to round elapsed_seconds / 60 rounding down
set N to result
if N < 10 then
set N to 0 & N
end if
set this_elapsed_seconds to elapsed_seconds mod 60
if {1, 2, 3, 4, 5, 6, 7, 8, 9, 0} contains this_elapsed_seconds then
set this_elapsed_seconds to "0" & (this_elapsed_seconds as string)