Skip to content

Instantly share code, notes, and snippets.

View caseydm's full-sized avatar

Casey Meyer caseydm

View GitHub Profile
@caseydm
caseydm / no_affiliations.py
Created November 13, 2023 16:21
get openalex authors
import csv
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
import time
def safe_nested_get(dct, keys, default=None):
"""
Safely get a nested value from a dictionary.
import csv
from app import db
with open("publishers_image_thumbnail_urls.csv", "r") as f:
reader = csv.reader(f)
for row in reader:
publisher_id = int(row[0].replace("https://openalex.org/P", ""))
image_thumbnail_url = row[2]
db.session.execute('UPDATE mid.publisher SET image_thumbnail_url = :image_thumbnail_url WHERE publisher_id = :publisher_id', {"image_thumbnail_url": image_thumbnail_url, "publisher_id": publisher_id})
@caseydm
caseydm / stats.py
Created March 10, 2023 19:09
Parseland stats
import requests
def main():
result = {}
doi_count = 0
no_landing_page_in_s3 = 0
for page in range(1, 11):
url = f"https://api.openalex.org/works?page={page}&per-page=100&sample=1000&seed=23&filter=has_doi:true"
r1 = requests.get(url)
import grequests
def test_rate_limit_no_key():
# test url 20 times at once
# 10 of the responses are 429, rate limited
urls = []
for i in range(20):
urls.append(
"https://api.openalex.org/works?filter=doi:10.1016/j.jfca.2023.105165")
rs = (grequests.get(u) for u in urls)
@caseydm
caseydm / citation_count.py
Created July 1, 2022 20:09
OpenAlex citation counts
import requests
def get_citation_counts():
# sample author
author_id = "https://openalex.org/A2151238091"
# get up to 50 works for this author
r2 = requests.get(
f"https://api.openalex.org/works?filter=authorships.author.id:{author_id}"
@caseydm
caseydm / global_south.py
Last active June 5, 2022 20:09
global south stats from openalex api
import requests
from iso3166 import countries
GLOBAL_SOUTH_COUNTRIES = [
# source https://meta.wikimedia.org/wiki/List_of_countries_by_regional_classification
"Afghanistan",
"Algeria",
"American Samoa",
"Angola",
"Anguilla",
@caseydm
caseydm / app.py
Last active September 27, 2021 14:48
Flask API Pagination
from flask import Flask
from flask_marshmallow import Marshmallow
from flask_sqlalchemy import SQLAlchemy
from models import Magazine
from utils import build_link_header, validate_per_page
app = Flask(__name__)
db = SQLAlchemy(app)
ma = Marshmallow(app)
@caseydm
caseydm / employees.py (version 5)
Created June 29, 2020 01:31
Elasticsearch 5 to 6 upgrade
from decimal import Decimal
from django_elasticsearch_dsl import DocType, Index, fields
from record.templatetags.record_tags import currency
from .models import Employee, Jurisdiction
# Name of the Elasticsearch index
employee = Index('employees')
# See Elasticsearch Indices API reference for available settings
employee.settings(
number_of_shards=1,
@caseydm
caseydm / Dockerfile
Last active March 24, 2018 13:53
Docker local development
FROM python:2.7.14
ENV PYTHONUNBUFFERED 1
RUN mkdir /code
WORKDIR /code
ADD . /code/
ADD requirements /requirements
RUN pip install -r /requirements/local.txt
@caseydm
caseydm / zappa_deployer.json
Created July 6, 2017 13:36
Zappa IAM Policy
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"iam:GetRole",
"iam:PutRolePolicy"
],
"Resource": [