Skip to content

Instantly share code, notes, and snippets.

View lcjohnso's full-sized avatar

Cliff Johnson lcjohnso

View GitHub Profile
@lcjohnso
lcjohnso / ga_session_sum.py
Created May 14, 2025 18:38
Combine session counts from Google Analytics across multiple workflows (via Pandas groupby)
# Script: Sum GA session count by country (for case of multiple workflows, etc)
import pandas as pd

# Base names (without the ".csv" extension) of the Google Analytics exports
# to process; each one is handled independently.
datasets = ['GA_ElephantID_AllTime', 'GA_ElephantID_2024q2']

for dataset in datasets:
    # Each export is read from "<dataset>.csv" relative to the working directory.
    d = pd.read_csv(dataset + '.csv')
    # Collapse all rows that share a 'Country' value into a single summed
    # 'Sessions' total, ordered from the largest total to the smallest.
    out = d.groupby('Country')['Sessions'].sum().sort_values(ascending=False)
    # Save the per-country totals as "<dataset>_sum.csv".
    out.to_csv(dataset + '_sum.csv')
@lcjohnso
lcjohnso / unique_users.py
Created May 14, 2025 18:29
Compute number of unique users (logged in and logged out via IP hash) from Zooniverse classification export.
# Unique Users (Brooke's method)
import numpy as np # using 1.10.1
import pandas as pd # using 0.13.1
import json
########################################
# INPUT PARAMETERS
from panoptes_client import Panoptes,Caesar,Workflow
# Login: interactive - you will be prompted for username and password.
# This is the active login method; the alternatives below are left commented
# out and can be enabled in its place.
Panoptes.connect(login='interactive')
# Login Alternative 1: set the PANOPTES_USERNAME and PANOPTES_PASSWORD
# environment variables, then call connect() with no arguments
# Panoptes.connect()
# Login Alternative 2: pass username and password directly in the connect() call
# Panoptes.connect(username='example', password='example')
@lcjohnso
lcjohnso / jwst_ero_s3download_fits.sh
Created July 13, 2022 18:10
JWST AWS Bulk Download Scripts: ERO Imaging Programs - FITS and JPG of combined mosaics *only*
#!/bin/sh
#
#
# This script uses the command line tool `curl` to query
# STScI MAST on the cloud for JWST ERO data.
#
# The products you will download with this script are:
# Level 3 i2d FITS mosaics from all imaging ERO programs (2731, 2732, 2733, 2736)
# stored in your current directory the same way they are stored in the S3 bucket.
@lcjohnso
lcjohnso / users_stats_viaAPI.py
Created March 14, 2022 21:40
Zooniverse: individual user stats via zoo-event-stats API
# Goal: Query user-specific classification counts for list of user_ids via
# zoo-event-stats API (https://github.com/zooniverse/zoo-event-stats)
# Example Query URL: https://stats.zooniverse.org/counts/classification/year?user_id=999&project_id=11440,8900,9863
# Explanation: counts/classification/<time bin>?user_id=<user id>&project_id=<one or multiple project ids (comma separated)>
# This specific use case: query user classification counts across 33 NestQuestGo projects over all time
# INPUT = users.csv - CSV output from DB query with `user_id` field
# OUTPUT = add a new `n_class` column to users.csv and save as user_nclass.csv
@lcjohnso
lcjohnso / subject_set_remediation.py
Last active April 3, 2020 17:27
Zooniverse: remediation of interloper subjects in subject set
# Purpose: remove interloper subjects (where project_id for subject and subject_set do not match) from subject set
from panoptes_client import Panoptes, SubjectSet, Subject
from getpass import getpass
# Client Connect: by default the credentials are prompted for at runtime.
# To hardcode them instead, uncomment the two assignments below (filling in
# USERNAME and PASSWORD) and remove the input()/getpass() prompts.
#user = USERNAME
#pswd = PASSWORD
user = input('Zooniverse Username: ')
# getpass() is used for the password so it is not echoed to the terminal.
pswd = getpass('Zooniverse Password: ')
# Authenticate the Panoptes client with the credentials collected above.
Panoptes.connect(username=user, password=pswd)