Skip to content

Instantly share code, notes, and snippets.

@camallen
camallen / test_file_mime_type.py
Created September 21, 2018 08:17
Python magic lib test file mime types
# https://github.com/ahupp/python-magic#usage
import magic, csv
# Video files whose MIME type is to be checked.
# NOTE(review): the "_000" clip is listed twice -- confirm this is intentional.
file_paths = (
    '480_CornellFeeders_20171024_0921_000.mp4',
    '480_CornellFeeders_20171024_0921_000.mp4',
    '480_CornellFeeders_20171024_0921_001.mp4',
    '480_CornellFeeders_20171024_0921_002.mp4',
)
@camallen
camallen / convert_csv_to_geo_json.py
Created August 23, 2018 13:32
Convert csv RGB layer data to geojson points
import csv, json, pdb;
from geojson import Feature, FeatureCollection, Point
def convertBox2MidPoint(lower_lat, lower_lon, upper_lat, upper_lon):
    """Return the centre of a lat/lon bounding box as a ``(lon, lat)`` pair.

    The two corners may be passed in either order: the midpoint is measured
    from the smaller coordinate on each axis, so a swapped "lower"/"upper"
    pair still yields a point inside the box.

    Args:
        lower_lat: Latitude of one corner of the box.
        lower_lon: Longitude of the same corner.
        upper_lat: Latitude of the opposite corner.
        upper_lon: Longitude of the opposite corner.

    Returns:
        A ``(mid_lon, mid_lat)`` tuple.

    Note:
        This is plain arithmetic on the coordinate values; a box that wraps
        across the +/-180 degree meridian is not treated specially.
    """
    half_width = abs(lower_lon - upper_lon) / 2
    half_height = abs(lower_lat - upper_lat) / 2
    # Anchor on the smaller coordinate so argument order does not matter.
    mid_lon = min(lower_lon, upper_lon) + half_width
    mid_lat = min(lower_lat, upper_lat) + half_height
    # geojson is lon, lat ordering
    return (mid_lon, mid_lat)
@camallen
camallen / extract_gz_subject_location_data.rb
Created July 26, 2018 10:12
Extract Galaxy Zoo Subject location data
def _iterate_cursor(collection: nil, query: { }, opts: { }, message: '')
opts.reverse_merge! timeout: false
index = 0
total = collection.find(query).count
message = "#{ message } Galaxy Zoo Subjects"
collection.find(query, opts) do |cursor|
while cursor.has_next?
index += 1
@camallen
camallen / postgres_index_sizes.sql
Created July 5, 2018 09:14
PG Table & Index size queries
SELECT
nspname AS schema_name,
relname AS index_name,
round(100 * pg_relation_size(indexrelid) / pg_relation_size(indrelid)) / 100 AS index_ratio,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size,
pg_size_pretty(pg_relation_size(indrelid)) AS table_size
FROM
pg_index I
@camallen
camallen / public_stream_data_format.json
Created April 23, 2018 10:23
Zooniverse public stream example data format
{
"classification_id": "103101552",
"project_id": "825",
"workflow_id": "2647",
"user_id": "6",
"subject_ids": [
"15686058"
],
"subject_urls": [
{
[
{
"userName": "zooniverse",
"repo": "panoptes"
},
{
"userName": "zooniverse",
"repo": "panoptes-front-end"
},
{
DIRS=(local_image_directory)
for dir_to_process in "${DIRS[@]}" ; do
echo "converting files in $dir_to_process"
cd $dir_to_process
# possibly speed up using parallels? https://unix.stackexchange.com/questions/320877/how-to-use-convert-and-xargs-together
OUT_PATH="../converted/${dir_to_process}"
# this is from another project but I manually tested conversion of images to determine the following values
# resize to max width @ 2048 (match other sites) and 80% quality to get under 1M / 900K
# run some manual tests to see what works for you, e.g.
@camallen
camallen / project_classifications_csv_dump_export.rb
Last active June 18, 2019 10:25
Manual classification csv exports for a panoptes project
# Manual csv classifications dump
# ensure the config/database.yml is configured to use the read replica database and not the production db.
#
# run via rails runner from the panoptes cmd line via
# rails r project_classifications_csv_dump_export.rb
require 'csv'
PROJECT_ID = 1
@camallen
camallen / find_database_relation_sized.sql
Created November 21, 2017 13:27
List top 10 table sizes and report the index usage
-- List the 10 largest relations outside the system catalogs, ranked by the
-- size of the relation itself, with human-readable total / own / remainder sizes.
-- NOTE: "index_size" is total_size - size. pg_total_relation_size also counts
-- TOAST data, so for tables with TOAST storage this figure is indexes + TOAST.
select relation, pg_size_pretty(total_size), pg_size_pretty(size), pg_size_pretty(total_size - size) as index_size from
(SELECT relname AS "relation", pg_total_relation_size(C.oid) AS "total_size", pg_relation_size(C.oid) AS "size"
FROM pg_class C LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
WHERE nspname NOT IN ('pg_catalog', 'information_schema')
) as derived
-- Sort at the same level as LIMIT: row order coming out of a derived table is
-- not guaranteed, so ordering inside the subquery could return an arbitrary 10.
ORDER BY derived.size DESC
LIMIT 10;
@camallen
camallen / emr_install_pandas.sh
Created June 21, 2017 09:52
Install pandas EMR bootstrap
#!/bin/bash
# EMR bootstrap action: print a banner, then install pandas system-wide via pip.
printf '%s\n' " Installing pandas" "*****************************************"
sudo pip install pandas