Campbell Allen camallen

## test_file_mime_type.py
# https://github.com/ahupp/python-magic#usage

import magic, csv
# Four .mp4 file names sharing one naming pattern, differing only in the
# zero-padded segment suffix. Segment 000 appears twice, matching the
# original listing.
file_paths = tuple(
    '480_CornellFeeders_20171024_0921_%03d.mp4' % segment
    for segment in (0, 0, 1, 2)
)

## convert_csv_to_geo_json.py
import csv, json, pdb;
from geojson import Feature, FeatureCollection, Point

def convertBox2MidPoint(lower_lat, lower_lon, upper_lat, upper_lon):
    """Return the midpoint of a lat/lon bounding box as ``(lon, lat)``.

    The box is treated as two plain numeric ranges; no wrap-around at the
    antimeridian is applied. The corners may be passed in either order:
    the midpoint is measured from the smaller coordinate on each axis, so
    swapped corners give the same result as ordered ones.

    Args:
        lower_lat: Latitude of one corner of the box.
        lower_lon: Longitude of that same corner.
        upper_lat: Latitude of the opposite corner.
        upper_lon: Longitude of the opposite corner.

    Returns:
        A ``(mid_lon, mid_lat)`` tuple.
    """
    # Anchor on the smaller coordinate so the half-extent is always added
    # in the correct direction, even when the corners arrive swapped.
    mid_lon = min(lower_lon, upper_lon) + abs(lower_lon - upper_lon) / 2
    mid_lat = min(lower_lat, upper_lat) + abs(lower_lat - upper_lat) / 2
    # geojson is lon, lat ordering
    return (mid_lon, mid_lat)

## extract_gz_subject_location_data.rb
def _iterate_cursor(collection: nil, query: { }, opts: { }, message: '')
  opts.reverse_merge! timeout: false
  index = 0
  total = collection.find(query).count
  message = "#{ message } Galaxy Zoo Subjects"

  collection.find(query, opts) do |cursor|
    while cursor.has_next?
      index += 1

## postgres_index_sizes.sql
SELECT
nspname AS schema_name,
relname AS index_name,
round(100 * pg_relation_size(indexrelid) / pg_relation_size(indrelid)) / 100 AS index_ratio,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size,
pg_size_pretty(pg_relation_size(indrelid)) AS table_size

FROM
pg_index I

## public_stream_data_format.json
{
  "classification_id": "103101552",
  "project_id": "825",
  "workflow_id": "2647",
  "user_id": "6",
  "subject_ids": [
    "15686058"
  ],
  "subject_urls": [
    {

## fourth_wall_pr_repos.json
[
  {
    "userName": "zooniverse",
    "repo": "panoptes"
  },
  {
    "userName": "zooniverse",
    "repo": "panoptes-front-end"
  },
  {

## convert_images.sh
DIRS=(local_image_directory)
for dir_to_process in "${DIRS[@]}" ; do
  echo "converting files in $dir_to_process"
  cd $dir_to_process
  # possibly speed up using parallels? https://unix.stackexchange.com/questions/320877/how-to-use-convert-and-xargs-together
  OUT_PATH="../converted/${dir_to_process}"

  # this is from another project but I manually tested conversion of images to determine the following values
  # resize to max width @ 2048 (match other sites) and 80% quality to get under 1M / 900K
  # run some manual tests to see what works for you, e.g.

## project_classifications_csv_dump_export.rb
# Manual csv classifications dump
# ensure the config/database.yml is configured to use the read replica database and not the production db.
#
# run with rails runner from the panoptes command line:
# rails r project_classifications_csv_dump_export.rb

require 'csv'

PROJECT_ID = 1

## find_database_relation_sized.sql
-- Report the ten relations with the largest main-fork size outside the
-- system schemas, with human-readable total, table and "index" sizes.
-- index_size is total_size minus the main-fork size, so it also counts TOAST
-- data and auxiliary forks, not only indexes.
select relation, pg_size_pretty(total_size), pg_size_pretty(size), pg_size_pretty(total_size - size) as index_size from
(SELECT relname AS "relation", pg_total_relation_size(C.oid) AS "total_size", pg_relation_size(C.oid) AS "size"
  FROM pg_class C LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
  WHERE nspname NOT IN ('pg_catalog', 'information_schema')
 ) as derived
 -- Sort in the outer query: ordering applied inside a derived table is not
 -- guaranteed to be preserved, so LIMIT could otherwise pick arbitrary rows.
 ORDER BY derived.size DESC
 LIMIT 10;

## emr_install_pandas.sh
#!/bin/bash
# Print a short banner, then install pandas with pip under sudo.
printf '%s\n' "  Installing pandas"
printf '%s\n' "*****************************************"
sudo pip install pandas
	# https://github.com/ahupp/python-magic#usage

	import magic, csv
	file_paths = (
	'480_CornellFeeders_20171024_0921_000.mp4',
	'480_CornellFeeders_20171024_0921_000.mp4',
	'480_CornellFeeders_20171024_0921_001.mp4',
	'480_CornellFeeders_20171024_0921_002.mp4'
	)
	import csv, json, pdb;
	from geojson import Feature, FeatureCollection, Point

	def convertBox2MidPoint(lower_lat, lower_lon, upper_lat, upper_lon):
	delta_lon = abs(lower_lon - upper_lon) / 2
	delta_lat = abs(lower_lat - upper_lat) / 2
	mid_lon = lower_lon + delta_lon
	mid_lat = lower_lat + delta_lat
	# geojson is lon, lat ordering
	return (mid_lon, mid_lat)
	def _iterate_cursor(collection: nil, query: { }, opts: { }, message: '')
	opts.reverse_merge! timeout: false
	index = 0
	total = collection.find(query).count
	message = "#{ message } Galaxy Zoo Subjects"

	collection.find(query, opts) do \|cursor\|
	while cursor.has_next?
	index += 1
	SELECT
	nspname AS schema_name,
	relname AS index_name,
	round(100 * pg_relation_size(indexrelid) / pg_relation_size(indrelid)) / 100 AS index_ratio,
	pg_size_pretty(pg_relation_size(indexrelid)) AS index_size,
	pg_size_pretty(pg_relation_size(indrelid)) AS table_size

	FROM
	pg_index I
	{
	"classification_id": "103101552",
	"project_id": "825",
	"workflow_id": "2647",
	"user_id": "6",
	"subject_ids": [
	"15686058"
	],
	"subject_urls": [
	{
	[
	{
	"userName": "zooniverse",
	"repo": "panoptes"
	},
	{
	"userName": "zooniverse",
	"repo": "panoptes-front-end"
	},
	{
	DIRS=(local_image_directory)
	for dir_to_process in "${DIRS[@]}" ; do
	echo "converting files in $dir_to_process"
	cd $dir_to_process
	# possibly speed up using parallels? https://unix.stackexchange.com/questions/320877/how-to-use-convert-and-xargs-together
	OUT_PATH="../converted/${dir_to_process}"

	# this is from another project but I manually tested conversion of images to determine the following values
	# resize to max width @ 2048 (match other sites) and 80% quality to get under 1M / 900K
	# run some manual tests to see what works for you, e.g.
	# Manual csv classifications dump
	# ensure the config/database.yml is configured to use the read replica database and not the production db.
	#
	# run via rails runner from the panoptes cmd line via
	# rails r project_classifications_csv_dump_export.rb

	require 'csv'

	PROJECT_ID = 1
	select relation, pg_size_pretty(total_size), pg_size_pretty(size), pg_size_pretty(total_size - size) as index_size from
	(SELECT relname AS "relation", pg_total_relation_size(C.oid) AS "total_size", pg_relation_size(C.oid) AS "size"
	FROM pg_class C LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
	WHERE nspname NOT IN ('pg_catalog', 'information_schema')
	ORDER BY pg_relation_size(C.oid) DESC
	) as derived
	LIMIT 10;
	#!/bin/bash
	echo " Installing pandas"
	echo "*****************************************"
	sudo pip install pandas