Martín Anzorena martjanz

## gist.sh
#!/bin/sh
# Re-encode every .jpg below the current directory at quality 80 and write the
# result under low/, mirroring the source directory layout.
#
# -E low     skips anything named "low", so a re-run does not recompress the
#            script's own output into low/low/...
# mkdir -p   creates the target directory first; convert does not create
#            missing output directories on its own.
# fd replaces {} with the file path and {//} with its parent directory; they
# reach the inline script as $1 and $2.
fd -t f -e jpg -E low -x sh -c 'mkdir -p "low/$2" && convert "$1" -quality 80% "low/$1"' sh {} {//}

## convert.sh
#!/bin/sh

# (Linux, Unix) Convert to mp3 at maximum VBR using ffmpeg, then delete source audio files.
#
# Each source file is removed only after its own ffmpeg run succeeds, so a
# failed conversion never costs the original. The delete step no longer relies
# on fd's overall exit status or on a second, independent search.
for ext in aiff flac m4a wav wma; do
    # fd replaces {} with the file path and {.} with the path minus its
    # extension; they reach the inline script as $1 and $2.
    fd -t f -e "$ext" -x sh -c 'ffmpeg -i "$1" -qscale:a 0 "$2.mp3" && rm "$1"' sh {} {.}
done

# (Mac) Convert bmp to jpeg

## deletuips.py
#!/bin/python3
#
# Requirements:
#     - Python 3
#     - tweepy Python library (pip install tweepy)
#     - Twitter API keys and secrets (from https://developer.twitter.com/)
#
# Instructions:
#     - Request and download your Twitter archive (from https://twitter.com/settings/your_twitter_data)
#     - Rename downloaded zip file to twitter-archive.zip

## channel-downloader.py
# YouTube Channel Downloader
#
# Download all videos from all user/channel playlists
#
# TODO: check pagination. Tested with up to 10 playlists and up to 50 videos each.
import json
import re
import time
import traceback
from urllib.request import urlopen

## playlist-downloader.py
# -- Requirements (Python 3)--
import re
import time
import traceback

# External dependency: pytube3 (pip install pytube3)
from pytube import Playlist
from pytube import YouTube

# -- Parameters --

## downloader.py
"""
Heinrich - Sanguinetti Archive photo downloader

Downloads the photo archive from the Endangered Archives Programme of the British Library.

This script assumes that photos are numbered consecutively inside and between folders. There are
a few exceptions, so a manual review after the run is needed to check that all files were downloaded.

Sample image URL: http://images.eap.bl.uk/EAP755/EAP755_1_1_295/2987.jp2/full/1287,/0/default.jpg
"""

## generator.py
# Generate every candidate word and write them to words.txt, one per line:
# all '004'-prefixed words first, then all '014'-prefixed ones.
#
# Lines are streamed to the file instead of first materialising two
# ~100-million-element lists (plus their concatenation) in memory; the file
# contents and their order are unchanged.
#
# NOTE(review): the counter is zero-padded to 7 digits but the range runs up
# to 99,999,998 (8 digits), so most words are 11 characters long rather than
# 10 -- confirm whether range(10 ** 7) was intended.
with open('words.txt', 'w') as fhandler:
    for prefix in ('004', '014'):  # formerly words_male / words_female
        fhandler.writelines(f'{prefix}{n:07}\n' for n in range(99999999))

## snippet.sql
-- Login role for the downloader; every object grant below is SELECT-only.
CREATE ROLE downloader NOSUPERUSER NOCREATEDB NOCREATEROLE NOINHERIT LOGIN PASSWORD 'the-password';
GRANT CONNECT ON DATABASE db_name TO downloader;
-- USAGE is enough to reach objects inside the schema. ALL would additionally
-- grant CREATE, letting this otherwise read-only role create objects in public.
GRANT USAGE ON SCHEMA public TO downloader;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO downloader;
GRANT SELECT ON ALL SEQUENCES IN SCHEMA public TO downloader;

-- Also cover tables created later in this schema by the role that runs this
-- statement (no FOR ROLE clause is given).
ALTER DEFAULT PRIVILEGES IN SCHEMA public
	GRANT SELECT ON TABLES TO downloader;

## create_table.sql
-- Word list table (presumably an English dictionary, going by its name):
-- a single mandatory text column, one word per row.
CREATE TABLE dict_english (
	word character varying NOT NULL
);

-- Uniqueness is enforced through a named index rather than a column constraint.
CREATE UNIQUE INDEX dict_english_word_idx
	ON dict_english (word);

## README.md

      
        
          
            
              
              1 file
            
          
          
            
              
              0 forks
            
          
          
            
              
              0 comments
            
          
          
            
              
              0 stars
            
          
        
        
          
              
          
          
            
                martjanz
                / README.md
            
            
              Last active
              August 16, 2019 20:07
            
              
                Geocoding Establecimientos Electorales 2019
              
          
        
      
        
  
      
    Tenemos:

polígonos de circuitos (DINE)
establecimientos por circuito (DINE)
puntos de establecimientos (Educación)

Si bien no tenemos puntos de establecimientos DINE, sabemos (suponemos) que están dentro del circuito. Los puntos de establecimientos Educación, que suponemos también están dentro del circuito (ST_Contains()), nos dejan no más de diez/veinte establecimientos de cada lado para matchear. Con un fuzzy sobre el nombre podemos resolver varios de ellos, alivianando el fuzzy match hasta encontrar algo más o menos óptimo. Los resultados de los matches pueden ir guardándose en distintas tablas, una por cada grado de fuzzy match, para luego revisar manualmente y ver cuál se ajusta mejor. Estimo que, al ser una cantidad muy acotada de establecimientos por circuito, podremos matchear incluso con un fuzzy muy bajo: solo necesitamos que los nombres sean remotamente parecidos o, al menos, que sea el más parecido dentro de los que disponemos para ese circuito.
Hay circuitos que parecieran haber cambiado de nume
	#!/bin/sh
	# Re-encode every .jpg below the current directory at quality 80 and write the
	# result under low/, mirroring the source directory layout.
	#
	# -E low     skips anything named "low", so a re-run does not recompress the
	#            script's own output into low/low/...
	# mkdir -p   creates the target directory first; convert does not create
	#            missing output directories on its own.
	# fd replaces {} with the file path and {//} with its parent directory; they
	# reach the inline script as $1 and $2.
	fd -t f -e jpg -E low -x sh -c 'mkdir -p "low/$2" && convert "$1" -quality 80% "low/$1"' sh {} {//}
	#!/bin/sh

	# (Linux, Unix) Convert to mp3 at maximum VBR using ffmpeg, then delete source audio files.
	#
	# Each source file is removed only after its own ffmpeg run succeeds, so a
	# failed conversion never costs the original. The delete step no longer relies
	# on fd's overall exit status or on a second, independent search.
	for ext in aiff flac m4a wav wma; do
	    # fd replaces {} with the file path and {.} with the path minus its
	    # extension; they reach the inline script as $1 and $2.
	    fd -t f -e "$ext" -x sh -c 'ffmpeg -i "$1" -qscale:a 0 "$2.mp3" && rm "$1"' sh {} {.}
	done

	# (Mac) Convert bmp to jpeg
	#!/bin/python3
	#
	# Requirements:
	# - Python 3
	# - tweepy Python library (pip install tweepy)
	# - Twitter API keys and secrets (from https://developer.twitter.com/)
	#
	# Instructions:
	# - Request and download your Twitter archive (from https://twitter.com/settings/your_twitter_data)
	# - Rename downloaded zip file to twitter-archive.zip
	# YouTube Channel Downloader
	#
	# Download all videos from all user/channel playlists
	#
	# TODO: check pagination. Tested with up to 10 playlists and up to 50 videos each.
	import json
	import re
	import time
	import traceback
	from urllib.request import urlopen
	# -- Requirements (Python 3)--
	import re
	import time
	import traceback

	# External dependency: pytube3 (pip install pytube3)
	from pytube import Playlist
	from pytube import YouTube

	# -- Parameters --
	"""
	Heinrich - Sanguinetti Archive photo downloader

	Downloads the photo archive from the Endangered Archives Programme of the British Library.

	This script assumes that photos are numbered consecutively inside and between folders. There are
	a few exceptions, so a manual review after the run is needed to check that all files were downloaded.

	Sample image URL: http://images.eap.bl.uk/EAP755/EAP755_1_1_295/2987.jp2/full/1287,/0/default.jpg
	"""
	# Generate every candidate word and write them to words.txt, one per line:
	# all '004'-prefixed words first, then all '014'-prefixed ones.
	#
	# The nesting that this flattened copy had lost is restored, and lines are
	# streamed to the file instead of first materialising two
	# ~100-million-element lists (plus their concatenation) in memory.
	#
	# NOTE(review): the counter is zero-padded to 7 digits but the range runs up
	# to 99,999,998 (8 digits), so most words are 11 characters long rather than
	# 10 -- confirm whether range(10 ** 7) was intended.
	with open('words.txt', 'w') as fhandler:
	    for prefix in ('004', '014'):  # formerly words_male / words_female
	        fhandler.writelines(f'{prefix}{n:07}\n' for n in range(99999999))
	-- Login role for the downloader; every object grant below is SELECT-only.
	CREATE ROLE downloader NOSUPERUSER NOCREATEDB NOCREATEROLE NOINHERIT LOGIN PASSWORD 'the-password';
	GRANT CONNECT ON DATABASE db_name TO downloader;
	-- USAGE is enough to reach objects inside the schema. ALL would additionally
	-- grant CREATE, letting this otherwise read-only role create objects in public.
	GRANT USAGE ON SCHEMA public TO downloader;
	GRANT SELECT ON ALL TABLES IN SCHEMA public TO downloader;
	GRANT SELECT ON ALL SEQUENCES IN SCHEMA public TO downloader;

	-- Also cover tables created later in this schema by the role that runs this
	-- statement (no FOR ROLE clause is given).
	ALTER DEFAULT PRIVILEGES IN SCHEMA public
		GRANT SELECT ON TABLES TO downloader;
	-- Word list table (presumably an English dictionary, going by its name):
	-- a single mandatory text column, one word per row.
	CREATE TABLE dict_english (
		word character varying NOT NULL
	);

	-- Uniqueness is enforced through a named index rather than a column constraint.
	CREATE UNIQUE INDEX dict_english_word_idx
		ON dict_english (word);