Here at MaestroIQ we deal with large amounts of user data, since we need impression-level information to build an accurate picture of our customers' users' paths to purchase. We store all of this data in a Postgres 9.3 instance. I'm hesitant to call it "big data," but our largest tables have over 500M rows and occupy over 200GB of disk space (if you include the indices), so performance is a very real issue we have to worry about. We are also a Python shop on the backend: all of our production code for data analytics and machine learning is Python, as is the code that controls our ETL workflow. To interact with our Postgres instances, we use the psycopg2 package. My hope for this post is to share some tips, tricks, and best practices we've discovered through heavy use of psycopg2.
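To set the stage, here is the shape of a typical psycopg2 interaction, written as a minimal sketch. The DSN, table, and column names here are illustrative placeholders, not our actual schema:

```python
def fetch_recent_impressions(dsn, since):
    """Fetch impression rows newer than `since`.

    A minimal psycopg2 usage sketch; `impressions` and its columns
    are hypothetical, stand-ins for whatever your schema looks like.
    """
    import psycopg2  # imported lazily so the sketch stands alone

    conn = psycopg2.connect(dsn)
    try:
        # Using the connection as a context manager wraps the block
        # in a transaction (commit on success, rollback on error);
        # note it does NOT close the connection on exit.
        with conn, conn.cursor() as cur:
            # Always pass values as a separate parameter tuple --
            # never interpolate them into the SQL string yourself.
            cur.execute(
                "SELECT user_id, ts FROM impressions WHERE ts > %s",
                (since,),
            )
            return cur.fetchall()
    finally:
        conn.close()
```

The pattern to internalize is the pairing of the connection context manager (transaction scope) with an explicit `close()` (connection lifetime); conflating the two is a common source of leaked connections.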
In dealing with large Postgres tables, one first has to make them large. Bulk-loading is an extremely common task, and as we'll see, there are good ways and