asw456 asw456

## .block
license: gpl-3.0

## pandas_dbms.py
# -*- coding: utf-8 -*-
"""
LICENSE: BSD (same as pandas)
example use of pandas with oracle mysql postgresql sqlite
    - updated 9/18/2012 with better column name handling; couple of bug fixes.
    - used ~20 times for various ETL jobs.  Mostly MySQL, but some Oracle.

    to do:
            save/restore index (how to check table existence? just do select count(*)?),
            finish odbc,

## gist:4041029
http://danzambonini.com/self-improving-bayesian-sentiment-analysis-for-twitter/

http://www.laurentluce.com/posts/twitter-sentiment-analysis-using-python-and-nltk/

[PDF]
Sentiment Analysis of Twitter Data - Department of Computer ...
www.cs.columbia.edu/~julia/papers/Agarwaletal11.pdf


## gist:4273631
cat mysql-statement
> insert into mytable (k, v) values (1,2);
> insert into mytable (k, v) values (2,2);
> insert into mytable (k, v) values (3,2);

scp mysql-statement to an ec2 machine
ssh to the ec2 machine

$ mysql -h hostname -u username -ppassword database_name < mysql-statement

## ntf_idf.pig
DEFINE tf_idf(token_records, id_field, token_field) RETURNS out_relation {

  /* Calculate the term count per document */
  doc_word_totals = foreach (group $token_records by ($id_field, $token_field)) generate
    FLATTEN(group) as ($id_field, token),
    COUNT_STAR($token_records) as doc_total;

  /* Calculate the document size */
  pre_term_counts = foreach (group doc_word_totals by $id_field) generate
    group AS $id_field,

## classify.pig
register /me/Software/elephant-bird/pig/target/elephant-bird-pig-3.0.6-SNAPSHOT.jar
register /me/Software/pig/build/ivy/lib/Pig/json-simple-1.1.jar
set elephantbird.jsonloader.nestedLoad 'true'

set default_parallel 4

/* Remove files from previous runs */
rmf /tmp/prior_words.txt
rmf /tmp/prior_genres.txt
rmf /tmp/p_word_given_genre.txt

## bayesian_ab_test.py
from matplotlib import use

from pylab import *
from scipy.stats import beta, norm, uniform
from random import random
from numpy import *
import numpy as np
import os

# Input data

## cython_tricks.md

      
              1 file
            
          
              14 forks
            
          
              2 comments
            
          
              87 stars
            
          
                ctokheim
                / cython_tricks.md
            
            
              Last active
              March 4, 2024 23:27
            
              
                cython tricks
              
          
    Cython

Cython has two major benefits:

Making python code faster, particularly things that can't be done in scipy/numpy
Wrapping/interfacing with C/C++ code

Cython gains most of its benefit from statically typing arguments. However, static typing is not required; in fact, regular Python code is valid Cython (but don't expect much of a speedup). By incrementally adding more type information, the code can be sped up by several factors. This gist just provides a very basic example of Cython usage.

  
## celeryconfig.py
# This is a quickstart! In the real world use a real broker (message queue)
# such as Redis or RabbitMQ !!
BROKER_URL = 'sqlalchemy+sqlite:///tasks.db'

CELERY_RESULT_BACKEND = "db+sqlite:///results.db"

CELERY_IMPORTS = ['tasks']

CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'

## lcal.py
#!/usr/bin/env python3

import datetime
import optparse
import re

def createDate(stringDate):
    if re.match("\d\d\d\d-\d\d-\d\d", stringDate) is None:
        raise ValueError("This is not in the correct date format. Use YYYY-MM-DD")
	# -- coding: utf-8 --
	"""
	LICENSE: BSD (same as pandas)
	example use of pandas with oracle mysql postgresql sqlite
	- updated 9/18/2012 with better column name handling; couple of bug fixes.
	- used ~20 times for various ETL jobs. Mostly MySQL, but some Oracle.

	to do:
	save/restore index (how to check table existence? just do select count(*)?),
	finish odbc,
	http://danzambonini.com/self-improving-bayesian-sentiment-analysis-for-twitter/

	http://www.laurentluce.com/posts/twitter-sentiment-analysis-using-python-and-nltk/

	[PDF]
	Sentiment Analysis of Twitter Data - Department of Computer ...
	www.cs.columbia.edu/~julia/papers/Agarwaletal11.pdf
	cat mysql-statement
	> insert into mytable (k, v) values (1,2);
	> insert into mytable (k, v) values (2,2);
	> insert into mytable (k, v) values (3,2);

	scp mysql-statement to an ec2 machine
	ssh to the ec2 machine

	$ mysql -h hostname -u username -p password database_name < mysql-statement
	DEFINE tf_idf(token_records, id_field, token_field) RETURNS out_relation {

	/* Calculate the term count per document */
	doc_word_totals = foreach (group $token_records by ($id_field, $token_field)) generate
	FLATTEN(group) as ($id_field, token),
	COUNT_STAR($token_records) as doc_total;

	/* Calculate the document size */
	pre_term_counts = foreach (group doc_word_totals by $id_field) generate
	group AS $id_field,
	register /me/Software/elephant-bird/pig/target/elephant-bird-pig-3.0.6-SNAPSHOT.jar
	register /me/Software/pig/build/ivy/lib/Pig/json-simple-1.1.jar
	set elephantbird.jsonloader.nestedLoad 'true'

	set default_parallel 4

	/* Remove files from previous runs */
	rmf /tmp/prior_words.txt
	rmf /tmp/prior_genres.txt
	rmf /tmp/p_word_given_genre.txt
	from matplotlib import use

	from pylab import *
	from scipy.stats import beta, norm, uniform
	from random import random
	from numpy import *
	import numpy as np
	import os

	# Input data
	# This is a quickstart! In the real world use a real broker (message queue)
	# such as Redis or RabbitMQ !!
	BROKER_URL = 'sqlalchemy+sqlite:///tasks.db'

	CELERY_RESULT_BACKEND = "db+sqlite:///results.db"

	CELERY_IMPORTS = ['tasks']

	CELERY_TASK_SERIALIZER = 'json'
	CELERY_RESULT_SERIALIZER = 'json'
	#!/usr/bin/env python3

	import datetime
	import optparse
	import re

	def createDate(stringDate):
	if re.match("\d\d\d\d-\d\d-\d\d", stringDate) is None:
	raise ValueError("This is not in the correct date format. Use YYYY-MM-DD")