@dound
dound / postgresql_upsert.py
Created January 10, 2011 00:19
Python implementation of UPSERT for use with PostgreSQL.
def upsert(db_cur, table, pk_fields, schema=None, **kwargs):
    """Updates the specified relation with the key-value pairs in kwargs if a
    row matching the primary key value(s) already exists. Otherwise, a new row
    is inserted. Returns True if a new row was inserted.

    schema     the schema to use, if any (not sanitized)
    table      the table to use (not sanitized)
    pk_fields  tuple of field names which are part of the primary key
    kwargs     all key-value pairs which should be set in the row
    """
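The listing only shows the function's opening. This gist predates native upserts; on PostgreSQL 9.5+ the same helper can be built on `INSERT ... ON CONFLICT`. A minimal sketch under that assumption (splitting out the SQL construction is my addition, not part of the original gist, and names are illustrative):

```python
def build_upsert_sql(table, pk_fields, schema=None, **kwargs):
    """Build an INSERT ... ON CONFLICT statement plus its parameter list.
    As in the original gist, table/schema/field names are NOT sanitized."""
    relation = '{}.{}'.format(schema, table) if schema else table
    fields = sorted(kwargs)  # deterministic column order
    non_pk = [f for f in fields if f not in pk_fields]
    if non_pk:
        action = 'DO UPDATE SET ' + ', '.join(
            '{0} = EXCLUDED.{0}'.format(f) for f in non_pk)
    else:
        action = 'DO NOTHING'  # nothing to update beyond the key itself
    sql = 'INSERT INTO {} ({}) VALUES ({}) ON CONFLICT ({}) {}'.format(
        relation, ', '.join(fields), ', '.join(['%s'] * len(fields)),
        ', '.join(pk_fields), action)
    return sql, [kwargs[f] for f in fields]


def upsert(db_cur, table, pk_fields, schema=None, **kwargs):
    """Insert or update a single row via the given DB-API cursor."""
    sql, params = build_upsert_sql(table, pk_fields, schema, **kwargs)
    db_cur.execute(sql, params)
```

Unlike the 2011 original's update-then-insert dance, the `ON CONFLICT` form is atomic and race-free.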
@fabianp
fabianp / ranking.py
Last active February 1, 2024 10:02
Pairwise ranking using scikit-learn LinearSVC
"""
Implementation of pairwise ranking using scikit-learn LinearSVC

Reference:
    "Large Margin Rank Boundaries for Ordinal Regression", R. Herbrich,
    T. Graepel, K. Obermayer 1999

    "Learning to rank from medical imaging data." Pedregosa, Fabian, et al.,
    Machine Learning in Medical Imaging 2012.
"""
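The key idea in this gist is the pairwise transform: turn a ranking problem into binary classification on difference vectors. A minimal numpy sketch of that transform, independent of scikit-learn (the function name is mine):

```python
import itertools
import numpy as np

def pairwise_transform(X, y):
    """For every pair of samples with different relevance labels, emit the
    difference vector X[i] - X[j] labeled +1 if y[i] > y[j], else -1.
    A linear classifier (e.g. LinearSVC) trained on these pairs learns a
    weight vector whose dot product with X ranks the original samples."""
    X_new, y_new = [], []
    for i, j in itertools.combinations(range(len(y)), 2):
        if y[i] == y[j]:
            continue  # equal labels carry no ranking information
        X_new.append(X[i] - X[j])
        y_new.append(np.sign(y[i] - y[j]))
    return np.asarray(X_new), np.asarray(y_new)
```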
@agramfort
agramfort / ranking.py
Created March 18, 2012 13:10 — forked from fabianp/ranking.py
Pairwise ranking using scikit-learn LinearSVC
"""
Implementation of pairwise ranking using scikit-learn LinearSVC
Reference: "Large Margin Rank Boundaries for Ordinal Regression", R. Herbrich,
T. Graepel, K. Obermayer.
Authors: Fabian Pedregosa <fabian@fseoane.net>
Alexandre Gramfort <alexandre.gramfort@inria.fr>
"""
@jeetsukumaran
jeetsukumaran / pymc_multinomial_propoptions.py
Created May 31, 2012 01:09
Using PyMC to Estimate the Proportions of a Multinomial Distribution
#! /usr/bin/env python
import sys
import random
import pymc
import numpy
from dendropy.mathlib import probability as prob
from dendropy.mathlib import statistics as stats
rng = random.Random()
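For this simple model the quantity the gist's MCMC run estimates also has a closed form: a Dirichlet prior on the proportions is conjugate to the multinomial likelihood. A numpy sketch of the exact posterior mean the sampler approximates (function name is mine):

```python
import numpy as np

def dirichlet_posterior_mean(counts, alpha=1.0):
    """Posterior mean of multinomial proportions under a symmetric
    Dirichlet(alpha) prior: (counts + alpha) / sum(counts + alpha).
    A long enough PyMC chain for this model converges to this value."""
    counts = np.asarray(counts, dtype=float)
    post = counts + alpha
    return post / post.sum()
```

Sampling still earns its keep once the model grows (hierarchies, missing data); for the plain multinomial, the conjugate update is the whole answer.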
#!/usr/bin/env python
import json
import urllib

def estimated_count_for(search_term):
    url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&%s' % urllib.urlencode({'q': search_term})
    results = json.loads(urllib.urlopen(url).read())
    try:
        return results['responseData']['cursor']['estimatedResultCount']
    except KeyError:
        return None
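The snippet above is Python 2 (`urllib.urlencode`, `urllib.urlopen`) and targets the long-deprecated Google AJAX Search API. A Python 3 sketch of the same shape, with the URL construction split out so it can be checked without a network call (the endpoint is taken from the gist as-is):

```python
import json
from urllib.parse import urlencode
from urllib.request import urlopen

ENDPOINT = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&'

def build_search_url(search_term):
    """Build the request URL; percent-encodes the query term."""
    return ENDPOINT + urlencode({'q': search_term})

def estimated_count_for(search_term):
    """Fetch the estimated result count, or None if the response lacks it."""
    results = json.loads(urlopen(build_search_url(search_term)).read())
    try:
        return results['responseData']['cursor']['estimatedResultCount']
    except (KeyError, TypeError):
        return None
```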
@kevin-smets
kevin-smets / iterm2-solarized.md
Last active July 31, 2024 06:33
iTerm2 + Oh My Zsh + Solarized color scheme + Source Code Pro Powerline + Font Awesome + [Powerlevel10k] - (macOS)

[Screenshots: the Default and Powerlevel10k prompt themes]

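The gist itself is a step-by-step setup guide; from memory, the core commands reduce to roughly the following (a sketch, not copied from the gist -- the gist remains the authoritative source for the full font and color-scheme steps):

```shell
# Install Oh My Zsh via its official installer
sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)"

# Clone Powerlevel10k into Oh My Zsh's custom themes directory
git clone --depth=1 https://github.com/romkatv/powerlevel10k.git \
  "${ZSH_CUSTOM:-$HOME/.oh-my-zsh/custom}/themes/powerlevel10k"

# Then set ZSH_THEME="powerlevel10k/powerlevel10k" in ~/.zshrc and restart zsh
```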
@sshopov
sshopov / run_in_new_thread_decorator.py
Last active February 24, 2018 21:34
A handy decorator to run a function in a new thread. It came from https://arcpy.wordpress.com/2013/10/25/using-os-startfile-and-webbrowser-open-in-arcgis-for-desktop/ Tags: arcpy python arcgis
import functools
import threading

# A decorator that will run its wrapped function in a new thread
def run_in_new_thread(function):
    # functools.wraps will copy over the docstring and some other metadata
    # from the original function
    @functools.wraps(function)
    def fn_(*args, **kwargs):
        thread = threading.Thread(target=function, args=args, kwargs=kwargs)
        thread.start()
        return thread  # returned so callers can join() if they need to
    return fn_
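A self-contained usage sketch (repeating a completed version of the decorator so the example runs on its own; returning the thread so the caller can `join()` is my addition):

```python
import functools
import threading

def run_in_new_thread(function):
    @functools.wraps(function)
    def fn_(*args, **kwargs):
        thread = threading.Thread(target=function, args=args, kwargs=kwargs)
        thread.start()
        return thread  # hand back the thread so callers can join()
    return fn_

results = []

@run_in_new_thread
def background_append(value):
    results.append(value)

# The call returns immediately; join() waits for the work to finish.
background_append(42).join()
```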
@erikerlandson
erikerlandson / xval_ALS.scala
Created June 26, 2014 20:42
Demonstrate a function that abstracts cross validation for an MLLib model - in this case org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import java.lang.Math
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.recommendation.Rating
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.util.MLUtils.kFold
// Preload some Rating data for my own convenience
val txt = sc.textFile("/home/eje/git/ratorade/data/bgr.dat")
val ratings = txt.map(_.split('\t') match { case Array(user, item, rating, _, _) => Rating(user.toInt, item.toInt, rating.toDouble / 100.0)})
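The gist wraps MLlib's `kFold` utility; the underlying mechanics are small enough to sketch in plain Python (this mirrors the idea, not MLlib's RDD-based API, and the function names are mine):

```python
import random

def k_fold_indices(n, k, seed=0):
    """Randomly partition indices 0..n-1 into k disjoint folds, yielding
    (train, test) index lists where each fold serves once as the test set."""
    idx = list(range(n))
    random.Random(seed).shuffle(idx)
    folds = [idx[i::k] for i in range(k)]
    for i in range(k):
        test = folds[i]
        train = [j for f in folds[:i] + folds[i + 1:] for j in f]
        yield train, test

def cross_validate(data, k, train_fn, score_fn, seed=0):
    """Average score of models trained on k-1 folds, scored on the held-out fold."""
    scores = []
    for train, test in k_fold_indices(len(data), k, seed):
        model = train_fn([data[i] for i in train])
        scores.append(score_fn(model, [data[i] for i in test]))
    return sum(scores) / len(scores)
```

Abstracting over `train_fn` and `score_fn` is the same move the Scala version makes: the cross-validation loop never needs to know it is training an ALS `MatrixFactorizationModel`.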
@cigrainger
cigrainger / gist:62910e58db46b7397de2
Created July 11, 2014 18:28
Arun et al measure with NPR data
from urllib2 import urlopen
from json import load
import re, nltk
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet, stopwords
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)
from gensim import corpora, models, similarities, matutils
import numpy as np
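The Arun et al. (2010) measure scores a candidate LDA topic count K by the symmetric KL divergence between two K-length distributions: the singular values of the K x V topic-word matrix, and the document-length-weighted column sums of the N x K document-topic matrix. A numpy sketch of that computation (variable names are mine; this follows my reading of the paper, not the gist's exact code):

```python
import numpy as np

def arun_measure(topic_word, doc_topic, doc_lengths):
    """Symmetric KL divergence between (a) the normalized singular values of
    the K x V topic-word matrix and (b) normalized doc-length-weighted topic
    proportions from the N x K doc-topic matrix. Lower values suggest a
    better-supported choice of K (assumes K <= V so both vectors have K entries)."""
    cm1 = np.linalg.svd(np.asarray(topic_word), compute_uv=False)
    cm1 = cm1 / cm1.sum()
    cm2 = np.asarray(doc_lengths, dtype=float) @ np.asarray(doc_topic)
    cm2 = cm2 / cm2.sum()
    return float(np.sum(cm1 * np.log(cm1 / cm2)) +
                 np.sum(cm2 * np.log(cm2 / cm1)))
```

In practice one computes this for a range of K values (the matrices coming from, e.g., gensim's LdaModel) and looks for the minimum.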