Ali Hürriyetoğlu ahurriyetoglu

## pearson_vectors.py
# Author: denis.engemann@gmail.com
# License: simplified BSD (3 clause)
# Note: code is based on scipy.stats.pearsonr
from scipy import stats

def compute_corr(x, y):
    x = np.asarray(x)
    y = np.asarray(y)
    mx = x.mean(axis=-1)
    my = y.mean(axis=-1)

## turkceOzelsimler
"Ecrin"
"Eymen"
"Ceylin"
"Ebrar"
"Tuana"
"Esila"
"Esra"
"Enes"
"Talha"
"Ömer"

## plot_correlogram.py
def plot_correlogram(df,figsize=(20,20)):
    ''' Create an n x n matrix of scatter plots for every
    combination of numeric columns in a dataframe'''

    cols = list(df.columns[df.dtypes=='float64'])
    n = len(cols)
    fig, ax = plt.subplots(n,n,figsize=figsize)
    for i,y in enumerate(cols):
        for j,x in enumerate(cols):
            if i != n-1:

## marisa_count_vectorizer.py
import numpy as np
import marisa_trie
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.externals import six

class MarisaCountVectorizer(CountVectorizer):

    # ``CountVectorizer.fit`` method calls ``fit_transform`` so
    # ``fit`` is not provided
    def fit_transform(self, raw_documents, y=None):

## gist:8172796

      
              1 file
            
          
              404 forks
            
          
              23 comments
            
          
              1645 stars
            
          
                debasishg
                / gist:8172796
            
            
              Last active
              July 5, 2024 11:53
            
              
                A collection of links for streaming algorithms and data structures
              
          
    General Background and Overview


Probabilistic Data Structures for Web Analytics and Data Mining : A great overview of the space of probabilistic data structures and how they are used in approximation algorithm implementation.
Models and Issues in Data Stream Systems
Philippe Flajolet’s contribution to streaming algorithms : A presentation by Jérémie Lumbroso that visits some of the historical perspectives and how it all began with Flajolet
Approximate Frequency Counts over Data Streams by Gurmeet Singh Manku & Rajeev Motwani : One of the early papers on the subject.
[Methods for Finding Frequent Items in Data Streams](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.187.9800&rep=rep1&t


## rest-server.py
#!flask/bin/python
from flask import Flask, jsonify, abort, request, make_response, url_for
from flask_httpauth import HTTPBasicAuth

app = Flask(__name__, static_url_path = "")
auth = HTTPBasicAuth()

@auth.get_password
def get_password(username):
    if username == 'miguel':

## kmeans.py
#!/usr/bin/python
#
# K-means clustering using Lloyd's algorithm in pure Python.
# Written by Lars Buitinck. This code is in the public domain.
#
# The main program runs the clustering algorithm on a bunch of text documents
# specified as command-line arguments. These documents are first converted to
# sparse vectors, represented as lists of (index, value) pairs.

from collections import defaultdict

## gist:3745866
>>> from pandas import DataFrame
>>> from sklearn.feature_extraction.text import CountVectorizer
>>> docs = ["You can catch more flies with honey than you can with vinegar.",
...         "You can lead a horse to water, but you can't make him drink."]
>>> vect = CountVectorizer(min_df=0., max_df=1.0)
>>> X = vect.fit_transform(docs)
>>> print(DataFrame(X.A, columns=vect.get_feature_names()).to_string())
   but  can  catch  drink  flies  him  honey  horse  lead  make  more  than  to  vinegar  water  with  you
0    0    2      1      0      1    0      1      0     0     0     1     1   0        1      0     2    2
1    1    2      0      1      0    1      0      1     1     1     0     0   1        0      1     0    2

## rank_metrics.py
"""Information Retrieval metrics

Useful Resources:
http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt
http://www.nii.ac.jp/TechReports/05-014E.pdf
http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf
Learning to Rank for Information Retrieval (Tie-Yan Liu)
"""
import numpy as np

## incrementalmr.py
def incremental_map_reduce(
		map_f,
		reduce_f,
		db,
		source_table_name,
		target_table_name,
		source_queued_date_field_name,
		counter_table_name = "IncrementalMRCounters",
		counter_key = None,
		max_datetime = None,
	# Author: denis.engemann@gmail.com
	# License: simplified BSD (3 clause)
	# Note: code is based on scipy.stats.pearsonr
	from scipy import stats

	def compute_corr(x, y):
	x = np.asarray(x)
	y = np.asarray(y)
	mx = x.mean(axis=-1)
	my = y.mean(axis=-1)
	"Ecrin"
	"Eymen"
	"Ceylin"
	"Ebrar"
	"Tuana"
	"Esila"
	"Esra"
	"Enes"
	"Talha"
	"Ömer"
	def plot_correlogram(df,figsize=(20,20)):
	''' Create an n x n matrix of scatter plots for every
	combination of numeric columns in a dataframe'''

	cols = list(df.columns[df.dtypes=='float64'])
	n = len(cols)
	fig, ax = plt.subplots(n,n,figsize=figsize)
	for i,y in enumerate(cols):
	for j,x in enumerate(cols):
	if i != n-1:
	import numpy as np
	import marisa_trie
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.externals import six

	class MarisaCountVectorizer(CountVectorizer):

	# ``CountVectorizer.fit`` method calls ``fit_transform`` so
	# ``fit`` is not provided
	def fit_transform(self, raw_documents, y=None):
	#!flask/bin/python
	from flask import Flask, jsonify, abort, request, make_response, url_for
	from flask_httpauth import HTTPBasicAuth

	app = Flask(__name__, static_url_path = "")
	auth = HTTPBasicAuth()

	@auth.get_password
	def get_password(username):
	if username == 'miguel':
	#!/usr/bin/python
	#
	# K-means clustering using Lloyd's algorithm in pure Python.
	# Written by Lars Buitinck. This code is in the public domain.
	#
	# The main program runs the clustering algorithm on a bunch of text documents
	# specified as command-line arguments. These documents are first converted to
	# sparse vectors, represented as lists of (index, value) pairs.

	from collections import defaultdict
	>>> from pandas import DataFrame
	>>> from sklearn.feature_extraction.text import CountVectorizer
	>>> docs = ["You can catch more flies with honey than you can with vinegar.",
	... "You can lead a horse to water, but you can't make him drink."]
	>>> vect = CountVectorizer(min_df=0., max_df=1.0)
	>>> X = vect.fit_transform(docs)
	>>> print(DataFrame(X.A, columns=vect.get_feature_names()).to_string())
	but can catch drink flies him honey horse lead make more than to vinegar water with you
	0 0 2 1 0 1 0 1 0 0 0 1 1 0 1 0 2 2
	1 1 2 0 1 0 1 0 1 1 1 0 0 1 0 1 0 2
	"""Information Retrieval metrics

	Useful Resources:
	http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt
	http://www.nii.ac.jp/TechReports/05-014E.pdf
	http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
	http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf
	Learning to Rank for Information Retrieval (Tie-Yan Liu)
	"""
	import numpy as np
	def incremental_map_reduce(
	map_f,
	reduce_f,
	db,
	source_table_name,
	target_table_name,
	source_queued_date_field_name,
	counter_table_name = "IncrementalMRCounters",
	counter_key = None,
	max_datetime = None,