Ali Hürriyetoğlu ahurriyetoglu

## turkceOzelsimler
"Ecrin"
"Eymen"
"Ceylin"
"Ebrar"
"Tuana"
"Esila"
"Esra"
"Enes"
"Talha"
"Ömer"

## rank_metrics.py
"""Information Retrieval metrics

Useful Resources:
http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt
http://www.nii.ac.jp/TechReports/05-014E.pdf
http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf
Learning to Rank for Information Retrieval (Tie-Yan Liu)
"""
import numpy as np

## rest-server.py
#!flask/bin/python
from flask import Flask, jsonify, abort, request, make_response, url_for
from flask.ext.httpauth import HTTPBasicAuth

app = Flask(__name__, static_url_path = "")
auth = HTTPBasicAuth()

@auth.get_password
def get_password(username):
    if username == 'miguel':

## plot_correlogram.py
def plot_correlogram(df,figsize=(20,20)):
    ''' Create an n x n matrix of scatter plots for every
    combination of numeric columns in a dataframe'''

    cols = list(df.columns[df.dtypes=='float64'])
    n = len(cols)
    fig, ax = plt.subplots(n,n,figsize=figsize)
    for i,y in enumerate(cols):
        for j,x in enumerate(cols):
            if i != n-1:

## peakdetect.py
import numpy as np
from math import pi, log
import pylab
from scipy import fft, ifft
from scipy.optimize import curve_fit

i = 10000
x = np.linspace(0, 3.5 * pi, i)
y = (0.3*np.sin(x) + np.sin(1.3 * x) + 0.9 * np.sin(4.2 * x) + 0.06 *
    np.random.randn(i))

## medfilt.py
#!/usr/bin/env python

import numpy as np


def medfilt (x, k):
    """Apply a length-k median filter to a 1D array x.
    Boundaries are extended by repeating endpoints.
    """
    assert k % 2 == 1, "Median filter length must be odd."

## gist:38574f7ac70cb04e8eb6

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                ahurriyetoglu
                / gist:38574f7ac70cb04e8eb6
            
            
              Created
              June 23, 2014 08:23
                — forked from debasishg/gist:8172796
            
          
General Background and Overview


Probabilistic Data Structures for Web Analytics and Data Mining : A great overview of the space of probabilistic data structures and how they are used in approximation algorithm implementation.
Models and Issues in Data Stream Systems
Philippe Flajolet’s contribution to streaming algorithms : A presentation by Jérémie Lumbroso that visits some of the historical perspectives and how it all began with Flajolet.
Approximate Frequency Counts over Data Streams by Gurmeet Singh Manku & Rajeev Motwani : One of the early papers on the subject.
[Methods for Finding Frequent Items in Data Streams](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.187.9800&rep=rep1&type=pdf)


## gist:29189bae26bbd0f7a82a
>>> from pandas import DataFrame
>>> from sklearn.feature_extraction.text import CountVectorizer
>>> docs = ["You can catch more flies with honey than you can with vinegar.",
...         "You can lead a horse to water, but you can't make him drink."]
>>> vect = CountVectorizer(min_df=0., max_df=1.0)
>>> X = vect.fit_transform(docs)
>>> print(DataFrame(X.A, columns=vect.get_feature_names()).to_string())
   but  can  catch  drink  flies  him  honey  horse  lead  make  more  than  to  vinegar  water  with  you
0    0    2      1      0      1    0      1      0     0     0     1     1   0        1      0     2    2
1    1    2      0      1      0    1      0      1     1     1     0     0   1        0      1     0    2

## kmeans.py
#!/usr/bin/python
#
# K-means clustering using Lloyd's algorithm in pure Python.
# Written by Lars Buitinck. This code is in the public domain.
#
# The main program runs the clustering algorithm on a bunch of text documents
# specified as command-line arguments. These documents are first converted to
# sparse vectors, represented as lists of (index, value) pairs.

from collections import defaultdict
	"Ecrin"
	"Eymen"
	"Ceylin"
	"Ebrar"
	"Tuana"
	"Esila"
	"Esra"
	"Enes"
	"Talha"
	"Ömer"
	"""Information Retrieval metrics

	Useful Resources:
	http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt
	http://www.nii.ac.jp/TechReports/05-014E.pdf
	http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
	http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf
	Learning to Rank for Information Retrieval (Tie-Yan Liu)
	"""
	import numpy as np
	#!flask/bin/python
	from flask import Flask, jsonify, abort, request, make_response, url_for
	from flask.ext.httpauth import HTTPBasicAuth

	app = Flask(__name__, static_url_path = "")
	auth = HTTPBasicAuth()

	@auth.get_password
	def get_password(username):
	if username == 'miguel':
	def plot_correlogram(df,figsize=(20,20)):
	''' Create an n x n matrix of scatter plots for every
	combination of numeric columns in a dataframe'''

	cols = list(df.columns[df.dtypes=='float64'])
	n = len(cols)
	fig, ax = plt.subplots(n,n,figsize=figsize)
	for i,y in enumerate(cols):
	for j,x in enumerate(cols):
	if i != n-1:
	import numpy as np
	from math import pi, log
	import pylab
	from scipy import fft, ifft
	from scipy.optimize import curve_fit

	i = 10000
	x = np.linspace(0, 3.5 * pi, i)
	y = (0.3*np.sin(x) + np.sin(1.3 * x) + 0.9 * np.sin(4.2 * x) + 0.06 *
	np.random.randn(i))
	#!/usr/bin/env python

	import numpy as np


	def medfilt (x, k):
	"""Apply a length-k median filter to a 1D array x.
	Boundaries are extended by repeating endpoints.
	"""
	assert k % 2 == 1, "Median filter length must be odd."
	>>> from pandas import DataFrame
	>>> from sklearn.feature_extraction.text import CountVectorizer
	>>> docs = ["You can catch more flies with honey than you can with vinegar.",
	... "You can lead a horse to water, but you can't make him drink."]
	>>> vect = CountVectorizer(min_df=0., max_df=1.0)
	>>> X = vect.fit_transform(docs)
	>>> print(DataFrame(X.A, columns=vect.get_feature_names()).to_string())
	   but  can  catch  drink  flies  him  honey  horse  lead  make  more  than  to  vinegar  water  with  you
	0    0    2      1      0      1    0      1      0     0     0     1     1   0        1      0     2    2
	1    1    2      0      1      0    1      0      1     1     1     0     0   1        0      1     0    2
	#!/usr/bin/python
	#
	# K-means clustering using Lloyd's algorithm in pure Python.
	# Written by Lars Buitinck. This code is in the public domain.
	#
	# The main program runs the clustering algorithm on a bunch of text documents
	# specified as command-line arguments. These documents are first converted to
	# sparse vectors, represented as lists of (index, value) pairs.

	from collections import defaultdict