Skip to content

Instantly share code, notes, and snippets.

View ahurriyetoglu's full-sized avatar

Ali Hürriyetoğlu ahurriyetoglu

View GitHub Profile
#!/usr/bin/python
#
# K-means clustering using Lloyd's algorithm in pure Python.
# Written by Lars Buitinck. This code is in the public domain.
#
# The main program runs the clustering algorithm on a bunch of text documents
# specified as command-line arguments. These documents are first converted to
# sparse vectors, represented as lists of (index, value) pairs.
from collections import defaultdict
>>> from pandas import DataFrame
>>> from sklearn.feature_extraction.text import CountVectorizer
>>> docs = ["You can catch more flies with honey than you can with vinegar.",
... "You can lead a horse to water, but you can't make him drink."]
>>> vect = CountVectorizer(min_df=0., max_df=1.0)
>>> X = vect.fit_transform(docs)
>>> print(DataFrame(X.A, columns=vect.get_feature_names()).to_string())
but can catch drink flies him honey horse lead make more than to vinegar water with you
0 0 2 1 0 1 0 1 0 0 0 1 1 0 1 0 2 2
1 1 2 0 1 0 1 0 1 1 1 0 0 1 0 1 0 2
  1. General Background and Overview
#!/usr/bin/env python
import numpy as np
def medfilt (x, k):
"""Apply a length-k median filter to a 1D array x.
Boundaries are extended by repeating endpoints.
"""
assert k % 2 == 1, "Median filter length must be odd."
@ahurriyetoglu
ahurriyetoglu / pyTwiNL.py
Last active August 29, 2015 14:07
This Python code provides easy access to the twiqs.nl API. Information about twiqs.nl can be found at http://ifarm.nl/erikt/twinl/
# code to connect to twiqs.nl API
# NOTE: the twiqs.nl address was recorded here as http://145.100.57.182, but the URL used below points to 145.100.57.222 -- confirm which one is current.
import requests
urlto_twiqsNL = "http://145.100.57.222/cgi-bin/twitter"
s = requests.Session()
r = s.post(urlto_twiqsNL, data={"NAME":user_name, "PASSWD":passwd})
print('Cookie Created')
import numpy as np
from math import pi, log
import pylab
from scipy import fft, ifft
from scipy.optimize import curve_fit
i = 10000
x = np.linspace(0, 3.5 * pi, i)
y = (0.3*np.sin(x) + np.sin(1.3 * x) + 0.9 * np.sin(4.2 * x) + 0.06 *
np.random.randn(i))
def plot_correlogram(df,figsize=(20,20)):
''' Creat an n x n matrix of scatter plots for every
combination of numeric columns in a dataframe'''
cols = list(df.columns[df.dtypes=='float64'])
n = len(cols)
fig, ax = plt.subplots(n,n,figsize=figsize)
for i,y in enumerate(cols):
for j,x in enumerate(cols):
if i != n-1:
#!flask/bin/python
from flask import Flask, jsonify, abort, request, make_response, url_for
from flask.ext.httpauth import HTTPBasicAuth
app = Flask(__name__, static_url_path = "")
auth = HTTPBasicAuth()
@auth.get_password
def get_password(username):
if username == 'miguel':
import requests as r
import json
import datetime
# Docs: http://developer.nytimes.com/docs
# Try at: http://developer.nytimes.com/io-docs
srch_api_key = "Your key <check docs>"
keyphrase = 'anything_you_want'
srch_api_result = r.get("http://api.nytimes.com/svc/search/v2/articlesearch.json?q="+keyphrase+"&api-key="+srch_api_key)
# Docs: http://open-platform.theguardian.com/documentation/
import requests as r
import json
g_api_key = "Your API key" # limit is 12 calls per second.
keyterm = "Your Keyterm"
#from-date=1920-
g_articles_strt = r.get("http://content.guardianapis.com/search?q="+keyterm+"/politics&show-blocks=all&from-date=2015-01-01&api-key="+g_api_key)