Khalil micaleel
# coding=UTF-8
from __future__ import division
import nltk
from collections import Counter
# This is a simple tool for adding automatic hashtags into an article title
# Created by Shlomi Babluki
# Sep, 2013
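
Only the header of this gist survives in the preview. As a minimal sketch of the idea (reusing the imports above; the function name and the frequency-based scoring are illustrative assumptions, not Babluki's actual code), the tool could hashtag the title words that occur most often in the article body:

# Illustrative sketch only -- not the original gist's implementation.
def add_hashtags(title, article_text, max_tags=3):
    stops = set(nltk.corpus.stopwords.words('english'))
    # Count non-stopword tokens in the article body
    body_counts = Counter(w.lower() for w in nltk.word_tokenize(article_text)
                          if w.isalpha() and w.lower() not in stops)
    # Rank title words by how often they appear in the body
    ranked = sorted((w for w in title.split() if body_counts[w.lower()]),
                    key=lambda w: body_counts[w.lower()], reverse=True)
    to_tag = set(ranked[:max_tags])
    return ' '.join('#' + w if w in to_tag else w for w in title.split())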
public class Maze {
    public int counter = 0;
    public char[][] maze =
        {{'#', '#', '#', '#', '#', '#', '#', '#', '#', '#'},
         {'#', ' ', ' ', ' ', '#', ' ', '#', ' ', ' ', '#'},
         {'#', ' ', ' ', ' ', '#', ' ', '#', ' ', '#', '#'},
         {'#', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '#'},
         // remaining rows and solver methods truncated in this gist preview
        };
}
# coding=UTF-8
from __future__ import division
import nltk
import re
import requests
# Add your freebase key here
# If you don't have one, register at https://code.google.com/apis/console
FREEBASE_KEY = ""
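
The preview cuts off after the key. Freebase itself has since been retired; as a historical sketch consistent with the imports above (the endpoint and parameters are from the old Freebase search API, and the helper name is my own), a lookup would have looked roughly like:

# Historical sketch: the Freebase search API has been shut down.
def freebase_search(query):
    url = 'https://www.googleapis.com/freebase/v1/search'
    response = requests.get(url, params={'query': query, 'key': FREEBASE_KEY})
    return response.json().get('result', [])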
# coding=UTF-8
import nltk
from nltk.corpus import brown
# This is a fast and simple noun phrase extractor (based on NLTK)
# Feel free to use it, just keep a link back to this post
# http://thetokenizer.com/2013/05/09/efficient-way-to-extract-the-main-topics-of-a-sentence/
# Created by Shlomi Babluki
# May, 2013
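
The extractor's body is truncated here. A rough sketch of the approach the imports suggest (a tagger trained on the Brown corpus, with consecutive nouns and adjectives grouped into phrases); this is an illustration, not the gist's exact code:

# Illustrative sketch: tag with a Brown-trained tagger, then join noun runs.
tagger = nltk.UnigramTagger(brown.tagged_sents(categories='news'),
                            backoff=nltk.DefaultTagger('NN'))

def extract_noun_phrases(sentence):
    phrases, current = [], []
    for word, tag in tagger.tag(nltk.word_tokenize(sentence)):
        if tag.startswith('NN') or tag == 'JJ':  # nouns/adjectives extend a phrase
            current.append(word)
        elif current:
            phrases.append(' '.join(current))
            current = []
    if current:
        phrases.append(' '.join(current))
    return phrases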
# coding=UTF-8
from __future__ import division
import re
# This is a naive text summarization algorithm
# Created by Shlomi Babluki
# April, 2013
class SummaryTool(object):
    # (methods truncated in this gist preview)
    pass
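
The class body is cut off in this preview. The algorithm the comments describe scores each sentence by its word overlap with the rest of the text and keeps the top scorers; a compact re-sketch of that idea (not the original methods) follows:

# Illustrative sketch of sentence-intersection scoring.
def sentences_intersection(s1, s2):
    w1, w2 = set(s1.split()), set(s2.split())
    if not w1 or not w2:
        return 0
    # Overlap normalised by the average sentence length
    return len(w1 & w2) / ((len(w1) + len(w2)) / 2)

def summarize(text, n=2):
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    scores = [sum(sentences_intersection(s, t) for t in sentences if t is not s)
              for s in sentences]
    best = sorted(range(len(sentences)), key=lambda i: scores[i], reverse=True)[:n]
    return ' '.join(sentences[i] for i in sorted(best))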
public class Maze_Best {
    public int counter = 0;
    private final static int MAX_VALUE = 1000;
    int best_solution = MAX_VALUE;
    public char[][] maze =
        {{'#', '#', '#', '#', '#', '#', '#', '#', '#', '#'},
         {'#', ' ', ' ', ' ', '#', ' ', '#', ' ', ' ', '#'},
         // remaining rows and solver methods truncated in this gist preview
        };
}
# The OAuthHandler/API pattern below matches the tweepy-style pyfoursquare
# client (an assumption; the gist preview omits the imports and credentials).
from pyfoursquare import OAuthHandler, API

auth = OAuthHandler(CLIENT_ID, CLIENT_SECRET, CALLBACK)  # app credentials
auth.set_access_token(ACCESS_TOKEN)
api = API(auth)
venue = api.venues(id='4bd47eeb5631c9b69672a230')  # fetch one venue by its id
#-*- coding: utf-8 -*-
import re
import math
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk import bigrams, trigrams

stopwords = nltk.corpus.stopwords.words('portuguese')
tokenizer = RegexpTokenizer(r"[\w’]+", flags=re.UNICODE)
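
No prose accompanies this fragment; given the imports, a small usage sketch of what such a setup is typically for, counting stopword-filtered Portuguese bigrams (the sample sentence is my own illustration):

# Illustrative usage: tokenize, drop stopwords, count bigrams.
text = u'O gato preto dorme no telhado da casa do vizinho'
tokens = [t.lower() for t in tokenizer.tokenize(text)
          if t.lower() not in stopwords]
fd = nltk.FreqDist(bigrams(tokens))
print(fd.most_common(5))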
# -*- coding: utf-8 -*-
"""Normalise (normalize) unicode data in Python to remove umlauts, accents etc."""
import unicodedata

data = u'naïve café'
# NFKD decomposes each accented character into a base letter plus a combining
# mark; encoding to ASCII with errors='ignore' then drops the marks.
normal = unicodedata.normalize('NFKD', data).encode('ASCII', 'ignore')
print(normal)  # -> 'naive cafe'