This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import string | |
| import re | |
| from nltk.corpus import stopwords | |
| from nltk.tokenize import TweetTokenizer | |
| cache_english_stopwords=stopwords.words('english') | |
| def tweet_clean(tweet): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Solve this with Python's regular expressions | |
| import re | |
# Compiled once at import time instead of on every call. Matches an
# ``http://`` or ``https://`` scheme followed by a run of characters that
# commonly appear in URLs (including ``-``, ``_``, ``%``, ``#``, ``~``, ``+``
# and ``@``, so links such as ``https://t.co/a-b`` are removed whole).
_URL_PATTERN = re.compile(r'https?://[a-zA-Z0-9.?/&=:_%#~+@-]*')


def remove_url(txt):
    """Return ``txt`` with every http(s) URL stripped out.

    Parameters
    ----------
    txt : str
        Text that may contain URLs.

    Returns
    -------
    str
        ``txt`` with each matched URL replaced by the empty string.
        Surrounding whitespace and any character outside the URL character
        set (for example a trailing comma) are left in place.
    """
    return _URL_PATTERN.sub("", txt)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import sys | |
| #from numpy import array, diag, zeros, ones_like, identity, amax, maximum, amin, minimum, loadtxt, kron, ones, reshape | |
| import numpy as np | |
| from numpy.random import rand | |
| #from matplotlib.pyplot import * | |
| from optparse import OptionParser |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| import json | |
| import math | |
| import random | |
| import fileinput | |
| import collections | |
| class LDASampler(object): | |
| def __init__(self, docs=None, num_topics=None, alpha=0.1, beta=0.1, state=None): | |
| if state: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """Information Retrieval metrics | |
| Useful Resources: | |
| http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt | |
| http://www.nii.ac.jp/TechReports/05-014E.pdf | |
| http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf | |
| http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf | |
| Learning to Rank for Information Retrieval (Tie-Yan Liu) | |
| """ | |
| import numpy as np |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # (C) Mathieu Blondel, November 2013 | |
| # License: BSD 3 clause | |
| import numpy as np | |
| def ranking_precision_score(y_true, y_score, k=10): | |
| """Precision at rank k | |
| Parameters |
NewerOlder