This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# (C) Mathieu Blondel, November 2013 | |
# License: BSD 3 clause | |
import numpy as np | |
def ranking_precision_score(y_true, y_score, k=10): | |
"""Precision at rank k | |
Parameters |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
The path is a directory by default | |
''' | |
def hdfs_list(path, subtract_one=True): | |
fs = spark._jvm.org.apache.hadoop.fs.FileSystem.get(spark._jsc.hadoopConfiguration()) | |
list_status = fs.listStatus(spark._jvm.org.apache.hadoop.fs.Path(path)) | |
# file.getPath().getName(), file.getBlockSize(), file.getLen() | |
files_size = [file.getLen() for file in list_status] | |
totol_size_in_MB = sum(files_size) / 1024.0 / 1024.0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" Implementation of OKapi BM25 with sklearn's TfidfVectorizer | |
Distributed as CC-0 (https://creativecommons.org/publicdomain/zero/1.0/) | |
""" | |
import numpy as np | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from scipy import sparse | |
class BM25(object): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch, torch.nn as nn, torch.nn.functional as F | |
import numpy as np | |
import torch.optim as optim | |
# tied autoencoder using off the shelf nn modules | |
class TiedAutoEncoderOffTheShelf(nn.Module): | |
def __init__(self, inp, out, weight): | |
super().__init__() | |
self.encoder = nn.Linear(inp, out, bias=False) | |
self.decoder = nn.Linear(out, inp, bias=False) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Information Retrieval metrics | |
Useful Resources: | |
http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt | |
http://www.nii.ac.jp/TechReports/05-014E.pdf | |
http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf | |
http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf | |
Learning to Rank for Information Retrieval (Tie-Yan Liu) | |
""" | |
import numpy as np |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# Program to filter Wikipedia XML dumps to "clean" text consisting only of lowercase | |
# letters (a-z, converted from A-Z), and spaces (never consecutive). | |
# All other characters are converted to spaces. Only text which normally appears | |
# in the web browser is displayed. Tables are removed. Image captions are | |
# preserved. Links are converted to normal text. Digits are spelled out. | |
# Written by Matt Mahoney, June 10, 2006. This program is released to the public domain. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# cool_gpu2.sh This script will enable or disable fixed gpu fan speed | |
# | |
# Description: A script to control GPU fan speed on headless (non-X) linux nodes | |
# Original Script by Axel Kohlmeyer <akohlmey@gmail.com> | |
# https://sites.google.com/site/akohlmey/random-hacks/nvidia-gpu-coolness | |
# | |
# Modified for newer drivers and removed old work-arounds |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
def print_confusion_matrix(confusion_matrix, class_names, figsize = (10,7), fontsize=14): | |
"""Prints a confusion matrix, as returned by sklearn.metrics.confusion_matrix, as a heatmap. | |
Arguments | |
--------- | |
confusion_matrix: numpy.ndarray |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
## This gist contains step by step instructions to install cuda v9.0 and cudnn 7.2 in ubuntu 18.04 | |
### steps #### | |
# verify the system has a cuda-capable gpu | |
# download and install the nvidia cuda toolkit and cudnn | |
# setup environmental variables | |
# verify the installation | |
### |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import sys | |
from logging.handlers import TimedRotatingFileHandler | |
FORMATTER = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") | |
LOG_FILE = "my_app.log" | |
def get_console_handler(): | |
console_handler = logging.StreamHandler(sys.stdout) | |
console_handler.setFormatter(FORMATTER) |
NewerOlder