Skip to content

Instantly share code, notes, and snippets.

View kyoungrok0517's full-sized avatar

Kyoungrok Jang kyoungrok0517

View GitHub Profile
@kyoungrok0517
kyoungrok0517 / letor_metrics.py
Created September 1, 2023 03:40 — forked from mblondel/letor_metrics.py
Learning to rank metrics.
# (C) Mathieu Blondel, November 2013
# License: BSD 3 clause
import numpy as np
def ranking_precision_score(y_true, y_score, k=10):
"""Precision at rank k
Parameters
@kyoungrok0517
kyoungrok0517 / pyspark_hdfs_utils.py
Created November 8, 2022 03:44 — forked from idiomer/pyspark_hdfs_utils.py
Using PySpark to handle HDFS, such as list (ls), rename (mv), delete (rm)
'''
The path is a directory by default
'''
def hdfs_list(path, subtract_one=True):
fs = spark._jvm.org.apache.hadoop.fs.FileSystem.get(spark._jsc.hadoopConfiguration())
list_status = fs.listStatus(spark._jvm.org.apache.hadoop.fs.Path(path))
# file.getPath().getName(), file.getBlockSize(), file.getLen()
files_size = [file.getLen() for file in list_status]
totol_size_in_MB = sum(files_size) / 1024.0 / 1024.0
@kyoungrok0517
kyoungrok0517 / bm25.py
Created October 8, 2022 08:34 — forked from koreyou/bm25.py
Implementation of Okapi BM25 with sklearn's TfidfVectorizer
""" Implementation of OKapi BM25 with sklearn's TfidfVectorizer
Distributed as CC-0 (https://creativecommons.org/publicdomain/zero/1.0/)
"""
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy import sparse
class BM25(object):
@kyoungrok0517
kyoungrok0517 / tied_linear.py
Created March 25, 2020 10:56 — forked from InnovArul/tied_linear.py
tied linear layer experiment
import torch, torch.nn as nn, torch.nn.functional as F
import numpy as np
import torch.optim as optim
# tied autoencoder using off the shelf nn modules
class TiedAutoEncoderOffTheShelf(nn.Module):
def __init__(self, inp, out, weight):
super().__init__()
self.encoder = nn.Linear(inp, out, bias=False)
self.decoder = nn.Linear(out, inp, bias=False)
"""Information Retrieval metrics
Useful Resources:
http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt
http://www.nii.ac.jp/TechReports/05-014E.pdf
http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf
Learning to Rank for Information Retrieval (Tie-Yan Liu)
"""
import numpy as np
@kyoungrok0517
kyoungrok0517 / wikifil.pl
Created November 26, 2019 06:22 — forked from kylemcdonald/wikifil.pl
Fork of Matt Mahoney's Wikipedia cleaning script.
#!/usr/bin/perl
# Program to filter Wikipedia XML dumps to "clean" text consisting only of lowercase
# letters (a-z, converted from A-Z), and spaces (never consecutive).
# All other characters are converted to spaces. Only text which normally appears
# in the web browser is displayed. Tables are removed. Image captions are
# preserved. Links are converted to normal text. Digits are spelled out.
# Written by Matt Mahoney, June 10, 2006. This program is released to the public domain.
@kyoungrok0517
kyoungrok0517 / cool_gpu.sh
Created September 17, 2019 13:23 — forked from ernestp/cool_gpu.sh
A script to control Nvidia GPU fan speed on ubuntu desktop
#!/bin/bash
# cool_gpu2.sh This script will enable or disable fixed gpu fan speed
#
# Description: A script to control GPU fan speed on headless (non-X) linux nodes
# Original Script by Axel Kohlmeyer <akohlmey@gmail.com>
# https://sites.google.com/site/akohlmey/random-hacks/nvidia-gpu-coolness
#
# Modified for newer drivers and removed old work-arounds
@kyoungrok0517
kyoungrok0517 / seaborn_confusion_matrix_pretty_print.py
Created May 13, 2019 05:43 — forked from shaypal5/confusion_matrix_pretty_print.py
Pretty print a confusion matrix with seaborn
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
def print_confusion_matrix(confusion_matrix, class_names, figsize = (10,7), fontsize=14):
"""Prints a confusion matrix, as returned by sklearn.metrics.confusion_matrix, as a heatmap.
Arguments
---------
confusion_matrix: numpy.ndarray
@kyoungrok0517
kyoungrok0517 / cuda_installation_on_ubuntu_18.04
Created January 27, 2019 03:00 — forked from Mahedi-61/cuda_11.8_installation_on_Ubuntu_22.04
cuda 9.0 complete installation procedure for ubuntu 18.04 LTS
#!/bin/bash
## This gist contains step by step instructions to install cuda v9.0 and cudnn 7.2 in ubuntu 18.04
### steps ####
# verify the system has a cuda-capable gpu
# download and install the nvidia cuda toolkit and cudnn
# setup environmental variables
# verify the installation
###
@kyoungrok0517
kyoungrok0517 / log.py
Created March 25, 2018 11:25 — forked from nguyenkims/log.py
Basic example of how to set up a Python logger
import logging
import sys
from logging.handlers import TimedRotatingFileHandler
FORMATTER = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
LOG_FILE = "my_app.log"
def get_console_handler():
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setFormatter(FORMATTER)