Skip to content

Instantly share code, notes, and snippets.

View OsmanMutlu's full-sized avatar

Osman Mutlu OsmanMutlu

  • Koç University
  • Istanbul
View GitHub Profile
@OsmanMutlu
OsmanMutlu / assign_paper_choices.py
Created October 18, 2020 13:28
Takes the comp541 paper selection form results in CSV format and writes out each user's first choice. You can use the intermediate dictionaries to resolve any conflicts.
import sys
import csv
input_csv_file = sys.argv[1]
user_prefs = {}
first_paper_prefs = {}
with open(input_csv_file, "r", encoding="utf-8") as csv_file:
lines = list(csv.reader(csv_file, delimiter=','))
paper_names = lines[0][2:]
@OsmanMutlu
OsmanMutlu / gist:ce4165ad143264d18a2cf9bac112a18a
Created October 25, 2019 15:36 — forked from wrburgess/gist:3711050
Permanently remove file from Git history

Reference

Remove sensitive files from Git/GitHub

In Terminal

git filter-branch --index-filter 'git rm --cached --ignore-unmatch [file path/name]' --prune-empty --tag-name-filter cat -- --all

Example:

import lxml.html
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import re
import codecs
import json
with codecs.open("urls.jl","r","utf-8") as f:
import pandas as pd
# y1 and y2 are series
def calculate_kalpha(y1,y2):
df = pd.concat([y1,y2], axis=1)
df.columns = ['y1', 'y2']
agg = 0
diss = 0
for label in df.y1.unique().tolist():
agg += 2*len(df[(df.y1 == label) & (df.y2 == label)])
@OsmanMutlu
OsmanMutlu / things.py
Created December 3, 2018 10:44
Small functions
def url_to_filename(row):  # Takes a pandas row
    """Turn ``row.url`` into a flat ``.folia.xml`` file name and return the row.

    The scheme separator ``://`` becomes ``__``, every remaining ``/``
    becomes ``_``, and a trailing ``cms``/``html``/``ece``/``ece1``
    extension (with its optional leading dot) is replaced by
    ``.folia.xml``. When no such extension is present, ``.folia.xml`` is
    simply appended.

    Args:
        row: Object with a string ``url`` attribute (the original comment
            says a pandas row). It is modified in place.

    Returns:
        The same ``row`` object, with ``row.url`` rewritten.
    """
    url = row.url
    url = re.sub(r"://", r"__", url)
    url = re.sub(r"/", r"_", url)
    # count=1 is required: every part of the pattern is optional, so it also
    # matches the empty string at the very end. Since Python 3.7 re.sub
    # replaces that empty match even when it directly follows the extension
    # match, which would otherwise append ".folia.xml" twice.
    row.url = re.sub(
        r"\.?(cms|html|ece|ece1)?$", r".folia.xml", url, count=1
    )
    return row
def getwordtext(entity): #Takes a folia entity
annot = ""
for word in entity.wrefs():
@OsmanMutlu
OsmanMutlu / gist:02541286614333de1d0136a70cfd6caa
Created October 6, 2018 08:02
Krippendorff's alpha for two annotators with nominal data
import pandas as pd
def calculate_Kalpha2(y1,y2):
df = pd.concat([y1,y2], axis=1)
df.columns = ['y1', 'y2']
n = 2*len(df)
agg = 0
all = 0
for label in df.y1.unique().tolist():
agg += 2*len(df[(df.y1 == label) & (df.y2 == label)])
import pandas as pd
import re
import codecs
from glob import glob
from pynlpl.formats import folia
files = glob("alladjudicated/http*")
all_df = pd.DataFrame(files, columns=["filename"])