Skip to content

Instantly share code, notes, and snippets.

Avatar

Osman Mutlu OsmanMutlu

  • Koç University
  • Istanbul
View GitHub Profile
@OsmanMutlu
OsmanMutlu / assign_paper_choices.py
Created Oct 18, 2020
Takes the comp541 paper selection form results in CSV format and writes out each user's first choice. You can use the intermediate dictionaries to resolve conflicts.
View assign_paper_choices.py
import sys
import csv
input_csv_file = sys.argv[1]
user_prefs = {}
first_paper_prefs = {}
with open(input_csv_file, "r", encoding="utf-8") as csv_file:
lines = list(csv.reader(csv_file, delimiter=','))
paper_names = lines[0][2:]
@OsmanMutlu
OsmanMutlu / gist:ce4165ad143264d18a2cf9bac112a18a
Created Oct 25, 2019 — forked from wrburgess/gist:3711050
Permanently remove file from Git history
View gist:ce4165ad143264d18a2cf9bac112a18a

Reference

Remove sensitive files from Git/Github

In Terminal

git filter-branch --index-filter 'git rm --cached --ignore-unmatch [file path/name]' --prune-empty --tag-name-filter cat -- --all

Example:

View getnews_selenium.py
import lxml.html
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import re
import codecs
import json
with codecs.open("urls.jl","r","utf-8") as f:
View kalpha.py
import pandas as pd
# y1 and y2 are series
def calculate_kalpha(y1,y2):
df = pd.concat([y1,y2], axis=1)
df.columns = ['y1', 'y2']
agg = 0
diss = 0
for label in df.y1.unique().tolist():
agg += 2*len(df[(df.y1 == label) & (df.y2 == label)])
View things.py
def url_to_filename(row):  # Takes a pandas row
    """Rewrite ``row.url`` into a flat ``.folia.xml`` file name.

    The URL is flattened by replacing ``://`` with ``__`` and every
    remaining ``/`` with ``_``. A trailing ``cms``/``html``/``ece``/``ece1``
    extension (with its optional leading dot) is then dropped and
    ``.folia.xml`` is appended.

    Args:
        row: Any object exposing a string ``url`` attribute (the original
            comment says a pandas row). It is modified in place.

    Returns:
        The same ``row`` object, with ``row.url`` replaced by the file name.
    """
    flattened = row.url.replace("://", "__").replace("/", "_")
    # The pattern can also match the empty string at end-of-input. Since
    # Python 3.7 re.sub replaces an empty match that directly follows a
    # non-empty one, which would append ".folia.xml" twice for URLs that
    # carry a known extension. count=1 keeps only the leftmost match.
    row.url = re.sub(
        r"\.?(cms|html|ece|ece1)?$", r".folia.xml", flattened, count=1
    )
    return row
def getwordtext(entity): #Takes a folia entity
annot = ""
for word in entity.wrefs():
@OsmanMutlu
OsmanMutlu / gist:02541286614333de1d0136a70cfd6caa
Created Oct 6, 2018
Krippendorff's alpha for two annotators with nominal data
View gist:02541286614333de1d0136a70cfd6caa
import pandas as pd
def calculate_Kalpha2(y1,y2):
df = pd.concat([y1,y2], axis=1)
df.columns = ['y1', 'y2']
n = 2*len(df)
agg = 0
all = 0
for label in df.y1.unique().tolist():
agg += 2*len(df[(df.y1 == label) & (df.y2 == label)])
View getentities.py
import pandas as pd
import re
import codecs
from glob import glob
from pynlpl.formats import folia
files = glob("alladjudicated/http*")
all_df = pd.DataFrame(files, columns=["filename"])