Osman Mutlu OsmanMutlu

## assign_paper_choices.py
import sys
import csv

# Path of the CSV to read, taken from the first command-line argument.
input_csv_file = sys.argv[1]

# Both mappings start empty; the code that fills them is outside this excerpt.
user_prefs = {}
first_paper_prefs = {}
with open(input_csv_file, "r", encoding="utf-8") as csv_file:
    # Read every row of the comma-separated file into memory at once.
    lines = list(csv.reader(csv_file, delimiter=','))
    # First row, minus its first two cells, gives the paper names
    # (presumably a header row whose leading columns are not papers -- TODO confirm).
    paper_names = lines[0][2:]

## gist:ce4165ad143264d18a2cf9bac112a18a

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                OsmanMutlu
                / gist:ce4165ad143264d18a2cf9bac112a18a
            
            
              Created
              October 25, 2019 15:36
                — forked from wrburgess/gist:3711050
            
              
                Permanently remove file from Git history
              
          
    Reference
Remove sensitive files from Git/GitHub
In Terminal
git filter-branch --index-filter 'git rm --cached --ignore-unmatch [file path/name]' --prune-empty --tag-name-filter cat -- --all

Example:

  
## getnews_selenium.py
import lxml.html
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import re
import codecs
import json

with codecs.open("urls.jl","r","utf-8") as f:

## kalpha.py
import pandas as pd

# y1 and y2 are series
def calculate_kalpha(y1,y2):
    df = pd.concat([y1,y2], axis=1)
    df.columns = ['y1', 'y2']
    agg = 0
    diss = 0
    for label in df.y1.unique().tolist():
        agg += 2*len(df[(df.y1 == label) & (df.y2 == label)])

## things.py
def url_to_filename(row): #Takes a pandas row
    """Turn ``row.url`` into a flat ``.folia.xml`` file name, in place.

    The scheme separator ``://`` becomes ``__``, every remaining ``/``
    becomes ``_``, and a trailing ``.cms``/``.html``/``.ece``/``.ece1``
    extension (the dot is optional) is replaced by ``.folia.xml``. When no
    such extension is present, ``.folia.xml`` is simply appended.

    Args:
        row: Object with a readable and writable ``url`` attribute holding
            a string (the original comment says a pandas row).

    Returns:
        The same ``row`` object, with ``row.url`` overwritten.
    """
    url = row.url.replace("://", "__").replace("/", "_")
    # count=1 is required: the pattern can match the empty string at the end
    # of the input, and since Python 3.7 re.sub also replaces an empty match
    # that directly follows a non-empty one. Without the limit "x.html"
    # turned into "x.folia.xml.folia.xml".
    row.url = re.sub(r"\.?(cms|html|ece|ece1)?$", r".folia.xml", url, count=1)
    return row

def getwordtext(entity): #Takes a folia entity
    annot = ""
    for word in entity.wrefs():

## gist:02541286614333de1d0136a70cfd6caa
import pandas as pd

def calculate_Kalpha2(y1,y2):
    df = pd.concat([y1,y2], axis=1)
    df.columns = ['y1', 'y2']
    n = 2*len(df)
    agg = 0
    all = 0
    for label in df.y1.unique().tolist():
        agg += 2*len(df[(df.y1 == label) & (df.y2 == label)])

## getentities.py
import pandas as pd
import re
import codecs
from glob import glob
from pynlpl.formats import folia

# Paths under alladjudicated/ whose names start with "http".
files = glob("alladjudicated/http*")

# One-column frame holding those paths under the column name "filename".
all_df = pd.DataFrame(files, columns=["filename"])
	import sys
	import csv

	input_csv_file = sys.argv[1]

	user_prefs = {}
	first_paper_prefs = {}
	with open(input_csv_file, "r", encoding="utf-8") as csv_file:
	lines = list(csv.reader(csv_file, delimiter=','))
	paper_names = lines[0][2:]
	import lxml.html
	from selenium import webdriver
	from selenium.webdriver.support.ui import WebDriverWait
	from selenium.webdriver.support import expected_conditions as EC
	from selenium.common.exceptions import TimeoutException
	import re
	import codecs
	import json

	with codecs.open("urls.jl","r","utf-8") as f:
	import pandas as pd

	# y1 and y2 are series
	def calculate_kalpha(y1,y2):
	df = pd.concat([y1,y2], axis=1)
	df.columns = ['y1', 'y2']
	agg = 0
	diss = 0
	for label in df.y1.unique().tolist():
	agg += 2*len(df[(df.y1 == label) & (df.y2 == label)])
	def url_to_filename(row): #Takes a pandas row
		"""Turn ``row.url`` into a flat ``.folia.xml`` file name, in place.

		The scheme separator ``://`` becomes ``__``, every remaining ``/``
		becomes ``_``, and a trailing ``.cms``/``.html``/``.ece``/``.ece1``
		extension (the dot is optional) is replaced by ``.folia.xml``. When no
		such extension is present, ``.folia.xml`` is simply appended.

		Args:
			row: Object with a readable and writable ``url`` attribute holding
				a string (the original comment says a pandas row).

		Returns:
			The same ``row`` object, with ``row.url`` overwritten.
		"""
		url = row.url.replace("://", "__").replace("/", "_")
		# The alternation uses plain "|"; backslash-escaped pipes would match a
		# literal "|" character and no extension would ever be stripped.
		# count=1 is required: the pattern can match the empty string at the
		# end of the input, and since Python 3.7 re.sub also replaces an empty
		# match that directly follows a non-empty one, which would append the
		# suffix twice.
		row.url = re.sub(r"\.?(cms|html|ece|ece1)?$", r".folia.xml", url, count=1)
		return row

	def getwordtext(entity): #Takes a folia entity
	annot = ""
	for word in entity.wrefs():
	import pandas as pd

	def calculate_Kalpha2(y1,y2):
	df = pd.concat([y1,y2], axis=1)
	df.columns = ['y1', 'y2']
	n = 2*len(df)
	agg = 0
	all = 0
	for label in df.y1.unique().tolist():
	agg += 2*len(df[(df.y1 == label) & (df.y2 == label)])