Reference
Remove sensitive files from Git/GitHub
In the terminal:
git filter-branch --index-filter 'git rm --cached --ignore-unmatch [file path/name]' --prune-empty --tag-name-filter cat -- --all
Example:
import sys | |
import csv | |
input_csv_file = sys.argv[1] | |
user_prefs = {} | |
first_paper_prefs = {} | |
with open(input_csv_file, "r", encoding="utf-8") as csv_file: | |
lines = list(csv.reader(csv_file, delimiter=',')) | |
paper_names = lines[0][2:] |
Reference
Remove sensitive files from Git/GitHub
In the terminal:
git filter-branch --index-filter 'git rm --cached --ignore-unmatch [file path/name]' --prune-empty --tag-name-filter cat -- --all
Example:
import lxml.html | |
from selenium import webdriver | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.common.exceptions import TimeoutException | |
import re | |
import codecs | |
import json | |
with codecs.open("urls.jl","r","utf-8") as f: |
import pandas as pd | |
# y1 and y2 are series | |
def calculate_kalpha(y1,y2): | |
df = pd.concat([y1,y2], axis=1) | |
df.columns = ['y1', 'y2'] | |
agg = 0 | |
diss = 0 | |
for label in df.y1.unique().tolist(): | |
agg += 2*len(df[(df.y1 == label) & (df.y2 == label)]) |
def url_to_filename(row):  # Takes a pandas row
    """Rewrite ``row.url`` in place as a flat ``.folia.xml`` file name.

    The scheme separator ``://`` becomes ``__``, every remaining ``/``
    becomes ``_``, and a trailing ``cms``/``html``/``ece``/``ece1`` suffix
    (with or without its leading dot) is replaced by ``.folia.xml``.  When
    no such suffix is present, ``.folia.xml`` is simply appended.

    Args:
        row: Object exposing a string ``url`` attribute (per the original
            comment, a pandas row -- TODO confirm against the caller).

    Returns:
        The same ``row`` object, with ``url`` overwritten.
    """
    flat = row.url.replace("://", "__").replace("/", "_")
    # count=1 is required: the pattern can also match the empty string at the
    # very end of the text, and since Python 3.7 re.sub() replaces an empty
    # match that directly follows a non-empty one.  Without the limit,
    # "x.html" turned into "x.folia.xml.folia.xml".
    row.url = re.sub(r"\.?(cms|html|ece|ece1)?$", r".folia.xml", flat, count=1)
    return row
def getwordtext(entity): #Takes a folia entity | |
annot = "" | |
for word in entity.wrefs(): |
import pandas as pd | |
def calculate_Kalpha2(y1,y2): | |
df = pd.concat([y1,y2], axis=1) | |
df.columns = ['y1', 'y2'] | |
n = 2*len(df) | |
agg = 0 | |
all = 0 | |
for label in df.y1.unique().tolist(): | |
agg += 2*len(df[(df.y1 == label) & (df.y2 == label)]) |
import pandas as pd | |
import re | |
import codecs | |
from glob import glob | |
from pynlpl.formats import folia | |
files = glob("alladjudicated/http*") | |
all_df = pd.DataFrame(files, columns=["filename"]) |