This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Open a session against the FORLAP site and seed the cookie jar.
s = requests.Session()
# A timeout keeps the script from hanging forever if the server never answers.
r_init = s.get('http://forlap.ristekdikti.go.id/perguruantinggi', timeout=30)

# NOTE(review): this is the jar of the *response*, not `s.cookies`; the values
# set below only reach later requests if `jar` is passed explicitly
# (e.g. `s.get(url, cookies=jar)`) -- confirm against the rest of the script.
jar = r_init.cookies

# FORLAP_COOKIE, SESSION_COOKIE and FGT_COOKIE are not defined in the visible
# part of the file; presumably they must be supplied before this point.
jar.set('FORLAPDIKTI', FORLAP_COOKIE,
        domain='forlap.ristekdikti.go.id', path='/perguruantinggi')
jar.set('PHPSESSID', SESSION_COOKIE,
        domain='forlap.ristekdikti.go.id', path='/perguruantinggi')
jar.set('FGTServer', FGT_COOKIE,
        domain='forlap.ristekdikti.go.id', path='/perguruantinggi')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

import pandas as pd

# Read every file in ./files as a Stata dataset and write each one out as a
# CSV (same base name) in the current working directory.
files = os.listdir("files")
df_list = [pd.read_stata(os.path.join("files", name)) for name in files]
for name, df in zip(files, df_list):
    # splitext removes the actual extension instead of assuming it is exactly
    # four characters long.
    df.to_csv(os.path.splitext(name)[0] + ".csv")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

import pandas as pd

# Open the chat export.
# NOTE(review): no encoding is passed, so the platform default is used, and the
# handle is not closed anywhere in the visible part of the script -- consider
# wrapping the loop that consumes `f` in `with open(...) as f:`.
f = open('_chat.txt', 'r')
msg_arr = []  # filled by the loop below (body not visible here)
msg_cnt = 0
# Loop over the lines of the export.
for idx, val in enumerate(f): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# Flatten the region hierarchy in wilayah.csv into one wide table:
# each level-1 row joined to its level-2 children, those to their level-3
# children, and those to their level-4 children (via `parent` -> `wilayah_id`).
wilayah = pd.read_csv('wilayah.csv')

# One frame per hierarchy level.
w4 = wilayah[wilayah.tingkat == 4]
w3 = wilayah[wilayah.tingkat == 3]
w2 = wilayah[wilayah.tingkat == 2]
w1 = wilayah[wilayah.tingkat == 1]

# Tag every column with its level before merging. Relying on merge's default
# `_x`/`_y` suffixes makes the last merge re-create labels that already exist
# (e.g. a second `wilayah_id_x`), which recent pandas rejects with MergeError
# and which otherwise makes the label-based drop below remove extra columns.
w34 = pd.merge(w3.add_suffix('_3'), w4.add_suffix('_4'), how='left',
               left_on='wilayah_id_3', right_on='parent_4')
w23 = pd.merge(w2.add_suffix('_2'), w34, how='left',
               left_on='wilayah_id_2', right_on='parent_3')
w12 = pd.merge(w1.add_suffix('_1'), w23, how='left',
               left_on='wilayah_id_1', right_on='parent_2')

# Column order is unchanged (level 1, 2, 3, 4), so the positional selection
# still applies. Presumably the CSV has four columns per level, giving 16 in
# total -- verify against wilayah.csv.
wmerged = w12.drop(w12.columns[[1, 3, 5, 7, 9, 11, 13, 15]], axis=1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tldextract)
library(descr)

# Load the two domain lists.
# NOTE(review): read.csv() defaults to header = TRUE, so if these files have no
# header row their first domain is consumed as the column name -- confirm.
domain <- read.csv("domainsporn")
domain2 <- read.csv("domains")

# Load the `tldnames` dataset and show its first rows.
data("tldnames")
head(tldnames)

# TLD list passed to the extraction call below.
tld <- getTLD()

# Both frames are given a single column name, "url"
# (assumes each file has exactly one column -- TODO confirm).
colnames(domain) <- "url"
colnames(domain2) <- "url"

# Split each URL of the first list into its domain parts.
d <- tldextract(domain$url, tldnames = tld)