Skip to content

Instantly share code, notes, and snippets.

@ceaksan
Last active August 12, 2021 21:06
Show Gist options
  • Save ceaksan/2a234ca147e8bfd370ca717d9ce86190 to your computer and use it in GitHub Desktop.
Save ceaksan/2a234ca147e8bfd370ca717d9ce86190 to your computer and use it in GitHub Desktop.
#!pip install tabula-py
#!java -version
import tabula
import pandas as pd
# NEW VERSION
# Extract every table from the calendar PDF and turn the date cells into a
# pandas datetime Series named 'ds'.
url = "https://fenbil.aku.edu.tr/FENBILENS/takvim/2014-2022-1RESMI.pdf"

# The result is used below as a sequence of DataFrames (one per extracted
# table).
df = tabula.read_pdf(url, pages='all')

# Flatten every cell of every table, row by row, into a single list.
holidayList = []
for table in df:
    for row in table.values:
        holidayList.extend(row)

# Keep only the first three whitespace-separated tokens of each text cell
# and join them with '-', producing "<day>-<month name>-<year>".
# Non-string cells (e.g. the NaN padding pandas uses for ragged tables) are
# skipped, because calling .split() on them would raise AttributeError.
date_texts = [
    '-'.join(cell.split()[0:3])
    for cell in holidayList
    if isinstance(cell, str)
]
holidays = pd.Series(date_texts, name='ds', dtype=object)

# Turkish month names -> two-digit month numbers, so that the strings match
# the numeric '%d-%m-%Y' format parsed below.
d = {
    'Ocak': '01',
    'Şubat': '02',
    'Mart': '03',
    'Nisan': '04',
    'Mayıs': '05',
    'Haziran': '06',
    'Temmuz': '07',
    'Ağustos': '08',
    'Eylül': '09',
    'Ekim': '10',
    'Kasım': '11',
    'Aralık': '12',
}

# regex=True makes replace() substitute the month name inside each string
# instead of requiring the whole cell to equal a dictionary key.
# Parsing stays strict: a cell that is not a valid date raises here rather
# than being silently turned into NaT.
holidays = pd.to_datetime(holidays.replace(d, regex=True), format='%d-%m-%Y')
holidays.head()
@ceaksan
Copy link
Author

ceaksan commented Aug 10, 2021

OLD VERSION

#!pip install tabula-py
#!java -version

import tabula
import pandas as pd

# Extract every table from the calendar PDF, collect the cells of the first
# three columns of each table, and parse them into a pandas datetime Series.
url = "https://fenbil.aku.edu.tr/FENBILENS/takvim/2014-2022-1RESMI.pdf"

# The result is iterated below as a sequence of DataFrames (one per
# extracted table).
df = tabula.read_pdf(url, pages='all')

# Walk the tables in order and, within each table, columns 0, 1 and 2 in
# order -- the same stacking order as appending the three columns one after
# another. Building a plain list first avoids DataFrame.append, which was
# removed in pandas 2.0.
date_texts = []
for val in df:
    # Tables with fewer than three columns contribute what they have
    # instead of raising IndexError.
    for col in range(min(3, val.shape[1])):
        date_texts.extend(
            # Keep the first three whitespace-separated tokens joined by
            # '-', producing "<day>-<month name>-<year>".
            '-'.join(cell.split()[0:3])
            for cell in val.iloc[:, col]
            # Skip non-string cells (e.g. NaN padding): .split() would
            # raise AttributeError on them.
            if isinstance(cell, str)
        )

data = pd.DataFrame({'date': pd.Series(date_texts, dtype=object)})

# Turkish month names -> two-digit month numbers, so that the strings match
# the numeric '%d-%m-%Y' format parsed below.
d = {
    'Ocak': '01',
    'Şubat': '02',
    'Mart': '03',
    'Nisan': '04',
    'Mayıs': '05',
    'Haziran': '06',
    'Temmuz': '07',
    'Ağustos': '08',
    'Eylül': '09',
    'Ekim': '10',
    'Kasım': '11',
    'Aralık': '12',
}

# regex=True makes replace() substitute the month name inside each string
# instead of requiring the whole cell to equal a dictionary key.
# Parsing stays strict: a cell that is not a valid date raises here rather
# than being silently turned into NaT.
holidays = pd.to_datetime(data['date'].replace(d, regex=True), format='%d-%m-%Y')
holidays.head()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment