Skip to content

Instantly share code, notes, and snippets.

@pritul2
Created July 12, 2022 05:59
Show Gist options
  • Save pritul2/6fe2e0fc203426e58e80476bb81682a0 to your computer and use it in GitHub Desktop.
Save pritul2/6fe2e0fc203426e58e80476bb81682a0 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 12 10:49:03 2022
@author: pritul
"""
import pandas as pd
import os
import asyncio
from datetime import datetime
# Work queue: names of all entries in the input directory.  The workers in
# process_file() pop names off this list until it is empty.
files = os.listdir('../data/july_11_binning')

# Output prefixes.  File names are appended directly, so the trailing slash
# is required.
test_path = '../data/test_binning_jul11/'
train_path = '../data/train_binning_jul11/'

# Running row totals over all processed files; get_train_test_split() adds
# to these on every call.
train_size = test_size = 0
async def get_file(f):
    """Load one input CSV without blocking the event loop.

    Args:
        f: File name (not a full path) inside ``../data/july_11_binning``.

    Returns:
        A ``(df, n_rows)`` tuple: the parsed DataFrame and its row count.
    """
    csv_path = f'../data/july_11_binning/{f}'
    # pd.read_csv is a blocking call; invoked directly inside a coroutine it
    # stalls every other worker.  Off-load it to the default thread pool so
    # concurrently running coroutines can overlap their file I/O.
    loop = asyncio.get_running_loop()
    df = await loop.run_in_executor(None, pd.read_csv, csv_path)
    return df, df.shape[0]
async def get_train_test_split(df, N, test_frac=0.1):
    """Split the first ``N`` rows of ``df`` into disjoint train and test parts.

    The leading ``N - int(N * test_frac)`` rows form the training set and the
    rows that follow (up to row ``N``) form the test set.  The module-level
    counters ``train_size`` and ``test_size`` are incremented by the two
    part sizes.

    Args:
        df: DataFrame to split; rows are taken in their existing order.
        N: Number of rows to split (process_file passes ``df.shape[0]``).
        test_frac: Fraction of ``N`` reserved for the test set.

    Returns:
        ``(train_df, test_df)`` positional slices of ``df``.
    """
    global train_size, test_size
    test_cnt = int(N * test_frac)
    train_cnt = N - test_cnt
    train_size += train_cnt
    test_size += test_cnt
    # The test rows must start where the training rows end.  Slicing the test
    # part from row 0 would hand out rows that are also in the training part,
    # leaking every test sample into the training set.
    train_df = df.iloc[:train_cnt]
    test_df = df.iloc[train_cnt:N]
    return train_df, test_df
async def store_files(train_df, test_df, f):
    """Write the train and test parts of one input file to disk as CSV.

    Each frame is saved under the module-level ``train_path`` / ``test_path``
    prefix, re-using the input file name ``f``.

    Args:
        train_df: Training rows to save.
        test_df: Test rows to save.
        f: File name appended to each output prefix.
    """
    train_file = f"{train_path}{f}"
    test_file = f"{test_path}{f}"
    # to_csv raises if the target directory is missing, so create it first.
    for target in (train_file, test_file):
        parent = os.path.dirname(target)
        if parent:
            os.makedirs(parent, exist_ok=True)
    # DataFrame.to_csv blocks; run both writes in the default thread pool so
    # other coroutines keep making progress while this file is written.
    # NOTE(review): to_csv also writes the index as an unnamed first column;
    # left unchanged here in case downstream readers rely on it.
    loop = asyncio.get_running_loop()
    await asyncio.gather(
        loop.run_in_executor(None, train_df.to_csv, train_file),
        loop.run_in_executor(None, test_df.to_csv, test_file),
    )
async def process_file():
    """Worker loop: take file names off the shared ``files`` list until empty.

    For every name the file is loaded, split into train and test parts, and
    both parts are written out.
    """
    while files:
        name = files.pop()
        print("[{}] Processing file: {}".format(datetime.now(), name))
        frame, n_rows = await get_file(name)
        train_part, test_part = await get_train_test_split(frame, n_rows)
        await store_files(train_part, test_part, name)
async def run():
    """Start four ``process_file`` workers and wait until all have finished."""
    workers = (process_file() for _ in range(4))
    await asyncio.gather(*workers)
# Script entry point.  asyncio.run() creates a fresh event loop, drives run()
# to completion and closes the loop afterwards.  It replaces the
# get_event_loop()/run_until_complete() pair: calling get_event_loop() with
# no running loop is deprecated and raises RuntimeError as of Python 3.14.
asyncio.run(run())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment