Created
July 12, 2022 05:59
-
-
Save pritul2/6fe2e0fc203426e58e80476bb81682a0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 12 10:49:03 2022
@author: pritul
"""
import pandas as pd | |
import os | |
import asyncio | |
from datetime import datetime | |
# Names of the input files; the worker coroutines pop entries off this list
# until it is empty.
files = os.listdir('../data/july_11_binning')

# Output locations. They are used as plain string prefixes in front of the
# file name, which is why each one ends with a slash.
train_path, test_path = '../data/train_binning_jul11/', '../data/test_binning_jul11/'

# Running row totals, accumulated over every file that gets split.
train_size = test_size = 0
async def get_file(f, data_dir='../data/july_11_binning'):
    """Load one CSV file and report how many rows it contains.

    Args:
        f: Name of the file, relative to ``data_dir``.
        data_dir: Directory that holds the input files. The default is the
            directory this script has always read from, so existing callers
            that pass only ``f`` behave as before.

    Returns:
        A ``(df, n_rows)`` tuple: the parsed DataFrame and its row count.
    """
    csv_path = os.path.join(data_dir, f)
    # pd.read_csv is a blocking call. Running it on the default executor lets
    # the event loop advance other coroutines while this file is being read;
    # calling it inline would never yield control.
    loop = asyncio.get_running_loop()
    df = await loop.run_in_executor(None, pd.read_csv, csv_path)
    return df, df.shape[0]
async def get_train_test_split(df, N):
    """Split the first ``N`` rows of ``df`` into train and test parts.

    The test part is 10% of ``N`` (rounded down) and is taken from the rows
    that follow the training rows, so the two parts never share a row.

    Side effect: the sizes of both parts are added to the module-level
    ``train_size`` and ``test_size`` counters.

    Args:
        df: DataFrame to split; rows are taken in their existing order.
        N: Number of rows to distribute between the two parts.

    Returns:
        A ``(train_df, test_df)`` tuple of positional slices of ``df``.
    """
    global train_size, test_size
    test_cnt = int(N * 0.1)
    train_cnt = N - test_cnt
    train_size += train_cnt
    test_size += test_cnt
    train_df = df.iloc[:train_cnt]
    # The test rows start where the training rows end. Slicing from the top
    # again (``[:test_cnt]``) would hand back rows already used for training.
    test_df = df.iloc[train_cnt:N]
    return train_df, test_df
async def store_files(train_df, test_df, f):
    """Write the train and test parts of one file as CSV.

    Each frame is written to ``<prefix><f>``, where the prefix is the
    module-level ``train_path`` or ``test_path`` respectively. A missing
    output directory is created first instead of letting the write fail.

    Args:
        train_df: Training rows to save.
        test_df: Test rows to save.
        f: File name appended to each output prefix.
    """
    loop = asyncio.get_running_loop()
    for frame, prefix in ((train_df, train_path), (test_df, test_path)):
        target = f"{prefix}{f}"
        parent = os.path.dirname(target)
        if parent:
            # to_csv raises OSError when the directory does not exist yet.
            os.makedirs(parent, exist_ok=True)
        # to_csv blocks; run it on the default executor so the event loop
        # stays free for other coroutines during the write.
        await loop.run_in_executor(None, frame.to_csv, target)
async def process_file():
    """Worker coroutine: drain the shared ``files`` list.

    Repeatedly takes the last name off ``files`` and, for that file, logs a
    timestamped message, loads it, splits it into train/test parts and writes
    both parts out. Returns once ``files`` is empty.
    """
    while files:
        current = files.pop()
        stamp = datetime.now()
        print("[{}] Processing file: {}".format(stamp, current))
        frame, n_rows = await get_file(current)
        train_part, test_part = await get_train_test_split(frame, n_rows)
        await store_files(train_part, test_part, current)
async def run():
    """Start four ``process_file`` workers and wait until all have finished."""
    workers = []
    for _ in range(4):
        workers.append(process_file())
    await asyncio.gather(*workers)
# Script entry point. asyncio.run() creates a fresh event loop, drives run()
# to completion and closes the loop afterwards. Calling
# asyncio.get_event_loop() with no running loop is deprecated and left the
# loop unclosed.
asyncio.run(run())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment