import pandas as pd

# read the file in chunks instead of loading it all at once;
# 'filename' and 'process' are placeholders for your own path and handler
chunksize = 10 ** 8
for chunk in pd.read_csv(filename, chunksize=chunksize):
    process(chunk)
salma71 / read_boost
Created February 8, 2021 13:57
The idea here is to asynchronously process chunks of data by pushing them onto a multiprocessing pool's task queue. Each process in the pool works on a chunk and returns its result. Note: it is important to create the Pool inside the __main__ block. That is because, under start methods that re-import the main module (such as spawn, the default on Windows), an unguarded Pool would be re-created in every child process.
import pandas as pd
import multiprocessing as mp

LARGE_FILE = "D:\\my_large_file.txt"
CHUNKSIZE = 100000  # processing 100,000 rows at a time

def process_frame(df):
    # process a single data frame chunk (here: just count its rows)
    return len(df)
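
The description above refers to a driver that builds the Pool inside the __main__ block; a minimal sketch of that driver, assuming pool.map over the chunk reader (the worker count of 4 is an arbitrary choice):

if __name__ == "__main__":
    # create the Pool only under the __main__ guard, as noted above
    pool = mp.Pool(4)
    reader = pd.read_csv(LARGE_FILE, chunksize=CHUNKSIZE)
    # each chunk is shipped to a worker; results come back in order
    results = pool.map(process_frame, reader)
    pool.close()
    pool.join()
    print("total rows processed:", sum(results))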
from gtts import gTTS
from pdfminer.high_level import extract_text
import gradio as gr

def pdf_to_text(file_obj):
    # pull the raw text out of the uploaded PDF
    text = extract_text(file_obj.name)
    # gTTS always produces MP3 data, so save with an .mp3 extension
    tts = gTTS(text=text, lang='en', slow=False)
    tts.save("output.mp3")
    return 'output.mp3'
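
The gradio import suggests the function backs a small web demo; a minimal sketch of the wiring, assuming a file upload in and an audio player out (the component choices are assumptions, not part of the original snippet):

demo = gr.Interface(fn=pdf_to_text, inputs="file", outputs="audio")
demo.launch()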
from typing import List
import random

def random_sampling(k: int, array_list: List[int]) -> None:
    """Move a uniform random sample of k elements to the front of
    array_list, in place (a partial Fisher-Yates shuffle)."""
    for i in range(k):
        # pick a random index among the elements not yet sampled
        random_index = random.randint(i, len(array_list) - 1)
        # swap the chosen element into position i
        array_list[i], array_list[random_index] = array_list[random_index], array_list[i]
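
A quick usage sketch (the list and k are arbitrary):

values = [10, 20, 30, 40, 50]
random_sampling(3, values)
print(values[:3])  # e.g. [40, 10, 50]: the first 3 slots now hold the sample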
name: configuration for python env
on: [push, pull_request]
jobs:
  build:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    services:
      db_service:
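        # a plausible completion of the service definition, assuming a
        # postgres container; the image, password, and port mapping are
        # assumptions, not part of the original fragment
        image: postgres
        env:
          POSTGRES_PASSWORD: postgres
        ports:
          - 5432:5432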
name: Configure pyenv and pick the right version
on: [push, pull_request]
jobs:
  # python installation
  install-python:
    # should run on a Linux-based operating system
    runs-on: ubuntu-latest
    steps:
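      # a minimal sketch of the steps: check out the repo, then install a
      # Python version with actions/setup-python (the version pin is an assumption)
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v2
        with:
          python-version: '3.9'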
name: Configuring python env for postgres
# run the following jobs on push and on pull requests
on: [push, pull_request]
# define the jobs
jobs:
  # first job is to set up the services
  setting-services:
    # must run on a Linux-based operating system
    runs-on: ubuntu-latest
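    # a plausible continuation for the services this job sets up, assuming
    # the postgres container the workflow title names (image, password, and
    # port mapping are assumptions)
    services:
      postgres:
        image: postgres
        env:
          POSTGRES_PASSWORD: postgres
        ports:
          - 5432:5432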
# custom_unpickling.py
import pickle
import functools
import multiprocessing as mp
from math import sqrt
class Process:
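    # the snippet breaks off at the class header; a minimal, hypothetical
    # body consistent with the imports above (the data attribute, run method,
    # and pool size are all assumptions, not the author's code)
    def __init__(self, data):
        self.data = data

    def run(self):
        # fan the work out across a pool and collect the results in order
        with mp.Pool(4) as pool:
            return pool.map(sqrt, self.data)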