Skip to content

Instantly share code, notes, and snippets.

View priyanlc's full-sized avatar

Priyan priyanlc

  • google
View GitHub Profile
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from datetime import datetime
# scheduled once every 30 mins
dag = DAG(
'filter_images',
description='Run scripts to remove bad images DAG',
start_date=datetime(2023, 3, 22),
schedule_interval='*/30 * * * *',
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from datetime import datetime
# runs every 10 min
dag = DAG(
'delete_small_images',
description='Run scripts to images out of size range DAG',
start_date=datetime(2023, 3, 22),
schedule_interval='*/10 * * * *',
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from datetime import datetime
# runs on demand
dag = DAG(
'download_images',
description='Run scripts to remove bad images DAG',
start_date=datetime(2023, 3, 22),
catchup=False
@priyanlc
priyanlc / download_images.sh
Created April 28, 2024 21:15
download images
#!/bin/bash
# This is tested and it worked
# Name of your Conda environment
ENV_NAME="deepface"
CONDA_PATH="$HOME/anaconda3"
LOG_FILE="$HOME/logfiles/download_images.log"
@priyanlc
priyanlc / DeepFaceAnalyzer.py
Created April 28, 2024 20:55
delete with deep face
from deepface import DeepFace
import os
class DeepFaceAnalyzer:
@staticmethod
def process_image(filename):
try:
detect = DeepFace.extract_faces(img_path=filename, detector_backend='fastmtcnn')
if not detect:
from PIL import Image
import os
class ImageValidator:
@staticmethod
def validate_image(image_path):
try:
with Image.open(image_path) as img:
img.verify() # Verify if it's an image
@priyanlc
priyanlc / ImageBatchProcessor.py
Created April 19, 2024 06:38
Delete Images with characters with easyocr
from multiprocessing import get_context
import os
class ImageBatchProcessor:
def __init__(self, processor, batch_size=60, num_processes=4):
self.processor = processor
self.batch_size = batch_size
self.num_processes = num_processes
def process_batch(self, batch):
@priyanlc
priyanlc / EasyOCRProcessor.py
Created April 19, 2024 06:37
Delete Images with characters with easyocr
import os
import easyocr
class EasyOCRProcessor:
def __init__(self, languages=None): # Default to Japanese
if languages is None:
languages = ['ja']
self.reader = easyocr.Reader(languages)
@priyanlc
priyanlc / Application.py
Created April 19, 2024 06:35
Delete Images with characters with easyocr
from pathlib import Path
import torch
from main.common.ConfigLoader import ConfigLoader
from main.delete_character.EasyOCRProcessor import EasyOCRProcessor
from main.delete_character.ImageBatchProcessor import ImageBatchProcessor
class Application:
@priyanlc
priyanlc / ImageBatchManager.py
Created April 19, 2024 06:13
Identify Images with faces with face_recognition
import os
from multiprocessing import Pool, cpu_count
class ImageBatchManager:
def __init__(self, processor, batch_size=100):
self.processor = processor
self.batch_size = batch_size
def process_batch(self, batch):