chiroptical/gen_scapes_df.py

## gen_scapes_df.py
#!/usr/bin/env python3
import pandas as pd
import numpy as np
from pathlib import Path

from scipy import signal
from librosa import load

from sklearn.preprocessing import MinMaxScaler
from PIL import Image

from multiprocessing import cpu_count
from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import as_completed

import json


def decibel_filter(spectrogram, db_cutoff=-100.0):
    """Clamp the quiet end of a power spectrogram at ``db_cutoff`` decibels.

    Each value is converted to decibels (``10 * log10(value)``), anything at
    or below ``db_cutoff`` is raised to ``db_cutoff``, and the result is
    converted back to linear power.

    Args:
        spectrogram: Array-like of linear power values. It is copied, never
            modified in place. Integer and boolean input is promoted to
            float so the NaN placeholder used for zeros can be stored.
        db_cutoff: Floor, in decibels, applied to every non-zero value.

    Returns:
        A new array with the same shape as ``spectrogram``. Exact zeros (and
        any other value whose logarithm is NaN) come back as ``0.0`` rather
        than being raised to the floor.
    """
    power = np.array(spectrogram, copy=True)
    if not np.issubdtype(power.dtype, np.inexact):
        # Integer/bool arrays cannot hold NaN; the masking step below would
        # raise ValueError without this promotion.
        power = power.astype(float)

    # Hide exact zeros from log10 (which would give -inf and then be clamped
    # up to the floor). NaN survives the round trip and nan_to_num turns it
    # back into 0.0.
    power[power == 0.0] = np.nan
    in_db = 10.0 * np.log10(power)
    in_db[in_db <= db_cutoff] = db_cutoff
    return np.nan_to_num(10.0 ** (in_db / 10.0))


def chunk_preprocess(chunk):
    """Run ``preprocess`` over every row of one DataFrame chunk.

    Args:
        chunk: DataFrame with ``"Index"`` and ``"Filename"`` columns.

    Returns:
        A list with one ``(index, preprocess_result)`` tuple per row, in row
        order.
    """
    return [
        (record["Index"], preprocess(record["Filename"]))
        for _, record in chunk.iterrows()
    ]


def preprocess(filename):
    """Render one recording as a spectrogram image plus a box-label file.

    For ``filename`` (a path relative to the module-level ``base`` directory)
    this function:

    1. loads ``{base}/{parent}/{stem}.wav`` (``sr=22050``) and computes its
       spectrogram,
    2. writes the spectrogram to ``{base}/JPEGImages/{parent}-{stem}.jpg``,
    3. reads the annotations in ``{base}/{parent}/{stem}.csv`` and writes one
       ``label x_center y_center width height`` line per annotated box to
       ``{base}/labels/{parent}-{stem}.txt``, with coordinates expressed as
       fractions of the spectrogram's time and frequency extent.

    Args:
        filename: Path of the recording; only its parent and stem are used.

    Returns:
        A ``(image_path, labels)`` tuple where ``labels`` is the string form
        of the list of distinct ``Bird`` values that produced at least one
        box.

    Raises:
        ValueError: If an annotated ``Bird`` is not listed in ``names``.
    """
    p = Path(filename)
    image_path = f"{base}/JPEGImages/{p.parent}-{p.stem}.jpg"

    # Generate frequencies and times
    samples, sample_rate = load(
        f"{base}/{p.parent}/{p.stem}.wav", mono=False, sr=22050, res_type="kaiser_fast"
    )
    freq, time, spec = signal.spectrogram(
        samples,
        sample_rate,
        window="hann",
        nperseg=512,
        noverlap=384,
        nfft=512,
        scaling="spectrum",
    )

    # Filters
    db_cutoff = -100.0
    spec = decibel_filter(spec, db_cutoff=db_cutoff)
    # decibel_filter hands exact zeros back as 0.0; raise them to the same
    # floor as every other quiet bin so log10 stays finite (otherwise a single
    # zero bin turns the mean into -inf and the whole image into NaN).
    spec = np.maximum(spec, 10.0 ** (db_cutoff / 10.0))
    spec = np.log10(spec)
    spec_mean = np.mean(spec)
    spec_std = np.std(spec)
    spec = spec - spec_mean
    if spec_std > 0:
        # A perfectly flat spectrogram has zero spread; skip the division
        # instead of producing NaN.
        spec = spec / spec_std

    # Save the picture
    # NOTE(review): MinMaxScaler rescales every column of `spec` on its own;
    # confirm per-column scaling (rather than one global range) is intended.
    scaler = MinMaxScaler(feature_range=(0, 255))
    spec = scaler.fit_transform(spec)
    # NOTE(review): the image is flipped vertically here, while the label
    # y-coordinates written below grow with frequency; confirm the consumer
    # of the label files expects that orientation.
    image = Image.fromarray(np.flip(spec, axis=0))
    image = image.convert("RGB")
    image = image.resize((10304, 256))
    image.save(image_path)

    # Read the corresponding CSV
    inner = pd.read_csv(f"{base}/{p.parent}/{p.stem}.csv")

    # Values shared by every annotation of this recording.
    bird_names = names.iloc[:, 0].values.tolist()
    time_min = np.min(time)
    time_max = np.max(time)
    freq_min = np.min(freq)
    freq_max = np.max(freq)
    x_length = time_max - time_min
    y_length = freq_max - freq_min

    labels = set()
    with open(f"{base}/labels/{p.parent}-{p.stem}.txt", "w") as f:
        for _, row in inner.iterrows():
            x_mins = json.loads(row["Start_times"])
            x_maxs = json.loads(row["End_times"])
            spans = list(zip(x_mins, x_maxs))
            if not spans:
                # No boxes for this row: nothing to look up or record.
                continue

            bird = row["Bird"]
            label = bird_names.index(bird)
            labels.add(bird)

            # Every box of a row shares the row's frequency band; keep it
            # inside the spectrogram's frequency range.
            y_min = max(row["Freq_low"], freq_min)
            y_max = min(row["Freq_high"], freq_max)
            y = (y_min - freq_min) / y_length
            height = min((y_max - y_min) / y_length, 1.0 - y)

            for x_min, x_max in spans:
                # Keep the box inside the spectrogram's time range.
                x_min = max(x_min, time_min)
                x_max = min(x_max, time_max)

                # Fractions are measured from the start of the axis, so the
                # axis minimum has to be subtracted before normalising.
                x = (x_min - time_min) / x_length
                # min() also absorbs floating-point overshoot past 1.0.
                width = min((x_max - x_min) / x_length, 1.0 - x)

                f.write(
                    f"{label} {x + 0.5 * width} {y + 0.5 * height} {width} {height}\n"
                )

    return image_path, f"{list(labels)}"


# Root directory holding the recordings, their annotation CSVs, and the
# JPEGImages/ and labels/ output folders. Read by preprocess().
base = "/media/powdermill/new_scapes"

# Class-name table; preprocess() uses a name's position in the first column
# as its numeric label.
names = pd.read_csv("pnre.names", header=None)

if __name__ == "__main__":
    # Guarded so that worker processes which re-import this module (the
    # "spawn" start method) do not launch the whole pipeline again.
    df = pd.read_csv(f"{base}/wav.files.csv")
    df["Index"] = df.index.values

    results = pd.DataFrame(index=df.index.values, columns=["X", "y"], dtype=str)

    nprocs = cpu_count()
    # Split row positions and slice with iloc instead of handing the
    # DataFrame itself to np.array_split; chunk sizes are the same.
    work = df[["Filename", "Index"]]
    chunks = [
        work.iloc[rows] for rows in np.array_split(np.arange(len(work)), nprocs)
    ]

    # The context manager shuts the pool down even if a worker raises.
    with ProcessPoolExecutor(nprocs) as executor:
        futs = [executor.submit(chunk_preprocess, chunk) for chunk in chunks]
        for fut in as_completed(futs):
            for idx, (X, y) in fut.result():
                results.loc[idx, "X"] = X
                results.loc[idx, "y"] = y

    results.to_csv("all_files.csv", index=False)
	#!/usr/bin/env python3
	import pandas as pd
	import numpy as np
	from pathlib import Path

	from scipy import signal
	from librosa import load

	from sklearn.preprocessing import MinMaxScaler
	from PIL import Image

	from multiprocessing import cpu_count
	from concurrent.futures import ProcessPoolExecutor
	from concurrent.futures import as_completed

	import json


	def decibel_filter(spectrogram, db_cutoff=-100.0):
	    """Clamp the quiet end of a power spectrogram at ``db_cutoff`` decibels.

	    Each value is converted to decibels (``10 * log10(value)``), anything at
	    or below ``db_cutoff`` is raised to ``db_cutoff``, and the result is
	    converted back to linear power.

	    Args:
	        spectrogram: Array-like of linear power values. It is copied, never
	            modified in place. Integer and boolean input is promoted to
	            float so the NaN placeholder used for zeros can be stored.
	        db_cutoff: Floor, in decibels, applied to every non-zero value.

	    Returns:
	        A new array with the same shape as ``spectrogram``. Exact zeros (and
	        any other value whose logarithm is NaN) come back as ``0.0`` rather
	        than being raised to the floor.
	    """
	    power = np.array(spectrogram, copy=True)
	    if not np.issubdtype(power.dtype, np.inexact):
	        # Integer/bool arrays cannot hold NaN; the masking step below would
	        # raise ValueError without this promotion.
	        power = power.astype(float)

	    # Hide exact zeros from log10 (which would give -inf and then be clamped
	    # up to the floor). NaN survives the round trip and nan_to_num turns it
	    # back into 0.0.
	    power[power == 0.0] = np.nan
	    in_db = 10.0 * np.log10(power)
	    in_db[in_db <= db_cutoff] = db_cutoff
	    return np.nan_to_num(10.0 ** (in_db / 10.0))


	def chunk_preprocess(chunk):
	    """Run ``preprocess`` over every row of one DataFrame chunk.

	    Args:
	        chunk: DataFrame with ``"Index"`` and ``"Filename"`` columns.

	    Returns:
	        A list with one ``(index, preprocess_result)`` tuple per row, in row
	        order.
	    """
	    return [
	        (record["Index"], preprocess(record["Filename"]))
	        for _, record in chunk.iterrows()
	    ]


	def preprocess(filename):
	    """Render one recording as a spectrogram image plus a box-label file.

	    For ``filename`` (a path relative to the module-level ``base`` directory)
	    this function:

	    1. loads ``{base}/{parent}/{stem}.wav`` (``sr=22050``) and computes its
	       spectrogram,
	    2. writes the spectrogram to ``{base}/JPEGImages/{parent}-{stem}.jpg``,
	    3. reads the annotations in ``{base}/{parent}/{stem}.csv`` and writes one
	       ``label x_center y_center width height`` line per annotated box to
	       ``{base}/labels/{parent}-{stem}.txt``, with coordinates expressed as
	       fractions of the spectrogram's time and frequency extent.

	    Args:
	        filename: Path of the recording; only its parent and stem are used.

	    Returns:
	        A ``(image_path, labels)`` tuple where ``labels`` is the string form
	        of the list of distinct ``Bird`` values that produced at least one
	        box.

	    Raises:
	        ValueError: If an annotated ``Bird`` is not listed in ``names``.
	    """
	    p = Path(filename)
	    image_path = f"{base}/JPEGImages/{p.parent}-{p.stem}.jpg"

	    # Generate frequencies and times
	    samples, sample_rate = load(
	        f"{base}/{p.parent}/{p.stem}.wav", mono=False, sr=22050, res_type="kaiser_fast"
	    )
	    freq, time, spec = signal.spectrogram(
	        samples,
	        sample_rate,
	        window="hann",
	        nperseg=512,
	        noverlap=384,
	        nfft=512,
	        scaling="spectrum",
	    )

	    # Filters
	    db_cutoff = -100.0
	    spec = decibel_filter(spec, db_cutoff=db_cutoff)
	    # decibel_filter hands exact zeros back as 0.0; raise them to the same
	    # floor as every other quiet bin so log10 stays finite (otherwise a single
	    # zero bin turns the mean into -inf and the whole image into NaN).
	    spec = np.maximum(spec, 10.0 ** (db_cutoff / 10.0))
	    spec = np.log10(spec)
	    spec_mean = np.mean(spec)
	    spec_std = np.std(spec)
	    spec = spec - spec_mean
	    if spec_std > 0:
	        # A perfectly flat spectrogram has zero spread; skip the division
	        # instead of producing NaN.
	        spec = spec / spec_std

	    # Save the picture
	    # NOTE(review): MinMaxScaler rescales every column of `spec` on its own;
	    # confirm per-column scaling (rather than one global range) is intended.
	    scaler = MinMaxScaler(feature_range=(0, 255))
	    spec = scaler.fit_transform(spec)
	    # NOTE(review): the image is flipped vertically here, while the label
	    # y-coordinates written below grow with frequency; confirm the consumer
	    # of the label files expects that orientation.
	    image = Image.fromarray(np.flip(spec, axis=0))
	    image = image.convert("RGB")
	    image = image.resize((10304, 256))
	    image.save(image_path)

	    # Read the corresponding CSV
	    inner = pd.read_csv(f"{base}/{p.parent}/{p.stem}.csv")

	    # Values shared by every annotation of this recording.
	    bird_names = names.iloc[:, 0].values.tolist()
	    time_min = np.min(time)
	    time_max = np.max(time)
	    freq_min = np.min(freq)
	    freq_max = np.max(freq)
	    x_length = time_max - time_min
	    y_length = freq_max - freq_min

	    labels = set()
	    with open(f"{base}/labels/{p.parent}-{p.stem}.txt", "w") as f:
	        for _, row in inner.iterrows():
	            x_mins = json.loads(row["Start_times"])
	            x_maxs = json.loads(row["End_times"])
	            spans = list(zip(x_mins, x_maxs))
	            if not spans:
	                # No boxes for this row: nothing to look up or record.
	                continue

	            bird = row["Bird"]
	            label = bird_names.index(bird)
	            labels.add(bird)

	            # Every box of a row shares the row's frequency band; keep it
	            # inside the spectrogram's frequency range.
	            y_min = max(row["Freq_low"], freq_min)
	            y_max = min(row["Freq_high"], freq_max)
	            y = (y_min - freq_min) / y_length
	            height = min((y_max - y_min) / y_length, 1.0 - y)

	            for x_min, x_max in spans:
	                # Keep the box inside the spectrogram's time range.
	                x_min = max(x_min, time_min)
	                x_max = min(x_max, time_max)

	                # Fractions are measured from the start of the axis, so the
	                # axis minimum has to be subtracted before normalising.
	                x = (x_min - time_min) / x_length
	                # min() also absorbs floating-point overshoot past 1.0.
	                width = min((x_max - x_min) / x_length, 1.0 - x)

	                f.write(
	                    f"{label} {x + 0.5 * width} {y + 0.5 * height} {width} {height}\n"
	                )

	    return image_path, f"{list(labels)}"


	# Root directory holding the recordings, their annotation CSVs, and the
	# JPEGImages/ and labels/ output folders. Read by preprocess().
	base = "/media/powdermill/new_scapes"

	# Class-name table; preprocess() uses a name's position in the first column
	# as its numeric label.
	names = pd.read_csv("pnre.names", header=None)

	if __name__ == "__main__":
	    # Guarded so that worker processes which re-import this module (the
	    # "spawn" start method) do not launch the whole pipeline again.
	    df = pd.read_csv(f"{base}/wav.files.csv")
	    df["Index"] = df.index.values

	    results = pd.DataFrame(index=df.index.values, columns=["X", "y"], dtype=str)

	    nprocs = cpu_count()
	    # Split row positions and slice with iloc instead of handing the
	    # DataFrame itself to np.array_split; chunk sizes are the same.
	    work = df[["Filename", "Index"]]
	    chunks = [
	        work.iloc[rows] for rows in np.array_split(np.arange(len(work)), nprocs)
	    ]

	    # The context manager shuts the pool down even if a worker raises.
	    with ProcessPoolExecutor(nprocs) as executor:
	        futs = [executor.submit(chunk_preprocess, chunk) for chunk in chunks]
	        for fut in as_completed(futs):
	            for idx, (X, y) in fut.result():
	                results.loc[idx, "X"] = X
	                results.loc[idx, "y"] = y

	    results.to_csv("all_files.csv", index=False)