Skip to content

Instantly share code, notes, and snippets.

@chiroptical
Created April 17, 2019 15:45
Show Gist options
  • Save chiroptical/49b94ebee358f48417ab30f690c165eb to your computer and use it in GitHub Desktop.
Save chiroptical/49b94ebee358f48417ab30f690c165eb to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import pandas as pd
import numpy as np
from pathlib import Path
from scipy import signal
from librosa import load
from sklearn.preprocessing import MinMaxScaler
from PIL import Image
from multiprocessing import cpu_count
from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import as_completed
import json
def decibel_filter(spectrogram, db_cutoff=-100.0):
    """Raise every value quieter than ``db_cutoff`` up to that level.

    The input is copied, converted to decibels (``10 * log10``), floored at
    ``db_cutoff`` and converted back to the original linear scale.

    Entries that are exactly zero are turned into NaN before the logarithm,
    so they are not floored; ``np.nan_to_num`` maps them back to 0.0 in the
    returned array.

    Args:
        spectrogram: Array of linear-scale values. It is not modified.
        db_cutoff: Floor, in decibels, applied to the converted values.

    Returns:
        A new array in linear scale with the floor applied.
    """
    power = np.copy(spectrogram)

    # Mask exact zeros so log10 never sees them.
    is_zero = power == 0.0
    power[is_zero] = np.nan

    decibels = np.log10(power) * 10.0

    # NaN compares False here, so masked entries stay NaN.
    too_quiet = decibels <= db_cutoff
    decibels[too_quiet] = db_cutoff

    linear = 10.0 ** (decibels / 10.0)
    return np.nan_to_num(linear)
def chunk_preprocess(chunk):
    """Run ``preprocess`` on every row of ``chunk``.

    Args:
        chunk: DataFrame with an ``"Index"`` and a ``"Filename"`` column.

    Returns:
        A list with one ``(row["Index"], preprocess(row["Filename"]))``
        tuple per row, in row order.
    """
    return [
        (record["Index"], preprocess(record["Filename"]))
        for _, record in chunk.iterrows()
    ]
def _relative_box(x_min, x_max, y_min, y_max, time_min, time_max, x_length, y_length):
    """Turn one time/frequency box into centre-based fractional coordinates.

    Returns:
        ``(x_center, y_center, width, height)``, each expressed as a fraction
        of ``x_length`` (time) or ``y_length`` (frequency).
    """
    # Clip the box to the time span covered by the spectrogram.
    if x_min < time_min:
        x_min = time_min
    if x_max > time_max:
        x_max = time_max

    # NOTE(review): positions are divided by the span without first
    # subtracting time_min / freq_min, and y is not mirrored although the
    # saved image is flipped vertically - confirm this is what the consumer
    # of the label files expects.
    x = x_min / x_length
    y = y_min / y_length
    width = (x_max - x_min) / x_length
    height = (y_max - y_min) / y_length

    # Shrink boxes that would extend past the right / upper edge.
    if x + width > 1.0:
        width = 1.0 - x
    if y + height > 1.0:
        height = 1.0 - y
    assert x + width <= 1.0 and y + height <= 1.0

    return x + 0.5 * width, y + 0.5 * height, width, height


def preprocess(filename):
    """Render one recording as a spectrogram JPEG and write its label file.

    Reads ``{base}/{parent}/{stem}.wav`` and ``{base}/{parent}/{stem}.csv``
    (``parent`` and ``stem`` are taken from ``filename``), writes
    ``{base}/JPEGImages/{parent}-{stem}.jpg`` and
    ``{base}/labels/{parent}-{stem}.txt``. Uses the module-level ``base`` and
    ``names`` objects.

    Each line of the label file is
    ``<label> <x_center> <y_center> <width> <height>``, where ``label`` is the
    position of the row's ``"Bird"`` value in the first column of ``names``
    and the other values are fractions of the spectrogram's time / frequency
    span (presumably a YOLO-style format - confirm against the consumer).

    Args:
        filename: Path of the recording relative to ``base``; only its parent
            directory and stem are used.

    Returns:
        A ``(image_path, bird_names)`` tuple, where ``bird_names`` is the
        string form of a list of the distinct ``"Bird"`` values that
        produced at least one box.

    Raises:
        ValueError: If a ``"Bird"`` value with at least one box is not
            present in ``names``.
    """
    # Only p.parent and p.stem are used; the extension is replaced below.
    p = Path(filename)
    image_path = f"{base}/JPEGImages/{p.parent}-{p.stem}.jpg"

    # Generate frequencies and times
    samples, sample_rate = load(
        f"{base}/{p.parent}/{p.stem}.wav", mono=False, sr=22050, res_type="kaiser_fast"
    )
    freq, time, spec = signal.spectrogram(
        samples,
        sample_rate,
        window="hann",
        nperseg=512,
        noverlap=384,
        nfft=512,
        scaling="spectrum",
    )

    # Filters: floor quiet values, go to log scale, then standardise.
    # NOTE(review): decibel_filter returns 0.0 for exact zeros, so log10 can
    # produce -inf here for digitally silent input - confirm whether that
    # can occur in the data.
    spec = decibel_filter(spec)
    spec = np.log10(spec)
    spec_mean = np.mean(spec)
    spec_std = np.std(spec)
    spec = (spec - spec_mean) / spec_std

    # Save the picture: rescale to 0..255, flip along axis 0, fixed size.
    scaler = MinMaxScaler(feature_range=(0, 255))
    spec = scaler.fit_transform(spec)
    image = Image.fromarray(np.flip(spec, axis=0))
    image = image.convert("RGB")
    image = image.resize((10304, 256))
    image.save(image_path)

    # Read the corresponding CSV
    inner = pd.read_csv(f"{base}/{p.parent}/{p.stem}.csv")

    # Loop-invariant values: build the class-name list and the spectrogram
    # extents once instead of once per box / per CSV row.
    class_names = names.iloc[:, 0].values.tolist()
    time_min = np.min(time)
    time_max = np.max(time)
    freq_min = np.min(freq)
    freq_max = np.max(freq)
    y_length = freq_max - freq_min
    x_length = time_max - time_min

    labels = set()
    with open(f"{base}/labels/{p.parent}-{p.stem}.txt", "w") as f:
        for _, row in inner.iterrows():
            # Start/end times are stored as JSON lists; every box of a row
            # shares the row's frequency band.
            x_mins = json.loads(row["Start_times"])
            x_maxs = json.loads(row["End_times"])
            y_min = row["Freq_low"]
            y_max = row["Freq_high"]

            spans = list(zip(x_mins, x_maxs))
            if not spans:
                # No boxes: the bird is neither looked up nor recorded.
                continue

            # Find label
            bird = row["Bird"]
            label = class_names.index(bird)
            labels.add(bird)

            for x_min, x_max in spans:
                x_center, y_center, width, height = _relative_box(
                    x_min, x_max, y_min, y_max, time_min, time_max, x_length, y_length
                )
                f.write(f"{label} {x_center} {y_center} {width} {height}\n")

    return image_path, f"{list(labels)}"
# Root directory containing the recordings, wav.files.csv and the
# JPEGImages/ and labels/ output directories. Read by preprocess().
base = "/media/powdermill/new_scapes"
# Class names; preprocess() uses the position of a name in the first column
# as its numeric label. Kept at module level so worker processes that
# re-import this module can see it.
names = pd.read_csv("pnre.names", header=None)


def main():
    """Preprocess every file listed in wav.files.csv and write all_files.csv."""
    df = pd.read_csv(f"{base}/wav.files.csv")
    df["Index"] = df.index.values
    results = pd.DataFrame(index=df.index.values, columns=["X", "y"], dtype=str)

    nprocs = cpu_count()

    # Split row positions into (at most) one contiguous chunk per process.
    # Splitting positions rather than the DataFrame itself keeps NumPy out
    # of DataFrame internals; empty chunks are dropped.
    columns = df[["Filename", "Index"]]
    position_groups = np.array_split(np.arange(len(df)), nprocs)
    chunks = [columns.iloc[group] for group in position_groups if len(group)]

    # The context manager shuts the pool down even if a worker raises.
    with ProcessPoolExecutor(nprocs) as executor:
        futs = [executor.submit(chunk_preprocess, chunk) for chunk in chunks]
        for fut in as_completed(futs):
            for idx, (X, y) in fut.result():
                results.loc[idx, "X"] = X
                results.loc[idx, "y"] = y

    results.to_csv("all_files.csv", index=False)


# Guarded so that worker processes importing this module do not start the
# whole pipeline again.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment