Created
April 21, 2023 00:11
-
-
Save 796F/2743543aef6da3093bb25114c9995b08 to your computer and use it in GitHub Desktop.
Compute NSFW scores using the LAION NSFW model and cosine similarity, and print the data for comparison.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Don't use the latest version of autokeras (ak); it's broken.
# pip install autokeras==1.0.19 --no-deps
import os

import numpy as np
import clip
import torch
import autokeras as ak
from PIL import Image
from tensorflow.keras.models import load_model
from numpy import dot
from numpy.linalg import norm
# Specific CLIP model that the LAION NSFW classifier was trained on.
CLIP_MODEL = 'ViT-L/14'
# Number of features per row fed to the NSFW classifier in the sanity check below.
dim = 768
# Saved classifier; download from https://github.com/LAION-AI/CLIP-based-NSFW-Detector
model_dir = "./utils/clip_autokeras_binary_nsfw"

# Init CLIP; use the GPU if available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load(CLIP_MODEL, device=device)
# Inference only: eval mode with gradient tracking disabled
# (this improves inference resource utilisation).
model = model.eval().requires_grad_(False)

# Init the NSFW model.
nsfw_model = load_model(model_dir, custom_objects=ak.CUSTOM_OBJECTS)
# Run it once on random input as a sanity check.
nsfw_model.predict(np.random.rand(10**3, dim).astype("float32"), batch_size=10**3)
def cos_sim(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    Computed as ``dot(a, b) / (norm(a) * norm(b))``. If either vector has
    zero norm the similarity is undefined; ``0.0`` is returned instead of
    dividing by zero (which would produce NaN and a RuntimeWarning).
    """
    denom = norm(a) * norm(b)
    if denom == 0:
        return 0.0
    return dot(a, b) / denom
def pil_for_path(path):
    """Open the image file at ``path`` and return the result of ``Image.open``."""
    return Image.open(path)
def embedding_for_text(text):
    """Return the CLIP text embedding of ``text`` as a NumPy array.

    The string is tokenized as a one-element batch and encoded with the
    module-level CLIP ``model`` on ``device``; callers take row ``[0]`` of
    the result to get the single embedding vector.
    """
    token = clip.tokenize([text]).to(device)
    with torch.no_grad():
        features = model.encode_text(token)
    # Copy to host memory first: Tensor.numpy() raises for CUDA tensors,
    # and ``device`` is "cuda" whenever a GPU is available.
    return features.detach().cpu().numpy()
def embedding_for_image(pil_image):
    """Return the CLIP image embedding of ``pil_image`` as a NumPy array.

    The image is run through CLIP's ``preprocess`` transform, given a
    leading batch dimension of one, and encoded with the module-level
    CLIP ``model`` on ``device``.

    Raises:
        AssertionError: if ``pil_image`` is not a ``PIL.Image.Image``.
    """
    # Check that pil_image is a PIL image before handing it to preprocess.
    assert isinstance(pil_image, Image.Image), f'embedding_for_image expected PIL.Image, got {type(pil_image)}'
    image = preprocess(pil_image).unsqueeze(0).to(device)
    with torch.no_grad():
        features = model.encode_image(image)
    # Copy to host memory first: Tensor.numpy() raises for CUDA tensors,
    # and ``device`` is "cuda" whenever a GPU is available.
    return features.detach().cpu().numpy()
def normalized(a, axis=-1, order=2):
    """Scale ``a`` to unit norm along ``axis``.

    Args:
        a: array to normalize.
        axis: axis along which the norm is taken (default: last axis).
        order: norm order passed to ``np.linalg.norm`` (default: 2).

    Returns:
        ``a`` divided by its norm along ``axis``. Slices whose norm is zero
        are divided by 1 instead, so they are returned unchanged rather
        than producing NaN.
    """
    l2 = np.atleast_1d(np.linalg.norm(a, order, axis))
    # Avoid division by zero for all-zero slices.
    l2[l2 == 0] = 1
    # Re-insert the reduced axis so the division broadcasts against ``a``.
    return a / np.expand_dims(l2, axis)
# Path to the downloaded images.
DIR_PATH = "./utils/images/"

# Collect all JPEGs under DIR_PATH; sort by filename so the order is consistent.
filenames = []
for root, _, files in os.walk(DIR_PATH):
    for file in files:
        # Require a real extension, case-insensitively, and accept ".jpeg" too.
        if file.lower().endswith(('.jpg', '.jpeg')):
            filenames.append(os.path.join(root, file))
filenames.sort()
if not filenames:
    # np.vstack([]) would otherwise fail with an opaque ValueError.
    raise ValueError(f'no JPEG images found under {DIR_PATH}')

# Compute normalized embeddings for all images and stack them row-wise.
embeddings = np.vstack([
    np.asarray(normalized(embedding_for_image(pil_for_path(path))))
    for path in filenames
])

# Use the LAION model to compute NSFW values, all images in a single batch.
nsfw_values = nsfw_model.predict(embeddings, batch_size=embeddings.shape[0])

# Compute cosine similarity between the text "nsfw" and all images as a reference.
nsfw = embedding_for_text('nsfw')
cossim_values = [cos_sim(nsfw[0], emb) for emb in embeddings]

# Print results, rounded to 3 decimals.
# np.ravel flattens the predictions so float() receives scalars rather than
# 1-element arrays -- assumes the model emits one score per image; TODO confirm.
for img, laion, cossim in zip(filenames, np.ravel(nsfw_values), cossim_values):
    print(f'{img} \t laion: {round(float(laion), 3)} \t cossim {round(float(cossim), 3)}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
should print
The distribution looks like this. Cosine similarity is not very useful in comparison.