Skip to content

Instantly share code, notes, and snippets.

@796F
Created April 21, 2023 00:11
Show Gist options
  • Save 796F/2743543aef6da3093bb25114c9995b08 to your computer and use it in GitHub Desktop.
Save 796F/2743543aef6da3093bb25114c9995b08 to your computer and use it in GitHub Desktop.
Compute NSFW scores using the LAION NSFW model and cosine similarity, then print both for comparison.
# don't use the latest version of autokeras (ak); it's broken
# pip install autokeras==1.0.19 --no-deps
import os
import numpy as np
import clip
import torch
import autokeras as ak
from PIL import Image
from tensorflow.keras.models import load_model
from numpy import dot
from numpy.linalg import norm
# --- Model configuration and one-time initialisation -------------------------
# Name of the CLIP checkpoint handed to clip.load() below.
CLIP_MODEL = 'ViT-L/14' # specific clip model that laion nsfw was trained on
# Feature width used for the random sanity-check batch further down;
# presumably the ViT-L/14 embedding size -- TODO confirm.
dim = 768
# Directory the NSFW classifier is loaded from with load_model().
model_dir = "./utils/clip_autokeras_binary_nsfw" # dl from https://github.com/LAION-AI/CLIP-based-NSFW-Detector
# init clip, use gpu if available
device = "cuda" if torch.cuda.is_available() else "cpu"
# clip.load returns the network plus the image preprocessing callable that
# embedding_for_image applies to PIL images.
model, preprocess = clip.load(CLIP_MODEL, device=device)
# Inference only: switch to eval mode and disable gradient tracking on the parameters.
model = model.eval().requires_grad_(False) # this improves inf resource util.
# init nsfw model (autokeras custom layers must be registered to deserialise it)
nsfw_model = load_model(model_dir, custom_objects=ak.CUSTOM_OBJECTS)
# run it as a sanity check: 1000 random float32 rows of width `dim`, in a single batch;
# the prediction itself is discarded.
nsfw_model.predict(np.random.rand(10**3, dim).astype("float32"), batch_size=10**3)
def cos_sim(a, b):
    """Return the cosine similarity between vectors ``a`` and ``b``.

    Args:
        a: 1-D array-like.
        b: 1-D array-like with the same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``. When either vector has zero
        norm the similarity is undefined; ``0.0`` is returned instead of
        dividing by zero, mirroring how ``normalized`` maps zero vectors to
        zeros rather than NaN.
    """
    denom = norm(a) * norm(b)
    # Guard the undefined case so a single all-zero embedding does not
    # produce NaN (and a RuntimeWarning) in the printed comparison.
    if denom == 0:
        return 0.0
    return dot(a, b) / denom
def pil_for_path(path):
    """Open the image file at ``path`` with PIL and return the image object.

    Args:
        path: Filesystem path of the image to open.

    Returns:
        Whatever ``Image.open(path)`` returns; errors raised by
        ``Image.open`` propagate unchanged.
    """
    return Image.open(path)
def embedding_for_text(text):
    """Encode ``text`` with the CLIP text encoder and return NumPy features.

    Args:
        text: The string to embed. It is tokenized as a one-element batch.

    Returns:
        A NumPy array holding the text features for that single-element
        batch (callers index ``[0]`` to get the vector).
    """
    tokens = clip.tokenize([text]).to(device)
    with torch.no_grad():
        features = model.encode_text(tokens)
    # Tensor.numpy() only works on host tensors, and ``device`` is "cuda"
    # whenever a GPU is present, so copy back to the CPU first.
    # The cast to float32 matches the dtype the NSFW model is fed in the
    # module-level sanity check. NOTE(review): CLIP may run in half
    # precision on GPU -- confirm; on float32 tensors .float() is a no-op.
    return features.detach().cpu().float().numpy()
def embedding_for_image(pil_image):
    """Encode a PIL image with the CLIP image encoder and return NumPy features.

    Args:
        pil_image: A ``PIL.Image.Image`` instance.

    Returns:
        A NumPy array holding the image features for a batch of one
        (the preprocessed image is given a leading batch axis before encoding).

    Raises:
        AssertionError: If ``pil_image`` is not a ``PIL.Image.Image``.
    """
    # check that pil_image is type Image
    assert isinstance(pil_image, Image.Image), f'embedding_for_image expected PIL.Image, got {type(pil_image)}'
    batch = preprocess(pil_image).unsqueeze(0).to(device)
    with torch.no_grad():
        features = model.encode_image(batch)
    # Tensor.numpy() only works on host tensors, and ``device`` is "cuda"
    # whenever a GPU is present, so copy back to the CPU first.
    # The cast to float32 matches the dtype the NSFW model is fed in the
    # module-level sanity check. NOTE(review): CLIP may run in half
    # precision on GPU -- confirm; on float32 tensors .float() is a no-op.
    return features.detach().cpu().float().numpy()
def normalized(a, axis=-1, order=2):
    """Scale ``a`` so that its norm along ``axis`` is 1.

    Args:
        a: Array to normalize.
        axis: Axis along which the norm is taken (default: last axis).
        order: Norm order forwarded to ``np.linalg.norm`` (default: 2).

    Returns:
        ``a`` divided by its norm along ``axis``. Slices whose norm is zero
        are divided by 1 instead, so they come back as zeros rather than
        NaN. Note that a 1-D input yields a 2-D result with a leading axis
        of length 1, because the norm is promoted to 1-D and then expanded.
    """
    lengths = np.atleast_1d(np.linalg.norm(a, order, axis))
    # Avoid 0/0: a zero-norm slice is left as all zeros.
    lengths[lengths == 0] = 1
    return a / np.expand_dims(lengths, axis)
# path to downloaded images
DIR_PATH = "./utils/images/"

# Collect every JPEG below DIR_PATH. The extension test is case-insensitive
# and requires the dot, so "photo.JPG" and "photo.jpeg" are picked up while a
# name that merely ends in the letters "jpg" is not.
filenames = []
for root, _, names in os.walk(DIR_PATH):
    for name in names:
        if name.lower().endswith(('.jpg', '.jpeg')):
            filenames.append(os.path.join(root, name))
# sort by full path so the output order is the same on every run
filenames.sort()

# np.vstack cannot stack an empty list; fail with a message that names the cause.
if not filenames:
    raise SystemExit(f'no JPEG images found under {DIR_PATH}')

# compute normalized embeddings for all images, one row per file
embeddings = np.vstack([normalized(embedding_for_image(pil_for_path(path))) for path in filenames])

# use laion to compute nsfw values (all images in a single batch)
nsfw_values = nsfw_model.predict(embeddings, batch_size=embeddings.shape[0])

# compute cosine similarity between the text "nsfw" and all images as a reference.
nsfw = embedding_for_text('nsfw')
cossim_values = [cos_sim(nsfw[0], emb) for emb in embeddings]

# print res, round to 3 dec.
for img, laion, cossim in zip(filenames, nsfw_values, cossim_values):
    # Each prediction is expected to hold exactly one score; .item() extracts
    # it as a Python scalar (and raises if there is more than one) instead of
    # relying on float() of a non-0-d array, which newer NumPy deprecates.
    score = np.asarray(laion).item()
    print(f'{img} \t laion: {round(float(score), 3)} \t cossim {round(float(cossim), 3)}')
@796F
Copy link
Author

796F commented Apr 21, 2023

should print

image

The distribution looks like this. Cosine similarity is not very useful in comparison.

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment