# 796F/nsfw-detector.py

## nsfw-detector.py
# don't use the latest version of autokeras (ak), it's broken; install this pinned version instead:
#   pip install autokeras==1.0.19 --no-deps

import os
import numpy as np
import clip
import torch
import autokeras as ak

from PIL import Image
from tensorflow.keras.models import load_model

from numpy import dot
from numpy.linalg import norm


# clip variant that the laion nsfw classifier was trained on
CLIP_MODEL = 'ViT-L/14'
# width of the vectors fed to the nsfw classifier in the sanity check below
dim = 768
# saved classifier, dl from https://github.com/LAION-AI/CLIP-based-NSFW-Detector
model_dir = "./utils/clip_autokeras_binary_nsfw"

# load clip on the gpu when torch reports one, otherwise on the cpu
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
model, preprocess = clip.load(CLIP_MODEL, device=device)
# inference only: eval mode and no gradient tracking (improves inf resource util.)
model = model.eval().requires_grad_(False)

# load the nsfw classifier, then push one random float32 batch through it as a sanity check
nsfw_model = load_model(model_dir, custom_objects=ak.CUSTOM_OBJECTS)
nsfw_model.predict(np.random.rand(1000, dim).astype("float32"), batch_size=1000)

def cos_sim(a, b):
  """Return the cosine similarity of vectors `a` and `b`.

  Computed as the dot product divided by the product of the two norms;
  there is no guard against zero-norm inputs.
  """
  norm_product = norm(a) * norm(b)
  return dot(a, b) / norm_product

def pil_for_path(path):
  """Open the image file at `path` with PIL and return the opened image."""
  pil_image = Image.open(path)
  return pil_image

def embedding_for_text(text):
  """Encode a text prompt with the loaded CLIP model.

  Args:
    text: prompt string; it is tokenized as a single-element batch.

  Returns:
    NumPy array with the output of `model.encode_text` for that batch
    (presumably one row per prompt, i.e. a single row here - TODO confirm).
  """
  tokens = clip.tokenize([text]).to(device)
  with torch.no_grad():
    features = model.encode_text(tokens)
  # Tensor.numpy() only accepts cpu tensors, so copy back from the gpu first
  # (.cpu() is a no-op when the tensor already lives on the cpu).
  return features.detach().cpu().numpy()

def embedding_for_image(pil_image):
  """Encode a PIL image with the loaded CLIP model.

  Args:
    pil_image: a `PIL.Image.Image`; it is run through the CLIP `preprocess`
      transform and encoded as a batch of one.

  Returns:
    NumPy array with the output of `model.encode_image` for that batch
    (presumably a single row - TODO confirm).

  Raises:
    AssertionError: if `pil_image` is not a `PIL.Image.Image`.
  """
  # check that pil_image is type Image
  assert isinstance(pil_image, Image.Image), f'embedding_for_image expected PIL.Image, got {type(pil_image)}'
  # unsqueeze(0) adds the leading batch axis before moving the tensor to the model's device
  batch = preprocess(pil_image).unsqueeze(0).to(device)
  with torch.no_grad():
    features = model.encode_image(batch)
  # Tensor.numpy() only accepts cpu tensors, so copy back from the gpu first
  # (.cpu() is a no-op when the tensor already lives on the cpu).
  return features.detach().cpu().numpy()

def normalized(a, axis=-1, order=2):
    """Scale `a` to unit norm along `axis`.

    Args:
        a: array to normalize.
        axis: axis along which the norm is taken (default: last axis).
        order: norm order passed to `np.linalg.norm` (default: 2).

    Returns:
        `a` divided by its norm along `axis`; slices whose norm is 0 are
        divided by 1 instead, so they are returned unchanged.
    """
    norms = np.linalg.norm(a, ord=order, axis=axis)
    norms = np.atleast_1d(norms)
    # avoid dividing by zero: all-zero slices keep their values
    norms[norms == 0] = 1
    divisor = np.expand_dims(norms, axis)
    return a / divisor

# path to downloaded images
DIR_PATH = "./utils/images/"
# collect every jpeg below DIR_PATH (.jpg or .jpeg, any case), sorted by path so runs are consistent
filenames = sorted(
  os.path.join(root, file)
  for root, _, files in os.walk(DIR_PATH)
  for file in files
  if file.lower().endswith(('.jpg', '.jpeg'))
)
if not filenames:
  # np.vstack rejects an empty list and predict cannot run with batch_size=0,
  # so stop with a readable message instead
  raise SystemExit(f'no jpeg images found under {DIR_PATH}')

# compute normalized embeddings for all images, one row per image
embeddings = []
for file in filenames:
  # the with-block closes each image file as soon as its embedding is computed
  with pil_for_path(file) as pil_image:
    # cast to float32 (the dtype used for the classifier sanity check) before normalizing;
    # NOTE(review): clip may hand back float16 when it runs on the gpu - confirm
    embeddings.append(normalized(embedding_for_image(pil_image).astype(np.float32)))
embeddings = np.vstack(embeddings)
# use laion to compute nsfw values, all images in a single batch
nsfw_values = nsfw_model.predict(embeddings, batch_size=embeddings.shape[0])

# compute cosine similarity between nsfw and all images as a reference.
nsfw = embedding_for_text('nsfw')
cossim_values = [cos_sim(nsfw[0], emb) for emb in embeddings]

# print res, round to 3 dec.
for img, laion, cossim in zip(filenames, nsfw_values, cossim_values):
  # squeeze first: float() on a length-1 array (ndim > 0) is deprecated in recent numpy
  print(f'{img} \t laion: {round(float(np.squeeze(laion)), 3)} \t cossim {round(float(cossim), 3)}')
	# don't use the latest version of autokeras (ak), it's broken; install this pinned version instead:
	# pip install autokeras==1.0.19 --no-deps

	import os
	import numpy as np
	import clip
	import torch
	import autokeras as ak

	from PIL import Image
	from tensorflow.keras.models import load_model

	from numpy import dot
	from numpy.linalg import norm


	# clip variant that the laion nsfw classifier was trained on
	CLIP_MODEL = 'ViT-L/14'
	# width of the vectors fed to the nsfw classifier in the sanity check below
	dim = 768
	# saved classifier, dl from https://github.com/LAION-AI/CLIP-based-NSFW-Detector
	model_dir = "./utils/clip_autokeras_binary_nsfw"

	# init clip, use gpu if available
	device = "cuda" if torch.cuda.is_available() else "cpu"
	model, preprocess = clip.load(CLIP_MODEL, device=device)
	# inference only: eval mode and no gradient tracking (improves inf resource util.)
	model = model.eval().requires_grad_(False)

	# init nsfw model
	nsfw_model = load_model(model_dir, custom_objects=ak.CUSTOM_OBJECTS)
	# run it as a sanity check on one random float32 batch of 10**3 rows
	nsfw_model.predict(np.random.rand(10**3, dim).astype("float32"), batch_size=10**3)

	def cos_sim(a, b):
	  """Return the cosine similarity of vectors `a` and `b`.

	  Computed as the dot product divided by the product of the two norms;
	  there is no guard against zero-norm inputs.
	  """
	  return dot(a, b) / (norm(a) * norm(b))

	def pil_for_path(path):
	  """Open the image file at `path` with PIL and return the opened image."""
	  return Image.open(path)

	def embedding_for_text(text):
	  """Encode a text prompt with the loaded CLIP model.

	  Args:
	    text: prompt string; it is tokenized as a single-element batch.

	  Returns:
	    NumPy array with the output of `model.encode_text` for that batch
	    (presumably a single row - TODO confirm).
	  """
	  tokens = clip.tokenize([text]).to(device)
	  with torch.no_grad():
	    features = model.encode_text(tokens)
	  # Tensor.numpy() only accepts cpu tensors, so copy back from the gpu first
	  # (.cpu() is a no-op when the tensor already lives on the cpu).
	  return features.detach().cpu().numpy()

	def embedding_for_image(pil_image):
	  """Encode a PIL image with the loaded CLIP model.

	  Args:
	    pil_image: a `PIL.Image.Image`; it is run through the CLIP `preprocess`
	      transform and encoded as a batch of one.

	  Returns:
	    NumPy array with the output of `model.encode_image` for that batch
	    (presumably a single row - TODO confirm).

	  Raises:
	    AssertionError: if `pil_image` is not a `PIL.Image.Image`.
	  """
	  # check that pil_image is type Image
	  assert isinstance(pil_image, Image.Image), f'embedding_for_image expected PIL.Image, got {type(pil_image)}'
	  # unsqueeze(0) adds the leading batch axis before moving the tensor to the model's device
	  batch = preprocess(pil_image).unsqueeze(0).to(device)
	  with torch.no_grad():
	    features = model.encode_image(batch)
	  # Tensor.numpy() only accepts cpu tensors, so copy back from the gpu first
	  # (.cpu() is a no-op when the tensor already lives on the cpu).
	  return features.detach().cpu().numpy()

	def normalized(a, axis=-1, order=2):
	  """Scale `a` to unit norm along `axis`.

	  Args:
	    a: array to normalize.
	    axis: axis along which the norm is taken (default: last axis).
	    order: norm order passed to `np.linalg.norm` (default: 2).

	  Returns:
	    `a` divided by its norm along `axis`; slices whose norm is 0 are
	    divided by 1 instead, so they are returned unchanged.
	  """
	  l2 = np.atleast_1d(np.linalg.norm(a, order, axis))
	  # avoid dividing by zero: all-zero slices keep their values
	  l2[l2 == 0] = 1
	  return a / np.expand_dims(l2, axis)

	# path to downloaded images
	DIR_PATH = "./utils/images/"
	# collect every jpeg below DIR_PATH (.jpg or .jpeg, any case), sorted by path so runs are consistent
	filenames = sorted(
	  os.path.join(root, file)
	  for root, _, files in os.walk(DIR_PATH)
	  for file in files
	  if file.lower().endswith(('.jpg', '.jpeg'))
	)
	if not filenames:
	  # np.vstack rejects an empty list and predict cannot run with batch_size=0,
	  # so stop with a readable message instead
	  raise SystemExit(f'no jpeg images found under {DIR_PATH}')

	# compute normalized embeddings for all images, one row per image
	embeddings = []
	for file in filenames:
	  # the with-block closes each image file as soon as its embedding is computed
	  with pil_for_path(file) as pil_image:
	    # cast to float32 (the dtype used for the classifier sanity check) before normalizing;
	    # NOTE(review): clip may hand back float16 when it runs on the gpu - confirm
	    embeddings.append(normalized(embedding_for_image(pil_image).astype(np.float32)))
	embeddings = np.vstack(embeddings)
	# use laion to compute nsfw values, all images in a single batch
	nsfw_values = nsfw_model.predict(embeddings, batch_size=embeddings.shape[0])

	# compute cosine similarity between nsfw and all images as a reference.
	nsfw = embedding_for_text('nsfw')
	cossim_values = [cos_sim(nsfw[0], emb) for emb in embeddings]

	# print res, round to 3 dec.
	for img, laion, cossim in zip(filenames, nsfw_values, cossim_values):
	  # squeeze first: float() on a length-1 array (ndim > 0) is deprecated in recent numpy
	  print(f'{img} \t laion: {round(float(np.squeeze(laion)), 3)} \t cossim {round(float(cossim), 3)}')