Skip to content

Instantly share code, notes, and snippets.

View robgon-art's full-sized avatar
😀

Robert A. Gonsalves robgon-art

😀
View GitHub Profile
@robgon-art
robgon-art / scrape-wikiart-by-artist.py
Created July 30, 2020 13:19
Iterate through all artists on WikiArt.org
import urllib
from bs4 import BeautifulSoup
file_path = "art/wikiart2"
base_url = "https://www.wikiart.org"
# iterate through all artists by last name alphabetically
for c in range(ord('n'), ord('z')+1):
char = chr(c)
artist_list_url = base_url + '/en/Alphabet/' + char + '/text-list'
@robgon-art
robgon-art / remove-frames.py
Last active July 30, 2020 18:09
Automatically remove frames from pictures of paintings.
# set the file paths
from_path = 'art/wikiart/'
to_path = 'art/cropped/'
# set up some parameters
thresh1 = 15000
thresh2 = 30
pad = 30
# loop through each of the files
@robgon-art
robgon-art / prep_images.py
Created July 30, 2020 19:18
Prepare the images for GAN training
# set up the file paths
from_path = 'art/cropped/'
to_path = 'art/resized/'
# set up some parameters
size = 1024
num_augmentations = 6
# set up the image augmenter
seq = iaa.Sequential([
@robgon-art
robgon-art / get_aspect_ratio.py
Created July 31, 2020 12:30
Get a random aspect ratio
# Initialize the x and y arrays
x = np.linspace(0, 849, 850)
y = np.empty(shape=(850))
# Read the file containing the paintings and aspect ratios
info_file = open('painting_info.txt', 'r')
lines = info_file.readlines()
# Loop through the lines, capturing the aspect ratio in the y array
count = 0
@robgon-art
robgon-art / post_process_images.py
Last active April 17, 2022 16:38
Process an image by adding some noise and resize with super resolution
import numpy as np
from PIL import Image
from ISR.models import RDN, RRDN
# Import the image
img = Image.open('input.png')
# Load the GAN model that will perform a 4x resize
model = RRDN(weights='gans')
@robgon-art
robgon-art / preprocess.py
Created August 20, 2020 01:39
Preprocess book summaries for GPT-2 training
# Download and unzip the CMU Book Summary Dataset
!wget -O booksummaries.tar.gz http://www.cs.cmu.edu/~dbamman/data/booksummaries.tar.gz
!tar -xf booksummaries.tar.gz
# Import support for CSV files and the JSON format
import csv
import json
# Initialize the genre dictionary
genre_groups = {}
@robgon-art
robgon-art / train-gpt2.py
Last active August 20, 2020 02:17
Train GPT-2
# Use TensorFlow 1.15
%tensorflow_version 1.x
# Install GPT-2, download the medium model, and start the session
!pip install -q gpt-2-simple
import gpt_2_simple as gpt2
model = "774M" # 124M 355M 774M 1558M
gpt2.download_gpt2(model_name=model)
sess = gpt2.start_tf_sess()
@robgon-art
robgon-art / generate-text-samples.py
Last active August 21, 2020 11:12
Create text samples using GPT-2
import textwrap

# Request new plot ideas from the already-running GPT-2 session.
# `gpt2` and `sess` are expected to exist from the earlier setup step.
generation_options = dict(
    length=150,
    temperature=0.7,
    prefix="GENRE:",
    nsamples=1,
    batch_size=1,
    return_as_list=True,
    include_prefix=True,
    truncate="\n",
)
plot_ideas = gpt2.generate(sess, **generation_options)

# Show every sample wrapped to 180 columns, followed by an extra newline
for plot in plot_ideas:
    wrapped = textwrap.fill(plot, width=180)
    print(wrapped, "\n")
@robgon-art
robgon-art / compile-list-of-titles.py
Created August 23, 2020 16:41
Compile a list of titles
# Function to remove articles at the head of titles
def remove_leading_article(title):
    """Return *title* with at most one leading article removed.

    Only the exact, case-sensitive prefixes ``"The "`` and ``"A "`` are
    recognised. A title that starts with neither is returned unchanged.
    """
    # Stop at the first match: a title such as "The A Team" must become
    # "A Team", not lose a second word because the remainder happens to
    # start with another article.
    for article in ("The ", "A "):
        if title.startswith(article):
            return title[len(article):]
    return title
# Get the titles of books, movies, and TV shows
import csv
@robgon-art
robgon-art / check-for-repeats.py
Created August 23, 2020 17:46
Check for repeated phrases
# Function to check if the text contains a repeated phrase
def repeats(s, num):
substrings = {}
parts = s.split(' ')
does_repeat = False
for i in range(len(parts)-num):
substring = parts[i]
for j in range(1, num):
substring += ' ' + parts[i+j]
if substring in substrings: