Robert A. Gonsalves robgon-art

## scrape-wikiart-by-artist.py
import urllib
from bs4 import BeautifulSoup

file_path = "art/wikiart2"
base_url = "https://www.wikiart.org"

# iterate through all artists by last name alphabetically
for c in range(ord('n'), ord('z')+1):
    char = chr(c)
    artist_list_url = base_url + '/en/Alphabet/' + char + '/text-list'

## remove-frames.py
# set the file paths
from_path = 'art/wikiart/'
to_path   = 'art/cropped/'

# set up some parameters
thresh1 = 15000
thresh2 = 30
pad = 30

# loop through each of the files

## prep_images.py
# set up the file paths
from_path = 'art/cropped/'
to_path = 'art/resized/'

# set up some parameters
size = 1024
num_augmentations = 6

# set up the image augmenter
seq = iaa.Sequential([

## get_aspect_ratio.py
# Initialize the x and y arrays
x = np.linspace(0, 849, 850)
y = np.empty(shape=(850))

# Read the file containing the paintings and aspect ratios
info_file = open('painting_info.txt', 'r')
lines = info_file.readlines()

# Loop through the lines, capturing the aspect ratio in the y array
count = 0

## post_process_images.py
import numpy as np
from PIL import Image
from ISR.models import RDN, RRDN

# Import the image
img = Image.open('input.png')

# Load the GAN model that will perform a 4x resize
model = RRDN(weights='gans')

## preprocess.py
# Download and unzip the CMU Book Summary Dataset
!wget -O booksummaries.tar.gz http://www.cs.cmu.edu/~dbamman/data/booksummaries.tar.gz
!tar -xf booksummaries.tar.gz

# Import support for CSV files and the JSON format
import csv
import json

# Initialize the genre dictionary
genre_groups = {}

## train-gpt2.py
# Use TensorFlow 1.15
%tensorflow_version 1.x

# Install GPT-2, download the 774M model, and start the session
!pip install -q gpt-2-simple
import gpt_2_simple as gpt2
model = "774M" # 124M 355M 774M 1558M
gpt2.download_gpt2(model_name=model)
sess = gpt2.start_tf_sess()

## generate-text-samples.py
# Get some new plots
plot_ideas = gpt2.generate(sess, length=150, temperature=0.7,
  prefix="GENRE:", nsamples=1, batch_size=1, return_as_list=True,
  include_prefix=True, truncate="\n")

# Print out the results
import textwrap
for plot in plot_ideas:
  print(textwrap.fill(plot, width=180),"\n")

## compile-list-of-titles.py
# Function to remove articles at the head of titles
def remove_leading_article(title):
  """Return title with a single leading article ("The " or "A ") removed.

  Only one article is stripped, so a title such as "The A Word" becomes
  "A Word" instead of losing both leading words. Matching is case-sensitive
  and requires the trailing space, so "Theory" and a bare "A" are returned
  unchanged.
  """
  for article in ("The ", "A "):
    if title.startswith(article):
      # Return right away so the remainder is never checked for a second article
      return title[len(article):]
  return title

# Get the titles of books, movies, and TV shows
import csv

## check-for-repeats.py
# Function to check if the text contains a repeated phrase
def repeats(s, num):
  substrings = {}
  parts = s.split(' ')
  does_repeat = False
  for i in range(len(parts)-num):
    substring = parts[i]
    for j in range(1, num):
      substring += ' ' + parts[i+j]
    if substring in substrings:
	import urllib
	from bs4 import BeautifulSoup

	file_path = "art/wikiart2"
	base_url = "https://www.wikiart.org"

	# iterate through all artists by last name alphabetically
	for c in range(ord('n'), ord('z')+1):
	char = chr(c)
	artist_list_url = base_url + '/en/Alphabet/' + char + '/text-list'
	# set up the file paths
	from_path = 'art/cropped/'
	to_path = 'art/resized/'

	# set up some parameters
	size = 1024
	num_augmentations = 6

	# set up the image augmenter
	seq = iaa.Sequential([
	# Initialize the x and y arrays
	x = np.linspace(0, 849, 850)
	y = np.empty(shape=(850))

	# Read the file containing the paintings and aspect ratios
	info_file = open('painting_info.txt', 'r')
	lines = info_file.readlines()

	# Loop through the lines, capturing the aspect ratio in the y array
	count = 0
	import numpy as np
	from PIL import Image
	from ISR.models import RDN, RRDN

	# Import the image
	img = Image.open('input.png')

	# Load the GAN model that will perform a 4x resize
	model = RRDN(weights='gans')
	# Download and unzip the CMU Book Summary Dataset
	!wget -O booksummaries.tar.gz http://www.cs.cmu.edu/~dbamman/data/booksummaries.tar.gz
	!tar -xf booksummaries.tar.gz

	# Import support for CSV files and the JSON format
	import csv
	import json

	# Initialize the genre dictionary
	genre_groups = {}
	# Use TensorFlow 1.15
	%tensorflow_version 1.x

	# Install GPT-2, download the 774M model, and start the session
	!pip install -q gpt-2-simple
	import gpt_2_simple as gpt2
	model = "774M" # 124M 355M 774M 1558M
	gpt2.download_gpt2(model_name=model)
	sess = gpt2.start_tf_sess()
	# Get some new plots
	plot_ideas = gpt2.generate(sess, length=150, temperature=0.7,
	prefix="GENRE:", nsamples=1, batch_size=1, return_as_list=True,
	include_prefix=True, truncate="\n")

	# Print out the results
	import textwrap
	for plot in plot_ideas:
	print(textwrap.fill(plot, width=180),"\n")
	# Function to remove articles at the head of titles
	def remove_leading_article(title):
	if title.startswith("The "):
	title = title[4:]
	if title.startswith("A "):
	title = title[2:]
	return title

	# Get the titles of books, movies, and TV shows
	import csv
	# Function to check if the text contains a repeated phrase
	def repeats(s, num):
	substrings = {}
	parts = s.split(' ')
	does_repeat = False
	for i in range(len(parts)-num):
	substring = parts[i]
	for j in range(1, num):
	substring += ' ' + parts[i+j]
	if substring in substrings: