Last active
February 6, 2020 10:33
-
-
Save yunpengn/fa9d04a63abaa42a93b291f70c49c8b8 to your computer and use it in GitHub Desktop.
A simple script to setup IMDb movie database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
printf "Program starts at %s.\n" "$(date)" | |
printf "Going to clean and create new folder ...\n" | |
rm -rf imdb-datasets-ftp/ imdb-queries/ | |
mkdir imdb-datasets-ftp/ | |
printf "Going to download datasets from FTP server ...\n" | |
wget http://homepages.cwi.nl/~boncz/job/imdb.tgz | |
tar -xvzf imdb.tgz -C imdb-datasets-ftp/ | |
rm -f imdb.tgz | |
printf "Going to download JOB query set ...\n" | |
wget http://www-db.in.tum.de/~leis/qo/job.tgz | |
tar -xvzf job.tgz | |
mv job/ imdb-queries/ | |
rm -f job.tgz | |
printf "Going to create a new database ...\n" | |
psql -c "DROP DATABASE IF EXISTS imdb" | |
psql -c "CREATE DATABASE imdb" | |
printf "Going to initialize the database ...\n" | |
psql -d imdb -f imdb-queries/schema.sql | |
psql -d imdb -f imdb-queries/fkindexes.sql | |
printf "Going to populate the database ...\n" | |
psql -d imdb -c "COPY aka_name FROM '$(pwd)/imdb-datasets-ftp/aka_name.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY aka_title FROM '$(pwd)/imdb-datasets-ftp/aka_title.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY cast_info FROM '$(pwd)/imdb-datasets-ftp/cast_info.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY char_name FROM '$(pwd)/imdb-datasets-ftp/char_name.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY comp_cast_type FROM '$(pwd)/imdb-datasets-ftp/comp_cast_type.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY company_name FROM '$(pwd)/imdb-datasets-ftp/company_name.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY company_type FROM '$(pwd)/imdb-datasets-ftp/company_type.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY complete_cast FROM '$(pwd)/imdb-datasets-ftp/complete_cast.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY info_type FROM '$(pwd)/imdb-datasets-ftp/info_type.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY keyword FROM '$(pwd)/imdb-datasets-ftp/keyword.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY kind_type FROM '$(pwd)/imdb-datasets-ftp/kind_type.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY link_type FROM '$(pwd)/imdb-datasets-ftp/link_type.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY movie_companies FROM '$(pwd)/imdb-datasets-ftp/movie_companies.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY movie_info FROM '$(pwd)/imdb-datasets-ftp/movie_info.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY movie_info_idx FROM '$(pwd)/imdb-datasets-ftp/movie_info_idx.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY movie_keyword FROM '$(pwd)/imdb-datasets-ftp/movie_keyword.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY movie_link FROM '$(pwd)/imdb-datasets-ftp/movie_link.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY name FROM '$(pwd)/imdb-datasets-ftp/name.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY person_info FROM '$(pwd)/imdb-datasets-ftp/person_info.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY role_type FROM '$(pwd)/imdb-datasets-ftp/role_type.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
psql -d imdb -c "COPY title FROM '$(pwd)/imdb-datasets-ftp/title.csv' WITH (FORMAT csv, DELIMITER ',', QUOTE '\"', ESCAPE '\\')"; | |
printf "Program ends at %s.\n" "$(date)" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
printf "Program starts at %s.\n" "$(date)" | |
printf "Going to clean and create new folder ...\n" | |
rm -rf imdb-datasets/ imdbpy-master/ | |
mkdir imdb-datasets/ | |
printf "Going to download datasets from FTP server ...\n" | |
cd imdb-datasets/ | |
wget https://datasets.imdbws.com/name.basics.tsv.gz | |
wget https://datasets.imdbws.com/title.akas.tsv.gz | |
wget https://datasets.imdbws.com/title.basics.tsv.gz | |
wget https://datasets.imdbws.com/title.crew.tsv.gz | |
wget https://datasets.imdbws.com/title.episode.tsv.gz | |
wget https://datasets.imdbws.com/title.principals.tsv.gz | |
wget https://datasets.imdbws.com/title.ratings.tsv.gz | |
cd .. | |
printf "Going to download IMDbPy ...\n" | |
wget https://github.com/alberanid/imdbpy/archive/master.zip | |
unzip master.zip | |
rm -f master.zip | |
printf "Going to create virtual environment ...\n" | |
cd imdbpy-master/ | |
virtualenv .venv | |
source .venv/bin/activate | |
printf "Going to install dependencies ...\n" | |
pip3 install -r requirements.txt | |
pip3 install psycopg2 | |
printf "Going to create a new database ...\n" | |
psql -c "DROP DATABASE IF EXISTS imdb_new" | |
psql -c "CREATE DATABASE imdb_new" | |
printf "Going to populate the database ...\n" | |
mv bin/s32imdbpy.py . | |
./s32imdbpy.py --verbose ../imdb-datasets/ postgres://yunpeng:<your_password_here>@127.0.0.1/imdb_new | |
printf "Done!\n" | |
deactivate | |
printf "Program ends at %s.\n" "$(date)" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment