postgresqlimdb
Differences
This shows you the differences between two versions of the page.
| Next revision | Previous revision | ||
| postgresqlimdb [2024/03/08 10:15] – created z0hpvk | postgresqlimdb [2025/03/08 22:24] (current) – external edit 127.0.0.1 | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| ====== IMDB Database ====== | ====== IMDB Database ====== | ||
| - | The import of the data was done using the script here: [[https:// | + | The import of the data was based on the script here: [[https:// |
| + | Run the below script as the postgres user to avoid permission issues.\\ | ||
| + | <code BASH> | ||
| + | printf " | ||
| + | rm -rf imdb-datasets/ | ||
| + | mkdir imdb-datasets/ | ||
| + | |||
| + | printf " | ||
| + | cd imdb-datasets | ||
| + | curl -O https:// | ||
| + | curl -O https:// | ||
| + | curl -O https:// | ||
| + | curl -O https:// | ||
| + | curl -O https:// | ||
| + | curl -O https:// | ||
| + | curl -O https:// | ||
| + | |||
| + | printf " | ||
| + | gzip -dk *.gz | ||
| + | cd .. | ||
| + | |||
| + | printf " | ||
| + | psql -d ' | ||
| + | psql -d ' | ||
| + | |||
| + | printf " | ||
| + | psql -d imdb -c " | ||
| + | psql -d imdb -c " | ||
| + | psql -d imdb -c " | ||
| + | psql -d imdb -c " | ||
| + | psql -d imdb -c " | ||
| + | psql -d imdb -c " | ||
| + | psql -d imdb -c " | ||
| + | |||
| + | printf " | ||
| + | psql -d imdb -c "COPY title_ratings FROM ' | ||
| + | psql -d imdb -c "COPY name_basics FROM ' | ||
| + | psql -d imdb -c "COPY title_akas FROM ' | ||
| + | psql -d imdb -c "COPY title_basics FROM ' | ||
| + | psql -d imdb -c "COPY title_crew FROM ' | ||
| + | psql -d imdb -c "COPY title_episode FROM ' | ||
| + | psql -d imdb -c "COPY title_principals FROM ' | ||
| + | </ | ||
| + | |||
| + | Next run the below SQL script to | ||
| + | * Create Primary Keys | ||
| + | * Fix Data Issues | ||
| + | * Create Foreign Keys | ||
| + | * Convert Column Lists to Arrays | ||
| + | |||
| + | <code SQL> | ||
| + | ALTER TABLE title_ratings ADD CONSTRAINT title_ratings_pk PRIMARY KEY (tconst); | ||
| + | ALTER TABLE title_akas ADD CONSTRAINT title_akas_pk PRIMARY KEY (titleid, ordering); | ||
| + | ALTER TABLE title_basics ADD CONSTRAINT title_basics_pk PRIMARY KEY (tconst); | ||
| + | ALTER TABLE title_crew ADD CONSTRAINT title_crew_pk PRIMARY KEY (tconst); | ||
| + | ALTER TABLE title_episode ADD CONSTRAINT title_episode_pk PRIMARY KEY (const); | ||
| + | ALTER TABLE title_principals ADD CONSTRAINT title_principals_pk PRIMARY KEY (tconst, ordering); | ||
| + | ALTER TABLE name_basics ADD CONSTRAINT name_basics_pk PRIMARY KEY (nconst); | ||
| + | |||
| + | INSERT INTO name_basics (nconst, | ||
| + | (' | ||
| + | |||
| + | ALTER TABLE title_ratings ADD CONSTRAINT title_ratings_fk FOREIGN KEY (tconst) REFERENCES title_basics (tconst); | ||
| + | ALTER TABLE title_episode ADD CONSTRAINT title_episode_fk FOREIGN KEY (parenttconst) REFERENCES title_basics (tconst); | ||
| + | ALTER TABLE title_principals ADD CONSTRAINT title_principals_fk FOREIGN KEY (nconst) REFERENCES name_basics (nconst); | ||
| + | |||
| + | ALTER TABLE name_basics ALTER primaryprofession TYPE text[] USING string_to_array(primaryprofession, | ||
| + | ALTER TABLE name_basics ALTER knownfortitles TYPE text[] USING string_to_array(knownfortitles, | ||
| + | ALTER TABLE title_basics ALTER genres TYPE text[] USING string_to_array(genres, | ||
| + | ALTER TABLE title_crew ALTER directors TYPE text[] USING string_to_array(directors, | ||
| + | ALTER TABLE title_crew ALTER writers TYPE text[] USING string_to_array(writers, | ||
| + | </ | ||
postgresqlimdb.1709892920.txt.gz · Last modified: (external edit)
