shreyasiv committed on
Commit
dcaabff
·
1 Parent(s): f1272bb

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +110 -0
  2. movie_list.pkl +3 -0
  3. notebook86c26b4f17.ipynb +1 -0
  4. requirements.txt +48 -0
  5. similarity.pkl +3 -0
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import streamlit as st
3
+ import requests
4
+
5
# Set the browser page title. This must run before any other Streamlit call
# that renders output.
st.set_page_config(page_title="Insightly")

# Inject a page-wide stylesheet. The .sidebar-link, .vertical-space,
# .movie-title and .row-padding classes are referenced by the HTML snippets
# this script emits further down; unsafe_allow_html is required so the
# <style> tag is passed through instead of being escaped.
st.markdown(
    """
<style>
.image-container {
margin-bottom: 60px;
}
.sidebar-link {
display: flex;
justify-content: left;
font-size: 28px;
margin-top: 10px; /* Adjust margin-top value to control space on the top */
margin-left: 20px; /* Adjust margin-left value to add space from the left */
}
.vertical-space {
height: 20px;
}
.movie-title {
font-size: 18px;
font-weight: bold;
}
.row-padding {
padding-bottom: 40px;
}
</style>
""",
    unsafe_allow_html=True,
)
35
+
36
# Sidebar: logo image, then one styled link per hosted tool, then a spacer.
# NOTE(review): indentation was lost in this diff view; every call is assumed
# to belong to the sidebar context.
with st.sidebar:
    st.image("https://i.ibb.co/bX6GdqG/insightly-wbg.png", use_column_width=True)
    for _sidebar_html in (
        "<p class='sidebar-link'>📈 <a href='https://insightly-csv-bot.hf.space/'> CSV Bot</a></p>",
        "<p class='sidebar-link'>📚 <a href='https://chandrakalagowda-demo2.hf.space/'> PDF Bot </a></p>",
        "<p class='sidebar-link'>📸 <a href='https://insightly-frame-capturer.hf.space/'> Frame Capturer</a></p>",
        "<p class='sidebar-link'>🖼️ <a href='https://insightly-image-reader.hf.space/'> Image Reader</a></p>",
        "<div class='vertical-space'></div>",
    ):
        # Raw HTML is needed so the CSS classes defined earlier apply.
        st.markdown(_sidebar_html, unsafe_allow_html=True)
44
+
45
+
46
+
47
def fetch_poster(movie_id):
    """Return the poster image URL for a TMDB movie.

    Requests ``https://api.themoviedb.org/3/movie/{movie_id}`` and builds a
    ``w500`` image URL from the ``poster_path`` field of the JSON response.

    Args:
        movie_id: TMDB identifier of the movie.

    Returns:
        The poster URL as a string. When the request fails, the response is
        not valid JSON, or the movie has no ``poster_path``, a placeholder
        image URL is returned so one bad lookup does not abort the page.
    """
    import os  # function-scope import keeps this block self-contained

    # NOTE(review): an API key is committed in source. The environment
    # variable lets deployments override it; the literal stays as the default
    # so existing behavior is unchanged.
    api_key = os.environ.get("TMDB_API_KEY", "8265bd1679663a7ea12ac168da84d2e8")
    url = "https://api.themoviedb.org/3/movie/{}".format(movie_id)
    placeholder = "https://placehold.co/500x750?text=No+Poster"

    try:
        # A timeout prevents a stalled TMDB call from hanging the app.
        response = requests.get(
            url,
            params={"api_key": api_key, "language": "en-US"},
            timeout=10,
        )
        response.raise_for_status()
        poster_path = response.json().get("poster_path")
    except (requests.RequestException, ValueError):
        # Network/HTTP error or an undecodable body: fall back below.
        poster_path = None

    # TMDB reports a missing poster as null; concatenating None would raise.
    if not poster_path:
        return placeholder
    return "https://image.tmdb.org/t/p/w500/" + poster_path
54
+
55
def recommend(movie):
    """Return the five titles most similar to ``movie`` and their posters.

    Looks up ``movie`` in the module-level ``movies`` table, reads its row of
    the module-level ``similarity`` matrix, and picks the five highest-scoring
    other entries.

    Args:
        movie: Title to look up in ``movies['title']``.

    Returns:
        A ``(names, posters)`` tuple of two parallel lists: recommended
        titles and the poster URL returned by ``fetch_poster`` for each.

    Raises:
        IndexError: If no row of ``movies`` has the given title.
    """
    import heapq  # function-scope import keeps this block self-contained

    # NOTE(review): the index label is used as a row position in
    # ``similarity`` and with ``iloc`` — assumes ``movies`` has a default
    # 0..n-1 index; confirm against how the pickle was built.
    index = movies[movies['title'] == movie].index[0]
    scores = similarity[index]

    # Exclude the selected movie by identity instead of assuming it sorts
    # first: a duplicate or tied score could otherwise push it into the
    # results. nlargest keeps the same ordering as a full descending sort
    # without sorting every entry.
    top_positions = heapq.nlargest(
        5,
        (pos for pos in range(len(scores)) if pos != index),
        key=lambda pos: scores[pos],
    )

    recommended_movie_names = []
    recommended_movie_posters = []
    for pos in top_positions:
        row = movies.iloc[pos]
        recommended_movie_posters.append(fetch_poster(row.movie_id))
        recommended_movie_names.append(row.title)

    return recommended_movie_names, recommended_movie_posters
67
+
68
+
69
st.title('Movie Recommender 🎬')

# Resolve the pickled data relative to this script rather than a
# machine-specific absolute path: movie_list.pkl and similarity.pkl are
# uploaded alongside app.py, so this works wherever the app is deployed.
from pathlib import Path  # script-local import; used only for the paths below

_DATA_DIR = Path(__file__).resolve().parent
movie_list_path = str(_DATA_DIR / "movie_list.pkl")
similarity_path = str(_DATA_DIR / "similarity.pkl")

# Load each file once and close the handle deterministically.
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# ship with this app.
with open(movie_list_path, 'rb') as _movie_file:
    movies = pickle.load(_movie_file)
with open(similarity_path, 'rb') as _similarity_file:
    similarity = pickle.load(_similarity_file)
81
+
82
import html  # script-local import; used to escape titles placed into raw HTML

# Let the user pick a title from every movie in the loaded table.
movie_list = movies['title'].values
selected_movie = st.selectbox(
    "Type or select a movie from the dropdown",
    movie_list
)

if st.button('Show Recommendation'):
    recommended_movie_names, recommended_movie_posters = recommend(selected_movie)

    # Lay the results out in a grid of three columns per row.
    num_recommendations = len(recommended_movie_names)
    num_columns = 3
    # Ceiling division: a partially filled last row still needs a row.
    num_rows = (num_recommendations + num_columns - 1) // num_columns

    # One st.columns(...) call per grid row; cols[row][column] is a cell.
    cols = [st.columns(num_columns) for _ in range(num_rows)]

    for i, (movie_name, poster_url) in enumerate(
        zip(recommended_movie_names, recommended_movie_posters)
    ):
        row_index, col_index = divmod(i, num_columns)
        cell = cols[row_index][col_index]
        # Titles come from data, so escape them before embedding in HTML that
        # is rendered with unsafe_allow_html.
        cell.markdown(
            f"<span class='movie-title'>{html.escape(str(movie_name))}</span>",
            unsafe_allow_html=True,
        )
        cell.image(poster_url)

    # Add padding below the grid.
    # NOTE(review): indentation was lost in this diff view; these two calls
    # are assumed to run once after the loop, inside the button branch.
    st.markdown("<br>", unsafe_allow_html=True)
    st.write('<div class="row-padding"></div>', unsafe_allow_html=True)
109
+
110
+
movie_list.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0465afd83e7b82c8753b750147a9de4ec0feb2289408784f74f2270acaf05d88
3
+ size 2396340
notebook86c26b4f17.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
+ {"cells":[{"cell_type":"code","execution_count":1,"metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2021-06-21T12:32:55.394360Z","iopub.status.busy":"2021-06-21T12:32:55.393741Z","iopub.status.idle":"2021-06-21T12:32:55.415999Z","shell.execute_reply":"2021-06-21T12:32:55.414664Z","shell.execute_reply.started":"2021-06-21T12:32:55.394257Z"},"trusted":true},"outputs":[],"source":["# This Python 3 environment comes with many helpful analytics libraries installed\n","# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n","# For example, here's several helpful packages to load\n","\n","import numpy as np # linear algebra\n","import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n","\n","# Input data files are available in the read-only \"../input/\" directory\n","# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n","\n","import os\n","for dirname, _, filenames in os.walk('/kaggle/input'):\n"," for filename in filenames:\n"," print(os.path.join(dirname, filename))\n","\n","# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n","# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session"]},{"cell_type":"code","execution_count":2,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:36:31.348105Z","iopub.status.busy":"2021-06-21T12:36:31.347717Z","iopub.status.idle":"2021-06-21T12:36:32.003856Z","shell.execute_reply":"2021-06-21T12:36:32.002744Z","shell.execute_reply.started":"2021-06-21T12:36:31.348073Z"},"trusted":true},"outputs":[{"ename":"FileNotFoundError","evalue":"[Errno 2] No such file or directory: 
'/kaggle/input/tmdb-movie-metadata/tmdb_5000_movies.csv'","output_type":"error","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)","Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m movies \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mread_csv(\u001b[39m'\u001b[39;49m\u001b[39m/kaggle/input/tmdb-movie-metadata/tmdb_5000_movies.csv\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[1;32m 2\u001b[0m credits \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mread_csv(\u001b[39m'\u001b[39m\u001b[39m/kaggle/input/tmdb-movie-metadata/tmdb_5000_credits.csv\u001b[39m\u001b[39m'\u001b[39m) \n","File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/parsers/readers.py:912\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 899\u001b[0m kwds_defaults \u001b[39m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 900\u001b[0m dialect,\n\u001b[1;32m 901\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 908\u001b[0m dtype_backend\u001b[39m=\u001b[39mdtype_backend,\n\u001b[1;32m 909\u001b[0m )\n\u001b[1;32m 910\u001b[0m kwds\u001b[39m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m--> 912\u001b[0m \u001b[39mreturn\u001b[39;00m _read(filepath_or_buffer, kwds)\n","File 
\u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/parsers/readers.py:577\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 574\u001b[0m _validate_names(kwds\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mnames\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m))\n\u001b[1;32m 576\u001b[0m \u001b[39m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 577\u001b[0m parser \u001b[39m=\u001b[39m TextFileReader(filepath_or_buffer, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwds)\n\u001b[1;32m 579\u001b[0m \u001b[39mif\u001b[39;00m chunksize \u001b[39mor\u001b[39;00m iterator:\n\u001b[1;32m 580\u001b[0m \u001b[39mreturn\u001b[39;00m parser\n","File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1407\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1404\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39moptions[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m kwds[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[1;32m 1406\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles: IOHandles \u001b[39m|\u001b[39m \u001b[39mNone\u001b[39;00m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m-> 1407\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_engine \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_engine(f, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mengine)\n","File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1661\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1659\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m mode:\n\u001b[1;32m 1660\u001b[0m mode 
\u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m-> 1661\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39m=\u001b[39m get_handle(\n\u001b[1;32m 1662\u001b[0m f,\n\u001b[1;32m 1663\u001b[0m mode,\n\u001b[1;32m 1664\u001b[0m encoding\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mencoding\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m 1665\u001b[0m compression\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mcompression\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m 1666\u001b[0m memory_map\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mmemory_map\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mFalse\u001b[39;49;00m),\n\u001b[1;32m 1667\u001b[0m is_text\u001b[39m=\u001b[39;49mis_text,\n\u001b[1;32m 1668\u001b[0m errors\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mencoding_errors\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mstrict\u001b[39;49m\u001b[39m\"\u001b[39;49m),\n\u001b[1;32m 1669\u001b[0m storage_options\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mstorage_options\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m 1670\u001b[0m )\n\u001b[1;32m 1671\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 1672\u001b[0m f 
\u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles\u001b[39m.\u001b[39mhandle\n","File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/common.py:859\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 854\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(handle, \u001b[39mstr\u001b[39m):\n\u001b[1;32m 855\u001b[0m \u001b[39m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[1;32m 856\u001b[0m \u001b[39m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[1;32m 857\u001b[0m \u001b[39mif\u001b[39;00m ioargs\u001b[39m.\u001b[39mencoding \u001b[39mand\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m ioargs\u001b[39m.\u001b[39mmode:\n\u001b[1;32m 858\u001b[0m \u001b[39m# Encoding\u001b[39;00m\n\u001b[0;32m--> 859\u001b[0m handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39;49m(\n\u001b[1;32m 860\u001b[0m handle,\n\u001b[1;32m 861\u001b[0m ioargs\u001b[39m.\u001b[39;49mmode,\n\u001b[1;32m 862\u001b[0m encoding\u001b[39m=\u001b[39;49mioargs\u001b[39m.\u001b[39;49mencoding,\n\u001b[1;32m 863\u001b[0m errors\u001b[39m=\u001b[39;49merrors,\n\u001b[1;32m 864\u001b[0m newline\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[1;32m 865\u001b[0m )\n\u001b[1;32m 866\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 867\u001b[0m \u001b[39m# Binary mode\u001b[39;00m\n\u001b[1;32m 868\u001b[0m handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39m(handle, ioargs\u001b[39m.\u001b[39mmode)\n","\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/kaggle/input/tmdb-movie-metadata/tmdb_5000_movies.csv'"]}],"source":["movies = pd.read_csv('/home/oem/Downloads/archive (3) (1)/tmdb_5000_movies.csv')\n","credits = 
pd.read_csv('/home/oem/Downloads/archive (3) (1)/tmdb_5000_credits.csv') "]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:36:33.757778Z","iopub.status.busy":"2021-06-21T12:36:33.757346Z","iopub.status.idle":"2021-06-21T12:36:33.781729Z","shell.execute_reply":"2021-06-21T12:36:33.780738Z","shell.execute_reply.started":"2021-06-21T12:36:33.757743Z"},"trusted":true},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>budget</th>\n"," <th>genres</th>\n"," <th>homepage</th>\n"," <th>id</th>\n"," <th>keywords</th>\n"," <th>original_language</th>\n"," <th>original_title</th>\n"," <th>overview</th>\n"," <th>popularity</th>\n"," <th>production_companies</th>\n"," <th>production_countries</th>\n"," <th>release_date</th>\n"," <th>revenue</th>\n"," <th>runtime</th>\n"," <th>spoken_languages</th>\n"," <th>status</th>\n"," <th>tagline</th>\n"," <th>title</th>\n"," <th>vote_average</th>\n"," <th>vote_count</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>237000000</td>\n"," <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n"," <td>http://www.avatarmovie.com/</td>\n"," <td>19995</td>\n"," <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n"," <td>en</td>\n"," <td>Avatar</td>\n"," <td>In the 22nd century, a paraplegic Marine is di...</td>\n"," <td>150.437577</td>\n"," <td>[{\"name\": \"Ingenious Film Partners\", \"id\": 289...</td>\n"," <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n"," <td>2009-12-10</td>\n"," <td>2787965087</td>\n"," <td>162.0</td>\n"," <td>[{\"iso_639_1\": \"en\", \"name\": 
\"English\"}, {\"iso...</td>\n"," <td>Released</td>\n"," <td>Enter the World of Pandora.</td>\n"," <td>Avatar</td>\n"," <td>7.2</td>\n"," <td>11800</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>300000000</td>\n"," <td>[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...</td>\n"," <td>http://disney.go.com/disneypictures/pirates/</td>\n"," <td>285</td>\n"," <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n"," <td>en</td>\n"," <td>Pirates of the Caribbean: At World's End</td>\n"," <td>Captain Barbossa, long believed to be dead, ha...</td>\n"," <td>139.082615</td>\n"," <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...</td>\n"," <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n"," <td>2007-05-19</td>\n"," <td>961000000</td>\n"," <td>169.0</td>\n"," <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n"," <td>Released</td>\n"," <td>At the end of the world, the adventure begins.</td>\n"," <td>Pirates of the Caribbean: At World's End</td>\n"," <td>6.9</td>\n"," <td>4500</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" budget genres \\\n","0 237000000 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n","1 300000000 [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"... \n","\n"," homepage id \\\n","0 http://www.avatarmovie.com/ 19995 \n","1 http://disney.go.com/disneypictures/pirates/ 285 \n","\n"," keywords original_language \\\n","0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... en \n","1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... en \n","\n"," original_title \\\n","0 Avatar \n","1 Pirates of the Caribbean: At World's End \n","\n"," overview popularity \\\n","0 In the 22nd century, a paraplegic Marine is di... 150.437577 \n","1 Captain Barbossa, long believed to be dead, ha... 139.082615 \n","\n"," production_companies \\\n","0 [{\"name\": \"Ingenious Film Partners\", \"id\": 289... 
\n","1 [{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"... \n","\n"," production_countries release_date revenue \\\n","0 [{\"iso_3166_1\": \"US\", \"name\": \"United States o... 2009-12-10 2787965087 \n","1 [{\"iso_3166_1\": \"US\", \"name\": \"United States o... 2007-05-19 961000000 \n","\n"," runtime spoken_languages status \\\n","0 162.0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso... Released \n","1 169.0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}] Released \n","\n"," tagline \\\n","0 Enter the World of Pandora. \n","1 At the end of the world, the adventure begins. \n","\n"," title vote_average vote_count \n","0 Avatar 7.2 11800 \n","1 Pirates of the Caribbean: At World's End 6.9 4500 "]},"execution_count":31,"metadata":{},"output_type":"execute_result"}],"source":["movies.head(2)"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:33:06.308311Z","iopub.status.busy":"2021-06-21T12:33:06.307921Z","iopub.status.idle":"2021-06-21T12:33:06.316052Z","shell.execute_reply":"2021-06-21T12:33:06.314903Z","shell.execute_reply.started":"2021-06-21T12:33:06.308279Z"},"trusted":true},"outputs":[{"data":{"text/plain":["(4803, 20)"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["movies.shape"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:33:08.787791Z","iopub.status.busy":"2021-06-21T12:33:08.787389Z","iopub.status.idle":"2021-06-21T12:33:08.800864Z","shell.execute_reply":"2021-06-21T12:33:08.799834Z","shell.execute_reply.started":"2021-06-21T12:33:08.787758Z"},"trusted":true},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," 
<thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>movie_id</th>\n"," <th>title</th>\n"," <th>cast</th>\n"," <th>crew</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>19995</td>\n"," <td>Avatar</td>\n"," <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n"," <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>285</td>\n"," <td>Pirates of the Caribbean: At World's End</td>\n"," <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n"," <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>206647</td>\n"," <td>Spectre</td>\n"," <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n"," <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>49026</td>\n"," <td>The Dark Knight Rises</td>\n"," <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n"," <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>49529</td>\n"," <td>John Carter</td>\n"," <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n"," <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" movie_id title \\\n","0 19995 Avatar \n","1 285 Pirates of the Caribbean: At World's End \n","2 206647 Spectre \n","3 49026 The Dark Knight Rises \n","4 49529 John Carter \n","\n"," cast \\\n","0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n","1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n","2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n","3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n","4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n","\n"," crew \n","0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... 
\n","1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n","2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n","3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n","4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... "]},"execution_count":5,"metadata":{},"output_type":"execute_result"}],"source":["credits.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:36:38.687005Z","iopub.status.busy":"2021-06-21T12:36:38.686521Z","iopub.status.idle":"2021-06-21T12:36:38.713343Z","shell.execute_reply":"2021-06-21T12:36:38.711939Z","shell.execute_reply.started":"2021-06-21T12:36:38.686963Z"},"trusted":true},"outputs":[],"source":["movies = movies.merge(credits,on='title')"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-19T12:48:23.470754Z","iopub.status.busy":"2021-06-19T12:48:23.470187Z","iopub.status.idle":"2021-06-19T12:48:23.5041Z","shell.execute_reply":"2021-06-19T12:48:23.503327Z","shell.execute_reply.started":"2021-06-19T12:48:23.470724Z"},"trusted":true},"outputs":[],"source":["movies.head()\n","# budget\n","# homepage\n","# id\n","# original_language\n","# original_title\n","# popularity\n","# production_comapny\n","# production_countries\n","# release-date(not sure)"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:36:41.393733Z","iopub.status.busy":"2021-06-21T12:36:41.393326Z","iopub.status.idle":"2021-06-21T12:36:41.406883Z","shell.execute_reply":"2021-06-21T12:36:41.405822Z","shell.execute_reply.started":"2021-06-21T12:36:41.393699Z"},"trusted":true},"outputs":[],"source":["movies = 
movies[['movie_id','title','overview','genres','keywords','cast','crew']]"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:33:39.517061Z","iopub.status.busy":"2021-06-21T12:33:39.516481Z","iopub.status.idle":"2021-06-21T12:33:39.532499Z","shell.execute_reply":"2021-06-21T12:33:39.531584Z","shell.execute_reply.started":"2021-06-21T12:33:39.517012Z"},"trusted":true},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>movie_id</th>\n"," <th>title</th>\n"," <th>overview</th>\n"," <th>genres</th>\n"," <th>keywords</th>\n"," <th>cast</th>\n"," <th>crew</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>19995</td>\n"," <td>Avatar</td>\n"," <td>In the 22nd century, a paraplegic Marine is di...</td>\n"," <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n"," <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n"," <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n"," <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>285</td>\n"," <td>Pirates of the Caribbean: At World's End</td>\n"," <td>Captain Barbossa, long believed to be dead, ha...</td>\n"," <td>[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...</td>\n"," <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n"," <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n"," <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>206647</td>\n"," <td>Spectre</td>\n"," <td>A cryptic message from Bond’s 
past sends him o...</td>\n"," <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n"," <td>[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...</td>\n"," <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n"," <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>49026</td>\n"," <td>The Dark Knight Rises</td>\n"," <td>Following the death of District Attorney Harve...</td>\n"," <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...</td>\n"," <td>[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...</td>\n"," <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n"," <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>49529</td>\n"," <td>John Carter</td>\n"," <td>John Carter is a war-weary, former military ca...</td>\n"," <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n"," <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n"," <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n"," <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" movie_id title \\\n","0 19995 Avatar \n","1 285 Pirates of the Caribbean: At World's End \n","2 206647 Spectre \n","3 49026 The Dark Knight Rises \n","4 49529 John Carter \n","\n"," overview \\\n","0 In the 22nd century, a paraplegic Marine is di... \n","1 Captain Barbossa, long believed to be dead, ha... \n","2 A cryptic message from Bond’s past sends him o... \n","3 Following the death of District Attorney Harve... \n","4 John Carter is a war-weary, former military ca... \n","\n"," genres \\\n","0 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n","1 [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"... \n","2 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... 
\n","3 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam... \n","4 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n","\n"," keywords \\\n","0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... \n","1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... \n","2 [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name... \n","3 [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,... \n","4 [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":... \n","\n"," cast \\\n","0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n","1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n","2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n","3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n","4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n","\n"," crew \n","0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n","1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n","2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n","3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n","4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... 
"]},"execution_count":8,"metadata":{},"output_type":"execute_result"}],"source":["movies.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:34:19.382856Z","iopub.status.busy":"2021-06-21T12:34:19.382331Z","iopub.status.idle":"2021-06-21T12:34:19.387416Z","shell.execute_reply":"2021-06-21T12:34:19.386451Z","shell.execute_reply.started":"2021-06-21T12:34:19.382822Z"},"trusted":true},"outputs":[],"source":["import ast"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:36:47.540453Z","iopub.status.busy":"2021-06-21T12:36:47.539904Z","iopub.status.idle":"2021-06-21T12:36:47.545014Z","shell.execute_reply":"2021-06-21T12:36:47.544243Z","shell.execute_reply.started":"2021-06-21T12:36:47.540418Z"},"trusted":true},"outputs":[],"source":["def convert(text):\n"," L = []\n"," for i in ast.literal_eval(text):\n"," L.append(i['name']) \n"," return L "]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:36:50.059538Z","iopub.status.busy":"2021-06-21T12:36:50.059006Z","iopub.status.idle":"2021-06-21T12:36:50.073095Z","shell.execute_reply":"2021-06-21T12:36:50.071577Z","shell.execute_reply.started":"2021-06-21T12:36:50.059504Z"},"trusted":true},"outputs":[],"source":["movies.dropna(inplace=True)"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:36:52.344121Z","iopub.status.busy":"2021-06-21T12:36:52.343645Z","iopub.status.idle":"2021-06-21T12:36:52.523910Z","shell.execute_reply":"2021-06-21T12:36:52.522805Z","shell.execute_reply.started":"2021-06-21T12:36:52.344082Z"},"trusted":true},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," 
}\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>movie_id</th>\n"," <th>title</th>\n"," <th>overview</th>\n"," <th>genres</th>\n"," <th>keywords</th>\n"," <th>cast</th>\n"," <th>crew</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>19995</td>\n"," <td>Avatar</td>\n"," <td>In the 22nd century, a paraplegic Marine is di...</td>\n"," <td>[Action, Adventure, Fantasy, Science Fiction]</td>\n"," <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n"," <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n"," <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>285</td>\n"," <td>Pirates of the Caribbean: At World's End</td>\n"," <td>Captain Barbossa, long believed to be dead, ha...</td>\n"," <td>[Adventure, Fantasy, Action]</td>\n"," <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n"," <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n"," <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>206647</td>\n"," <td>Spectre</td>\n"," <td>A cryptic message from Bond���s past sends him o...</td>\n"," <td>[Action, Adventure, Crime]</td>\n"," <td>[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...</td>\n"," <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n"," <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>49026</td>\n"," <td>The Dark Knight Rises</td>\n"," <td>Following the death of District Attorney Harve...</td>\n"," <td>[Action, Crime, Drama, Thriller]</td>\n"," <td>[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...</td>\n"," <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n"," <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," 
<td>49529</td>\n"," <td>John Carter</td>\n"," <td>John Carter is a war-weary, former military ca...</td>\n"," <td>[Action, Adventure, Science Fiction]</td>\n"," <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n"," <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n"," <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" movie_id title \\\n","0 19995 Avatar \n","1 285 Pirates of the Caribbean: At World's End \n","2 206647 Spectre \n","3 49026 The Dark Knight Rises \n","4 49529 John Carter \n","\n"," overview \\\n","0 In the 22nd century, a paraplegic Marine is di... \n","1 Captain Barbossa, long believed to be dead, ha... \n","2 A cryptic message from Bond’s past sends him o... \n","3 Following the death of District Attorney Harve... \n","4 John Carter is a war-weary, former military ca... \n","\n"," genres \\\n","0 [Action, Adventure, Fantasy, Science Fiction] \n","1 [Adventure, Fantasy, Action] \n","2 [Action, Adventure, Crime] \n","3 [Action, Crime, Drama, Thriller] \n","4 [Action, Adventure, Science Fiction] \n","\n"," keywords \\\n","0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... \n","1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... \n","2 [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name... \n","3 [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,... \n","4 [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":... \n","\n"," cast \\\n","0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n","1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n","2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n","3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n","4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n","\n"," crew \n","0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n","1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... 
\n","2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n","3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n","4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... "]},"execution_count":36,"metadata":{},"output_type":"execute_result"}],"source":["movies['genres'] = movies['genres'].apply(convert)\n","movies.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:36:55.304439Z","iopub.status.busy":"2021-06-21T12:36:55.304070Z","iopub.status.idle":"2021-06-21T12:36:55.738525Z","shell.execute_reply":"2021-06-21T12:36:55.737123Z","shell.execute_reply.started":"2021-06-21T12:36:55.304408Z"},"trusted":true},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>movie_id</th>\n"," <th>title</th>\n"," <th>overview</th>\n"," <th>genres</th>\n"," <th>keywords</th>\n"," <th>cast</th>\n"," <th>crew</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>19995</td>\n"," <td>Avatar</td>\n"," <td>In the 22nd century, a paraplegic Marine is di...</td>\n"," <td>[Action, Adventure, Fantasy, Science Fiction]</td>\n"," <td>[culture clash, future, space war, space colon...</td>\n"," <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n"," <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>285</td>\n"," <td>Pirates of the Caribbean: At World's End</td>\n"," <td>Captain Barbossa, long believed to be dead, ha...</td>\n"," <td>[Adventure, Fantasy, Action]</td>\n"," <td>[ocean, drug abuse, exotic island, east india ...</td>\n"," <td>[{\"cast_id\": 4, \"character\": \"Captain 
Jack Spa...</td>\n"," <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>206647</td>\n"," <td>Spectre</td>\n"," <td>A cryptic message from Bond’s past sends him o...</td>\n"," <td>[Action, Adventure, Crime]</td>\n"," <td>[spy, based on novel, secret agent, sequel, mi...</td>\n"," <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n"," <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>49026</td>\n"," <td>The Dark Knight Rises</td>\n"," <td>Following the death of District Attorney Harve...</td>\n"," <td>[Action, Crime, Drama, Thriller]</td>\n"," <td>[dc comics, crime fighter, terrorist, secret i...</td>\n"," <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n"," <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>49529</td>\n"," <td>John Carter</td>\n"," <td>John Carter is a war-weary, former military ca...</td>\n"," <td>[Action, Adventure, Science Fiction]</td>\n"," <td>[based on novel, mars, medallion, space travel...</td>\n"," <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n"," <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" movie_id title \\\n","0 19995 Avatar \n","1 285 Pirates of the Caribbean: At World's End \n","2 206647 Spectre \n","3 49026 The Dark Knight Rises \n","4 49529 John Carter \n","\n"," overview \\\n","0 In the 22nd century, a paraplegic Marine is di... \n","1 Captain Barbossa, long believed to be dead, ha... \n","2 A cryptic message from Bond’s past sends him o... \n","3 Following the death of District Attorney Harve... \n","4 John Carter is a war-weary, former military ca... 
\n","\n"," genres \\\n","0 [Action, Adventure, Fantasy, Science Fiction] \n","1 [Adventure, Fantasy, Action] \n","2 [Action, Adventure, Crime] \n","3 [Action, Crime, Drama, Thriller] \n","4 [Action, Adventure, Science Fiction] \n","\n"," keywords \\\n","0 [culture clash, future, space war, space colon... \n","1 [ocean, drug abuse, exotic island, east india ... \n","2 [spy, based on novel, secret agent, sequel, mi... \n","3 [dc comics, crime fighter, terrorist, secret i... \n","4 [based on novel, mars, medallion, space travel... \n","\n"," cast \\\n","0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n","1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n","2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n","3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n","4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n","\n"," crew \n","0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n","1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n","2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n","3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n","4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... 
"]},"execution_count":37,"metadata":{},"output_type":"execute_result"}],"source":["movies['keywords'] = movies['keywords'].apply(convert)\n","movies.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-19T13:07:21.572473Z","iopub.status.busy":"2021-06-19T13:07:21.572154Z","iopub.status.idle":"2021-06-19T13:07:21.578686Z","shell.execute_reply":"2021-06-19T13:07:21.577661Z","shell.execute_reply.started":"2021-06-19T13:07:21.572446Z"},"trusted":true},"outputs":[],"source":["import ast\n","ast.literal_eval('[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \"Fantasy\"}, {\"id\": 878, \"name\": \"Science Fiction\"}]')"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:36:59.511226Z","iopub.status.busy":"2021-06-21T12:36:59.510859Z","iopub.status.idle":"2021-06-21T12:36:59.517043Z","shell.execute_reply":"2021-06-21T12:36:59.515878Z","shell.execute_reply.started":"2021-06-21T12:36:59.511192Z"},"trusted":true},"outputs":[],"source":["def convert3(text):\n"," L = []\n"," counter = 0\n"," for i in ast.literal_eval(text):\n"," if counter < 3:\n"," L.append(i['name'])\n"," counter+=1\n"," return L "]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:37:00.829409Z","iopub.status.busy":"2021-06-21T12:37:00.828661Z","iopub.status.idle":"2021-06-21T12:37:04.117090Z","shell.execute_reply":"2021-06-21T12:37:04.115822Z","shell.execute_reply.started":"2021-06-21T12:37:00.829355Z"},"trusted":true},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: 
right;\">\n"," <th></th>\n"," <th>movie_id</th>\n"," <th>title</th>\n"," <th>overview</th>\n"," <th>genres</th>\n"," <th>keywords</th>\n"," <th>cast</th>\n"," <th>crew</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>19995</td>\n"," <td>Avatar</td>\n"," <td>In the 22nd century, a paraplegic Marine is di...</td>\n"," <td>[Action, Adventure, Fantasy, Science Fiction]</td>\n"," <td>[culture clash, future, space war, space colon...</td>\n"," <td>[Sam Worthington, Zoe Saldana, Sigourney Weave...</td>\n"," <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>285</td>\n"," <td>Pirates of the Caribbean: At World's End</td>\n"," <td>Captain Barbossa, long believed to be dead, ha...</td>\n"," <td>[Adventure, Fantasy, Action]</td>\n"," <td>[ocean, drug abuse, exotic island, east india ...</td>\n"," <td>[Johnny Depp, Orlando Bloom, Keira Knightley, ...</td>\n"," <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>206647</td>\n"," <td>Spectre</td>\n"," <td>A cryptic message from Bond’s past sends him o...</td>\n"," <td>[Action, Adventure, Crime]</td>\n"," <td>[spy, based on novel, secret agent, sequel, mi...</td>\n"," <td>[Daniel Craig, Christoph Waltz, Léa Seydoux, R...</td>\n"," <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>49026</td>\n"," <td>The Dark Knight Rises</td>\n"," <td>Following the death of District Attorney Harve...</td>\n"," <td>[Action, Crime, Drama, Thriller]</td>\n"," <td>[dc comics, crime fighter, terrorist, secret i...</td>\n"," <td>[Christian Bale, Michael Caine, Gary Oldman, A...</td>\n"," <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>49529</td>\n"," <td>John Carter</td>\n"," <td>John Carter is a war-weary, former military ca...</td>\n"," <td>[Action, Adventure, Science Fiction]</td>\n"," 
<td>[based on novel, mars, medallion, space travel...</td>\n"," <td>[Taylor Kitsch, Lynn Collins, Samantha Morton,...</td>\n"," <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" movie_id title \\\n","0 19995 Avatar \n","1 285 Pirates of the Caribbean: At World's End \n","2 206647 Spectre \n","3 49026 The Dark Knight Rises \n","4 49529 John Carter \n","\n"," overview \\\n","0 In the 22nd century, a paraplegic Marine is di... \n","1 Captain Barbossa, long believed to be dead, ha... \n","2 A cryptic message from Bond’s past sends him o... \n","3 Following the death of District Attorney Harve... \n","4 John Carter is a war-weary, former military ca... \n","\n"," genres \\\n","0 [Action, Adventure, Fantasy, Science Fiction] \n","1 [Adventure, Fantasy, Action] \n","2 [Action, Adventure, Crime] \n","3 [Action, Crime, Drama, Thriller] \n","4 [Action, Adventure, Science Fiction] \n","\n"," keywords \\\n","0 [culture clash, future, space war, space colon... \n","1 [ocean, drug abuse, exotic island, east india ... \n","2 [spy, based on novel, secret agent, sequel, mi... \n","3 [dc comics, crime fighter, terrorist, secret i... \n","4 [based on novel, mars, medallion, space travel... \n","\n"," cast \\\n","0 [Sam Worthington, Zoe Saldana, Sigourney Weave... \n","1 [Johnny Depp, Orlando Bloom, Keira Knightley, ... \n","2 [Daniel Craig, Christoph Waltz, Léa Seydoux, R... \n","3 [Christian Bale, Michael Caine, Gary Oldman, A... \n","4 [Taylor Kitsch, Lynn Collins, Samantha Morton,... \n","\n"," crew \n","0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n","1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n","2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n","3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n","4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... 
"]},"execution_count":39,"metadata":{},"output_type":"execute_result"}],"source":["movies['cast'] = movies['cast'].apply(convert)\n","movies.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:37:07.352886Z","iopub.status.busy":"2021-06-21T12:37:07.352464Z","iopub.status.idle":"2021-06-21T12:37:07.367808Z","shell.execute_reply":"2021-06-21T12:37:07.366250Z","shell.execute_reply.started":"2021-06-21T12:37:07.352854Z"},"trusted":true},"outputs":[],"source":["movies['cast'] = movies['cast'].apply(lambda x:x[0:3])"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:37:09.763317Z","iopub.status.busy":"2021-06-21T12:37:09.762909Z","iopub.status.idle":"2021-06-21T12:37:09.770917Z","shell.execute_reply":"2021-06-21T12:37:09.770002Z","shell.execute_reply.started":"2021-06-21T12:37:09.763278Z"},"trusted":true},"outputs":[],"source":["def fetch_director(text):\n"," L = []\n"," for i in ast.literal_eval(text):\n"," if i['job'] == 'Director':\n"," L.append(i['name'])\n"," return L "]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:37:12.190468Z","iopub.status.busy":"2021-06-21T12:37:12.189921Z","iopub.status.idle":"2021-06-21T12:37:15.825662Z","shell.execute_reply":"2021-06-21T12:37:15.824562Z","shell.execute_reply.started":"2021-06-21T12:37:12.190407Z"},"trusted":true},"outputs":[],"source":["movies['crew'] = movies['crew'].apply(fetch_director)"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:34:54.994779Z","iopub.status.busy":"2021-06-21T12:34:54.994397Z","iopub.status.idle":"2021-06-21T12:34:55.019276Z","shell.execute_reply":"2021-06-21T12:34:55.017718Z","shell.execute_reply.started":"2021-06-21T12:34:54.994738Z"},"trusted":true},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," 
vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>movie_id</th>\n"," <th>title</th>\n"," <th>overview</th>\n"," <th>genres</th>\n"," <th>keywords</th>\n"," <th>cast</th>\n"," <th>crew</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>1183</th>\n"," <td>9889</td>\n"," <td>Shallow Hal</td>\n"," <td>A shallow man falls in love with a 300 pound w...</td>\n"," <td>[Comedy, Romance]</td>\n"," <td>[overweight, beauty, hypnosis, overweight man,...</td>\n"," <td>[Gwyneth Paltrow, Jack Black, Jason Alexander]</td>\n"," <td>[Bobby Farrelly, Peter Farrelly]</td>\n"," </tr>\n"," <tr>\n"," <th>4215</th>\n"," <td>15976</td>\n"," <td>The Bubble</td>\n"," <td>The movie follows a group of young friends in ...</td>\n"," <td>[Drama, Romance]</td>\n"," <td>[gay]</td>\n"," <td>[Ohad Knoller, Yousef Sweid, Daniella Wircer]</td>\n"," <td>[Eytan Fox]</td>\n"," </tr>\n"," <tr>\n"," <th>1571</th>\n"," <td>22947</td>\n"," <td>Up in the Air</td>\n"," <td>George Clooney plays the dry cynical character...</td>\n"," <td>[Drama, Romance]</td>\n"," <td>[suitcase, business, omaha, on the road, downs...</td>\n"," <td>[George Clooney, Vera Farmiga, Anna Kendrick]</td>\n"," <td>[Jason Reitman]</td>\n"," </tr>\n"," <tr>\n"," <th>205</th>\n"," <td>58574</td>\n"," <td>Sherlock Holmes: A Game of Shadows</td>\n"," <td>There is a new criminal mastermind at large (P...</td>\n"," <td>[Adventure, Action, Crime, Mystery]</td>\n"," <td>[detective inspector, steampunk, criminal mast...</td>\n"," <td>[Robert Downey Jr., Jude Law, Jared Harris]</td>\n"," <td>[Guy Ritchie]</td>\n"," </tr>\n"," <tr>\n"," <th>352</th>\n"," <td>10674</td>\n"," <td>Mulan</td>\n"," <td>A tomboyish girl disguises herself as a young ...</td>\n"," <td>[Animation, Family, 
Adventure]</td>\n"," <td>[homeland, musical, training, daughter, cricke...</td>\n"," <td>[Eddie Murphy, Jackie Chan, Ming-Na Wen]</td>\n"," <td>[Tony Bancroft, Barry Cook]</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" movie_id title \\\n","1183 9889 Shallow Hal \n","4215 15976 The Bubble \n","1571 22947 Up in the Air \n","205 58574 Sherlock Holmes: A Game of Shadows \n","352 10674 Mulan \n","\n"," overview \\\n","1183 A shallow man falls in love with a 300 pound w... \n","4215 The movie follows a group of young friends in ... \n","1571 George Clooney plays the dry cynical character... \n","205 There is a new criminal mastermind at large (P... \n","352 A tomboyish girl disguises herself as a young ... \n","\n"," genres \\\n","1183 [Comedy, Romance] \n","4215 [Drama, Romance] \n","1571 [Drama, Romance] \n","205 [Adventure, Action, Crime, Mystery] \n","352 [Animation, Family, Adventure] \n","\n"," keywords \\\n","1183 [overweight, beauty, hypnosis, overweight man,... \n","4215 [gay] \n","1571 [suitcase, business, omaha, on the road, downs... \n","205 [detective inspector, steampunk, criminal mast... \n","352 [homeland, musical, training, daughter, cricke... 
\n","\n"," cast \\\n","1183 [Gwyneth Paltrow, Jack Black, Jason Alexander] \n","4215 [Ohad Knoller, Yousef Sweid, Daniella Wircer] \n","1571 [George Clooney, Vera Farmiga, Anna Kendrick] \n","205 [Robert Downey Jr., Jude Law, Jared Harris] \n","352 [Eddie Murphy, Jackie Chan, Ming-Na Wen] \n","\n"," crew \n","1183 [Bobby Farrelly, Peter Farrelly] \n","4215 [Eytan Fox] \n","1571 [Jason Reitman] \n","205 [Guy Ritchie] \n","352 [Tony Bancroft, Barry Cook] "]},"execution_count":22,"metadata":{},"output_type":"execute_result"}],"source":["#movies['overview'] = movies['overview'].apply(lambda x:x.split())\n","movies.sample(5)"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:37:20.073320Z","iopub.status.busy":"2021-06-21T12:37:20.072749Z","iopub.status.idle":"2021-06-21T12:37:20.079118Z","shell.execute_reply":"2021-06-21T12:37:20.077997Z","shell.execute_reply.started":"2021-06-21T12:37:20.073270Z"},"trusted":true},"outputs":[],"source":["def collapse(L):\n"," L1 = []\n"," for i in L:\n"," L1.append(i.replace(\" \",\"\"))\n"," return L1"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:37:21.283329Z","iopub.status.busy":"2021-06-21T12:37:21.282768Z","iopub.status.idle":"2021-06-21T12:37:21.486755Z","shell.execute_reply":"2021-06-21T12:37:21.485878Z","shell.execute_reply.started":"2021-06-21T12:37:21.283292Z"},"trusted":true},"outputs":[],"source":["movies['cast'] = movies['cast'].apply(collapse)\n","movies['crew'] = movies['crew'].apply(collapse)\n","movies['genres'] = movies['genres'].apply(collapse)\n","movies['keywords'] = 
movies['keywords'].apply(collapse)"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:35:23.278589Z","iopub.status.busy":"2021-06-21T12:35:23.278025Z","iopub.status.idle":"2021-06-21T12:35:23.311346Z","shell.execute_reply":"2021-06-21T12:35:23.309971Z","shell.execute_reply.started":"2021-06-21T12:35:23.278539Z"},"trusted":true},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>movie_id</th>\n"," <th>title</th>\n"," <th>overview</th>\n"," <th>genres</th>\n"," <th>keywords</th>\n"," <th>cast</th>\n"," <th>crew</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>19995</td>\n"," <td>Avatar</td>\n"," <td>In the 22nd century, a paraplegic Marine is di...</td>\n"," <td>[Action, Adventure, Fantasy, ScienceFiction]</td>\n"," <td>[cultureclash, future, spacewar, spacecolony, ...</td>\n"," <td>[SamWorthington, ZoeSaldana, SigourneyWeaver]</td>\n"," <td>[JamesCameron]</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>285</td>\n"," <td>Pirates of the Caribbean: At World's End</td>\n"," <td>Captain Barbossa, long believed to be dead, ha...</td>\n"," <td>[Adventure, Fantasy, Action]</td>\n"," <td>[ocean, drugabuse, exoticisland, eastindiatrad...</td>\n"," <td>[JohnnyDepp, OrlandoBloom, KeiraKnightley]</td>\n"," <td>[GoreVerbinski]</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>206647</td>\n"," <td>Spectre</td>\n"," <td>A cryptic message from Bond’s past sends him o...</td>\n"," <td>[Action, Adventure, Crime]</td>\n"," <td>[spy, basedonnovel, secretagent, sequel, mi6, ...</td>\n"," <td>[DanielCraig, ChristophWaltz, LéaSeydoux]</td>\n"," 
<td>[SamMendes]</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>49026</td>\n"," <td>The Dark Knight Rises</td>\n"," <td>Following the death of District Attorney Harve...</td>\n"," <td>[Action, Crime, Drama, Thriller]</td>\n"," <td>[dccomics, crimefighter, terrorist, secretiden...</td>\n"," <td>[ChristianBale, MichaelCaine, GaryOldman]</td>\n"," <td>[ChristopherNolan]</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>49529</td>\n"," <td>John Carter</td>\n"," <td>John Carter is a war-weary, former military ca...</td>\n"," <td>[Action, Adventure, ScienceFiction]</td>\n"," <td>[basedonnovel, mars, medallion, spacetravel, p...</td>\n"," <td>[TaylorKitsch, LynnCollins, SamanthaMorton]</td>\n"," <td>[AndrewStanton]</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" movie_id title \\\n","0 19995 Avatar \n","1 285 Pirates of the Caribbean: At World's End \n","2 206647 Spectre \n","3 49026 The Dark Knight Rises \n","4 49529 John Carter \n","\n"," overview \\\n","0 In the 22nd century, a paraplegic Marine is di... \n","1 Captain Barbossa, long believed to be dead, ha... \n","2 A cryptic message from Bond’s past sends him o... \n","3 Following the death of District Attorney Harve... \n","4 John Carter is a war-weary, former military ca... \n","\n"," genres \\\n","0 [Action, Adventure, Fantasy, ScienceFiction] \n","1 [Adventure, Fantasy, Action] \n","2 [Action, Adventure, Crime] \n","3 [Action, Crime, Drama, Thriller] \n","4 [Action, Adventure, ScienceFiction] \n","\n"," keywords \\\n","0 [cultureclash, future, spacewar, spacecolony, ... \n","1 [ocean, drugabuse, exoticisland, eastindiatrad... \n","2 [spy, basedonnovel, secretagent, sequel, mi6, ... \n","3 [dccomics, crimefighter, terrorist, secretiden... \n","4 [basedonnovel, mars, medallion, spacetravel, p... 
\n","\n"," cast crew \n","0 [SamWorthington, ZoeSaldana, SigourneyWeaver] [JamesCameron] \n","1 [JohnnyDepp, OrlandoBloom, KeiraKnightley] [GoreVerbinski] \n","2 [DanielCraig, ChristophWaltz, LéaSeydoux] [SamMendes] \n","3 [ChristianBale, MichaelCaine, GaryOldman] [ChristopherNolan] \n","4 [TaylorKitsch, LynnCollins, SamanthaMorton] [AndrewStanton] "]},"execution_count":26,"metadata":{},"output_type":"execute_result"}],"source":["movies.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:37:31.936583Z","iopub.status.busy":"2021-06-21T12:37:31.936003Z","iopub.status.idle":"2021-06-21T12:37:31.975155Z","shell.execute_reply":"2021-06-21T12:37:31.973928Z","shell.execute_reply.started":"2021-06-21T12:37:31.936546Z"},"trusted":true},"outputs":[],"source":["movies['overview'] = movies['overview'].apply(lambda x:x.split())"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:37:34.466349Z","iopub.status.busy":"2021-06-21T12:37:34.465925Z","iopub.status.idle":"2021-06-21T12:37:34.572742Z","shell.execute_reply":"2021-06-21T12:37:34.571676Z","shell.execute_reply.started":"2021-06-21T12:37:34.466313Z"},"trusted":true},"outputs":[],"source":["movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:37:40.975174Z","iopub.status.busy":"2021-06-21T12:37:40.974644Z","iopub.status.idle":"2021-06-21T12:37:40.981843Z","shell.execute_reply":"2021-06-21T12:37:40.981059Z","shell.execute_reply.started":"2021-06-21T12:37:40.975140Z"},"trusted":true},"outputs":[],"source":["new = 
movies.drop(columns=['overview','genres','keywords','cast','crew'])\n","#new.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:38:00.131473Z","iopub.status.busy":"2021-06-21T12:38:00.130879Z","iopub.status.idle":"2021-06-21T12:38:00.160438Z","shell.execute_reply":"2021-06-21T12:38:00.159476Z","shell.execute_reply.started":"2021-06-21T12:38:00.131439Z"},"trusted":true},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>movie_id</th>\n"," <th>title</th>\n"," <th>tags</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>19995</td>\n"," <td>Avatar</td>\n"," <td>In the 22nd century, a paraplegic Marine is di...</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>285</td>\n"," <td>Pirates of the Caribbean: At World's End</td>\n"," <td>Captain Barbossa, long believed to be dead, ha...</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>206647</td>\n"," <td>Spectre</td>\n"," <td>A cryptic message from Bond’s past sends him o...</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>49026</td>\n"," <td>The Dark Knight Rises</td>\n"," <td>Following the death of District Attorney Harve...</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>49529</td>\n"," <td>John Carter</td>\n"," <td>John Carter is a war-weary, former military ca...</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" movie_id title \\\n","0 19995 Avatar \n","1 285 Pirates of the Caribbean: At World's End \n","2 206647 Spectre \n","3 49026 The Dark Knight Rises \n","4 49529 John Carter \n","\n"," tags \n","0 In the 22nd century, a paraplegic Marine is di... 
\n","1 Captain Barbossa, long believed to be dead, ha... \n","2 A cryptic message from Bond’s past sends him o... \n","3 Following the death of District Attorney Harve... \n","4 John Carter is a war-weary, former military ca... "]},"execution_count":48,"metadata":{},"output_type":"execute_result"}],"source":["new['tags'] = new['tags'].apply(lambda x: \" \".join(x))\n","new.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:38:06.545110Z","iopub.status.busy":"2021-06-21T12:38:06.544599Z","iopub.status.idle":"2021-06-21T12:38:07.488307Z","shell.execute_reply":"2021-06-21T12:38:07.487238Z","shell.execute_reply.started":"2021-06-21T12:38:06.545079Z"},"trusted":true},"outputs":[],"source":["from sklearn.feature_extraction.text import CountVectorizer\n","cv = CountVectorizer(max_features=5000,stop_words='english')\n"," "]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:38:10.164314Z","iopub.status.busy":"2021-06-21T12:38:10.163889Z","iopub.status.idle":"2021-06-21T12:38:10.821200Z","shell.execute_reply":"2021-06-21T12:38:10.820175Z","shell.execute_reply.started":"2021-06-21T12:38:10.164279Z"},"trusted":true},"outputs":[],"source":["vector = 
cv.fit_transform(new['tags']).toarray()"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-19T14:00:41.894747Z","iopub.status.busy":"2021-06-19T14:00:41.894223Z","iopub.status.idle":"2021-06-19T14:00:41.900786Z","shell.execute_reply":"2021-06-19T14:00:41.899989Z","shell.execute_reply.started":"2021-06-19T14:00:41.894699Z"},"trusted":true},"outputs":[],"source":["vector.shape"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:38:13.339451Z","iopub.status.busy":"2021-06-21T12:38:13.339041Z","iopub.status.idle":"2021-06-21T12:38:13.390575Z","shell.execute_reply":"2021-06-21T12:38:13.389373Z","shell.execute_reply.started":"2021-06-21T12:38:13.339412Z"},"trusted":true},"outputs":[],"source":["from sklearn.metrics.pairwise import cosine_similarity"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:38:15.770001Z","iopub.status.busy":"2021-06-21T12:38:15.769495Z","iopub.status.idle":"2021-06-21T12:38:18.170463Z","shell.execute_reply":"2021-06-21T12:38:18.169319Z","shell.execute_reply.started":"2021-06-21T12:38:15.769960Z"},"trusted":true},"outputs":[],"source":["similarity = 
cosine_similarity(vector)"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-19T14:06:45.473087Z","iopub.status.busy":"2021-06-19T14:06:45.472777Z","iopub.status.idle":"2021-06-19T14:06:45.479647Z","shell.execute_reply":"2021-06-19T14:06:45.478831Z","shell.execute_reply.started":"2021-06-19T14:06:45.473061Z"},"trusted":true},"outputs":[],"source":["similarity"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-19T14:08:46.716222Z","iopub.status.busy":"2021-06-19T14:08:46.7159Z","iopub.status.idle":"2021-06-19T14:08:46.7239Z","shell.execute_reply":"2021-06-19T14:08:46.722946Z","shell.execute_reply.started":"2021-06-19T14:08:46.716196Z"},"trusted":true},"outputs":[],"source":["new[new['title'] == 'The Lego Movie'].index[0]"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:38:22.542900Z","iopub.status.busy":"2021-06-21T12:38:22.542487Z","iopub.status.idle":"2021-06-21T12:38:22.549786Z","shell.execute_reply":"2021-06-21T12:38:22.548271Z","shell.execute_reply.started":"2021-06-21T12:38:22.542867Z"},"trusted":true},"outputs":[],"source":["def recommend(movie):\n"," index = new[new['title'] == movie].index[0]\n"," distances = sorted(list(enumerate(similarity[index])),reverse=True,key = lambda x: x[1])\n"," for i in distances[1:6]:\n"," print(new.iloc[i[0]].title)\n"," \n"," "]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:38:24.914650Z","iopub.status.busy":"2021-06-21T12:38:24.914258Z","iopub.status.idle":"2021-06-21T12:38:24.930018Z","shell.execute_reply":"2021-06-21T12:38:24.928876Z","shell.execute_reply.started":"2021-06-21T12:38:24.914616Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["Gandhi, My Father\n","The Wind That Shakes the Barley\n","A Passage to India\n","Guiana 
1838\n","Ramanujan\n"]}],"source":["recommend('Gandhi')"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:39:59.606924Z","iopub.status.busy":"2021-06-21T12:39:59.606475Z","iopub.status.idle":"2021-06-21T12:39:59.612177Z","shell.execute_reply":"2021-06-21T12:39:59.610858Z","shell.execute_reply.started":"2021-06-21T12:39:59.606890Z"},"trusted":true},"outputs":[],"source":["import pickle"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2021-06-21T12:40:53.373581Z","iopub.status.busy":"2021-06-21T12:40:53.373186Z","iopub.status.idle":"2021-06-21T12:40:53.784869Z","shell.execute_reply":"2021-06-21T12:40:53.783635Z","shell.execute_reply.started":"2021-06-21T12:40:53.373547Z"},"trusted":true},"outputs":[],"source":["pickle.dump(new,open('movie_list.pkl','wb'))\n","pickle.dump(similarity,open('similarity.pkl','wb'))"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.12"},"vscode":{"interpreter":{"hash":"97cc609b13305c559618ec78a438abc56230b9381f827f22d070313b9a1f3777"}}},"nbformat":4,"nbformat_minor":4}
requirements.txt ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ altair==5.0.1
2
+ attrs==23.1.0
3
+ blinker==1.6.2
4
+ cachetools==5.3.1
5
+ certifi==2023.5.7
6
+ charset-normalizer==3.2.0
7
+ click==8.1.6
8
+ decorator==5.1.1
9
+ gitdb==4.0.10
10
+ GitPython==3.1.32
11
+ idna==3.4
12
+ importlib-metadata==6.8.0
13
+ Jinja2==3.1.2
14
+ jsonschema==4.18.4
15
+ jsonschema-specifications==2023.7.1
16
+ markdown-it-py==3.0.0
17
+ MarkupSafe==2.1.3
18
+ mdurl==0.1.2
19
+ numpy==1.25.1
20
+ packaging==23.1
21
+ pandas==2.0.3
22
+ Pillow==9.5.0
23
+ protobuf==4.23.4
24
+ pyarrow==12.0.1
25
+ pydeck==0.8.0
26
+ Pygments==2.15.1
27
+ Pympler==1.0.1
28
+ python-dateutil==2.8.2
29
+ pytz==2023.3
30
+ pytz-deprecation-shim==0.1.0.post0
31
+ referencing==0.30.0
32
+ requests==2.31.0
33
+ rich==13.4.2
34
+ rpds-py==0.9.2
35
+ six==1.16.0
36
+ smmap==5.0.0
37
+ streamlit==1.25.0
38
+ tenacity==8.2.2
39
+ toml==0.10.2
40
+ toolz==0.12.0
41
+ tornado==6.3.2
42
+ typing_extensions==4.7.1
43
+ tzdata==2023.3
44
+ tzlocal==4.3.1
45
+ urllib3==2.0.4
46
+ validators==0.20.0
47
+ watchdog==3.0.0
48
+ zipp==3.16.2
similarity.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a9e28d112e4e4bdf3a707790ac03588ec19cc8ef1f0bdc9978825f5cf89c01
3
+ size 184781251