laxminarasimha6 committed
Commit bcf31c9
1 Parent(s): f6672ae

Upload 5 files

Files changed (5)
  1. .env +1 -0
  2. app.py +78 -0
  3. courses.db +0 -0
  4. main.py +76 -0
  5. requirements.txt +7 -0
.env ADDED
@@ -0,0 +1 @@
+ GOOGLE_API_KEY = "AIzaSyDuDsrNUG3OnOawsxaop-pYEjXFlCXUfws"
app.py ADDED
@@ -0,0 +1,78 @@
+ import os
+ import requests
+ from bs4 import BeautifulSoup
+ import sqlite3
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+
+ # Database setup
+ def create_database():
+     conn = sqlite3.connect('courses.db')
+     c = conn.cursor()
+     # Create table with an additional column 'price' to indicate free or paid
+     c.execute('''CREATE TABLE IF NOT EXISTS courses
+                  (id INTEGER PRIMARY KEY, title TEXT, description TEXT, price TEXT)''')
+     conn.commit()
+     conn.close()
+
+
+ # Web scraping function to get course data from a specific page
+ def scrape_courses_from_page(page_number):
+     url = f"https://courses.analyticsvidhya.com/collections/courses?page={page_number}"
+     response = requests.get(url)
+     soup = BeautifulSoup(response.text, 'html.parser')
+
+     courses = []
+
+     # Find all course items in the products__list
+     course_items = soup.find_all('li', class_='products__list-item')
+
+     for course_item in course_items:
+         # Extract the course title
+         title_tag = course_item.find('h3')
+         title = title_tag.text.strip() if title_tag else 'No title available'
+
+         # Extract the course price
+         price_tag = course_item.find('span', class_='course-card__price')
+         price = price_tag.text.strip() if price_tag else 'Price not available'
+
+         # A description is not always provided explicitly; fall back to the course subtitle (h4)
+         description_tag = course_item.find('h4')
+         description = description_tag.text.strip() if description_tag else 'No description available'
+
+         # Append course details (title, description, price)
+         courses.append((title, description, price))
+
+     return courses
+
+
+ # Scrape all pages (8 in total) and collect the course data
+ def scrape_all_pages():
+     all_courses = []
+
+     # Loop through pages 1 to 8
+     for page in range(1, 9):
+         print(f"Scraping page {page}...")
+         courses = scrape_courses_from_page(page)
+         all_courses.extend(courses)
+
+     return all_courses
+
+
+ # Insert scraped data into the database
+ def insert_data_to_db(courses):
+     conn = sqlite3.connect('courses.db')
+     c = conn.cursor()
+     c.executemany('INSERT INTO courses (title, description, price) VALUES (?, ?, ?)', courses)
+     conn.commit()
+     conn.close()
+
+
+ if __name__ == "__main__":
+     create_database()
+     all_courses = scrape_all_pages()
+     insert_data_to_db(all_courses)
+     print("Data from all pages has been successfully scraped and inserted into the database.")
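A quick way to confirm the scraper worked is to query the generated SQLite file directly. The snippet below is a minimal sanity check, not part of this commit: it assumes app.py has already been run from the same directory, and the LIKE '%Free%' filter is only a guess at how free courses are labelled on the listing page.

    import sqlite3

    # Summarise what app.py wrote into courses.db
    conn = sqlite3.connect('courses.db')
    c = conn.cursor()

    total = c.execute("SELECT COUNT(*) FROM courses").fetchone()[0]
    free = c.execute("SELECT COUNT(*) FROM courses WHERE price LIKE '%Free%'").fetchone()[0]  # assumed label
    print(f"{total} courses scraped, {free} marked as free")

    # Show a few sample rows
    for title, price in c.execute("SELECT title, price FROM courses LIMIT 5"):
        print(f"- {title} ({price})")

    conn.close()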
courses.db ADDED
Binary file (20.5 kB)
 
main.py ADDED
@@ -0,0 +1,76 @@
+ import os
+ import sqlite3
+ import streamlit as st
+ import google.generativeai as genai
+ from dotenv import load_dotenv
+
+ # Load environment variables from .env
+ load_dotenv()
+
+ # Set up the Google API key for Gemini
+ api_key = os.getenv("GOOGLE_API_KEY")
+ if api_key is None:
+     st.error("GOOGLE_API_KEY not found in environment variables. Please check your .env file.")
+ else:
+     # Configure the Google Generative AI API
+     genai.configure(api_key=api_key)
+
+
+ # Fetch all courses from the SQLite database
+ def fetch_all_courses():
+     conn = sqlite3.connect('courses.db')
+     cur = conn.cursor()
+     cur.execute("SELECT title, description, price FROM courses")
+     rows = cur.fetchall()
+     conn.close()
+     return rows
+
+
+ # Generate a response with Google Generative AI from the user prompt and the available courses
+ def generate_response(prompt, courses):
+     try:
+         # Prepare a detailed context prompt for the LLM
+         course_details = "\n".join(
+             [f"Title: {course[0]}, Description: {course[1]}, Price: {course[2]}" for course in courses])
+
+         genai_prompt = f"""
+         You are an expert assistant tasked with finding relevant courses based on user queries.
+         Below are details of available courses:
+
+         {course_details}
+
+         Based on this information, respond to the user's query in the most relevant way:
+         {prompt}
+         """
+
+         # Generate a response using Google Generative AI
+         model = genai.GenerativeModel('gemini-pro')
+         response = model.generate_content(genai_prompt)  # the user query is already embedded in genai_prompt
+         return response.text.strip()  # Return the natural language response
+     except Exception as e:
+         st.error(f"Error generating a response: {e}")
+         return None
+
+
+ # Streamlit interface
+ st.set_page_config(page_title="Smart Search for Courses")
+ st.header("Find Relevant Courses on Analytics Vidhya")
+
+ # User prompt input
+ user_query = st.text_input("Enter your search query (e.g., 'Show me all free courses on machine learning'):")
+
+ submit = st.button("Search")
+
+ # Fetch all courses from the database
+ courses = fetch_all_courses()
+
+ # If the user submits a query
+ if submit and user_query:
+     # Generate a response from Google Generative AI
+     response = generate_response(user_query, courses)
+
+     if response:
+         st.subheader("Search Results:")
+         st.write(response)
+     else:
+         st.write("Could not generate a response. Please try again.")
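Before wiring the Gemini call into Streamlit, it can help to exercise the same request from a plain Python script. The sketch below mirrors the prompt construction in generate_response; it assumes courses.db already exists, GOOGLE_API_KEY is set in .env, and the example question is arbitrary.

    import os
    import sqlite3
    import google.generativeai as genai
    from dotenv import load_dotenv

    load_dotenv()
    genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

    # Build the same kind of context prompt main.py uses, from a handful of rows
    conn = sqlite3.connect('courses.db')
    rows = conn.execute("SELECT title, description, price FROM courses LIMIT 20").fetchall()
    conn.close()

    context = "\n".join(f"Title: {t}, Description: {d}, Price: {p}" for t, d, p in rows)
    question = "Which of these courses are free and about machine learning?"  # arbitrary test query

    model = genai.GenerativeModel('gemini-pro')
    response = model.generate_content(f"{context}\n\nUser query: {question}")
    print(response.text)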
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ requests
+ beautifulsoup4
+ gradio
+ streamlit
+ google-generativeai
+ python-dotenv
+ openai
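To reproduce the app locally, the expected flow is roughly: install the dependencies with pip install -r requirements.txt, run python app.py once to build courses.db, then launch the UI with streamlit run main.py. The .env file must provide a valid GOOGLE_API_KEY for the Gemini calls to succeed.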