Spaces:
Build error
Build error
laxminarasimha6
committed on
Commit
•
bcf31c9
1
Parent(s):
f6672ae
Upload 5 files
Browse files- .env +1 -0
- app.py +78 -0
- courses.db +0 -0
- main.py +76 -0
- requirements.txt +7 -0
.env
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
GOOGLE_API_KEY = "<your-google-api-key>"
|
app.py
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import requests
|
3 |
+
from bs4 import BeautifulSoup
|
4 |
+
import sqlite3
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
|
7 |
+
# Load environment variables
|
8 |
+
load_dotenv()
|
9 |
+
|
10 |
+
|
11 |
+
# Database setup
|
12 |
+
def create_database(db_path='courses.db'):
    """Create the ``courses`` table if it does not already exist.

    The table has an integer ``id`` primary key and three text columns:
    ``title``, ``description`` and ``price``.

    Args:
        db_path: Path of the SQLite database file. Defaults to
            ``'courses.db'``, the file name used throughout this project.
    """
    from contextlib import closing

    # closing() releases the connection even when execute() raises, which
    # the previous open/execute/close sequence did not guarantee.
    with closing(sqlite3.connect(db_path)) as conn:
        conn.execute(
            '''CREATE TABLE IF NOT EXISTS courses
            (id INTEGER PRIMARY KEY, title TEXT, description TEXT, price TEXT)'''
        )
        conn.commit()
|
20 |
+
|
21 |
+
|
22 |
+
# Web scraping function to get course data from a specific page
|
23 |
+
def scrape_courses_from_page(page_number, timeout=30):
    """Scrape one listing page of the Analytics Vidhya course catalogue.

    Args:
        page_number: Value substituted into the ``page`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``(title, description, price)`` string tuples, one per
        ``<li class="products__list-item">`` element found on the page.
        Fields that are missing from an item are replaced by a fixed
        placeholder string.

    Raises:
        requests.exceptions.RequestException: On connection problems,
            timeouts, or a 4xx/5xx HTTP status.
    """
    url = f"https://courses.analyticsvidhya.com/collections/courses?page={page_number}"
    # Without an explicit timeout a stalled connection blocks indefinitely.
    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors instead of parsing an error page as "no courses".
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    def text_or(tag, fallback):
        # Stripped text of the tag, or the fallback when the tag is absent.
        return tag.text.strip() if tag else fallback

    courses = []
    for course_item in soup.find_all('li', class_='products__list-item'):
        title = text_or(course_item.find('h3'), 'No title available')
        price = text_or(
            course_item.find('span', class_='course-card__price'),
            "Price not available",
        )
        # The listing has no dedicated description element; the <h4> is
        # used as the closest available summary.
        description = text_or(course_item.find('h4'), 'No description available')
        courses.append((title, description, price))

    return courses
|
50 |
+
|
51 |
+
|
52 |
+
# Scrape all pages (total 8 pages) and insert data into the database
|
53 |
+
def scrape_all_pages(total_pages=8):
    """Scrape listing pages ``1..total_pages`` and return every course found.

    Args:
        total_pages: Number of listing pages to fetch, starting at page 1.
            Defaults to 8, the page count this script was written for.
            A value of zero or less fetches nothing.

    Returns:
        A single list with the tuples returned by
        ``scrape_courses_from_page`` for each page, in page order.
    """
    all_courses = []

    for page in range(1, total_pages + 1):
        print(f"Scraping page {page}...")
        all_courses.extend(scrape_courses_from_page(page))

    return all_courses
|
63 |
+
|
64 |
+
|
65 |
+
# Insert scraped data into the database
|
66 |
+
def insert_data_to_db(courses, db_path='courses.db'):
    """Insert course rows into the ``courses`` table.

    Args:
        courses: Iterable of ``(title, description, price)`` tuples.
        db_path: Path of the SQLite database file. Defaults to
            ``'courses.db'``, the file name used throughout this project.
    """
    from contextlib import closing

    # closing() releases the connection even when the insert fails; the
    # commit is only reached when every row was accepted.
    with closing(sqlite3.connect(db_path)) as conn:
        conn.executemany(
            'INSERT INTO courses (title, description, price) VALUES (?, ?, ?)',
            courses,
        )
        conn.commit()
|
72 |
+
|
73 |
+
|
74 |
+
# Script entry point: make sure the table exists, scrape every listing page,
# then store the results.
if __name__ == "__main__":
    create_database()
    all_courses = scrape_all_pages()
    insert_data_to_db(all_courses)
    # Plain string: the message has no placeholders, so no f-prefix is needed.
    print("Data from all pages has been successfully scraped and inserted into the database.")
|
courses.db
ADDED
Binary file (20.5 kB). View file
|
|
main.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sqlite3
|
3 |
+
import streamlit as st
|
4 |
+
import google.generativeai as genai
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
|
7 |
+
# Load environment variables from .env
|
8 |
+
load_dotenv()
|
9 |
+
|
10 |
+
# Set up the Google API key for Gemini
|
11 |
+
# Read the Gemini API key from the environment (load_dotenv() above adds the
# entries of a local .env file to it).
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    # An empty value is as unusable as a missing one, so both are reported.
    st.error("GOOGLE_API_KEY not found in environment variables. Please check your .env file.")
    # Nothing below can work without a key, so end this script run here.
    # NOTE(review): Streamlit has documented st.set_page_config as needing to
    # be the first Streamlit command; stopping here also avoids calling it
    # after st.error -- confirm against the installed Streamlit version.
    st.stop()
else:
    # Configure Google Generative AI API
    genai.configure(api_key=api_key)
|
17 |
+
|
18 |
+
|
19 |
+
# Function to fetch all courses from the SQLite database
|
20 |
+
def fetch_all_courses(db_path='courses.db'):
    """Return every course stored in the SQLite database.

    Args:
        db_path: Path of the SQLite database file. Defaults to
            ``'courses.db'``, the file name used throughout this project.

    Returns:
        A list of ``(title, description, price)`` tuples, one per row of
        the ``courses`` table.
    """
    from contextlib import closing

    # closing() releases the connection even when the query raises (for
    # example when the table has not been created yet).
    with closing(sqlite3.connect(db_path)) as conn:
        return conn.execute("SELECT title, description, price FROM courses").fetchall()
|
27 |
+
|
28 |
+
|
29 |
+
# Function to generate a response using Google Generative AI based on user prompt and available courses
|
30 |
+
def generate_response(prompt, courses):
    """Ask the Gemini model which of the given courses match a user query.

    Args:
        prompt: The user's free-text search query.
        courses: Sequence of rows whose first three items are the course
            title, description and price.

    Returns:
        The model's reply with surrounding whitespace removed, or ``None``
        when anything goes wrong; in that case the error is also shown in
        the Streamlit page.
    """
    try:
        # One line per course so the model sees the whole catalogue as context.
        course_details = "\n".join(
            f"Title: {course[0]}, Description: {course[1]}, Price: {course[2]}"
            for course in courses
        )

        genai_prompt = (
            "You are an expert assistant tasked with finding relevant courses based on user queries.\n"
            "Below are details of available courses:\n"
            "\n"
            f"{course_details}\n"
            "\n"
            "Based on this information, respond to the user's query in the most relevant way:\n"
            f"{prompt}\n"
        )

        # NOTE(review): confirm 'gemini-pro' is still a model name the API serves.
        model = genai.GenerativeModel('gemini-pro')
        # The user query is already embedded in genai_prompt; it used to be
        # passed a second time as an extra list element.
        response = model.generate_content(genai_prompt)
        return response.text.strip()
    except Exception as e:
        # UI boundary: report any failure to the user rather than crashing the app.
        st.error(f"Error generating a response: {e}")
        return None
|
53 |
+
|
54 |
+
|
55 |
+
# Streamlit interface
|
56 |
+
# Page chrome
st.set_page_config(page_title="Smart Search for Courses")
st.header("Find Relevant Courses on Analytics Vidhya")

# Query box and search button
user_query = st.text_input("Enter your search query (e.g., 'Show me all free courses on machine learning'):")
submit = st.button("Search")

if submit and user_query:
    # Streamlit re-runs this script on every interaction, so the database is
    # read only when a search has actually been requested.
    courses = fetch_all_courses()
    response = generate_response(user_query, courses)

    if response:
        st.subheader("Search Results:")
        st.write(response)
    else:
        st.write("Could not generate a response. Please try again.")
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
requests
|
2 |
+
beautifulsoup4
|
3 |
+
gradio
|
4 |
+
streamlit
|
5 |
+
google-generativeai  # replaces "sqlite3", which ships with Python and cannot be installed from PyPI
|
6 |
+
python-dotenv
|
7 |
+
openai
|