Course-Finder-AI-Large / text_processing.py
raghuv-aditya's picture
Create text_processing.py
68a165d verified
# Mis-decoded (mojibake) sequences matched by the earlier implementation.
# They are kept verbatim so text containing them is still cleaned up.
_LEGACY_SEQUENCES = (
    ("β€˜", "'"),
    ("β€œ", '"'),
    ("β€”", "-"),
)

# Typographic punctuation mapped to its plain ASCII equivalent.
_PUNCTUATION_TABLE = str.maketrans({
    "\u2018": "'",   # left single quotation mark
    "\u2019": "'",   # right single quotation mark / apostrophe
    "\u201c": '"',   # left double quotation mark
    "\u201d": '"',   # right double quotation mark
    "\u2013": "-",   # en dash
    "\u2014": "-",   # em dash
})


def sanitize_text(text):
    """
    Cleans and standardizes text by replacing typographic punctuation.

    Curly single and double quotes become straight quotes, and en/em dashes
    become hyphens. Both the opening and closing quote forms are handled.
    The multi-character mis-decoded sequences listed in ``_LEGACY_SEQUENCES``
    are replaced as well.

    Args:
        text: Text to sanitize. Values that are not ``str`` are returned
            unchanged.

    Returns:
        The sanitized string, or ``text`` itself when it is not a string.
    """
    if not isinstance(text, str):
        return text

    # Replace the multi-character sequences first: some of them end in a
    # character that the single-character table below would otherwise
    # rewrite, which would stop the full sequence from matching.
    for garbled, plain in _LEGACY_SEQUENCES:
        text = text.replace(garbled, plain)

    # One pass over the string for all single-character substitutions.
    return text.translate(_PUNCTUATION_TABLE)
def generate_text(course_data):
    """
    Formats scraped course data into structured text.

    Each course becomes a Markdown-style section: a ``##`` heading with the
    title, then labelled ``Description``, ``Curriculum`` and ``Link`` parts,
    closed by a dashed separator line. Every field is passed through
    ``sanitize_text`` before being inserted.

    Args:
        course_data (list): List of dictionaries containing course data.
            Each dictionary must provide the keys ``'Title'``,
            ``'Description'``, ``'Curriculum'`` and ``'Link'``.

    Returns:
        str: Formatted text of all courses, in input order. Empty string
        when ``course_data`` is empty.

    Raises:
        KeyError: If a course dictionary lacks one of the required keys.
    """
    # Collect the sections and join once instead of growing a string with
    # "+=" on every iteration.
    sections = []
    for course in course_data:
        sections.append(
            f"## {sanitize_text(course['Title'])}\n\n"
            f"**Description:**\n{sanitize_text(course['Description'])}\n\n"
            f"**Curriculum:**\n{sanitize_text(course['Curriculum'])}\n\n"
            f"**Link:**\n{sanitize_text(course['Link'])}\n\n"
            "-------------------------------------\n\n"
        )
    return "".join(sections)