File size: 2,400 Bytes
e294914 0871e09 e294914 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import os
import re
import yaml
from typing import Any, Dict, List
from langchain.prompts import PromptTemplate
# Working directory of the process at import time; the prompts file is
# resolved relative to it, so the module must be imported from a directory
# that contains "utils/prompts.yaml".
cwd: str = os.getcwd()
# Path of the YAML file holding the prompt definitions. The "propmt" spelling
# is kept because this name is read elsewhere in the file.
propmt_file_path: str = os.path.join(cwd, "utils/prompts.yaml")
# Load prompts from yaml
def load_prompts() -> Any:
    """Load the prompt definitions from the YAML file at ``propmt_file_path``.

    Returns:
        The value stored under the top-level ``prompts`` key of the file, or
        ``None`` when the file cannot be read or parsed or the key is missing
        (the failure is reported on stdout).
    """
    try:
        # Decode explicitly as UTF-8 instead of relying on the platform
        # default encoding: the prompt selection code matches a Cyrillic
        # purpose label, so the file presumably contains non-ASCII text.
        with open(propmt_file_path, "r", encoding="utf-8") as f:
            return yaml.safe_load(f)['prompts']
    except Exception as e:
        # Deliberately best-effort: report the problem and signal failure
        # with an explicit None rather than propagating the exception.
        print(f"Reading prompts file has failed {e}")
        return None
# Preprocess text and keywords
def __preprocess_text(text: str) -> List[Any]:
    """Split ``text`` into lower-cased word tokens.

    The text is lower-cased first, then every maximal run of regex word
    characters (letters, digits, underscore) is returned, in order of
    appearance and including repeats.
    """
    lowered = text.lower()
    word_pattern = r'\b\w+\b'
    tokens = re.findall(word_pattern, lowered)
    return tokens
def __preprocess_keywords(keywords: Dict[str, List[str]]) -> Dict[str, List[str]]:
    """Build a lower-cased lookup table from the keyword configuration.

    Every category of ``keywords`` is kept, in the same order. Note that each
    category maps to a ``set`` of its lower-cased keywords (not a list, despite
    the annotation), so duplicates collapse and membership tests are cheap.
    """
    return {
        name: {term.lower() for term in terms}
        for name, terms in keywords.items()
    }
# Check for keywords in input text
def __check_for_keywords(text: str, keywords: Dict[str, List[str]]) -> Dict[str, List[str]]:
    """Find which keywords of each category occur as whole words in ``text``.

    Matching is case-insensitive: both the text and the keywords are
    lower-cased before comparison. The text is compared token by token, so
    only single-word keywords can match.

    Args:
        text: The input text to scan.
        keywords: Mapping of category name to its list of keywords.

    Returns:
        A dict mapping every category with at least one hit to the distinct
        matched words, in order of first appearance in ``text``. Categories
        without a hit are omitted; category order follows ``keywords``.
    """
    preprocessed_keywords = __preprocess_keywords(keywords)
    words = __preprocess_text(text)

    matched_keywords = {}
    for category, kw_set in preprocessed_keywords.items():
        # dict.fromkeys de-duplicates while keeping first-occurrence order;
        # list(set(...)) would yield an ordering that varies between runs.
        matches = list(dict.fromkeys(word for word in words if word in kw_set))
        if matches:
            matched_keywords[category] = matches
    return matched_keywords
# Select the most appropriate prompt based on matched keywords
def select_prompt(input_text: str, prompts: Any, keywords: Dict[str, List[str]]) -> str:
    """Choose a prompt template for ``input_text`` and render it.

    The highest-rated prompt whose ``purpose`` contains ``'common'`` or
    ``'загальні'`` is the fallback. For every keyword category matched in the
    text, the highest-rated prompt whose ``purpose`` contains that category
    replaces the current selection, so the last matched category that has
    prompts wins.

    Args:
        input_text: Text to scan for keywords; also substituted for the
            ``entity`` variable of the chosen template.
        prompts: Iterable of prompt records, each read via the keys
            ``'purpose'``, ``'rate'`` and ``'prompt_template'``.
        keywords: Mapping of category name to its list of keywords.

    Returns:
        The rendered prompt string.

    Raises:
        ValueError: If ``prompts`` is empty or ``None``, or if neither a
            common prompt nor a prompt for any matched category exists.
    """
    if not prompts:
        # Prompt loading may have failed and produced None; report that
        # clearly instead of crashing on iteration or on a None selection.
        raise ValueError("No prompts available to select from")

    matched_keywords = __check_for_keywords(input_text, keywords)

    # Default to the highest rated common prompt if no specific category is matched
    selected_prompt = max(
        (p for p in prompts if 'common' in p['purpose'] or 'загальні' in p['purpose']),
        key=lambda p: p['rate'],
        default=None,
    )
    for category in matched_keywords:
        category_prompts = [p for p in prompts if category in p['purpose']]
        if category_prompts:
            selected_prompt = max(category_prompts, key=lambda p: p['rate'])

    if selected_prompt is None:
        raise ValueError(
            "No suitable prompt found: no common prompt is defined and no "
            "prompt matches the categories detected in the input text"
        )

    prompt_template = PromptTemplate(
        template=selected_prompt['prompt_template'],
        input_variables=['entity'],
    )
    return prompt_template.format(entity=input_text)