import torch
import pandas as pd
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline
from huggingface_hub import login
# Hugging Face token from Streamlit secrets
huggingface_token = st.secrets["HUGGINGFACEHUB_API_TOKEN"]
login(huggingface_token)
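# A minimal sketch of the expected secrets file (.streamlit/secrets.toml locally,
# or the Space's "Secrets" settings); the key name matches the lookup above and
# the value shown is illustrative:
# HUGGINGFACEHUB_API_TOKEN = "hf_..."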
# Load Llama 3.2
# model_name = "meta-llama/Llama-3.2-3B-Instruct"
model_name = "meta-llama/Llama-3.2-1B-Instruct"
model = AutoModelForCausalLM.from_pretrained(model_name)
# device_map is a model-loading argument, not a tokenizer one; device placement
# is handled by the pipeline's 'device' argument below
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Detect whether a GPU is available and select the device accordingly
device = 0 if torch.cuda.is_available() else -1
# Configure the text-generation pipeline (generation length is set per call below,
# so no max_length is fixed here)
pipe = pipeline(task='text-generation', model=model, tokenizer=tokenizer, device=device)
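# Optional: Instruct checkpoints are trained on a chat format, so building the
# prompt via the tokenizer's chat template may work better than a raw string.
# A minimal sketch (assumes the checkpoint ships a chat template, as Llama
# Instruct models do):
# messages = [{"role": "user", "content": "Rank these job titles by relevance..."}]
# chat_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)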
# Wrap the transformers pipeline for use with LangChain
llm_pipeline = HuggingFacePipeline(pipeline=pipe)
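# The wrapper above is not used further in this app; a minimal usage sketch,
# assuming a LangChain version where LLMs expose the Runnable .invoke() API:
# answer = llm_pipeline.invoke("Describe what a human resources specialist does.")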
# Streamlit interface (the loaded model is Llama 3.2, so the title matches it)
st.title("Cosine Similarity with Llama 3.2")
# Initialize the default query
query = "aspiring human resources specialist"
# Upload CSV file
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
print("Query: ", query)
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    if 'job_title' not in df.columns:
        st.error("The uploaded CSV must contain a 'job_title' column.")
    else:
        job_titles = df['job_title'].tolist()
        if query:
            st.write("Query:", query)
            prompt = f"""
You are an AI assistant. You have a list of job titles and a search query.
Your task is to rank these job titles by their semantic similarity to the given query.
Please provide the ranking from most relevant to least relevant.
Do not calculate cosine similarity; instead, focus on understanding the semantic relevance of each job title to the query.
Format your response like this:
1. [Most Relevant Job Title]
2. [Second Most Relevant Job Title]
...
N. [Least Relevant Job Title]
Query: "{query}"
Job Titles: {job_titles}
"""
            # Call the model with the prompt; max_new_tokens alone avoids the
            # max_length/max_new_tokens conflict warning
            try:
                response = pipe(prompt, max_new_tokens=300, num_return_sequences=1)
                # Show the model's answer
                st.write("Model Answer:")
                st.write(response[0]['generated_text'])
            except Exception as e:
                st.error(f"Error while processing: {str(e)}")
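            # Note: text-generation pipelines return the prompt followed by the
            # completion in 'generated_text'. A minimal sketch to display only
            # the new text (alternatively, pass return_full_text=False in the
            # pipeline call):
            # completion = response[0]['generated_text'][len(prompt):].strip()
            # st.write(completion)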