import torch
import pandas as pd
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline
from huggingface_hub import login


# Hugging Face API token (read from Streamlit secrets)
huggingface_token = st.secrets["HUGGINGFACEHUB_API_TOKEN"]
login(huggingface_token)
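# Note: the meta-llama checkpoints are gated on the Hugging Face Hub; the token
# must belong to an account that has accepted the model's license terms.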

# Load Llama 3.2
# model_name = "meta-llama/Llama-3.2-3B-Instruct"
model_name = "meta-llama/Llama-3.2-1B-Instruct"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)  # device_map applies to model loading, not tokenizers

# Detect whether a GPU is available and select the pipeline device
device = 0 if torch.cuda.is_available() else -1  # 0 = first GPU, -1 = CPU

# Configure the text-generation pipeline (generation length is set per call below)
pipe = pipeline(task='text-generation', model=model, tokenizer=tokenizer, device=device)


# Wrap the transformers pipeline for LangChain (not used below, but kept available for chains)
llm_pipeline = HuggingFacePipeline(pipeline=pipe)

# Streamlit interface
st.title("Semantic Job Title Ranking with Llama 3.2")


# Default query used to rank the uploaded job titles
query = "aspiring human resources specialist"

# Upload CSV file
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
print("Query: ", query)

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)

    if 'job_title' not in df.columns:
        st.error("The uploaded CSV must contain a 'job_title' column.")
    else:
        job_titles = df['job_title'].tolist()

        if query:
            st.write("Query:", query)

            prompt = f"""
            You are an AI assistant. You have a list of job titles and a search query.
            Your task is to rank these job titles by their semantic similarity to the given query. 
            Please provide the ranking from most relevant to least relevant. 
            Do not calculate cosine similarity; instead, focus on understanding the semantic relevance of each job title to the query.
            
            Format your response like this:
            1. [Most Relevant Job Title]
            2. [Second Most Relevant Job Title]
            ...
            N. [Least Relevant Job Title]
            
            Query: "{query}"
            Job Titles: {job_titles}
            """

            # Call the model with the prompt
            try:
                # max_new_tokens bounds the generated ranking; max_length is dropped
                # because it conflicts with max_new_tokens. return_full_text=False
                # returns only the completion rather than echoing the prompt.
                response = pipe(prompt, max_new_tokens=300, num_return_sequences=1, return_full_text=False)

                # Display the model's answer
                st.write("Model Answer:")
                st.write(response[0]['generated_text'])

            except Exception as e:
                st.error(f"Error while processing: {str(e)}")