|
import os |
|
import time |
|
from operator import itemgetter |
|
from collections import Counter |
|
from langchain_community.document_loaders import PyPDFLoader, TextLoader |
|
from chainlit.types import AskFileResponse |
|
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter |
|
from langchain.schema.runnable import Runnable, RunnablePassthrough, RunnableLambda |
|
from langchain.schema.runnable.config import RunnableConfig |
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
from langchain.chains import ConversationalRetrievalChain, create_extraction_chain |
|
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler |
|
from langchain_community.llms import HuggingFaceEndpoint |
|
from langchain.chains import LLMChain |
|
from langchain_core.prompts import PromptTemplate |
|
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder |
|
from langchain.schema import StrOutputParser |
|
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT |
|
from langchain.chains.question_answering import load_qa_chain |
|
from langchain.chains.qa_with_sources import load_qa_with_sources_chain |
|
from langchain_pinecone import PineconeVectorStore |
|
from pinecone import Pinecone |
|
from langchain.memory import ChatMessageHistory, ConversationBufferMemory |
|
import pandas as pd |
|
import numpy as np |
|
import chainlit as cl |
|
from chainlit.input_widget import Select, TextInput |
|
from chainlit import user_session |
|
from offres_emploi import Api |
|
from offres_emploi.utils import dt_to_str_iso |
|
import datetime |
|
import plotly.express as px |
|
import bcrypt |
|
import json |
|
import requests |
|
import http.client |
|
from bs4 import BeautifulSoup |
|
from literalai import LiteralClient |
|
# Literal AI client; the API key is read from the LITERAL_API_KEY environment
# variable (os.getenv returns None when it is unset). The client is also used
# further down as a step decorator (`@literal_client.step`).
literal_client = LiteralClient(api_key=os.getenv("LITERAL_API_KEY"))

# Call the client's OpenAI instrumentation hook once at import time.
literal_client.instrument_openai()
|
|
|
@cl.password_auth_callback
def auth_callback(username: str, password: str):
    """Authenticate a login against the CHAINLIT_AUTH_LOGIN registry.

    The ``CHAINLIT_AUTH_LOGIN`` environment variable must contain a JSON list
    of objects exposing the keys ``ident``, ``pwd`` and ``role``.

    Args:
        username: Identifier typed in the login form.
        password: Password typed in the login form.

    Returns:
        A ``cl.User`` when the account exists, the password matches and the
        role is ``admindatapcc`` or ``userdatapcc``; ``None`` otherwise.

    Raises:
        KeyError: If ``CHAINLIT_AUTH_LOGIN`` is not set.
    """
    auth = json.loads(os.environ['CHAINLIT_AUTH_LOGIN'])

    # Single lookup with a default: an unknown username is a failed login
    # (None), not a StopIteration escaping from a bare next().
    account = next((d for d in auth if d['ident'] == username), None)
    if account is None:
        return None

    ident = account['ident']
    # The previous bcrypt comparison of `username` against a fresh hash of
    # `ident` could never fail (the entry is selected by ident == username),
    # so only the password comparison is kept.
    # NOTE(review): `pwd` is hashed on the fly, which suggests the registry
    # stores clear-text passwords — consider storing bcrypt hashes instead.
    password_ok = bcrypt.checkpw(
        password.encode('utf-8'),
        bcrypt.hashpw(account['pwd'].encode('utf-8'), bcrypt.gensalt()),
    )
    if not password_ok:
        return None

    role = account['role']
    if role == "admindatapcc":
        return cl.User(
            identifier=ident + " : 🧑💼 Admin Datapcc",
            metadata={"role": "admin", "provider": "credentials"},
        )
    if role == "userdatapcc":
        return cl.User(
            identifier=ident + " : 🧑🎓 User Datapcc",
            metadata={"role": "user", "provider": "credentials"},
        )
    # Any other role is not allowed to log in.
    return None
|
|
|
def process_file(file: AskFileResponse):
    """Load an uploaded file and split it into overlapping text chunks.

    Args:
        file: Uploaded file; its ``type`` (MIME type) selects the loader and
            its ``path`` is handed to that loader.

    Returns:
        The list of documents produced by a ``RecursiveCharacterTextSplitter``
        configured with 1000-character chunks and a 100-character overlap.

    Raises:
        ValueError: If ``file.type`` is neither ``text/plain`` nor
            ``application/pdf`` (previously this surfaced as an
            UnboundLocalError on ``Loader``).
    """
    if file.type == "text/plain":
        loader_cls = TextLoader
    elif file.type == "application/pdf":
        loader_cls = PyPDFLoader
    else:
        raise ValueError(f"Unsupported file type: {file.type!r}")

    documents = loader_cls(file.path).load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    return text_splitter.split_documents(documents)
|
|
|
def removeTags(all):
    """Drop <style>/<script> children of *all* and return its text, space-separated.

    *all* is called with the list of tag names to find, each hit is
    decomposed, then the element's ``stripped_strings`` are joined with a
    single space.

    NOTE(review): a second ``removeTags`` defined later in this file (joining
    without a separator) rebinds the name at import time, so this version is
    not the one callers actually reach.
    """
    for unwanted in all(['style', 'script']):
        unwanted.decompose()
    pieces = all.stripped_strings
    return ' '.join(pieces)
|
|
|
def localisation():
    """Return one centroid record per département code.

    Each record is a dict with the keys ``ID`` (département code as a
    string), ``Longitude`` and ``Latitude`` (floats). A new list of new dicts
    is built on every call.
    """
    # (ID, Longitude, Latitude)
    centroids = (
        ("01", 5.3245259, 46.0666003),
        ("02", 3.5960246, 49.5519632),
        ("03", 3.065278, 46.4002783),
        ("04", 6.2237688, 44.1105837),
        ("05", 6.2018836, 44.6630487),
        ("06", 7.0755745, 43.9463082),
        ("07", 4.3497308, 44.7626044),
        ("08", 4.6234893, 49.6473884),
        ("09", 1.6037147, 42.9696091),
        ("10", 4.1394954, 48.2963286),
        ("11", 2.3140163, 43.1111427),
        ("12", 2.7365234, 44.2786323),
        ("13", 5.0515492, 43.5539098),
        ("14", -0.3930779, 49.1024215),
        ("15", 2.6367657, 44.9643217),
        ("16", 0.180475, 45.706264),
        ("17", -0.7082589, 45.7629699),
        ("18", 2.5292424, 47.0926687),
        ("19", 1.8841811, 45.3622055),
        ("2A", 8.9906834, 41.8619761),
        ("2B", 9.275489, 42.372014),
        ("21", 4.7870471, 47.4736746),
        ("22", -2.9227591, 48.408402),
        ("23", 2.0265508, 46.0837382),
        ("24", 0.7140145, 45.1489678),
        ("25", 6.3991355, 47.1879451),
        ("26", 5.1717552, 44.8055408),
        ("27", 0.9488116, 49.1460288),
        ("28", 1.2793491, 48.3330017),
        ("29", -4.1577074, 48.2869945),
        ("30", 4.2650329, 43.9636468),
        ("31", 1.2728958, 43.3671081),
        ("32", 0.4220039, 43.657141),
        ("33", -0.5760716, 44.8406068),
        ("34", 3.4197556, 43.62585),
        ("35", -1.6443812, 48.1801254),
        ("36", 1.6509938, 46.7964222),
        ("37", 0.7085619, 47.2802601),
        ("38", 5.6230772, 45.259805),
        ("39", 5.612871, 46.7398138),
        ("40", -0.8771738, 44.0161251),
        ("41", 1.3989178, 47.5866519),
        ("42", 4.2262355, 45.7451186),
        ("43", 3.8118151, 45.1473029),
        ("44", -1.7642949, 47.4616509),
        ("45", 2.2372695, 47.8631395),
        ("46", 1.5732157, 44.6529284),
        ("47", 0.4788052, 44.4027215),
        ("48", 3.4991239, 44.5191573),
        ("49", -0.5136056, 47.3945201),
        ("50", -1.3203134, 49.0162072),
        ("51", 4.2966555, 48.9479636),
        ("52", 5.1325796, 48.1077196),
        ("53", -0.7073921, 48.1225795),
        ("54", 6.144792, 48.7995163),
        ("55", 5.2888292, 49.0074545),
        ("56", -2.8746938, 47.9239486),
        ("57", 6.5610683, 49.0399233),
        ("58", 3.5544332, 47.1122301),
        ("59", 3.2466616, 50.4765414),
        ("60", 2.4161734, 49.3852913),
        ("61", 0.2248368, 48.5558919),
        ("62", 2.2555152, 50.4646795),
        ("63", 3.1322144, 45.7471805),
        ("64", -0.793633, 43.3390984),
        ("65", 0.1478724, 43.0526238),
        ("66", 2.5239855, 42.5825094),
        ("67", 7.5962225, 48.662515),
        ("68", 7.2656284, 47.8586205),
        ("69", 4.6859896, 45.8714754),
        ("70", 6.1388571, 47.5904191),
        ("71", 4.6394021, 46.5951234),
        ("72", 0.1947322, 48.0041421),
        ("73", 6.4662232, 45.4956055),
        ("74", 6.3609606, 46.1045902),
        ("75", 2.3416082, 48.8626759),
        ("76", 1.025579, 49.6862911),
        ("77", 2.8977309, 48.5957831),
        ("78", 1.8080138, 48.7831982),
        ("79", -0.3159014, 46.5490257),
        ("80", 2.3380595, 49.9783317),
        ("81", 2.2072751, 43.8524305),
        ("82", 1.2649374, 44.1254902),
        ("83", 6.1486127, 43.5007903),
        ("84", 5.065418, 44.0001599),
        ("85", -1.3956692, 46.5929102),
        ("86", 0.4953679, 46.5719095),
        ("87", 1.2500647, 45.9018644),
        ("88", 6.349702, 48.1770451),
        ("89", 3.5634078, 47.8474664),
        ("90", 6.9498114, 47.6184394),
        ("91", 2.2714555, 48.5203114),
        ("92", 2.2407148, 48.835321),
        ("93", 2.4811577, 48.9008719),
        ("94", 2.4549766, 48.7832368),
        ("95", 2.1802056, 49.076488),
        ("974", 55.536384, -21.115141),
        ("973", -53.125782, 3.933889),
        ("972", -61.024174, 14.641528),
        ("971", -61.551, 16.265),
    )
    return [
        {"ID": code, "Longitude": longitude, "Latitude": latitude}
        for code, longitude, latitude in centroids
    ]
|
|
|
def plotDemandeur(dataframe, coderome):
    """Display a histogram of 'Valeur' per 'Indicateur' for one ROME code.

    Args:
        dataframe: Frame with the columns ``Indicateur`` and ``Valeur``.
        coderome: ROME code appended to the chart title (must be a string).

    Returns:
        None; the figure is shown through ``Figure.show()``.
    """
    ordered = dataframe.sort_values(by=['Indicateur'])
    chart = px.histogram(
        ordered,
        x='Indicateur',
        y='Valeur',
        height=1000,
        title="Demandeurs d'emploi et offres d'emploi du code ROME : " + coderome,
        color='Indicateur',
        labels={'Valeur': 'Nombre'},
        text_auto=True,
    )
    chart = chart.update_layout(font=dict(size=9, color="RebeccaPurple"), autosize=True)
    chart.show()
|
|
|
def plotSalaire(dataframe):
    """Display grouped bars of 'salaire' per 'emploi', coloured by 'categorie'.

    Args:
        dataframe: Frame with the columns ``emploi``, ``salaire`` and
            ``categorie``; rows are sorted by ``salaire`` before plotting.

    Returns:
        None; the figure is shown through ``Figure.show()``.
    """
    ordered = dataframe.sort_values(by=['salaire'])
    chart = px.histogram(
        ordered,
        x='emploi',
        y='salaire',
        barmode='group',
        title="Salaires médians",
        color='categorie',
        text_auto=True,
    )
    chart = chart.update_layout(font=dict(size=9, color="RebeccaPurple"), autosize=True)
    chart.show()
|
|
|
def plotDifficulte(dataframe):
    """Display a histogram of recruitment-difficulty values per indicator.

    An empty frame is still plotted, with a title stating that no data is
    available.

    Args:
        dataframe: Frame with the columns ``Indicateur`` and ``Valeur``.

    Returns:
        None; the figure is shown through ``Figure.show()``.
    """
    title = (
        "Aucune donnée difficulté de recrutement renseignée!"
        if len(dataframe) == 0
        else "Difficulté de recrutement"
    )
    ordered = dataframe.sort_values(by=['Valeur'])
    chart = px.histogram(
        ordered,
        x='Indicateur',
        y='Valeur',
        title=title,
        color='Indicateur',
        labels={'Valeur': 'Pourcentage'},
        text_auto=True,
    )
    chart = chart.update_layout(font=dict(size=9, color="RebeccaPurple"), autosize=True)
    chart.show()
|
|
|
def plotRepartition(dataframe,title):
    """Display a pie chart of 'Valeur' shares per 'Indicateur'.

    Args:
        dataframe: Frame with the columns ``Indicateur`` and ``Valeur``.
        title: Chart title.

    Returns:
        None; the figure is shown through ``Figure.show()``.
    """
    ordered = dataframe.sort_values(by=['Valeur'])
    chart = px.pie(
        ordered,
        names='Indicateur',
        values='Valeur',
        color='Indicateur',
        title=title,
        labels={'Valeur': 'pourcentage'},
        color_discrete_sequence=px.colors.qualitative.Safe,
    )
    # Labels and percentages are drawn inside the slices.
    chart = chart.update_traces(textposition='inside', textinfo='percent+label')
    chart = chart.update_layout(font=dict(size=10, color="RebeccaPurple"))
    chart.show()
|
|
|
def removeTags(all):
    """Drop <style>/<script> children of *all* and return its text, concatenated.

    *all* is called with the list of tag names to find, each hit is
    decomposed, then the element's ``stripped_strings`` are joined with no
    separator. This definition rebinds the ``removeTags`` name declared
    earlier in the file (which joined with a space).
    """
    for unwanted in all(['style', 'script']):
        unwanted.decompose()
    pieces = all.stripped_strings
    return ''.join(pieces)
|
|
|
def htmlToDataframe(htmlTable):
    """Convert the first <table> of an HTML fragment into a DataFrame.

    The first <tr> provides the column names; every following <tr> provides
    one row. Cell values are the text of each child node of the row.

    Args:
        htmlTable: HTML markup containing at least one <table>.

    Returns:
        A ``pandas.DataFrame`` whose cells are strings.

    Raises:
        IndexError: If the markup contains no <table>.
    """
    soup = BeautifulSoup(htmlTable, 'html.parser')
    # Locate the table once instead of re-running find_all for header and body.
    table = soup.find_all("table")[0]

    def _cell_texts(row):
        """Return the text of every child of *row* that exposes get_text()."""
        texts = []
        for cell in row:
            try:
                texts.append(cell.get_text())
            except AttributeError:
                # Child nodes without get_text() are skipped, as before, but
                # unrelated errors are no longer silently swallowed.
                continue
        return texts

    list_header = _cell_texts(table.find("tr"))
    data = [_cell_texts(row) for row in table.find_all("tr")[1:]]
    return pd.DataFrame(data=data, columns=list_header)
|
|
|
def listToString(list):
    """Return the ``str()`` representation of *list* (any object is accepted)."""
    text = str(list)
    return text
|
|
|
def arrayToString(array):
    """Join the ``libelle`` of every item of *array* into one string.

    Items that expose a ``'libelle'`` key contribute its value; any other
    item contributes the placeholder ``"; "``. Parts are joined with ``", "``
    and the result always ends with ``"; "``.

    Args:
        array: Iterable of mappings (typically dicts with a ``libelle`` key).

    Returns:
        The joined string, e.g. ``"A, B; "``; ``"; "`` for an empty input.
    """
    labels = []
    for item in array:
        # Look the key up directly instead of searching for the substring
        # "libelle" in str(item): the substring test also matched values and
        # nested keys, then failed with KeyError on item['libelle'].
        try:
            labels.append(item['libelle'])
        except (KeyError, TypeError, IndexError):
            labels.append("; ")
    return ', '.join(labels) + '; '
|
|
|
def _rome_label(index, code):
    """Return the ``libelle_rome`` of the top match for ROME *code*, or None."""
    response = index.query(
        top_k=1,
        vector=[0] * 768,
        namespace='',
        filter={"categorie": {"$eq": "rome"}, "rome": {"$eq": code}},
        include_metadata=True,
    )
    matches = response['matches']
    if not matches:
        return None
    return matches[0]['metadata']['libelle_rome']


def searchByRome(rome,index):
    """Look up the label(s) of one or several ROME codes in the vector index.

    Args:
        rome: A single ROME code, or several codes separated by commas. In
            the comma-separated form only the first six entries are
            considered, and only those that are exactly 5 characters long
            once stripped. A single code is queried as given.
        index: Object exposing a Pinecone-style ``query(...)`` method whose
            result supports ``['matches']``.

    Returns:
        The labels found, each prefixed with a space (``""`` when nothing
        matched). Codes without any match are skipped instead of raising
        IndexError.
    """
    if ',' in rome:
        candidates = [part.strip() for part in rome.split(',')]
        codes = [code for position, code in enumerate(candidates) if position <= 5 and len(code) == 5]
    else:
        codes = [rome]

    libelle = ''
    for code in codes:
        label = _rome_label(index, code)
        if label is not None:
            libelle = libelle + " " + label
    return libelle
|
|
|
@cl.author_rename
def rename(orig_author: str):
    """Translate known author names into display labels.

    Names that are not in the table are returned unchanged.
    """
    display_names = {
        "ConversationalRetrievalChain": "💬 Assistant conversationnel",
        "Retriever": "Agent conversationnel",
        "StuffDocumentsChain": "Chaîne de documents",
        "LLMChain": "Agent",
        "HuggingFaceEndpoint": "Mistral AI 🤖",
    }
    if orig_author in display_names:
        return display_names[orig_author]
    return orig_author
|
|
|
@cl.action_callback("download")
async def on_action(action):
    """Write the action's value to ``./<description>.txt`` and send it as a file.

    The value is wrapped in a one-cell DataFrame and written with ``to_csv``
    (no index, no header, UTF-8). A message carrying the file as an inline
    element is then sent and the action is removed.
    """
    file_name = action.description + ".txt"
    file_path = "./" + file_name

    frame = pd.DataFrame(np.array([action.value]))
    with open(file_path, 'wb') as csv_file:
        frame.to_csv(path_or_buf=csv_file, index=False, header=False, encoding='utf-8')

    attachment = cl.File(
        name=file_name,
        path=file_path,
        display="inline",
    )
    message = cl.Message(
        author="Datapcc : 🌐🌐🌐", content="[Lien] 🔗", elements=[attachment]
    )
    await message.send()
    await action.remove()
|
|
|
@cl.action_callback("saveMemory")
async def on_action(action):
    """Append the action's value to the ``saveMemory`` session buffer.

    After updating the buffer a confirmation message is sent and the action
    is removed.
    """
    buffer = cl.user_session.get("saveMemory")
    if buffer is None:
        # The key may never have been initialised for this session; start
        # from an empty buffer instead of failing on `None + value`.
        buffer = ""
    cl.user_session.set("saveMemory", buffer + action.value)
    await cl.Message(
        author="Datapcc : 🌐🌐🌐", content="🗃️ Document sauvegardé dans le buffer Memory!"
    ).send()
    await action.remove()
|
|
|
@cl.cache
def to_cache(file):
    """Return the URL of ``<file>.csv`` under the CIPEN docs asset folder.

    Args:
        file: File name without extension (must be a string).
    """
    base_url = "https://cipen.univ-gustave-eiffel.fr/fileadmin/CIPEN/datas/assets/docs/"
    return "".join((base_url, file, ".csv"))
|
|
|
|
|
@cl.set_chat_profiles
async def chat_profile():
    """Declare the chat profiles registered with Chainlit (a single NCS profile)."""
    ncs_profile = cl.ChatProfile(
        name="Note composante sectorielle - NCS",
        markdown_description="Note composante sectorielle",
        icon="./public/favicon.png",
    )
    return [ncs_profile]
|
@cl.on_chat_start
async def start():
    """Initialise a chat session: greeting, settings panel and LLM pipeline.

    Values written to the user session: ``arraySettingsComp`` (competence
    choices), ``memory`` (a ``ConversationBufferMemory``) and ``runnable``
    (the prompt | model pipeline).

    NOTE(review): the block nesting below was reconstructed from a copy of
    this file whose indentation had been lost — confirm which statements
    belong to the ``'NCS'`` branch and to the ``len(value) < 2`` branch.
    """
    await cl.Avatar(
        name="You",
        path="./public/logo-ofipe.jpg",
    ).send()
    # The profile name is split on ' - '; the second part is the code tested below.
    chat_profile = cl.user_session.get("chat_profile")
    chatProfile = chat_profile.split(' - ')

    if chatProfile[1] == 'NCS':
        # Greeting name: the part of the identifier before '@', dots replaced by spaces.
        app_user = cl.user_session.get("user")
        welcomeUser = app_user.identifier
        welcomeUserArray = welcomeUser.split('@')
        welcomeUserStr = welcomeUserArray[0].replace('.',' ')
        await cl.Message(f"> Bonjour {welcomeUserStr}").send()

        # Competence choices: the 'libelle_competence' column, sorted, with an
        # empty entry inserted first so that nothing is selected initially.
        df_allcompetences = pd.read_csv('./public/referentiel_competence.csv')
        df_competences = df_allcompetences[['libelle_competence']].copy()
        df_competences = df_competences.sort_values(by=['libelle_competence'])
        competences_list = df_competences['libelle_competence'].tolist()
        competences_list.sort()
        competences_list.insert(0, "")
        cl.user_session.set("arraySettingsComp", competences_list)
        settings = await cl.ChatSettings(
            [
                Select(
                    id="competence",
                    label="Compétences",
                    values=competences_list,
                    initial_index=0,
                ),
                TextInput(id="competenceInput", label="ou saisir une compétence voire des objectifs pédagogiques", placeholder="ou saisir une compétence voire des objectifs pédagogiques", tooltip="saisir une compétence voire des objectifs pédagogiques"),
            ]
        ).send()
        value = settings["competence"]

        # Fewer than 2 characters selected (e.g. the empty first entry):
        # show the onboarding instructions.
        if len(value) < 2:
            warning = [
                cl.Image(name="Warning", size="small", display="inline", path="./public/warning.png")
            ]
            await cl.Message(author="Datapcc : 🌐🌐🌐",content="1️⃣ Cliquez sur le bouton dont l'image suit, dans le prompt, pour commencer à élaborer une note sectorielle de la chaîne documentaire APCC!").send()
            await cl.Message(author="Datapcc : 🌐🌐🌐",content="", elements=warning).send()
            await cl.Message(author="Datapcc : 🌐🌐🌐",content="2️⃣ Puis sélectionnez ou saisissez une compétence ou des objectifs pédagogiques. Et vous êtes prêt!\n\n🔗 Plateforme de feedback et de fil d'activité : https://cloud.getliteral.ai/").send()
        # Prompt context: the session's "contextChatBot" value when present,
        # otherwise the competence list rendered as text.
        contextChat = cl.user_session.get("contextChatBot")
        if not contextChat:
            contextChat = df_competences.to_string(index = False)
        # Self-assignment: leaves the value unchanged, but raises KeyError
        # when HUGGINGFACEHUB_API_TOKEN is not set.
        os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.environ['HUGGINGFACEHUB_API_TOKEN']
        repo_id = "mistralai/Mistral-7B-Instruct-v0.3"

        model = HuggingFaceEndpoint(
            repo_id=repo_id,
            max_new_tokens=6000,
            temperature=1.0,
            streaming=True
        )
        cl.user_session.set("memory", ConversationBufferMemory(return_messages=True))
        memory = cl.user_session.get("memory")
        # The system message is an f-string: the first 26500 characters of
        # contextChat are interpolated now, while "{question}" and the
        # "history" placeholder stay template variables filled at call time.
        prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    f"Contexte : Vous êtes un spécialiste du marché de l'emploi en fonction du niveau de qualification, des compétences professionnelles, des compétences transversales, du salaire et de l'expérience. Vous êtes doué pour faire des analyses du système travail sur les métiers les plus demandés grâce à votre aptitude à synthétiser les informations en fonction des critères définis ci-avant. En fonction des informations suivantes et du contexte suivant seulement et strictement. Contexte : {contextChat[0:26500]}. Réponds à la question suivante de la manière la plus pertinente, la plus exhaustive et la plus détaillée possible, avec au minimum 3000 tokens jusqu'à 4000 tokens, seulement et strictement dans le contexte et les informations fournies. Essayez donc de comprendre en profondeur le contexte et répondez uniquement en vous basant sur les informations fournies.",
                ),
                MessagesPlaceholder(variable_name="history"),
                ("human", "{question}, dans le contexte fourni."),
            ]
        )
        # Pipeline: add "history" (read from memory) to the input, render the
        # prompt, then call the model.
        runnable = (
            RunnablePassthrough.assign(
                history=RunnableLambda(memory.load_memory_variables) | itemgetter("history")
            )
            | prompt
            | model
        )
        cl.user_session.set("runnable", runnable)
|
|
|
@literal_client.step(type="run")
async def construction_NCS(competenceList):
    """Collect job data for a competence and publish it in the chat.

    Steps, in order: fetch the job frame through ``contexte``, fetch the
    key-figures text for the session's ROME codes, store the first 28875
    characters of both (brackets and braces removed from the job text) under
    the ``contextChatBot`` session key, then send three kinds of messages:
    the job list, one "fiche métier" PDF link per ROME code, and the key
    figures.

    Args:
        competenceList: Competence text echoed as the "Question" of the
            generated documents (must be a string).

    Returns:
        The constant string "datavisualisation des statistiques de l'emploi".
    """
    author = "Datapcc : 🌐🌐🌐"
    # Deletes '[', ']', '{' and '}' in one pass.
    without_brackets = str.maketrans('', '', '[]{}')

    context = await contexte(competenceList)
    emploisST = context.to_string(index = False)
    romeListArray = cl.user_session.get("codeRomeArray")
    ficheClesMetier = await document_chiffres_cles_emplois("https://dataemploi.francetravail.fr/metier/chiffres-cles/NAT/FR/", romeListArray)
    contentChatBot = str(emploisST).translate(without_brackets) + ficheClesMetier
    cl.user_session.set("contextChatBot", contentChatBot[0:28875])

    columns = ['intitule','typeContratLibelle','experienceLibelle','competences','description','qualitesProfessionnelles','salaire','lieuTravail','formations']
    finals_df = context[columns].copy()
    job_blocks = []
    for (intitule, contrat, experience, competences, description,
         qualites, salaire, lieu, formations) in finals_df.values.tolist():
        job_blocks.append(
            "\n✔️ Emploi : " + str(intitule)
            + ";\n◉ Contrat : " + str(contrat)
            + ";\n◉Compétences professionnelles : " + str(competences)
            + ";\n◉ Salaire : " + str(salaire)
            + ";\n◉ Qualification : " + str(qualites)
            + ";\n◉ Localisation : " + str(lieu)
            + ";\n◉ Expérience : " + str(experience)
            + ";\n◉ Niveau de qualification : " + str(formations)
            + ";\n◉ Description de l'emploi : " + str(description)
            + "\n"
        )
    stringEmplois = ''.join(job_blocks)

    await cl.sleep(1)
    listEmplois_name = "Liste des emplois"
    jobs_text = stringEmplois.translate(without_brackets).replace("'code'","\n• 'code'")
    text_elements = [
        cl.Text(content="Question : " + competenceList + "\n\nRéponse :\n" + jobs_text, name=listEmplois_name)
    ]
    await cl.Message(author=author,content="👨💼 Source France Travail : " + listEmplois_name, elements=text_elements).send()

    await cl.sleep(1)
    # One message per ROME code, each carrying a single PDF link.
    for code_rome in romeListArray:
        fiche = cl.File(name= "Fiche métier " + code_rome,url="https://www.soi-tc.fr/assets/fiches_pe/FEM_" + code_rome + ".pdf",display="inline",)
        await cl.Message(
            author=author, content="[Fiches métiers] 🔗", elements=[fiche]
        ).send()

    await cl.sleep(1)
    listClesMetier_name = "Chiffres clés des emplois"
    text_ClesMetier = [
        cl.Text(content="Question : " + competenceList + "\n\nRéponse :\n" + ficheClesMetier, name=listClesMetier_name)
    ]
    await cl.Message(author=author,content="📊 Source France Travail : " + listClesMetier_name, elements=text_ClesMetier).send()

    return "datavisualisation des statistiques de l'emploi"
|
|
|
@cl.step(type="run")
async def recuperation_contexte(getNote):
    """Return ``"<getNote> :\\n<value>"`` where value is the session entry named *getNote*.

    Args:
        getNote: Session key to read; also used as the heading of the result.
    """
    stored = cl.user_session.get(getNote)
    return "".join((getNote, " :\n", stored))
|
@cl.step(type="retrieval")
async def contexte(competence):
    """Fetch job offers for a competence and display the per-ROME-code charts.

    ``creation_liste_code_Rome`` is awaited first; the ROME codes are then
    read from the ``codeRomeArray`` session key. For each code the key-figure
    tables are fetched and plotted (the salary chart only when its table is
    non-empty).

    Args:
        competence: Competence text forwarded to ``creation_liste_code_Rome``.

    Returns:
        The frame returned by ``API_France_Travail`` for the ROME codes.
    """
    await creation_liste_code_Rome(competence)
    await cl.sleep(1)
    romeListArray = cl.user_session.get("codeRomeArray")
    df_emplois = await API_France_Travail(romeListArray)
    await cl.sleep(1)

    base_url = "https://dataemploi.pole-emploi.fr/metier/chiffres-cles/NAT/FR/"
    for code_rome in romeListArray:
        tables = await datavisualisation_chiffres_cles_emplois(base_url + code_rome)
        plotDemandeur(htmlToDataframe(tables[0]), code_rome)
        # The salary table is an empty string when the page has no salary data.
        if len(tables[1]) > 0:
            plotSalaire(htmlToDataframe(tables[1]))
        plotDifficulte(htmlToDataframe(tables[2]))
        plotRepartition(htmlToDataframe(tables[3]), "Répartition des embauches du métier : type de contrat")
        plotRepartition(htmlToDataframe(tables[4]), "Répartition des embauches du métier : type entreprise")

    return df_emplois
|
|
|
@cl.step(type="tool")
async def document_chiffres_cles_emplois(url, codes):
    """Scrape the key-figures page of each ROME code into one text summary.

    Args:
        url: Base URL; each code is appended to it to form the page address.
        codes: Iterable of ROME code strings.

    Returns:
        One paragraph per code (title, job seekers/offers, salaries, median
        salaries, recruitment difficulties and their origin, hires by
        contract type and by company size), concatenated. Sections missing
        from a page are left empty. ``""`` when *codes* is empty.

    Raises:
        requests.exceptions.RequestException: On network failure or when a
            page does not answer within the timeout.
    """
    # NOTE(review): requests is blocking; inside this coroutine it stalls the
    # event loop for the duration of each download.
    request_timeout = 30  # seconds; without it a stalled server hangs the chat forever

    def _first_text(soup, selector):
        """Return the cleaned text of the first node matching *selector*, or ''."""
        found = soup.select(selector)
        return removeTags(found[0]) if found else ""

    sections = []
    for code in codes:
        response = requests.get(url + code, timeout=request_timeout)
        soup = BeautifulSoup(response.text, "html.parser")

        titre = _first_text(soup, 'h1#titreMetier')
        embauches = _first_text(soup, 'div.jobs_item-container-flex')
        salaires = _first_text(soup, 'div.key-number_block.shadow.inset')
        salaires_medians = _first_text(soup, 'tbody.sectorTable__body')
        difficultes = _first_text(soup, 'div.dynamism_canvas-wrapper > p.sr-only')
        origine_difficultes = _first_text(soup, 'div.tabs-main-data_persp-col2')

        # Every contract-type legend entry is kept, each followed by ", ".
        contrats = soup.find_all("div", class_="hiring-contract_legende_item ng-star-inserted")
        types_contrat = "".join(removeTags(contrat) + ", " for contrat in contrats)

        entreprises = soup.find_all("div", class_="horizontal-graph_patterns")
        tailles_entreprise = removeTags(entreprises[0]) if entreprises else ""

        sections.append(
            "\n\nChiffres-clés Métier " + titre
            + ":\nDemandeurs d'emploi et Offres d'emploi : " + embauches
            + ". Salaires proposés dans les offres : " + salaires
            + ". Salaires médians constatés : " + salaires_medians
            + ". Difficultés de recrutement pour les entreprises : " + difficultes
            + ". Origine des difficultés : " + origine_difficultes
            + ". Répartition des embauches par type de contrat : " + types_contrat
            + ". Répartition des embauches par taille d'entreprise : " + tailles_entreprise
            + "."
        )
    return "".join(sections)
|
|
|
@cl.step(type="tool")
async def datavisualisation_chiffres_cles_emplois(url):
    """Scrape one key-figures page into five small HTML tables.

    Args:
        url: Address of the key-figures page of a ROME code.

    Returns:
        A list of five strings, in this order: job seekers/offers table,
        salary table (``''`` when the page shows fewer than three salary
        values), recruitment-difficulty table, hires-by-contract table and
        hires-by-company table. Each table starts with a header row.

    Raises:
        requests.exceptions.RequestException: On network failure or when the
            page does not answer within the timeout.
        IndexError: If the page lacks the expected job seeker/offer nodes, or
            if index-aligned node lists have different lengths.
    """
    # NOTE(review): requests is blocking; inside this coroutine it stalls the
    # event loop for the duration of the download.
    response = requests.get(url, timeout=30)  # seconds; avoids hanging forever
    soup = BeautifulSoup(response.text, "lxml")

    def _row(*cells):
        """Return one <tr> whose <td> cells hold *cells*."""
        return "<tr><td>" + "</td><td>".join(cells) + "</td></tr>"

    def _amount(tag):
        """Return the tag text without non-breaking spaces, spaces and '€'."""
        return removeTags(tag).replace('\xa0','').replace(' ','').replace('€','')

    indicator_header = "<table><tr><td>Indicateur</td><td>Valeur</td></tr>"

    # --- Job seekers and job offers -------------------------------------
    categories = soup.select('p.population_category')
    main_numbers = soup.select('p.population_main-num.data')
    nb_demandeurs = removeTags(main_numbers[0]).replace('\xa0', '')
    nb_offres = removeTags(main_numbers[1]).replace('\xa0', '')
    details = soup.select('p.hiring_text.ng-star-inserted')
    evolution_values = soup.select('p.main.ng-star-inserted')
    evolution_titles = soup.select('p.population_bubble-title')
    alldemandeurs = indicator_header
    alldemandeurs += _row(
        removeTags(categories[0]) + " (" + removeTags(details[0]) + ");\nÉvolution demandeurs d'emploi (" + removeTags(evolution_titles[0]) + ": " + removeTags(evolution_values[0]) + ")",
        nb_demandeurs,
    )
    alldemandeurs += _row(
        removeTags(categories[1]) + " (" + removeTags(details[1]) + "); Évolution offres d'emploi (" + removeTags(evolution_titles[1]) + ": " + removeTags(evolution_values[1]) + ")",
        nb_offres,
    )
    alldemandeurs += "</table>"

    # --- Salaries ---------------------------------------------------------
    sector_lines = soup.select('tr.sectorTable__line.ng-star-inserted')
    sector_cells = soup.select('td.sectorTable__cell')
    salary_types = soup.select('th.sectorTable__cell')
    salary_values = soup.select('p.sectorTable__cellValue')
    allsalaires = ''
    if len(salary_values) >= 3:
        allsalaires = "<table><tr><td>categorie</td><td>emploi</td><td>salaire</td></tr>"
        # First sector line: salary types 1..3 paired with values 0..2.
        for k in range(3):
            allsalaires += _row(removeTags(salary_types[k + 1]), removeTags(sector_cells[0]), _amount(salary_values[k]))
        if len(sector_lines) >= 2 and len(salary_values) == 6:
            # Second sector line: same types, label cell 4, values 3..5.
            for k in range(3):
                allsalaires += _row(removeTags(salary_types[k + 1]), removeTags(sector_cells[4]), _amount(salary_values[k + 3]))
        allsalaires += "</table>"

    # --- Recruitment difficulties ----------------------------------------
    # The three node lists come from different selectors and are read by
    # position; index access keeps the IndexError on mismatched lengths.
    difficulty_bars = soup.select('.tabs-main-content_persp-col2-bar.ng-star-inserted')
    difficulty_titles = soup.select('p.horizontal-graph_title')
    difficulty_values = soup.select('div.horizontal-graph_data')
    alldifficultes = indicator_header
    for i in range(len(difficulty_bars)):
        label = removeTags(difficulty_titles[i])
        value = removeTags(difficulty_values[i]).replace('Pour le territoire principal FRANCE pour les ' + label,'').replace('%','')
        alldifficultes += _row(label, value)
    alldifficultes += "</table>"

    # --- Hires by contract type ------------------------------------------
    contract_items = soup.select('div.hiring-contract_legende_item.ng-star-inserted')
    contract_labels = soup.select('p.hiring-contract_legende_item_label')
    contract_shares = soup.select('span.hiring-contract_legende_item-first')
    allrepartitions = indicator_header
    for i in range(len(contract_items)):
        share = removeTags(contract_shares[i])
        label = removeTags(contract_labels[i]).replace('(' + share + ')','')
        allrepartitions += _row(label, share.replace('%','').replace(',','.'))
    allrepartitions += "</table>"

    # --- Hires by company type -------------------------------------------
    allentreprises = indicator_header
    for span in soup.select('div.horizontal-graph_pattern.sm-bubble_wrapper > span'):
        text = removeTags(span)
        # The last four characters carry the percentage; the rest is the label.
        allentreprises += _row(text[0:-4], text[-4:].replace('%','').replace(',','.'))
    allentreprises += "</table>"

    return [alldemandeurs, allsalaires, alldifficultes, allrepartitions, allentreprises]
|
|
|
def _lieu_prefix(lieu):
    """Return the first three characters of ``lieu['libelle']``, stripped."""
    return lieu['libelle'][0:3].strip()


def _occurrences(frame, column):
    """Count the rows of *frame* per distinct value of *column* (count in ``obs``)."""
    return frame.groupby(column).size().reset_index(name='obs')


def _top_occurrences(frame, column, limit):
    """Return the *limit* most frequent values of *column*, sorted by ascending count."""
    return _occurrences(frame, column).sort_values(by=['obs']).iloc[-limit:]


def _bar_chart(counts, column, title):
    """Build the horizontal bar chart used for every ranking of the dashboard."""
    figure = px.bar(
        counts,
        x='obs',
        y=column,
        orientation='h',
        color='obs',
        title=title,
        labels={'obs': 'nombre'},
        color_continuous_scale="Teal",
        text_auto=True,
    )
    figure.update_layout(font=dict(size=10, color="RebeccaPurple"), autosize=True)
    figure.update_traces(
        # Full label in the hover box, label cut to 100 characters on the axis.
        hovertemplate=counts[column] + ' <br>Nombre : %{x}',
        y=[label[:100] + "..." for label in counts[column]],
        showlegend=False,
    )
    return figure


def _pie_chart(counts, column, title):
    """Build the pie chart used for every distribution of the dashboard."""
    figure = px.pie(
        counts,
        names=column,
        values='obs',
        color='obs',
        title=title,
        labels={'obs': 'nombre'},
        color_discrete_sequence=px.colors.qualitative.Safe,
    )
    figure.update_traces(textposition='inside', textinfo='percent+label')
    figure.update_layout(font=dict(size=10, color="RebeccaPurple"))
    return figure


@cl.step(type="tool")
async def datavisualisation_statistiques_emplois(results_df):
    """Send a Chainlit message holding Plotly charts that summarise job offers.

    Args:
        results_df: DataFrame of job offers. The columns ``intitule``,
            ``typeContratLibelle``, ``experienceLibelle``, ``competences``,
            ``qualitesProfessionnelles``, ``salaire``, ``lieuTravail``,
            ``formations`` and ``secteurActiviteLibelle`` are read.

    Returns:
        None. The charts are delivered through ``cl.Message(...).send()``.

    Notes:
        * An empty ``results_df`` only produces an informative message
          (it previously crashed in ``pd.concat``).
        * The four charts that need complete offers (skills, soft skills,
          qualification level, experience) are skipped when no offer has all of
          ``competences``, ``qualitesProfessionnelles`` and ``formations``.
    """
    if results_df.empty:
        await cl.Message(content="Aucune offre d'emploi à visualiser").send()
        return

    # --- Offers carrying every field needed by the skills-related charts ---
    finals_df = results_df[['intitule', 'typeContratLibelle', 'experienceLibelle', 'competences', 'qualitesProfessionnelles', 'salaire', 'lieuTravail', 'formations']].copy()
    finals_df["lieuTravail"] = finals_df["lieuTravail"].apply(_lieu_prefix)
    finals_df.dropna(subset=['qualitesProfessionnelles', 'formations', 'competences'], inplace=True)
    # Keep real lists so that explode() yields clean labels; joining with '; '
    # and splitting on ';' used to create " label" duplicates of "label".
    finals_df["competences"] = finals_df["competences"].apply(
        lambda items: [str(item['libelle']) for item in items]
    )
    finals_df["qualitesProfessionnelles"] = finals_df["qualitesProfessionnelles"].apply(
        lambda items: [str(item['libelle']) + ": " + str(item['description']) for item in items]
    )
    finals_df["formations"] = finals_df["formations"].apply(
        lambda items: '; '.join(str(item['niveauLibelle']) for item in items)
    )
    finals_df = finals_df.sort_values(by=['lieuTravail'])

    # --- Offer locations, minus the prefixes that are excluded from the map ---
    excluded_prefixes = ('Fra', 'FRA', 'Ile', 'Mar', 'Bou', '976')
    localisation_df = results_df[['lieuTravail']].copy()
    localisation_df["lieuTravail"] = localisation_df["lieuTravail"].apply(_lieu_prefix)
    localisation_df = localisation_df[~localisation_df['lieuTravail'].isin(excluded_prefixes)]

    # --- Charts built from every offer ---
    fig_intitule = _bar_chart(_top_occurrences(results_df, 'intitule', 25), 'intitule', "Les principaux emplois")
    fig_contrat = _pie_chart(_occurrences(results_df, 'typeContratLibelle'), 'typeContratLibelle', "Les types de contrat")
    fig_secteur = _bar_chart(_top_occurrences(results_df, 'secteurActiviteLibelle', 25), 'secteurActiviteLibelle', "Les principaux secteurs d'activités")

    # --- Charts built from complete offers only ---
    detail_elements = []
    if not finals_df.empty:
        fig_competences = _bar_chart(
            _top_occurrences(finals_df.explode('competences'), 'competences', 20),
            'competences',
            "Les principales compétences professionnelles",
        )
        fig_transversales = _bar_chart(
            _top_occurrences(finals_df.explode('qualitesProfessionnelles'), 'qualitesProfessionnelles', 20),
            'qualitesProfessionnelles',
            "Les principales compétences transversales",
        )
        fig_formations = _pie_chart(_occurrences(finals_df, 'formations'), 'formations', "Les niveaux de qualification")
        fig_experience = _pie_chart(_occurrences(finals_df, 'experienceLibelle'), 'experienceLibelle', "Les expériences professionnelles")
        detail_elements = [
            cl.Plotly(name="chart_competences", figure=fig_competences, display="inline", size="large"),
            cl.Plotly(name="chart_transversales", figure=fig_transversales, display="inline", size="large"),
            cl.Plotly(name="chart_formations", figure=fig_formations, display="inline", size="large"),
            cl.Plotly(name="chart_experience", figure=fig_experience, display="inline", size="large"),
        ]

    # --- Map of offers ---
    # The GeoJSON layer is drawn with a zero-width line, so it is decorative:
    # a network failure must not prevent the dashboard from being displayed.
    try:
        res = requests.get(
            "https://raw.githubusercontent.com/codeforgermany/click_that_hood/main/public/data/spain-provinces.geojson",
            timeout=10,
        )
        res.raise_for_status()
        map_layers = [
            {
                "source": res.json(),
                "type": "line",
                "color": "green",
                "line": {"width": 0},
            }
        ]
    except (requests.RequestException, ValueError):
        map_layers = []

    # One pass over the centroids instead of one scan per location; the first
    # entry wins if an ID appears more than once.
    longitudes = {}
    latitudes = {}
    for centroid in localisation():
        longitudes.setdefault(centroid['ID'], centroid['Longitude'])
        latitudes.setdefault(centroid['ID'], centroid['Latitude'])

    df_localisation = _occurrences(localisation_df, 'lieuTravail').sort_values(by=['lieuTravail'])
    # Locations without a known centroid get NaN coordinates.
    df_localisation["longitude"] = pd.to_numeric(df_localisation['lieuTravail'].map(longitudes), downcast="float")
    df_localisation["latitude"] = pd.to_numeric(df_localisation['lieuTravail'].map(latitudes), downcast="float")

    fig_localisation = px.scatter_mapbox(df_localisation, lat="latitude", lon="longitude", hover_name="lieuTravail", size="obs").update_layout(
        mapbox={
            "style": "carto-positron",
            "center": {"lon": 2, "lat": 47},
            "zoom": 4.5,
            "layers": map_layers,
        }
    )

    elements = [
        cl.Plotly(name="chart_intitule", figure=fig_intitule, display="inline", size="large"),
        cl.Plotly(name="chart_contrat", figure=fig_contrat, display="inline", size="large"),
        *detail_elements,
        cl.Plotly(name="chart_secteur", figure=fig_secteur, display="inline", size="large"),
        cl.Plotly(name="chart_localisation", figure=fig_localisation, display="inline", size="large"),
    ]

    await cl.Message(content="Datavisualisation du marché de l'emploi", elements=elements).send()
|
|
|
@cl.step(type="tool")
async def API_France_Travail(romeListArray):
    """Fetch the job offers created since the first day of the previous month.

    Args:
        romeListArray: iterable of search terms; each one is sent as the
            ``motsCles`` parameter of a separate search.

    Returns:
        A DataFrame built from the concatenated ``resultats`` of every search
        (at most the range ``0-149`` per search).
    """
    client = await connexion_France_Travail()

    now = datetime.datetime.today()
    # January rolls back to December of the previous year.
    if now.month == 1:
        previous_month, previous_year = 12, now.year - 1
    else:
        previous_month, previous_year = now.month - 1, now.year
    window_start = now.replace(day=1, month=previous_month, year=previous_year)
    window_end = datetime.datetime.today()

    offers = []
    for keyword in romeListArray:
        query = {
            "motsCles": keyword,
            'minCreationDate': dt_to_str_iso(window_start),
            'maxCreationDate': dt_to_str_iso(window_end),
            'range': '0-149',
        }
        response = client.search(params=query)
        offers.extend(response["resultats"])

    return pd.DataFrame(offers)
|
|
|
@cl.step(type="tool")
async def creation_liste_code_Rome(competence):
    """Pick the Rome codes used for the job-offer search and store them in the session.

    A similarity search (30 hits) is run on the Rome catalogue, the hits are
    shown as a Markdown table, and the first five distinct codes are proposed.
    The user either accepts them or types a comma-separated list instead.

    Args:
        competence: text sent to the similarity search.

    Returns:
        None. The selected codes are stored under the ``codeRomeArray`` key of
        the Chainlit user session.

    Raises:
        KeyError: if the ``PINECONE_API_KEYROME`` environment variable is unset.
    """
    author = "Datapcc : 🌐🌐🌐"
    # Read up front so that a missing variable fails before any remote call.
    categorie = os.environ['PINECONE_API_KEYROME']
    docsearch = await connexion_catalogue_Rome()
    documents = docsearch.similarity_search(competence, k=30, filter={"categorie": {"$eq": categorie}})

    codes = [doc.metadata['code_rome'] for doc in documents]
    libelles = [doc.metadata['libelle_competence'] for doc in documents]
    metiers = [doc.metadata['libelle_appellation_long'] for doc in documents]

    displayresults = '| Code Rome | Compétence | Métier |\n| -------- | ------- | ------- |' + ''.join(
        f"\n| {code} | {libelle} | {metier} |"
        for code, libelle, metier in zip(codes, libelles, metiers)
    )
    await cl.Message(author=author, content="Voici le résultat de la recherche sémantique sur le catalogue Rome :\n" + displayresults).send()

    results_df = pd.DataFrame({'codeRome': codes, 'competence': libelles, 'metier': metiers})
    codeRome_list = results_df.drop_duplicates(subset=["codeRome"]).head(5)["codeRome"].tolist()
    codeRomeString = ", ".join(map(str, codeRome_list))

    actionRome = await cl.AskActionMessage(
        content="Etes-vous d'accord avec la sélection des 5 codes Rome automatiques issus de la recherche sémantique ? :\n" + codeRomeString,
        actions=[
            cl.Action(name="continue", value="Offres d'emploi en temps réel", label="✅ Oui, je veux continuer vers l'extraction en temps réel des offres d'emploi"),
            cl.Action(name="cancel", value="Saisie des codes Rome", label="❌ Non, je veux saisir ma liste de codes Rome, séparés par des virgules"),
        ],
        timeout=3600,
    ).send()

    if actionRome and actionRome.get("name") == "continue":
        await cl.Message(
            content="Connexion à France Travail, et récupération des offres d'emploi",
        ).send()
        cl.user_session.set("codeRomeArray", codeRome_list)
        return

    # Fall back to the automatic selection unless a valid list is typed in
    # (this also covers the case where the prompt gets no answer).
    arrayCodeRome = codeRome_list
    actionsaisierome = await cl.AskUserMessage(content="Saisissez vos codes Rome dans le prompt? ⚠️ Attention, indiquez seulement des codes Rome séparés par des virgules", timeout=3600).send()
    if actionsaisierome:
        await cl.Message(
            content=f"Votre saisie est : {actionsaisierome['output']}",
        ).send()
        stringCodeRome = actionsaisierome['output'].replace(' ', '')
        forbidden = (';', '.', ':', '!', '|')
        typed_codes = []
        if not any(symbol in stringCodeRome for symbol in forbidden):
            # Ignore empty items produced by doubled or trailing commas.
            typed_codes = [code for code in stringCodeRome.split(',') if code]
        if typed_codes:
            arrayCodeRome = typed_codes
        else:
            await cl.Message(author=author, content="Votre saisie est erronée. Nous continuons l'action avec les codes Rome sélectionnés automatiquement pour vous : " + codeRomeString).send()
    cl.user_session.set("codeRomeArray", arrayCodeRome)
|
|
|
@cl.step(type="tool")
async def connexion_France_Travail():
    """Return an ``Api`` client built from the credentials found in the environment.

    Raises:
        KeyError: if ``POLE_EMPLOI_CLIENT_ID`` or ``POLE_EMPLOI_CLIENT_SECRET``
            is not set.
    """
    credentials = {
        "client_id": os.environ['POLE_EMPLOI_CLIENT_ID'],
        "client_secret": os.environ['POLE_EMPLOI_CLIENT_SECRET'],
    }
    return Api(**credentials)
|
|
|
@cl.step(type="tool")
async def connexion_catalogue_Rome():
    """Open the existing Pinecone index holding the Rome catalogue.

    Returns:
        A ``PineconeVectorStore`` bound to the index named by
        ``PINECONE_INDEX_NAME``, using default ``HuggingFaceEmbeddings``.

    Raises:
        KeyError: if ``PINECONE_API_KEY`` or ``PINECONE_INDEX_NAME`` is unset.
    """
    # The key is not passed explicitly below (presumably the Pinecone client
    # reads it from the environment); fail early with a clear KeyError instead
    # of re-assigning the variable to itself.
    if 'PINECONE_API_KEY' not in os.environ:
        raise KeyError('PINECONE_API_KEY')
    index_name = os.environ['PINECONE_INDEX_NAME']
    embeddings = HuggingFaceEmbeddings()
    return PineconeVectorStore.from_existing_index(index_name, embeddings)
|
|
|
@cl.step(type="llm")
async def IA():
    """Return a streaming ``HuggingFaceEndpoint`` for Mistral-7B-Instruct-v0.3.

    Raises:
        KeyError: if ``HUGGINGFACEHUB_API_TOKEN`` is not set.
    """
    # The token is not passed explicitly below (presumably the endpoint reads
    # it from the environment); fail early with a clear KeyError instead of
    # re-assigning the variable to itself.
    if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
        raise KeyError('HUGGINGFACEHUB_API_TOKEN')
    return HuggingFaceEndpoint(
        repo_id="mistralai/Mistral-7B-Instruct-v0.3",
        max_new_tokens=5000,
        temperature=1.0,
        task="text2text-generation",
        streaming=True,
    )
|
|
|
|
|
@cl.on_settings_update
async def setup_agent(settings):
    """Rebuild the question-answering chain after the chat settings change.

    The free-text ``competenceInput`` setting takes precedence over the
    ``competence`` setting. When both are empty a warning is sent and nothing
    else happens.

    Args:
        settings: mapping with the ``competence`` and ``competenceInput`` keys.

    Returns:
        None. The selected context is stored in the user session under
        ``competenceFree`` and the chain under ``runnable``.

    Raises:
        KeyError: if ``HUGGINGFACEHUB_API_TOKEN`` is not set.
    """
    author = "Datapcc : 🌐🌐🌐"
    competenceList = settings['competenceInput'] or settings['competence']
    if not competenceList:
        await cl.Message(
            author=author, content=f"⚠️ Pas de contexte : {settings['competence']}\n⛔ Vous ne pouvez pas élaborer de note sectorielle!"
        ).send()
        # Nothing to build without a context (continuing used to fail on the
        # unassigned competenceList).
        return

    await cl.Message(
        author=author, content=f"👍 Changement de contexte : {competenceList}"
    ).send()
    cl.user_session.set("competenceFree", competenceList)

    if not cl.user_session.get("saveMemory"):
        cl.user_session.set("saveMemory", "")

    await construction_NCS(competenceList)

    contextChat = cl.user_session.get("contextChatBot") or "Il n'y a pas de contexte."

    # The token is not passed explicitly below (presumably the endpoint reads
    # it from the environment); fail early with a clear KeyError.
    if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
        raise KeyError('HUGGINGFACEHUB_API_TOKEN')
    model = HuggingFaceEndpoint(
        repo_id="mistralai/Mistral-7B-Instruct-v0.3",
        max_new_tokens=3600,
        temperature=0.5,
        streaming=True,
    )

    memory = cl.user_session.get("memory")
    # The context becomes part of a prompt *template*: literal braces must be
    # doubled, otherwise they are parsed as template variables. Truncate first
    # so that an escaped pair is never cut in half.
    contexte = contextChat[0:28875].replace('{', '{{').replace('}', '}}')
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                f"Contexte : Vous êtes un spécialiste du marché de l'emploi en fonction du niveau de qualification, des compétences professionnelles, des compétences transversales, du salaire et de l'expérience. Vous êtes doué pour faire des analyses du système travail sur les métiers les plus demandés grâce à votre aptitude à synthétiser les informations en fonction des critères définis ci-avant. En fonction des informations suivantes et du contexte suivant seulement et strictement. Contexte : {contexte}. Réponds à la question suivante de la manière la plus pertinente, la plus exhaustive et la plus détaillée possible, avec au minimum 3000 tokens jusqu'à 3600 tokens, seulement et strictement dans le contexte et les informations fournies. Essayez donc de comprendre en profondeur le contexte et répondez uniquement en vous basant sur les informations fournies.",
            ),
            MessagesPlaceholder(variable_name="history"),
            ("human", "{question}, dans le contexte fourni."),
        ]
    )
    runnable = (
        RunnablePassthrough.assign(
            history=RunnableLambda(memory.load_memory_variables) | itemgetter("history")
        )
        | prompt
        | model
    )
    cl.user_session.set("runnable", runnable)
|
|
|
@cl.on_message
async def main(message: cl.Message):
    """Answer a user message with the session's chain and offer follow-up actions.

    The answer is streamed into a root step and into a message, then three
    action messages (download the Q/A, save it to memory, download the saved
    memory) and a text element holding the Q/A are sent. Finally the exchange
    is appended to the conversation memory stored in the session.

    Args:
        message: the incoming Chainlit message.
    """
    author = "Datapcc : 🌐🌐🌐"
    async with cl.Step(root=True, name="Réponse de Mistral", type="llm") as parent_step:
        parent_step.input = message.content
        memory = cl.user_session.get("memory")
        runnable = cl.user_session.get("runnable")
        if runnable is None:
            # The chain is only known to be stored by the settings-update
            # handler; without it there is nothing to query.
            await cl.Message(author=author, content="⚠️ Pas de contexte : sélectionnez d'abord un contexte dans les paramètres.").send()
            return

        msg = cl.Message(author=author, content="")
        async for chunk in runnable.astream(
            {"question": message.content},
            config=RunnableConfig(callbacks=[cl.AsyncLangchainCallbackHandler(stream_final_answer=True)]),
        ):
            await parent_step.stream_token(chunk)
            await msg.stream_token(chunk)

        QA_context_name = "Question-réponse sur le contexte"
        qa_export = "Question : " + message.content + "\n\nRéponse : " + msg.content
        text_elements = [
            cl.Text(content="Question : " + message.content + "\n\nRéponse :\n" + msg.content, name=QA_context_name)
        ]

        actions = [
            cl.Action(name="download", value=qa_export, description="download_QA_emplois")
        ]
        await cl.Message(author=author, content="Download", actions=actions).send()
        await cl.sleep(2)

        saves = [
            cl.Action(name="saveToMemory", value=qa_export, description="Mettre en mémoire la réponse à votre requête")
        ]
        await cl.Message(author=author, content="Mettre en mémoire la réponse à votre requête", actions=saves).send()
        await cl.sleep(2)

        memories = [
            cl.Action(name="download", value=cl.user_session.get('saveMemory'), description="download_referentiel")
        ]
        await cl.Message(author=author, content="Télécharger la mise en mémoire de vos fiches", actions=memories).send()
        await cl.sleep(1.5)

        await cl.Message(author=author, content="Contexte : " + QA_context_name, elements=text_elements).send()

        # Record the exchange so that the next question sees it as history.
        memory.chat_memory.add_user_message(message.content)
        memory.chat_memory.add_ai_message(msg.content)