|
import os |
|
import time |
|
from operator import itemgetter |
|
from collections import Counter |
|
from langchain_community.document_loaders import PyPDFLoader, TextLoader |
|
from chainlit.types import AskFileResponse |
|
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter |
|
from langchain.schema.runnable import Runnable, RunnablePassthrough, RunnableLambda |
|
from langchain.schema.runnable.config import RunnableConfig |
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
from langchain.chains import ConversationalRetrievalChain, create_extraction_chain |
|
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler |
|
from langchain_community.llms import HuggingFaceEndpoint |
|
from langchain.chains import LLMChain |
|
from langchain_core.prompts import PromptTemplate |
|
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder |
|
from langchain.schema import StrOutputParser |
|
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT |
|
from langchain.chains.question_answering import load_qa_chain |
|
from langchain.chains.qa_with_sources import load_qa_with_sources_chain |
|
from langchain_pinecone import PineconeVectorStore |
|
from pinecone import Pinecone |
|
from langchain.memory import ChatMessageHistory, ConversationBufferMemory |
|
import pandas as pd |
|
import numpy as np |
|
import chainlit as cl |
|
from chainlit.input_widget import Select, TextInput |
|
from chainlit import user_session |
|
from homeskills import homeRome, homeEsco, homeRne |
|
from offres_emploi import Api |
|
from offres_emploi.utils import dt_to_str_iso |
|
import datetime |
|
import plotly.express as px |
|
import bcrypt |
|
import ast |
|
import json |
|
import requests |
|
import http.client |
|
from bs4 import BeautifulSoup |
|
from literalai import LiteralClient |
|
# Module-level Literal AI client; the API key is read from the LITERAL_API_KEY
# environment variable (os.getenv yields None when the variable is unset).
literal_client = LiteralClient(api_key=os.getenv("LITERAL_API_KEY"))

# Turn on the client's OpenAI instrumentation hook.
literal_client.instrument_openai()
|
|
|
@cl.password_auth_callback
def auth_callback(username: str, password: str):
    """Authenticate a login against the accounts listed in CHAINLIT_AUTH_LOGIN.

    The environment variable is parsed as a JSON list of objects with the
    keys ``ident``, ``pwd`` and ``role``.

    Args:
        username: Identifier typed in the login form.
        password: Password typed in the login form.

    Returns:
        A ``cl.User`` flagged ``admin`` (role ``admindatapcc``) or ``user``
        (role ``userdatapcc``) when the credentials match; ``None`` for an
        unknown identifier, a wrong password or any other role.

    Raises:
        KeyError: if CHAINLIT_AUTH_LOGIN is not set, or a matching account
            lacks the ``pwd`` / ``role`` key.
    """
    auth = json.loads(os.environ['CHAINLIT_AUTH_LOGIN'])

    # One lookup with a default: the former bare next() calls raised
    # StopIteration on an unknown identifier instead of rejecting the login.
    account = next((d for d in auth if d['ident'] == username), None)
    if account is None:
        return None

    ident = account['ident']
    # ident == username by construction, so only the password needs checking.
    # The configured password is hashed on the fly and compared through bcrypt,
    # exactly as before.
    password_ok = bcrypt.checkpw(
        password.encode('utf-8'),
        bcrypt.hashpw(account['pwd'].encode('utf-8'), bcrypt.gensalt()),
    )
    if not password_ok:
        return None

    role = account['role']
    if role == "admindatapcc":
        return cl.User(
            identifier=ident + " : 🧑💼 Admin Datapcc", metadata={"role": "admin", "provider": "credentials"}
        )
    if role == "userdatapcc":
        return cl.User(
            identifier=ident + " : 🧑🎓 User Datapcc", metadata={"role": "user", "provider": "credentials"}
        )
    return None
|
|
|
def process_file(file: AskFileResponse):
    """Load an uploaded file and split it into overlapping text chunks.

    Args:
        file: Uploaded file; its ``type`` selects the loader and its ``path``
            is handed to that loader.

    Returns:
        The documents produced by a ``RecursiveCharacterTextSplitter``
        (chunk size 1000, overlap 100).

    Raises:
        ValueError: if ``file.type`` is neither ``text/plain`` nor
            ``application/pdf`` (previously this surfaced as an
            UnboundLocalError on ``Loader``).
    """
    if file.type == "text/plain":
        Loader = TextLoader
    elif file.type == "application/pdf":
        Loader = PyPDFLoader
    else:
        raise ValueError(f"Unsupported file type: {file.type!r}")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    documents = Loader(file.path).load()
    return text_splitter.split_documents(documents)
|
|
|
def removeTags(all):
    """Drop style/script nodes from ``all`` and return its text joined by spaces.

    ``all`` must be callable with a list of tag names (returning nodes that
    expose ``decompose()``) and must expose ``stripped_strings``.

    NOTE: this module defines ``removeTags`` a second time further down (that
    version joins without a separator); being defined later, it is the one
    bound to the name once the module has finished loading.
    """
    unwanted_nodes = all(['style', 'script'])
    for node in unwanted_nodes:
        node.decompose()
    fragments = all.stripped_strings
    return ' '.join(fragments)
|
|
|
def localisation():
    """Return a fresh list of centroid records, one dict per area code.

    Each record has the keys ``ID``, ``Longitude`` and ``Latitude`` (in that
    order). The IDs look like French département codes (01-95, 2A/2B, then
    974, 973, 972, 971) -- presumably département centroids.
    """
    # (ID, Longitude, Latitude), kept in the original listing order.
    centroids = (
        ("01", 5.3245259, 46.0666003),
        ("02", 3.5960246, 49.5519632),
        ("03", 3.065278, 46.4002783),
        ("04", 6.2237688, 44.1105837),
        ("05", 6.2018836, 44.6630487),
        ("06", 7.0755745, 43.9463082),
        ("07", 4.3497308, 44.7626044),
        ("08", 4.6234893, 49.6473884),
        ("09", 1.6037147, 42.9696091),
        ("10", 4.1394954, 48.2963286),
        ("11", 2.3140163, 43.1111427),
        ("12", 2.7365234, 44.2786323),
        ("13", 5.0515492, 43.5539098),
        ("14", -0.3930779, 49.1024215),
        ("15", 2.6367657, 44.9643217),
        ("16", 0.180475, 45.706264),
        ("17", -0.7082589, 45.7629699),
        ("18", 2.5292424, 47.0926687),
        ("19", 1.8841811, 45.3622055),
        ("2A", 8.9906834, 41.8619761),
        ("2B", 9.275489, 42.372014),
        ("21", 4.7870471, 47.4736746),
        ("22", -2.9227591, 48.408402),
        ("23", 2.0265508, 46.0837382),
        ("24", 0.7140145, 45.1489678),
        ("25", 6.3991355, 47.1879451),
        ("26", 5.1717552, 44.8055408),
        ("27", 0.9488116, 49.1460288),
        ("28", 1.2793491, 48.3330017),
        ("29", -4.1577074, 48.2869945),
        ("30", 4.2650329, 43.9636468),
        ("31", 1.2728958, 43.3671081),
        ("32", 0.4220039, 43.657141),
        ("33", -0.5760716, 44.8406068),
        ("34", 3.4197556, 43.62585),
        ("35", -1.6443812, 48.1801254),
        ("36", 1.6509938, 46.7964222),
        ("37", 0.7085619, 47.2802601),
        ("38", 5.6230772, 45.259805),
        ("39", 5.612871, 46.7398138),
        ("40", -0.8771738, 44.0161251),
        ("41", 1.3989178, 47.5866519),
        ("42", 4.2262355, 45.7451186),
        ("43", 3.8118151, 45.1473029),
        ("44", -1.7642949, 47.4616509),
        ("45", 2.2372695, 47.8631395),
        ("46", 1.5732157, 44.6529284),
        ("47", 0.4788052, 44.4027215),
        ("48", 3.4991239, 44.5191573),
        ("49", -0.5136056, 47.3945201),
        ("50", -1.3203134, 49.0162072),
        ("51", 4.2966555, 48.9479636),
        ("52", 5.1325796, 48.1077196),
        ("53", -0.7073921, 48.1225795),
        ("54", 6.144792, 48.7995163),
        ("55", 5.2888292, 49.0074545),
        ("56", -2.8746938, 47.9239486),
        ("57", 6.5610683, 49.0399233),
        ("58", 3.5544332, 47.1122301),
        ("59", 3.2466616, 50.4765414),
        ("60", 2.4161734, 49.3852913),
        ("61", 0.2248368, 48.5558919),
        ("62", 2.2555152, 50.4646795),
        ("63", 3.1322144, 45.7471805),
        ("64", -0.793633, 43.3390984),
        ("65", 0.1478724, 43.0526238),
        ("66", 2.5239855, 42.5825094),
        ("67", 7.5962225, 48.662515),
        ("68", 7.2656284, 47.8586205),
        ("69", 4.6859896, 45.8714754),
        ("70", 6.1388571, 47.5904191),
        ("71", 4.6394021, 46.5951234),
        ("72", 0.1947322, 48.0041421),
        ("73", 6.4662232, 45.4956055),
        ("74", 6.3609606, 46.1045902),
        ("75", 2.3416082, 48.8626759),
        ("76", 1.025579, 49.6862911),
        ("77", 2.8977309, 48.5957831),
        ("78", 1.8080138, 48.7831982),
        ("79", -0.3159014, 46.5490257),
        ("80", 2.3380595, 49.9783317),
        ("81", 2.2072751, 43.8524305),
        ("82", 1.2649374, 44.1254902),
        ("83", 6.1486127, 43.5007903),
        ("84", 5.065418, 44.0001599),
        ("85", -1.3956692, 46.5929102),
        ("86", 0.4953679, 46.5719095),
        ("87", 1.2500647, 45.9018644),
        ("88", 6.349702, 48.1770451),
        ("89", 3.5634078, 47.8474664),
        ("90", 6.9498114, 47.6184394),
        ("91", 2.2714555, 48.5203114),
        ("92", 2.2407148, 48.835321),
        ("93", 2.4811577, 48.9008719),
        ("94", 2.4549766, 48.7832368),
        ("95", 2.1802056, 49.076488),
        ("974", 55.536384, -21.115141),
        ("973", -53.125782, 3.933889),
        ("972", -61.024174, 14.641528),
        ("971", -61.551, 16.265),
    )
    return [
        {"ID": code, "Longitude": longitude, "Latitude": latitude}
        for code, longitude, latitude in centroids
    ]
|
|
|
def plotDemandeur(dataframe, coderome):
    """Build the job-seekers / job-offers histogram for one ROME code.

    Args:
        dataframe: Frame with the columns ``Indicateur`` and ``Valeur``.
        coderome: ROME code appended to the chart title.

    Returns:
        The Plotly figure, with rows ordered by ``Indicateur``.
    """
    ordered = dataframe.sort_values(by=['Indicateur'])
    chart_title = "Demandeurs d'emploi et offres d'emploi du code ROME : " + coderome
    figure = px.histogram(
        ordered,
        x='Indicateur',
        y='Valeur',
        height=1000,
        title=chart_title,
        color='Indicateur',
        labels={'Valeur': 'Nombre'},
        text_auto=True,
    )
    return figure.update_layout(font={'size': 9, 'color': "RebeccaPurple"}, autosize=True)
|
|
|
def plotSalaire(dataframe):
    """Build the grouped median-salary histogram.

    Args:
        dataframe: Frame with the columns ``emploi``, ``salaire`` and
            ``categorie``.

    Returns:
        The Plotly figure, with rows ordered by ``salaire``.
    """
    ordered = dataframe.sort_values(by=['salaire'])
    figure = px.histogram(
        ordered,
        x='emploi',
        y='salaire',
        barmode='group',
        title="Salaires médians",
        color='categorie',
        text_auto=True,
    )
    return figure.update_layout(font={'size': 9, 'color': "RebeccaPurple"}, autosize=True)
|
|
|
def plotDifficulte(dataframe):
    """Build the recruitment-difficulty histogram.

    The title switches to a "no data" message when ``dataframe`` has no rows.

    Args:
        dataframe: Frame with the columns ``Indicateur`` and ``Valeur``.

    Returns:
        The Plotly figure, with rows ordered by ``Valeur``.
    """
    has_rows = len(dataframe) != 0
    chart_title = (
        "Difficulté de recrutement"
        if has_rows
        else "Aucune donnée difficulté de recrutement renseignée!"
    )
    ordered = dataframe.sort_values(by=['Valeur'])
    figure = px.histogram(
        ordered,
        x='Indicateur',
        y='Valeur',
        title=chart_title,
        color='Indicateur',
        labels={'Valeur': 'Pourcentage'},
        text_auto=True,
    )
    return figure.update_layout(font={'size': 9, 'color': "RebeccaPurple"}, autosize=True)
|
|
|
def plotRepartition(dataframe,title):
    """Build a pie chart of ``Valeur`` per ``Indicateur``.

    Args:
        dataframe: Frame with the columns ``Indicateur`` and ``Valeur``.
        title: Chart title.

    Returns:
        The Plotly figure, with labels and percentages drawn inside the slices.
    """
    ordered = dataframe.sort_values(by=['Valeur'])
    pie = px.pie(
        ordered,
        names='Indicateur',
        values='Valeur',
        color='Indicateur',
        title=title,
        labels={'Valeur': 'pourcentage'},
        color_discrete_sequence=px.colors.qualitative.Safe,
    )
    pie = pie.update_traces(textposition='inside', textinfo='percent+label')
    return pie.update_layout(font={'size': 10, 'color': "RebeccaPurple"})
|
|
|
def removeTags(all):
    """Drop style/script nodes from ``all`` and return its text with no separator.

    ``all`` must be callable with a list of tag names (returning nodes that
    expose ``decompose()``) and must expose ``stripped_strings``. The stripped
    strings are concatenated directly, without any space between them.
    """
    for node in all(['style', 'script']):
        node.decompose()
    pieces = list(all.stripped_strings)
    return ''.join(pieces)
|
|
|
def htmlToDataframe(htmlTable):
    """Convert the first ``<table>`` of an HTML fragment into a DataFrame.

    The first ``<tr>`` supplies the column names and every following ``<tr>``
    one data row. Cell text is read with ``get_text()``; children that do not
    offer that method are skipped.

    Args:
        htmlTable: HTML markup containing at least one table.

    Returns:
        A ``pandas.DataFrame`` of cell texts (all values are strings).

    Raises:
        IndexError: if the markup contains no ``<table>``.
    """
    soup = BeautifulSoup(htmlTable, 'html.parser')
    # Look the table up once instead of re-running find_all for header and body.
    table = soup.find_all("table")[0]

    def cell_texts(row):
        # Only a missing get_text() is tolerated; the former bare ``except:``
        # also hid unrelated failures (including KeyboardInterrupt).
        texts = []
        for child in row:
            try:
                texts.append(child.get_text())
            except AttributeError:
                continue
        return texts

    list_header = cell_texts(table.find("tr"))
    data = [cell_texts(row) for row in table.find_all("tr")[1:]]
    return pd.DataFrame(data=data, columns=list_header)
|
|
|
def datavisualisation_chiffres_cles_emplois(url):
    """Scrape a "chiffres clés" job page into five small HTML tables.

    Args:
        url: Address of the page to download.

    Returns:
        ``[alldemandeurs, allsalaires, alldifficultes, allrepartitions,
        allentreprises]`` -- each an HTML ``<table>`` string whose first row
        is the header. ``allsalaires`` is ``''`` when fewer than three salary
        values are found on the page.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
        IndexError: when the page lacks the elements read by fixed index.
    """
    # requests waits indefinitely by default; bound the wait so a stalled
    # server cannot hang the caller forever.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, "lxml")

    def without_nbsp(node):
        # Figures use non-breaking spaces as thousands separators.
        return ''.join(removeTags(node).split('\xa0'))

    def amount(node):
        # Salary text reduced to bare digits.
        return removeTags(node).replace('\xa0','').replace(' ','').replace('€','')

    # --- job seekers / job offers -------------------------------------------
    allembauches = soup.select('p.population_category')
    allnumembauchesfirst = soup.select('p.population_main-num.data')
    allnumembauches = without_nbsp(allnumembauchesfirst[0])
    allnumoffres = without_nbsp(allnumembauchesfirst[1])
    alldetailembauches = soup.select('p.hiring_text.ng-star-inserted')
    allnumevolutionembauches = soup.select('p.main.ng-star-inserted')
    alldetailevolutionembauches = soup.select('p.population_bubble-title')

    parts = ["<table><tr><td>Indicateur</td><td>Valeur</td></tr>"]
    indicators = (
        ("Évolution demandeurs d'emploi", allnumembauches),
        ("Évolution offres d'emploi", allnumoffres),
    )
    for i, (evolution_label, value) in enumerate(indicators):
        parts.append("<tr><td>" + removeTags(allembauches[i]) + " (" + removeTags(alldetailembauches[i]) + ");")
        # The evolution figure is optional on the page.
        if len(alldetailevolutionembauches) > i and len(allnumevolutionembauches) > i:
            parts.append("\n" + evolution_label + " (" + removeTags(alldetailevolutionembauches[i]) + ": " + removeTags(allnumevolutionembauches[i]) + ")</td>")
        else:
            parts.append("</td>")
        parts.append("<td>" + value + "</td></tr>")
    parts.append("</table>")
    alldemandeurs = ''.join(parts)

    # --- median salaries -----------------------------------------------------
    allFAP = soup.select('tr.sectorTable__line.ng-star-inserted')
    allcategorie = soup.select('td.sectorTable__cell')
    alltypesalaires = soup.select('th.sectorTable__cell')
    allFAPsalaires = soup.select('p.sectorTable__cellValue')
    allsalaires = ''
    # NOTE(review): the source lost its indentation; the closing "</table>" is
    # taken to belong to this branch, otherwise a page without salaries would
    # yield a bare "</table>" that the caller could not parse.
    if len(allFAPsalaires) >= 3:
        # (category cell index, index of the first of three salary values)
        groups = [(0, 0)]
        if len(allFAP) >= 2 and len(allFAPsalaires) == 6:
            groups.append((4, 3))
        rows = ["<table><tr><td>categorie</td><td>emploi</td><td>salaire</td></tr>"]
        for category_index, first_salary in groups:
            for offset in range(3):
                rows.append(
                    "<tr><td>" + removeTags(alltypesalaires[offset + 1])
                    + "</td><td>" + removeTags(allcategorie[category_index])
                    + "</td><td>" + amount(allFAPsalaires[first_salary + offset])
                    + "</td></tr>"
                )
        rows.append("</table>")
        allsalaires = ''.join(rows)

    # --- recruitment difficulties ---------------------------------------------
    alltypedifficultes = soup.select('.tabs-main-content_persp-col2-bar.ng-star-inserted')
    alldifficulte = soup.select('p.horizontal-graph_title')
    allpcdifficulte = soup.select('div.horizontal-graph_data')
    rows = ["<table><tr><td>Indicateur</td><td>Valeur</td></tr>"]
    for i in range(len(alltypedifficultes)):
        label = removeTags(alldifficulte[i])
        percentage = removeTags(allpcdifficulte[i]).replace('Pour le territoire principal FRANCE pour les ' + label,'').replace('%','')
        rows.append("<tr><td>" + label + "</td><td>" + percentage + "</td></tr>")
    rows.append("</table>")
    alldifficultes = ''.join(rows)

    # --- hires by contract type -----------------------------------------------
    alltyperepartitions = soup.select('div.hiring-contract_legende_item.ng-star-inserted')
    allrepartition = soup.select('p.hiring-contract_legende_item_label')
    allpcrepartition = soup.select('span.hiring-contract_legende_item-first')
    rows = ["<table><tr><td>Indicateur</td><td>Valeur</td></tr>"]
    for i in range(len(alltyperepartitions)):
        percentage = removeTags(allpcrepartition[i])
        label = removeTags(allrepartition[i]).replace('(' + percentage + ')','')
        rows.append("<tr><td>" + label + "</td><td>" + percentage.replace('%','').replace(',','.') + "</td></tr>")
    rows.append("</table>")
    allrepartitions = ''.join(rows)

    # --- hires by company size ------------------------------------------------
    allentrepriserepartitions = soup.select('div.horizontal-graph_pattern.sm-bubble_wrapper > span')
    rows = ["<table><tr><td>Indicateur</td><td>Valeur</td></tr>"]
    for node in allentrepriserepartitions:
        text = removeTags(node)
        # The last four characters hold the percentage, the rest is the label.
        rows.append("<tr><td>" + text[0:-4] + "</td><td>" + text[-4:].replace('%','').replace(',','.') + "</td></tr>")
    rows.append("</table>")
    allentreprises = ''.join(rows)

    return [alldemandeurs, allsalaires, alldifficultes, allrepartitions, allentreprises]
|
|
|
def listToString(list):
    """Return the ``str()`` rendering of ``list`` (any object is accepted)."""
    return f"{list!s}"
|
|
|
def arrayOfSecteur(array):
    """Look up the NAF sector labels attached to a list of ROME codes.

    Reads ``./public/secteur_naf_v01.csv``, joins with ``'; '`` all
    ``libelle_secteur_naf`` values sharing a ``code_rome``, and keeps the
    codes present in ``array``.

    Args:
        array: ROME codes to look up.

    Returns:
        A list of one-element lists, one per matched code, each holding the
        joined sector labels.
    """
    naf = pd.read_csv("./public/secteur_naf_v01.csv")
    naf = naf[['libelle_secteur_naf', 'code_rome']].copy()
    sectors_by_rome = naf.groupby(naf['code_rome'], as_index=False).agg(
        {'libelle_secteur_naf': '; '.join}
    )

    requested = pd.DataFrame({'code_rome': array})
    matched = pd.merge(requested, sectors_by_rome, on="code_rome")

    labels = matched[['libelle_secteur_naf']].copy()
    return labels.values.tolist()
|
|
|
def arrayToString(array):
    """Join the ``'libelle'`` value of each entry into one string.

    Entries that cannot be indexed by ``'libelle'`` contribute the
    placeholder ``"; "``. Items are joined with ``", "`` and the result
    always ends with ``"; "``.

    Args:
        array: Sequence of mappings, typically dicts carrying a ``libelle`` key.

    Returns:
        The joined labels followed by ``"; "``.
    """
    labels = []
    for item in array:
        # Index the key directly. The former test searched for the substring
        # "libelle" in the item's repr, so an entry such as
        # {'libelleCourt': ...} passed the test and then raised KeyError.
        try:
            labels.append(item['libelle'])
        except (KeyError, TypeError, IndexError):
            labels.append("; ")
    return ', '.join(labels) + '; '
|
|
|
def _rome_libelle(code, index):
    """Return ``libelle_rome`` of the first index match for ``code``, or None."""
    result = index.query(
        top_k=1,
        vector=[0] * 768,
        namespace='',
        filter={"categorie": {"$eq": "rome"}, "rome": {"$eq": code}},
        include_metadata=True,
    )
    matches = result['matches']
    if not matches:
        return None
    return matches[0]['metadata']['libelle_rome']


def searchByRome(rome,index):
    """Resolve one or several ROME codes to their labels through the index.

    Args:
        rome: A single ROME code, or several separated by commas. In the
            comma-separated form only the first six positions are considered,
            and only entries that are exactly five characters long once
            stripped.
        index: Object exposing ``query(...)`` whose result offers a
            ``'matches'`` list with ``metadata['libelle_rome']``.

    Returns:
        The labels found, each preceded by a space (``''`` when none). A code
        with no match is skipped; previously it raised IndexError.
    """
    if rome.find(',') != -1:
        candidates = [piece.strip() for piece in rome.split(',')]
        codes = [
            code for position, code in enumerate(candidates)
            if position <= 5 and len(code) == 5
        ]
    else:
        codes = [rome]

    libelle = ''
    for code in codes:
        label = _rome_libelle(code, index)
        if label is not None:
            libelle = libelle + " " + label
    return libelle
|
|
|
@cl.author_rename
def rename(orig_author: str):
    """Translate an internal author name into the label shown in the chat.

    Args:
        orig_author: Author name as emitted by the chain components.

    Returns:
        The display label when one is defined, otherwise ``orig_author``
        unchanged.
    """
    display_names = {
        "ConversationalRetrievalChain": "💬 Assistant conversationnel",
        "Retriever": "Agent conversationnel",
        "StuffDocumentsChain": "Chaîne de documents",
        "LLMChain": "Agent",
        "HuggingFaceEndpoint": "Mistral AI 🤖",
    }
    if orig_author in display_names:
        return display_names[orig_author]
    return orig_author
|
|
|
@cl.action_callback("listOfEntreprises")
async def on_action(action):
    """Handle the "listOfEntreprises" action: reply with the company list.

    ``action.value`` is parsed with ``ast.literal_eval`` into the list of ROME
    codes, mapped to sector labels by ``arrayOfSecteur`` and handed to
    ``creation_liste_entreprises``; the result is sent as a chat message.
    """
    rome_codes = ast.literal_eval(action.value)
    secteurs = arrayOfSecteur(rome_codes)
    entreprises = await creation_liste_entreprises(secteurs)
    reply = cl.Message(author="Datapcc : 🌐🌐🌐", content=entreprises)
    await reply.send()
|
|
|
@cl.action_callback("datavizChiffresClesMetiers")
async def on_action(action):
    """Handle the "datavizChiffresClesMetiers" action: send the key-figure charts.

    For every ROME code listed in ``action.value`` the key-figures page is
    scraped and turned into Plotly charts (job seekers, salaries when
    available, recruitment difficulty, hires by contract type and by company
    type). All charts are sent in a single message.
    """
    # NOTE(review): this address uses the pole-emploi.fr host while
    # construction_NCS uses dataemploi.francetravail.fr -- confirm which one
    # is current.
    base_url = "https://dataemploi.pole-emploi.fr/metier/chiffres-cles/NAT/FR/"
    rome_codes = ast.literal_eval(action.value)
    elements = []

    def add_chart(name, figure):
        elements.append(cl.Plotly(name=name, figure=figure, display="inline", size="large"))

    for code_rome in rome_codes:
        demandeurs, salaires, difficultes, contrats, entreprises = datavisualisation_chiffres_cles_emplois(base_url + code_rome)

        add_chart("chart_demandeur", plotDemandeur(htmlToDataframe(demandeurs), code_rome))
        # The salary table is an empty string when the page shows no salaries.
        if len(salaires) > 0:
            add_chart("chart_salaire", plotSalaire(htmlToDataframe(salaires)))
        add_chart("chart_difficulte", plotDifficulte(htmlToDataframe(difficultes)))
        add_chart(
            "chart_repatitionContrat",
            plotRepartition(htmlToDataframe(contrats), "Répartition des embauches du métier : type de contrat"),
        )
        add_chart(
            "chart_repartitionEntreprise",
            plotRepartition(htmlToDataframe(entreprises), "Répartition des embauches du métier : type entreprise"),
        )

    await cl.Message(content="Datavisualisation des chiffres clés des Métiers", elements=elements).send()
|
|
|
|
|
@cl.action_callback("download")
async def on_action(action):
    """Handle the "download" action: write the value to a .txt file and link it.

    ``action.value`` is written through ``DataFrame.to_csv`` (no index, no
    header) into ``./<action.description>.txt``; the file is attached to a
    chat message and the action is then removed.
    """
    # NOTE(review): the file name comes straight from action.description and
    # is not sanitised -- confirm it can never contain path separators.
    file_name = action.description + ".txt"
    file_path = "./" + file_name

    frame = pd.DataFrame(np.array([action.value]))
    with open(file_path, 'wb') as csv_file:
        frame.to_csv(path_or_buf=csv_file, index=False, header=False, encoding='utf-8')

    attachment = cl.File(name=file_name, path=file_path, display="inline")
    reply = cl.Message(author="Datapcc : 🌐🌐🌐", content="[Lien] 🔗", elements=[attachment])
    await reply.send()
    await action.remove()
|
|
|
@cl.action_callback("saveMemory")
async def on_action(action):
    """Handle the "saveMemory" action: append the value to the session buffer.

    The text stored under the ``saveMemory`` session key is extended with
    ``action.value``; a confirmation message is sent and the action removed.
    """
    previous = cl.user_session.get("saveMemory")
    cl.user_session.set("saveMemory", previous + action.value)
    confirmation = cl.Message(
        author="Datapcc : 🌐🌐🌐",
        content="🗃️ Document sauvegardé dans le buffer Memory!",
    )
    await confirmation.send()
    await action.remove()
|
|
|
@cl.cache
def to_cache(file):
    """Return the CIPEN download URL of the CSV document named ``file``."""
    base_url = "https://cipen.univ-gustave-eiffel.fr/fileadmin/CIPEN/datas/assets/docs/"
    return base_url + file + ".csv"
|
|
|
|
|
@cl.set_chat_profiles
async def chat_profile():
    """Declare the chat profiles offered to the user (ROME and ESCO)."""
    # (profile name, markdown description); the text after " - " in the name
    # is the key the chat-start handler dispatches on.
    catalogue = (
        ("Catalogue ROME - ROMESKILLS", "Les compétences du catalogue ROME"),
        ("Classification ESCO - ESCOSKILLS", "Les compétences de la classification ESCO"),
    )
    return [
        cl.ChatProfile(name=name, markdown_description=description, icon="./public/favicon.png")
        for name, description in catalogue
    ]
|
@cl.on_chat_start
async def start():
    """Initialise a chat session: avatar, profile context, model and chain.

    The part of the selected chat profile after ``" - "`` chooses the home
    context (``homeRome`` / ``homeEsco`` / ``homeRne``) and the environment
    variable stored under the ``categorie`` session key. A streaming
    ``HuggingFaceEndpoint`` and a prompt embedding the first 26500 characters
    of that context are assembled into a runnable stored under ``runnable``.

    Raises:
        ValueError: if no known chat profile is selected (previously this
            surfaced as an AttributeError, IndexError or UnboundLocalError).
        KeyError: if a required environment variable is missing.
    """
    await cl.Avatar(
        name="You",
        path="./public/logo-ofipe.jpg",
    ).send()

    chat_profile = cl.user_session.get("chat_profile")
    # Profile names have the form "<label> - <KEY>".
    name_parts = (chat_profile or "").split(' - ')
    profile_key = name_parts[1] if len(name_parts) > 1 else None

    if profile_key == 'ROMESKILLS':
        contextChat = await homeRome()
        cl.user_session.set("categorie", os.environ['PINECONE_API_KEYROME'])
    elif profile_key == 'ESCOSKILLS':
        contextChat = await homeEsco()
        cl.user_session.set("categorie", os.environ['PINECONE_API_KEYESCO'])
    elif profile_key == 'RNE':
        # NOTE(review): RNE reuses the ESCO key, as in the original -- confirm.
        cl.user_session.set("categorie", os.environ['PINECONE_API_KEYESCO'])
        contextChat = await homeRne()
    else:
        raise ValueError(f"Unknown chat profile: {chat_profile!r}")

    # The original re-assigned the variable to itself, whose only effect was a
    # KeyError when the token is missing; keep that early failure explicitly.
    if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
        raise KeyError('HUGGINGFACEHUB_API_TOKEN')
    repo_id = "mistralai/Mistral-7B-Instruct-v0.3"

    model = HuggingFaceEndpoint(
        repo_id=repo_id,
        max_new_tokens=6000,
        temperature=1.0,
        streaming=True
    )

    if not cl.user_session.get("saveMemory"):
        cl.user_session.set("saveMemory", "")
    cl.user_session.set("memory", ConversationBufferMemory(return_messages=True))
    memory = cl.user_session.get("memory")

    # The system message is itself parsed as a prompt template, so literal
    # braces in the context must be doubled or they would be read as template
    # variables. The rendered text is unchanged.
    context_excerpt = f"{contextChat[0:26500]}".replace('{', '{{').replace('}', '}}')
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                f"Contexte : Vous êtes un spécialiste du marché de l'emploi en fonction du niveau de qualification, des compétences professionnelles, des compétences transversales, du salaire et de l'expérience. Vous êtes doué pour faire des analyses du système travail sur les métiers les plus demandés grâce à votre aptitude à synthétiser les informations en fonction des critères définis ci-avant. En fonction des informations suivantes et du contexte suivant seulement et strictement. Contexte : {context_excerpt}. Réponds à la question suivante de la manière la plus pertinente, la plus exhaustive et la plus détaillée possible, avec au minimum 3000 tokens jusqu'à 4000 tokens, seulement et strictement dans le contexte et les informations fournies. Essayez donc de comprendre en profondeur le contexte et répondez uniquement en vous basant sur les informations fournies.",
            ),
            MessagesPlaceholder(variable_name="history"),
            ("human", "{question}, dans le contexte fourni."),
        ]
    )
    # Inject the stored conversation history, then run prompt -> model.
    runnable = (
        RunnablePassthrough.assign(
            history=RunnableLambda(memory.load_memory_variables) | itemgetter("history")
        )
        | prompt
        | model
    )
    cl.user_session.set("runnable", runnable)
|
|
|
@literal_client.step(type="run")
async def construction_NCS(competenceList, chatProfile):
    """Fetch job offers for a query, publish them in the chat and store the context.

    The offers returned by ``contexte`` are listed in a chat message. For the
    ``ROMESKILLS`` profile the key figures, two action buttons and one job
    sheet link per ROME code are sent as well, and the key figures are
    appended to the stored context. The first 28875 characters of the context
    are saved under the ``contextChatBot`` session key before the statistics
    data-visualisation runs.

    Args:
        competenceList: The user's query text (echoed in the messages).
        chatProfile: Profile key; ``'ROMESKILLS'`` enables the extra material.

    Returns:
        The fixed string "datavisualisation des statistiques de l'emploi".
    """
    author = "Datapcc : 🌐🌐🌐"
    context = await contexte(competenceList, chatProfile)
    emploisST = context.to_string(index = False)
    flattened = str(emploisST).replace('[','').replace(']','').replace('{','').replace('}','')

    rome_mode = chatProfile == 'ROMESKILLS'
    if rome_mode:
        romeListArray = cl.user_session.get("codeRomeArray")
        ficheClesMetier = await document_chiffres_cles_emplois("https://dataemploi.francetravail.fr/metier/chiffres-cles/NAT/FR/", romeListArray)
        contentChatBot = flattened + ficheClesMetier
    else:
        contentChatBot = flattened

    # Job list: identical for both profiles.
    columns = ['intitule','typeContratLibelle','experienceLibelle','competences','description','qualitesProfessionnelles','salaire','lieuTravail','formations']
    job_rows = context[columns].copy().values.tolist()
    job_chunks = []
    for intitule, contrat, experience, competences, description, qualites, salaire, lieu, formations in job_rows:
        job_chunks.append(
            "\n✔️ Emploi : " + str(intitule)
            + ";\n◉ Contrat : " + str(contrat)
            + ";\n◉ Compétences professionnelles : " + str(competences)
            + ";\n" + "◉ Salaire : " + str(salaire)
            + ";\n◉ Qualification : " + str(qualites).replace("'libelle'","\n• 'libelle")
            + ";\n◉ Localisation : " + str(lieu)
            + ";\n◉ Expérience : " + str(experience)
            + ";\n◉ Niveau de qualification : " + str(formations)
            + ";\n◉ Description de l'emploi : " + str(description)
            + "\n"
        )
    stringEmplois = ''.join(job_chunks)

    await cl.sleep(1)
    listEmplois_name = "Liste des emplois"
    jobs_text = stringEmplois.replace('[','').replace(']','').replace('{','').replace('}','').replace("'code'","\n• 'code'")
    text_elements = [
        cl.Text(content="Question : " + competenceList + "\n\nRéponse :\n" + jobs_text, name=listEmplois_name)
    ]
    await cl.Message(author=author, content="👨💼 Source France Travail : " + listEmplois_name, elements=text_elements).send()

    if rome_mode:
        await cl.sleep(1)
        listClesMetier_name = "Chiffres clés des emplois"
        text_ClesMetier = [
            cl.Text(content="Question : " + competenceList + "\n\nRéponse :\n" + ficheClesMetier, name=listClesMetier_name)
        ]
        await cl.Message(author=author, content="📈 Source France Travail : " + listClesMetier_name, elements=text_ClesMetier).send()

        await cl.sleep(1)
        entreprises_action = cl.Action(name="listOfEntreprises", value=str(romeListArray), description="Afficher la liste des entreprises par code secteur d'actvités")
        await cl.Message(author=author, content="🏭 Afficher la liste des entreprises par code secteur d'actvités", actions=[entreprises_action]).send()

        await cl.sleep(1)
        dataviz_action = cl.Action(name="datavizChiffresClesMetiers", value=str(romeListArray), description="Afficher la datavisualisation des chiffres clés des métiers")
        await cl.Message(author=author, content="📊 Afficher la datavisualisation des chiffres clés des métiers", actions=[dataviz_action]).send()

        await cl.sleep(1)
        # NOTE(review): indentation was lost in the source; one message per
        # job sheet is assumed, since the element list is rebuilt for each code.
        for code in romeListArray:
            fiche = cl.File(name= "Fiche métier " + code,url="https://www.soi-tc.fr/assets/fiches_pe/FEM_" + code + ".pdf",display="inline",)
            await cl.Message(
                author=author, content="[Fiches métiers] 🔗", elements=[fiche]
            ).send()

    cl.user_session.set("contextChatBot", contentChatBot[0:28875])

    await datavisualisation_statistiques_emplois(context)

    return "datavisualisation des statistiques de l'emploi"
|
|
|
@cl.step(type="run")
async def recuperation_contexte(getNote):
    """Return the session value stored under ``getNote``, prefixed by that key.

    The result has the form ``"<getNote> :\\n<stored value>"``; the stored
    value must be a string.
    """
    stored = cl.user_session.get(getNote)
    header = getNote + " :\n"
    return header + stored
|
@cl.step(type="retrieval")
async def contexte(competence, chatProfile):
    """Resolve a query to ROME codes and fetch the matching job offers.

    The profile-specific builder is awaited first (its return value is not
    used here), then the ``codeRomeArray`` session value is passed to
    ``API_France_Travail``.

    Args:
        competence: The user's query text.
        chatProfile: ``'ROMESKILLS'`` or ``'ESCOSKILLS'``; any other value
            skips the builder step.

    Returns:
        Whatever ``API_France_Travail`` returns for the stored ROME codes.
    """
    categorie_key = "categorie"
    if chatProfile == 'ROMESKILLS':
        await creation_liste_code_Rome(competence, cl.user_session.get(categorie_key))
    elif chatProfile == 'ESCOSKILLS':
        await creation_liste_skills_Esco(competence, cl.user_session.get(categorie_key))

    # NOTE(review): indentation was lost in the source; the pause is assumed
    # to apply to every profile.
    await cl.sleep(1)

    rome_codes = cl.user_session.get("codeRomeArray")
    return await API_France_Travail(rome_codes)
|
|
|
@cl.step(type="tool")
async def document_chiffres_cles_emplois(url, codes):
    """Scrape the key-figures page of each code and build a text summary.

    For every entry of ``codes`` the page at ``url + code`` is downloaded and
    parsed with BeautifulSoup; the text of a fixed set of page sections is
    extracted with ``removeTags`` and assembled into one paragraph per code.
    A section that is absent from the page contributes an empty string.

    Args:
        url: Base URL to which each code is appended.
        codes: Sequence of code strings.

    Returns:
        The concatenation of one summary paragraph per code (empty string
        when ``codes`` is empty).

    Raises:
        requests.RequestException: If a page cannot be fetched, including
            when the request exceeds the timeout.
    """

    def first_text(matches):
        # Text of the first matching element, or "" when nothing matched.
        return removeTags(matches[0]) if matches else ""

    fiches = []
    for code in codes:
        # A timeout keeps an unresponsive server from blocking the step forever.
        response = requests.get(url + code, timeout=30)
        soup = BeautifulSoup(response.text, "html.parser")

        titre = first_text(soup.select('h1#titreMetier'))
        embauches = first_text(soup.select('div.jobs_item-container-flex'))
        salaires = first_text(soup.select('div.key-number_block.shadow.inset'))
        salaires_medians = first_text(soup.select('tbody.sectorTable__body'))
        difficultes = first_text(soup.select('div.dynamism_canvas-wrapper > p.sr-only'))
        origine_difficultes = first_text(soup.select('div.tabs-main-data_persp-col2'))
        # Every contract legend item is listed, each followed by ", ".
        types_contrat = "".join(
            removeTags(item) + ", "
            for item in soup.find_all("div", class_="hiring-contract_legende_item ng-star-inserted")
        )
        tailles_entreprise = first_text(soup.find_all("div", class_="horizontal-graph_patterns"))

        fiches.append(
            "\n\nChiffres-clés Métier : \n**" + titre
            + "**:\n◉ Demandeurs d'emploi et Offres d'emploi : "
            + embauches.replace("Plus de données sur les Demandeurs d'emploi","").replace("Plus de données","")
            + ".\n◉ Salaires proposés dans les offres : " + salaires
            + ".\n◉ Salaires médians constatés : " + salaires_medians
            + ".\n◉ Difficultés de recrutement pour les entreprises : " + difficultes
            + ".\n◉ Origine des difficultés : " + origine_difficultes
            + ".\n◉ Répartition des embauches par type de contrat : " + types_contrat
            + ".\n◉ Répartition des embauches par taille d'entreprise : " + tailles_entreprise
            + "."
        )

    return "".join(fiches)
|
|
|
def _count_rows_by(frame, column):
    """Count the rows of ``frame`` per distinct ``column`` value into an ``obs`` column."""
    return frame.groupby(column).size().reset_index(name='obs')


def _top_values_bar_chart(counts, column, title, top_n):
    """Build the horizontal bar chart of the ``top_n`` largest ``obs`` counts."""
    top = counts.sort_values(by=['obs']).iloc[-top_n:]
    figure = px.bar(top, x='obs', y=column, orientation='h', color='obs', title=title, labels={'obs':'nombre'}, color_continuous_scale="Teal", text_auto=True)
    figure.update_layout(font=dict(size=10,color="RebeccaPurple"),autosize=True)
    # Axis labels are cut at 100 characters; the hover text keeps the full label.
    figure.update_traces(hovertemplate=top[column] + ' <br>Nombre : %{x}', y=[label[:100] + "..." for label in top[column]], showlegend=False)
    return figure


def _share_pie_chart(counts, column, title):
    """Build the pie chart of the ``obs`` counts per ``column`` value."""
    figure = px.pie(counts, names=column, values='obs', color='obs', title=title, labels={'obs':'nombre'}, color_discrete_sequence=px.colors.qualitative.Safe)
    figure.update_traces(textposition='inside', textinfo='percent+label')
    figure.update_layout(font=dict(size=10,color="RebeccaPurple"))
    return figure


@cl.step(type="tool")
async def datavisualisation_statistiques_emplois(results_df):
    """Send a message with eight Plotly charts summarising the job offers.

    Charts, in display order: job titles, contract types, professional
    skills, transversal skills, qualification levels, experience, activity
    sectors and a map of offers per location code.

    Job titles, contract types, sectors and locations are counted over every
    offer. Skills, qualification levels and experience are counted over the
    offers that have ``qualitesProfessionnelles``, ``formations`` and
    ``competences`` all filled in.

    Skill labels are counted from the original lists, one row per label, so
    a label is counted under a single key wherever it appears in an offer.

    Args:
        results_df: DataFrame of offers. Columns read: ``intitule``,
            ``typeContratLibelle``, ``secteurActiviteLibelle``,
            ``experienceLibelle``, ``lieuTravail`` (mapping with a
            ``libelle`` entry), ``competences``, ``qualitesProfessionnelles``
            and ``formations`` (lists of mappings).

    Returns:
        None. When ``results_df`` is empty a short notice is sent instead of
        the charts.
    """
    if results_df.empty:
        # Nothing to aggregate: tell the user rather than failing on empty frames.
        await cl.Message(author="Datapcc : 🌐🌐🌐", content="Il n'y a pas d'offres d'emploi.").send()
        return

    # Offers usable for the skill / qualification / experience charts.
    complete_offers = results_df.dropna(subset=['qualitesProfessionnelles', 'formations', 'competences'])

    competences = complete_offers['competences'].apply(
        lambda items: [str(item['libelle']) for item in items]
    )
    transversales = complete_offers['qualitesProfessionnelles'].apply(
        lambda items: [str(item['libelle']) + ": " + str(item['description']) for item in items]
    )
    formations = complete_offers['formations'].apply(
        lambda items: '; '.join(str(item['niveauLibelle']) for item in items)
    )

    fig_intitule = _top_values_bar_chart(
        _count_rows_by(results_df, 'intitule'), 'intitule', "Les principaux emplois", 25
    )
    fig_contrat = _share_pie_chart(
        _count_rows_by(results_df, 'typeContratLibelle'), 'typeContratLibelle', "Les types de contrat"
    )
    fig_secteur = _top_values_bar_chart(
        _count_rows_by(results_df, 'secteurActiviteLibelle'), 'secteurActiviteLibelle',
        "Les principaux secteurs d'activités", 25
    )
    # explode() yields one row per label, so each label is counted exactly once per offer.
    fig_competences = _top_values_bar_chart(
        _count_rows_by(competences.explode().to_frame(name='competences'), 'competences'),
        'competences', "Les principales compétences professionnelles", 20
    )
    fig_transversales = _top_values_bar_chart(
        _count_rows_by(transversales.explode().to_frame(name='qualitesProfessionnelles'), 'qualitesProfessionnelles'),
        'qualitesProfessionnelles', "Les principales compétences transversales", 20
    )
    fig_formations = _share_pie_chart(
        _count_rows_by(formations.to_frame(name='formations'), 'formations'),
        'formations', "Les niveaux de qualification"
    )
    fig_experience = _share_pie_chart(
        _count_rows_by(complete_offers, 'experienceLibelle'),
        'experienceLibelle', "Les expériences professionnelles"
    )

    # Location code = first three characters of the location label, stripped.
    codes_lieu = results_df['lieuTravail'].apply(lambda lieu: lieu['libelle'][0:3].strip())
    # These prefixes are excluded from the map.
    codes_lieu = codes_lieu[~codes_lieu.isin(['Fra', 'FRA', 'Ile', 'Mar', 'Bou', '976'])]
    df_localisation = _count_rows_by(codes_lieu.to_frame(name='lieuTravail'), 'lieuTravail')
    df_localisation = df_localisation.sort_values(by=['lieuTravail'])

    # Coordinates come from the centroid list; codes without a centroid get NaN.
    centroids = localisation()
    longitudes = {centroid['ID']: centroid['Longitude'] for centroid in centroids}
    latitudes = {centroid['ID']: centroid['Latitude'] for centroid in centroids}
    df_localisation["longitude"] = pd.to_numeric(df_localisation['lieuTravail'].map(longitudes), downcast="float")
    df_localisation["latitude"] = pd.to_numeric(df_localisation['lieuTravail'].map(latitudes), downcast="float")

    # NOTE(review): this layer is a Spain-provinces GeoJSON drawn with line
    # width 0 on a map centred on lon 2 / lat 47 -- confirm it is still wanted.
    # The timeout keeps a stalled download from blocking the step forever.
    res = requests.get(
        "https://raw.githubusercontent.com/codeforgermany/click_that_hood/main/public/data/spain-provinces.geojson",
        timeout=30,
    )
    fig_localisation = px.scatter_mapbox(df_localisation, lat="latitude", lon="longitude", hover_name="lieuTravail", size="obs").update_layout(
        mapbox={
            "style": "carto-positron",
            "center": {"lon": 2, "lat" : 47},
            "zoom": 4.5,
            "layers": [
                {
                    "source": res.json(),
                    "type": "line",
                    "color": "green",
                    "line": {"width": 0},
                }
            ],
        }
    )

    charts = [
        ("chart_intitule", fig_intitule),
        ("chart_contrat", fig_contrat),
        ("chart_competences", fig_competences),
        ("chart_transversales", fig_transversales),
        ("chart_formations", fig_formations),
        ("chart_experience", fig_experience),
        ("chart_secteur", fig_secteur),
        ("chart_localisation", fig_localisation),
    ]
    elements = [
        cl.Plotly(name=name, figure=figure, display="inline", size="large")
        for name, figure in charts
    ]

    await cl.Message(content="Datavisualisation du marché de l'emploi", elements=elements).send()
|
|
|
@cl.step(type="tool")
async def API_France_Travail(romeListArray):
    """Search the France Travail offers API for each keyword entry.

    Each entry is searched over the period from the first day of the previous
    month up to now, requesting the result range ``0-149``. A search that
    fails is reported on stdout and skipped; the remaining entries are still
    searched.

    Args:
        romeListArray: Iterable of search strings, or ``None`` (treated as
            no entries).

    Returns:
        A DataFrame with one row per returned offer; empty when nothing was
        found.
    """
    client = await connexion_France_Travail()

    end_dt = datetime.datetime.today()
    # Start of the previous month; January rolls back to December of last year.
    if end_dt.month == 1:
        month, year = 12, end_dt.year - 1
    else:
        month, year = end_dt.month - 1, end_dt.year
    start_dt = end_dt.replace(day=1, month=month, year=year)

    results = []
    # The session value may be missing when no selection was stored.
    for k in romeListArray or []:
        if k[0:1] == ' ':
            k = k[1:]
        params = {
            "motsCles": k.replace('/', '').replace('-', '').replace(',', '').replace(' ', ','),
            'minCreationDate': dt_to_str_iso(start_dt),
            'maxCreationDate': dt_to_str_iso(end_dt),
            'range': '0-149',
        }
        try:
            search_on_big_data = client.search(params=params)
            results.extend(search_on_big_data["resultats"])
        except Exception as error:
            # Best effort per keyword; the error is printed so that a real
            # failure is not mistaken for "no offers".
            print("Il n'y a pas d'offres d'emploi.", repr(error))

    return pd.DataFrame(results)
|
|
|
def _format_adresse_entreprise(adresse):
    """Render the address parts present in ``adresse``, in the order they are displayed."""
    texte = ""
    if 'numVoie' in adresse:
        texte += "n° " + str(adresse['numVoie'])
    if 'typeVoie' in adresse:
        texte += " " + str(adresse['typeVoie'])
    if 'voie' in adresse:
        texte += " " + str(adresse['voie'])
    if 'complementLocalisation' in adresse:
        texte += ", " + str(adresse['complementLocalisation']) + ","
    if 'codePostal' in adresse:
        texte += " " + str(adresse['codePostal'])
    if 'commune' in adresse:
        texte += " " + str(adresse['commune'])
    return texte


@cl.step(type="tool")
async def creation_liste_entreprises(arrayOfsecteur):
    """List companies of the RNE registry for each requested activity sector.

    For each sector label the closest ``inpiSecteur`` entry of the vector
    store gives a sector code, which is used to query the first 500 companies
    of the registry. Companies whose record mentions ``cessation``, or that
    have no ``personneMorale`` / ``adresseEntreprise`` section, are skipped.

    Args:
        arrayOfsecteur: Sequence of sector entries. Each entry is either the
            sector label itself (a string) or a sequence whose first item is
            the label.

    Returns:
        A text listing name, SIREN, address and, when available, the first
        activity of each retained company. Sectors with no match or a failed
        registry request get a "no companies" line instead.

    Raises:
        requests.RequestException: If the registry cannot be reached,
            including when the request exceeds the timeout.
    """
    docsearch = await connexion_vector_database()
    parts = []
    token = None

    for secteur in arrayOfsecteur:
        # Indexing a plain string with [0] would keep only its first character.
        libelle = secteur if isinstance(secteur, str) else secteur[0]
        aucune_entreprise = "Il n'y a pas d'entreprises pour le secteur sélectionné : " + libelle

        retrieve_comp = docsearch.similarity_search(libelle, k=1, filter={"categorie": {"$eq": "inpiSecteur"}})
        if not retrieve_comp:
            parts.append(aucune_entreprise)
            continue

        codeSecteur = int(retrieve_comp[0].metadata['codefinal'])
        # 7-digit codes get their leading zero back after the int() round-trip.
        if len(str(codeSecteur)) == 7:
            codeSecteurSTR = "0" + str(codeSecteur)
        else:
            codeSecteurSTR = str(codeSecteur)

        # Log in once and reuse the token for every sector of this call.
        if token is None:
            token = await connexion_registre_national_entreprises()

        url = f"https://registre-national-entreprises.inpi.fr/api/companies?page=1&pageSize=500&codeCategory={codeSecteurSTR}"
        print(url)
        headers = {"Authorization": f"Bearer {token}"}
        response = requests.get(url, headers=headers, timeout=60)
        if response.status_code != 200:
            parts.append(aucune_entreprise)
            continue

        print('OK')
        df = pd.DataFrame(response.json())
        parts.append("**Code secteur d'activités : " + codeSecteurSTR + "**")

        formalities = df['formality'].tolist() if len(df) else []
        for formality in formalities:
            contenu = formality['content']
            # Any mention of "cessation" in the record excludes the company.
            if 'cessation' in str(contenu):
                continue
            personne = contenu.get('personneMorale')
            if not isinstance(personne, dict) or 'adresseEntreprise' not in personne:
                continue

            entreprise = personne['identite']['entreprise']
            parts.append(
                "\n🏭 Dénomination : " + str(entreprise['denomination'])
                + "; Code SIREN : " + str(entreprise['siren'])
                + "\n\tAdresse : "
                + _format_adresse_entreprise(personne['adresseEntreprise']['adresse'])
            )

            # Only the first activity of the first other establishment is shown.
            etablissements = personne.get('autresEtablissements')
            if etablissements and etablissements[0].get('activites'):
                activite = etablissements[0]['activites'][0]
                parts.append(
                    "\n\tActivités : " + str(activite['descriptionDetaillee'])
                    + "\n\tCode APE : " + str(activite['codeApe'])
                )

    return "".join(parts)
|
|
|
|
|
@cl.step(type="tool")
async def creation_liste_code_Rome(competence, categorie):
    """Pick the Rome codes to search, automatically or from user input.

    Runs a similarity search (30 results) on the vector store, shows the
    matches as a table, then proposes the first five distinct Rome codes.
    The user either accepts them or types a comma-separated list of codes.
    The retained list is stored in the user session under ``codeRomeArray``.

    The automatic selection is kept when the user does not answer, or when
    the typed text contains one of ``; . : ! |`` or no code at all.

    Args:
        competence: Text to search for.
        categorie: Value of the ``categorie`` metadata filter.

    Returns:
        None; the result is the ``codeRomeArray`` session value.
    """
    docsearch = await connexion_vector_database()
    retrieve_comp = docsearch.similarity_search(competence, k=30, filter={"categorie": {"$eq": categorie}})

    results_df = pd.DataFrame({
        'codeRome': [doc.metadata['code_rome'] for doc in retrieve_comp],
        'competence': [doc.metadata['libelle_competence'] for doc in retrieve_comp],
        'metier': [doc.metadata['libelle_appellation_long'] for doc in retrieve_comp],
    })
    arrayresults = results_df.values.tolist()
    displayresults = '| Code Rome | Compétence | Métier |\n| -------- | ------- | ------- |'
    displayresults += ''.join(
        '\n| ' + row[0] + ' | ' + row[1] + ' | ' + row[2] + ' |' for row in arrayresults
    )

    # Debug trace of the best match; skipped when the search returned nothing.
    if arrayresults:
        print(arrayresults[0][0] + arrayresults[0][1] + arrayresults[0][2])
    await cl.Message(author="Datapcc : 🌐🌐🌐",content="Voici le résultat de la recherche sémantique sur le catalogue Rome :\n" + displayresults).send()

    results_df = results_df.drop_duplicates(subset=["codeRome"]).head(5)
    codeRomeString = results_df["codeRome"].to_string(index = False)
    codeRome_list = results_df["codeRome"].tolist()

    actionRome = await cl.AskActionMessage(
        content="Etes-vous d'accord avec la sélection des 5 codes Rome automatiques issus de la recherche sémantique ? :\n" + codeRomeString.replace(' ',','),
        actions=[
            cl.Action(name="continue", value="Offres d'emploi en temps réel", label="✅ Oui, je veux continuer vers l'extraction en temps réel des offres d'emploi"),
            cl.Action(name="cancel", value="Saisie des codes Rome", label="❌ Non, je veux saisir ma liste de codes Rome, séparés par des virgules"),
        ], timeout=3600
    ).send()

    if actionRome and actionRome.get("name") == "continue":
        await cl.Message(
            content="Connexion à France Travail, et récupération des offres d'emploi",
        ).send()
        cl.user_session.set("codeRomeArray", codeRome_list)
        return

    # Default to the automatic selection so the session value is always set,
    # even when the prompt below times out.
    arrayCodeRome = codeRome_list
    actionsaisierome = await cl.AskUserMessage(content="Saisissez vos codes Rome dans le prompt? ⚠️ Attention, indiquez seulement des codes Rome séparés par des virgules", timeout=3600).send()
    if actionsaisierome:
        await cl.Message(
            content=f"Votre saisie est : {actionsaisierome['output']}",
        ).send()
        stringCodeRome = actionsaisierome['output'].replace(' ','')
        forbidden = any(mark in stringCodeRome for mark in (';', '.', ':', '!', '|'))
        # Drop the empty items left by doubled or trailing commas.
        typed_codes = [code for code in stringCodeRome.split(',') if code]
        if forbidden or not typed_codes:
            await cl.Message(author="Datapcc : 🌐🌐🌐",content="Votre saisie est erronée. Nous continuons l'action avec les codes Rome sélectionnés automatiquement pour vous : " + codeRomeString).send()
        else:
            arrayCodeRome = typed_codes
    cl.user_session.set("codeRomeArray", arrayCodeRome)
|
|
|
@cl.step(type="tool")
async def creation_liste_skills_Esco(competence, categorie):
    """Pick the occupations to search, automatically or from user input.

    Runs a similarity search (40 results) on the vector store, shows the
    matches as a table, then proposes the first ten distinct occupations.
    The user either accepts them or types a semicolon-separated list. The
    retained list is stored in the user session under ``codeRomeArray``.

    The automatic selection is kept when the user does not answer, or when
    the typed text contains no ``;`` or no occupation at all.

    Args:
        competence: Text to search for.
        categorie: Value of the ``categorie`` metadata filter.

    Returns:
        None; the result is the ``codeRomeArray`` session value.
    """
    docsearch = await connexion_vector_database()
    retrieve_comp = docsearch.similarity_search(competence, k=40, filter={"categorie": {"$eq": categorie}})

    results_df = pd.DataFrame({
        'compétence': [doc.metadata['compétence'] for doc in retrieve_comp],
        'description_compétence': [doc.metadata['description_compétence'] for doc in retrieve_comp],
        'métier': [doc.metadata['métier'] for doc in retrieve_comp],
        'description_métier': [doc.metadata['description_métier'] for doc in retrieve_comp],
    })
    arrayresults = results_df.values.tolist()
    displayresults = '| Compétence | Description Compétence | Métier | Description Métier |\n| -------- | ------- | ------- | ------- |'
    displayresults += ''.join(
        '\n| ' + row[0] + ' | ' + row[1] + ' | ' + row[2] + ' | ' + row[3] + ' |' for row in arrayresults
    )

    await cl.Message(author="Datapcc : 🌐🌐🌐",content="Voici le résultat de la recherche sémantique sur la classification ESCO :\n" + displayresults).send()

    results_df = results_df.drop_duplicates(subset=["métier"]).head(10)
    codeRomeString = results_df["métier"].to_string(index = False)
    codeRome_list = results_df["métier"].tolist()

    actionRome = await cl.AskActionMessage(
        content="Etes-vous d'accord avec la sélection des 10 métiers automatiques pour compléter la liste des compétences avec celles attendues sur le marché du travail? :\n" + codeRomeString,
        actions=[
            cl.Action(name="continue", value="Offres d'emploi en temps réel", label="✅ Oui, je veux continuer vers l'extraction en temps réel des offres d'emploi"),
            cl.Action(name="cancel", value="Saisie des codes Rome", label="❌ Non, je veux saisir ma liste de métiers, séparés par des points-virgules"),
        ], timeout=3600
    ).send()

    if actionRome and actionRome.get("name") == "continue":
        await cl.Message(
            content="Connexion à France Travail, et récupération des offres d'emploi",
        ).send()
        cl.user_session.set("codeRomeArray", codeRome_list)
        return

    # Default to the automatic selection so the session value is always set,
    # even when the prompt below times out.
    arrayCodeRome = codeRome_list
    actionsaisierome = await cl.AskUserMessage(content="Saisissez vos métiers dans le prompt? ⚠️ Attention, indiquez seulement des métiers séparés par des points-virgules", timeout=3600).send()
    if actionsaisierome:
        await cl.Message(
            content=f"Votre saisie est : {actionsaisierome['output']}",
        ).send()
        stringCodeRome = actionsaisierome['output']
        # Surrounding blanks and empty items are not part of an occupation name.
        typed_metiers = [metier.strip() for metier in stringCodeRome.split(';') if metier.strip()]
        if ';' in stringCodeRome and typed_metiers:
            arrayCodeRome = typed_metiers
        else:
            await cl.Message(author="Datapcc : 🌐🌐🌐",content="Votre saisie est erronée. Nous continuons l'action avec les métiers sélectionnés automatiquement pour vous : " + codeRomeString).send()
    cl.user_session.set("codeRomeArray", arrayCodeRome)
|
|
|
@cl.step(type="tool")
async def connexion_registre_national_entreprises():
    """Log in to the RNE (INPI) API and return the bearer token.

    Credentials are read from the ``RNE_CLIENT_ID`` and ``RNE_CLIENT_SECRET``
    environment variables.

    Returns:
        The ``token`` field of the login response.

    Raises:
        KeyError: If a credential environment variable is missing.
        requests.RequestException: If the login request fails, including
            when it exceeds the timeout.
        Exception: If the API answers with a status other than 200.
    """
    url = "https://registre-national-entreprises.inpi.fr/api/sso/login"
    headers = {"Content-Type": "application/json"}
    data = {"username": os.environ['RNE_CLIENT_ID'], "password": os.environ['RNE_CLIENT_SECRET']}
    # Without a timeout an unresponsive server would block the caller forever.
    response = requests.post(url, json=data, headers=headers, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Échec de l'authentification. Code d'erreur : {response.status_code}")
    return response.json()["token"]
|
|
|
@cl.step(type="tool")
async def connexion_France_Travail():
    """Build the offers API client from the credentials in the environment.

    Reads ``POLE_EMPLOI_CLIENT_ID`` and ``POLE_EMPLOI_CLIENT_SECRET``; a
    missing variable raises ``KeyError``.
    """
    credentials = {
        "client_id": os.environ['POLE_EMPLOI_CLIENT_ID'],
        "client_secret": os.environ['POLE_EMPLOI_CLIENT_SECRET'],
    }
    return Api(**credentials)
|
|
|
@cl.step(type="tool")
async def connexion_vector_database():
    """Open the existing Pinecone index as a LangChain vector store.

    The index name comes from ``PINECONE_INDEX_NAME``; documents are embedded
    with a default ``HuggingFaceEmbeddings`` instance.
    """
    # Re-assigning each variable to itself raises KeyError when it is missing.
    for variable in ('PINECONE_API_KEY', 'PINECONE_INDEX_NAME'):
        os.environ[variable] = os.environ[variable]

    embedding_model = HuggingFaceEmbeddings()
    index_name = os.environ['PINECONE_INDEX_NAME']
    return PineconeVectorStore.from_existing_index(index_name, embedding_model)
|
|
|
@cl.step(type="llm")
async def IA():
    """Create the streaming Hugging Face endpoint for Mistral-7B-Instruct-v0.3."""
    # Re-assigning the variable to itself raises KeyError when the token is missing.
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.environ['HUGGINGFACEHUB_API_TOKEN']

    endpoint_options = dict(
        repo_id="mistralai/Mistral-7B-Instruct-v0.3",
        max_new_tokens=5000,
        temperature=1.0,
        task="text2text-generation",
        streaming=True,
    )
    return HuggingFaceEndpoint(**endpoint_options)
|
|
|
|
|
@cl.on_settings_update
async def setup_agent(settings):
    """Rebuild the chat context and the LLM chain after a settings change.

    The free-text ``competenceInput`` setting takes precedence over the
    ``competence`` setting. When one of them is filled in, it is stored in
    the session under ``competenceFree`` and the profile-specific context
    construction runs. When both are empty a warning is sent and that
    construction is skipped.

    In every case the runnable (history + prompt + model) is rebuilt from the
    ``contextChatBot`` session value and stored under ``runnable``.

    Args:
        settings: Mapping of the chat settings; reads ``competence`` and
            ``competenceInput``.
    """
    author = "Datapcc : 🌐🌐🌐"
    competenceList = settings['competenceInput'] or settings['competence']

    if not competenceList:
        competenceList = None
        await cl.Message(
            author=author,content=f"⚠️ Pas de contexte : {settings['competence']}\n⛔ Vous ne pouvez pas élaborer de note sectorielle!"
        ).send()
    else:
        await cl.Message(
            author=author,content=f"👍 Changement de contexte : {competenceList}"
        ).send()
        cl.user_session.set("competenceFree", competenceList)

    if not cl.user_session.get("saveMemory"):
        cl.user_session.set("saveMemory", "")

    chat_profile = cl.user_session.get("chat_profile")
    chatProfile = chat_profile.split(' - ')

    # Without a competence there is nothing to build a context from.
    if competenceList is not None:
        if chatProfile[1] == "ROMESKILLS" or chatProfile[1] == "ESCOSKILLS":
            await construction_NCS(competenceList, chatProfile[1])
        elif chatProfile[1] == "RNE":
            # creation_liste_entreprises reads entry[0] of each item, so a
            # plain string is wrapped to keep the whole label intact.
            if isinstance(competenceList, str):
                competenceArray = [[competenceList]]
            else:
                competenceArray = [competenceList]
            await creation_liste_entreprises(competenceArray)

    contextChat = cl.user_session.get("contextChatBot")
    if not contextChat:
        contextChat = "Il n'y a pas de contexte."
    # The context is pasted into a prompt template: literal braces must be
    # doubled, otherwise they are parsed as template variables.
    contexte_prompt = contextChat[0:28875].replace("{", "{{").replace("}", "}}")

    # Re-assigning the variable to itself raises KeyError when the token is missing.
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.environ['HUGGINGFACEHUB_API_TOKEN']
    repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
    model = HuggingFaceEndpoint(
        repo_id=repo_id,
        max_new_tokens=3600,
        temperature=0.5,
        streaming=True
    )

    memory = cl.user_session.get("memory")
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                f"Contexte : Vous êtes un spécialiste du marché de l'emploi en fonction du niveau de qualification, des compétences professionnelles, des compétences transversales, du salaire et de l'expérience. Vous êtes doué pour faire des analyses du système travail sur les métiers les plus demandés grâce à votre aptitude à synthétiser les informations en fonction des critères définis ci-avant. En fonction des informations suivantes et du contexte suivant seulement et strictement. Contexte : {contexte_prompt}. Réponds à la question suivante de la manière la plus pertinente, la plus exhaustive et la plus détaillée possible, avec au minimum 3000 tokens jusqu'à 3600 tokens, seulement et strictement dans le contexte et les informations fournies. Essayez donc de comprendre en profondeur le contexte et répondez uniquement en vous basant sur les informations fournies.",
            ),
            MessagesPlaceholder(variable_name="history"),
            ("human", "{question}, dans le contexte fourni."),
        ]
    )
    runnable = (
        RunnablePassthrough.assign(
            history=RunnableLambda(memory.load_memory_variables) | itemgetter("history")
        )
        | prompt
        | model
    )
    cl.user_session.set("runnable", runnable)
|
|
|
@cl.on_message
async def main(message: cl.Message):
    """Stream the model's answer to ``message`` and offer follow-up actions.

    The answer is streamed into both the enclosing step and a chat message.
    Three action messages follow (download the exchange, save it to memory,
    download the saved memory), then a message carrying the exchange as a
    text element. Finally the exchange is appended to the conversation
    memory.
    """
    author = "Datapcc : 🌐🌐🌐"
    async with cl.Step(root=True, name="Réponse de Mistral", type="llm") as parent_step:
        parent_step.input = message.content
        # The profile is split as in the original flow but not used afterwards.
        chat_profile = cl.user_session.get("chat_profile")
        chatProfile = chat_profile.split(' - ')
        memory = cl.user_session.get("memory")
        runnable = cl.user_session.get("runnable")

        msg = cl.Message(author=author,content="")
        stream_config = RunnableConfig(callbacks=[cl.AsyncLangchainCallbackHandler(stream_final_answer=True)])
        async for chunk in runnable.astream({"question": message.content}, config=stream_config):
            await parent_step.stream_token(chunk)
            await msg.stream_token(chunk)

        question_header = "Question : " + message.content
        exchange = question_header + "\n\nRéponse : " + msg.content

        QA_context_name = f"Question-réponse sur le contexte"
        text_elements = [
            cl.Text(content=question_header + "\n\nRéponse :\n" + msg.content, name=QA_context_name)
        ]

        download_actions = [
            cl.Action(name="download", value=exchange, description="download_QA_emplois")
        ]
        await cl.Message(author=author,content="Download", actions=download_actions).send()
        await cl.sleep(2)

        save_actions = [
            cl.Action(name="saveToMemory", value=exchange, description="Mettre en mémoire la réponse à votre requête")
        ]
        await cl.Message(author=author,content="Mettre en mémoire la réponse à votre requête", actions=save_actions).send()
        await cl.sleep(2)

        # Read the saved memory only now, after the pauses above.
        memory_actions = [
            cl.Action(name="download", value=cl.user_session.get('saveMemory'), description="download_referentiel")
        ]
        await cl.Message(author=author,content="Télécharger la mise en mémoire de vos fiches", actions=memory_actions).send()
        await cl.sleep(1.5)

        await cl.Message(author=author,content="Contexte : " + QA_context_name, elements=text_elements).send()

        memory.chat_memory.add_user_message(message.content)
        memory.chat_memory.add_ai_message(msg.content)