Update main.py
Browse files
main.py
CHANGED
@@ -177,53 +177,59 @@ def localisation():
|
|
177 |
{ "ID": "972", "Longitude": -61.024174, "Latitude": 14.641528 },
|
178 |
{ "ID": "971", "Longitude": -61.551, "Latitude": 16.265 }
|
179 |
]
|
180 |
-
longLat = pd.DataFrame(ListCentroids)
|
181 |
-
return longLat
|
182 |
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
def
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
227 |
|
228 |
def listToString(list):
    """Return the built-in ``str()`` rendering of *list*."""
    # The parameter name shadows the builtin, but it is part of the public
    # signature and is therefore left untouched.
    rendered = str(list)
    return rendered
|
@@ -289,134 +295,6 @@ async def on_action(action):
|
|
289 |
author="Datapcc : 🌐🌐🌐", content="[Lien] 🔗", elements=elements
|
290 |
).send()
|
291 |
await action.remove()
|
292 |
-
|
293 |
-
@cl.action_callback("datavizEmploi")
|
294 |
-
async def on_action(action):
|
295 |
-
client = Api(client_id=os.environ['POLE_EMPLOI_CLIENT_ID'],
|
296 |
-
client_secret=os.environ['POLE_EMPLOI_CLIENT_SECRET'])
|
297 |
-
todayDate = datetime.datetime.today()
|
298 |
-
month, year = (todayDate.month-1, todayDate.year) if todayDate.month != 1 else (12, todayDate.year-1)
|
299 |
-
start_dt = todayDate.replace(day=1, month=month, year=year)
|
300 |
-
end_dt = datetime.datetime.today()
|
301 |
-
arraydataframe = []
|
302 |
-
arrayfirstdataframe = []
|
303 |
-
arraylocalisationdataframe = []
|
304 |
-
results = []
|
305 |
-
count = 0
|
306 |
-
listrome = action.value
|
307 |
-
arrayrome = listrome.split(',')
|
308 |
-
for k in arrayrome:
|
309 |
-
params = {"motsCles": k,'minCreationDate': dt_to_str_iso(start_dt),'maxCreationDate': dt_to_str_iso(end_dt),'range':'0-149'}
|
310 |
-
search_on_big_data = client.search(params=params)
|
311 |
-
results += search_on_big_data["resultats"]
|
312 |
-
results_df = pd.DataFrame(results)
|
313 |
-
if results_df.empty == False:
|
314 |
-
count = count + 1
|
315 |
-
finals = results_df[['intitule','typeContratLibelle','experienceLibelle','competences','qualitesProfessionnelles','salaire','lieuTravail','formations']]
|
316 |
-
finals["lieuTravail"] = finals["lieuTravail"].apply(lambda x: np.array(x)).apply(lambda x: x['libelle']).apply(lambda x: x[0:3]).apply(lambda x: x.strip())
|
317 |
-
finals_df = finals
|
318 |
-
finals_df.dropna(subset=['qualitesProfessionnelles','formations','competences'], inplace=True)
|
319 |
-
finals_df["competences"] = finals_df["competences"].apply(lambda x:[str(e['libelle']) for e in x]).apply(lambda x:'; '.join(map(str, x)))
|
320 |
-
finals_df["qualitesProfessionnelles"] = finals_df["qualitesProfessionnelles"].apply(lambda x:[str(e['libelle']) + ": " + str(e['description']) for e in x]).apply(lambda x:'; '.join(map(str, x)))
|
321 |
-
finals_df["formations"] = finals_df["formations"].apply(lambda x:[str(e['niveauLibelle']) for e in x]).apply(lambda x:'; '.join(map(str, x)))
|
322 |
-
finals_df = finals_df.sort_values(by=['lieuTravail'])
|
323 |
-
finals_localisation = results_df[['lieuTravail']]
|
324 |
-
finals_localisation["lieuTravail"] = finals_localisation["lieuTravail"].apply(lambda x: np.array(x)).apply(lambda x: x['libelle']).apply(lambda x: x[0:3]).apply(lambda x: x.strip())
|
325 |
-
finals_localisation.drop(finals_localisation[finals_localisation['lieuTravail'] == 'Fra'].index, inplace = True)
|
326 |
-
finals_localisation.drop(finals_localisation[finals_localisation['lieuTravail'] == 'FRA'].index, inplace = True)
|
327 |
-
finals_localisation.drop(finals_localisation[finals_localisation['lieuTravail'] == 'Ile'].index, inplace = True)
|
328 |
-
finals_localisation.drop(finals_localisation[finals_localisation['lieuTravail'] == 'Mar'].index, inplace = True)
|
329 |
-
finals_localisation.drop(finals_localisation[finals_localisation['lieuTravail'] == 'Bou'].index, inplace = True)
|
330 |
-
finals_localisation.drop(finals_localisation[finals_localisation['lieuTravail'] == '976'].index, inplace = True)
|
331 |
-
arraylocalisationdataframe.append(finals_localisation)
|
332 |
-
arrayfirstdataframe.append(results_df)
|
333 |
-
if len(finals_df) != 0:
|
334 |
-
arraydataframe.append(finals_df)
|
335 |
-
first_df = pd.concat(arrayfirstdataframe)
|
336 |
-
finals_df = pd.concat(arraydataframe)
|
337 |
-
localisation_df = pd.concat(arraylocalisationdataframe)
|
338 |
-
|
339 |
-
######## Emplois ########
|
340 |
-
df_intitule = first_df.groupby('intitule').size().reset_index(name='obs')
|
341 |
-
df_intitule = df_intitule.sort_values(by=['obs'])
|
342 |
-
df_intitule = df_intitule.iloc[-25:]
|
343 |
-
fig_intitule = px.bar(df_intitule, x='obs', y='intitule', width=800, height=600, orientation='h', color='obs', title="Les principaux emplois", labels={'obs':'nombre'}, color_continuous_scale="Teal", text_auto=True).update_layout(font=dict(size=10,color="RebeccaPurple"),autosize=True).update_traces(hovertemplate=df_intitule["intitule"] + ' <br>Nombre : %{x}', y=[y[:100] + "..." for y in df_intitule["intitule"]], showlegend=False)
|
344 |
-
|
345 |
-
######## Types de contrat ########
|
346 |
-
df_contrat = first_df.groupby('typeContratLibelle').size().reset_index(name='obs')
|
347 |
-
fig_contrat = px.pie(df_contrat, names='typeContratLibelle', width=800, height=800, values='obs', color='obs', title="Les types de contrat", labels={'obs':'nombre'}, color_discrete_sequence=px.colors.qualitative.Safe).update_traces(textposition='inside', textinfo='percent+label').update_layout(font=dict(size=10,color="RebeccaPurple"))
|
348 |
-
|
349 |
-
df_secteur = first_df.groupby('secteurActiviteLibelle').size().reset_index(name='obs')
|
350 |
-
df_secteur = df_secteur.sort_values(by=['obs'])
|
351 |
-
df_secteur = df_secteur.iloc[-25:]
|
352 |
-
fig_secteur = px.bar(df_secteur, x='obs', y='secteurActiviteLibelle', width=800, height=600, orientation='h', color='obs', title="Les principaux secteurs d'activités", labels={'obs':'nombre'}, color_continuous_scale="Teal", text_auto=True).update_layout(font=dict(size=10,color="RebeccaPurple"),autosize=True).update_traces(hovertemplate=df_secteur["secteurActiviteLibelle"] + ' <br>Nombre : %{x}', y=[y[:100] + "..." for y in df_secteur["secteurActiviteLibelle"]], showlegend=False)
|
353 |
-
|
354 |
-
######## Compétences professionnelles ########
|
355 |
-
df1 = finals_df
|
356 |
-
df1['competences'] = finals_df['competences'].str.split(';')
|
357 |
-
df2 = df1.explode('competences')
|
358 |
-
df2 = df2.groupby('competences').size().reset_index(name='obs')
|
359 |
-
df2 = df2.sort_values(by=['obs'])
|
360 |
-
df2 = df2.iloc[-20:]
|
361 |
-
fig_competences = px.bar(df2, x='obs', y='competences', width=800, height=550, orientation='h', color='obs', title="Les principales compétences professionnelles", labels={'obs':'nombre'}, color_continuous_scale="Teal", text_auto=True).update_layout(font=dict(size=10,color="RebeccaPurple"),autosize=True).update_traces(hovertemplate=df2["competences"] + ' <br>Nombre : %{x}', y=[y[:100] + "..." for y in df2['competences']], showlegend=False)
|
362 |
-
|
363 |
-
######## Compétences transversales ########
|
364 |
-
df_transversales = finals_df
|
365 |
-
df_transversales['qualitesProfessionnelles'] = finals_df['qualitesProfessionnelles'].str.split(';')
|
366 |
-
df_comptransversales = df_transversales.explode('qualitesProfessionnelles')
|
367 |
-
df_comptransversales = df_comptransversales.groupby('qualitesProfessionnelles').size().reset_index(name='obs')
|
368 |
-
df_comptransversales = df_comptransversales.sort_values(by=['obs'])
|
369 |
-
df_comptransversales = df_comptransversales.iloc[-20:]
|
370 |
-
fig_transversales = px.bar(df_comptransversales, x='obs', y='qualitesProfessionnelles', width=800, height=550, orientation='h', color='obs', title="Les principales compétences transversales", labels={'obs':'nombre'}, color_continuous_scale="Teal", text_auto=True).update_layout(font=dict(size=10,color="RebeccaPurple"),autosize=True).update_traces(hovertemplate=df_comptransversales["qualitesProfessionnelles"] + ' <br>Nombre : %{x}', y=[y[:100] + "..." for y in df_comptransversales["qualitesProfessionnelles"]], showlegend=False)
|
371 |
-
|
372 |
-
######## Niveaux de qualification ########
|
373 |
-
df_formations = finals_df.groupby('formations').size().reset_index(name='obs')
|
374 |
-
fig_formations = px.pie(df_formations, names='formations', width=800, height=800, values='obs', color='obs', title="Les niveaux de qualification", labels={'obs':'nombre'}, color_discrete_sequence=px.colors.qualitative.Safe).update_traces(textposition='inside', textinfo='percent+label').update_layout(font=dict(size=10,color="RebeccaPurple"))
|
375 |
-
|
376 |
-
######## Expériences professionnelles ########
|
377 |
-
df_experience = finals_df.groupby('experienceLibelle').size().reset_index(name='obs')
|
378 |
-
fig_experience = px.pie(df_experience, names='experienceLibelle', width=800, height=800, values='obs', color='obs', title="Les expériences professionnelles", labels={'obs':'nombre'}, color_discrete_sequence=px.colors.qualitative.Safe).update_traces(textposition='inside', textinfo='percent+label').update_layout(font=dict(size=10,color="RebeccaPurple"))
|
379 |
-
|
380 |
-
res = requests.get(
|
381 |
-
"https://raw.githubusercontent.com/codeforgermany/click_that_hood/main/public/data/spain-provinces.geojson"
|
382 |
-
)
|
383 |
-
######## localisation ########
|
384 |
-
longLat = localisation()
|
385 |
-
df_localisation = localisation_df.groupby('lieuTravail').size().reset_index(name='obs')
|
386 |
-
df_localisation = df_localisation.sort_values(by=['lieuTravail'])
|
387 |
-
df_localisation['longitude'] = df_localisation['lieuTravail']
|
388 |
-
df_localisation['latitude'] = df_localisation['lieuTravail']
|
389 |
-
|
390 |
-
df_localisation["longitude"] = df_localisation['longitude'].apply(lambda x:longLat.loc[longLat['ID'] == x, 'Longitude'].iloc[0])
|
391 |
-
df_localisation["latitude"] = df_localisation['latitude'].apply(lambda x:longLat.loc[longLat['ID'] == x, 'Latitude'].iloc[0])
|
392 |
-
|
393 |
-
fig_localisation = px.scatter_mapbox(df_localisation, lat="latitude", lon="longitude", height=600,hover_name="lieuTravail", size="obs").update_layout(
|
394 |
-
mapbox={
|
395 |
-
"style": "carto-positron",
|
396 |
-
"center": {"lon": 2, "lat" : 47},
|
397 |
-
"zoom": 4.5,
|
398 |
-
"layers": [
|
399 |
-
{
|
400 |
-
"source": res.json(),
|
401 |
-
"type": "line",
|
402 |
-
"color": "green",
|
403 |
-
"line": {"width": 0},
|
404 |
-
}
|
405 |
-
],
|
406 |
-
}
|
407 |
-
)
|
408 |
-
|
409 |
-
elements.append(cl.Plotly(name="chart_intitule", figure=fig_intitule, display="inline", size="large"))
|
410 |
-
elements.append(cl.Plotly(name="chart_contrat", figure=fig_contrat, display="inline", size="large"))
|
411 |
-
elements.append(cl.Plotly(name="chart_competences", figure=fig_competences, display="inline", size="large"))
|
412 |
-
elements.append(cl.Plotly(name="chart_transversales", figure=fig_transversales, display="inline", size="large"))
|
413 |
-
elements.append(cl.Plotly(name="chart_formations", figure=fig_formations, display="inline", size="large"))
|
414 |
-
elements.append(cl.Plotly(name="chart_experience", figure=fig_experience, display="inline", size="large"))
|
415 |
-
elements.append(cl.Plotly(name="chart_secteur", figure=fig_secteur, display="inline", size="large"))
|
416 |
-
elements.append(cl.Plotly(name="chart_localisation", figure=fig_localisation, display="inline", size="large"))
|
417 |
-
|
418 |
-
await cl.Message(content="Datavisualisation du marché de l'emploi", elements=elements).send()
|
419 |
-
await action.remove()
|
420 |
|
421 |
@cl.action_callback("saveMemory")
|
422 |
async def on_action(action):
|
@@ -517,54 +395,23 @@ async def start():
|
|
517 |
@literal_client.step(type="run")
|
518 |
async def construction_NCS(competenceList):
|
519 |
context = await contexte(competenceList)
|
520 |
-
emploisST =
|
521 |
-
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
|
527 |
-
|
528 |
-
En fonction des informations suivantes et du contexte suivant seulement et strictement, répondez à la question ci-dessous à partir du contexte ci-dessous :
|
529 |
-
{context}
|
530 |
-
{question} [/INST] </s>
|
531 |
-
"""
|
532 |
-
question_p ="""
|
533 |
-
Peux-tu créer une note sectorielle d'après le modèle de note sectorielle précédent en respectant ses parties : 2., 2.1, 2.1.1, 2.2, 2.2.1, 2.2.2, 2.2.3 et d'après le contexte en vous réferrant strictement aux données du contexte fixé? Réponse sous forme d'un texte généré d'après le modèle et le contexte en 5000 mots et en langue française absolument.
|
534 |
-
"""
|
535 |
-
context_p = f"Contexte : {context}. {definitions} Modèle de note sectorielle : {structure}. Réponds en langue française strictement à la question suivante en respectant strictement les données du contexte. Si vous ne pouvez pas répondre à la question sur la base des informations, dites que vous ne trouvez pas de réponse ou que vous ne parvenez pas à trouver de réponse. Essayez donc de comprendre en profondeur le contexte et répondez uniquement en vous basant sur les informations fournies. Ne générez pas de réponses non pertinentes. Si les informations du contexte sont insuffisantes, procédez à une projection sur le secteur, les entreprises et le marché de l'emploi, pour construire la note de composante sectorielle."
|
536 |
-
prompt = PromptTemplate(template=template, input_variables=["question","context"])
|
537 |
-
#llm_chain = LLMChain(prompt=prompt, llm=client_llm)
|
538 |
-
#completion_NCS = llm_chain.run({"question":question_p,"context":context_p}, callbacks=[StreamingStdOutCallbackHandler()])
|
539 |
-
chain = (
|
540 |
-
RunnablePassthrough.assign(
|
541 |
-
history=RunnableLambda(memory.load_memory_variables) | itemgetter("history")
|
542 |
-
)
|
543 |
-
| prompt | client_llm
|
544 |
-
)
|
545 |
-
|
546 |
-
msg = cl.Message(author="Datapcc : 🌐🌐🌐",content="")
|
547 |
-
async for chunk in chain.astream({"question":question_p,"context":context_p},
|
548 |
-
config=RunnableConfig(callbacks=[cl.AsyncLangchainCallbackHandler(stream_final_answer=True)])):
|
549 |
-
await msg.stream_token(chunk)
|
550 |
-
|
551 |
-
cl.user_session.set("NCS" + romeListArray[0], msg.content)
|
552 |
-
cl.user_session.set("contextChatBot", context + "\n" + msg.content)
|
553 |
-
await cl.sleep(2)
|
554 |
listEmplois_name = f"Liste des emplois"
|
555 |
text_elements = []
|
556 |
text_elements.append(
|
557 |
-
cl.Text(content="Question : " +
|
558 |
)
|
559 |
-
await cl.Message(author="Datapcc : 🌐🌐🌐",content="👨💼 Source
|
560 |
-
await cl.sleep(
|
561 |
-
|
562 |
-
|
563 |
-
]
|
564 |
-
await cl.Message(author="Datapcc : 🌐🌐🌐",content="📊 Afficher la datavisualisation du marché de l'emploi", actions=datavizEmploi).send()
|
565 |
-
await cl.sleep(2)
|
566 |
-
if romeListArray[0].find(',') != -1:
|
567 |
-
codeArray = romeListArray[0].split(',')
|
568 |
ficheMetiers = []
|
569 |
for i in range(0,len(codeArray)):
|
570 |
ficheMetiers = [
|
@@ -575,86 +422,48 @@ async def construction_NCS(competenceList):
|
|
575 |
).send()
|
576 |
else:
|
577 |
ficheMetiers = [
|
578 |
-
cl.File(name= "Fiche métier " + romeListArray
|
579 |
]
|
580 |
await cl.Message(
|
581 |
author="Datapcc : 🌐🌐🌐", content="[Fiches métiers] 🔗", elements=ficheMetiers
|
582 |
).send()
|
583 |
|
584 |
-
await cl.sleep(
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
-
|
|
|
589 |
)
|
590 |
-
await cl.Message(author="Datapcc : 🌐🌐🌐",content="
|
591 |
-
|
592 |
-
|
593 |
-
cl.Action(name="download", value=msg.content, description="download_note_sectorielle")
|
594 |
-
]
|
595 |
-
await cl.Message(author="Datapcc : 🌐🌐🌐",content="Télécharger la note", actions=actions).send()
|
596 |
-
await cl.sleep(2)
|
597 |
-
saves = [
|
598 |
-
cl.Action(name="saveToMemory", value=msg.content, description="Mettre en mémoire la note")
|
599 |
-
]
|
600 |
-
await cl.Message(author="Datapcc : 🌐🌐🌐",content="Mettre en mémoire la note", actions=saves).send()
|
601 |
-
await cl.sleep(2)
|
602 |
-
|
603 |
-
memory.chat_memory.add_user_message(question_p)
|
604 |
-
memory.chat_memory.add_ai_message(msg.content)
|
605 |
-
return "Construction de la Note Sectorielle"
|
606 |
|
607 |
@cl.step(type="run")
async def recuperation_contexte(getNote):
    """Return the user-session value stored under *getNote*, prefixed by the key.

    The result is the key, then " :" and a line break, then the stored value.
    """
    stored_value = cl.user_session.get(getNote)
    heading = getNote + " :\n"
    return heading + stored_value
|
611 |
@cl.step(type="retrieval")
|
612 |
-
async def contexte(
|
613 |
-
|
614 |
-
|
615 |
-
|
616 |
-
|
617 |
-
|
618 |
-
|
619 |
-
|
620 |
-
|
621 |
-
|
622 |
-
|
623 |
-
|
624 |
-
|
625 |
-
|
626 |
-
|
627 |
-
|
628 |
-
|
629 |
-
#metierSecteurContexteTravail = await Metier_secteur_contexte_travail(romeListArray[0])
|
630 |
-
cl.user_session.set("EmploiST", context)
|
631 |
-
return "Fiches Métiers :\n" + ficheMetier + "\nChiifres clés métiers :\n" + ficheClesMetier + "\nListe des emplois issus de France Travail :\n" + context
|
632 |
-
#return "Fiche métier Compétences Savoirs :\n" + ficheMetiersCompetencesSavoirs + "\nMetier secteur contexte au travail :\n" + metierSecteurContexteTravail + "\nListe des emplois issus de France Travail :\n" + context
|
633 |
-
#return "Liste des emplois issus de France Travail :\n" + context
|
634 |
-
#return "\nMetier secteur contexte au travail :\n" + metierSecteurContexteTravail + "\nListe des emplois issus de France Travail :\n" + context
|
635 |
-
@cl.step(type="tool")
async def FicheMetier(url, codes):
    """Download and summarise the job sheet of each ROME code in *codes*.

    Args:
        url: Base URL; each code is appended to it to form the page address.
        codes: One ROME code, or several separated by commas.

    Returns:
        One string with, for every code, the text of the first
        'div.fm-presentation-text', 'div#part2' and 'div#part3' elements.
    """
    sheets = []
    # split(',') yields a one-item list when there is no comma, so a single
    # loop produces the same text as the separate single-code branch did.
    for code in codes.split(','):
        response = requests.get(url + code)
        soup = BeautifulSoup(response.text, "html.parser")
        missions = soup.select('div.fm-presentation-text')
        competences = soup.select('div#part2')
        contextes = soup.select('div#part3')
        sheets.append(
            "Fiche Métier " + code
            + ":\nLes missions principales : " + removeTags(missions[0])
            + ". Les compétences recherchées : " + removeTags(competences[0])
            + ". Les contextes au travail : " + removeTags(contextes[0])
            + "."
        )
    return "".join(sheets)
|
655 |
|
656 |
@cl.step(type="tool")
|
657 |
-
async def
|
658 |
if codes.find(',') != -1:
|
659 |
all = ""
|
660 |
codeArray = codes.split(',')
|
@@ -749,7 +558,198 @@ async def ChiffresClesMetier(url, codes):
|
|
749 |
return all
|
750 |
|
751 |
@cl.step(type="tool")
|
752 |
-
async def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
753 |
os.environ['PINECONE_API_KEYROME'] = os.environ['PINECONE_API_KEYROME']
|
754 |
docsearch = await connexion_catalogue_Rome()
|
755 |
retrieve_comp = docsearch.similarity_search(competence, k=30, filter={"categorie": {"$eq": os.environ['PINECONE_API_KEYROME']}})
|
@@ -773,7 +773,7 @@ async def creation_liste_code_Rome_et_emplois(competence):
|
|
773 |
|
774 |
results_df = results_df.drop_duplicates(subset=["codeRome"])
|
775 |
results_df = results_df.head(5)
|
776 |
-
codeRomeString = results_df["codeRome"].to_string(index = False)
|
777 |
codeRome_list = results_df["codeRome"].tolist()
|
778 |
actionRome = await cl.AskActionMessage(
|
779 |
content="Etes-vous d'accord avec la sélection des 5 codes Rome automatiques issus de la recherche sémantique ? :" + codeRomeString,
|
@@ -786,7 +786,6 @@ async def creation_liste_code_Rome_et_emplois(competence):
|
|
786 |
await cl.Message(
|
787 |
content="Connexion à France Travail, et récupération des offres d'emploi",
|
788 |
).send()
|
789 |
-
df_emplois = await API_France_Travail(codeRome_list)
|
790 |
cl.user_session.set("codeRomeArray", codeRome_list)
|
791 |
else:
|
792 |
actionsaisierome = await cl.AskUserMessage(content="Saisissez vos codes Rome dans le prompt? ⚠️ Attention, indiquez seulement des codes Rome séparés par des virgules", timeout=3600).send()
|
@@ -799,14 +798,17 @@ async def creation_liste_code_Rome_et_emplois(competence):
|
|
799 |
teststringCodeRome = [ele for ele in stopWords if(ele in stringCodeRome)]
|
800 |
teststringCodeRome = bool(teststringCodeRome)
|
801 |
if teststringCodeRome == False:
|
802 |
-
arrayCodeRome = stringCodeRome.
|
803 |
else:
|
804 |
arrayCodeRome = codeRome_list
|
805 |
await cl.Message(author="Datapcc : 🌐🌐🌐",content="Votre ssaisie est erronée. Nous continuons l'action avec les codes Rome sélectionnés automatiquement pour vous : " + codeRome_list).send()
|
806 |
-
df_emplois = await API_France_Travail(arrayCodeRome)
|
807 |
cl.user_session.set("codeRomeArray", arrayCodeRome)
|
808 |
-
|
809 |
-
|
|
|
|
|
|
|
|
|
810 |
|
811 |
@cl.step(type="tool")
|
812 |
async def connexion_catalogue_Rome():
|
@@ -816,22 +818,6 @@ async def connexion_catalogue_Rome():
|
|
816 |
docsearch = PineconeVectorStore.from_existing_index(os.environ['PINECONE_INDEX_NAME'], embeddings)
|
817 |
return docsearch
|
818 |
|
819 |
-
@cl.step(type="tool")
async def API_France_Travail(romeListArray):
    """Search job offers for every entry of *romeListArray* and return them as one DataFrame.

    The search window runs from the first day of the previous month to now,
    and each query asks for the range '0-149'.

    Args:
        romeListArray: Iterable of search keywords (ROME codes at the call sites).

    Returns:
        pandas.DataFrame built from the concatenated "resultats" lists.
    """
    api = Api(client_id=os.environ['POLE_EMPLOI_CLIENT_ID'],
              client_secret=os.environ['POLE_EMPLOI_CLIENT_SECRET'])
    now = datetime.datetime.today()
    # Step back one month, wrapping January to December of the previous year.
    if now.month == 1:
        prev_month, prev_year = 12, now.year - 1
    else:
        prev_month, prev_year = now.month - 1, now.year
    window_start = now.replace(day=1, month=prev_month, year=prev_year)
    window_end = datetime.datetime.today()

    offers = []
    for rome_code in romeListArray:
        query = {
            "motsCles": rome_code,
            'minCreationDate': dt_to_str_iso(window_start),
            'maxCreationDate': dt_to_str_iso(window_end),
            'range': '0-149',
        }
        response = api.search(params=query)
        offers.extend(response["resultats"])
    return pd.DataFrame(offers)
|
834 |
-
|
835 |
@cl.step(type="llm")
|
836 |
async def IA():
|
837 |
os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.environ['HUGGINGFACEHUB_API_TOKEN']
|
|
|
177 |
{ "ID": "972", "Longitude": -61.024174, "Latitude": 14.641528 },
|
178 |
{ "ID": "971", "Longitude": -61.551, "Latitude": 16.265 }
|
179 |
]
|
|
|
|
|
180 |
|
181 |
+
return ListCentroids
|
182 |
+
|
183 |
+
def plotDemandeur(dataframe, coderome):
    """Show a histogram of the 'Valeur' column per 'Indicateur' for one ROME code.

    Args:
        dataframe: DataFrame with 'Indicateur' and 'Valeur' columns.
        coderome: ROME code, appended to the chart title.

    Returns:
        The Plotly figure that was displayed.
    """
    ordered = dataframe.sort_values(by=['Indicateur'])
    fig_demandeur = px.histogram(
        ordered,
        x='Indicateur',
        y='Valeur',
        height=1000,
        title="Demandeurs d'emploi et offres d'emploi du code ROME : " + coderome,
        color='Indicateur',
        labels={'Valeur': 'Nombre'},
        text_auto=True,
    ).update_layout(font=dict(size=9, color="RebeccaPurple"), autosize=True)
    fig_demandeur.show()
    # The caller binds this function's result, so hand the figure back
    # instead of implicitly returning None.
    return fig_demandeur
|
187 |
+
|
188 |
+
def plotSalaire(dataframe):
    """Show a grouped histogram of 'salaire' per 'emploi', coloured by 'categorie'.

    Args:
        dataframe: DataFrame with 'emploi', 'salaire' and 'categorie' columns.

    Returns:
        The Plotly figure that was displayed.
    """
    ordered = dataframe.sort_values(by=['salaire'])
    fig_salaire = px.histogram(
        ordered,
        x='emploi',
        y='salaire',
        barmode='group',
        title="Salaires médians",
        color='categorie',
        text_auto=True,
    ).update_layout(font=dict(size=9, color="RebeccaPurple"), autosize=True)
    fig_salaire.show()
    # The caller binds this function's result, so hand the figure back
    # instead of implicitly returning None.
    return fig_salaire
|
192 |
+
|
193 |
+
def plotDifficulte(dataframe):
    """Show a histogram of 'Valeur' per 'Indicateur' for recruitment difficulty.

    An empty *dataframe* still produces a chart; only the title changes to
    say that no data is available.

    Args:
        dataframe: DataFrame with 'Indicateur' and 'Valeur' columns.

    Returns:
        The Plotly figure that was displayed.
    """
    # len() rather than truthiness: bool(DataFrame) raises ValueError.
    title = (
        "Aucune donnée difficulté de recrutement renseignée!"
        if len(dataframe) == 0
        else "Difficulté de recrutement"
    )
    ordered = dataframe.sort_values(by=['Valeur'])
    fig_difficulte = px.histogram(
        ordered,
        x='Indicateur',
        y='Valeur',
        title=title,
        color='Indicateur',
        labels={'Valeur': 'Pourcentage'},
        text_auto=True,
    ).update_layout(font=dict(size=9, color="RebeccaPurple"), autosize=True)
    fig_difficulte.show()
    # The caller binds this function's result, so hand the figure back
    # instead of implicitly returning None.
    return fig_difficulte
|
201 |
+
|
202 |
+
def plotRepartition(dataframe, title):
    """Show a pie chart of 'Valeur' shares per 'Indicateur'.

    Args:
        dataframe: DataFrame with 'Indicateur' and 'Valeur' columns.
        title: Chart title.

    Returns:
        The Plotly figure that was displayed.
    """
    ordered = dataframe.sort_values(by=['Valeur'])
    fig_repartition = px.pie(
        ordered,
        names='Indicateur',
        values='Valeur',
        color='Indicateur',
        title=title,
        labels={'Valeur': 'pourcentage'},
        color_discrete_sequence=px.colors.qualitative.Safe,
    ).update_traces(
        textposition='inside', textinfo='percent+label'
    ).update_layout(font=dict(size=10, color="RebeccaPurple"))
    fig_repartition.show()
    # The caller binds this function's result, so hand the figure back
    # instead of implicitly returning None.
    return fig_repartition
|
206 |
+
|
207 |
+
def removeTags(all):
    """Drop 'style' and 'script' nodes from *all* and return the remaining text.

    The element is modified in place (matching nodes are decomposed); the
    stripped strings that remain are concatenated without a separator.
    """
    # The parameter name shadows the builtin, but it is part of the signature.
    unwanted_nodes = all(['style', 'script'])
    for node in unwanted_nodes:
        node.decompose()
    remaining_text = all.stripped_strings
    return ''.join(remaining_text)
|
211 |
+
|
212 |
+
def htmlToDataframe(htmlTable):
    """Convert the first <table> of an HTML fragment into a DataFrame.

    The children of the first <tr> supply the column names; the children of
    every following <tr> supply one record each. Cell values are the text
    returned by ``get_text()``, so every value is a string.

    Args:
        htmlTable: HTML markup containing at least one <table>.

    Returns:
        pandas.DataFrame with one row per <tr> after the first.

    Raises:
        IndexError: If the markup contains no <table>.
    """
    soup = BeautifulSoup(htmlTable, 'html.parser')
    # Look the table up once and reuse it for the header and the body rows.
    table = soup.find_all("table")[0]

    def _child_texts(row):
        # Collect the text of each child of a row, skipping children that
        # expose no get_text() instead of swallowing every exception.
        texts = []
        for child in row:
            try:
                texts.append(child.get_text())
            except AttributeError:
                continue
        return texts

    list_header = _child_texts(table.find("tr"))
    data = [_child_texts(row) for row in table.find_all("tr")[1:]]
    return pd.DataFrame(data=data, columns=list_header)
|
233 |
|
234 |
def listToString(list):
    """Return the built-in ``str()`` rendering of *list*."""
    # The parameter name shadows the builtin, but it is part of the public
    # signature and is therefore left untouched.
    rendered = str(list)
    return rendered
|
|
|
295 |
author="Datapcc : 🌐🌐🌐", content="[Lien] 🔗", elements=elements
|
296 |
).send()
|
297 |
await action.remove()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
|
299 |
@cl.action_callback("saveMemory")
|
300 |
async def on_action(action):
|
|
|
395 |
@literal_client.step(type="run")
|
396 |
async def construction_NCS(competenceList):
|
397 |
context = await contexte(competenceList)
|
398 |
+
emploisST = context.to_string(index = False)
|
399 |
+
cl.user_session.set("contextChatBot", emploisST)
|
400 |
+
romeListArray = cl.user_session.get("codeRomeArray")
|
401 |
+
listEmplois = context.values.tolist()
|
402 |
+
stringEmplois = ''
|
403 |
+
for i in range(0,len(listEmplois)):
|
404 |
+
stringEmplois += "\n✔️ Emploi : " + listEmplois[i][0] + "; Contrat : " + listEmplois[i][1] + "; Compétences professionnelles : " + arrayToString(listEmplois[i][3]) + "; " + "Salaire : " + listToString(listEmplois[i][5]) + "; Qualification : " + listEmplois[i][4] + "; Localisation : " + listEmplois[i][6] + "; Expérience : " + listEmplois[i][2] + "; Niveau de qualification : " + listToString(listEmplois[i][7]) + "; Description de l'emploi : " + listToString(listEmplois[i][8])) + "\n"
|
405 |
+
await cl.sleep(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
406 |
listEmplois_name = f"Liste des emplois"
|
407 |
text_elements = []
|
408 |
text_elements.append(
|
409 |
+
cl.Text(content="Question : " + competenceList + "\n\nRéponse :\n" + stringEmplois, name=listEmplois_name)
|
410 |
)
|
411 |
+
await cl.Message(author="Datapcc : 🌐🌐🌐",content="👨💼 Source France Travail : " + listEmplois_name, elements=text_elements).send()
|
412 |
+
await cl.sleep(1)
|
413 |
+
if romeListArray.find(',') != -1:
|
414 |
+
codeArray = romeListArray
|
|
|
|
|
|
|
|
|
|
|
415 |
ficheMetiers = []
|
416 |
for i in range(0,len(codeArray)):
|
417 |
ficheMetiers = [
|
|
|
422 |
).send()
|
423 |
else:
|
424 |
ficheMetiers = [
|
425 |
+
cl.File(name= "Fiche métier " + romeListArray,url="https://www.soi-tc.fr/assets/fiches_pe/FEM_" + romeListArray + ".pdf",display="inline",)
|
426 |
]
|
427 |
await cl.Message(
|
428 |
author="Datapcc : 🌐🌐🌐", content="[Fiches métiers] 🔗", elements=ficheMetiers
|
429 |
).send()
|
430 |
|
431 |
+
await cl.sleep(1)
|
432 |
+
ficheClesMetier = await document_chiffres_cles_emplois("https://dataemploi.francetravail.fr/metier/chiffres-cles/NAT/FR/", romeListArray)
|
433 |
+
listClesMetier_name = f"Chiffres clés des emplois"
|
434 |
+
text_ClesMetier = []
|
435 |
+
text_ClesMetier.append(
|
436 |
+
cl.Text(content="Question : " + competenceList + "\n\nRéponse :\n" + ficheClesMetier, name=listClesMetier_name)
|
437 |
)
|
438 |
+
await cl.Message(author="Datapcc : 🌐🌐🌐",content="📊 Source France Travail : " + listClesMetier_name, elements=text_ClesMetier).send()
|
439 |
+
|
440 |
+
return "datavisualisation des statistiques de l'emploi"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
441 |
|
442 |
@cl.step(type="run")
async def recuperation_contexte(getNote):
    """Return the user-session value stored under *getNote*, prefixed by the key.

    The result is the key, then " :" and a line break, then the stored value.
    """
    stored_value = cl.user_session.get(getNote)
    heading = getNote + " :\n"
    return heading + stored_value
|
446 |
@cl.step(type="retrieval")
async def contexte(competence):
    """Fetch the job offers for *competence* and display the related charts.

    Steps: run the ROME-code selection, read the selected codes back from the
    user session ("codeRomeArray"), query the job offers for them, display the
    job statistics, then plot the five key-figure tables of each code.

    Args:
        competence: Query passed to ``creation_liste_code_Rome``.

    Returns:
        The job-offer DataFrame returned by ``API_France_Travail``.
    """
    # Awaited for its effects only; the codes are read from the session below.
    # NOTE(review): presumably this call stores "codeRomeArray" - confirm.
    await creation_liste_code_Rome(competence)
    await cl.sleep(1)
    romeListArray = cl.user_session.get("codeRomeArray")
    df_emplois = await API_France_Travail(romeListArray)
    await cl.sleep(1)
    await datavisualisation_statistiques_emplois(df_emplois)
    await cl.sleep(1)
    for code_rome in romeListArray:
        table = await datavisualisation_chiffres_cles_emplois(
            "https://dataemploi.pole-emploi.fr/metier/chiffres-cles/NAT/FR/" + code_rome
        )
        # The title uses the code being iterated; the previous lookup went
        # through `codeRome_list`, a name that is not defined in this function.
        plotDemandeur(htmlToDataframe(table[0]), code_rome)
        plotSalaire(htmlToDataframe(table[1]))
        plotDifficulte(htmlToDataframe(table[2]))
        plotRepartition(htmlToDataframe(table[3]), "Répartition des embauches du métier : type de contrat")
        plotRepartition(htmlToDataframe(table[4]), "Répartition des embauches du métier : type entreprise")

    return df_emplois
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
464 |
|
465 |
@cl.step(type="tool")
|
466 |
+
async def document_chiffres_cles_emplois(url, codes):
|
467 |
if codes.find(',') != -1:
|
468 |
all = ""
|
469 |
codeArray = codes.split(',')
|
|
|
558 |
return all
|
559 |
|
560 |
@cl.step(type="tool")
async def datavisualisation_chiffres_cles_emplois(url):
    """Scrape a "chiffres clés" page and return its figures as HTML tables.

    Args:
        url: address of the key-figures page to download.

    Returns:
        A list of five HTML ``<table>`` strings, in this order: job seekers
        and job offers, salaries, hiring difficulties, hires by contract
        type, hires by company type.

    Raises:
        IndexError: when the page lacks one of the expected elements.
        requests.RequestException: when the download fails or times out.
    """
    # Without a timeout an unresponsive server would block this step forever.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, "lxml")
    table_header = "<table><tr><td>Indicateur</td><td>Valeur</td></tr>"

    ######## Job seekers / job offers ########
    categories = soup.select('p.population_category')
    main_numbers = soup.select('p.population_main-num.data')
    hiring_details = soup.select('p.hiring_text.ng-star-inserted')
    evolution_values = soup.select('p.main.ng-star-inserted')
    evolution_titles = soup.select('p.population_bubble-title')
    # Row 0 describes job seekers, row 1 job offers; the separators differ
    # (newline vs space) exactly as in the tables produced so far.
    evolution_labels = (
        ";\nÉvolution demandeurs d'emploi (",
        "; Évolution offres d'emploi (",
    )
    alldemandeurs = table_header
    for i, evolution_label in enumerate(evolution_labels):
        # Remove the non-breaking spaces used as thousands separators.
        total = removeTags(main_numbers[i]).replace('\xa0', '')
        alldemandeurs += (
            "<tr><td>" + removeTags(categories[i])
            + " (" + removeTags(hiring_details[i]) + ")"
            + evolution_label + removeTags(evolution_titles[i])
            + ": " + removeTags(evolution_values[i])
            + ")</td><td>" + total + "</td></tr>"
        )
    alldemandeurs += "</table>"

    ######## Salaries ########
    sector_lines = soup.select('tr.sectorTable__line.ng-star-inserted')
    sector_cells = soup.select('td.sectorTable__cell')
    salary_headers = soup.select('th.sectorTable__cell')
    salary_values = soup.select('p.sectorTable__cellValue')

    def salary_amount(node):
        # Keep only the digits: drop spacing characters and the euro sign.
        return removeTags(node).replace('\xa0', '').replace(' ', '').replace('€', '')

    # Each entry is (index of the job cell, index of its first salary value).
    # A second job line is read only when the page has at least two lines.
    salary_groups = [(0, 0)]
    if len(sector_lines) >= 2:
        salary_groups.append((4, 3))
    allsalaires = "<table><tr><td>categorie</td><td>emploi</td><td>salaire</td></tr>"
    for job_cell, first_value in salary_groups:
        for offset in range(3):
            # Header cell 0 is skipped; cells 1..3 name the salary columns.
            allsalaires += (
                "<tr><td>" + removeTags(salary_headers[offset + 1])
                + "</td><td>" + removeTags(sector_cells[job_cell])
                + "</td><td>" + salary_amount(salary_values[first_value + offset])
                + "</td></tr>"
            )
    allsalaires += "</table>"

    ######## Hiring difficulties ########
    difficulty_bars = soup.select('.tabs-main-content_persp-col2-bar.ng-star-inserted')
    difficulty_titles = soup.select('p.horizontal-graph_title')
    difficulty_values = soup.select('div.horizontal-graph_data')
    alldifficultes = table_header
    # The bar count drives the loop; the other lists are indexed in parallel.
    for i in range(len(difficulty_bars)):
        title = removeTags(difficulty_titles[i])
        value = removeTags(difficulty_values[i]).replace('Pour le territoire principal FRANCE pour les ' + title, '').replace('%', '')
        alldifficultes += "<tr><td>" + title + "</td><td>" + value + "</td></tr>"
    alldifficultes += "</table>"

    ######## Hires by contract type ########
    contract_items = soup.select('div.hiring-contract_legende_item.ng-star-inserted')
    contract_labels = soup.select('p.hiring-contract_legende_item_label')
    contract_shares = soup.select('span.hiring-contract_legende_item-first')
    allrepartitions = table_header
    for i in range(len(contract_items)):
        share = removeTags(contract_shares[i])
        # The label embeds its own "(share)" suffix; strip it from the name.
        label = removeTags(contract_labels[i]).replace('(' + share + ')', '')
        allrepartitions += "<tr><td>" + label + "</td><td>" + share.replace('%', '').replace(',', '.') + "</td></tr>"
    allrepartitions += "</table>"

    ######## Hires by company type ########
    company_spans = soup.select('div.horizontal-graph_pattern.sm-bubble_wrapper > span')
    allentreprises = table_header
    for span in company_spans:
        text = removeTags(span)
        # The last four characters hold the percentage, the rest is the label.
        allentreprises += "<tr><td>" + text[0:-4] + "</td><td>" + text[-4:].replace('%', '').replace(',', '.') + "</td></tr>"
    allentreprises += "</table>"

    return [alldemandeurs, allsalaires, alldifficultes, allrepartitions, allentreprises]
|
617 |
+
|
618 |
+
# Three-character place prefixes that are left out of the map.
_EXCLUDED_PLACE_CODES = ['Fra', 'FRA', 'Ile', 'Mar', 'Bou', '976']
# Offer fields that must all be present for the "detail" charts.
_DETAIL_COLUMNS = ['competences', 'qualitesProfessionnelles', 'formations']
_OVERLAY_GEOJSON_URL = "https://raw.githubusercontent.com/codeforgermany/click_that_hood/main/public/data/spain-provinces.geojson"


def _place_code(lieu_travail):
    """Return the first three characters of the work-place label, stripped."""
    return lieu_travail['libelle'][0:3].strip()


def _count_rows(frame, column):
    """Count the rows of *frame* per value of *column* (count in 'obs')."""
    return frame.groupby(column).size().reset_index(name='obs')


def _count_list_items(frame, column, render):
    """Count the items of a list-valued column, one row per distinct item.

    The lists are exploded directly instead of being joined with '; ' and
    split on ';' again: that round trip left a leading space on every item
    but the first, so one item was counted under two different labels.
    """
    items = frame[column].apply(lambda entries: [render(entry) for entry in entries]).explode()
    return _count_rows(items.to_frame(name=column), column)


def _top_bar_figure(counts, column, title, top):
    """Build a horizontal bar chart of the *top* most frequent values."""
    top_counts = counts.sort_values(by=['obs']).iloc[-top:]
    figure = px.bar(top_counts, x='obs', y=column, orientation='h', color='obs', title=title, labels={'obs':'nombre'}, color_continuous_scale="Teal", text_auto=True)
    figure.update_layout(font=dict(size=10,color="RebeccaPurple"),autosize=True)
    # Axis labels are cut at 100 characters; the "..." suffix is appended to
    # every label, including the short ones.
    figure.update_traces(hovertemplate=top_counts[column] + ' <br>Nombre : %{x}', y=[label[:100] + "..." for label in top_counts[column]], showlegend=False)
    return figure


def _share_pie_figure(frame, column, title):
    """Build a pie chart of the share of rows per value of *column*."""
    counts = _count_rows(frame, column)
    figure = px.pie(counts, names=column, values='obs', color='obs', title=title, labels={'obs':'nombre'}, color_discrete_sequence=px.colors.qualitative.Safe)
    figure.update_traces(textposition='inside', textinfo='percent+label')
    figure.update_layout(font=dict(size=10,color="RebeccaPurple"))
    return figure


def _overlay_layers():
    """Return the map overlay layer, or no layer when it cannot be fetched.

    NOTE(review): the overlay is a Spain-provinces outline drawn with a line
    width of 0 — it looks like a leftover; confirm whether it is needed.
    """
    try:
        res = requests.get(_OVERLAY_GEOJSON_URL, timeout=10)
        res.raise_for_status()
        geojson = res.json()
    except (requests.RequestException, ValueError):
        # The overlay must not prevent the charts from being displayed.
        return []
    return [
        {
            "source": geojson,
            "type": "line",
            "color": "green",
            "line": {"width": 0},
        }
    ]


def _localisation_figure(results_df):
    """Build the bubble map of the number of offers per place code."""
    places = results_df['lieuTravail'].apply(_place_code)
    places = places[~places.isin(_EXCLUDED_PLACE_CODES)]
    df_localisation = _count_rows(places.to_frame(name='lieuTravail'), 'lieuTravail')
    df_localisation = df_localisation.sort_values(by=['lieuTravail'])

    # Index the centroids by ID once; codes without a centroid get NaN.
    centroids = {centroid['ID']: centroid for centroid in localisation()}
    for target, key in (('longitude', 'Longitude'), ('latitude', 'Latitude')):
        coordinates = df_localisation['lieuTravail'].map(lambda code, key=key: centroids.get(code, {}).get(key))
        df_localisation[target] = pd.to_numeric(coordinates, downcast="float")

    return px.scatter_mapbox(df_localisation, lat="latitude", lon="longitude", hover_name="lieuTravail", size="obs").update_layout(
        mapbox={
            "style": "carto-positron",
            "center": {"lon": 2, "lat" : 47},
            "zoom": 4.5,
            "layers": _overlay_layers(),
        }
    )


@cl.step(type="tool")
async def datavisualisation_statistiques_emplois(results_df):
    """Send the job-market charts built from a job-offer DataFrame.

    Args:
        results_df: one row per job offer. The columns read here are
            'intitule', 'typeContratLibelle', 'secteurActiviteLibelle',
            'experienceLibelle', 'competences', 'qualitesProfessionnelles',
            'formations' and 'lieuTravail'.

    Returns:
        None. The charts are sent as a Chainlit message.

    The competence, quality, qualification and experience charts only use
    offers that provide competences, qualities and formations; they are left
    out when no offer does. An empty *results_df* sends a short notice
    instead of failing.
    """
    if results_df.empty:
        await cl.Message(content="Aucune offre d'emploi trouvée : datavisualisation du marché de l'emploi indisponible").send()
        return

    figures = []

    ######## Job titles ########
    figures.append(("chart_intitule", _top_bar_figure(_count_rows(results_df, 'intitule'), 'intitule', "Les principaux emplois", 25)))

    ######## Contract types ########
    figures.append(("chart_contrat", _share_pie_figure(results_df, 'typeContratLibelle', "Les types de contrat")))

    # reindex() turns a missing detail column into NaN, so such offers are
    # filtered out below instead of raising a KeyError.
    detailed_df = results_df.reindex(columns=['experienceLibelle'] + _DETAIL_COLUMNS)
    detailed_df = detailed_df.dropna(subset=_DETAIL_COLUMNS)
    if not detailed_df.empty:
        ######## Professional skills ########
        df_competences = _count_list_items(detailed_df, 'competences', lambda e: str(e['libelle']))
        figures.append(("chart_competences", _top_bar_figure(df_competences, 'competences', "Les principales compétences professionnelles", 20)))

        ######## Soft skills ########
        df_transversales = _count_list_items(detailed_df, 'qualitesProfessionnelles', lambda e: str(e['libelle']) + ": " + str(e['description']))
        figures.append(("chart_transversales", _top_bar_figure(df_transversales, 'qualitesProfessionnelles', "Les principales compétences transversales", 20)))

        ######## Qualification levels ########
        # An offer's levels are counted as one combined label.
        df_formations = detailed_df.assign(formations=detailed_df['formations'].apply(lambda entries: '; '.join(str(e['niveauLibelle']) for e in entries)))
        figures.append(("chart_formations", _share_pie_figure(df_formations, 'formations', "Les niveaux de qualification")))

        ######## Professional experience ########
        figures.append(("chart_experience", _share_pie_figure(detailed_df, 'experienceLibelle', "Les expériences professionnelles")))

    ######## Activity sectors ########
    figures.append(("chart_secteur", _top_bar_figure(_count_rows(results_df, 'secteurActiviteLibelle'), 'secteurActiviteLibelle', "Les principaux secteurs d'activités", 25)))

    ######## Location ########
    figures.append(("chart_localisation", _localisation_figure(results_df)))

    elements = [cl.Plotly(name=name, figure=figure, display="inline", size="large") for name, figure in figures]

    await cl.Message(content="Datavisualisation du marché de l'emploi", elements=elements).send()
|
735 |
+
|
736 |
+
@cl.step(type="tool")
async def API_France_Travail(romeListArray):
    """Search France Travail job offers for each entry of *romeListArray*.

    Every entry is sent as the ``motsCles`` search parameter, restricted to
    offers created between the first day of the previous month and now, with
    the result range ``'0-149'``.

    Returns:
        A DataFrame built from the concatenated ``"resultats"`` lists of all
        searches, in the order the entries were given.
    """
    france_travail = await connexion_France_Travail()

    today = datetime.datetime.today()
    if today.month == 1:
        # January: the previous month is December of the previous year.
        previous_month, previous_year = 12, today.year - 1
    else:
        previous_month, previous_year = today.month - 1, today.year
    window_start = today.replace(day=1, month=previous_month, year=previous_year)
    window_end = datetime.datetime.today()

    offers = []
    for rome_code in romeListArray:
        query = {
            "motsCles": rome_code,
            'minCreationDate': dt_to_str_iso(window_start),
            'maxCreationDate': dt_to_str_iso(window_end),
            'range': '0-149',
        }
        response = france_travail.search(params=query)
        offers.extend(response["resultats"])

    return pd.DataFrame(offers)
|
750 |
+
|
751 |
+
@cl.step(type="tool")
|
752 |
+
async def creation_liste_code_Rome(competence):
|
753 |
os.environ['PINECONE_API_KEYROME'] = os.environ['PINECONE_API_KEYROME']
|
754 |
docsearch = await connexion_catalogue_Rome()
|
755 |
retrieve_comp = docsearch.similarity_search(competence, k=30, filter={"categorie": {"$eq": os.environ['PINECONE_API_KEYROME']}})
|
|
|
773 |
|
774 |
results_df = results_df.drop_duplicates(subset=["codeRome"])
|
775 |
results_df = results_df.head(5)
|
776 |
+
codeRomeString = ' | ' + results_df["codeRome"].to_string(index = False) + ' |\n| -------- |'
|
777 |
codeRome_list = results_df["codeRome"].tolist()
|
778 |
actionRome = await cl.AskActionMessage(
|
779 |
content="Etes-vous d'accord avec la sélection des 5 codes Rome automatiques issus de la recherche sémantique ? :" + codeRomeString,
|
|
|
786 |
await cl.Message(
|
787 |
content="Connexion à France Travail, et récupération des offres d'emploi",
|
788 |
).send()
|
|
|
789 |
cl.user_session.set("codeRomeArray", codeRome_list)
|
790 |
else:
|
791 |
actionsaisierome = await cl.AskUserMessage(content="Saisissez vos codes Rome dans le prompt? ⚠️ Attention, indiquez seulement des codes Rome séparés par des virgules", timeout=3600).send()
|
|
|
798 |
teststringCodeRome = [ele for ele in stopWords if(ele in stringCodeRome)]
|
799 |
teststringCodeRome = bool(teststringCodeRome)
|
800 |
if teststringCodeRome == False:
|
801 |
+
arrayCodeRome = stringCodeRome.split(',')
|
802 |
else:
|
803 |
arrayCodeRome = codeRome_list
|
804 |
await cl.Message(author="Datapcc : 🌐🌐🌐",content="Votre ssaisie est erronée. Nous continuons l'action avec les codes Rome sélectionnés automatiquement pour vous : " + codeRome_list).send()
|
|
|
805 |
cl.user_session.set("codeRomeArray", arrayCodeRome)
|
806 |
+
|
807 |
+
@cl.step(type="tool")
async def connexion_France_Travail():
    """Create the job-offer API client from environment credentials.

    Reads ``POLE_EMPLOI_CLIENT_ID`` and ``POLE_EMPLOI_CLIENT_SECRET`` from the
    environment; a missing variable raises ``KeyError``.
    """
    identifier = os.environ['POLE_EMPLOI_CLIENT_ID']
    secret = os.environ['POLE_EMPLOI_CLIENT_SECRET']
    return Api(client_id=identifier, client_secret=secret)
|
812 |
|
813 |
@cl.step(type="tool")
|
814 |
async def connexion_catalogue_Rome():
|
|
|
818 |
docsearch = PineconeVectorStore.from_existing_index(os.environ['PINECONE_INDEX_NAME'], embeddings)
|
819 |
return docsearch
|
820 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
821 |
@cl.step(type="llm")
|
822 |
async def IA():
|
823 |
os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.environ['HUGGINGFACEHUB_API_TOKEN']
|