Ammar-Abdelhady-ai committed on
Commit
c8477d8
1 Parent(s): e00720e

Your commit message here

Browse files
Files changed (1) hide show
  1. main.py +7 -8
main.py CHANGED
@@ -2,7 +2,6 @@ import threading
2
  from functions import extract_text_from_pdf, get_most_similar_job
3
  from fastapi import UploadFile, HTTPException, FastAPI
4
  import pandas as pd
5
- from fastapi import UploadFile, HTTPException
6
  from sklearn.feature_extraction.text import TfidfVectorizer
7
 
8
 
@@ -38,14 +37,13 @@ def fit_threads(text):
38
 
39
 
40
  df = pd.read_csv("all.csv")
41
- df['concatenated_column'] = pd.concat([df['job_title'] + df['job_description'] + df['job_requirements'], df['city_name']], axis=1).astype(str).agg(''.join, axis=1)
42
- x = df['concatenated_column']
43
  y = df["label"]
44
  vectorizer = TfidfVectorizer(stop_words='english')
45
-
46
  vectorizer.fit(x)
47
  df_vect = vectorizer.transform(x)
48
- print(df.shape, len(df))
49
  # Initialize the summarizer model
50
 
51
 
@@ -58,14 +56,13 @@ def summarization(text):
58
  part = summarizer(text, max_length=150, min_length=30, do_sample=False)
59
  summ_data.append(part[0]["summary_text"].replace("\xa0", ""))
60
 
61
-
62
  app = FastAPI(project_name="cv")
63
 
64
  @app.get("/")
65
  async def read_root():
66
  return {"Hello": "World, Project name is : CV Description"}
67
 
68
-
69
  @app.post("/prediction")
70
  async def detect(cv: UploadFile, number_of_jobs: int):
71
 
@@ -79,6 +76,8 @@ async def detect(cv: UploadFile, number_of_jobs: int):
79
  status_code=415, detail="Please inter PDF file "
80
  )
81
 
 
 
82
  cv_data = extract_text_from_pdf(await cv.read())
83
  index = len(cv_data)//3
84
  text = [cv_data[:index], cv_data[index:2*index], cv_data[2*index:]]
@@ -89,7 +88,7 @@ async def detect(cv: UploadFile, number_of_jobs: int):
89
  cv_vect = vectorizer.transform([data])
90
  indices = get_most_similar_job(data=data, cv_vect=cv_vect, df_vect=df_vect)
91
  # Check if all threads have finished
92
- print("ALL Done")
93
 
94
  prediction_data = df.iloc[indices[:number_of_jobs]].applymap(lambda x: str(x)).to_dict(orient='records')
95
 
 
2
  from functions import extract_text_from_pdf, get_most_similar_job
3
  from fastapi import UploadFile, HTTPException, FastAPI
4
  import pandas as pd
 
5
  from sklearn.feature_extraction.text import TfidfVectorizer
6
 
7
 
 
37
 
38
 
39
  df = pd.read_csv("all.csv")
40
+ concatenated_column = pd.concat([df['job_title'] + df['job_description'] + df['job_requirements'], df['city_name']], axis=1).astype(str).agg(''.join, axis=1)
41
+ x = concatenated_column
42
  y = df["label"]
43
  vectorizer = TfidfVectorizer(stop_words='english')
44
+ print("df done")
45
  vectorizer.fit(x)
46
  df_vect = vectorizer.transform(x)
 
47
  # Initialize the summarizer model
48
 
49
 
 
56
  part = summarizer(text, max_length=150, min_length=30, do_sample=False)
57
  summ_data.append(part[0]["summary_text"].replace("\xa0", ""))
58
 
59
+ print("start api code")
60
  app = FastAPI(project_name="cv")
61
 
62
  @app.get("/")
63
  async def read_root():
64
  return {"Hello": "World, Project name is : CV Description"}
65
 
 
66
  @app.post("/prediction")
67
  async def detect(cv: UploadFile, number_of_jobs: int):
68
 
 
76
  status_code=415, detail="Please inter PDF file "
77
  )
78
 
79
+
80
+
81
  cv_data = extract_text_from_pdf(await cv.read())
82
  index = len(cv_data)//3
83
  text = [cv_data[:index], cv_data[index:2*index], cv_data[2*index:]]
 
88
  cv_vect = vectorizer.transform([data])
89
  indices = get_most_similar_job(data=data, cv_vect=cv_vect, df_vect=df_vect)
90
  # Check if all threads have finished
91
+ print("ALL Done \n\n")
92
 
93
  prediction_data = df.iloc[indices[:number_of_jobs]].applymap(lambda x: str(x)).to_dict(orient='records')
94