kkurihara-cs committed on
Commit
000bdc7
·
verified ·
1 Parent(s): f91473a

Create script.py

Browse files
Files changed (1) hide show
  1. script.py +22 -0
script.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Score test queries against their candidate FAQs and write submission.csv.

For every row of the test CSV the query text is vectorised with the
pickled ``tokenizer``, compared (cosine similarity) with the vectors of the
FAQ entries named in the row's ``FAQ1``..``FAQ3`` columns, and the best
similarity is thresholded with ``model["thr"]`` to obtain a 0/1 prediction.
"""

import pickle

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

test_df = pd.read_csv("/tmp/data/test.csv")

# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file. Only load a model.pkl that comes from a trusted source.
with open("model.pkl", "rb") as f:
    model = pickle.load(f)

# Names of the columns that hold each row's candidate FAQ ids.
faq_columns = [f"FAQ{i + 1}" for i in range(3)]

scores = []
for _, row in test_df.iterrows():
    X_query = model["tokenizer"].transform([row["Query"]])

    # Boolean mask over the FAQ entries: True where the entry's id equals
    # any of this row's candidate ids.
    is_cand = np.logical_or.reduce(
        [model["faq_ids"] == row[col] for col in faq_columns]
    )

    if not np.any(is_cand):
        # None of the candidate ids is known to the model. Taking max() of
        # an empty similarity vector would raise ValueError, so score the
        # row below any threshold instead; it is then predicted as 0.
        scores.append(-np.inf)
        continue

    sim = cosine_similarity(X_query, model["X_faq"][is_cand])[0]
    scores.append(sim.max())

# A row is positive when its best candidate similarity exceeds the threshold.
predict = (np.array(scores) > model["thr"]).astype(int)

df = pd.DataFrame(
    {
        "id": [f"testid{i:04}" for i in range(len(predict))],
        "pred": predict,
    }
)
df.to_csv("submission.csv", index=False)