vietdata committed on
Commit
8144c2e
1 Parent(s): b001ab7

first update

Browse files
Files changed (3) hide show
  1. app.py +1 -0
  2. requirements.txt +3 -0
  3. utils.py +17 -0
app.py CHANGED
@@ -6,6 +6,7 @@ import requests
6
  import os
7
  from langdetect import detect
8
  import pandas as pd
 
9
 
10
  # Load the source dataset
11
  source_dataset = load_dataset("vietdata/eng_echo", split="train")
 
6
  import os
7
  from langdetect import detect
8
  import pandas as pd
9
+ from utils import *
10
 
11
  # Load the source dataset
12
  source_dataset = load_dataset("vietdata/eng_echo", split="train")
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ deep-translator
2
+ langdetect
3
+ thefuzz
utils.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from thefuzz import fuzz
2
+ from deep_translator import GoogleTranslator
3
+ import time
4
+
5
def gg_score(original, translation, target="en"):
    """Score a translation by comparing it with Google Translate output.

    Both ``original`` and ``translation`` are sent through Google Translate
    (source language auto-detected) into ``target``. The result is the mean
    of two fuzzy token-set similarities, scaled to the range 0..1:

    * ``original`` versus the Google rendering of ``translation``
    * the Google rendering of ``original`` versus ``translation``

    Args:
        original: Source text.
        translation: Candidate translation to be scored.
        target: Language code passed to ``GoogleTranslator`` as the target.

    Returns:
        A float: the sum of the two ``fuzz.token_set_ratio`` values
        divided by 200.

    Raises:
        Exception: whatever ``GoogleTranslator(...)`` raises on construction
            (for example an unsupported ``target``); such errors are not
            retried.
    """
    # NOTE(review): both texts are translated into the same ``target``
    # language, so ``back_original`` is only a true back-translation when
    # ``target`` is the language of ``original`` -- confirm with the caller.

    # Build the translator once, outside the retry loop. A construction
    # failure is a configuration problem that retrying cannot fix, and the
    # original code would spin forever on it.
    translator = GoogleTranslator(source='auto', target=target)

    while True:
        try:
            back_original = translator.translate(translation)
            gg_translation = translator.translate(original)
        except Exception:
            # Best-effort retry on translation failures. ``Exception``
            # (not a bare ``except``) lets KeyboardInterrupt and SystemExit
            # propagate so the loop can still be interrupted.
            time.sleep(1)
        else:
            break

    return (
        fuzz.token_set_ratio(original, back_original)
        + fuzz.token_set_ratio(gg_translation, translation)
    ) / 200
15
+
16
def miner_score(translation, grouth):
    """Return the fuzzy token-set similarity of two texts, divided by 100.

    Args:
        translation: Candidate translation text.
        grouth: Text to compare against (presumably the ground-truth
            translation -- confirm with the caller).

    Returns:
        ``fuzz.token_set_ratio(translation, grouth)`` divided by 100.
    """
    similarity = fuzz.token_set_ratio(translation, grouth)
    return similarity / 100