fadliaulawi committed on
Commit
7af6232
1 Parent(s): 7d77e87

Refactor method for manual rsID

Browse files
Files changed (2) hide show
  1. app.py +3 -2
  2. process.py +3 -3
app.py CHANGED
@@ -106,9 +106,10 @@ if uploaded_files and submit:
106
  with ThreadPoolExecutor() as executor:
107
  result_text = executor.submit(process.get_entity, (chunks, 'alls')).result()
108
  result_table = executor.submit(process.get_table, pdf.name).result()
109
-
 
110
  # Manually search for rsID
111
- result_text = process.get_rsid(result_text, passage)
112
 
113
  # Combine two results
114
  result_text['Source'] = 'Text'
 
106
  with ThreadPoolExecutor() as executor:
107
  result_text = executor.submit(process.get_entity, (chunks, 'alls')).result()
108
  result_table = executor.submit(process.get_table, pdf.name).result()
109
+ result_rsid = executor.submit(process.get_rsid, passage).result()
110
+
111
  # Manually search for rsID
112
+ result_text = pd.concat([result_text, result_rsid]).fillna('').reset_index(drop=True)
113
 
114
  # Combine two results
115
  result_text['Source'] = 'Text'
process.py CHANGED
@@ -158,10 +158,10 @@ class Process():
158
 
159
  return df_result
160
 
161
- def get_rsid(self, df, text):
162
 
163
  rsids = re.findall('(rs[\d]{3,})', text)
 
164
  df_rsid = pd.DataFrame(rsids, columns=['rsID'])
165
- df = pd.concat([df, df_rsid]).fillna('').reset_index(drop=True)
166
 
167
- return df
 
158
 
159
  return df_result
160
 
161
+ def get_rsid(self, text):
162
 
163
  rsids = re.findall('(rs[\d]{3,})', text)
164
+ rsids = list(set(rsids))
165
  df_rsid = pd.DataFrame(rsids, columns=['rsID'])
 
166
 
167
+ return df_rsid