fadliaulawi
committed on
Commit
•
7af6232
1
Parent(s):
7d77e87
Refactor method for manual rsID
Browse files
- app.py +3 -2
- process.py +3 -3
app.py
CHANGED
@@ -106,9 +106,10 @@ if uploaded_files and submit:
|
|
106 |
with ThreadPoolExecutor() as executor:
|
107 |
result_text = executor.submit(process.get_entity, (chunks, 'alls')).result()
|
108 |
result_table = executor.submit(process.get_table, pdf.name).result()
|
109 |
-
|
|
|
110 |
# Manually search for rsID
|
111 |
-
result_text =
|
112 |
|
113 |
# Combine two results
|
114 |
result_text['Source'] = 'Text'
|
|
|
106 |
with ThreadPoolExecutor() as executor:
|
107 |
result_text = executor.submit(process.get_entity, (chunks, 'alls')).result()
|
108 |
result_table = executor.submit(process.get_table, pdf.name).result()
|
109 |
+
result_rsid = executor.submit(process.get_rsid, passage).result()
|
110 |
+
|
111 |
# Manually search for rsID
|
112 |
+
result_text = pd.concat([result_text, result_rsid]).fillna('').reset_index(drop=True)
|
113 |
|
114 |
# Combine two results
|
115 |
result_text['Source'] = 'Text'
|
process.py
CHANGED
@@ -158,10 +158,10 @@ class Process():
|
|
158 |
|
159 |
return df_result
|
160 |
|
161 |
-
def get_rsid(self,
|
162 |
|
163 |
rsids = re.findall('(rs[\d]{3,})', text)
|
|
|
164 |
df_rsid = pd.DataFrame(rsids, columns=['rsID'])
|
165 |
-
df = pd.concat([df, df_rsid]).fillna('').reset_index(drop=True)
|
166 |
|
167 |
-
return
|
|
|
158 |
|
159 |
return df_result
|
160 |
|
161 |
+
def get_rsid(self, text):
|
162 |
|
163 |
rsids = re.findall('(rs[\d]{3,})', text)
|
164 |
+
rsids = list(set(rsids))
|
165 |
df_rsid = pd.DataFrame(rsids, columns=['rsID'])
|
|
|
166 |
|
167 |
+
return df_rsid
|