domenicrosati committed on
Commit
4b582d1
•
1 Parent(s): 1137a5a

abstract snip

Browse files
Files changed (1) hide show
  1. app.py +16 -2
app.py CHANGED
@@ -107,11 +107,25 @@ def find_source(text, docs, matched):
107
  if matched and remove_html(doc[3]).strip() != matched.strip():
108
  continue
109
  new_text = text
110
- for sent in nltk.sent_tokenize(remove_html(doc[3])):
 
 
111
  if text in sent:
112
  new_text = sent
 
 
 
 
 
 
 
 
 
 
 
 
113
  return {
114
- 'citation_statement': new_text if text != new_text else remove_html(doc[3]).replace('<strong class="highlight">', '').replace('</strong>', ''),
115
  'text': new_text,
116
  'from': doc[0],
117
  'supporting': doc[0],
 
107
  if matched and remove_html(doc[3]).strip() != matched.strip():
108
  continue
109
  new_text = text
110
+ sent_loc = None
111
+ sents = nltk.sent_tokenize(remove_html(doc[3]))
112
+ for i, sent in enumerate(sents):
113
  if text in sent:
114
  new_text = sent
115
+ sent_loc = i
116
+
117
+ context = remove_html(doc[3]).replace('<strong class="highlight">', '').replace('</strong>', '')
118
+ if sent_loc:
119
+ context_len = 2
120
+ sent_beg = sent_loc - context_len
121
+ if sent_beg <= 0: sent_beg = 0
122
+ sent_end = sent_loc + context_len
123
+ if sent_end >= len(sents):
124
+ sent_end = len(sents)
125
+ context = ''.join(sents[sent_beg:sent_end])
126
+
127
  return {
128
+ 'citation_statement': context,
129
  'text': new_text,
130
  'from': doc[0],
131
  'supporting': doc[0],