AnonymousSub committed on
Commit • 1de9c91
1 Parent(s): e1f526d
Upload 4 files
- retrieve.py +445 -0
- utils.py +422 -0
- wiki_1.json +25 -0
- wiki_2.json +31 -0
retrieve.py
ADDED
@@ -0,0 +1,445 @@
from tenacity import retry, stop_after_attempt, wait_random_exponential
from tqdm import tqdm
import time
import sys

# MODEL_NAME = str(sys.argv[1])
# num_shots = int(sys.argv[2])
# method = str(sys.argv[3]) #['fixed', 'random', 'bm25']

# ADDED K-SHOT SETTING, WHERE K IS VARIABLE

# import openai
import time
# import pandas as pd
import random
random.seed(1)

import csv
import os
import pickle
import json
import nltk
nltk.download('punkt')
nltk.download('stopwords')
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
import string

from langchain.chat_models import AzureChatOpenAI
from langchain.schema import HumanMessage, SystemMessage
from langchain.callbacks import get_openai_callback
from langchain.llms import OpenAI
import tiktoken

import re
from nltk.tokenize import sent_tokenize
from collections import defaultdict

import nltk
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
import numpy as np

# Get the parent directory
# parent_dir = "/home/abnandy/sensei-fs-link"#os.path.abspath(os.path.join(os.getcwd(), os.pardir))
# Add the parent directory to the system path
# sys.path.append(parent_dir)

from utils import AzureModels, write_to_file, read_from_file
# from utils_open import OpenModels

def remove_stopwords_and_punctuation(text):
    # Get the list of stopwords
    stop_words = set(stopwords.words('english'))

    # Remove punctuation from text
    text = text.translate(str.maketrans('', '', string.punctuation.replace('_', '').replace('@', '')))

    # Split the text into words
    words = text.split()

    # Remove stopwords
    filtered_words = [word for word in words if word.lower() not in stop_words]

    # Join the words back into a single string
    filtered_text = ' '.join(filtered_words)

    return filtered_text

def get_key(list_):
    tmp_str = '@cite'
    for item in list_:
        tmp_str += item.replace('@cite', '')
    return tmp_str

def group_citations(key):
    list_ = ["@cite_" + item for item in key.replace("@cite_", "").split("_")]
    return ", ".join(list_)

def code_to_extra_info(code_str):
    citation_bracket_keys = []
    sentence_keys = []
    code_lines = code_str.split("\n")
    for line in code_lines:
        if "citation_bracket[" in line.split("=")[0]:
            citation_bracket_keys.append(line.split("=")[0].split('citation_bracket["')[-1].split('"]')[0])
        if "sentence[" in line.split("=")[0]:
            sentence_keys.append(line.split("=")[0].split('sentence["')[-1].split('"]')[0])

    cb_template = "{} are in the same citation bracket (i.e., they are right next to each other) within the section of the Wikipedia Article."
    sent_template = "{} are in the same sentence within the section of the Wikipedia Article."

    cb_list = [cb_template.format(group_citations(key)) for key in citation_bracket_keys if key.count("_")>1]
    sent_list = [sent_template.format(group_citations(key)) for key in sentence_keys if key.count("_")>1]

    if len(cb_list) + len(sent_list) == 0:
        return ""
    return_str = "\n\nNOTE THAT -\n\n" + "\n".join(cb_list) + "\n\n" + "\n".join(sent_list)

    return return_str

def get_code_str(related_work, reference_dict):
    # print(reference_dict.keys())
    citation_bracket_code_lines = []
    sentence_code_lines = []

    # Tokenize the related work into sentences
    sentences = sent_tokenize(related_work)

    # Get all citation tags from the reference_dict
    citation_tags = list(reference_dict.keys())

    for sentence in sentences:
        tmp_sentence_list = []
        parts = remove_stopwords_and_punctuation(sentence).split(' ')
        cb_list = []
        str_cb_list = []

        # print(parts)
        # print(reference_dict.keys())
        # print(1/0)

        for word in parts:
            if word in reference_dict:
                cb_list.append(word)
                str_cb_list.append('"' + word + '"')
            else:
                if len(cb_list)>0:
                    # print(cb_list)
                    citation_bracket_code_lines.append('citation_bracket["{}"] = {}'.format(get_key(cb_list), str(str_cb_list)))
                    tmp_sentence_list.append(get_key(cb_list))
                    cb_list = []
                    str_cb_list = []

        if len(cb_list) > 0:
            citation_bracket_code_lines.append('citation_bracket["{}"] = {}'.format(get_key(cb_list), str(str_cb_list)))
            tmp_sentence_list.append(get_key(cb_list))
            cb_list = []
            str_cb_list = []

        tmp_values = []
        for key in tmp_sentence_list:
            tmp_values.append('citation_bracket["{}"]'.format(key))
        if len(tmp_values) > 0:
            sentence_code_lines.append('sentence["{}"] = {}'.format(get_key(tmp_sentence_list), str(tmp_values)))

    return " " + "\n ".join(citation_bracket_code_lines).replace("'", "") + "\n\n " + "\n ".join(sentence_code_lines).replace("'", "")

def get_prompt(list_, i, prompt_template):
    gt_summary = list_[i]['related_work'].strip()
    inp_intent = list_[i]['abstract'].strip()

    input_code_str = " "
    input_code_list = []

    # print(sent_tokenize(gt_summary))
    # print()

    # print(1/0)
    tmp_list = list_[i]['ref_abstract']
    # abstract_list = []
    # cite_tags = []
    abstract_dict = {}
    # write_to_file("dummy.json", tmp_list)
    for key in tmp_list:
        abstract_dict[key] = tmp_list[key]['abstract'].strip()
    for key in abstract_dict:
        input_code_list.append('reference_articles["{}"] = "{}"'.format(key, abstract_dict[key]))
    input_code_list.append('intent = "{}"'.format(inp_intent))
    input_code_str += "\n ".join(input_code_list)
    code_str = get_code_str(gt_summary, tmp_list)
    prompt = prompt_template.format(input_code_str)
    return gt_summary, prompt, code_str

def preprocess_retrieved_out(tmp_keys, out):
    new_dict = {}
    for key in tmp_keys:
        for line in out.split("\n"):
            if key in line:
                summ_doc = line.split(":", 1)[-1].strip()
                new_dict[key] = {"abstract": summ_doc}
                print(key)
                print(summ_doc)
                print()
                break
    return new_dict

def get_slide(topic, text):
    slide_prompt = '''Convert this text into more structured text (in markdown) that can be put into the content of a slide in a presentation (e.g. use bullet points, numbered points, proper layout, etc.). Also, the include the topic "{}" of the slide. -

{}'''
    azure_models = AzureModels("gpt4o")
    slide_prompt = slide_prompt.format(topic, text)
    out_ = azure_models.get_completion(slide_prompt, 100)
    time.sleep(2)
    return out_

def get_retrieved_results(MODEL_NAME, num_shots, method, train_list, test_list, code=False, organize_out=None):
    response_template = ''
    instruction_template = ''

    final_dict = {}

    pred_dict = {}
    start_idx = 0

    icl_extra_info = ""
    test_extra_info = ""

    if 'gpt4' in MODEL_NAME:
        azure_models = AzureModels(MODEL_NAME)
    else:
        if code:
            instruction_template = '''Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
'''
            response_template = '### Response:\n'
        else:
            response_template = '### Assistant: '
        if MODEL_NAME=='gemma2b':
            model_id = "google/gemma-2b-it"
        elif MODEL_NAME=='gemma7b':
            model_id = "google/gemma-7b-it"
        elif MODEL_NAME=='mistral7b':
            model_id = "mistralai/Mistral-7B-Instruct-v0.3"
        elif MODEL_NAME=="llama7b":
            model_id = "meta-llama/Llama-2-7b-chat-hf"
        elif MODEL_NAME=="llama13b":
            model_id = "meta-llama/Llama-2-13b-chat-hf"
        elif MODEL_NAME=="llama3":
            model_id="meta-llama/Meta-Llama-3-8B-Instruct"
        elif MODEL_NAME=="galactica7b":
            model_id = "facebook/galactica-6.7b"
        open_models = OpenModels(model_id)

    prompt_template = '''Given are a set of articles referenced in a Wikipedia Article, and the intent -

Reference Articles:
{}

Intent:
{}

Summarize each reference article (generate in the format "@cite_K : <SUMMARIZED CONTENT CORREPONDING TO @cite_K>", each in a new line, where @cite_K represents each of the following citation/reference tags - {}, given in Reference Articles), given the reference articles as documents, and the intent.{}

{}Answer: '''

    if organize_out!=None:
        prompt_template = '''Given are a set of articles referenced in a Wikipedia Article, and the intent -

Reference Articles:
{}

Intent:
{}

Generate the wikipedia article section in 100-200 words based on the intent as an intent-based multi-document summary, given the reference articles as documents, and the intent.{}

{}Answer: '''

    if code:
        prompt_template = '''def main():
    # Given is a dictionary of articles that are referenced in a section of the Wikipedia Article, and the intent -

    reference_articles = dict()

    {}'''

    if method == 'bm25':
        retrieve_dict = read_from_file("bm25_10_icl_samples_50_holdout_samples.json")
    elif method == "gat":
        retrieve_dict = read_from_file("gat_20_icl_samples_50_holdout_samples.json")

    #len(test_list))):

    icl_train_indices = [0,1]

    if code:
        for i in tqdm(range(start_idx, len(test_list))):#start_idx, len(test_list))):
            if len(test_list[i]['ref_abstract']) > 1:

                full_icl_prompt = ""

                hier_cluster_prompt = "\n def hierarchical_clustering():\n # Hierarchical Clustering of references within a section of the Wikipedia Article, based on the reference articles and the intent\n citation_bracket = {} # This dictionary contains lists as values that shows how references are grouped within the same citation bracket in the section of the Wikipedia Article\n sentence = {} # This dictionary contains lists, where each list contains references in a sentence in the section of the Wikipedia Article\n\n"

                if num_shots > 0:

                    if method == "random":
                        icl_train_indices = random.sample(holdout_indices, num_shots)#random.sample(np.arange(len(train_list)).tolist())
                    elif (method == "bm25") or (method == "gat"):
                        icl_train_indices = [int(retrieve_dict[str(i)][j]) for j in range(num_shots)]
                    elif method == 'fixed':
                        icl_train_indices = icl_train_indices[:num_shots]

                    for enum_idx, icl_train_idx in enumerate(icl_train_indices):

                        # Fixed ICL Sample
                        icl_gt_summary, icl_prompt, icl_code_str = get_prompt(train_list, icl_train_idx, prompt_template) # this particular example has 6 citations
                        # icl_gt_summary_2, icl_prompt_2, icl_code_str_2 = get_prompt(train_list, 85) # this particular example has 12 citations, 4 of which are missing

                        full_icl_prompt += "##Example {}:\n\n".format(enum_idx + 1) + instruction_template + icl_prompt + hier_cluster_prompt + icl_code_str + "\n\n"

                    full_icl_prompt += "##Example {}:\n\n".format(num_shots+1)

                gt_summary, prompt, code_str = get_prompt(test_list, i, prompt_template)

                # full_icl_prompt_2 = "##Example 2:\n\n" + icl_prompt_2 + hier_cluster_prompt + icl_code_str_2

                final_prompt = full_icl_prompt + instruction_template + prompt + hier_cluster_prompt + " # only generate the code that comes after this, as if you are on autocomplete mode\n" + response_template

                # final_prompt = full_icl_prompt + "\n\n" + full_icl_prompt_2 + "\n\n" + prompt

                # final_prompt = full_icl_prompt + "\n\n" + prompt

                # print(get_num_inp_tokens(final_prompt))
                # print(gt_summary)
                # print("---------")
                # print(final_prompt)
                # print("---------")
                # print("GT:")
                # print(code_str)
                # print("---------")

                max_tokens = 500

                if 'gpt4' in MODEL_NAME:
                    out_ = azure_models.get_completion(final_prompt, max_tokens)
                    time.sleep(2)
                else:
                    out_ = open_models.open_completion(final_prompt, max_tokens, stop_token="##Example {}".format(num_shots + 2))

                # print("Predicted:")
                # print(out_)

                final_dict[i] = out_

        return final_dict

        # write_to_file(save_filepath, final_dict)

    else:
        if organize_out==None:
            tmp_max_tok_len=1000
        else:
            tmp_max_tok_len=300

        for i in tqdm(range(start_idx, len(test_list))):#len(test_list))):
            if len(test_list[i]['ref_abstract']) > 1:

                icl_prompt = ""

                if num_shots > 0:

                    if method == "random":
                        icl_train_indices = random.sample(holdout_indices, num_shots)#random.sample(np.arange(len(train_list)).tolist())
                    elif method == "bm25":
                        icl_train_indices = [int(retrieve_dict[str(i)][j]) for j in range(num_shots)]
                    elif method == 'fixed':
                        icl_train_indices = icl_train_indices[:num_shots]

                    for enum_idx, icl_train_idx in enumerate(icl_train_indices):
                        icl_tmp_list = train_list[icl_train_idx]['ref_abstract']
                        icl_inp_intent = train_list[icl_train_idx]['abstract']
                        icl_gt_summary = train_list[icl_train_idx]['related_work']

                        if organize_out!=None:
                            icl_code_str = get_code_str(icl_gt_summary, icl_tmp_list)
                            icl_extra_info = code_to_extra_info(icl_code_str)

                        icl_abstract_dict = {}

                        for key in icl_tmp_list:
                            if organize_out==None:
                                icl_abstract_dict[key] = icl_tmp_list[key]#['abstract']
                            else:
                                icl_abstract_dict[key] = icl_tmp_list[key]['abstract']

                        icl_abstract_list = [key + " : " + icl_abstract_dict[key] for key in icl_abstract_dict]

                        icl_paper_abstracts = "\n".join(icl_abstract_list)

                        icl_prompt += "##Example {}:\n\n".format(enum_idx + 1) + prompt_template.format(icl_paper_abstracts, icl_inp_intent, " ".join(list(icl_tmp_list.keys())), icl_extra_info, response_template) + icl_gt_summary.strip() + "\n\n"

                    icl_prompt += "##Example {}:\n\n".format(num_shots+1)

                gt_summary = test_list[i]['related_work']
                inp_intent = test_list[i]['abstract']
                if organize_out!=None:
                    test_code_str = organize_out[str(i)]
                    test_extra_info = code_to_extra_info(test_code_str)

                # print(sent_tokenize(gt_summary))
                # print()

                # print(1/0)
                tmp_list = test_list[i]['ref_abstract']
                # abstract_list = []
                # cite_tags = []
                abstract_dict = {}
                for key in tmp_list:
                    if organize_out==None:
                        abstract_dict[key] = tmp_list[key]#['abstract']
                    else:
                        abstract_dict[key] = tmp_list[key]['abstract']

                abstract_list = [key + " : " + abstract_dict[key] for key in abstract_dict]

                paper_abstracts = "\n".join(abstract_list)

                prompt = prompt_template.format(paper_abstracts, inp_intent, " ".join(list(tmp_list.keys())), test_extra_info, response_template)

                # if num_shots == 1:
                prompt = icl_prompt + prompt

                # print(prompt)
                # print("-----------")

                if 'gpt4' in MODEL_NAME:
                    out_ = azure_models.get_completion(prompt, tmp_max_tok_len)
                    time.sleep(2)
                else:
                    # try:
                    out_ = open_models.open_completion(prompt, tmp_max_tok_len, temperature=0.7)

                if organize_out==None:
                    test_list[i]["ref_abstract"] = preprocess_retrieved_out(tmp_list, out_)
                else:
                    pred_dict[i] = out_

        # return pred_dict
        # write_to_file("retrieved_docs.json", test_list)
        if organize_out==None:
            return test_list
        else:
            return pred_dict
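For reference, here is one way the entry point above could be wired together. This is an illustrative sketch and not part of the commit: it assumes wiki_1.json and wiki_2.json hold lists of examples shaped like the wiki_1.json entries below (keys "abstract", "related_work", "ref_abstract"), that valid Azure OpenAI credentials are configured in utils.AzureModels, and that a two-stage run (predict the citation grouping first, then condition the section generation on it) is the intended use of the code and organize_out arguments.

# Hypothetical driver script; file roles and argument values are assumptions.
from retrieve import get_retrieved_results
from utils import read_from_file, write_to_file

train_list = read_from_file("wiki_1.json")  # assumed role: in-context (ICL) examples
test_list = read_from_file("wiki_2.json")   # assumed role: held-out examples

# Stage 1: with code=True, the model is prompted to complete hierarchical_clustering()
# and the raw completions are returned keyed by test index.
organize_out = get_retrieved_results("gpt4o", 1, "fixed", train_list, test_list, code=True)
organize_out = {str(k): v for k, v in organize_out.items()}  # get_retrieved_results reads organize_out[str(i)]
write_to_file("organize_out.json", organize_out)

# Stage 2: generate the section text, adding the predicted grouping as extra context.
pred_dict = get_retrieved_results("gpt4o", 1, "fixed", train_list, test_list, organize_out=organize_out)
write_to_file("predictions.json", {str(k): v for k, v in pred_dict.items()})

With code=False and organize_out=None, the same function instead overwrites each test item's ref_abstract with model-generated per-citation summaries (via preprocess_retrieved_out) and returns the modified test_list.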
utils.py
ADDED
@@ -0,0 +1,422 @@
import sklearn
from tenacity import retry, stop_after_attempt, wait_random_exponential
from tqdm import tqdm
import sys
# import openai
import time
# import pandas as pd
import random
import csv
import os
import pickle
import json

from langchain.chat_models import AzureChatOpenAI
from langchain.schema import HumanMessage, SystemMessage
from langchain.callbacks import get_openai_callback
from langchain.llms import OpenAI

import tiktoken

from sklearn.feature_extraction.text import CountVectorizer
from collections import Counter
import math

import io
import contextlib

# os.system('pip install pandas reportlab')
# os.system('pip install openai==0.27.2')
# os.system('pip install tenacity')

import requests
from bs4 import BeautifulSoup
import ast

import nltk
nltk.download('punkt')
nltk.download('stopwords')
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
import string
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
import numpy as np
import evaluate

def tree_edit_distance(tree1, tree2):
    def cost(node1, node2):
        """ Cost to transform node1 to node2 """
        if node1 == node2:
            return 0
        return 1

    def tree_size(tree):
        """ Calculate the size of the tree """
        if not isinstance(tree, list) or not tree:
            return 1
        return 1 + sum(tree_size(child) for child in tree)

    def ted(tree1, tree2):
        """ Compute tree edit distance between two trees """
        if not isinstance(tree1, list) and not isinstance(tree2, list):
            return cost(tree1, tree2)
        if not isinstance(tree1, list):
            return tree_size(tree2)
        if not isinstance(tree2, list):
            return tree_size(tree1)
        if not tree1 and not tree2:
            return 0
        if not tree1:
            return sum(tree_size(child) for child in tree2)
        if not tree2:
            return sum(tree_size(child) for child in tree1)

        dp = [[0] * (len(tree2) + 1) for _ in range(len(tree1) + 1)]

        for i in range(1, len(tree1) + 1):
            dp[i][0] = dp[i-1][0] + tree_size(tree1[i-1])
        for j in range(1, len(tree2) + 1):
            dp[0][j] = dp[0][j-1] + tree_size(tree2[j-1])

        for i in range(1, len(tree1) + 1):
            for j in range(1, len(tree2) + 1):
                dp[i][j] = min(dp[i-1][j] + tree_size(tree1[i-1]),
                               dp[i][j-1] + tree_size(tree2[j-1]),
                               dp[i-1][j-1] + ted(tree1[i-1], tree2[j-1]))

        return dp[len(tree1)][len(tree2)]

    return ted(tree1, tree2)

def preprocess_code_str(code_str):
    prefix = "citation_bracket = {}\nsentence = {}\n"
    code_str = code_str.replace(" ", "")
    code_lines = code_str.split("\n")
    code_line_list = []
    for line in code_lines:
        if "citation_bracket[" in line.split("=")[0]:
            code_line_list.append(line)
        if "sentence[" in line.split("=")[0]:
            code_line_list.append(line)

    return prefix + "\n".join(code_line_list) + "\nprint(sentence)"

def run_code(code_str):
    # Redirect stdout to capture print statements
    f = io.StringIO()
    with contextlib.redirect_stdout(f):
        exec(preprocess_code_str(code_str))

    # Get the standard output
    output = f.getvalue()
    return ast.literal_eval(output)

def replace_with_char(input_list, char='a'):
    def replace_in_nested_list(nested_list):
        if isinstance(nested_list, list):
            return [replace_in_nested_list(item) for item in nested_list]
        else:
            return char

    return replace_in_nested_list(input_list)

def top_k_keys(input_dict, k):
    # Sort the dictionary items by value in descending order and extract the keys
    sorted_keys = sorted(input_dict, key=input_dict.get, reverse=True)
    # Return the top-k keys
    return sorted_keys[:k]

def keys_with_least_k_values(d, k):
    if k <= 0:
        return []

    # Get the sorted list of (key, value) tuples based on the values
    sorted_items = sorted(d.items(), key=lambda item: item[1])

    # Extract the keys of the first k items
    least_k_keys = [item[0] for item in sorted_items[:k]]

    return least_k_keys

def edit_distance_code_str(code1, code2, just_tree_structure=False):

    # code1 = preprocess_code_str(code1)
    # code2 = preprocess_code_str(code2)
    sentence1 = run_code(code1)
    list_1 = [sentence1[key] for key in sentence1]
    sentence2 = run_code(code2)
    list_2 = [sentence2[key] for key in sentence2]

    if just_tree_structure:
        list_1 = replace_with_char(list_1)
        list_2 = replace_with_char(list_2)

    return tree_edit_distance(list_1, list_2)

class eval_metrics:
    def __init__(self):
        pass
        # if is_bertscore:
        #     pass

    def get_rouge_l(self, pred, refs):
        rouge = evaluate.load('rouge')
        results = rouge.compute(predictions=pred, references=refs)
        return results['rougeL']

    def get_bleu(self, pred, refs):
        bleu = evaluate.load('bleu')
        tmp_refs = [[item] for item in refs]
        results = bleu.compute(predictions=pred, references=tmp_refs)
        return results['bleu']

    def get_meteor(self, pred, refs):
        meteor = evaluate.load('meteor')
        results = meteor.compute(predictions=pred, references=refs)
        return results['meteor']

    def get_bertscore(self, pred, refs):
        bertscore = evaluate.load('bertscore')
        results = bertscore.compute(predictions=pred, references=refs, lang = "en")
        return np.mean(results['f1'])

    def get_bleurt(self, pred, refs):
        bleurt = evaluate.load('bleurt', module_type="metric")
        # tmp_refs = [[item] for item in refs]
        results = bleurt.compute(predictions=pred, references=refs)
        return np.mean(results['scores'])

class BM25:
    def __init__(self, documents, k1=1.5, b=0.75):
        self.documents = documents
        self.k1 = k1
        self.b = b
        self.vectorizer = CountVectorizer().fit(documents)
        self.doc_term_matrix = self.vectorizer.transform(documents)
        self.doc_lengths = np.array(self.doc_term_matrix.sum(axis=1)).flatten()
        self.avg_doc_length = np.mean(self.doc_lengths)
        self.df = np.diff(self.doc_term_matrix.tocsc().indptr)
        self.idf = self.compute_idf()

    def compute_idf(self):
        N = len(self.documents)
        idf = np.log((N - self.df + 0.5) / (self.df + 0.5) + 1)
        return idf

    def compute_bm25(self, query):
        query_vec = self.vectorizer.transform([query])
        scores = []
        for doc_idx in range(self.doc_term_matrix.shape[0]):
            score = 0
            for term_idx in query_vec.indices:
                if term_idx in self.doc_term_matrix[doc_idx].indices:
                    tf = self.doc_term_matrix[doc_idx, term_idx]
                    idf = self.idf[term_idx]
                    numerator = tf * (self.k1 + 1)
                    denominator = tf + self.k1 * (1 - self.b + self.b * (self.doc_lengths[doc_idx] / self.avg_doc_length))
                    score += idf * numerator / denominator
            scores.append(score)
        return scores

    def get_top_k(self, query, k=5):
        scores = self.compute_bm25(query)
        top_k_indices = np.argsort(scores)[::-1][:k]
        top_k_docs = [self.documents[i] for i in top_k_indices]
        return top_k_docs, top_k_indices

def get_nmis(true_dict, pred_dict):
    labels_true = []
    labels_pred = []

    # print(true_dict.keys())
    # print(pred_dict.keys())
    # print()

    for key in true_dict:
        labels_true.append(true_dict[key])
        if key not in pred_dict:
            labels_pred.append(-1)
        else:
            labels_pred.append(pred_dict[key])
    if len(labels_pred) == 0:
        max_label_pred = 0
    else:
        max_label_pred = np.max(labels_pred) + 1
    for label_idx, item in enumerate(labels_pred):
        if item==-1:
            labels_pred[label_idx] = max_label_pred
            max_label_pred+=1

    return sklearn.metrics.normalized_mutual_info_score(labels_true=labels_true, labels_pred=labels_pred), sklearn.metrics.adjusted_mutual_info_score(labels_true=labels_true, labels_pred=labels_pred)

def calculate_precision_recall_f1(predicted, ground_truth):

    # print(predicted)
    # print()
    # print(ground_truth)
    # print("-------------")

    # Convert lists to sets to handle duplicates and perform set operations
    predicted_set = set(predicted)
    ground_truth_set = set(ground_truth)

    # Calculate true positives (intersection of predicted and ground truth)
    true_positives = predicted_set.intersection(ground_truth_set)

    # Calculate precision
    precision = len(true_positives) / len(predicted_set) if predicted_set else 0

    # Calculate recall
    recall = len(true_positives) / len(ground_truth_set) if ground_truth_set else 0

    # Calculate F1-score
    if precision + recall == 0:
        f1_score = 0
    else:
        f1_score = 2 * (precision * recall) / (precision + recall)

    return precision, recall, f1_score

def get_introduction(arxiv_id):
    # Step 1: Construct the URL
    url = f"https://ar5iv.org/html/{arxiv_id}"

    # Step 2: Fetch the HTML content of the page
    response = requests.get(url)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch the page: Status code {response.status_code}")

    # Step 3: Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Step 4: Locate the introduction section
    # We assume the introduction is the first section after the abstract
    # This heuristic might need adjustment depending on the exact structure of the paper
    introduction_text = ""
    found_introduction = False

    # Look for h2 tags which usually denote sections
    for tag in soup.find_all(['h2', 'h3']):
        # print(tag.text.lower())
        if 'introduction' in tag.text.lower():
            # print(tag.text)
            introduction_text += tag.text.strip() + "\n\n"
            next_node = tag.find_next_sibling()
            while next_node and next_node.name not in ['h2', 'h3']:
                introduction_text += next_node.get_text().strip() + "\n\n"
                next_node = next_node.find_next_sibling()
            break

    return introduction_text

def write_to_file(filepath, content):
    if '.txt' in filepath:
        with open(filepath, 'w') as fw:
            fw.write(content)
    elif '.json' in filepath:
        with open(filepath, 'w') as fw:
            json.dump(content, fw)
    elif '.pickle' in filepath or '.pkl' in filepath:
        with open(filepath, 'wb') as fw:
            pickle.dump(content, fw)
    elif '.npy' in filepath:
        np.save(filepath, content)

def read_from_file(filepath):
    if '.txt' in filepath:
        with open(filepath, 'r') as fr:
            return fr.read()
    elif '.json' in filepath:
        with open(filepath, 'r') as fr:
            return json.load(fr)
    elif '.pickle' in filepath or '.pkl' in filepath:
        with open(filepath, 'rb') as fr:
            return pickle.load(fr)
    elif '.npy' in filepath:
        return np.load(filepath)

def remove_stopwords_and_punctuation(text):
    # Get the list of stopwords
    stop_words = set(stopwords.words('english'))

    # Remove punctuation from text
    text = text.translate(str.maketrans('', '', string.punctuation.replace('_', '').replace('@', '')))

    # Split the text into words
    words = text.split()

    # Remove stopwords
    filtered_words = [word for word in words if word.lower() not in stop_words]

    # Join the words back into a single string
    filtered_text = ' '.join(filtered_words)

    return filtered_text

class AzureModels:

    def __init__(self, model_name):

        if model_name == "gpt4":
            DEPLOYMENT_NAME = "gentech-gpt4-research"
            BASE_URL = "https://gentechworkbench-stage.openai.azure.com/"
            API_KEY = "f074d7f2bfdf486783db5f4605b263a6"

            self.model = AzureChatOpenAI(
                openai_api_base=BASE_URL,
                openai_api_version="2023-03-15-preview",
                deployment_name=DEPLOYMENT_NAME,
                openai_api_key=API_KEY,
                openai_api_type="azure",
            )

            self.enc = tiktoken.encoding_for_model("gpt-4-0314")
        elif model_name == "gpt4o":
            DEPLOYMENT_NAME = "gpt-4o"
            BASE_URL = "https://docexpresearch.openai.azure.com/"
            API_KEY = "2d6dc256edd94e65a2fa4b5658651377"

            self.model = AzureChatOpenAI(
                openai_api_base=BASE_URL,
                openai_api_version="2023-07-01-preview",
                deployment_name=DEPLOYMENT_NAME,
                openai_api_key=API_KEY,
                openai_api_type="azure",
            )

            self.enc = tiktoken.encoding_for_model("gpt-4o")

    @retry(wait=wait_random_exponential(min=30, max=80), stop=stop_after_attempt(5))
    def get_completion(self, question, max_tokens, stop=None):

        gpt_answer = self.model(
            [
                HumanMessage(
                    content=question
                )
            ], max_tokens = max_tokens, stop=stop
        )
        gpt_answer_content = gpt_answer.content  # Access the content attribute

        # Convert the answer_content to string datatype
        if isinstance(gpt_answer_content, str):
            gpt_answer_string = gpt_answer_content  # If the content is already a string, use it directly
        else:
            gpt_answer_string = str(gpt_answer_content)  # Convert to string if it's not already a string

        return gpt_answer_string

    def get_num_inp_tokens(self, inp):
        tokens = self.enc.encode(inp)
        return len(tokens)
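The helpers above are largely self-contained, so a short illustrative snippet may help show how they fit together. This sketch is not part of the commit; the documents, query, and code strings are made up, and it assumes the corresponding evaluate metric dependencies (e.g. rouge_score) are installed.

# Illustrative usage of the retrieval, evaluation, and tree-distance helpers.
from utils import BM25, eval_metrics, edit_distance_code_str

docs = [
    "Adobe Firefly is a generative model trained on Adobe Stock images.",
    "Generative Fill extends images in Photoshop from text prompts.",
    "BM25 is a bag-of-words ranking function used for retrieval.",
]
bm25 = BM25(docs)
top_docs, top_idx = bm25.get_top_k("generative image model", k=2)
print(top_idx, top_docs)

metrics = eval_metrics()
print("ROUGE-L:", metrics.get_rouge_l(
    pred=["Firefly is Adobe's generative image model."],
    refs=["Adobe Firefly is a generative machine learning model."]))

# Tree edit distance between two citation-grouping programs, comparing
# only the bracket/sentence nesting structure.
code_a = 'citation_bracket["@cite_1_2"] = ["@cite_1", "@cite_2"]\nsentence["@cite_1_2"] = [citation_bracket["@cite_1_2"]]'
code_b = 'citation_bracket["@cite_1"] = ["@cite_1"]\nsentence["@cite_1"] = [citation_bracket["@cite_1"]]'
print(edit_distance_code_str(code_a, code_b, just_tree_structure=True))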
wiki_1.json
ADDED
@@ -0,0 +1,25 @@
[
{
"abstract": "Write the 'Introduction' section of the article titled 'Adobe Firefly'.",
"related_work": "Adobe Firefly is a generative machine learning model included as part of Adobe Creative Cloud. It is currently being tested in an open beta phase @cite_1 @cite_2 @cite_3. Adobe Firefly is developed using Adobe's Sensei platform. Firefly is trained with images from Creative Commons, Wikimedia and Flickr Commons as well as 300 million images and videos in Adobe Stock and the public domain @cite_4 @cite_5 @cite_6. It uses image data sets to generate various designs @cite_7.",
"ref_abstract":
{
"@cite_1": "Adobe brings Firefly to the enterprise. Adobe today announced that it is bringing its Firefly generative image generator to its enterprise customers and allowing them to customize the model with their own branded assets. In conjunction with this, the company is also bringing its Adobe Express design app (which you remember under its previous name of Adobe Spark) to enterprise users, who will be able to access Firefly from there as well. “Enterprise leaders expect content demands will increase by five-fold over the next two years, making it imperative for them to drive efficiencies internally,” said David Wadhwani, president, Digital Media Business at Adobe. “This new enterprise offering empowers users of any skill level to instantly turn ideas into content with Firefly, while tapping into the power of Express and Creative Cloud to quickly modify assets and deliver standout designs.” Today’s announcement comes only two weeks after Adobe also integrated Firefly into Photoshop (where it has now been used more than 150 million times). Like all of the major tech companies, Adobe is moving quickly to integrate these new capabilities across its product portfolio. The major advantage that Adobe has been banking on since the launch of Firefly is that it produces commercially safe images. It’s training the model on images from its stock imagery marketplace (in addition to openly licensed images and public domain content), which means it has the rights to all of these images and doesn’t have to scrape the web to gather them, with all of the copyright issues that entails. In return, though, the model is a bit more limited in the kind of images it can produce. In the enterprise context, though, being commercially safe will likely trump flexibility. Adobe is willing to bet on this and will indemnify businesses that use Firefly-generated images. With this move, Firefly is now available in the standalone Firefly web app, Adobe Express and Creative Cloud. In addition to these Firefly announcements, Adobe also today launched a number of other generative AI-powered services as part of its Sensei GenAI platform. While Firefly focuses on images, Sensei GenAI is Adobe’s branding for text and data-centric models that leverage multiple large language models, including OpenAI through Microsoft Azure and the Google-incubated FLAN-T5 model. Maybe the most interesting of these use cases here is that Adobe Experience Manager and Adobe Journey Optimized now get a generative AI-based marketing copy generator (currently in beta). This will allow brands to edit, rephrase and summarize their marketing copy by selecting their preferred tone of voice, for example. Marketing copy — and SEO content generation — are one of the lowest-hanging fruits for generative text generators (maybe because readers’ expectations here are already low?). One interesting twist here is that brands can tune the model with their own data to ensure that the automatically generated content remains on-brand. Users of Customer Journey Analytics will now also be able to use natural language queries to analyze their data and the service can now automatically caption charts and graphs. Meanwhile, a new chat tool will provide brands with “an automated way to interact with prospects who engage online, addressing questions on products while assisting sales teams with custom responses and summarized interactions” that can be used inside of Marketo Engage. 
Adobe says it’s already working with “hundreds of brands,” including Mattel, IBM and Dentsu, to help them adopt these AI-powered tools.",
"@cite_2": "Adobe Brings Its Generative AI Tool Firefly To Businesses. Firefly saw the “highest number of users” engage in any of Adobe’s beta products. Now, the company plans to introduce its newest AI capabilities to enterprise customers. Users have created about 200 million AI-generated images using Adobe’s in-house text-to-image model Firefly since it launched in March. After seeing broad consumer adoption, the creative design software giant announced Thursday that it plans to launch the features for its 12,000 enterprise customers. Firefly is trained on more than 100 million images including Adobe’s stock images, licensed images and public images whose copyrights have expired. The company relies on its treasure trove of high-quality stock images sourced from contributors who typically get 33% of royalties when their images are sold or used. Adobe Stock consists of more than 330 million assets ranging from photos and illustrations to videos and music tracks. A recently added category of assets is AI-generated images submitted by contributors, which are accepted only if they have the rights to use them. Multiple Adobe Stock contributors have expressed concerns over the use of AI-generated images to train their consumer-facing AI tool, Firefly, and that contributors can’t opt out of their work being used to train and create tools like Firefly. Fu confirmed that they are not able to opt out because contributors have signed licensing agreements stating that their images may be used for AI training purposes. But the company says it plans to compensate them in the future when Firefly comes out of beta (Fu declined to say how much it plans to pay the company’s contributors). As some text-to-image AI models have faced legal ramifications for using copyrighted content to train their systems, enterprises want a guarantee that they will not get sued or face backlash for using generative AI tools to create images or content, Meredith Cooper, senior director of digital media business at Adobe, told Forbes in an interview. “Firefly is designed to be commercially safe and backed by Adobe via indemnification,” Cooper says. Adobe, which counts Coca Cola, Walgreens, Home Depot, General Motors and U.S. Bank among its enterprise customers, recorded $17.6 billion in revenue in 2022. Its generative AI tools (still in beta) were rolled out to its Creative Cloud users through photo editing software Adobe Photoshop in late May. One user used Adobe’s Generative Fill tool to edit popular memes by expanding the images and showing what could have been beyond the original frame of the meme. There is also potential for misuse. In response to people using its tools to create deepfakes and spread misinformation, Adobe in 2019 launched the Content Authenticity Initiative to appropriately label images or content produced by AI and tell if an image has been tampered with using AI. Any content created with Firefly comes with content credentials including metadata about the creation of the image such as name, data and any edits made to the image. The company also announced in early May that it also plans to bring its prompt-based text-to-image tools to Google’s conversational chatbot Bard, where users will be able to create synthetic images within Bard using Firefly. In addition to offering AI tools to create text, photo or video marketing copy for the web, enterprises will also be able to access language models from Microsoft Azure OpenAI service, Google’s language model Flan-T5 and others. 
Through its enterprise-focused product, Adobe Sensei, enterprise users can automate tasks like analyzing customer information, querying data and adjusting advertising budgets. Adobe did not disclose pricing for any of its enterprise generative AI services nor when it plans to launch them.",
"@cite_3": "Adobe is adding AI image generator Firefly to Photoshop. Adobe Photoshop is getting a new generative AI tool that allows users to quickly extend images and add or remove objects using text prompts. The feature is called Generative Fill, and is one of the first Creative Cloud applications to use Adobe’s AI image generator Firefly, which was released as a web-only beta in March. Generative Fill is launching today in beta, but Adobe says it will see a full release in Photoshop later this year. As a regular Photoshop tool, Generative Fill works within individual layers in a Photoshop image file. If you use it to expand the borders of an image (also known as outpainting) or generate new objects, it’ll provide you with three options to choose from. When used for outpainting, users can leave the prompt blank and the system will try to expand the image on its own, but it works better if you give it some direction. Think of it as similar to Photoshop’s existing Content-Aware Fill feature, but offering more control to the user. I haven’t been able to try Generative Fill myself, but did get a live demonstration. It’s simultaneously impressive and far from perfect. Some of the objects it generated like cars and puddles didn’t look like they were a natural part of the image, but I was surprised how well it handled backgrounds and filling in blank spaces. In some examples, it even managed to carry over features from the photograph being edited, such as mimicking light sources and ‘reflecting’ existing parts of an image in generated water. Such feats won’t be a huge surprise for creators familiar with AI image generation tools, but, as ever, it’s the integration of this technology into mainstream apps like Photoshop that bring them to a much wider audience. Apart from the functionality, another important element of Firefly is its training data. Adobe claims that the model is only trained on content that the company has the right to use — such as Adobe Stock images, openly licensed content, and content without any copyright restrictions. In theory, this means anything created using Generative Fill feature should be safe for commercial use compared to AI models that are less transparent about their training data. This will likely be a consolation to creatives and agencies who have been wary about using AI tools for fear of potential legal repercussions. Generative Fill also supports Content Credentials, a “nutrition label” system that attaches attribution data to images before sharing them online, informing viewers if the content was created or edited using AI. You can check the Content Credentials of an image by inspecting it via verify.contentauthenticity.org, where you’ll find an overview of information. By integrating Firefly directly into workflows as a creative co-pilot, Adobe is accelerating ideation, exploration and production for all of our customers,” said Ashley Still, senior vice president, Digital Media at Adobe. “Generative Fill combines the speed and ease of generative AI with the power and precision of Photoshop, empowering customers to bring their visions to life at the speed of their imaginations. Generative Fill isn’t available in the full release of Photoshop just yet, but you can try it out today by downloading the desktop beta app or as a module within the Firefly beta app. Adobe says we can expect to see a full release onto the public Photoshop app in “the second half of 2023.” Adobe has been injecting AI-powered tools into its products for some time now. 
At Adobe Max last year the company rolled out some new Photoshop features like higher-quality object selections that are powered by Sensei, another of Adobe’s AI models. Firefly is already being used in Adobe Illustrator to recolor vector-based images, and Adobe also said that it plans to integrate Firefly with Adobe Express, a cloud-based design platform rivaling services like Canva, though there’s still no confirmation on when that will be released.",
"@cite_4": "What Are the Limitations of AI Image Generation in Adobe Firefly? There’s an abundance of artificial intelligence image generation software that lets you create almost anything. But what are the limitations you might stumble upon? If you’re interested in using Adobe Firefly, in particular, you’ll be surprised to know there are a handful of limitations to using this generative AI software. Find out what Adobe Firefly is and what the limitations of using it are. What Is Adobe Firefly? Adobe Firefly is Adobe’s generative AI software tool. Adobe announced Firefly in March 2023 as a beta model tool with plans to integrate its features across various Adobe Creative Cloud software. Since it’s in beta mode, many things might change before the features are rolled out publicly. With Firefly, you can recolor vector graphics, which is a big deal for designers, generate a variety of 3D text designs, and use text-to-image prompts to generate images. Adobe also released an AI tool called Generative Fill, which removes and replaces parts of an image. There are plans for new features to arrive. These include a 3D model-to-image generation for use with Adobe Substance 3D software, and an extend-image software to easily change the ratio of images in one click. There are many more exciting features to be released, but for now, let’s look at the limitations of Adobe Firefly’s AI image generation technology. 1. You Can Only Create Still Images Menu MUO logo Sign In Now Close TRENDING SUBMENU PC & MOBILE SUBMENU INTERNET SUBMENU PRODUCTIVITY SUBMENU LIFESTYLE SUBMENU TECH EXPLAINED SUBMENU REVIEWS BUYING GUIDES MORE SUBMENU Sign In Newsletter What Are the Limitations of AI Image Generation in Adobe Firefly? 4 By Ruby Helyer Published May 27, 2023 Follow Share Link copied to clipboard Sign In To Your MUO Account Firefly-on-mac There’s an abundance of artificial intelligence image generation software that lets you create almost anything. But what are the limitations you might stumble upon? If you’re interested in using Adobe Firefly, in particular, you’ll be surprised to know there are a handful of limitations to using this generative AI software. Find out what Adobe Firefly is and what the limitations of using it are. What Is Adobe Firefly? Adobe Firefly is Adobe’s generative AI software tool. Adobe announced Firefly in March 2023 as a beta model tool with plans to integrate its features across various Adobe Creative Cloud software. Since it’s in beta mode, many things might change before the features are rolled out publicly. With Firefly, you can recolor vector graphics, which is a big deal for designers, generate a variety of 3D text designs, and use text-to-image prompts to generate images. Adobe also released an AI tool called Generative Fill, which removes and replaces parts of an image. There are plans for new features to arrive. These include a 3D model-to-image generation for use with Adobe Substance 3D software, and an extend-image software to easily change the ratio of images in one click. There are many more exciting features to be released, but for now, let’s look at the limitations of Adobe Firefly’s AI image generation technology. 1. You Can Only Create Still Images Adobe Firefly Text to Image AI prompt Although it is expected that Adobe Firefly features will be integrated into Adobe’s animation and video software—After Effects and Premier Pro—at the time of writing, you are limited to creating still images. 
While it’s not quite the same as Firefly, Adobe previously integrated Adobe Sensei AI features into most Adobe software. You can still benefit from Adobe’s AI tools for video or animation, although Sensei features are integrated within the software rather than being standalone tools. You can add fun textures to your text, recolor any vector graphics, or use the text-to-image generator. All results are static and there is no way to animate them. And none of the current Firefly features are specifically for video or animation projects. 2. Results Are for Personal Use Only Anything you create using Adobe Firefly is not available for commercial use while Firefly is still in beta mode. This means you cannot sell any designs you’ve made with Firefly or make money from them for the time being. You should use Adobe Firefly for personal projects and experimentation. While the software is in beta mode, it’s the best time to experiment and explore what you can do with it. When the software becomes widely available, you should be able to design for commercial use, and by then you’ll have experimented through the learning curve and can jump straight into the deep end. 3. You’re Limited to the Provided Artwork This point isn’t strictly true for the entire of Firefly’s offerings, but it is prominent to note for its AI text generation tool. There isn’t a way to upload your own art or designs to use as inspiration or to edit using Firefly technology. The 3D text effects feature only creates artwork from a text prompt. This is also the same for the text-to-image feature (although the clue is in the name for that one). While future features may allow you to use AI technology with your own images or creations, at the time of writing, most of the features only allow you to generate from scratch. However, all is not to be lost; the vector recoloring tool can be used with your own vectors, so long as they are saved in SVG format. It’s easy to save SVGs, they can be used for many purposes including using SVGs for editable elements in Canva. 4. The AI Is Trained With Adobe Stock Files If you’ve ever wondered if Adobe uses your content to train its AI, you can be rest assured that it does not. Adobe says it only uses existing files in Adobe Stock to train its Adobe Firefly AI systems. While this is great for copyright reasons and IP standards, it does potentially limit how the AI can be trained, since it only has access to Adobe Stock files for training. Adobe Stock is vast, but it can never be as extensive as the Internet of Things as a whole. You may find other generative AI systems are trained better due to broader access to training materials—even if they are sought unethically. Better training equals better results, but maybe Adobe will surprise us. 5. Only Adobe Users Get Invited to the Beta Many other AI tools allow anyone to access them, even if they need to sign up to do so. Most AI tools are free, although they may have a waiting list to join. Adobe Firefly is limited to Adobe subscribers—of any plan—who sign up for the Firefly beta group. Not only that, but you must actively request an invitation to the Firefly beta and painstakingly wait for the email invite from Adobe. If you’ve managed to get on the beta list, you won’t have any further limitations to accessing Firefly, other than the limitations we’ve listed here for its use. 6. Limited File Formats and Save Options As mentioned earlier, the vector recoloring feature can only use SVG files. 
While it’s not difficult to save a vector as an SVG, it does add an extra step to use the tool. The 3D text effects generation is limited in its save options for your results. You can copy it to your clipboard to open it directly in Photoshop or Illustrator, or you can save it. Text effect results are saved in PNG format with a transparent background and an Adobe watermark. Text-to-image results are saved as a JPG file. Quite the limited choice of file formats and not the one most digital artists would choose to use. Once integrated, we expect our save options to reflect those found within all the Creative Cloud programs. 7. Watermarked Images After generating an AI image in Adobe Firefly, you have options to copy your results to the clipboard—for pasting into Photoshop or Illustrator—or saving it. Both options are limiting because your results will have a garish red and purple Adobe Firefly watermark in the lefthand corner. The watermark comes with a label reminding you that your generated art is not for commercial use. This is the same for the majority of Firefly’s generated image results, and it looks like we’ll have to wait for the full roll-out of software before it offers watermark-free results. The vector recoloring tool does not watermark your vectors—and allows you to save in high-quality SVG format. This tool is fairly versatile without the limit of watermarking. 8. Your Results Are Uneditable Outside of Firefly Along with the watermarks, limited save formats, and the fact that none of Firefly’s tools are yet fully integrated into the Creative Cloud, it’s probably no wonder that the results of your efforts are also uneditable. Technically, you could edit them globally with adjustments or effects like warping or blur. But the downloaded Firefly images are flattened, so it’s not possible for more detailed or isolated editing. While we won’t know until the tools reach the Creative Cloud, we can imagine that this won’t be the case forever. Once integrated into the software, you’ll probably—hopefully—be able to edit in layers and move, remove, or add elements to your new AI-generated work as easily as with typical Adobe projects. Know Your Limits With Adobe Firefly Adobe has brought the future of digital design to your computer screen. It’s exciting to see the newest AI offerings, but you shouldn’t go into using them without understanding that just because AI seems limitless, doesn’t mean it is. Adobe hasn’t specifically stated whether these limits will be lifted in the future, but given Firefly is only in beta mode, you should expect that these limitations are only temporary.",
|
11 |
+
"@cite_5": "Is Adobe Using Your Files to Train Its AI? How to Opt Out if So. Adobe is one of the biggest creative software companies. It provides leading technology to help you design your greatest work. With its cloud-based subscription model, you are always ahead of the curve and provided access to its latest tools and features. But could this be a negative? Is there a chance that Adobe is accessing your data to train its AI? Is Adobe Using Your Data to Train Its AI? Since Adobe became a largely cloud-based product in 2013, we’ve seen the benefits in the quick release of new design tools, the option to back up your work to the cloud, and the easy integration of saving and opening work across different Adobe products. But your files in the cloud might be used to train Adobe’s AI. Adobe Sensei is Adobe’s AI technology integrated into its software to help improve your workflow. How does it improve your workflow? It uses your data for content analysis to track common work patterns. Adobe’s content analysis FAQ page claims it may analyze your content to develop and improve its products and services. It isn’t explicit what this means or how it works, but if you’re precious about your work, it may be an issue. How to Opt Out of Content Analysis The good news is you can opt out of content analysis. Log in to your Adobe account and go to the Privacy and personal data page. Switch off the Content analysis toggle. You can also toggle off Desktop app usage to opt out of any tracking. Another way to ensure Adobe cannot access your work for any reason is by not using cloud storage via Adobe Creative Cloud and saving your work locally instead. Then your work will be safe from Adobe’s prying eyes. Adobe Creative Cloud works in clever ways to help keep your local storage unused and offers multiple programs via the cloud, but by analyzing your data and workflow for other purposes, it could also be to your detriment. Stay in Control of Your Files It’s sneaky of Adobe to not make it obvious that it wants to follow your workflow and track your data, but it’s easy to opt out. This is a small reminder to read the terms and conditions when signing up for products and to routinely check privacy information for cloud-based services like Adobe Creative Cloud. Of course, Adobe doesn’t make it obvious what it means by analyzing your content, so agreeing to let it use your information for product improvements is a personal choice.",
|
12 |
+
"@cite_6": "Adobe’s Firefly Image Generator Is Going to Make Photoshop Much Easier to Use. Soon, even your grandparents could be Photoshop experts. Adobe’s Photoshop has grown to become an incredibly capable and powerful image editing tool, but its ever-increasing complexity also makes it harder for new users to learn how to use it. That could change later this year, however, as Photoshop will soon introduce some new AI-powered tools that allow users to perform complex edits by simply asking the app to do it for them. In March, Adobe revealed its AI image generation tool, called Firefly, which stood out from similar tools with a promise that it wouldn’t infringe upon the existing work of artists and photographers, as the AI was only trained on images from Adobe’s own stock image site, public domain content, and openly licensed work. It didn’t take long for Adobe to reveal its grander plans for Firefly, as just a few weeks later, the company revealed the tool would be incorporated into some of its video and image editing apps. Coming as a surprise to no one, today Adobe also revealed that Firefly would be incorporated into Adobe Photoshop, the company’s flagship image editing tool, through a new tool called Generative Fill. The tool can either expand the borders of an image by generating additional out-of-frame content that matches the lighting, style, and perspective of the original photo, or completely replace parts of an image based on simple text prompts from the user. One of the examples Adobe has provided that demonstrates the benefits of Generative Fill is changing the size or aspect ratio of an image. Often times, a photo is shared on several different platforms, including social media consumed on phones, or in a browser on a laptop. Cropping an image to a smaller size is easy enough, but expanding the borders of a photo to make it taller or wider often involves very complex editing by a skilled Photoshop artist. Generative Fill promises to fill in the missing areas of an expanded photo automatically in a matter of seconds. Generative Fill is useful beyond just recreating out-of-frame areas of a photo. Adobe also demonstrates how the tool can be used to intelligently replace or edit parts of a photo the user has highlighted using Photoshop’s selection tools using a simple text prompt. In this instance, the middle of the road was highlighted and Generative Fill was asked to add yellow lines to make it more obvious this cyclist was riding on an empty road, and not a random section of pavement. The added lines not only matched the perspective of the photo, but also the level of wear already on the road. As with many of Photoshop’s tools, the automated Generative Fill edits are non-destructive, and are added to a document as additional layers that can be turned on and off, or further manually tweaked by an artist using other filters. And in some cases, Generative Fill can also suggest several different versions of an edit, letting the artist have the final say in which one is actually used. Users with access to beta releases of the desktop version of Adobe Photoshop will have access to Generative Fill starting today, while wider availability of the tool in release versions of Photoshop is expected sometime in “the second half of 2023.”",
|
13 |
+
"@cite_7": "Generative Fill in Photoshop (Beta) Hands-On. It takes something special to make jaded photographers exclaim in genuine surprise when editing photos. The exclamations were rampant after Adobe recently released the latest public beta of Adobe Photoshop with a new Generative Fill feature that can create photorealistic objects, backgrounds, and scene extensions in existing photos using generative AI technology. We’ve seen Adobe’s implementation of generative AI in Adobe Firefly, the web-based tool for creating entire synthetic images from text prompts. Generative Fill uses Firefly technology to edit existing images in a more targeted way, bringing generative AI to Photoshop as a standard feature soon. (A few third-party Photoshop plug-ins that tie into other generative AI systems have been available for a while, such as Alpaca and Stability AI.) How the image-making machine works A generative AI system like Firefly creates entirely original images based on what’s described in a text prompt. It doesn’t lift sections of existing images and throw them together to create a new composition. Instead, using what it has learned from ingesting millions of photos, the system invents scenes and objects to match what it understands the text to mean. A prompt such as 'Antique car on a rainy street at night' assembles an image from a random mass of pixels to match what the system understands is a 'car,' 'rain,' 'street,' and 'night.' The systems usually provide multiple variations on your theme. What if you already have a photo of a rainy street at night, and you want to add an antique car to the composition? Using Photoshop’s Generative Fill feature, you can select an area where you want the car to appear and type 'Antique car' to generate one (this is also known as 'inpainting'). Or you can select objects you want to remove from an image and use Generative Fill without a specific text prompt to let the tool determine how to fill the missing area. Adobe is making this process more user friendly than other generative AI systems. It has the home-field advantage of building the tool directly into Photoshop, where Generative Fill sports a clean and direct interface. In comparison, the popular service Midjourney requires that you join a Discord server, subscribe to the service, enter a chat room set up to receive text prompts, and then type what you want to generate using commands such as 'Imagine antique car on a rainy street at night.' Your results appear in a scrolling discussion along with images generated by others who are in the same chat room. Photoshop's approach debuts a new Contextual Task Bar with commands such as Select Subject or Remove Background. When you make a selection using any of the selection tools, such as the Lasso tool, one option in the bar is a Generative Fill button. Clicking that button reveals a text field where you can describe what should be created within the selection. Or, you can leave the field blank and click the Generate button to have Photoshop determine what will appear based on the context of the surrounding scene. Once you click 'Generate,' Photoshop produces three Firefly-generated variations and shows you the first. You can cycle through them using buttons in the Contextual Task Bar or by clicking the thumbnails in the Properties panel. If none of them look good, you can click 'Generate' again to get three more variations. 
(By the way, if you get frustrated with the Contextual Task Bar appearing directly below every selection, you can drag it to where you want, then click the three-dot icon on the bar, and choose Pin Bar Position from the menu.) All of the variations are contained in a new type of layer, the Generative Layer, that also includes a mask for the area you selected. If you apply Generative Fill to another area of the image, a new Generative Layer is created. All the variations are saved in those layers, so you can go back and try variations nondestructively, hide or show the layers, set the blend mode and opacity, and use all the other flexible attributes of layers. Also note that Generative Fill is creating results at the same resolution as the original photos. This is in contrast to most systems, including Firefly on the web, where the generated images are low resolution, typically 1024 by 1024 pixels. Now let’s look at what Generative Fill can do. Remove objects Usually when you use tools to remove unwanted items from a photo, the software attempts to backfill the missing area using pixels from elsewhere in the image (see Remove All the Things: Using modern software to erase pesky objects). That becomes more difficult when the area to be removed is large, leading to repeating artifacts that make it obvious something has been removed. Generative Fill instead looks at the context of the image and attempts to create what would make sense in its place. In the examples above where we removed the tourists, Photoshop recreated the lines and colors of the buildings and matched the texture of the ground. But you can’t assume that the feature will get it right every time. Take the image of two people below. We can attempt the removal of one person (the man on the left) by making a loose selection around him with the Lasso tool to define the area we want to replace, and then clicking Generate with nothing in the text box. Strangely, in multiple attempts the tool assumed that we wanted to replace the person with another, random, person. And these were nightmare-inducing renditions of synthetic people. According to Adobe, there is no need to type a command such as 'Remove person' as a prompt when using Generative Fill, but in the end, typing that prompt gave us the result we wanted. Note, though, that while Photoshop returned one variation without a person (see below), it also created two variations that still included people. We can perhaps chalk this up to the feature being pre-release, although more likely it reveals that despite the amount of machine learning, the software is still just making guesses. Replace items and areas Removing objects is one thing, but what about replacing them with something entirely different? With Generative Fill, you can create things that were never in the original image. For instance, in the following photo we can turn one of the desserts into a blueberry tart by making a selection around the raspberries (expanding the selection slightly helps to pick up the paper texture behind them) and typing 'Blueberries' in the text prompt field. It took a couple of iterations to find one that matched, but these blueberries look pretty convincing. Or what about the drinks in the background? Instead of cold brew coffee, we can select the glass on the left and enter 'Pint of beer' as the prompt. 
Notice that not only is the glass slightly out of focus to match the depth of field of what it replaced, it also includes a hint of a reflection of the raspberry tart in front of it and the coffee to the side. Adding arbitrary items to an empty space can be more hit or miss. You'll get better results if the selection you make is roughly the correct size and shape of the item in context. In this case, we drew a rectangular selection in the foreground and typed the prompt 'Dog lying down,' whereupon Photoshop created several pup variations, of which we liked the one below best. The angle of the light and shadow matches fairly well. In addition to replacing or adding foreground objects, by using Select Subject and inverting the selection to select the background, we can enter a prompt to redefine the whole context of the scene. Generative Fill can also be used in more surgical ways, such as changing someone’s clothing. This, too, can have unpredictable results depending on what you’re looking to create. However, in some cases the rendered image looks fine. Keep in mind that it can take multiple prompt requests and revisions to get what you want. and people with true fashion sense are likely to quibble with Photoshop’s choices. You can see some different resulting outfits below."
|
14 |
+
}
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"abstract": "Write the 'History' section of the article titled 'Adobe Firefly'.",
|
18 |
+
"related_work": "Adobe Firefly was first announced in September 2022 at Adobe's MAX conference. It was initially released as a public beta in March 2023 @cite_9, and is currently available to all Adobe Creative Cloud subscribers. Adobe Firefly is built on top of Adobe Sensei, the company's AI platform. Sensei has been used to power a variety of features in Adobe's creative software, such as object selection in Photoshop and image auto-enhancement in Lightroom. Firefly expanded its capabilities to Illustrator, Premiere Pro, and Express, particularly for generating photos, videos and audio to enhance or alter specific parts of the media. NVIDIA Picasso runs some Adobe Firefly models @cite_10.",
|
19 |
+
"ref_abstract":
|
20 |
+
{
|
21 |
+
"@cite_9": "Adobe opens up its Firefly generative AI model to businesses. Firefly for Enterprise is a new offering that allows businesses to custom-train Adobe’s generative AI with their own branded assets. Adobe has unveiled a new platform for its Firefly generative AI model that’s designed to help organizations address the growing demand for content creation across their workplace. Announced during today’s Adobe Summit event, Adobe Firefly for Enterprise allows every employee within a company — regardless of their creative skills — to instantly generate images or copy from text-based descriptions, which can then be used in marketing campaigns, social media promotions, corporate presentations, and more. Enterprise users will be able to access Firefly through the standalone Firefly application, Creative Cloud, or Adobe Express — Adobe’s cloud-based design platform. Businesses can also build Firefly into their own ecosystem by training the AI model with their own branded company assets, which will allow Firefly to replicate the brand’s style when generating images and copy. “Enterprise leaders expect content demands will increase by five-fold over the next two years, making it imperative for them to drive efficiencies internally,” said David Wadhwani, president of digital media business at Adobe. “This new enterprise offering empowers users of any skill level to instantly turn ideas into content with Firefly, while tapping into the power of Express and Creative Cloud to quickly modify assets and deliver standout designs.” Adobe doesn’t have exact pricing to share for Firefly for Enterprise yet, but Ashley Still, senior vice president of digital media at Adobe, confirmed to The Verge that licenses that can be deployed broadly to employees will be available to brands for a flat price, which will be based on the needs and size of the organization. There is also no confirmed release date for Firefly for Enterprise — only that it will launch sometime after Firefly comes out of beta. This new enterprise-level product isn’t an unexpected move from Adobe, especially if you’re already familiar with its Firefly AI model. Adobe created Firefly to be safe for commercial use by training it on Adobe Stock images, openly licensed content, and content without copyright restrictions within the public domain. That sets it apart from many other generative AI models, such as OpenAI’s Dall-E, which could cause copyright issues for organizations as they haven’t disclosed their training data. Aside from its assured commercial viability, Firefly’s explosive popularity — largely fueled by its high-quality results — will likely hold plenty of appeal for businesses looking to explore generative AI solutions. Firefly beta users have generated over 200 million images since it launched in March 2023, and over 150 million images have been generated in just two weeks using Photoshop’s new Firefly-powered Generative Fill feature. The company also recently launched an Enterprise tier for its Adobe Express product that’s designed to support collaboration across organizations.",
|
22 |
+
"@cite_10": "Adobe and NVIDIA Partner to Unlock the Power of Generative AI. Adobe and NVIDIA will co-develop a new generation of advanced generative AI models Partnership will focus on deep integration of generative AI in creative workflows Both companies commit to content transparency and Content Credentials powered by Adobe’s Content Authenticity Initiative GTC—Today, Adobe (Nasdaq:ADBE) and NVIDIA, longstanding R&D partners, announced a new partnership to unlock the power of generative AI to further advance creative workflows. Adobe and NVIDIA will co-develop a new generation of advanced generative AI models with a focus on deep integration into applications the world’s leading creators and marketers use. Some of these models will be jointly developed and brought to market through Adobe’s Creative Cloud flagship products like Adobe Photoshop, Adobe Premiere Pro, and Adobe After Effects, as well as through the new NVIDIA Picasso cloud service for broad reach to third-party developers. Priorities of the partnership include supporting commercial viability of the new technology and ensuring content transparency and Content Credentials powered by Adobe’s Content Authenticity Initiative. Part of the NVIDIA AI Foundations cloud services for generative AI announced today, NVIDIA Picasso lets users build and deploy generative AI-powered image, video, and 3D applications with advanced text-to-image, text-to-video, and text-to-3D capabilities to supercharge productivity for creativity, design, and digital simulation through simple cloud APIs. “Adobe and NVIDIA have a long history of working closely together to advance the technology of creativity and marketing,” said Scott Belsky, Chief Strategy Officer and EVP, Design and Emerging Products, Adobe. “We’re thrilled to partner with them on ways that generative AI can give our customers more creative options, speed their work, and help scale content production.” “Generative AI provides powerful new tools to empower unprecedented creativity,” said Greg Estes, VP, Corporate Marketing and Developer Programs, NVIDIA. “With NVIDIA Picasso and Adobe tools like Creative Cloud, we’ll be able to bring the transformational capabilities of generative AI to enterprises to help them explore more ideas to efficiently produce and scale incredible creative content and digital experiences.” Adobe Firefly Earlier today, Adobe introduced Adobe Firefly, Adobe’s new family of creative generative AI models, and unveiled the beta of its first model focused on the generation of images and text effects designed to be safe for commercial use. Firefly will bring even more precision, power, speed, and ease directly into Adobe Creative Cloud, Adobe Document Cloud, and Adobe Experience Cloud workflows that involve the creation and modification of content. Hosting some of Adobe Firefly’s models on NVIDIA Picasso will optimize performance and generate high-quality assets to meet customers’ expectations. (For more information on Firefly, including how it is trained and how it honors the role of creators, please see this blog post.) Adobe is also developing new generative AI services to assist in the creation of video and 3D assets and to help marketers scale and personalize content for digital experiences through advancing end-to-end marketing workflows. Content Authenticity Initiative Adobe founded the Content Authenticity Initiative (CAI) to develop open industry standards for establishing attribution and Content Credentials. 
Through Content Credentials that CAI adds to content at the point of capture, creation, edit, or generation, people will have a way to see when content was generated or modified using generative AI. Adobe and NVIDIA, along with 900 other members of the CAI, support Content Credentials so people can make informed decisions about the content they encounter."
|
23 |
+
}
|
24 |
+
}
|
25 |
+
]
|
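Note: the wiki_*.json files in this upload share the schema shown above: each entry carries an "abstract" (the section-writing prompt), a "related_work" target text containing @cite_N markers, and a "ref_abstract" dictionary mapping each @cite_N key to the text of the cited source. The lines below are a minimal, hypothetical Python sketch of how a consumer of these files might pair the cited keys with their reference texts; the file name and helper names are illustrative only and this is not necessarily how the code in this repository loads the data.

import json
import re

def load_entries(path):
    # Each wiki_*.json file is a JSON list of objects with "abstract",
    # "related_work", and "ref_abstract" fields (schema as shown above).
    with open(path, encoding="utf-8") as f:
        return json.load(f)

def cited_references(entry):
    # Keep only the references that are actually cited in the target text.
    cited = re.findall(r"@cite_\d+", entry["related_work"])
    return {key: entry["ref_abstract"][key] for key in cited if key in entry["ref_abstract"]}

if __name__ == "__main__":
    for entry in load_entries("wiki_1.json"):
        print(entry["abstract"], "->", sorted(cited_references(entry)))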
wiki_2.json
ADDED
@@ -0,0 +1,31 @@
1 |
+
[
|
2 |
+
{
|
3 |
+
"abstract": "Write the 'Introduction' section of the article titled 'Adobe Acrobat'.",
|
4 |
+
"related_work": "Adobe Acrobat is a family of application software and Web services developed by Adobe Inc. to view, create, manipulate, print and manage Portable Document Format (PDF) files @cite_17.\nThe family comprises Acrobat Reader (formerly Reader), Acrobat (formerly Exchange) and Acrobat.com. The basic Acrobat Reader, available for several desktop and mobile platforms, is freeware; it supports viewing, printing, scaling or resizing @cite_18 and annotating of PDF files @cite_19. Additional, Premium services are available on paid subscription. The commercial proprietary Acrobat, available for Microsoft Windows and macOS only, can also create, edit, convert, digitally sign, encrypt, export and publish PDF files. Acrobat.com complements the family with a variety of enterprise content management and file hosting services.",
|
5 |
+
"ref_abstract":
|
6 |
+
{
|
7 |
+
"@cite_17": "Do it all with Acrobat. Create and edit PDFs. Collaborate with ease. E-sign documents and collect signatures. Get everything done in one app, wherever you work. Acrobat’s got it. Acrobat. Your essential document solution, anywhere you go. Stay on top of your documents with powerful features for desktop, mobile, and web. Edit text and images right in Acrobat. Easily fix text and swap images without jumping to another app. All it takes is a few clicks. Create PDFs from other file types. Images. PowerPoint slides. Spreadsheets. Word docs. Convert your file into a PDF you can easily share with anyone. E-sign and request signatures. Sign documents or request signatures on any device, including mobile. Recipients don’t need to log in. Ask. Acrobat answers. Meet AI Assistant for Acrobat. Ask your document questions. Get one-click summaries for fast insights and level up your productivity. Early-access pricing of AI Assistant for Acrobat starts at US$4.99/mo. Extended to September 4, 2024",
|
8 |
+
"@cite_18": "Scale or resize PDF images\nAutomatically scale to fit paper Acrobat can size the pages of a PDF to fit the selected paper size. Choose the hamburger menu (Windows) or the File menu (macOS) > Print. From the Page Scaling pop-up menu, select one of the following options: Fit Scale small pages up and large pages down to fit the paper. Shrink oversized pages only large pages to fit the paper and print small pages as they are",
|
9 |
+
"@cite_19": "What can you do with the Acrobat Reader - Share and review. Send PDFs to anyone with the Share feature — no login needed to view or comment on your PDF. Fill out forms and add signatures. Fill out forms and add signatures. Type out your responses to questions on any device. Add your signature and return your form — no printer required. Collaborate and comment. Collaborate and comment. Send documents to collaborators and get feedback fast with comments, sticky notes, highlights, strikethroughs, and more. Adobe Acrobat Reader software is the free, trusted global standard for viewing, printing, signing, sharing, and annotating PDFs. It's the only PDF viewer that can open and interact with all types of PDF content – including forms and multimedia. And now, it’s connected to Adobe Document Cloud services – so you can work with PDFs on any device, from anywhere."
|
10 |
+
}
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"abstract": "Write the 'History' section of the article titled 'Adobe Acrobat'.",
|
14 |
+
"related_work": "Adobe Acrobat was launched in 1993 and had to compete with other products and proprietary formats that aimed to create digital documents: Common Ground from No Hands Software Inc @cite_20 Envoy from WordPerfect Corporation Folio Views from NextPage Replica from Farallon Computing @cite_21 WorldView from Interleaf @cite_22 DjVu from AT&T Laboratories Acrobat XI Reader XI Distiller XI Acrobat.com CreatePDF ExportPDF EchoSign FormsCentral SendNow Old logos of Acrobat programs and services Adobe has renamed the Acrobat products several times, in addition to merging, splitting and discontinuing them. Initially, the offered products were called Acrobat Reader, Acrobat Exchange and Acrobat Distiller. Acrobat Exchange soon became Acrobat. Over time, Acrobat Reader became Reader. Between versions 3 and 5, Acrobat did not have several editions.[clarification needed] In 1999, the Acrobat.com service came to being and introduced several web services whose names started with Acrobat, but eventually, Acrobat.com was downgraded from the name of the family of services, to that of one of those services. Unlike most other Adobe products, such as members of Adobe Creative Suite family, the Acrobat products do not have icons that display two letters on a colored rectangle.",
|
15 |
+
"ref_abstract":
|
16 |
+
{
|
17 |
+
"@cite_20": "PAGE 24 PAPERLESS/ NEWS/SOFTWARE Tools resurrect hope for paperless office concept But like so many Biblical characters suddenly struck mutually incoherent, PC users at Combs' university and elsewhere have long been at the mercy of incom- patible fonts and applications running on different platforms. The results aren't pretty: Stark ASCII text. Ugly line breaks. Incompatible ap- plications. How can we distribute it electronical- ly and make it look good? Combs asked. For Combs and others, it may finally be time to answer that question and kiss ASCII goodbye with the help of two new software programs for electronically pub- Amount spent on overnight delivery services each year $11 BILLION lishing but not printing documents. Saving money is only one of many ben- efits Combs and others may find as they begin to use electronic documents. People will see the added capabilities and see this as a better way to do busi- ness, Combs said, BEYOND BABEL, Combs has been testing Adobe Systems Inc.'s Acrobat and No Hands Software Inc.'s Common Ground -two products that go beyond previous attempts at document portability. There have been so many things that were going to finally eliminate paper for- ever, said Tom Evslin, the inventor of Glue and now general manager of the business connectivity unit at Microsoft. [But] the problem hasn't been com- pletely solved. Evslin invented the Macintosh-based Glue to make documents cross-applica- tion compatible. But Glue didn't address the font or cross-platform questions. Other programs, from companies such as Interleaf Inc. and Frame Technology Corp., let authors publish long docu- ments, such as airplane manuals, and then distribute them with electronic read- ers for on-line viewing. But such solu- tions often restrict users to authoring their documents in that vendor's pro- gram. Other programs, such as Interleaf's $10,000 WorldView, are impractical in cost and scope for interoffice memos. Acrobat and Common Ground both cost less than $200 and create portable documents from applications that print. Acrobat, formerly code-named Carousel and first demonstrated in 1991, ships this week to Mac and Windows users. (See related story, page 17.) Common Ground, an outgrowth of Farallon Computing Inc.'s Disk Paper, shipped to Mac users in May. (See Com- mon Ground beats Acrobat to to the Mac- intosh market, May 31, page 18.) INFOWORLD A Windows version of Common Ground is due out this summer, with viewers for DOS users this fall and for Unix and OS/2 users next year. DOS and Unix versions of Acrobat are due this fall. An OS/2 version is not currently scheduled. The two products have a number of similarities. Both produce compressed, portable documents from which users can copy and paste, and both offer basic search functions. Acrobat lets users an- notate documents; a future version of Common Ground will do the same. One key difference between them is that Common Ground users can include a miniviewer with their portable docu- ment, so recipients don't need Common Ground to view the document. Mountain View, Calif.-based Adobe does sell stand-alone readers for Acro- bat's Portable Document Format (PDF) files, but only for bulk purchases of 50 or more. Average users sending out a memo don't have a cost effective way to deliver a PDF reader to recipients. fonts differ- The programs also handle ently. When a document's font is not on recipient's compute Acrobat mimics the metrics of a given font and preserves the document's formatting. 
Common Ground uses the host operating system's imaging model to reproduce the fonts' exact look as a bit-mapped image. When the document has been created with fancy stylized fonts, Acrobat has no way to reproduce them, said Paul Za- gaeski, senior analyst at the Yankee Group, in Boston. That difference can be important to users of non-Roman fonts like Kanji, which Common Ground currently dis- plays. A Japanese version for searching on Kanji files is due out next month, said No Hands president Tony Stayner. Adobe also plans to release a Kanji ver- sion of Acrobat but didn't say when. The difference in font strategies affect document resolution. The PDF carries full font information, so document reso- lution is device independent. Common Ground's bit-map documents stop at 300 dots per inch, but Stayner said the Bel- mont, Calif.-based No Hands is working on a version to offer higher resolutions. At one level, Common Ground is truer to the original image, but at anoth- er level, Acrobat is more versatile in its output devices, said Paul Saffo, a re- search fellow at the Institute For the Fu- ture, in Menlo Park, Calif. The programs may differ most in future plans. For instance, Adobe will add so- Number of times the average document is copied 19 times Number of hours executives spend looking for lost info each week phisticated search technology from Veri- ty Inc. by year end. We're designing an open-ended API, said senior product marketing manager Rob Babcock. We will deliver it with Verity, but our cus- tomers may want to use other kinds of search and indexing engines. Next year, Adobe also plans to offer OCR capabilities and support for Stan- dard Generalized Markup Language (SGML) in Acrobat products. Stayner said the next version of Com- mon Ground will have new indexing fea- tures and will support document struc- ture in future versions. THE PAPERLESS PAYOFF. As with any new technology, it may take time for paper- less documents to catch on. At the moment, reading a newspaper on a computer screen is about as appeal- ing as reading a newspaper with field glasses, Saffo said. Still, personalized versions of publica- tions distributed digitally could become hugely popular. You'll be able to get a customized version of Info World, spec- ulated Pieter Hartsook, editor of The Hartsook Letter, in Alameda, Calif. Distinguishing between computer screens and paper formats may be critical to the technology's ultimate success. My biggest concern with all of these document viewers is that they start with a printed-page image, said Pete Dyson, editor of the Seybold Report on Desktop Publishing. I believe documents should be formatted for the medium that they are intended for. But once the market adjusts to digital documentation, PC mangers and users. can expect a variety of benefits. For instance, digital information can be JUNE 14, 1993 reused without being re-input, and with the right technology, the information can be indexed and searched. Instead of distributing hard copy. I can now distribute it electronically and archive it, said Joel Wecksell, program director at The Gartner Group, in Stam- ford, Conn. I can search on it, print it lo- cally, and annotate it. Sending a document cross-country over a modem or via electronic mail takes a fraction of the time and cost of even the speediest delivery service. And once the document arrives, it's easier to search the electronic documents for useful data. 
We're in information overload, and 95 percent of that information is on paper,' said Rob Auster, vice president of elec tronic printing at BIS Strategic Decisions Inc., in Norwell, Mass. Paper is a detri- ment to finding information when you need it. The paperless office will be pa- perless relative to the sorting, retrieving, and accessing of information. Going paperless can also save money. Wecksell referred to a client who cut costs for printing and distributing pro- gram manuals from $572 per set to $15 by Number of documents computers produce each day 600 MILLION publishing on CD-ROM. As corporations discover the different ways paperless documents can save mon- ey, resources, and time, new technologies like Acrobat and Common Ground are likely to change communications around the digital Tower of Babel. Everyone is so accustomed to reading off paper, switching to a screen is going to take longer, concluded Brian Sobus, Acrobat beta-tester and editor at Xemplar Books, in Bethesda, Md. Slowly but surely peo- ple will move away from paper depen- dence towards screen readership. More paperless products planned 0 tures in QuickDraw GX this year. The new operating system software will let users create Portable Digital Documents that will include all the information nec- essary for viewing and printing documents accurately on a Macintosh. With Quick- Draw GX-savvy applications, users will be able to embed fonts (those that fout vendors say can be embedded) and maintain document format and color. Microsoft Corp. has also partially addressed the problem, last year giving Win- dows users the option to embed some fonts in their documents. ther products currently under development may also speed the path to pa- perless communications. Apple Computer Inc. promises to deliver some document portability fea- Magus Software Corp., another contender in the paperless document field, is working on a Windows and OS/2 product called Page Turner. Due to ship next quarter, the $150 software will be a reader that lets users view and print any docu- ment saved as a PostScript file. Page Turner will have keyword search capabilities but not the option to cut and paste from a document. Magus president Kevin Thompson said Mac and Unix versions of Page Turner will follow, but the Moun- tain View, Calif.-based company has no plans for a DOS version. NATALJE JEDAY/INFOWORLD-SOURCE ADOBE SYSTEMS Copyrighted material",
|
18 |
+
"@cite_21": "CLIENT/SERVER APPLICATIONS Distributed Databases, Messaging, Groupware, Imaging and Multimedia Windows version of Notes taps key niche BY WAYNE ECKERSON Framingham, Mass. One of the surprise features in Lotus Development Corp.'s Notes Version 3.0 announced this spring was a Notes server that runs on Win- dows 3.1 workstations. scale, said Matt Merrick, manager of information systems at The Merrick Printing Company, Inc. of Louisville, Ky. Some users are deploying the Win- dows server to support production applications that run on laptop com puters, while others are using it as an inexpensive method of developing and prototyping Notes before deploying it throughout the enter prise. Most users have purchased the Windows Notes server as part of Lotus Starter Pack option, which contains a Windows or OS/2 Notes server and two Noticing Notes According to Lotus Development Corp., more than 1,500 organizations are using Lotus Notes worldwide and Lotus sold more than 75,000 Notes licenses in last-quarter 1992. So far, user reaction to the Windows Notes server has been positive and in line with Lotus' purpose in offering a scaled-down platform for running Notes applications. That purpose was to provide a low-cost way for users to implement Notes, accord- ing to Ray Ozzi, president of Iris Associates, Inc. Iris developed the core Notes technology for Lotus. The Windows server version of Notes has given us a cheap way to experiment with Notes and determine if we want to deploy Notes on a larger BRIEFS Windows clients for $995. When you have to spend $20,000 to bring up a product, you think twice about it, said Jim Shelton, director of See Notes, page 87 NCR Corp. has teamed with AT&T EasyLink Services and Deirina Corp, to offer a turnkey electronic forms routing package called NCR Liberty. The package will enable users to send electronic forms across local- and wide- area nets without having to buy additional electronic mail software. NCR Liberty features two modules Liberty Designer/Filler for developing forms-based applications and Liberty Filler for filling out and routing forms electronically. Liberty Designer/Filler will be available in August for $1,000, while Liberty Filler, also available in August, will cost $199 per user. NCR: (513) 439-8404. Data General Corp. last week announced that it will resell Uniplex, Ltd.'s onGo Office and onGo Write/Paint/Draw groupware software products on the DG AVION server platform. DG will sell the products as AV onGO Office (5262 per seat for 100 users) and AV onGo Write/Paint/Draw ($298 per seat for 100 users). The Electronic Mail Association's (EMA) Private Management Domain Operations Committee has teleased a new white paper, Externally Defined Body Parts (Body Part 15). The paper addresses issues associated with automated identification of and interoperability hetween application data types in electronic messages. The paper has been written for both users operating private electronic mail nets and vendors of applications and mes- saging systems. The paper costs $35 for EMA members and $80 for non- members. ΕΜΑ: (703) 875-8620. Kaleida Labs, Inc., the Apple Computer, Inc. and IBM joint multimedia venture, has named IBM veteran Michael Braun to replace Nat Goldhaber as its president and chief executive officer. Goldhaber has been named a cochairman of the Mountain View, Calif., company, whose mission is to develop cross-platform multimedia technology, such as its ScriptX scripting language. 
Braun most recently was vice president and managing partner of IBM's See Briefs, page 88 NetWare Global MHS users share their experiences Users sold on the product's cost and integration with NetWare. BY BOB BROWN Beta users of Novell, Inc.'s NetWare Global Mes sage Handling Service (MHS) offering are giving the multiprotocol electronic mail server software good reviews based on its cost, flexibility and tight integra tion with NetWare. NetWare Global MHS is an enterprise messaging integration platform based on a series of NetWare Loadable Modules (NLM) that enables users to house messaging, file. and other types of servers on the same system. The product made its debut last summer, and Version 2.0 began shipping earlier this year. The state of Idaho recently completed a 400-user pilot test of what could turn out to be one of the biggest NetWare Global MHS implementations. The state chose the product as the basis for a statewide store- and-forward messaging service to be offered to agen- cies across Idaho, said Jake Hoffman, technology coordinator for the state. The decision to go with NetWare Global MHS was based largely on the state's heavy use of NetWare, he said. The product also features Simple Mail Transfer Protocol (SMTP) and IBM Systems Network Architec ture Distribution Services modules, which the state needs to provide connectivity between its MHS-based mail systems and those based on Digital Equipment Corp. VAXes and IBM mainframes. Novell has also announced an alliance with Retix to develop an X.400 module for NetWare Global MHS, which is in line with Idaho's plan to migrate to X.400, Hoffman said. Overall, Hoffman was pleased with the perfor mance of NetWare Global MHS during the pilot, as well as the price less than $1,000 for a 100-user NLM. NetWare Global MHS won out over offerings from companies such as Hewlett-Packard Co. and Soft-Switch, Inc. largely due to price, Hoffman said. Facts about NetWare Global Message Handling Service 2.0 Product description A scalable A scalable messag Loadable Modules (NLM designed for NetWarm on a set of NetWare Messaging protocol NLMs Architecture Distribution Services, Simple Mail Transfer Protocol and X-400 Software requirement hotWaro 3.11 or higher Hardware requirement. Inter Corp. 80386 or 80486-based server Price $495 for 20 mailboxes to $7,495 for 1,000 mailboxes Availability Began shipping in January through Novell esens NNOVELL SOURCE NOVELL FAC PRONO UTAH However, Hoffman warned potential NetWare Global MHS users that they may run into problems setting up messaging directories if they have not been careful in the past about giving NetWare servers unique names. Idaho typically had unique server names within each agency, but as Hoffman linked See Global MHS, page 88 Farallon gives Adobe Acrobat run for money Viewer. The Viewer, which runs on 80286-based or higher allows users to view Replica documents from within any Windows or Mac- intosh System 7 application. The Viewer lets users copy and paste any part of a Replica document, including text and graphics, into other applications or documents. BY WAYNE ECKERSON Alameda, Calif Farallon Computing, Inc. has announced software that lets per sonal computer users exchange documents across different plat forms and applications without losing original formatting, fonts or graphics. Called Replica, the software is designed to work with Windows or Macintosh computers and can be integrated with local-area network electronic mail systems, such as Lotus Development Corp.'s cc:Mail. 
As a network vendor, we've enabled users to share files between Windows and Macintosh computers, said Bill Freedman, product marketing manager at Far allon, which is based here. With Replica, we can now let users share files that can be read in their origi nal format. Freedman said cross-platform document viewing software enables users to share documents without buying compatible soft- ware for every desktop in the orga nization. Replica is akin to Adobe Sys tems, Inc.'s Acrobat software, which was recently introduced with a lot of fanfare. Freedman said Replica offers many of the same capabilities as Acrobat but is half the price and can run on smaller machines. Replica consists of Replica Cre ator and Replica Viewer software. The Creator software works with any Windows or Macintosh appli- cation that can produce printed output. It converts documents into a special Replica format, which can be read from within any appli cation that supports the Replica Users can also include a run- time version of the Viewer in a Rep lica file they want to distribute, thereby enabling recipients to view the file without having the Replica Viewer loaded on their machines. Recipients of such doc uments can load the Viewer onto their hard disks at no charge, Faral- lon officials said. Farallon recommends that users run Replica on 386 PCs with 4M bytes of memory. Replica is priced at $99 per copy and $749 for a 10-user ver- sion. Replica for Windows is cur- rently available, and the Macin tosh version will be available by the end of the year. Farallon: (510) 814-5100. NETWORK WORLD JULY 26, 1905 86",
|
19 |
+
"@cite_22": "PRODUCT COMPARISON Electronic document solutions Reading what you sow COMPARED EBT solution DynaText Indexer and Browser 2.3, In5tEd, and DynaTag 2.0 Electronic Book Technologies Inc. Folio solution Folio Views Infobase Production Kit 3.1 and Folio Views Infobase Manager 3.1 Folio Corp. InfoAccess solution Guide Passport and Guide Reader InfoAccess Inc. Interleaf solution Interleaf 6 Windows 95 and Windows NT, WorldView Press, and WorldView Viewer Interleaf Inc. UPCOMING ISSUE Adobe Systems Inc.'s FrameMaker 6 wasn't available for our testing, but see how it compares with these solutions in our forthcoming Product Comparison Update 66 INFOWORLD JULY 22, 1996 Want to move your corporate library online and cultivate collaboration on publications? Root yourself in train- ing at the start so you can harvest suc- cessful electronic publications later. L ike the end of a rainbow, the paperless office is a a goal that seems to recede as we move closer to it. But even if we never manage to lay our hands on that pot of gold, we are getting closer. The most recent and visible signs are the explosion of corporate Web pages and the blossoming of groupware programs. Unfortunately, Web pages are slow, limited in formatting, and have only basic search tools, and groupware programs require constant care. So what if you simply want to publish large, hyperlinked documents such as documentation, help files, or company publications-for users to access via the network or CD-ROM? What you need is an elec tronic document authoring application. NOT QUITE AN OPEN BOOK. Electronic document authoring, is not a well-defined category of software. In fact, users are putting together a diverse set of programs to use while vendors rush to tweak their pro grams to better suit the needs of network and CD-ROM publishers. For this comparison, we selected examples of the solutions we know readers will consider: traditional publishers, Standard Generalized Markup Language (SGML) systems, and hypertext databases. They are designed for large and detailed collections of information, so before you start, invest time (and money) to be properly trained. We compared Electronic Book Technologies Inc. (EBT)'s SGML converter (the EBT solution), which included DynaText Indexer and Browser 2.3, InStEd, and DynaTag 2.0, with Microsoft Corps Word for authoring capabilities; Folio Corp.'s Folio Views Infobase Produc tion Kit 3.1 (IPK) and Folio Views Infobase Manager 3.1 (the Folio solution); InfoAccess Inc.'s Guide Passport and Guide Reader (the InfoAccess solution); and Interleaf Inc.'s Interleaf 6 Windows 95 and Windows NT, WorldView Press, and WorldView Viewer (the Interleaf solution). We'll compare Adobe Systems Inc.'s FrameMaker 6 with these four solutions in an upcoming Product Comparison Update; it was not ready for our testing. The Folio solution leads the pack, because it does the basics very well: It features powerful formatting and hyperlinking tools as well as strong search tools. The IPK's long history as an end-user application محلة shows up in the program's attractive interface and excellent ease of use. Keep in mind, however, that these programs aren't the only ones on the market. Indeed, whichever approach you decide is best-SGML hypertext, or a complete publishing system. programs to consider. there are a variety of STOP THE PRESSES. There are lots of electronic publishing methods. http://www.infoworld.com"
|
20 |
+
}
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"abstract": "Write the 'Document Cloud' section of the article titled 'Adobe Acrobat'.",
|
24 |
+
"related_work": "In April 2015, Adobe introduced the Document Cloud branding (alongside its Creative Cloud) to signify its adoption of the cloud storage and the software as a service model. Programs under this branding received a DC suffix. In addition, Reader was renamed back to Acrobat Reader @cite_23 @cite_24. Following the introduction of Document Cloud, Acrobat.com was discontinued as their features were integrated into the desktop programs and mobile apps.",
|
25 |
+
"ref_abstract":
|
26 |
+
{
|
27 |
+
"@cite_23": "Review: Adobe Acrobat polishes its act - New version greatly improves PDF editability and mobile support, while introducing handy cloud-based document signing and management. To me, Adobe has always been the company that made powerful software with hard-to-learn and hard-to-use interfaces. Case in point: Photoshop. This widely used digital photo editing, printing, and drawing software seems to contain every image editing and processing technique known to man, but requires formal training or years of use before it becomes natural. Adobe launched its Creative Cloud in 2012, to mixed reviews. Some people thought a monthly subscription model was a huge improvement over having to upgrade thousands of dollars' worth of software on an annual or biannual basis; others resented having to pay continuously for software that they felt they’d bought, but knew would become obsolete as they upgraded their operating systems and hardware. I initially resented the push to the subscription model, but came around as I realized that my Creative Cloud subscription gave me more of the programs I needed for less total expenditure over time. Initially, there wasn’t much actual “cloud” in the Creative Cloud. It was more of a branding and purchase model change. Eventually, cloud file sharing and exchange and cloud assets became important parts of the Creative Cloud software suite. This week Adobe introduced the Adobe Document Cloud along with a more capable, easier-to-use version of Acrobat called Acrobat DC. The combination allows you to create, edit, sign, share, and track PDFs from anywhere, with Adobe’s promise that it will change how you work with all your important documents forever. I won’t go that far -- but it is a big improvement over Acrobat XI. Acrobat DC desktop The new Acrobat DC home screen (Figure 1) shows you all the PDF files you have recently visited on all your signed-in computers, including your iOS and Android mobile devices as well as your Windows and OS X computers. This gives you a fast place to start working on PDF files -- it’s actually more convenient than the way Microsoft handles the opening screen of Office applications. Once you open a document, you can see the most common tools in the right-hand column, with the option to search for any tools you wish from the box in the top right (Figure 2). That right there is a huge improvement over the UI of Acrobat XI, where I constantly had to play “find the function” to locate little-used but important capabilities such as redaction of sensitive or personal information. A third major screen, Tools, shows you all major tool categories in graphical form and includes its own search function. We’re still batting 1.000. But here’s where it starts to fall apart. You can save a file to your computer, your Document Cloud (if you’re signed in), or your Creative Cloud (if you have one), but if you go to open a file using the File/Open dialog or look at it from your Mac Finder or Windows Explorer, the Document Cloud is nowhere to be found. At first glance, you might think your file went into a roach motel and will never come out. It’s still there, however. Remember the Home screen? Whew! You’ll also find it if you browse to Acrobat.com (the old name for the Document Cloud), log in, and view your files, for example at https://cloud.acrobat.com/recent. Editing a PDF file used to be limited by the capabilities of Acrobat XI. 
To work around those limitations, I often made my edits to an article or book chapter in comments so that the author could apply them in Word if accepted, or I asked the author for the original Word document so that I could make tracked edits that they could then accept or reject. Acrobat DC fixes most of that. You can reflow text not only across a paragraph, but also across a page. You can add new lines to existing bulleted or numbered lists without reformatting. And fonts are matched -- sort of -- automatically. Sort of matched? For example, in Figure 3 you can see Acrobat DC trying to match a document’s fonts when I’m editing. This PDF uses Gill Sans, which is present on my Mac but not on a fresh Windows 10 installation. On the Mac, I had no problem editing; on Windows, Acrobat decided to substitute Comic Sans. At first I thought that was a joke. Alas, no. Should you happen to have touch-enabled devices, Acrobat DC will work with them. If you need to start with a scanned paper document, Acrobat DC will make quick work of converting it into an editable document, matching the fonts as well as it can, creating blocks, and giving you an opportunity to easily correct any OCR errors. If you need to export to Office, Acrobat DC does so much more cleanly and with better fidelity than Acrobat XI ever did. Acrobat DC mobile The concept of hand-off between iOS and OS X seemed interesting last year when Apple introduced it, but I never saw it in action because my iPad and iMac are too old. The Acrobat DC mobile versions on iOS and Android can interact with PDF content almost as well as the desktop versions on OS X and Windows, and the Document Cloud with mobile sign-in makes hand-off a low-friction process. Yes, you workaholic, you can mark up a PDF from your iPad on the train to work, then pick up where you left off from your work computer (Figure 4). If you think about it, camera integration might be the most useful integration that mobile Acrobat could have -- and it does. Use the Camera to PDF function in mobile Acrobat DC, and you can take a new picture or pick an image from your device, then track the conversion on the Acrobat Outbox. Once it’s converted and in the Document Cloud, you can enhance the PDF on a computer. The enhancements aren’t always perfect, but if you weren’t too far off on the angle and exposure of your images, you can correct them within Acrobat DC, fix the skew, and have it recognize the text. Does this replace a scanner? No -- but it’ll do in a pinch. Acrobat sign and send While form filling and electronic signatures have been useful for PDFs for more than a year, Acrobat DC and the Fill & Sign apps have taken this to a new level. In addition, Document Cloud adds tracked sending and delivery of documents, even large files that won’t transmit over email. Tracked sending requires a subscription. If you don’t have a paid Document Cloud subscription, you can still send large documents, but what you’re sending are anonymous links. This might be another argument in favor of a subscription -- or it might cause users who have purchased the product to raise a ruckus, the way they did when Photoshop CC became subscription-only. Overall, I like Acrobat DC’s greatly improved mobile and touch functionality, its better desktop usability, its much better PDF document editability and fidelity (even for scanned documents), and its new cloud sharing, signing, and tracking, as well as the fact that Acrobat DC is already included in my Creative Cloud subscription. 
I’m less enticed by the few omissions in the new user interface, the fact that Acrobat pricing is still at a premium level, and the limit of two desktop/laptop installations per subscription, which became an issue for me during my review. All of these are reflected in my scores: I consider Acrobat DC to be very good, but not yet excellent. I was able to work around the two-machine limit by signing out on other computers, and by using a perpetual license to create a second Adobe ID for the Document Cloud, but it was not my idea of a friendly experience. As a software reviewer, I might not be a typical user -- I have two Macs, two PCs, and about 15 virtual machine images on which I might want to run Acrobat, not to mention two Android devices and an iPad. (The mobile devices are not counted against the limit.) Nonetheless, I’ll bet many of you use more than two computers. I might guess you use one at home, one at work, and a laptop for the train and other travel. Does the tablet support in Acrobat DC make a difference? I would guess yes -- but I’d also offer the observations that the sort of people who use Acrobat heavily are also touch typists who are most productive when they have a physical keyboard, and the Bluetooth keyboards for tablets are not yet up to the same level as the keyboards on good laptops.",
"@cite_24": "Adobe teams up with Dropbox as part of Document Cloud upgrades A massive set of changes levels up Adobe's document management and e-sign platform Adobe is making it easier to work with PDF files stored in the cloud, thanks to a new partnership with Dropbox that connects users of the Acrobat PDF editing and viewing products with documents stored in the cloud storage firm's online locker. Starting Tuesday, users will be able to link their Dropbox accounts with Adobe's Acrobat Reader and Acrobat DC desktop apps, and then edit any PDFs they have stored in Dropbox's cloud from Acrobat without having to go digging for the right file. On the desktop, that doesn't seem like much of a bonus, since it's already possible to open files from a Dropbox folder that's stored on a user's computer with those apps. However, the companies will soon be rolling out an integration for iOS so mobile users who would otherwise have to flip between different apps in order to open their files won't need to. As part of that integration, when users open a PDF in the Dropbox app on their iOS devices a little bubble will pop up informing them that they can work more productively with the document using Acrobat. Having that bubble is a win for Adobe, which has to stand out among a sea of other PDF authoring and editing apps like Smile Software's PDFpen. Of course, it may also prove annoying for people who enjoyed the unblemished experience of using Dropbox prior to the integration. After the iOS features roll out in the coming months, Adobe and Dropbox will work together on similar experiences for Android users that are slated to launch next year. A similar suggestion feature will also show up in Dropbox's web interface. The Dropbox partnership is an interesting one, since Adobe's Document Cloud ambitions are primarily enterprise-focused. While Dropbox has a significant user base, its enterprise offering has largely lagged behind offerings from competitors like Google and Microsoft. Adobe plans to work on similar integrations with other storage providers in the future, and it's been cultivating a partner ecosystem around its Document Cloud beyond that, too. The company announced partnerships with Salesforce and Workday last month that are focused on integrating Document Cloud e-signature services into those products. Integrating with Dropbox is one component of a larger set of announcements Adobe made around its Document Cloud. The service, which is primarily focused on offering electronic signature functionality, now has a drag-and-drop workflow tool that lets anyone with a Document Cloud subscription set up a system for passing a document around to multiple people for them to review and sign. The service's e-sign capabilities also now include support for digital signatures, a more advanced form of electronic signature that uses a physical smartcard with a secure chip to authenticate a signature. It's a useful functionality for people in regulated industries like biopharmaceuticals, along with government workers and residents of countries that offer those cards as part of their identity Companies that want their employees to do more on the go can take advantage of new functionality that integrates with smartphones. Using the eSign Manager DC mobile app, employees will be able to take a photo of their handwritten signature and then use it to repeatedly sign documents. In addition, users can sync their signature across mobile, Web, and desktop apps so they can sign documents in a variety of locations. 
The Fill & Sign app, which lets anyone convert digital or scanned documents into electronic forms, is now also available for iPhone in addition to its existing incarnation on the iPad. All of this is part of Adobe's strategy to compete in a rough-and-tumble e-signature market against companies like DocuSign, which has made its entire business out of offering electronic signature products, has raised more than half a billion dollars from investors, and has an impressive client roll to boot."
}
}
]