Merge branch 'minko'
- app.py +41 -26
- plagiarism.py +18 -9
app.py
CHANGED
@@ -218,11 +218,9 @@ def ai_check(text: str, option: str):
 
 
 def generate_prompt(settings: Dict[str, str]) -> str:
-    content_string = "\n".join(
-        f"{url.strip()}: \n{content.strip()[:2000]}" for url, content in settings["sources"].items()
-    )
-
     prompt = f"""
+    I am a {settings['role']}
+
     Write a {settings['article_length']} words (around) {settings['format']} on {settings['topic']}.
 
     Style and Tone:
@@ -243,9 +241,7 @@ def generate_prompt(settings: Dict[str, str]) -> str:
     - End with a {settings['conclusion_type']} conclusion
     - Add a "References" section at the end with at least 3 credible sources, formatted as [1], [2], etc.
     - Do not make any headline, title bold.
-
-    Use the content here from the URLs I've found for you:
-    {content_string}
+    {settings['sources']}
 
     Ensure proper paragraph breaks for better readability.
     Avoid any references to artificial intelligence, language models, or the fact that this is generated by an AI, and do not mention something like here is the article etc.
@@ -254,11 +250,9 @@ def generate_prompt(settings: Dict[str, str]) -> str:
 
 
 def regenerate_prompt(settings: Dict[str, str]) -> str:
-    content_string = "\n".join(
-        f"{url.strip()}: \n{content.strip()[:2000]}" for url, content in settings["sources"].items()
-    )
-
     prompt = f"""
+    I am a {settings['role']}
+
     "{settings['generated_article']}"
 
     Edit the given text based on user comments.
@@ -268,8 +262,7 @@ def regenerate_prompt(settings: Dict[str, str]) -> str:
     - The original content should not be changed. Make minor modifications based on user comments above.
     - Keep the references the same as the given text in the same format.
     - Do not make any headline, title bold.
-
-    {content_string}
+    {settings['sources']}
 
     Ensure proper paragraph breaks for better readability.
     Avoid any references to artificial intelligence, language models, or the fact that this is generated by an AI, and do not mention something like here is the article etc.
@@ -278,6 +271,7 @@ def regenerate_prompt(settings: Dict[str, str]) -> str:
 
 
 def generate_article(
+    input_role: str,
     topic: str,
     keywords: str,
     article_length: str,
@@ -291,15 +285,13 @@ def generate_article(
     num_examples: str,
     conclusion_type: str,
     ai_model: str,
-    sorted_date,
-    domains_to_skip,
+    content_string: str,
     api_key: str = None,
     generated_article: str = None,
     user_comments: str = None,
 ) -> str:
-
-    url_content = google_search(topic, sorted_date, domains_to_skip)
     settings = {
+        "role": input_role,
         "topic": topic,
         "keywords": [k.strip() for k in keywords.split(",")],
         "article_length": article_length,
@@ -312,7 +304,7 @@ def generate_article(
         "references": [r.strip() for r in references.split(",")],
         "num_examples": num_examples,
         "conclusion_type": conclusion_type,
-        "sources": url_content,
+        "sources": content_string,
         "generated_article": generated_article,
         "user_comments": user_comments,
     }
@@ -378,7 +370,11 @@ def format_references(text: str) -> str:
     in_references = False
 
     for line in lines:
-        if
+        if (
+            line.strip().lower() == "references"
+            or line.strip().lower() == "references:"
+            or line.strip().lower().startswith("references:")
+        ):
             in_references = True
             continue
         if in_references:
@@ -395,6 +391,7 @@ def format_references(text: str) -> str:
 
 
 def generate_and_format(
+    input_role,
     topic,
     keywords,
     article_length,
@@ -409,20 +406,29 @@ def generate_and_format(
     conclusion_type,
     ai_model,
     api_key,
+    google_search_check,
     year_from,
     month_from,
     day_from,
     year_to,
     month_to,
     day_to,
-    domains_to_skip,
+    domains_to_include,
     generated_article: str = None,
     user_comments: str = None,
 ):
     date_from = build_date(year_from, month_from, day_from)
     date_to = build_date(year_to, month_to, day_to)
     sorted_date = f"date:r:{date_from}:{date_to}"
+    content_string = ""
+    if google_search_check:
+        url_content = google_search(topic, sorted_date, domains_to_include)
+        content_string = "\n".join(
+            f"{url.strip()}: \n{content.strip()[:2000]}" for url, content in url_content.items()
+        )
+        content_string = "Use the trusted information here from the URLs I've found for you:\n" + content_string
     article = generate_article(
+        input_role,
         topic,
         keywords,
         article_length,
@@ -436,9 +442,8 @@ def generate_and_format(
        num_examples,
        conclusion_type,
        ai_model,
+        content_string,
        api_key,
-        sorted_date,
-        domains_to_skip,
        generated_article,
        user_comments,
    )
@@ -464,6 +469,7 @@ def create_interface():
         with gr.Column(scale=2):
             with gr.Group():
                 gr.Markdown("## Article Configuration", elem_classes="text-xl mb-4")
+                input_role = gr.Textbox(label="I am a", placeholder="Enter your role", value="Student")
                 input_topic = gr.Textbox(
                     label="Topic",
                     placeholder="Enter the main topic of your article",
@@ -584,6 +590,10 @@ def create_interface():
                 )
         gr.Markdown("# Search Options", elem_classes="text-center text-3xl mb-6")
         with gr.Group():
+            with gr.Row():
+                google_search_check = gr.Checkbox(
+                    label="Enable Google Search For Recent Sources", value=True
+                )
             with gr.Row():
                 month_from = gr.Dropdown(
                     choices=months,
@@ -605,10 +615,11 @@ def create_interface():
             year_to = gr.Textbox(label="To Year", value=d1[2])
 
             with gr.Row():
-                domains_to_skip = gr.Dropdown(
+                domains_to_include = gr.Dropdown(
                     domain_list,
+                    value=domain_list,
                     multiselect=True,
-                    label="
+                    label="Domains To Include",
                 )
 
             with gr.Group():
@@ -690,6 +701,7 @@ def create_interface():
    generate_btn.click(
        fn=generate_and_format,
        inputs=[
+            input_role,
            input_topic,
            input_keywords,
            input_length,
@@ -704,13 +716,14 @@ def create_interface():
            input_conclusion,
            ai_generator,
            input_api,
+            google_search_check,
            year_from,
            month_from,
            day_from,
            year_to,
            month_to,
            day_to,
-            domains_to_skip,
+            domains_to_include,
        ],
        outputs=[output_article],
    )
@@ -718,6 +731,7 @@ def create_interface():
    regenerate_btn.click(
        fn=generate_and_format,
        inputs=[
+            input_role,
            input_topic,
            input_keywords,
            input_length,
@@ -732,13 +746,14 @@ def create_interface():
            input_conclusion,
            ai_generator,
            input_api,
+            google_search_check,
            year_from,
            month_from,
            day_from,
            year_to,
            month_to,
            day_to,
-            domains_to_skip,
+            domains_to_include,
            output_article,
            ai_comments,
        ],
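The net effect of the app.py changes: generate_and_format only calls google_search when the new checkbox is ticked, flattens the results into one string, and hands that string to generate_article, which stores it as settings["sources"] for the prompt. A minimal runnable sketch of that flow follows; fake_google_search is a made-up stand-in for plagiarism.google_search (which needs a live API key and CSE id), and the arguments are sample values, not from the commit.

# Illustrative sketch only; fake_google_search stands in for plagiarism.google_search.
def fake_google_search(topic, sorted_date, domains_to_include):
    # Pretend the Custom Search API returned two url -> page-text pairs.
    return {
        "https://example.com/a": f"First source text about {topic}",
        "https://example.org/b": f"Second source text about {topic}",
    }

def build_content_string(topic, sorted_date, domains_to_include, google_search_check):
    # Mirrors the new block in generate_and_format: the search is optional,
    # and the sources reach the prompt as a single preformatted string.
    content_string = ""
    if google_search_check:
        url_content = fake_google_search(topic, sorted_date, domains_to_include)
        content_string = "\n".join(
            f"{url.strip()}: \n{content.strip()[:2000]}" for url, content in url_content.items()
        )
        content_string = (
            "Use the trusted information here from the URLs I've found for you:\n" + content_string
        )
    return content_string  # becomes settings["sources"], i.e. {settings['sources']} in the prompt

print(build_content_string("solar power", "date:r:20240101:20240601", ["example.com"], True))
print(build_content_string("solar power", "date:r:20240101:20240601", ["example.com"], False))  # -> ""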
plagiarism.py
CHANGED
@@ -61,10 +61,18 @@ async def parallel_scrap(urls):
     return results
 
 
+def scrap(urls):
+    client = httpx.Client()
+    soups = []
+    for url in urls:
+        soups.append(get_url_data(url=url, client=client))
+    return soups
+
+
 def google_search_urls(
     text,
     sorted_date,
-    domains_to_skip,
+    domains_to_include,
     api_key,
     cse_id,
     **kwargs,
@@ -75,7 +83,9 @@ def google_search_urls(
     if "items" in results and len(results["items"]) > 0:
         for count, link in enumerate(results["items"]):
             # skip user selected domains
-            if (
+            if (domains_to_include is None) or not any(
+                ("." + domain) in link["link"] for domain in domains_to_include
+            ):
                 continue
             url = link["link"]
             if url not in url_list:
@@ -84,25 +94,24 @@ def google_search_urls(
 
 
 def google_search(
-    text,
+    topic,
     sorted_date,
-    domains_to_skip,
+    domains_to_include,
 ):
     # api_key = "AIzaSyCLyCCpOPLZWuptuPAPSg8cUIZhdEMVf6g"
-    api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
-
+    # api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
+    api_key = "AIzaSyCLyCCpOPLZWuptuPAPSg8cUIZhdEMVf6g"
     # api_key = "AIzaSyCS1WQDMl1IMjaXtwSd_2rA195-Yc4psQE"
     # api_key = "AIzaSyCB61O70B8AC3l5Kk3KMoLb6DN37B7nqIk"
     # api_key = "AIzaSyCg1IbevcTAXAPYeYreps6wYWDbU0Kz8tg"
     # api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
     cse_id = "851813e81162b4ed4"
-
     # get list of URLS to check
     start_time = time.perf_counter()
     url_list = google_search_urls(
-        text,
+        topic,
         sorted_date,
-        domains_to_skip,
+        domains_to_include,
         api_key,
         cse_id,
     )
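The filter in google_search_urls now behaves as an allow-list: a result is kept only when its URL contains one of the domains_to_include entries, whereas the old code skipped URLs matching the user-selected domains. A small sketch of the new predicate, using made-up URLs and an assumed domain list:

# Illustrative sketch of the new inclusion check applied to each search result.
def keep_result(link: str, domains_to_include) -> bool:
    # Mirrors the condition in google_search_urls: drop the result when no
    # include-list is given or when the URL matches none of the listed domains.
    if (domains_to_include is None) or not any(
        ("." + domain) in link for domain in domains_to_include
    ):
        return False
    return True

sample_links = [
    "https://www.nature.com/articles/xyz",
    "https://randomblog.example/post/1",
]
print([link for link in sample_links if keep_result(link, ["nature.com", "sciencedirect.com"])])
# -> ['https://www.nature.com/articles/xyz']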