Commit bf84cfc
KAI MAURIN-JONES committed
1 Parent(s): 657dfe4

app updated

Files changed:
- app.py +1 -1
- wiki_game_st_bs4.py +101 -0
- wiki_game_st.py → wiki_game_st_sel.py +0 -0
app.py
CHANGED
@@ -1,5 +1,5 @@
 import streamlit as st
-from wiki_game_st import *
+from wiki_game_st_bs4 import *

 # Set the title of the app
 st.title("Wiki Game (BETA)")
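Note: the hunk above covers only the import swap; whatever UI app.py has beyond the title is outside this diff. As a rough sketch of how the front end presumably hands topics to the star-imported play_wiki_game (the widget labels and button here are hypothetical, not taken from this commit):

    # Hypothetical wiring, not part of this commit's diff
    starting_topic = st.text_input("Starting topic")
    target_topic = st.text_input("Target topic")
    if st.button("Play") and starting_topic and target_topic:
        play_wiki_game(starting_topic=starting_topic, target_topic=target_topic, limit=50)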
wiki_game_st_bs4.py
ADDED
@@ -0,0 +1,101 @@
+import requests
+from bs4 import BeautifulSoup
+import time
+import tensorflow as tf
+import tensorflow_hub as hub
+import numpy as np
+import jellyfish
+import streamlit as st
+
+# Load the pre-trained Universal Sentence Encoder
+embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
+
+def calculate_jaro_similarity(str1, str2):
+    # jellyfish's jaro_distance returns a similarity in [0, 1], despite the name
+    jaro_similarity = jellyfish.jaro_distance(str1, str2)
+    return jaro_similarity
+
+def most_similar_sentence(target_topic, labels_list):
+    # Embed the target topic and all candidate labels, then rank by inner product
+    context_embedding = embed([target_topic])[0]
+    sentence_embeddings = embed(labels_list)
+    similarities = np.inner(context_embedding, sentence_embeddings)
+    most_similar_index = np.argmax(similarities)
+    return labels_list[most_similar_index], similarities[most_similar_index], most_similar_index
+
+def get_wikipedia_page(query):
+    response = requests.get(f"https://en.wikipedia.org/wiki/{query}")
+    return response.text
+
+def get_topic_context(page_source):
+    # The article's first non-empty paragraph supplies the topic's context sentence
+    soup = BeautifulSoup(page_source, 'html.parser')
+    first_paragraph = soup.select_one("div.mw-parser-output > p:not(.mw-empty-elt)").text
+    context_sentence = first_paragraph.split(". ")[0]
+    return context_sentence
+
+def play_wiki_game(starting_topic: str, target_topic: str, limit: int = 100):
+    topic = starting_topic
+    num_pages = 0
+    used_topics = []
+    used_links = []
+    start_time = time.time()
+
+    st.write("-" * 150)
+    st.write(f"\nStarting!\n")
+    st.write("-" * 150)
+
+    page_source = get_wikipedia_page(starting_topic)
+    used_links.append(f"https://en.wikipedia.org/wiki/{starting_topic}")
+
+    while True:
+        num_pages += 1
+
+        # The starting page was already fetched before the loop
+        if num_pages > 1:
+            page_source = get_wikipedia_page(topic)
+
+        context_sentence = get_topic_context(page_source)
+        links_texts = []
+
+        soup = BeautifulSoup(page_source, 'html.parser')
+        links = soup.find_all('a')
+
+        for link in links:
+            link_url = link.get('href')
+            if link_url and link_url.startswith("/wiki/"):
+                link_url = "https://en.wikipedia.org" + link_url
+                link_text = link.text.strip()
+
+                # Skip self-links, already-visited URLs, and previously used topics
+                if link_text and topic.lower() not in link_url.lower() and link_url not in used_links and link_text not in used_topics:
+                    # Exclude namespace pages (e.g. "Category:...") and the Main Page
+                    if "en.wikipedia.org/wiki/" in link_url and ":" not in "".join(link_url.split("/")[1:]) and "Main_Page" != str(link_url.split("/")[-1]):
+                        links_texts.append((link_url, link_text))
+
+        best_label, best_score, loc_idx = most_similar_sentence(target_topic = target_topic, labels_list = [text for link, text in links_texts])
+
+        st.write(f"\nPage: {num_pages}")
+        st.write(f"Current topic: '{topic.title()}'")
+        st.write(f"Current URL: 'https://en.wikipedia.org/wiki/{topic}'")
+        st.write(f"Current Topic Context: '{context_sentence}'")
+        st.write(f"Next topic: '{best_label.title()}'. Semantic similarity to '{target_topic.title()}': {round((best_score * 100), 2)}%")
+
+        next_link, topic = links_texts[loc_idx]
+
+        # Stop on an exact match, a near-identical spelling (Jaro > 0.9), or a very high semantic score
+        if target_topic.lower() == topic.lower() or calculate_jaro_similarity(target_topic.lower(), topic.lower()) > 0.9 or best_score > float(0.90):
+            st.write("\n" + "-" * 150)
+            st.write(f"\nFrom '{starting_topic.title()}', to '{target_topic.title()}' in {num_pages} pages, {round(time.time() - start_time, 2)} seconds!")
+            st.write(f"Starting topic: '{starting_topic.title()}': 'https://en.wikipedia.org/wiki/{starting_topic}'")
+            st.write(f"Target topic: '{target_topic.title()}': '{used_links[-1]}'\n")
+            st.write("-" * 150)
+            break
+
+        if num_pages == limit:
+            st.write("\n" + "-" * 150)
+            st.write(f"\nUnfortunately, the model couldn't get from '{starting_topic.title()}', to '{target_topic.title()}' in {num_pages} pages or less.")
+            st.write(f"In {round(time.time() - start_time, 2)} seconds, it got from '{starting_topic.title()}': 'https://en.wikipedia.org/wiki/{starting_topic}', to '{target_topic.title()}': '{used_links[-1]}'")
+            st.write(f"\nTry a different combination to see if it can do it!\n")
+            st.write("-" * 150)
+            break
+
+        used_links.append(next_link)
+        used_topics.append(topic)
+
+# starting_topic = "soulja boy"
+# target_topic = "test"
+# play_wiki_game(starting_topic = starting_topic, target_topic = target_topic, limit = 50)
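For a sense of the Jaro stopping threshold used above: jellyfish.jaro_distance scores near-identical spellings well above 0.9, so the check catches minor spelling variants of the target without matching unrelated words. A quick illustration (values follow from the standard Jaro formula):

    import jellyfish

    print(jellyfish.jaro_distance("color", "colour"))  # ~0.94, clears the 0.9 bar
    print(jellyfish.jaro_distance("color", "flavor"))  # 0.7, well below the bar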
wiki_game_st.py → wiki_game_st_sel.py
RENAMED
File without changes
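The semantic half of the stopping rule works the same way as the link ranking: both take inner products of Universal Sentence Encoder embeddings and treat the result as a similarity score. A minimal standalone sanity check of that ranking logic (same TF Hub model URL as in the new file; the labels here are made-up examples):

    import numpy as np
    import tensorflow_hub as hub

    embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

    labels = ["Basketball", "Baroque music", "Computer science"]
    sims = np.inner(embed(["machine learning"])[0], embed(labels))
    print(labels[int(np.argmax(sims))])  # expected: "Computer science"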