im committed on
Commit
bceab41
·
1 Parent(s): 14feef8

remove possible query parameters from the link

Browse files
Files changed (1) hide show
  1. app.py +14 -14
app.py CHANGED
@@ -18,6 +18,7 @@ from langchain.schema import (
18
  SystemMessage
19
  )
20
  import random
 
21
 
22
  set_api_key(st.secrets["ELEVENLABS_API_KEY"])
23
  crawling_api_key = st.secrets["CRAWLING_API_KEY"]
@@ -40,14 +41,13 @@ def get_llm(model_name, model_temperature, api_key, max_tokens=None):
40
  openai_api_key=api_key)
41
 
42
 
43
- def is_valid_web_link(text):
44
- # Regular expression pattern to match a valid URL
45
- url_pattern = re.compile(
46
- r"^(https?)://"
47
- r"[\w\-]+(\.[\w\-]+)+" # Domain name (e.g., www.example.com)
48
- r"(:\d+)?(/[\w\-./?%&=]*)?$" # Optional port and path
49
- )
50
- return bool(url_pattern.match(text))
51
 
52
 
53
  @st.cache_data
@@ -182,7 +182,7 @@ def get_query_params():
182
  if 'web_url' in params:
183
  web_url = params['web_url'][0]
184
  if len(web_url) > 0:
185
- if is_valid_web_link(web_url):
186
  st.session_state.web_url = web_url
187
 
188
 
@@ -216,16 +216,16 @@ def main() -> None:
216
  st.caption(description)
217
  st.divider()
218
 
219
- content_url = st.text_input(label='Paste your link, e.g. https://expresso.today',
220
  label_visibility='collapsed',
221
  placeholder='Paste your link, e.g. https://expresso.today')
222
  col1, _, _, _, col2 = st.columns(5)
223
  col1.button("Doodle")
224
  if col2.button("Random Page"):
225
- content_url = get_random_page()
226
- if len(content_url) > 0:
227
- if is_valid_web_link(content_url):
228
- st.session_state.web_url = content_url
229
  st.experimental_rerun()
230
  else:
231
  st.warning(
 
18
  SystemMessage
19
  )
20
  import random
21
+ from urllib.parse import urlparse, urlunparse
22
 
23
  set_api_key(st.secrets["ELEVENLABS_API_KEY"])
24
  crawling_api_key = st.secrets["CRAWLING_API_KEY"]
 
41
  openai_api_key=api_key)
42
 
43
 
44
def is_valid_web_link(url):
    """Validate a web link and return it without query string or path params.

    Despite the ``is_``-style name, this does not return a bool: callers use
    the return value both as a truthiness check and as the cleaned URL.

    Args:
        url: The candidate link, typically pasted by the user or read from
            the page's query parameters.

    Returns:
        The URL with its query string (``?a=b``) and path parameters
        (``;a=b``) removed, or ``None`` when ``url`` is not an absolute
        ``http``/``https`` link with a host. Any fragment (``#...``) is kept.
    """
    if not isinstance(url, str):
        return None
    try:
        # Pasted links often carry stray surrounding whitespace; strip it so
        # the result does not depend on how urlparse treats leading blanks.
        parsed_url = urlparse(url.strip())
    except ValueError:
        # urlparse raises on malformed netlocs such as an unclosed IPv6
        # bracket ("http://[::1"); treat those as invalid instead of crashing.
        return None
    # Only http(s) counts as a web link, matching the regex-based check this
    # function replaced; urlparse already lower-cases the scheme.
    if parsed_url.scheme not in ("http", "https") or not parsed_url.netloc:
        return None
    return urlunparse(parsed_url._replace(query="", params=""))
 
51
 
52
 
53
  @st.cache_data
 
182
  if 'web_url' in params:
183
  web_url = params['web_url'][0]
184
  if len(web_url) > 0:
185
+ if web_url := is_valid_web_link(web_url):
186
  st.session_state.web_url = web_url
187
 
188
 
 
216
  st.caption(description)
217
  st.divider()
218
 
219
+ web_url = st.text_input(label='Paste your link, e.g. https://expresso.today',
220
  label_visibility='collapsed',
221
  placeholder='Paste your link, e.g. https://expresso.today')
222
  col1, _, _, _, col2 = st.columns(5)
223
  col1.button("Doodle")
224
  if col2.button("Random Page"):
225
+ web_url = get_random_page()
226
+ if len(web_url) > 0:
227
+ if web_url := is_valid_web_link(web_url):
228
+ st.session_state.web_url = web_url
229
  st.experimental_rerun()
230
  else:
231
  st.warning(