Spaces:

Sujithanumala
/

Wiki_page

Sleeping

Wiki_page / get_OIC_Documents.py

Upload 13 files

13b889e verified 3 months ago

1.19 kB

	import requests
	from bs4 import BeautifulSoup

	response = requests.get("https://docs.oracle.com/en/cloud/paas/integration-cloud/books.html")
	# Check if the request was successful
	if response.status_code == 200:
	# Access the content of the response
	html_content = response.text
	print(html_content)
	else:
	print("Failed to retrieve data")




	soup = BeautifulSoup(html_content, 'html.parser')

	book_links = soup.find_all('a', class_=False, parent=soup.find('div', class_="book"))
	pdf_links = []
	for link in book_links:
	if link.has_attr('href') and link['href'].endswith('.pdf'):
	pdf_links.append(link['href'])

	# Print the extracted PDF links
	print(pdf_links)

	base_url = 'https://docs.oracle.com/en/cloud/paas/integration-cloud/' # Replace with the actual base URL
	for pdf_link in pdf_links:
	full_pdf_url = base_url + pdf_link
	# Download the PDF file
	response = requests.get(full_pdf_url)
	if response.status_code == 200:
	with open('Documentation_OIC/'+pdf_link.split('/')[-1], 'wb') as f:
	f.write(response.content)
	print('PDF downloaded successfully.')
	else:
	print('Failed to download PDF.')