# Wiki_page / get_OIC_Documents.py
# Sujithanumala's picture
# Upload 13 files
# 13b889e verified
from pathlib import Path
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Fetch the Oracle Integration Cloud "books" index page; its HTML is parsed
# further down to find the PDF links.
# The timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(
    "https://docs.oracle.com/en/cloud/paas/integration-cloud/books.html",
    timeout=30,
)
# Check if the request was successful
if response.status_code == 200:
    # Access the content of the response
    html_content = response.text
    # NOTE(review): this dumps the whole page to stdout; looks like a debugging aid.
    print(html_content)
else:
    print("Failed to retrieve data")
    # Stop here: html_content was never assigned, so the parsing step that
    # follows would otherwise fail with a NameError.
    raise SystemExit(1)
# Parse the index page and collect the href of every anchor that points at a PDF.
soup = BeautifulSoup(html_content, 'html.parser')
# find_all() has no "parent" option: unrecognised keyword arguments are treated
# as attribute filters, so the search has to start from the container itself.
# Fall back to the whole document when the page has no <div class="book">.
containers = soup.find_all('div', class_="book") or [soup]
book_links = []
for container in containers:
    # class_=False is kept from the original filter; it appears to select
    # anchors that carry no class attribute -- TODO confirm against the page.
    book_links.extend(container.find_all('a', class_=False))
# dict.fromkeys drops duplicate hrefs (e.g. from nested containers) while
# keeping the order in which the links appear on the page.
pdf_links = list(dict.fromkeys(
    link['href']
    for link in book_links
    if link.has_attr('href') and link['href'].endswith('.pdf')
))
# Print the extracted PDF links
print(pdf_links)
# Download every discovered PDF into the local Documentation_OIC/ directory.
base_url = 'https://docs.oracle.com/en/cloud/paas/integration-cloud/' # Replace with the actual base URL
output_dir = Path('Documentation_OIC')
# open() does not create missing directories, so make sure the target exists.
output_dir.mkdir(parents=True, exist_ok=True)
for pdf_link in pdf_links:
    # urljoin resolves relative hrefs (including "../" and root-relative
    # paths) against the base URL and leaves absolute hrefs untouched, which
    # plain string concatenation does not.
    full_pdf_url = urljoin(base_url, pdf_link)
    # Download the PDF file
    try:
        response = requests.get(full_pdf_url, timeout=60)
    except requests.RequestException:
        # A network error on one file should not abort the remaining downloads.
        print('Failed to download PDF.')
        continue
    if response.status_code == 200:
        # Name the local file after the last path segment of the link.
        with open(output_dir / pdf_link.split('/')[-1], 'wb') as f:
            f.write(response.content)
        print('PDF downloaded successfully.')
    else:
        print('Failed to download PDF.')