Spaces:

Oracle
/

OCI_X_HF

Runtime error

App Files Files Community

OCI_X_HF / Objects.py

Rainsilves

added multi_page

53dc6d5 over 2 years ago

raw

history blame

5 kB

	import streamlit as st
	import os
	import subprocess
	import sys
	import shutil
	from Crypto.PublicKey import RSA
	from datasets import load_dataset
	import pandas as pd
	import oci
	from oci import object_storage
	from oci.object_storage.models import CreateBucketDetails
	from oci.object_storage.models import CreatePreauthenticatedRequestDetails
	import pickle

	# Page chrome: browser-tab title, main heading, then three captions in order.
	st.set_page_config(page_title="Oracle")
	st.title("Oracle x HF - by Allen Roush")
	for _caption in (
	    "Upload HF Dataset to OCI Object Storage!",
	    "The first of many features and integrations between HF and OCI",
	    "After the OCI Object Storage Upload Functionality is finished, next step is to make HF models deployable on OCI",
	):
	    st.caption(_caption)


	# Relative paths of the OCI config file and the generated private key.
	config_location, private_key_location = ".oci/config", ".oci/private_key.pem"
	# One of the answers fed to `oci setup config` further down the script.
	gen_api_key = "n"
	# Initial empty values; these names are reassigned further down the script.
	user_ocid = tenancy_ocid = region = hf_dataset = ""





	# OCI settings form. The form object is kept in `oracle_form` because the
	# submit handler later writes its output into the same container.
	oracle_form = st.form("configuration")
	with oracle_form:
	    st.write("OCI Settings")
	    user_ocid = st.text_input(
	        "Enter the User OCID",
	        "ocid1.user.oc1..aaaaaaaakhekqfxefo2a3sveid67qqlfgtrmpk5cym5oqkcgtgkhbi3elova",
	    )
	    tenancy_ocid = st.text_input(
	        "Enter the tenancy ocid",
	        "ocid1.tenancy.oc1..aaaaaaaahzy3x4boh7ipxyft2rowu2xeglvanlfewudbnueugsieyuojkldq",
	    )
	    region = st.text_input("Enter the region", "us-ashburn-1")
	    # NOTE(review): this flag is not read anywhere in the visible script.
	    existing_checkbox = st.checkbox(
	        "Check this if you want to put the dataset into an existing bucket"
	    )
	    # True only on the rerun triggered by pressing the button.
	    oracle_submitted = st.form_submit_button("Generate API Key")



	# Dataset selection form: dataset id, optional config name, split, and an
	# option to convert the pulled split into a pandas DataFrame.
	dataset_form = st.form("dataset")
	with dataset_form:
	    st.write("Dataset Settings")
	    dataset_name = st.text_area(
	        "Enter the name of the huggingface Dataset:", value="biosses"
	    )
	    # The default here is a single space, not an empty string.
	    dataset_name_2 = st.text_area(
	        "Enter the name of the config for the dataset if it has one", value=" "
	    )
	    split_name = st.text_area(
	        "Enter the name of the split of the dataset that you want to use",
	        value="train",
	    )
	    pd_checkbox = st.checkbox(
	        "Check this if you want this to be a pandas dataframe instead of a HF Dataset Object"
	    )
	    # True only on the rerun triggered by pressing the button.
	    dataset_submitted = st.form_submit_button("Pull Dataset")


	def load_and_process_data(path, name, streaming, split_name):
	    """Load a Hugging Face dataset and return one split of it.

	    Args:
	        path: Dataset identifier, forwarded to ``load_dataset`` as ``path``.
	        name: Dataset configuration name. ``None`` or a blank /
	            whitespace-only string is treated as "no configuration" and
	            forwarded as ``None``; any other string is forwarded with
	            surrounding whitespace removed.
	        streaming: Forwarded to ``load_dataset`` as ``streaming``.
	        split_name: Key used to select the split from the loaded dataset.

	    Returns:
	        The object stored under ``split_name`` in the loaded dataset.
	    """
	    # The config-name widget defaults to a single space; passing that
	    # through verbatim would request a configuration literally named " ".
	    if isinstance(name, str):
	        name = name.strip() or None
	    dataset = load_dataset(
	        path=path, name=name, streaming=streaming, keep_in_memory=True
	    )
	    return dataset[split_name]



	# Runs when the OCI settings form is submitted: regenerates the RSA key pair,
	# lets the OCI CLI write the config file, then shows the public key and the
	# resulting config inside the form.
	if oracle_submitted:
	    # Newline-separated answers piped to the interactive `oci setup config`
	    # prompts. NOTE(review): the order presumably has to match the prompts of
	    # the installed CLI version - confirm before reordering.
	    input_str = "\n".join(
	        [
	            config_location,
	            "Y",
	            "USER",
	            user_ocid,
	            tenancy_ocid,
	            region,
	            gen_api_key,
	            private_key_location,
	        ]
	    )

	    # Start from an empty .oci directory; a missing directory is not an error.
	    shutil.rmtree(".oci", ignore_errors=True)
	    os.makedirs(".oci", exist_ok=True)

	    # Append mode creates the (empty) config file without truncating anything.
	    with open(config_location, "a"):
	        pass

	    key = RSA.generate(2048)
	    with open(private_key_location, "wb") as private_file:
	        private_file.write(key.export_key())
	    # The private key is secret material: restrict it to the owning user.
	    os.chmod(private_key_location, 0o600)

	    public_key = key.publickey().export_key()
	    with open(".oci/public_key.pem", "wb") as public_file:
	        public_file.write(public_key)

	    setup = subprocess.run(["oci", "setup", "config"], text=True, input=input_str)
	    if setup.returncode != 0:
	        # Surface the failure instead of silently showing a stale/empty config.
	        oracle_form.warning(
	            f"`oci setup config` exited with status {setup.returncode}"
	        )

	    # Read the config back directly rather than shelling out to `cat`.
	    with open(config_location, encoding="utf-8") as config_file:
	        config_text = config_file.read()

	    # .text() rather than .write(): per the original author's note, st.write
	    # is not the right way to output an RSA key.
	    oracle_form.text(public_key.decode("ascii"))
	    # Write through the expander object itself; calling oracle_form.text()
	    # would place the config in the form, outside the expander.
	    config_expander = oracle_form.expander(
	        "Open to see the generated OCI config file"
	    )
	    config_expander.text(config_text)



	# Runs when the dataset form is submitted: pulls the requested split, shows
	# it, then pickles it and uploads it to an OCI Object Storage bucket named
	# after the dataset.
	if dataset_submitted:
	    hf_dataset = load_and_process_data(dataset_name, dataset_name_2, False, split_name)
	    if pd_checkbox:
	        hf_dataset = pd.DataFrame.from_dict(hf_dataset)
	    st.write(hf_dataset)
	    st.write("Dataset Pulled Succesfully!")

	    # The config file only exists after the OCI settings form has been
	    # submitted; give a readable message instead of a raw exception.
	    if not os.path.isfile(config_location):
	        st.error(
	            "No OCI config file found - submit the OCI Settings form to generate one first."
	        )
	        st.stop()

	    oci_config = oci.config.from_file(config_location, profile_name="USER")
	    # Bound to its own name so the imported `object_storage` module is not
	    # shadowed by the client instance.
	    storage_client = object_storage.ObjectStorageClient(oci_config)
	    st.write("Object Storage Connected Succesfully")
	    namespace = storage_client.get_namespace().data
	    # The tenancy OCID from the config is used as the compartment id.
	    compartment_id = oci_config["tenancy"]
	    st.write(namespace)
	    # Dataset ids may contain "/", which is replaced for the bucket name.
	    bucket_name = dataset_name.replace("/", "-")

	    # NOTE(review): the bucket is created publicly readable and in the
	    # 'Archive' tier - confirm both are intended.
	    try:
	        bucket = storage_client.create_bucket(
	            namespace,
	            CreateBucketDetails(
	                name=bucket_name,
	                compartment_id=compartment_id,
	                storage_tier='Archive',
	                public_access_type='ObjectRead',
	            ),
	        )
	    except oci.exceptions.ServiceError as err:
	        if err.status == 409:
	            # Conflict: a bucket with this name is already there.
	            st.write("Bucket Exists, Writing Dataset to Bucket")
	        else:
	            # Report the real cause, but still attempt the upload as the
	            # original best-effort flow did.
	            st.warning(
	                f"Bucket creation failed ({err.status} {err.code}): {err.message}. "
	                "Attempting upload anyway."
	            )
	    else:
	        st.write("Bucket Written:")
	        st.write(bucket)

	    st.write(f"Uploading new object if it doesn't already exist {hf_dataset!r}")
	    # NOTE: the payload is a pickle; anyone downloading it must trust its
	    # source before unpickling.
	    payload = pickle.dumps(hf_dataset)
	    # The object is stored under the same name as the bucket.
	    storage_client.put_object(namespace, bucket_name, bucket_name, payload)
	    st.write("Object Pushed Succesfully!")











	#if test_connection_button: