File size: 4,774 Bytes
2652d5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
754c341
2652d5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8bf4041
e5387db
2652d5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8bf4041
2652d5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import streamlit as st
import os
import subprocess
import sys
import shutil
from Crypto.PublicKey import RSA
from datasets import load_dataset
import pandas as pd
import oci
from oci import object_storage
from oci.object_storage.models import CreateBucketDetails
from oci.object_storage.models import CreatePreauthenticatedRequestDetails
import pickle

# ---------------------------------------------------------------------------
# Page header
# ---------------------------------------------------------------------------
st.set_page_config(page_title="Oracle")
st.title("Oracle")
st.caption("Upload HF Dataset to OCI Object Storage!")

# ---------------------------------------------------------------------------
# Script-wide settings. The three path/answer values are fed to the
# `oci setup config` call further down; the empty strings are placeholders
# that the form widgets (or the dataset pull) overwrite.
# ---------------------------------------------------------------------------
config_location = ".oci/config"
private_key_location = ".oci/private_key.pem"
gen_api_key = "n"
user_ocid = ""
tenancy_ocid = ""
region = ""
hf_dataset = ""

# ---------------------------------------------------------------------------
# Form 1: OCI account settings. `oracle_form` stays bound at module level
# because the key-generation step writes its output back into this form.
# ---------------------------------------------------------------------------
oracle_form = st.form("configuration")
with oracle_form:
    st.write("OCI Settings")
    # NOTE(review): the defaults below look like real OCIDs - confirm they are
    # meant to ship as pre-filled values.
    user_ocid = st.text_input(
        "Enter the User OCID",
        value="ocid1.user.oc1..aaaaaaaakhekqfxefo2a3sveid67qqlfgtrmpk5cym5oqkcgtgkhbi3elova",
    )
    tenancy_ocid = st.text_input(
        "Enter the tenancy ocid",
        value="ocid1.tenancy.oc1..aaaaaaaahzy3x4boh7ipxyft2rowu2xeglvanlfewudbnueugsieyuojkldq",
    )
    region = st.text_input("Enter the region", value="us-ashburn-1")
    existing_checkbox = st.checkbox(
        "Check this if you want to put the dataset into an existing bucket"
    )
    oracle_submitted = st.form_submit_button("Generate API Key")

# ---------------------------------------------------------------------------
# Form 2: which Hugging Face dataset / config / split to pull.
# ---------------------------------------------------------------------------
dataset_form = st.form("dataset")
with dataset_form:
    st.write("Dataset Settings")
    dataset_name = st.text_area(
        "Enter the name of the huggingface Dataset:",
        value="biosses",
    )
    dataset_name_2 = st.text_area(
        "Enter the name of the config for the dataset if it has one",
        value=" ",
    )
    split_name = st.text_area(
        "Enter the name of the split of the dataset that you want to use",
        value="train",
    )
    pd_checkbox = st.checkbox(
        "Check this if you want this to be a pandas dataframe instead of a HF Dataset Object"
    )
    dataset_submitted = st.form_submit_button("Pull Dataset")


def load_and_process_data(path, name, streaming, split_name):
    """Load a Hugging Face dataset and return one of its splits.

    Args:
        path: Dataset identifier forwarded to ``load_dataset``.
        name: Optional dataset config name. Blank or whitespace-only values
            (the UI field defaults to a single space) are treated as "no
            config" and forwarded as ``None``.
        streaming: Forwarded unchanged to ``load_dataset``.
        split_name: Key of the split to return (e.g. ``"train"``).

    Returns:
        The object stored under ``split_name`` in the loaded dataset.

    Raises:
        KeyError: If the requested split is not present in the dataset.
    """

    def _clean(value):
        # Text-area input frequently carries stray spaces/newlines.
        return value.strip() if isinstance(value, str) else value

    # An empty config name must become None, otherwise it is looked up
    # literally as a config called "" / " ".
    config_name = _clean(name) or None
    split = _clean(split_name)

    dataset = load_dataset(
        path=_clean(path),
        name=config_name,
        streaming=streaming,
        keep_in_memory=True,
    )

    try:
        return dataset[split]
    except KeyError as err:
        raise KeyError(
            f"Split {split!r} not found; available splits: {list(dataset)}"
        ) from err


    
if oracle_submitted:
    # Generate an RSA key pair, register it in a fresh OCI config via the
    # `oci setup config` CLI, then show the public key and resulting config.

    # Answers piped to the CLI's interactive prompts, one per line.
    # NOTE(review): the prompt order is assumed from the values used here
    # (config path, confirm, profile name "USER", OCIDs, region, "n" = do not
    # generate a key, private key path) - confirm against the installed CLI.
    setup_answers = "\n".join(
        [
            config_location,
            "Y",
            "USER",
            user_ocid,
            tenancy_ocid,
            region,
            gen_api_key,
            private_key_location,
        ]
    )

    # Start from a clean .oci directory on every submission.
    shutil.rmtree(".oci", ignore_errors=True)
    os.makedirs(".oci", exist_ok=True)

    # Create an empty config file before the CLI runs.
    with open(config_location, "a"):
        pass

    key = RSA.generate(2048)

    # Create the private key with owner-only permissions from the start
    # instead of relying on the process umask.
    private_fd = os.open(
        private_key_location, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600
    )
    with os.fdopen(private_fd, "wb") as private_file:
        private_file.write(key.export_key())

    public_pem = key.publickey().export_key()
    with open(".oci/public_key.pem", "wb") as public_file:
        public_file.write(public_pem)

    try:
        setup = subprocess.run(
            ["oci", "setup", "config"], text=True, input=setup_answers
        )
    except FileNotFoundError:
        oracle_form.error("The `oci` command-line tool was not found on PATH.")
        st.stop()
    if setup.returncode != 0:
        oracle_form.warning(
            f"`oci setup config` exited with status {setup.returncode}; "
            "the config shown below may be incomplete."
        )

    # Read the results directly instead of shelling out to `cat`, which is
    # not available on every platform.
    with open(config_location, encoding="utf-8") as config_file:
        config_text = config_file.read()

    # Use .text(), not .write(): st.write renders strings as Markdown, which
    # is not a faithful way to display a PEM-encoded key.
    oracle_form.text(public_pem.decode("utf-8"))

    # Write into the expander itself; calling oracle_form.text() inside a
    # `with expander:` block would place the text in the form and leave the
    # expander empty.
    config_expander = oracle_form.expander("Open to see the generated OCI config file")
    config_expander.text(config_text)



if dataset_submitted:
    # Pull the requested dataset split, show it, then upload a pickled copy
    # to an OCI Object Storage bucket named after the dataset.
    hf_dataset = load_and_process_data(dataset_name, dataset_name_2, False, split_name)
    if pd_checkbox:
        hf_dataset = pd.DataFrame.from_dict(hf_dataset)
    st.write(hf_dataset)
    st.write("Dataset Pulled Succesfully!")

    # The config/profile is created by the "Generate API Key" form; give a
    # readable message instead of a traceback when that step was skipped.
    try:
        oci_config = oci.config.from_file(".oci/config", profile_name="USER")
    except (oci.exceptions.ConfigFileNotFound, oci.exceptions.ProfileNotFound) as err:
        st.error(f"OCI config is not available - generate the API key first. ({err})")
        st.stop()

    # Keep the client under its own name so the imported `object_storage`
    # module is not shadowed by an instance.
    storage_client = oci.object_storage.ObjectStorageClient(oci_config)
    st.write("Object Storage Connected Succesfully")
    namespace = storage_client.get_namespace().data
    # The tenancy OCID is used as the compartment for the bucket.
    compartment_id = oci_config["tenancy"]
    st.write(namespace)

    # Text-area input may carry stray whitespace; "/" in a dataset id is
    # replaced so the id can be used as a bucket name.
    bucket_name = dataset_name.strip().replace("/", "-")

    try:
        bucket = storage_client.create_bucket(
            namespace,
            oci.object_storage.models.CreateBucketDetails(
                name=bucket_name,
                compartment_id=compartment_id,
                storage_tier='Archive',
                public_access_type='ObjectRead',
            ),
        )
    except oci.exceptions.ServiceError as err:
        if err.status == 409:
            # Conflict: a bucket with this name is already there.
            st.write("Bucket Exists, Writing Dataset to Bucket")
        else:
            # Do not mislabel auth/quota/etc. failures as "bucket exists";
            # the upload is still attempted, as before.
            st.warning(
                f"Bucket creation failed ({err.status} {err.code}): {err.message}. "
                "Attempting the upload anyway."
            )
    else:
        st.write("Bucket Written:")
        st.write(bucket)

    st.write("Uploading new object if it doesn't already exist {!r}".format(hf_dataset))
    # SECURITY NOTE: the object is a pickle; whoever downloads it must only
    # unpickle it if they trust this bucket's contents.
    payload = pickle.dumps(hf_dataset)
    # The object is stored under the same name as its bucket.
    storage_client.put_object(namespace, bucket_name, bucket_name, payload)
    st.write("Object Pushed Succesfully!")





        





#if test_connection_button: