Hellisotherpeople committed on
Commit
2652d5f
1 Parent(s): d7bb8fd

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -0
app.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import subprocess
4
+ import sys
5
+ import shutil
6
+ from Crypto.PublicKey import RSA
7
+ from datasets import load_dataset
8
+ import pandas as pd
9
+ import oci
10
+ from oci import object_storage
11
+ from oci.object_storage.models import CreateBucketDetails
12
+ from oci.object_storage.models import CreatePreauthenticatedRequestDetails
13
+ import pickle
14
+
15
# Page chrome for the Streamlit app.
st.set_page_config(page_title="Oracle")
st.title("Oracle")
#st.caption("By Allen Roush")

# Locations of the OCI config file and the private key, relative to the
# working directory the app is started from.
config_location, private_key_location = ".oci/config", ".oci/private_key.pem"

# Placeholders; the OCI values are replaced by the form inputs further down,
# and hf_dataset by the dataset that gets pulled.
user_ocid = tenancy_ocid = region = hf_dataset = ""

# Canned answer fed to the `oci setup config` prompts together with the
# values above.
gen_api_key = "n"
27
+
28
+
29
+
30
+
31
+
32
# Form collecting the OCI identity settings. Widgets created inside the
# `with` block are attached to the form, in the order they are declared.
oracle_form = st.form("configuration")
with oracle_form:
    st.write("OCI Settings")
    user_ocid = st.text_input(
        "Enter the User OCID",
        "ocid1.user.oc1..aaaaaaaakhekqfxefo2a3sveid67qqlfgtrmpk5cym5oqkcgtgkhbi3elova",
    )
    tenancy_ocid = st.text_input(
        "Enter the tenancy ocid",
        "ocid1.tenancy.oc1..aaaaaaaahzy3x4boh7ipxyft2rowu2xeglvanlfewudbnueugsieyuojkldq",
    )
    region = st.text_input("Enter the region", "us-ashburn-1")
    existing_checkbox = st.checkbox(
        "Check this if you want to put the dataset into an existing bucket"
    )
    # True only on the rerun triggered by pressing the button.
    oracle_submitted = st.form_submit_button("Generate API Key")
42
+
43
+
44
+
45
# Form selecting which Hugging Face dataset (and which config / split of it)
# to pull. Widgets created inside the `with` block are attached to the form.
dataset_form = st.form("dataset")
with dataset_form:
    st.write("Dataset Settings")
    dataset_name = st.text_area(
        "Enter the name of the huggingface Dataset:",
        value="Hellisotherpeople/DebateSum",
    )
    dataset_name_2 = st.text_area(
        "Enter the name of the config for the dataset if it has one",
        value="",
    )
    split_name = st.text_area(
        "Enter the name of the split of the dataset that you want to use",
        value="train",
    )
    pd_checkbox = st.checkbox(
        "Check this if you want this to be a pandas dataframe instead of a HF Dataset Object"
    )
    # True only on the rerun triggered by pressing the button.
    dataset_submitted = st.form_submit_button("Pull Dataset")
52
+
53
+
54
def load_and_process_data(path, name, streaming, split_name):
    """Load a Hugging Face dataset and return one of its splits.

    Args:
        path: Dataset identifier handed to ``load_dataset`` as ``path``.
        name: Dataset config name. A blank or whitespace-only string is
            treated as "no config" and passed on as ``None``.
        streaming: Forwarded unchanged to ``load_dataset``.
        split_name: Key of the split to pick from the loaded dataset.

    Returns:
        ``dataset[split_name]``, i.e. the requested split of whatever
        ``load_dataset`` returned.

    Raises:
        Whatever ``load_dataset`` raises, plus the lookup error raised when
        ``split_name`` is not a key of the loaded dataset.
    """
    # The arguments come from free-text UI fields, so they may carry stray
    # whitespace or newlines; a blank config field arrives as "" rather
    # than None.
    if isinstance(path, str):
        path = path.strip()
    if isinstance(split_name, str):
        split_name = split_name.strip()
    if isinstance(name, str):
        name = name.strip() or None

    dataset = load_dataset(
        path=path,
        name=name,
        streaming=streaming,
        keep_in_memory=True,
    )
    return dataset[split_name]
59
+
60
+
61
+
62
if oracle_submitted:
    # Canned answers piped to the interactive `oci setup config` command, one
    # per line (presumably in the order the CLI prompts for them -- confirm
    # against the installed CLI version).
    setup_answers = "\n".join(
        [
            config_location,
            "Y",
            "USER",
            user_ocid,
            tenancy_ocid,
            region,
            gen_api_key,
            private_key_location,
        ]
    )

    # Start from a fresh .oci directory on every submission (best effort:
    # a missing or undeletable directory is not an error here).
    shutil.rmtree(".oci", ignore_errors=True)
    os.makedirs(".oci", exist_ok=True)

    # Make sure the config file exists before the CLI runs.
    with open(config_location, "a"):
        pass

    key = RSA.generate(2048)

    # Create the private key file with owner-only permissions from the start
    # instead of relying on the process umask.
    key_fd = os.open(
        private_key_location,
        os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        0o600,
    )
    with os.fdopen(key_fd, "wb") as private_file:
        private_file.write(key.export_key())

    public_key_pem = key.publickey().export_key()
    with open(".oci/public_key.pem", "wb") as public_file:
        public_file.write(public_key_pem)

    setup_result = subprocess.run(
        ["oci", "setup", "config"],
        text=True,
        input=setup_answers,
    )
    if setup_result.returncode != 0:
        oracle_form.warning(
            "`oci setup config` exited with status {}".format(setup_result.returncode)
        )

    # Read the generated config directly rather than spawning `cat`.
    with open(config_location, encoding="utf-8") as config_file:
        config_text = config_file.read()

    # Use .text, not .write: per the original author's note, st.write is not
    # the right way to output an RSA key.
    oracle_form.text(public_key_pem.decode("utf-8"))

    # Write through the expander object so the config text lands inside the
    # expander instead of directly in the form.
    config_expander = oracle_form.expander("Open to see the generated OCI config file")
    config_expander.text(config_text)
97
+
98
+
99
+
100
if dataset_submitted:
    # Pull the requested split and optionally convert it to a DataFrame.
    hf_dataset = load_and_process_data(dataset_name, dataset_name_2, False, split_name)
    if pd_checkbox:
        hf_dataset = pd.DataFrame.from_dict(hf_dataset)
    st.write(hf_dataset)
    st.write("Dataset Pulled Succesfully!")

    # Read the same config file the "Generate API Key" step writes, instead
    # of a hard-coded path under the home directory.
    oci_config = oci.config.from_file(config_location, profile_name="USER")

    # Use a separate name for the client so the imported `object_storage`
    # module is not rebound.
    storage_client = object_storage.ObjectStorageClient(oci_config)
    st.write("Object Storage Connected Succesfully")

    namespace = storage_client.get_namespace().data
    # The tenancy OCID from the config is used as the compartment id.
    compartment_id = oci_config["tenancy"]
    st.write(namespace)

    # The bucket name is the dataset name with "/" replaced by "-".
    bucket_name = dataset_name.replace("/", "-")
    try:
        bucket = storage_client.create_bucket(
            namespace,
            CreateBucketDetails(
                name=bucket_name,
                compartment_id=compartment_id,
                storage_tier='Archive',
                public_access_type='ObjectRead',
            ),
        )
    except oci.exceptions.ServiceError as err:
        # Only an HTTP 409 conflict means the bucket is already there; any
        # other service error (auth, quota, bad name, ...) must not be
        # reported as "Bucket Exists".
        if err.status != 409:
            raise
        st.write("Bucket Exists, Writing Dataset to Bucket")
    else:
        st.write("Bucket Written:")
        st.write(bucket)

    st.write("Uploading new object if it doesn't already exist {!r}".format(hf_dataset))
    # NOTE: the dataset is stored as a pickle; whoever downloads the object
    # should only unpickle it if they trust the bucket contents.
    hf_bytes = pickle.dumps(hf_dataset)
    # The object is stored under the same name as the bucket. put_object is
    # called unconditionally; there is no existence check here.
    storage_client.put_object(
        namespace,
        bucket_name,
        bucket_name,
        hf_bytes,
    )
    st.write("Object Pushed Succesfully!")
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+ #if test_connection_button: