Spaces:
Runtime error
Runtime error
sashavor
committed on
Commit
•
1f6c998
1
Parent(s):
11ad112
fixing bugs
Browse files
app.py
CHANGED
@@ -11,6 +11,7 @@ CACHED_FILE_PATH = hf_hub_download(repo_id="sasha/co2_submissions", filename="co
|
|
11 |
api = HfApi()
|
12 |
|
13 |
def write_to_csv(hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info):
|
|
|
14 |
with open(CACHED_FILE_PATH,'a', newline='') as f:
|
15 |
writer = csv.writer(f)
|
16 |
writer.writerow([hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info])
|
@@ -67,6 +68,7 @@ model_info = "N/A"
|
|
67 |
### Conversion factors
|
68 |
kg_per_mile = 0.348
|
69 |
embodied_conversion_factor = 0.0289
|
|
|
70 |
|
71 |
electricity = pd.read_csv(electricity_url)
|
72 |
servers = pd.read_csv(server_url)
|
@@ -86,47 +88,46 @@ with st.expander("Calculate the dynamic emissions of your model"):
|
|
86 |
with col1:
|
87 |
hardware = st.selectbox('Hardware used', TDP['name'].tolist())
|
88 |
gpu_tdp = TDP['tdp_watts'][TDP['name'] == hardware].tolist()[0]
|
89 |
-
st.markdown("Different hardware has different
|
90 |
with col2:
|
91 |
num_gpus = st.text_input('Number of GPUs/CPUs/TPUs used', value = 16)
|
92 |
-
|
93 |
-
# 'i.e. if you used 100 GPUs for 10 hours, this is equal to 100x10 = 1,000 GPU hours.')
|
94 |
with col3:
|
95 |
training_time = st.number_input('Total training time (in hours)', value = 0.0)
|
|
|
96 |
with col4:
|
97 |
provider = st.selectbox('Provider used', providers)
|
98 |
st.markdown('If you can\'t find your provider here, select "Local/Private Infrastructure".')
|
99 |
with col5:
|
100 |
if provider != 'Local/Private Infastructure':
|
101 |
provider_instances = instances['region'][instances['provider'] == provider.lower()].unique().tolist()
|
102 |
-
region = st.selectbox('
|
103 |
carbon_intensity = instances['impact'][(instances['provider'] == provider.lower()) & (instances['region'] == region)].tolist()[0]
|
104 |
-
|
105 |
else:
|
106 |
carbon_intensity = st.number_input('Carbon intensity of your energy grid, in grams of CO2 per kWh')
|
107 |
st.markdown('You can consult a resource like the [IEA](https://www.iea.org/countries) or '
|
108 |
' [Electricity Map](https://app.electricitymaps.com/) to get this information.')
|
109 |
dynamic_emissions = round(gpu_tdp * float(num_gpus)*training_time * carbon_intensity/1000000)
|
110 |
st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
|
111 |
-
st.
|
112 |
' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
|
113 |
|
114 |
st.markdown('### Experimental Emissions 👩‍🔬')
|
115 |
st.markdown('##### These are the emissions produced by generating the electricity necessary for powering the experiments and tests needed to pick your final model architecture '
|
116 |
'and parameters.')
|
117 |
with st.expander("Calculate the experimental emissions of your model"):
|
118 |
-
st.
|
119 |
experimentation_time = st.number_input(label='Number of hours of experimentation run', value=training_time)
|
120 |
st.markdown('##### As a baseline, language models such as [OPT](https://arxiv.org/pdf/2205.01068.pdf) and [BLOOM](https://arxiv.org/abs/2211.02001)'
|
121 |
' found that experimentation roughly doubles the amount of compute used by training the model itself.')
|
122 |
experimental_emissions = round(gpu_tdp * (experimentation_time) * carbon_intensity/1000000)
|
123 |
-
st.metric(label="Experimental emissions", value=str(
|
124 |
|
125 |
st.markdown('### Datacenter (Overhead) Emissions π')
|
126 |
st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
|
127 |
'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
|
128 |
with st.expander("Calculate the idle emissions of your model"):
|
129 |
-
st.
|
130 |
' the ratio of energy used for computing overheads like cooling, which varies depending on the data center.')
|
131 |
pue = instances['PUE'][(instances['provider'] == provider.lower()) & (instances['region'] == region)].tolist()[0]
|
132 |
if math.isnan(pue) == True:
|
@@ -186,9 +187,13 @@ div.stButton > button:first-child {
|
|
186 |
width: 15em;
|
187 |
}
|
188 |
</style>""", unsafe_allow_html=True)
|
|
|
189 |
buttoncol1, buttoncol2, buttoncol3 = st.columns(3)
|
190 |
with buttoncol2:
|
191 |
-
st.
|
|
|
|
|
|
|
192 |
|
193 |
st.markdown('### Methodology')
|
194 |
with st.expander("More information about our Methodology"):
|
|
|
11 |
api = HfApi()
|
12 |
|
13 |
def write_to_csv(hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info):
|
14 |
+
st.session_state["is_shared"] = True
|
15 |
with open(CACHED_FILE_PATH,'a', newline='') as f:
|
16 |
writer = csv.writer(f)
|
17 |
writer.writerow([hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info])
|
|
|
68 |
### Conversion factors
|
69 |
kg_per_mile = 0.348
|
70 |
embodied_conversion_factor = 0.0289
|
71 |
+
st.session_state["is_shared"] = False
|
72 |
|
73 |
electricity = pd.read_csv(electricity_url)
|
74 |
servers = pd.read_csv(server_url)
|
|
|
88 |
with col1:
|
89 |
hardware = st.selectbox('Hardware used', TDP['name'].tolist())
|
90 |
gpu_tdp = TDP['tdp_watts'][TDP['name'] == hardware].tolist()[0]
|
91 |
+
st.markdown("Different hardware has different efficiencies, which impacts how much energy you use.")
|
92 |
with col2:
|
93 |
num_gpus = st.text_input('Number of GPUs/CPUs/TPUs used', value = 16)
|
94 |
+
st.markdown('If you can\'t find your hardware in the list, select the closest similar model.')
|
|
|
95 |
with col3:
|
96 |
training_time = st.number_input('Total training time (in hours)', value = 0.0)
|
97 |
+
st.markdown('You can find this number in your training logs or TensorBoards')
|
98 |
with col4:
|
99 |
provider = st.selectbox('Provider used', providers)
|
100 |
st.markdown('If you can\'t find your provider here, select "Local/Private Infrastructure".')
|
101 |
with col5:
|
102 |
if provider != 'Local/Private Infastructure':
|
103 |
provider_instances = instances['region'][instances['provider'] == provider.lower()].unique().tolist()
|
104 |
+
region = st.selectbox('Region used', provider_instances)
|
105 |
carbon_intensity = instances['impact'][(instances['provider'] == provider.lower()) & (instances['region'] == region)].tolist()[0]
|
|
|
106 |
else:
|
107 |
carbon_intensity = st.number_input('Carbon intensity of your energy grid, in grams of CO2 per kWh')
|
108 |
st.markdown('You can consult a resource like the [IEA](https://www.iea.org/countries) or '
|
109 |
' [Electricity Map](https://app.electricitymaps.com/) to get this information.')
|
110 |
dynamic_emissions = round(gpu_tdp * float(num_gpus)*training_time * carbon_intensity/1000000)
|
111 |
st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
|
112 |
+
st.info('This is roughly equivalent to '+ str(round(dynamic_emissions/kg_per_mile,1)) + ' miles driven in an average US car'
|
113 |
' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
|
114 |
|
115 |
st.markdown('### Experimental Emissions 👩‍🔬')
|
116 |
st.markdown('##### These are the emissions produced by generating the electricity necessary for powering the experiments and tests needed to pick your final model architecture '
|
117 |
'and parameters.')
|
118 |
with st.expander("Calculate the experimental emissions of your model"):
|
119 |
+
st.info('Consult your training logs to figure out how many ablations, baselines and experiments were run before converging on the final model.')
|
120 |
experimentation_time = st.number_input(label='Number of hours of experimentation run', value=training_time)
|
121 |
st.markdown('##### As a baseline, language models such as [OPT](https://arxiv.org/pdf/2205.01068.pdf) and [BLOOM](https://arxiv.org/abs/2211.02001)'
|
122 |
' found that experimentation roughly doubles the amount of compute used by training the model itself.')
|
123 |
experimental_emissions = round(gpu_tdp * (experimentation_time) * carbon_intensity/1000000)
|
124 |
+
st.metric(label="Experimental emissions", value=str(experimental_emissions)+' kilograms of CO2eq')
|
125 |
|
126 |
st.markdown('### Datacenter (Overhead) Emissions π')
|
127 |
st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
|
128 |
'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
|
129 |
with st.expander("Calculate the idle emissions of your model"):
|
130 |
+
st.info('A proxy often used to reflect idle emissions is PUE (Power Usage Effectiveness), which represents '
|
131 |
' the ratio of energy used for computing overheads like cooling, which varies depending on the data center.')
|
132 |
pue = instances['PUE'][(instances['provider'] == provider.lower()) & (instances['region'] == region)].tolist()[0]
|
133 |
if math.isnan(pue) == True:
|
|
|
187 |
width: 15em;
|
188 |
}
|
189 |
</style>""", unsafe_allow_html=True)
|
190 |
+
|
191 |
buttoncol1, buttoncol2, buttoncol3 = st.columns(3)
|
192 |
with buttoncol2:
|
193 |
+
if st.session_state["is_shared"] == False:
|
194 |
+
submitted = st.button(label="Share my CO2 data!", on_click = lambda *args: write_to_csv(hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info))
|
195 |
+
else:
|
196 |
+
st.info('Thank you! Your data has been shared in https://huggingface.co/datasets/sasha/co2_submissions.')
|
197 |
|
198 |
st.markdown('### Methodology')
|
199 |
with st.expander("More information about our Methodology"):
|