Spaces:
Runtime error
Runtime error
File size: 10,662 Bytes
32e5892 3f4a706 06d8848 32e5892 06d8848 32e5892 06d8848 32e5892 9d736cc 32e5892 cc0aa37 32e5892 cc0aa37 32e5892 cc0aa37 06d8848 3f4a706 06d8848 3f4a706 06d8848 32e5892 06d8848 32e5892 3f4a706 32e5892 06d8848 32e5892 06d8848 3f4a706 cc0aa37 06d8848 32e5892 06d8848 32e5892 06d8848 32e5892 06d8848 32e5892 06d8848 32e5892 06d8848 32e5892 cc0aa37 32e5892 cc0aa37 32e5892 06d8848 3f4a706 06d8848 32e5892 06d8848 32e5892 06d8848 32e5892 06d8848 32e5892 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 |
# IMPORTING TOOLS
import streamlit as st
from rdflib import Graph, Literal
from rdflib.plugins.sparql import prepareQuery
import pandas as pd
import plotly.express as px
import numpy as np
# SET PAGE SETTINGS
st.set_page_config(page_title='Amusement Accidents', layout="centered")
# METHOD TO LOAD THE RDF
@st.cache(persist=True)
def importRDF(filename, format):
graph = Graph().parse(filename, format)
return graph
# IMPORTING THE RDF
with st.spinner('Loading all the stuffs...'):
graph = importRDF("rdf-dataset.ttl", "ttl")
# METHOD TO CONVERT THE QUERY RESULT INTO A DATAFRAME
def sparql_results_to_df(results):
return pd.DataFrame(
data=([None if x is None else x.toPython() for x in row] for row in results),
columns=[str(x) for x in results.vars],
)
# METHOD TO EXECUTE A GENERIC QUERY
def computeQuery(query, executor):
result = executor.query(query)
res_df = sparql_results_to_df(result)
return res_df
# METHOD TO EXECUTE A PARAMETRIC QUERY
def rideAccidentDescription(ride_name, executor):
ride_name = Literal(ride_name)
query = """
PREFIX ride_type: <http://example.org/ride_type#>
PREFIX acc: <http://example.org/accident#>
PREFIX ride: <http://example.org/ride#>
SELECT (?manuf AS ?Manufacturer) (?description AS ?Accident_Description)
WHERE {
?instance acc:description ?description ;
acc:ref-ride_id ?ride_id .
?ride_id ride:name ?name ;
ride:manufacturer ?manuf .
FILTER (?name = ?ride_name)
}
"""
prep_query = prepareQuery(query)
r = executor.query(prep_query, initBindings={'ride_name': ride_name})
return sparql_results_to_df(r), query
# PROCESSING & DISPLAY
def display():
with st.container():
st.write("#### What are the months with the highest number of accidents?")
res = computeQuery(query_5, graph)
fig = px.bar(res, x="mon", y="count", color="count", labels={"mon":"Month", "count":"Num. of Accidents"}, text_auto="True")
fig.update_xaxes(type="category")
fig.update_yaxes(showticklabels=False)
st.plotly_chart(fig, use_container_width=True)
with st.expander("Show query"):
st.code(query_5, language="sparql")
st.markdown("---")
with st.container():
st.write("#### Which cities and states have recorded the most accidents?")
res = computeQuery(query_8, graph)
fig = px.treemap(res, path=[px.Constant("U.S"), "state", "city"], values="count", hover_data=["state", "city","count"],
color="count",
color_continuous_scale='tealrose',
color_continuous_midpoint=np.average(res['count'], weights=res['count']))
st.plotly_chart(fig, use_container_width=True)
with st.expander("Show query"):
st.code(query_8, language="sparql")
st.markdown("---")
with st.container():
st.write("#### What incidents have occurred on your favorite ride?")
ride_names = computeQuery(query_0, graph)
option = st.selectbox("Select a Ride", options=ride_names)
res, query = rideAccidentDescription(option, graph)
res_count = res.count()[0]
if (res_count < 3):
st.table(res)
else:
limit = st.slider("Num. of Accidents to Visualize", 1, int(res_count), 2, 1)
st.table(res[:limit])
with st.expander("Show query"):
st.code(query, language="sparql")
st.markdown("---")
with st.container():
st.write("#### What Are the Most Common Categories of Accidents?")
res = computeQuery(query_4, graph)
fig = px.treemap(res, path=[px.Constant("Accident Category"), "category_name"], values="count", hover_data=["category_name","count"])
st.plotly_chart(fig, use_container_width=True)
with st.expander("Show query"):
st.code(query_4, language="sparql")
st.markdown("---")
with st.container():
st.write("#### What are the Most Dangerous Ride Categories?")
res = computeQuery(query_6, graph)
fig = px.pie(res, names="amus_cat_name", values="count", hole=.4)
st.plotly_chart(fig, use_container_width=True)
with st.expander("Show query"):
st.code(query_6, language="sparql")
st.markdown("---")
with st.container():
st.write("#### What are the Most Dangerous Ride Types?")
res = computeQuery(query_3, graph)
fig = px.bar(res, x="type_name", y="count", labels={"type_name":"Ride Type", "count":"Num. of Accidents"}, text_auto=True)
fig.update_xaxes(tickangle=45)
st.plotly_chart(fig, use_container_width=True)
with st.expander("Show query"):
st.code(query_3, language="sparql")
st.markdown("---")
with st.container():
st.write("#### How many people are generally involved in an accident?")
res = computeQuery(query_1, graph)
fig = px.bar(res, x="num_inj", y="count", labels={"num_inj":"Injured People", "count":"Num. of Accidents"}, text_auto=True)
fig.update_xaxes(type="category")
st.plotly_chart(fig, use_container_width=True)
with st.expander("Show query"):
st.code(query_1, language="sparql")
st.markdown("---")
return None
# ANALYTICAL QUERIES DEFINITION
# get the names of all the rides
query_0 = """
PREFIX ride:<http://example.org/ride#>
SELECT DISTINCT ?name
WHERE {
?ride ride:name ?name .
}
"""
# num of accidents per injured people
query_1 = """
PREFIX r:<http://example.org/ride#>
PREFIX a:<http://example.org/accident#>
SELECT ?num_inj (COUNT(?num_inj) AS ?count)
WHERE {
?acc a:num_injured ?num_inj .
}
GROUP BY ?num_inj
ORDER BY (?num_inj)
"""
# manufacturers of the rides subjected to most accidents
query_2 = """
PREFIX acc: <http://example.org/accident#>
PREFIX ride: <http://example.org/ride#>
SELECT ?ride_manuf (COUNT(?ride_manuf) AS ?count)
WHERE {
?instance acc:ref-ride_id ?ride_id .
?ride_id ride:manufacturer ?ride_manuf
}
GROUP BY ?ride_manuf
ORDER BY DESC(?count)
"""
# Top n types of rides most subjected to accidents
query_3 = """
PREFIX ride_type: <http://example.org/ride_type#>
PREFIX acc: <http://example.org/accident#>
PREFIX ride: <http://example.org/ride#>
SELECT ?type_name (COUNT(?type_name) AS ?count)
WHERE {
?instance acc:ref-ride_id ?ride_id .
?ride_id ride:ref-ride_type_id ?type_id .
?type_id ride_type:type ?type_name .
}
GROUP BY ?type_name
ORDER BY DESC(?count)
LIMIT 7
"""
# Top 6 categories of rides most subjected to accidents
query_6 = """
PREFIX amusement_cat: <http://example.org/amusement_category#>
PREFIX ride_type: <http://example.org/ride_type#>
PREFIX acc: <http://example.org/accident#>
PREFIX ride: <http://example.org/ride#>
SELECT ?amus_cat_name (COUNT(?amus_cat_name) AS ?count)
WHERE {
?instance acc:ref-ride_id ?ride_id .
?ride_id ride:ref-ride_type_id ?type_id .
?type_id ride_type:ref-amusement_category_id ?amus_cat_id .
?amus_cat_id amusement_cat:amusement_category ?amus_cat_name .
}
GROUP BY ?amus_cat_name
ORDER BY DESC(?count)
LIMIT 6
"""
# most common categories of accidents
query_4 = """
PREFIX acc_cat: <http://example.org/accident_category#>
PREFIX acc: <http://example.org/accident#>
SELECT ?category_name (COUNT(?category_name) AS ?count)
WHERE {
?instance acc:ref-accident_category_id ?category_id .
?category_id acc_cat:accident_category ?category_name .
}
GROUP BY ?category_name
ORDER BY DESC(?count)
"""
# months with the ngher num of accidents
query_5 = """
PREFIX acc: <http://example.org/accident#>
SELECT ?mon (COUNT(?mon) AS ?count)
WHERE {
?instance acc:date ?date .
}
GROUP BY (month(?date) AS ?mon)
ORDER BY (?mon)
"""
# cities with the higher num of accidents
query_8 = """
PREFIX location: <http://example.org/location#>
PREFIX acc: <http://example.org/accident#>
SELECT ?city (COUNT(?city) AS ?count) ?state
WHERE {
?instance acc:ref-location_id ?location_id .
?location_id location:city ?city ;
location:state ?state
}
GROUP BY ?city
ORDER BY DESC(?count)
"""
# TITLE
st.header("Theme Park Ride Accidents")
st.markdown("""There are **thousands of amusement parks** around the world that welcome **millions of visitors** each year.
Children, families, and teenagers are ready to spend days of adrenaline and fun.
Unfortunately, **accidents sometimes occur**. This raises some questions: **Are amusement parks safe? Which rides are the most accident-prone? What accidents happen most often? At what time of year are accidents most common?**
Let's try to find out in this **RDF data exploration** using **SPARQL** and **Plotly**.""")
st.markdown("---")
display()
# WRITE & RUN YOUR OWN QUERY
st.write("#### Write & Run your Custom Query")
pers_query = st.text_area('', """
PREFIX ride:<http://example.org/ride#>
SELECT ?name
WHERE {
?ride ride:manufacturer "Vekoma" ;
ride:name ?name
}
""", height=200)
with st.container():
try:
res = computeQuery(pers_query, graph)
st.table(res)
except:
st.error("Ooops! Check you query syntax...")
st.markdown("---")
# SIDEBAR
with st.sidebar:
st.write("""
This App proposes some visualization about theme park ride accidents.
The original dataset comes from "Saferparks", an organization that reports and collects data about theme park ride accidents in the US.
The original dataset covers years from 2010 to 2017 and comes in CSV or Excel format. I used python to split the dataset and convert it into the
Third Normal Form (3NF) of Database.
I uploaded the data into a PostgreSQL database and I used the Ontop tool to get the final RDF dataset.
Queries are expressed in SPARQL, and charts are generated with Plotly Express.
""")
st.markdown("---")
st.markdown("## Dataset Resources:")
st.markdown("""
Saferparks Original Dataset: https://ridesdatabase.org/saferparks/data/
Saferparks Dataset Description: https://ridesdatabase.org/wp-content/uploads/2020/02/Saferparks-data-description.pdf
""")
|