Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# IMPORTING TOOLS
|
2 |
+
import streamlit as st
|
3 |
+
from rdflib import Graph
|
4 |
+
import pandas as pd
|
5 |
+
import plotly.express as px
|
6 |
+
import numpy as np
|
7 |
+
|
8 |
+
# SET PAGE SETTINGS
|
9 |
+
# Set the page title for the app (the title the browser shows for this page).
st.set_page_config(page_title='Amusement Accidents')
|
10 |
+
|
11 |
+
# CACHED METHOD TO LOAD THE RDF
|
12 |
+
# NOTE(review): st.cache is deprecated in recent Streamlit releases in favour
# of st.cache_resource / st.cache_data -- confirm the pinned Streamlit version
# before switching decorators.
@st.cache(persist=True)
def importRDF(filename, format):
    """Parse an RDF file into an rdflib ``Graph``.

    The call is wrapped by ``st.cache(persist=True)``, so Streamlit can reuse
    the parsed graph instead of re-parsing the file on every rerun.

    Args:
        filename: Path (or other source accepted by ``Graph.parse``) of the
            RDF document.
        format: Serialization format name handed to rdflib (e.g. ``"ttl"``).

    Returns:
        The value returned by ``Graph().parse(...)`` (the populated graph).
    """
    # Pass the format by keyword: the second positional parameter of
    # Graph.parse is ``publicID`` (the base URI), not ``format``.
    graph = Graph().parse(filename, format=format)
    return graph


# IMPORTING THE RDF
with st.spinner('Loading all the stuffs...'):
    graph = importRDF("rdf-dataset.ttl", "ttl")
|
20 |
+
|
21 |
+
# METHOD TO CONVERT THE QUERY RESULT INTO A DATAFRAME
|
22 |
+
def sparql_results_to_df(results):
    """Convert a SPARQL result set into a pandas DataFrame.

    Args:
        results: Iterable of result rows exposing a ``vars`` attribute with
            the projected variables; every non-``None`` cell must provide a
            ``toPython()`` method.

    Returns:
        A DataFrame with one column per entry of ``results.vars`` (named with
        ``str(var)``) and one row per result row. Cells that are ``None`` are
        kept as missing values, all others are converted with ``toPython()``.
    """
    # Lazily convert each row; pandas consumes the generator when building.
    converted_rows = (
        [cell.toPython() if cell is not None else None for cell in row]
        for row in results
    )
    column_names = [str(var) for var in results.vars]
    return pd.DataFrame(data=converted_rows, columns=column_names)
|
27 |
+
|
28 |
+
# METHOD TO EXECUTE A QUERY (and return a pandas dataframe)
|
29 |
+
def computeQuery(query, executor):
    """Run ``query`` on ``executor`` and return the result as a DataFrame.

    Args:
        query: Query text passed unchanged to ``executor.query``.
        executor: Object exposing a ``query`` method (the rdflib graph in
            this app).

    Returns:
        The query result converted by ``sparql_results_to_df``.
    """
    return sparql_results_to_df(executor.query(query))
|
33 |
+
|
34 |
+
# PROCESSING & DISPLAY
|
35 |
+
def _render_section(title, query, build_figure, **chart_kwargs):
    """Render one chart section: heading, chart, collapsible query, divider.

    Args:
        title: Markdown heading written above the chart.
        query: SPARQL query text; it is executed against the module-level
            ``graph`` and also shown inside the "Show query" expander.
        build_figure: Callable that takes the query result DataFrame and
            returns the Plotly figure to display.
        **chart_kwargs: Extra keyword arguments forwarded to
            ``st.plotly_chart``.
    """
    with st.container():
        st.write(title)
        res = computeQuery(query, graph)
        st.plotly_chart(build_figure(res), **chart_kwargs)
        with st.expander("Show query"):
            st.code(query, language="sparql")
        st.markdown("---")


def _months_figure(res):
    """Build the bar chart of accident counts per month."""
    fig = px.bar(
        res,
        x="mon",
        y="count",
        color="count",
        labels={"mon": "Month", "count": "Num. of Accidents"},
        # Boolean True, as in the other bar charts: a string value would be
        # interpreted by Plotly as a number-format specification.
        text_auto=True,
    )
    fig.update_xaxes(type="category")
    fig.update_yaxes(showticklabels=False)
    return fig


def _cities_figure(res):
    """Build the treemap of accident counts nested by state and city."""
    return px.treemap(
        res,
        path=[px.Constant("U.S"), "state", "city"],
        values="count",
        hover_data=["state", "city", "count"],
        color="count",
        color_continuous_scale='tealrose',
        # Centre the colour scale on the count-weighted average count.
        color_continuous_midpoint=np.average(res['count'], weights=res['count']),
    )


def _accident_categories_figure(res):
    """Build the treemap of accident counts per accident category."""
    return px.treemap(
        res,
        path=[px.Constant("Accident Category"), "category_name"],
        values="count",
        hover_data=["category_name", "count"],
    )


def _ride_categories_figure(res):
    """Build the donut chart of accident counts per ride category."""
    return px.pie(res, names="amus_cat_name", values="count", hole=.4)


def _ride_types_figure(res):
    """Build the bar chart of accident counts per ride type."""
    fig = px.bar(
        res,
        x="type_name",
        y="count",
        labels={"type_name": "Ride Type", "count": "Num. of Accidents"},
        text_auto=True,
    )
    fig.update_xaxes(tickangle=45)
    return fig


def _manufacturers_figure(res):
    """Build the treemap of accident counts per ride manufacturer."""
    return px.treemap(
        res,
        path=[px.Constant("Manufacturers"), "ride_manuf"],
        values="count",
        hover_data=["count"],
    )


def _injured_people_figure(res):
    """Build the bar chart of accident counts per number of injured people."""
    fig = px.bar(
        res,
        x="num_inj",
        y="count",
        labels={"num_inj": "Injured People", "count": "Num. of Accidents"},
        text_auto=True,
    )
    fig.update_xaxes(type="category")
    return fig


def display():
    """Render every predefined analysis section, in a fixed order.

    Each section runs one of the module-level ``query_*`` SPARQL queries on
    the module-level ``graph`` and shows the resulting chart together with
    the query text. The query names are resolved when this function is
    called, so they only need to be defined before the call, not before this
    definition.

    Returns:
        None.
    """
    # (heading, query, figure builder, extra st.plotly_chart arguments)
    sections = (
        (
            "#### What are the months with the highest number of accidents?",
            query_5,
            _months_figure,
            {},
        ),
        (
            "#### Which cities have recorded the most accidents?",
            query_8,
            _cities_figure,
            {},
        ),
        (
            "#### What Are the Most Common Categories of Accidents?",
            query_4,
            _accident_categories_figure,
            {},
        ),
        (
            "#### What are the Most Dangerous Ride Categories?",
            query_6,
            _ride_categories_figure,
            {},
        ),
        (
            "#### What are the Most Dangerous Ride Types?",
            query_3,
            _ride_types_figure,
            {},
        ),
        (
            "#### Which manufacturers produced the rides with the most accidents?",
            query_2,
            _manufacturers_figure,
            {},
        ),
        (
            "#### How many people are generally involved in an accident?",
            query_1,
            _injured_people_figure,
            {"use_container_width": True},
        ),
    )
    for title, query, build_figure, chart_kwargs in sections:
        _render_section(title, query, build_figure, **chart_kwargs)
|
109 |
+
|
110 |
+
# ANALYTICAL QUERIES DEFINITION
|
111 |
+
# Number of accidents for each "injured people" value
query_1 = """
PREFIX r:<http://example.org/ride#>
PREFIX a:<http://example.org/accident#>

SELECT ?num_inj (COUNT(?num_inj) AS ?count)
WHERE {
?acc a:num_injured ?num_inj .
}
GROUP BY ?num_inj
ORDER BY (?num_inj)
"""

# Number of accidents per ride manufacturer, most accidents first
query_2 = """
PREFIX acc: <http://example.org/accident#>
PREFIX ride: <http://example.org/ride#>

SELECT ?ride_manuf (COUNT(?ride_manuf) AS ?count)
WHERE {
?instance acc:ref-ride_id ?ride_id .
?ride_id ride:manufacturer ?ride_manuf
}
GROUP BY ?ride_manuf
ORDER BY DESC(?count)
"""

# Top 7 ride types by number of accidents
query_3 = """
PREFIX ride_type: <http://example.org/ride_type#>
PREFIX acc: <http://example.org/accident#>
PREFIX ride: <http://example.org/ride#>

SELECT ?type_name (COUNT(?type_name) AS ?count)
WHERE {
?instance acc:ref-ride_id ?ride_id .
?ride_id ride:ref-ride_type_id ?type_id .
?type_id ride_type:type ?type_name .
}
GROUP BY ?type_name
ORDER BY DESC(?count)
LIMIT 7
"""

# Top 6 ride (amusement) categories by number of accidents
query_6 = """
PREFIX amusement_cat: <http://example.org/amusement_category#>
PREFIX ride_type: <http://example.org/ride_type#>
PREFIX acc: <http://example.org/accident#>
PREFIX ride: <http://example.org/ride#>

SELECT ?amus_cat_name (COUNT(?amus_cat_name) AS ?count)
WHERE {
?instance acc:ref-ride_id ?ride_id .
?ride_id ride:ref-ride_type_id ?type_id .
?type_id ride_type:ref-amusement_category_id ?amus_cat_id .
?amus_cat_id amusement_cat:amusement_category ?amus_cat_name .
}
GROUP BY ?amus_cat_name
ORDER BY DESC(?count)
LIMIT 6

"""

# Number of accidents per accident category, most common first
query_4 = """
PREFIX acc_cat: <http://example.org/accident_category#>
PREFIX acc: <http://example.org/accident#>

SELECT ?category_name (COUNT(?category_name) AS ?count)
WHERE {
?instance acc:ref-accident_category_id ?category_id .
?category_id acc_cat:accident_category ?category_name .
}
GROUP BY ?category_name
ORDER BY DESC(?count)
"""

# Number of accidents per month of the year
query_5 = """
PREFIX acc: <http://example.org/accident#>

SELECT ?mon (COUNT(?mon) AS ?count)
WHERE {
?instance acc:date ?date .
}
GROUP BY (month(?date) AS ?mon)
ORDER BY (?mon)
"""
|
200 |
+
|
201 |
+
# Number of accidents per city (with its state), most accidents first.
# ?state is projected, so it must also be a grouping key: SPARQL 1.1 only
# allows grouped or aggregated variables in the SELECT of an aggregate query,
# and grouping by city alone would merge same-named cities of different states.
query_8 = """
PREFIX location: <http://example.org/location#>
PREFIX acc: <http://example.org/accident#>

SELECT ?city (COUNT(?city) AS ?count) ?state
WHERE {
?instance acc:ref-location_id ?location_id .
?location_id location:city ?city ;
location:state ?state
}
GROUP BY ?city ?state
ORDER BY DESC(?count)

"""
|
216 |
+
|
217 |
+
|
218 |
+
# TITLE
|
219 |
+
# Introductory text shown under the page title (Markdown).
_INTRO_MARKDOWN = """There are **thousands of amusement parks** around the world that welcome **millions of visitors** each year. Children, families and teenagers ready to spend days of adrenaline and fun.
Unfortunately, **accidents sometimes occur**. This raises some questions: **Are amusement parks safe? Which rides are the most accident-prone? What accidents happen most often? At what time of year are accidents most common?**
Let's try to find out in this **RDF data exploration** using **SPARQL** and **Plotly**."""

st.header("Theme Parks Rides Accidents")
st.markdown(_INTRO_MARKDOWN)
st.markdown("---")

# Render all predefined chart sections.
display()
|
226 |
+
|
227 |
+
# WRITE & RUN YOUR OWN QUERY
|
228 |
+
st.write("#### Write & Run your Custom Query")
pers_query = st.text_area('', """
PREFIX ride:<http://example.org/ride#>
SELECT ?name
WHERE {
?ride ride:manufacturer "Vekoma" ;
ride:name ?name
}
""", height=200)
with st.container():
    # The query is free-form user input, so it may be malformed or of a kind
    # that cannot be turned into a table. This is the UI boundary: report any
    # failure to the user instead of letting the whole page crash.
    try:
        res = computeQuery(pers_query, graph)
    except Exception as err:
        st.error(f"The query could not be executed: {err}")
    else:
        st.dataframe(res)
    st.markdown("---")
|
241 |
+
|
242 |
+
# SIDEBAR
|
243 |
+
# Description of the app and of how the dataset was produced.
_ABOUT_TEXT = """
This App propose some visualization about theme parks rides accidents. The original dataset comes from "Saferparks", an organization that reports and collects
data about theme parks rides accidents, in the US.
The original dataset covers years from 2010 to 2017 and comes in CSV or Excel format. I used python to split the dataset and convert it in the Third Normal Form (3NF)
of Database.
I uploaded the data into a PostgreSQL database and I used the Ontop tool to get the final RDF dataset.
"""

# Links to the dataset and to the tools used to build the RDF graph.
_RESOURCES_MARKDOWN = """
Saferparks dataset: https://ridesdatabase.org/saferparks/data/

Saferparks dataset description: https://ridesdatabase.org/wp-content/uploads/2020/02/Saferparks-data-description.pdf

Ontop Tool: https://ontop-vkg.org

Ontop Tool on GitHub: https://github.com/ontop/ontop
"""

with st.sidebar:
    st.write(_ABOUT_TEXT)
    st.markdown("---")
    st.markdown("## Resources:")
    st.markdown(_RESOURCES_MARKDOWN)
|