Formula1-race-debriefing / db /formula1_databases.py
Draichi's picture
chore: organize the code into `/backup, /db, /notebooks, /regression-models` and `/SQL`
b4f5da6 unverified
from typing import Any, cast
import sqlite3
import pandas as pd
from datetime import datetime
from fastf1.core import Session
import fastf1
from rich.console import Console
console = Console(style="chartreuse1 on grey7")
class FastF1ToSQL:
"""
A class to convert FastF1 data into a SQLite database.
"""
def __init__(self, db_path: str) -> None:
"""
Initialize the FastF1ToSQL class.
Args:
db_path (str): Path to the SQLite database file.
"""
self.db_path = db_path
self.conn = sqlite3.connect(db_path, timeout=20)
self.cursor = self.conn.cursor()
self.__create_tables()
def __create_tables(self) -> None:
"""Create all necessary tables and indexes if they don't exist."""
self.cursor.executescript('''
CREATE TABLE IF NOT EXISTS Drivers (
driver_id INTEGER PRIMARY KEY,
driver_name TEXT NOT NULL,
team TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS Tracks (
track_id INTEGER PRIMARY KEY,
track_name TEXT NOT NULL,
country TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS Event (
event_id INTEGER PRIMARY KEY,
round_number INTEGER,
country TEXT,
location TEXT,
event_date DATE,
event_name TEXT,
session_1_date_utc DATETIME,
session_1_name TEXT,
session_2_date_utc DATETIME,
session_2_name TEXT,
session_3_date_utc DATETIME,
session_3_name TEXT,
session_4_date_utc DATETIME,
session_4_name TEXT,
session_5_date_utc DATETIME,
session_5_name TEXT
);
CREATE TABLE IF NOT EXISTS Sessions (
session_id INTEGER PRIMARY KEY,
event_id INTEGER,
track_id INTEGER,
session_type TEXT NOT NULL,
date DATETIME NOT NULL,
FOREIGN KEY (event_id) REFERENCES Event(event_id),
FOREIGN KEY (track_id) REFERENCES Tracks(track_id)
);
CREATE TABLE IF NOT EXISTS Weather (
weather_id INTEGER PRIMARY KEY,
session_id INTEGER,
datetime DATETIME,
air_temperature_in_celsius REAL,
relative_air_humidity_in_percentage REAL,
air_pressure_in_mbar REAL,
is_raining BOOLEAN,
track_temperature_in_celsius REAL,
wind_direction_in_grads REAL,
wind_speed_in_meters_per_seconds REAL,
FOREIGN KEY (session_id) REFERENCES Sessions(session_id)
);
CREATE TABLE IF NOT EXISTS Laps (
lap_id INTEGER PRIMARY KEY,
session_id INTEGER,
driver_name TEXT NOT NULL,
lap_number INTEGER NOT NULL,
stint INTEGER,
sector_1_speed_trap_in_km REAL,
sector_2_speed_trap_in_km REAL,
finish_line_speed_trap_in_km REAL,
longest_strait_speed_trap_in_km REAL,
is_personal_best BOOLEAN,
tyre_compound TEXT,
tyre_life_in_laps INTEGER,
is_fresh_tyre BOOLEAN,
position INTEGER,
lap_time_in_seconds REAL,
sector_1_time_in_seconds REAL,
sector_2_time_in_seconds REAL,
sector_3_time_in_seconds REAL,
lap_start_time_in_datetime DATETIME,
pin_in_time_in_datetime DATETIME,
pin_out_time_in_datetime DATETIME,
FOREIGN KEY (session_id) REFERENCES Sessions(session_id),
UNIQUE (session_id, driver_name, lap_number)
);
CREATE TABLE IF NOT EXISTS Telemetry (
telemetry_id INTEGER PRIMARY KEY,
lap_id INTEGER,
driver_name TEXT NOT NULL,
speed_in_km REAL,
RPM INTEGER,
gear_number INTEGER,
throttle_input REAL,
is_brake_pressed BOOLEAN,
is_DRS_open BOOLEAN,
x_position REAL,
y_position REAL,
z_position REAL,
is_off_track BOOLEAN,
datetime DATETIME,
FOREIGN KEY (lap_id) REFERENCES Laps(lap_id),
FOREIGN KEY (driver_name) REFERENCES Drivers(driver_name)
);
CREATE INDEX IF NOT EXISTS idx_laps_driver_name ON Laps(driver_name);
CREATE INDEX IF NOT EXISTS idx_laps_session_id ON Laps(session_id);
CREATE INDEX IF NOT EXISTS idx_telemetry_lap_id ON Telemetry(lap_id);
CREATE INDEX IF NOT EXISTS idx_telemetry_datetime ON Telemetry(datetime);
CREATE INDEX IF NOT EXISTS idx_weather_session_id ON Weather(session_id);
CREATE INDEX IF NOT EXISTS idx_weather_datetime ON Weather(datetime);
CREATE INDEX IF NOT EXISTS idx_event_date ON Event(event_date);
''')
self.conn.commit()
def process_session(self, session: Session) -> None:
"""
Process a session and insert the data into the database.
Args:
session (Session): The session to process.
"""
console.print(
f"> Processing session: {session.event.EventName} - {session.name}. This may take a while...")
# Load session data
session.load()
# Save session start date
self._session_start_date = session.t0_date
# Insert data into tables
self.insert_event(session)
self.insert_session(session)
self.insert_drivers(session)
self.insert_laps(session)
self.insert_telemetry(session)
self.insert_weather(session)
# Create data analysis views
self.__create_data_analysis_views()
# Commit changes and close connection
self.conn.commit()
self.conn.close()
def insert_event(self, session: Session) -> None:
"""
Insert the event data into the database.
Args:
session (Session): The FastF1 session object.
"""
event_data: dict[str, Any] = {
'round_number': int(session.event.RoundNumber),
'country': session.event.Country,
'location': session.event.Location,
'event_date': str(session.event.EventDate.date()),
'event_name': session.event.EventName,
'session_1_date_utc': str(session.event.Session1DateUtc),
'session_1_name': session.event.Session1.lower(),
'session_2_date_utc': str(session.event.Session2DateUtc),
'session_2_name': session.event.Session2.lower(),
'session_3_date_utc': str(session.event.Session3DateUtc),
'session_3_name': session.event.Session3.lower(),
'session_4_date_utc': str(session.event.Session4DateUtc),
'session_4_name': session.event.Session4.lower(),
'session_5_date_utc': str(session.event.Session5DateUtc),
'session_5_name': session.event.Session5.lower(),
}
columns = ', '.join(event_data.keys())
placeholders = ', '.join(['?' for _ in event_data])
query = f"INSERT OR REPLACE INTO Event ({columns}) VALUES ({placeholders})"
self.cursor.execute(query, list(event_data.values()))
self._event_id = self.cursor.lastrowid
def insert_session(self, session: Session) -> None:
"""
Insert the session data into the database.
Args:
session (Session): The FastF1 session object.
"""
session_data: dict[str, Any] = {
# Assuming this is called right after insert_event
'event_id': self._event_id,
'track_id': self.get_or_create_track(session.event.Location, session.event.Country),
'session_type': session.name,
'date': str(session.date),
}
columns = ', '.join(session_data.keys())
placeholders = ':' + ', :'.join(session_data.keys())
query = f"INSERT INTO Sessions ({columns}) VALUES ({placeholders})"
self.cursor.execute(query, session_data)
self._session_id = self.cursor.lastrowid
def insert_drivers(self, session: Session) -> None:
"""
Insert the drivers data into the database.
Args:
session (Session): The FastF1 session object.
"""
for driver in session.drivers:
driver_info = session.get_driver(driver)
driver_data = {
'driver_name': driver_info['FullName'],
'team': driver_info['TeamName']
}
columns = ', '.join(driver_data.keys())
placeholders = ':' + ', :'.join(driver_data.keys())
query = f"INSERT OR IGNORE INTO Drivers ({columns}) VALUES ({placeholders})"
self.cursor.execute(query, driver_data)
def insert_laps(self, session: Session) -> None:
"""
Insert the laps data into the database.
Args:
session (Session): The FastF1 session object.
"""
console.print("> Inserting laps data...")
laps_df = session.laps.copy()
laps_df['session_id'] = self._session_id
laps_df['lap_start_time_in_datetime'] = pd.to_datetime(
laps_df['LapStartDate'])
laps_df['pin_in_time_in_datetime'] = self._session_start_date + \
laps_df['PitInTime']
laps_df['pin_out_time_in_datetime'] = self._session_start_date + \
laps_df['PitOutTime']
for _, lap in laps_df.iterrows():
lap_data: dict[str, Any] = {
'session_id': lap['session_id'],
'driver_name': lap['Driver'],
'lap_number': lap['LapNumber'],
'sector_1_time_in_seconds': lap['Sector1Time'].total_seconds() if pd.notnull(lap['Sector1Time']) else None,
'sector_2_time_in_seconds': lap['Sector2Time'].total_seconds() if pd.notnull(lap['Sector2Time']) else None,
'sector_3_time_in_seconds': lap['Sector3Time'].total_seconds() if pd.notnull(lap['Sector3Time']) else None,
'lap_time_in_seconds': lap['LapTime'].total_seconds() if pd.notnull(lap['LapTime']) else None,
'finish_line_speed_trap_in_km': lap['SpeedFL'],
'longest_strait_speed_trap_in_km': lap['SpeedST'],
'is_personal_best': lap['IsPersonalBest'],
'tyre_compound': lap['Compound'],
'tyre_life_in_laps': lap['TyreLife'],
'is_fresh_tyre': lap['FreshTyre'],
'position': lap['Position'],
'lap_start_time_in_datetime': str(lap['lap_start_time_in_datetime']),
'pin_in_time_in_datetime': str(lap['pin_in_time_in_datetime']),
'pin_out_time_in_datetime': str(lap['pin_out_time_in_datetime']),
}
columns = ', '.join(lap_data.keys())
placeholders = ':' + ', :'.join(lap_data.keys())
query = f"INSERT INTO Laps ({columns}) VALUES ({placeholders})"
self.cursor.execute(query, lap_data)
self.conn.commit()
def insert_telemetry(self, session: Session) -> None:
"""
Insert the telemetry data into the database.
Args:
session (Session): The FastF1 session object.
"""
console.print('> Inserting telemetry data...')
telemetry_data_list = []
for driver in session.drivers:
laps_per_driver = session.laps.pick_driver(driver)
driver_name = session.get_driver(driver)['Abbreviation']
console.print(f"> Processing telemetry for driver: {driver_name}")
for _, lap in laps_per_driver.iterrows():
lap_number = lap['LapNumber']
console.print(f"> Processing telemetry for lap: {lap_number}")
telemetry = lap.get_telemetry()
telemetry['datetime'] = self._session_start_date + \
telemetry['SessionTime']
# Sort telemetry data by datetime
telemetry_sorted = telemetry.sort_values('datetime')
# Floor the 'datetime' to the specified decimal of a second
telemetry_sorted['floored_datetime'] = telemetry_sorted['datetime'].apply(
lambda x: x.floor(f'{0.1}s')
)
# Keep only the first occurrence for each floored_datetime
telemetry_unique = telemetry_sorted.groupby(
'floored_datetime', as_index=False).first()
for _, sample in telemetry_unique.iterrows():
telemetry_data: dict[str, Any] = {
'lap_id': self.__get_lap_id(session, driver_name, sample['datetime']),
'driver_name': driver_name,
'speed_in_km': sample['Speed'],
'RPM': sample['RPM'],
'gear_number': sample['nGear'],
'throttle_input': sample['Throttle'],
'is_brake_pressed': sample['Brake'],
'is_DRS_open': sample['DRS'],
'x_position': round(sample['X'], 2),
'y_position': round(sample['Y'], 2),
'z_position': round(sample['Z'], 2),
'is_off_track': sample['Status'] == 'OffTrack',
'datetime': str(sample['datetime']),
}
telemetry_data_list.append(telemetry_data)
if telemetry_data_list:
columns = ', '.join(telemetry_data_list[0].keys())
placeholders = ':' + ', :'.join(telemetry_data_list[0].keys())
query = f"INSERT INTO Telemetry ({columns}) VALUES ({placeholders})"
self.cursor.executemany(query, telemetry_data_list)
def insert_weather(self, session: Session) -> None:
"""
Insert weather data into the Weather table.
Args:
session (Session): The FastF1 session containing weather data.
"""
weather_data = cast(pd.DataFrame, session.weather_data)
weather_data['session_id'] = self._session_id
weather_data['datetime'] = self._session_start_date + \
weather_data['Time']
for _, sample in weather_data.iterrows():
weather_sample: dict[str, Any] = {
'session_id': sample['session_id'],
'air_temperature_in_celsius': sample['AirTemp'],
'track_temperature_in_celsius': sample['TrackTemp'],
'wind_speed_in_meters_per_seconds': sample['WindSpeed'],
'wind_direction_in_grads': sample['WindDirection'],
'relative_air_humidity_in_percentage': sample['Humidity'],
'air_pressure_in_mbar': sample['Pressure'],
'is_raining': sample['Rainfall'],
'datetime': str(sample['datetime']),
}
columns = ', '.join(weather_sample.keys())
placeholders = ':' + ', :'.join(weather_sample.keys())
query = f"INSERT INTO Weather ({columns}) VALUES ({placeholders})"
self.cursor.execute(query, weather_sample)
def get_or_create_track(self, track_name: str, country: str) -> int:
"""
Get the track_id for a given track, or create a new track if it doesn't exist.
Args:
track_name (str): The name of the track.
country (str): The country where the track is located.
Returns:
int: The track_id of the existing or newly created track.
"""
self.cursor.execute(
"SELECT track_id FROM Tracks WHERE track_name = ? AND country = ?", (track_name, country))
result = self.cursor.fetchone()
if result:
return result[0]
else:
self.cursor.execute(
"INSERT INTO Tracks (track_name, country) VALUES (?, ?)", (track_name, country))
return self.cursor.lastrowid or 0
def __get_lap_id(self, session: Session, driver_name: str, time: datetime) -> int:
"""
Get the lap_id for a given driver and time.
Args:
session (fastf1.core.Session): The FastF1 session.
driver (str): The driver's name or abbreviation.
time (pd.Timestamp): The timestamp to find the corresponding lap.
Returns:
int: The lap_id of the found lap.
"""
laps = session.laps.pick_driver(driver_name).copy()
# Convert LapStartDate to pd.Timestamp for proper comparison
laps['LapStartTime'] = pd.to_datetime(laps['LapStartDate'])
# Find the lap where the given time falls between LapStartTime and LapStartTime of the next lap
matching_laps = laps.loc[(laps['LapStartTime'] <= time) & (
laps['LapStartTime'].shift(-1) > time)]
if matching_laps.empty:
# Handle the case when no matching lap is found
print(
f"No matching lap found for driver {driver_name} at time {time}")
return 999 # or some default value, or raise a custom exception
lap = matching_laps.iloc[0]
if self._session_id is None:
raise ValueError("No ID was generated")
self.cursor.execute("SELECT lap_id FROM Laps WHERE session_id = ? AND driver_name = ? AND lap_number = ?",
(self._session_id, driver_name, lap['LapNumber']))
return self.cursor.fetchone()[0]
def __update_laps(self) -> None:
"""Update the laps table with the new data."""
console.print('> Updating laps table...')
self.cursor.execute('''
UPDATE Laps
SET
pin_in_time_in_datetime = CASE
WHEN pin_in_time_in_datetime = 'NaT' THEN NULL
ELSE pin_in_time_in_datetime
END,
pin_out_time_in_datetime = CASE
WHEN pin_out_time_in_datetime = 'NaT' THEN NULL
ELSE pin_out_time_in_datetime
END
WHERE
pin_in_time_in_datetime = 'NaT'
OR pin_out_time_in_datetime = 'NaT';
''')
def __create_data_analysis_views(self) -> None:
"""Create data analysis views in the database."""
console.print('> Creating data analysis views...')
self.cursor.executescript('''
-- 1. Driver Performance Summary with Weather
CREATE VIEW IF NOT EXISTS DriverPerformanceSummaryWithWeather AS
SELECT
l.driver_name,
e.event_name,
s.session_type,
t.track_name,
COUNT(l.lap_id) AS total_laps,
AVG(l.lap_time_in_seconds) AS avg_lap_time,
MIN(l.lap_time_in_seconds) AS best_lap_time,
AVG(l.sector_1_time_in_seconds) AS avg_sector1_time,
AVG(l.sector_2_time_in_seconds) AS avg_sector2_time,
AVG(l.sector_3_time_in_seconds) AS avg_sector3_time,
AVG(l.finish_line_speed_trap_in_km) AS avg_finish_line_speed,
COUNT(CASE WHEN l.is_personal_best THEN 1 END) AS personal_best_laps,
AVG(w.air_temperature_in_celsius) AS avg_air_temp,
AVG(w.track_temperature_in_celsius) AS avg_track_temp,
SUM(CASE WHEN w.is_raining THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS rain_percentage
FROM Laps l
JOIN Sessions s ON l.session_id = s.session_id
JOIN Tracks t ON s.track_id = t.track_id
JOIN Event e ON s.event_id = e.event_id
LEFT JOIN Weather w ON s.session_id = w.session_id
AND l.lap_start_time_in_datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
GROUP BY l.driver_name, e.event_id, s.session_id;
-- 2. Tyre Performance Analysis with Weather
CREATE VIEW IF NOT EXISTS TyrePerformanceAnalysisWithWeather AS
SELECT
l.driver_name,
e.event_name,
s.session_type,
t.track_name,
l.tyre_compound,
AVG(l.tyre_life_in_laps) AS avg_tyre_life,
AVG(l.lap_time_in_seconds) AS avg_lap_time,
AVG(l.longest_strait_speed_trap_in_km) AS avg_top_speed,
COUNT(CASE WHEN l.is_fresh_tyre THEN 1 END) AS fresh_tyre_laps,
COUNT(CASE WHEN NOT l.is_fresh_tyre THEN 1 END) AS used_tyre_laps,
AVG(w.track_temperature_in_celsius) AS avg_track_temp,
AVG(w.air_temperature_in_celsius) AS avg_air_temp
FROM Laps l
JOIN Sessions s ON l.session_id = s.session_id
JOIN Tracks t ON s.track_id = t.track_id
JOIN Event e ON s.event_id = e.event_id
LEFT JOIN Weather w ON s.session_id = w.session_id
AND l.lap_start_time_in_datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
GROUP BY l.driver_name, e.event_id, s.session_id, l.tyre_compound;
-- 3. Weather Impact Analysis
CREATE VIEW IF NOT EXISTS WeatherImpactAnalysis AS
SELECT
e.event_name,
s.session_type,
t.track_name,
AVG(w.air_temperature_in_celsius) AS avg_air_temp,
AVG(w.track_temperature_in_celsius) AS avg_track_temp,
AVG(w.relative_air_humidity_in_percentage) AS avg_humidity,
AVG(w.wind_speed_in_meters_per_seconds) AS avg_wind_speed,
SUM(CASE WHEN w.is_raining THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS rain_percentage,
AVG(l.lap_time_in_seconds) AS avg_lap_time,
MIN(l.lap_time_in_seconds) AS best_lap_time
FROM Weather w
JOIN Sessions s ON w.session_id = s.session_id
JOIN Tracks t ON s.track_id = t.track_id
JOIN Event e ON s.event_id = e.event_id
JOIN Laps l ON s.session_id = l.session_id
AND l.lap_start_time_in_datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
GROUP BY e.event_id, s.session_id;
-- 4. Event Performance Overview
CREATE VIEW IF NOT EXISTS EventPerformanceOverview AS
SELECT
e.event_name,
e.country,
e.location,
s.session_type,
COUNT(DISTINCT l.driver_name) AS driver_count,
AVG(l.lap_time_in_seconds) AS avg_lap_time,
MIN(l.lap_time_in_seconds) AS best_lap_time,
MAX(l.finish_line_speed_trap_in_km) AS max_finish_line_speed,
AVG(w.air_temperature_in_celsius) AS avg_air_temp,
AVG(w.track_temperature_in_celsius) AS avg_track_temp,
SUM(CASE WHEN w.is_raining THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS rain_percentage
FROM Event e
JOIN Sessions s ON e.event_id = s.event_id
JOIN Laps l ON s.session_id = l.session_id
LEFT JOIN Weather w ON s.session_id = w.session_id
AND l.lap_start_time_in_datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
GROUP BY e.event_id, s.session_id;
-- 5. Telemetry Analysis with Weather (Optimized)
CREATE VIEW IF NOT EXISTS TelemetryAnalysisWithWeather AS
WITH SampledTelemetry AS (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY lap_id ORDER BY RANDOM()) as rn
FROM Telemetry
)
SELECT
l.lap_id,
l.driver_name,
e.event_name,
s.session_type,
t.track_name,
l.lap_number,
l.lap_time_in_seconds,
AVG(tel.speed_in_km) AS avg_speed,
MAX(tel.speed_in_km) AS max_speed,
AVG(tel.RPM) AS avg_RPM,
MAX(tel.RPM) AS max_RPM,
AVG(tel.throttle_input) AS avg_throttle,
SUM(CASE WHEN tel.is_brake_pressed THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS brake_percentage,
SUM(CASE WHEN tel.is_DRS_open THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS drs_usage_percentage,
SUM(CASE WHEN tel.is_off_track THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS off_track_percentage,
AVG(w.air_temperature_in_celsius) AS avg_air_temp,
AVG(w.track_temperature_in_celsius) AS avg_track_temp,
AVG(w.wind_speed_in_meters_per_seconds) AS avg_wind_speed
FROM Laps l
JOIN Sessions s ON l.session_id = s.session_id
JOIN Tracks t ON s.track_id = t.track_id
JOIN Event e ON s.event_id = e.event_id
JOIN SampledTelemetry tel ON l.lap_id = tel.lap_id AND tel.rn <= 100
LEFT JOIN Weather w ON s.session_id = w.session_id
AND tel.datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
GROUP BY l.lap_id;
''')
self.conn.commit()
# Usage example:
session = fastf1.get_session(2023, 'Bahrain', 'Q')
converter = FastF1ToSQL('Bahrain_2023_Q.db')
converter.process_session(session)