File size: 11,169 Bytes
bae5414
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# -*- coding: utf-8 -*-
"""SQL_Queries.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1JMMq3yCv2xWTEZbs9S28qx4lXdF88M5d
"""

sql_queries = {
    'Count the number of records in the dataset': 'SELECT COUNT(*) FROM ved_test.synthetic_data;',
    'Get the average Weighted_Avg_PRB_Util_UL': 'SELECT AVG(Weighted_Avg_PRB_Util_UL) FROM ved_test.synthetic_data;',
    'Get the minimum UE_Pwr_Restricted_Pct value': 'SELECT MIN(UE_Pwr_Restricted_Pct) FROM ved_test.synthetic_data;',
    'Calculate the total UE_Pwr_Unrestricted_Pct_Num': 'SELECT SUM(UE_Pwr_Unrestricted_Pct_Num) FROM ved_test.synthetic_data;',
    'Get the distinct values of Network_Engineer': 'SELECT DISTINCT Network_Engineer FROM ved_test.synthetic_data;',
    'Count the number of records for each Network_Engineer': 'SELECT Network_Engineer, COUNT(*) FROM ved_test.synthetic_data GROUP BY Network_Engineer;',
    'Calculate the average RTT for each Network_Engineer': 'SELECT Network_Engineer, AVG(RTT) AS avg_rtt FROM ved_test.synthetic_data GROUP BY Network_Engineer;',
    'Calculate the average Jitter for weekends and weekdays': 'SELECT WEEKEND, AVG(Jitter) AS avg_jitter FROM ved_test.synthetic_data GROUP BY WEEKEND;',
    'Get the average Jitter and Packet Loss for each hour': 'SELECT Hour, AVG(Jitter) AS avg_jitter, AVG(`DL Packet Loss Pct`) AS avg_packet_loss FROM ved_test.synthetic_data GROUP BY Hour ORDER BY Hour;',
    'Find the top 5 records with the highest UPTP_Mbps': 'SELECT * FROM ved_test.synthetic_data ORDER BY UPTP_Mbps DESC LIMIT 5;',
    'Calculate the average HARQ_BLER_Pct for each 5G_Reliability_Category': 'SELECT `5G Reliability Category`, AVG(HARQ_BLER_Pct) AS avg_harq_bler_pct FROM ved_test.synthetic_data GROUP BY `5G Reliability Category`;',
    'Find the average RTT and Jitter for each combination of Market and Hour': 'SELECT Market, Hour, AVG(RTT) AS avg_rtt, AVG(Jitter) AS avg_jitter FROM ved_test.synthetic_data GROUP BY Market, Hour;',
    'Get the top 3 Network_Engineers with the highest average 5G_Reliability_Score': 'SELECT Network_Engineer, AVG(`5G Reliability Score`) AS avg_reliability_score FROM ved_test.synthetic_data GROUP BY Network_Engineer ORDER BY avg_reliability_score DESC LIMIT 3;',
    'Calculate the average Weighted_Avg_PRB_Util_UL and DL_MAC_Vol_Scell_Pct for each day of the week': 'SELECT EXTRACT(DAYOFWEEK FROM Timestamp) AS day_of_week, AVG(Weighted_Avg_PRB_Util_UL) AS avg_prb_util, AVG(`DL MAC Vol Scell Pct`) AS avg_dl_mac_vol FROM ved_test.synthetic_data GROUP BY day_of_week;',
    'Get the average HARQ_BLER_Pct for each Market on weekends': 'SELECT Market, AVG(HARQ_BLER_Pct) AS avg_harq_bler_pct FROM ved_test.synthetic_data WHERE WEEKEND = 1 GROUP BY Market;',
    'Calculate the total Bearer Releases for records where Bearer_Setup_Failure_Pct is greater than 80% and group by Market': 'SELECT Market, SUM(`Bearer Releases`) AS total_bearer_release FROM ved_test.synthetic_data WHERE Bearer_Setup_Failure_Pct > 0.8 GROUP BY Market;',
    'Find the Market with the highest total UE_Pwr_Unrestricted_Pct_Num and the total value': 'SELECT Market, SUM(UE_Pwr_Unrestricted_Pct_Num) AS total_unrestricted_pwr FROM ved_test.synthetic_data GROUP BY Market ORDER BY total_unrestricted_pwr DESC LIMIT 1;',
    'Get the average UPTP_Mbps and RRC_Reestab_Attempts for each Market for weekdays and weekends': 'SELECT Market, WEEKEND, AVG(UPTP_Mbps) AS avg_uptp_mbps, AVG(RRC_Reestab_Attempts) AS avg_rrc_attempts FROM ved_test.synthetic_data GROUP BY Market, WEEKEND;',
    'Find the correlation between RTT and Jitter for each Market': 'SELECT Market, CORR(RTT, Jitter) AS rtt_jitter_correlation FROM ved_test.synthetic_data GROUP BY Market;',
    'Calculate the average Weighted_Avg_PRB_Util_UL for each day of the week': 'SELECT EXTRACT(DAYOFWEEK FROM Timestamp) AS day_of_week, AVG(Weighted_Avg_PRB_Util_UL) AS avg_prb_util FROM ved_test.synthetic_data GROUP BY day_of_week;',
    'Calculate the average UPTP_Mbps for each 5G_Reliability_Category during weekdays and weekends': 'SELECT `5G Reliability Category`, WEEKEND, AVG(UPTP_Mbps) AS avg_uptp_mbps FROM ved_test.synthetic_data GROUP BY `5G Reliability Category`, WEEKEND ORDER BY `5G Reliability Category`, WEEKEND;',
    'Identify the hour with the highest average HARQ_BLER_Pct': 'SELECT Hour, AVG(HARQ_BLER_Pct) AS avg_harq_bler_pct FROM ved_test.synthetic_data GROUP BY Hour ORDER BY avg_harq_bler_pct DESC LIMIT 1;',
    'Calculate the standard deviation of 5G_Reliability_Score for each Network_Engineer': 'SELECT Network_Engineer, STDDEV(`5G Reliability Score`) AS stddev_reliability_score FROM ved_test.synthetic_data GROUP BY Network_Engineer;',
    'Find the top 3 hours with the highest total UE_Pwr_Unrestricted_Pct_Num during weekends': 'SELECT Hour, SUM(UE_Pwr_Unrestricted_Pct_Num) AS total_unrestricted_pwr FROM ved_test.synthetic_data WHERE WEEKEND = 1 GROUP BY Hour ORDER BY total_unrestricted_pwr DESC LIMIT 3;',
    'Determine the Network_Engineer with the highest average DL MAC Vol Scell Pct and the average value': 'SELECT Network_Engineer, AVG(`DL MAC Vol Scell Pct`) AS avg_dl_mac_vol FROM ved_test.synthetic_data GROUP BY Network_Engineer ORDER BY avg_dl_mac_vol DESC LIMIT 1;',
    'Find the variance in Jitter for each 5G_Reliability_Category': 'SELECT `5G Reliability Category`, VARIANCE(Jitter) AS variance_jitter FROM ved_test.synthetic_data GROUP BY `5G Reliability Category`;',
    'Calculate the correlation between RTT and 5G Reliability Value for each Reliability Category': 'SELECT `5G Reliability Category`, CORR(RTT, `5G Reliability Value`) AS rtt_reliability_correlation FROM ved_test.synthetic_data GROUP BY `5G Reliability Category`;',
    'Calculate the average and median MTTR for each Network Engineer filtered by 5G Reliability Value being above the overall average score': '''SELECT Network_Engineer, AVG(MTTR) AS avg_mttr, (
        SELECT AVG(middle_vals)
        FROM (
            SELECT MTTR AS middle_vals, ROW_NUMBER() OVER (PARTITION BY Network_Engineer ORDER BY MTTR) AS rnk, COUNT(*) OVER (PARTITION BY Network_Engineer) AS cnt
            FROM ved_test.synthetic_data
            WHERE `5G Reliability Value` > (SELECT AVG(`5G Reliability Value`) FROM ved_test.synthetic_data)
        ) AS subquery
        WHERE rnk IN (FLOOR((cnt + 1) / 2.0), FLOOR((cnt + 2) / 2.0))
    ) AS median_mttr
    FROM ved_test.synthetic_data
    WHERE `5G Reliability Value` > (SELECT AVG(`5G Reliability Value`) FROM ved_test.synthetic_data)
    GROUP BY Network_Engineer;''',
    'Find the count of records per 5G_Reliability_Category where 5G_Reliability_Value is below the average for the category': '''SELECT `5G Reliability Category`, COUNT(*) as count
    FROM ved_test.synthetic_data AS s1
    WHERE `5G Reliability Value` < (
      SELECT AVG(`5G Reliability Value`)
      FROM ved_test.synthetic_data AS s2
      WHERE s2.`5G Reliability Category` = s1.`5G Reliability Category`
    )
    GROUP BY `5G Reliability Category`;''',
    'Find the top 5 records with the highest HO Attempts for each Network_Engineer': '''SELECT s1.*
    FROM ved_test.synthetic_data s1
    JOIN (
      SELECT Network_Engineer, `HO Attempts`
      FROM (
        SELECT Network_Engineer, `HO Attempts`, ROW_NUMBER() OVER (PARTITION BY Network_Engineer ORDER BY `HO Attempts` DESC) AS rn
        FROM ved_test.synthetic_data
      ) temp
      WHERE rn <= 5
    ) s2
    ON s1.Network_Engineer = s2.Network_Engineer AND s1.`HO Attempts` = s2.`HO Attempts`;''',
    'Calculate the exponential moving average of 5G_Reliability_Value for each Network_Engineer with a smoothing factor of 0.1': '''WITH ema AS (
      SELECT Timestamp, Network_Engineer, `5G Reliability Value`, CAST(NULL AS FLOAT64) AS ema_value,
             ROW_NUMBER() OVER (PARTITION BY Network_Engineer ORDER BY Timestamp) AS row_num
      FROM ved_test.synthetic_data
    )
    SELECT a.Timestamp, a.Network_Engineer, a.`5G Reliability Value`,
           CASE
             WHEN a.row_num = 1 THEN a.`5G Reliability Value`
             ELSE (0.1 * a.`5G Reliability Value` + 0.9 * b.ema_value)
           END AS ema_value
    FROM ema a
    LEFT JOIN

 ema b
    ON a.Network_Engineer = b.Network_Engineer AND a.row_num = b.row_num + 1;''',
     "Find the records with the highest HARQ_BLER_Pct for each 5G_Reliability_Category": '''SELECT s1.*
FROM ved_test.synthetic_data s1
JOIN (
  SELECT `5G Reliability Category`, MAX(HARQ_BLER_Pct) AS max_harq_bler_pct
  FROM ved_test.synthetic_data
  GROUP BY `5G Reliability Category`
) s2
ON s1.`5G Reliability Category` = s2.`5G Reliability Category` AND s1.HARQ_BLER_Pct = s2.max_harq_bler_pct;''',

    "Identify the top 3 hours with the highest average UPTP_Mbps for each Market, including the variance in Jitter during these hours": '''WITH avg_uptp AS (
  SELECT Market, Hour, AVG(UPTP_Mbps) AS avg_uptp_mbps, VARIANCE(Jitter) AS jitter_variance,
         ROW_NUMBER() OVER (PARTITION BY Market ORDER BY AVG(UPTP_Mbps) DESC) AS rn
  FROM ved_test.synthetic_data
  GROUP BY Market, Hour
)
SELECT Market, Hour, avg_uptp_mbps, jitter_variance
FROM avg_uptp
WHERE rn <= 3;''',

    "Determine the Sector with the highest variance in 5G Reliability Value and its corresponding average Context Drop Percent": '''WITH variance_scores AS (
  SELECT Sector, VARIANCE(`5G Reliability Value`) AS score_variance, AVG(Context_Drop_Pct) AS avg_context_drop
  FROM ved_test.synthetic_data
  GROUP BY Sector
)
SELECT Sector, score_variance, avg_context_drop
FROM variance_scores
ORDER BY score_variance DESC
LIMIT 1;''',

    "Find hours where the average UPTP_Mbps is significantly different than the daily average (more than 2 standard deviations away from the mean)": '''WITH daily_stats AS (
    SELECT DATE(Timestamp) AS day, AVG(UPTP_Mbps) AS daily_avg_uptp, STDDEV(UPTP_Mbps) AS daily_stddev_uptp
    FROM ved_test.synthetic_data
    GROUP BY day
),
hourly_stats AS (
    SELECT DATE(Timestamp) AS day, EXTRACT(HOUR FROM Timestamp) AS hour, AVG(UPTP_Mbps) AS hourly_avg_uptp
    FROM ved_test.synthetic_data
    GROUP BY day, hour
)
SELECT
    hs.day, hs.hour, hs.hourly_avg_uptp, ds.daily_avg_uptp, ds.daily_stddev_uptp
FROM hourly_stats hs
JOIN daily_stats ds ON hs.day = ds.day
WHERE hs.hourly_avg_uptp > (ds.daily_avg_uptp + 2 * ds.daily_stddev_uptp)
   OR hs.hourly_avg_uptp < (ds.daily_avg_uptp - 2 * ds.daily_stddev_uptp)
ORDER BY hs.day, hs.hour;''',

    "Identify Days where more than 10% of the records have RRC Setup Failure above 25% for each region": '''WITH daily_rrc_failures AS (
    SELECT
        Region, DATE(Timestamp) AS day, COUNT(*) AS total_records,
        SUM(CASE WHEN `RRC Setup Failure% 5G`> 0.25 THEN 1 ELSE 0 END) AS high_failure_count
    FROM ved_test.synthetic_data
    GROUP BY Region, day
),
daily_rrc_failure_ratio AS (
    SELECT
        Region, day, total_records, high_failure_count, (high_failure_count / total_records) * 100 AS failure_ratio
    FROM daily_rrc_failures
)
SELECT
    Region, day, total_records, high_failure_count, failure_ratio
FROM daily_rrc_failure_ratio
WHERE failure_ratio > 10
ORDER BY Region, day;'''

}