omwdataset

Runtime error

App Files Files Community

victormiller committed on Sep 29

Commit

f29b166

•

1 Parent(s): 42102b3

Update results.py

Browse files

Files changed (1) hide show

results.py +41 -1

results.py CHANGED Viewed

@@ -45,8 +45,47 @@ fig.update_layout(
 Perplexity_Across_Different_Buckets_global_graph = fig
 intro_div = Div(
@@ -63,11 +102,13 @@ perp1_div = Div(
             H3("Perplexity vs Buckets"),
             P("For each bucket, we aggregated all the chunks that belong to a single year and calculated the average perplexity for each (bucket, year) data point."),
             Img(src="images/prep-diff-buckets-global.png", height = "300", width = "600" ),
         ),
         Section(
             H3("Perplexity vs Years"),
             P("Taking the same data, we can convert it into a graph indicating the yearly trend. For most buckets, the average perplexity of dumps from more recent years seem to be lower than that of former years."),
             Img(src="images/prep-across-diff-year-global-dup-buckets.png", height = "300", width = "600" ),
         ),
     Section(
             H3("Perplexity vs Document Duplication"),
@@ -127,7 +168,6 @@ def results():
                     perp1_div,
                     llama_div,
                     P("test plotly"),
-                    plotly2fasthtml(Perplexity_Across_Different_Buckets_global_graph),
                     id="inner-text"
                 )
     )

 Perplexity_Across_Different_Buckets_global_graph = fig
+import plotly.graph_objects as go
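+# Average perplexity per (year, bucket): each entry is [year, [bucket labels, average perplexity for each bucket in the same order]]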
+DATA = [
+    ["2014", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [17.410227605477868, 16.11176217183986, 15.632757662414805, 15.446116676532212, 16.716943171826703, 18.156821563322765]]],
+    ["2015", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [17.446573602753478, 16.14852530113782, 15.627408549576069, 15.0055028132117, 15.565430373421485, 17.314701050452452]]],
+    ["2016", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [17.307221780905284, 16.297702171159543, 15.948641884223639, 14.799690714225637, 14.935989931859659, 16.09585768919658]]],
+    ["2017", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [17.338525603992114, 15.960924352297502, 15.912187993988933, 14.822102470001267, 14.778913482337416, 15.428145290012955]]],
+    ["2018", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [17.08551151136689, 16.187802102106698, 14.935072408852303, 14.832038213200583, 14.508674264491997, 14.800605964649103]]],
+    ["2019", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [16.818363305107052, 16.474269837858706, 14.944741674400241, 14.568394784374943, 14.690158822673334, 15.990949424635108]]],
+    ["2020", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [16.98821894111693, 15.936494557783181, 14.79960386342691, 14.435682562274105, 14.58651834886038, 15.869365567783806]]],
+    ["2021", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [17.125795647512877, 15.780419457145868, 14.631430892394002, 14.276477514399625, 14.337146941773641, 15.872474774329305]]],
+    ["2022", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [16.573462144306383, 15.283018703313582, 14.378277745163881, 14.0611924390084, 13.9886330091318, 15.769421394877273]]],
+    ["2023", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [15.4293630385597, 14.608379914730168, 14.118271697056592, 13.880215644749589, 13.767106666731275, 15.05749135510839]]]
+]
+# Split DATA into the x-axis years, the bucket labels (taken from the first entry only; assumes every year lists the same buckets in the same order), and one row of values per year
+years = [year_data[0] for year_data in DATA]
+ranges = DATA[0][1][0]
+all_values = [year_data[1][1] for year_data in DATA]
+# Start a new, empty Plotly figure; this rebinds `fig`, which was already saved above as Perplexity_Across_Different_Buckets_global_graph
+fig = go.Figure()
+# One line per bucket: take column i from every year's row so each trace spans all years
+for i, range_label in enumerate(ranges):
+    values = [year_values[i] for year_values in all_values]
+    fig.add_trace(go.Scatter(x=years, y=values, mode='lines+markers', name=range_label))
+# Title and label the axes and legend; "x unified" hover mode groups the values of all traces at the hovered year into a single hover label
+fig.update_layout(
+    title="Perplexity over Time by Buckets",
+    xaxis_title="Year",
+    yaxis_title="Perplexity",
+    legend_title="Buckets",
+    hovermode="x unified"
+)
+# Nothing is displayed here: keep the finished figure under its own name so the page layout can embed it via plotly2fasthtml
+Perplexity_Across_Different_years_graph = fig
 intro_div = Div(
             H3("Perplexity vs Buckets"),
             P("For each bucket, we aggregated all the chunks that belong to a single year and calculated the average perplexity for each (bucket, year) data point."),
             Img(src="images/prep-diff-buckets-global.png", height = "300", width = "600" ),
+            plotly2fasthtml(Perplexity_Across_Different_Buckets_global_graph),
         ),
         Section(
             H3("Perplexity vs Years"),
             P("Taking the same data, we can convert it into a graph indicating the yearly trend. For most buckets, the average perplexity of dumps from more recent years seem to be lower than that of former years."),
             Img(src="images/prep-across-diff-year-global-dup-buckets.png", height = "300", width = "600" ),
+            plotly2fasthtml(Perplexity_Across_Different_years_graph),
         ),
     Section(
             H3("Perplexity vs Document Duplication"),
                     perp1_div,
                     llama_div,
                     P("test plotly"),
                     id="inner-text"
                 )
     )