Update to 3.13.0 (#7)

Commits:
- Update to 3.13.0 (233f10f2062869ed8d8890347e8f8b4fd4492036)
- fix/wrong key (9e932a5823023b334f98ec42e610165ac2b4ad69)

Files changed:
- app.py +3 -3
- fsrs4anki_optimizer.ipynb +0 -0
- model.py +31 -14
- plot.py +33 -26
- utilities.py +2 -1
app.py CHANGED

```diff
@@ -28,7 +28,7 @@ def anki_optimizer(file, timezone, next_day_starts_at, revlog_start_date, requestRetention,
 
     proj_dir = extract(file, prefix)
 
-    type_sequence, df_out = create_time_series_features(revlog_start_date, timezone, next_day_starts_at, proj_dir)
+    type_sequence, time_sequence, df_out = create_time_series_features(revlog_start_date, timezone, next_day_starts_at, proj_dir)
     w, dataset = train_model(proj_dir)
     w_markdown = get_w_markdown(w)
     cleanup(proj_dir, files)
@@ -38,7 +38,7 @@ def anki_optimizer(file, timezone, next_day_starts_at, revlog_start_date, requestRetention,
 
     my_collection, rating_markdown = process_personalized_collection(requestRetention, w)
     difficulty_distribution_padding, difficulty_distribution = get_my_memory_states(proj_dir, dataset, my_collection)
-    fig, suggested_retention_markdown = make_plot(proj_dir, type_sequence, w, difficulty_distribution_padding)
+    fig, suggested_retention_markdown = make_plot(proj_dir, type_sequence, time_sequence, w, difficulty_distribution_padding)
     loss_markdown = my_loss(dataset, w)
     difficulty_distribution = difficulty_distribution.to_string().replace("\n", "\n\n")
     markdown_out = f"""
@@ -58,7 +58,7 @@ def anki_optimizer(file, timezone, next_day_starts_at, revlog_start_date, requestRetention,
 
 
 description = """
-# FSRS4Anki Optimizer App - v3.
+# FSRS4Anki Optimizer App - v3.13.0
 Based on the [tutorial](https://medium.com/@JarrettYe/how-to-use-the-next-generation-spaced-repetition-algorithm-fsrs-on-anki-5a591ca562e2)
 of [Jarrett Ye](https://github.com/L-M-Sherlock). This application can give you personalized anki parameters without having to code.
```
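One line in this hunk is easy to miss: `difficulty_distribution.to_string().replace("\n", "\n\n")` double-spaces the Series dump, presumably because single newlines collapse when the string is embedded in the Gradio markdown output. A minimal sketch with made-up numbers:

```python
import pandas as pd

# Toy stand-in for the real difficulty distribution (values are made up).
difficulty_distribution = pd.Series([0.12, 0.30, 0.58], index=[1, 2, 3])

# Double each newline so every difficulty row survives as its own
# paragraph when rendered as markdown.
print(difficulty_distribution.to_string().replace("\n", "\n\n"))
```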
fsrs4anki_optimizer.ipynb CHANGED

The diff for this file is too large to render; see the raw diff.
model.py CHANGED

```diff
@@ -2,7 +2,24 @@ import numpy as np
 import torch
 from torch import nn
 
-init_w = [1, 1, 5, -0.5, -0.5, 0.2, 1.4, -0.12, 0.8, 2, -0.2, 0.2, 1]
+init_w = [1, 1, 5, -0.5, -0.5, 0.2, 1.4, -0.12, 0.8, 2, -0.2, 0.2, 1]
+'''
+w[0]: initial_stability_for_again_answer
+w[1]: initial_stability_step_per_rating
+w[2]: initial_difficulty_for_good_answer
+w[3]: initial_difficulty_step_per_rating
+w[4]: next_difficulty_step_per_rating
+w[5]: next_difficulty_reversion_to_mean_speed (used to avoid ease hell)
+w[6]: next_stability_factor_after_success
+w[7]: next_stability_stabilization_decay_after_success
+w[8]: next_stability_retrievability_gain_after_success
+w[9]: next_stability_factor_after_failure
+w[10]: next_stability_difficulty_decay_after_success
+w[11]: next_stability_stability_gain_after_failure
+w[12]: next_stability_retrievability_gain_after_failure
+For more details about the parameters, please see:
+https://github.com/open-spaced-repetition/fsrs4anki/wiki/Free-Spaced-Repetition-Scheduler
+'''
 
 
 class FSRS(nn.Module):
@@ -54,19 +71,19 @@ class WeightClipper(object):
     def __call__(self, module):
         if hasattr(module, 'w'):
             w = module.w.data
-            w[0] = w[0].clamp(0.1, 10)
-            w[1] = w[1].clamp(0.1, 5)
-            w[2] = w[2].clamp(1, 10)
-            w[3] = w[3].clamp(-5, -0.1)
-            w[4] = w[4].clamp(-5, -0.1)
-            w[5] = w[5].clamp(0, 0.5)
-            w[6] = w[6].clamp(0, 2)
-            w[7] = w[7].clamp(-0.2, -0.01)
-            w[8] = w[8].clamp(0.01, 1.5)
-            w[9] = w[9].clamp(0.5, 5)
-            w[10] = w[10].clamp(-2, -0.01)
-            w[11] = w[11].clamp(0.01, 0.9)
-            w[12] = w[12].clamp(0.01, 2)
+            w[0] = w[0].clamp(0.1, 10)
+            w[1] = w[1].clamp(0.1, 5)
+            w[2] = w[2].clamp(1, 10)
+            w[3] = w[3].clamp(-5, -0.1)
+            w[4] = w[4].clamp(-5, -0.1)
+            w[5] = w[5].clamp(0, 0.5)
+            w[6] = w[6].clamp(0, 2)
+            w[7] = w[7].clamp(-0.2, -0.01)
+            w[8] = w[8].clamp(0.01, 1.5)
+            w[9] = w[9].clamp(0.5, 5)
+            w[10] = w[10].clamp(-2, -0.01)
+            w[11] = w[11].clamp(0.01, 0.9)
+            w[12] = w[12].clamp(0.01, 2)
             module.w.data = w
 
 
```
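`WeightClipper` has the usual shape of a callable handed to `nn.Module.apply`: it projects the weights back into the ranges the scheduler's formulas tolerate after each optimizer step. A standalone sketch of that pattern (the one-bound clipper and the `TinyFSRS` module below are illustrative stand-ins; the real class clamps all 13 entries exactly as shown in the diff above):

```python
import torch
from torch import nn

class WeightClipper(object):
    # Illustrative re-declaration clamping only w[0]; the real model.py
    # version applies the 13 bounds shown above.
    def __call__(self, module):
        if hasattr(module, 'w'):
            w = module.w.data
            w[0] = w[0].clamp(0.1, 10)
            module.w.data = w

class TinyFSRS(nn.Module):
    # Hypothetical minimal module exposing a 13-element weight vector like FSRS.
    def __init__(self):
        super().__init__()
        self.w = nn.Parameter(torch.tensor(
            [1, 1, 5, -0.5, -0.5, 0.2, 1.4, -0.12, 0.8, 2, -0.2, 0.2, 1],
            dtype=torch.float32))

model = TinyFSRS()
model.w.data[0] = 42.0         # pretend an optimizer step pushed w[0] out of range
model.apply(WeightClipper())   # .apply() runs the clipper on the module itself too
print(model.w.data[0].item())  # 10.0 -- clamped back into [0.1, 10]
```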
plot.py CHANGED

```diff
@@ -5,28 +5,34 @@ import numpy as np
 import plotly.express as px
 
 
-def make_plot(proj_dir, type_sequence, w, difficulty_distribution_padding, progress=gr.Progress(track_tqdm=True)):
+def make_plot(proj_dir, type_sequence, time_sequence, w, difficulty_distribution_padding, progress=gr.Progress(track_tqdm=True)):
     base = 1.01
-    index_len =
-    index_offset =
+    index_len = 793
+    index_offset = 200
     d_range = 10
     d_offset = 1
-
-
-
+    r_time = 8
+    f_time = 25
+    max_time = 200000
 
     type_block = dict()
     type_count = dict()
+    type_time = dict()
     last_t = type_sequence[0]
     type_block[last_t] = 1
     type_count[last_t] = 1
-
+    type_time[last_t] = time_sequence[0]
+    for i,t in enumerate(type_sequence[1:]):
         type_count[t] = type_count.setdefault(t, 0) + 1
+        type_time[t] = type_time.setdefault(t, 0) + time_sequence[i]
         if t != last_t:
             type_block[t] = type_block.setdefault(t, 0) + 1
             last_t = t
+
+    r_time = round(type_time[1]/type_count[1]/1000, 1)
+
     if 2 in type_count and 2 in type_block:
-
+        f_time = round(type_time[2]/type_block[2]/1000 + r_time, 1)
 
     def stability2index(stability):
         return int(round(np.log(stability) / np.log(base)) + index_offset)
@@ -42,18 +48,18 @@ def make_plot(proj_dir, type_sequence, w, difficulty_distribution_padding, progress=gr.Progress(track_tqdm=True)):
 
     stability_list = np.array([np.power(base, i - index_offset) for i in range(index_len)])
     # print(f"terminal stability: {stability_list.max(): .2f}")
-    df = pd.DataFrame(columns=["retention", "difficulty", "
+    df = pd.DataFrame(columns=["retention", "difficulty", "time"])
 
-    for percentage in trange(96,
+    for percentage in trange(96, 66, -2, desc='Time vs Retention plot'):
         recall = percentage / 100
-
-
+        time_list = np.zeros((d_range, index_len))
+        time_list[:,:-1] = max_time
         for d in range(d_range, 0, -1):
             s0 = init_stability(d)
             s0_index = stability2index(s0)
-            diff =
+            diff = max_time
             while diff > 0.1:
-
+                s0_time = time_list[d - 1][s0_index]
                 for s_index in range(index_len - 2, -1, -1):
                     stability = stability_list[s_index];
                     interval = max(1, round(stability * np.log(recall) / np.log(0.9)))
@@ -63,29 +69,30 @@ def make_plot(proj_dir, type_sequence, w, difficulty_distribution_padding, progress=gr.Progress(track_tqdm=True)):
                     forget_s = cal_next_recall_stability(stability, p_recall, forget_d, 0)
                     recall_s_index = min(stability2index(recall_s), index_len - 1)
                     forget_s_index = min(max(stability2index(forget_s), 0), index_len - 1)
-
-
-
-                    if
-
-                diff =
-            df.loc[0 if pd.isnull(df.index.max()) else df.index.max() + 1] = [recall, d,
+                    recall_time = time_list[d - 1][recall_s_index] + r_time
+                    forget_time = time_list[forget_d - 1][forget_s_index] + f_time
+                    exp_time = p_recall * recall_time + (1.0 - p_recall) * forget_time
+                    if exp_time < time_list[d - 1][s_index]:
+                        time_list[d - 1][s_index] = exp_time
+                diff = s0_time - time_list[d - 1][s0_index]
+            df.loc[0 if pd.isnull(df.index.max()) else df.index.max() + 1] = [recall, d, s0_time]
+
 
     df.sort_values(by=["difficulty", "retention"], inplace=True)
-    df.to_csv(proj_dir/"
+    df.to_csv(proj_dir/"expected_time.csv", index=False)
     # print("expected_repetitions.csv saved.")
 
     optimal_retention_list = np.zeros(10)
     df2 = pd.DataFrame()
     for d in range(1, d_range + 1):
         retention = df[df["difficulty"] == d]["retention"]
-
-        optimal_retention = retention.iat[
+        time = df[df["difficulty"] == d]["time"]
+        optimal_retention = retention.iat[time.argmin()]
         optimal_retention_list[d - 1] = optimal_retention
         df2 = df2.append(
-            pd.DataFrame({'retention': retention, 'expected
+            pd.DataFrame({'retention': retention, 'expected time': time, 'd': d, 'r': optimal_retention}))
 
-    fig = px.line(df2, x="retention", y="expected
+    fig = px.line(df2, x="retention", y="expected time", color='d', log_y=True)
 
     # print(f"\n-----suggested retention: {np.inner(difficulty_distribution_padding, optimal_retention_list):.2f}-----")
     suggested_retention_markdown = f"""# Suggested Retention: `{np.inner(difficulty_distribution_padding, optimal_retention_list):.2f}`"""
```
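What the rewritten loop computes: `time_list[d - 1][s_index]` holds the expected further review time (in seconds) to carry a card of difficulty `d` from a given stability to the terminal stability, and the sweep is a value-iteration relaxation, with each state taking the cheaper of its current value and `p_recall * recall_time + (1 - p_recall) * forget_time`, repeated until the start state improves by less than 0.1. A toy version of the same relaxation on a hypothetical three-state chain:

```python
import numpy as np

# Toy model of the relaxation in make_plot: state 2 is "learned" (cost 0),
# a successful review moves a card up one state at cost r_time, and a lapse
# knocks it back to state 0 at cost f_time. The real code indexes states by
# (difficulty, stability index) and derives p_recall from the interval.
r_time, f_time, p_recall = 8.0, 25.0, 0.9
time = np.array([200000.0, 200000.0, 0.0])  # init like time_list[:, :-1] = max_time

diff = np.inf
while diff > 0.1:
    before = time[0]
    for s in (1, 0):  # sweep from stronger states to weaker, as the real loop does
        exp_time = p_recall * (time[s + 1] + r_time) + (1 - p_recall) * (time[0] + f_time)
        if exp_time < time[s]:
            time[s] = exp_time
    diff = before - time[0]

print(round(time[0], 1))  # converged expected review time from the weakest state
```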
utilities.py CHANGED

```diff
@@ -62,6 +62,7 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
     df.drop(df[df['review_date'].dt.year < 2006].index, inplace=True)
     df.sort_values(by=['cid', 'id'], inplace=True, ignore_index=True)
     type_sequence = np.array(df['type'])
+    time_sequence = np.array(df['time'])
     df.to_csv(proj_dir / "revlog.csv", index=False)
     # print("revlog.csv saved.")
     df = df[(df['type'] == 0) | (df['type'] == 1)].copy()
@@ -152,7 +153,7 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
 
     df_out = df[df['r_history'].str.contains(r'^[1-4][^124]*$', regex=True)][
         ['r_history', 'avg_interval', 'avg_retention', 'stability', 'factor', 'group_cnt']]
-    return type_sequence, df_out
+    return type_sequence, time_sequence, df_out
 
 
 def train_model(proj_dir, progress=gr.Progress(track_tqdm=True)):
```
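On the new column itself: Anki's revlog `time` field records the answer duration in milliseconds, which is why `make_plot` divides the accumulated totals by 1000 before treating them as seconds. A minimal sketch of what the two added lines extract, on a hypothetical three-row revlog:

```python
import numpy as np
import pandas as pd

# Hypothetical revlog slice; 'type' follows Anki's scheme
# (0 = learning, 1 = review, 2 = relearning) and 'time' is in milliseconds.
df = pd.DataFrame({'type': [0, 1, 2], 'time': [12000, 8000, 30000]})

type_sequence = np.array(df['type'])  # consumed by make_plot's block/count tallies
time_sequence = np.array(df['time'])  # new in this commit: feeds r_time / f_time
print(type_sequence, time_sequence)
```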