JarrettYe committed
Commit b430afc
1 Parent(s): 81fd061
Files changed (5)
  1. app.py +3 -3
  2. fsrs4anki_optimizer.ipynb +0 -0
  3. model.py +31 -14
  4. plot.py +33 -26
  5. utilities.py +2 -1
app.py CHANGED
@@ -28,7 +28,7 @@ def anki_optimizer(file, timezone, next_day_starts_at, revlog_start_date, reques
 
     proj_dir = extract(file, prefix)
 
-    type_sequence, df_out = create_time_series_features(revlog_start_date, timezone, next_day_starts_at, proj_dir)
+    type_sequence, time_sequence, df_out = create_time_series_features(revlog_start_date, timezone, next_day_starts_at, proj_dir)
     w, dataset = train_model(proj_dir)
     w_markdown = get_w_markdown(w)
     cleanup(proj_dir, files)
@@ -38,7 +38,7 @@ def anki_optimizer(file, timezone, next_day_starts_at, revlog_start_date, reques
 
     my_collection, rating_markdown = process_personalized_collection(requestRetention, w)
     difficulty_distribution_padding, difficulty_distribution = get_my_memory_states(proj_dir, dataset, my_collection)
-    fig, suggested_retention_markdown = make_plot(proj_dir, type_sequence, w, difficulty_distribution_padding)
+    fig, suggested_retention_markdown = make_plot(proj_dir, type_sequence, time_sequence, w, difficulty_distribution_padding)
     loss_markdown = my_loss(dataset, w)
     difficulty_distribution = difficulty_distribution.to_string().replace("\n", "\n\n")
     markdown_out = f"""
@@ -58,7 +58,7 @@ def anki_optimizer(file, timezone, next_day_starts_at, revlog_start_date, reques
 
 
 description = """
-# FSRS4Anki Optimizer App - v3.10.1
+# FSRS4Anki Optimizer App - v3.13.0
 Based on the [tutorial](https://medium.com/@JarrettYe/how-to-use-the-next-generation-spaced-repetition-algorithm-fsrs-on-anki-5a591ca562e2)
 of [Jarrett Ye](https://github.com/L-M-Sherlock). This application can give you personalized anki parameters without having to code.
fsrs4anki_optimizer.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
model.py CHANGED
@@ -2,7 +2,24 @@ import numpy as np
 import torch
 from torch import nn
 
-init_w = [1, 1, 5, -0.5, -0.5, 0.2, 1.4, -0.02, 0.8, 2, -0.2, 0.5, 1]
+init_w = [1, 1, 5, -0.5, -0.5, 0.2, 1.4, -0.12, 0.8, 2, -0.2, 0.2, 1]
+'''
+w[0]: initial_stability_for_again_answer
+w[1]: initial_stability_step_per_rating
+w[2]: initial_difficulty_for_good_answer
+w[3]: initial_difficulty_step_per_rating
+w[4]: next_difficulty_step_per_rating
+w[5]: next_difficulty_reversion_to_mean_speed (used to avoid ease hell)
+w[6]: next_stability_factor_after_success
+w[7]: next_stability_stabilization_decay_after_success
+w[8]: next_stability_retrievability_gain_after_success
+w[9]: next_stability_factor_after_failure
+w[10]: next_stability_difficulty_decay_after_failure
+w[11]: next_stability_stability_gain_after_failure
+w[12]: next_stability_retrievability_gain_after_failure
+For more details about the parameters, please see:
+https://github.com/open-spaced-repetition/fsrs4anki/wiki/Free-Spaced-Repetition-Scheduler
+'''
 
 
 class FSRS(nn.Module):
@@ -54,19 +71,19 @@ class WeightClipper(object):
     def __call__(self, module):
         if hasattr(module, 'w'):
             w = module.w.data
-            w[0] = w[0].clamp(0.1, 10)  # initStability
-            w[1] = w[1].clamp(0.1, 5)  # initStabilityRatingFactor
-            w[2] = w[2].clamp(1, 10)  # initDifficulty
-            w[3] = w[3].clamp(-5, -0.1)  # initDifficultyRatingFactor
-            w[4] = w[4].clamp(-5, -0.1)  # updateDifficultyRatingFactor
-            w[5] = w[5].clamp(0, 0.5)  # difficultyMeanReversionFactor
-            w[6] = w[6].clamp(0, 2)  # recallFactor
-            w[7] = w[7].clamp(-0.2, -0.01)  # recallStabilityDecay
-            w[8] = w[8].clamp(0.01, 1.5)  # recallRetrievabilityFactor
-            w[9] = w[9].clamp(0.5, 5)  # forgetFactor
-            w[10] = w[10].clamp(-2, -0.01)  # forgetDifficultyDecay
-            w[11] = w[11].clamp(0.01, 0.9)  # forgetStabilityDecay
-            w[12] = w[12].clamp(0.01, 2)  # forgetRetrievabilityFactor
+            w[0] = w[0].clamp(0.1, 10)
+            w[1] = w[1].clamp(0.1, 5)
+            w[2] = w[2].clamp(1, 10)
+            w[3] = w[3].clamp(-5, -0.1)
+            w[4] = w[4].clamp(-5, -0.1)
+            w[5] = w[5].clamp(0, 0.5)
+            w[6] = w[6].clamp(0, 2)
+            w[7] = w[7].clamp(-0.2, -0.01)
+            w[8] = w[8].clamp(0.01, 1.5)
+            w[9] = w[9].clamp(0.5, 5)
+            w[10] = w[10].clamp(-2, -0.01)
+            w[11] = w[11].clamp(0.01, 0.9)
+            w[12] = w[12].clamp(0.01, 2)
             module.w.data = w
 
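For orientation, a minimal plain-Python sketch of how these thirteen weights enter the scheduler's update rules, following the formulas on the wiki page linked in the docstring. It is illustrative only, not the torch code in model.py: the clamps mirror the WeightClipper ranges above, `rating` is the Anki answer (1=again .. 4=easy), and `r` is the retrievability at review time.

    import math

    def init_stability(w, rating):
        # first stability, stepped per first rating (w[0], w[1])
        return max(w[0] + w[1] * (rating - 1), 0.1)

    def init_difficulty(w, rating):
        # first difficulty, stepped around the "good" answer (w[2], w[3])
        return min(max(w[2] + w[3] * (rating - 3), 1), 10)

    def next_difficulty(w, d, rating):
        # w[4] steps difficulty per rating; w[5] reverts it toward the
        # initial "good" difficulty to avoid ease hell
        d_new = d + w[4] * (rating - 3)
        return min(max(w[5] * init_difficulty(w, 3) + (1 - w[5]) * d_new, 1), 10)

    def next_stability(w, d, s, r, recalled):
        if recalled:
            # success: growth damped by difficulty (11 - d) and by s**w[7],
            # boosted when retrievability r was low (w[6], w[7], w[8])
            return s * (1 + math.exp(w[6]) * (11 - d) * math.pow(s, w[7])
                        * (math.exp((1 - r) * w[8]) - 1))
        # failure: stability re-estimated from d, s and r (w[9]..w[12])
        return w[9] * math.pow(d, w[10]) * math.pow(s, w[11]) * math.exp((1 - r) * w[12])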
plot.py CHANGED
@@ -5,28 +5,34 @@ import numpy as np
 import plotly.express as px
 
 
-def make_plot(proj_dir, type_sequence, w, difficulty_distribution_padding, progress=gr.Progress(track_tqdm=True)):
+def make_plot(proj_dir, type_sequence, time_sequence, w, difficulty_distribution_padding, progress=gr.Progress(track_tqdm=True)):
     base = 1.01
-    index_len = 800
-    index_offset = 150
+    index_len = 793
+    index_offset = 200
     d_range = 10
     d_offset = 1
-    r_repetitions = 1
-    f_repetitions = 2.3
-    max_repetitions = 200000
+    r_time = 8
+    f_time = 25
+    max_time = 200000
 
     type_block = dict()
     type_count = dict()
+    type_time = dict()
     last_t = type_sequence[0]
     type_block[last_t] = 1
     type_count[last_t] = 1
-    for t in type_sequence[1:]:
+    type_time[last_t] = time_sequence[0]
+    for i,t in enumerate(type_sequence[1:]):
        type_count[t] = type_count.setdefault(t, 0) + 1
+        type_time[t] = type_time.setdefault(t, 0) + time_sequence[i]
        if t != last_t:
            type_block[t] = type_block.setdefault(t, 0) + 1
        last_t = t
+
+    r_time = round(type_time[1]/type_count[1]/1000, 1)
+
     if 2 in type_count and 2 in type_block:
-        f_repetitions = round(type_count[2] / type_block[2] + 1, 1)
+        f_time = round(type_time[2]/type_block[2]/1000 + r_time, 1)
 
     def stability2index(stability):
         return int(round(np.log(stability) / np.log(base)) + index_offset)
@@ -42,18 +48,18 @@ def make_plot(proj_dir, type_sequence, w, difficulty_distribution_padding, progr
 
     stability_list = np.array([np.power(base, i - index_offset) for i in range(index_len)])
     # print(f"terminal stability: {stability_list.max(): .2f}")
-    df = pd.DataFrame(columns=["retention", "difficulty", "repetitions"])
+    df = pd.DataFrame(columns=["retention", "difficulty", "time"])
 
-    for percentage in trange(96, 70, -2, desc='Repetition vs Retention plot'):
+    for percentage in trange(96, 66, -2, desc='Time vs Retention plot'):
         recall = percentage / 100
-        repetitions_list = np.zeros((d_range, index_len))
-        repetitions_list[:, :-1] = max_repetitions
+        time_list = np.zeros((d_range, index_len))
+        time_list[:,:-1] = max_time
         for d in range(d_range, 0, -1):
             s0 = init_stability(d)
             s0_index = stability2index(s0)
-            diff = max_repetitions
+            diff = max_time
             while diff > 0.1:
-                s0_repetitions = repetitions_list[d - 1][s0_index]
+                s0_time = time_list[d - 1][s0_index]
                 for s_index in range(index_len - 2, -1, -1):
                     stability = stability_list[s_index];
                     interval = max(1, round(stability * np.log(recall) / np.log(0.9)))
@@ -63,29 +69,30 @@ def make_plot(proj_dir, type_sequence, w, difficulty_distribution_padding, progr
 
                     forget_s = cal_next_recall_stability(stability, p_recall, forget_d, 0)
                     recall_s_index = min(stability2index(recall_s), index_len - 1)
                     forget_s_index = min(max(stability2index(forget_s), 0), index_len - 1)
-                    recall_repetitions = repetitions_list[d - 1][recall_s_index] + r_repetitions
-                    forget_repetitions = repetitions_list[forget_d - 1][forget_s_index] + f_repetitions
-                    exp_repetitions = p_recall * recall_repetitions + (1.0 - p_recall) * forget_repetitions
-                    if exp_repetitions < repetitions_list[d - 1][s_index]:
-                        repetitions_list[d - 1][s_index] = exp_repetitions
-                diff = s0_repetitions - repetitions_list[d - 1][s0_index]
-            df.loc[0 if pd.isnull(df.index.max()) else df.index.max() + 1] = [recall, d, s0_repetitions]
+                    recall_time = time_list[d - 1][recall_s_index] + r_time
+                    forget_time = time_list[forget_d - 1][forget_s_index] + f_time
+                    exp_time = p_recall * recall_time + (1.0 - p_recall) * forget_time
+                    if exp_time < time_list[d - 1][s_index]:
+                        time_list[d - 1][s_index] = exp_time
+                diff = s0_time - time_list[d - 1][s0_index]
+            df.loc[0 if pd.isnull(df.index.max()) else df.index.max() + 1] = [recall, d, s0_time]
+
 
     df.sort_values(by=["difficulty", "retention"], inplace=True)
-    df.to_csv(proj_dir/"expected_repetitions.csv", index=False)
+    df.to_csv(proj_dir/"expected_time.csv", index=False)
     # print("expected_repetitions.csv saved.")
 
     optimal_retention_list = np.zeros(10)
     df2 = pd.DataFrame()
     for d in range(1, d_range + 1):
         retention = df[df["difficulty"] == d]["retention"]
-        repetitions = df[df["difficulty"] == d]["repetitions"]
-        optimal_retention = retention.iat[repetitions.argmin()]
+        time = df[df["difficulty"] == d]["time"]
+        optimal_retention = retention.iat[time.argmin()]
         optimal_retention_list[d - 1] = optimal_retention
         df2 = df2.append(
-            pd.DataFrame({'retention': retention, 'expected repetitions': repetitions, 'd': d, 'r': optimal_retention}))
+            pd.DataFrame({'retention': retention, 'expected time': time, 'd': d, 'r': optimal_retention}))
 
-    fig = px.line(df2, x="retention", y="expected repetitions", color='d', log_y=True)
+    fig = px.line(df2, x="retention", y="expected time", color='d', log_y=True)
 
     # print(f"\n-----suggested retention: {np.inner(difficulty_distribution_padding, optimal_retention_list):.2f}-----")
     suggested_retention_markdown = f"""# Suggested Retention: `{np.inner(difficulty_distribution_padding, optimal_retention_list):.2f}`"""
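
What changed conceptually: the optimizer now minimizes expected review time rather than expected review count. The literal `r_time = 8` and `f_time = 25` seconds are only defaults; assuming Anki's revlog conventions (type 1 = review, type 2 = relearning, `time` in milliseconds), `r_time` is re-estimated as the average seconds spent on a review entry and `f_time` as the average seconds spent on a contiguous relearning block plus one review, i.e. the full cost of a lapse. For each candidate retention level, the nested loops then run a value iteration over a log-spaced stability grid, repeatedly applying the update

    T(d, s) = p * (r_time + T(d, s_recall)) + (1 - p) * (f_time + T(d_forget, s_forget))

until the value at the initial state stops changing (the `diff > 0.1` test), where T(d, s) is the expected total future review time of a card with difficulty d and stability s, p is the probability of recall at the scheduled interval, and s_recall / s_forget are the post-review stabilities given by the FSRS model. The suggested retention is then the difficulty-weighted average of each difficulty's time-minimizing retention level.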
utilities.py CHANGED
@@ -62,6 +62,7 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
     df.drop(df[df['review_date'].dt.year < 2006].index, inplace=True)
     df.sort_values(by=['cid', 'id'], inplace=True, ignore_index=True)
     type_sequence = np.array(df['type'])
+    time_sequence = np.array(df['time'])
     df.to_csv(proj_dir / "revlog.csv", index=False)
     # print("revlog.csv saved.")
     df = df[(df['type'] == 0) | (df['type'] == 1)].copy()
@@ -152,7 +153,7 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
 
     df_out = df[df['r_history'].str.contains(r'^[1-4][^124]*$', regex=True)][
         ['r_history', 'avg_interval', 'avg_retention', 'stability', 'factor', 'group_cnt']]
-    return type_sequence, df_out
+    return type_sequence, time_sequence, df_out
 
 
 def train_model(proj_dir, progress=gr.Progress(track_tqdm=True)):
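
A note on units for the new `time_sequence`: in Anki's revlog the `time` column is the milliseconds spent answering each card (capped by the deck's "maximum answer seconds" option, 60 by default), which is why make_plot divides by 1000 to get seconds. A tiny illustrative check, assuming that schema (not part of the commit):

    # mean answer duration in seconds, if `time` is milliseconds per answer
    mean_answer_seconds = time_sequence.mean() / 1000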