Update to 3.13.0 (#7)

Commits:
- Update to 3.13.0 (233f10f2062869ed8d8890347e8f8b4fd4492036)
- fix/wrong key (9e932a5823023b334f98ec42e610165ac2b4ad69)

Files changed:
- app.py +3 -3
- fsrs4anki_optimizer.ipynb +0 -0
- model.py +31 -14
- plot.py +33 -26
- utilities.py +2 -1
app.py CHANGED

```diff
@@ -28,7 +28,7 @@ def anki_optimizer(file, timezone, next_day_starts_at, revlog_start_date, requestRetention,
 
     proj_dir = extract(file, prefix)
 
-    type_sequence, df_out = create_time_series_features(revlog_start_date, timezone, next_day_starts_at, proj_dir)
+    type_sequence, time_sequence, df_out = create_time_series_features(revlog_start_date, timezone, next_day_starts_at, proj_dir)
     w, dataset = train_model(proj_dir)
     w_markdown = get_w_markdown(w)
     cleanup(proj_dir, files)
@@ -38,7 +38,7 @@ def anki_optimizer(file, timezone, next_day_starts_at, revlog_start_date, requestRetention,
 
     my_collection, rating_markdown = process_personalized_collection(requestRetention, w)
     difficulty_distribution_padding, difficulty_distribution = get_my_memory_states(proj_dir, dataset, my_collection)
-    fig, suggested_retention_markdown = make_plot(proj_dir, type_sequence, w, difficulty_distribution_padding)
+    fig, suggested_retention_markdown = make_plot(proj_dir, type_sequence, time_sequence, w, difficulty_distribution_padding)
     loss_markdown = my_loss(dataset, w)
     difficulty_distribution = difficulty_distribution.to_string().replace("\n", "\n\n")
     markdown_out = f"""
@@ -58,7 +58,7 @@ def anki_optimizer(file, timezone, next_day_starts_at, revlog_start_date, requestRetention,
 
 
 description = """
-# FSRS4Anki Optimizer App - v3.
+# FSRS4Anki Optimizer App - v3.13.0
 Based on the [tutorial](https://medium.com/@JarrettYe/how-to-use-the-next-generation-spaced-repetition-algorithm-fsrs-on-anki-5a591ca562e2)
 of [Jarrett Ye](https://github.com/L-M-Sherlock). This application can give you personalized anki parameters without having to code.
```
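One line in this hunk is easy to miss: `difficulty_distribution.to_string().replace("\n", "\n\n")` double-spaces the Series dump, presumably because single newlines collapse when the string is embedded in the Gradio markdown output. A minimal sketch with made-up numbers:

```python
import pandas as pd

# Toy stand-in for the real difficulty distribution (values are made up).
difficulty_distribution = pd.Series([0.12, 0.30, 0.58], index=[1, 2, 3])

# Double each newline so every difficulty row survives as its own
# paragraph when rendered as markdown.
print(difficulty_distribution.to_string().replace("\n", "\n\n"))
```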
fsrs4anki_optimizer.ipynb CHANGED

The diff for this file is too large to render; see the raw diff.
model.py CHANGED

```diff
@@ -2,7 +2,24 @@ import numpy as np
 import torch
 from torch import nn
 
-init_w = [1, 1, 5, -0.5, -0.5, 0.2, 1.4, -0.12, 0.8, 2, -0.2, 0.2, 1]
+init_w = [1, 1, 5, -0.5, -0.5, 0.2, 1.4, -0.12, 0.8, 2, -0.2, 0.2, 1]
+'''
+w[0]: initial_stability_for_again_answer
+w[1]: initial_stability_step_per_rating
+w[2]: initial_difficulty_for_good_answer
+w[3]: initial_difficulty_step_per_rating
+w[4]: next_difficulty_step_per_rating
+w[5]: next_difficulty_reversion_to_mean_speed (used to avoid ease hell)
+w[6]: next_stability_factor_after_success
+w[7]: next_stability_stabilization_decay_after_success
+w[8]: next_stability_retrievability_gain_after_success
+w[9]: next_stability_factor_after_failure
+w[10]: next_stability_difficulty_decay_after_success
+w[11]: next_stability_stability_gain_after_failure
+w[12]: next_stability_retrievability_gain_after_failure
+For more details about the parameters, please see:
+https://github.com/open-spaced-repetition/fsrs4anki/wiki/Free-Spaced-Repetition-Scheduler
+'''
 
 
 class FSRS(nn.Module):
@@ -54,19 +71,19 @@ class WeightClipper(object):
     def __call__(self, module):
         if hasattr(module, 'w'):
             w = module.w.data
-            w[0] = w[0].clamp(0.1, 10)
-            w[1] = w[1].clamp(0.1, 5)
-            w[2] = w[2].clamp(1, 10)
-            w[3] = w[3].clamp(-5, -0.1)
-            w[4] = w[4].clamp(-5, -0.1)
-            w[5] = w[5].clamp(0, 0.5)
-            w[6] = w[6].clamp(0, 2)
-            w[7] = w[7].clamp(-0.2, -0.01)
-            w[8] = w[8].clamp(0.01, 1.5)
-            w[9] = w[9].clamp(0.5, 5)
-            w[10] = w[10].clamp(-2, -0.01)
-            w[11] = w[11].clamp(0.01, 0.9)
-            w[12] = w[12].clamp(0.01, 2)
+            w[0] = w[0].clamp(0.1, 10)
+            w[1] = w[1].clamp(0.1, 5)
+            w[2] = w[2].clamp(1, 10)
+            w[3] = w[3].clamp(-5, -0.1)
+            w[4] = w[4].clamp(-5, -0.1)
+            w[5] = w[5].clamp(0, 0.5)
+            w[6] = w[6].clamp(0, 2)
+            w[7] = w[7].clamp(-0.2, -0.01)
+            w[8] = w[8].clamp(0.01, 1.5)
+            w[9] = w[9].clamp(0.5, 5)
+            w[10] = w[10].clamp(-2, -0.01)
+            w[11] = w[11].clamp(0.01, 0.9)
+            w[12] = w[12].clamp(0.01, 2)
             module.w.data = w
 
 
```
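`WeightClipper` has the usual shape of a callable handed to `nn.Module.apply`: it projects the weights back into the ranges the scheduler's formulas tolerate after each optimizer step. A standalone sketch of that pattern (the one-bound clipper and the `TinyFSRS` module below are illustrative stand-ins; the real class clamps all 13 entries exactly as shown in the diff above):

```python
import torch
from torch import nn

class WeightClipper(object):
    # Illustrative re-declaration clamping only w[0]; the real model.py
    # version applies the 13 bounds shown above.
    def __call__(self, module):
        if hasattr(module, 'w'):
            w = module.w.data
            w[0] = w[0].clamp(0.1, 10)
            module.w.data = w

class TinyFSRS(nn.Module):
    # Hypothetical minimal module exposing a 13-element weight vector like FSRS.
    def __init__(self):
        super().__init__()
        self.w = nn.Parameter(torch.tensor(
            [1, 1, 5, -0.5, -0.5, 0.2, 1.4, -0.12, 0.8, 2, -0.2, 0.2, 1],
            dtype=torch.float32))

model = TinyFSRS()
model.w.data[0] = 42.0         # pretend an optimizer step pushed w[0] out of range
model.apply(WeightClipper())   # .apply() runs the clipper on the module itself too
print(model.w.data[0].item())  # 10.0 -- clamped back into [0.1, 10]
```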
plot.py CHANGED

```diff
@@ -5,28 +5,34 @@ import numpy as np
 import plotly.express as px
 
 
-def make_plot(proj_dir, type_sequence, w, difficulty_distribution_padding, progress=gr.Progress(track_tqdm=True)):
+def make_plot(proj_dir, type_sequence, time_sequence, w, difficulty_distribution_padding, progress=gr.Progress(track_tqdm=True)):
     base = 1.01
-    index_len =
-    index_offset =
+    index_len = 793
+    index_offset = 200
     d_range = 10
     d_offset = 1
-
-
-
+    r_time = 8
+    f_time = 25
+    max_time = 200000
 
     type_block = dict()
     type_count = dict()
+    type_time = dict()
     last_t = type_sequence[0]
     type_block[last_t] = 1
     type_count[last_t] = 1
-
+    type_time[last_t] = time_sequence[0]
+    for i,t in enumerate(type_sequence[1:]):
         type_count[t] = type_count.setdefault(t, 0) + 1
+        type_time[t] = type_time.setdefault(t, 0) + time_sequence[i]
         if t != last_t:
             type_block[t] = type_block.setdefault(t, 0) + 1
             last_t = t
+
+    r_time = round(type_time[1]/type_count[1]/1000, 1)
+
     if 2 in type_count and 2 in type_block:
-
+        f_time = round(type_time[2]/type_block[2]/1000 + r_time, 1)
 
     def stability2index(stability):
         return int(round(np.log(stability) / np.log(base)) + index_offset)
@@ -42,18 +48,18 @@ def make_plot(proj_dir, type_sequence, w, difficulty_distribution_padding, progress=gr.Progress(track_tqdm=True)):
 
     stability_list = np.array([np.power(base, i - index_offset) for i in range(index_len)])
     # print(f"terminal stability: {stability_list.max(): .2f}")
-    df = pd.DataFrame(columns=["retention", "difficulty", "
+    df = pd.DataFrame(columns=["retention", "difficulty", "time"])
 
-    for percentage in trange(96,
+    for percentage in trange(96, 66, -2, desc='Time vs Retention plot'):
         recall = percentage / 100
-
-
+        time_list = np.zeros((d_range, index_len))
+        time_list[:,:-1] = max_time
         for d in range(d_range, 0, -1):
             s0 = init_stability(d)
             s0_index = stability2index(s0)
-            diff =
+            diff = max_time
             while diff > 0.1:
-
+                s0_time = time_list[d - 1][s0_index]
                 for s_index in range(index_len - 2, -1, -1):
                     stability = stability_list[s_index];
                     interval = max(1, round(stability * np.log(recall) / np.log(0.9)))
@@ -63,29 +69,30 @@ def make_plot(proj_dir, type_sequence, w, difficulty_distribution_padding, progress=gr.Progress(track_tqdm=True)):
                     forget_s = cal_next_recall_stability(stability, p_recall, forget_d, 0)
                     recall_s_index = min(stability2index(recall_s), index_len - 1)
                     forget_s_index = min(max(stability2index(forget_s), 0), index_len - 1)
-
-
-
-                    if
-
-                diff =
-            df.loc[0 if pd.isnull(df.index.max()) else df.index.max() + 1] = [recall, d,
+                    recall_time = time_list[d - 1][recall_s_index] + r_time
+                    forget_time = time_list[forget_d - 1][forget_s_index] + f_time
+                    exp_time = p_recall * recall_time + (1.0 - p_recall) * forget_time
+                    if exp_time < time_list[d - 1][s_index]:
+                        time_list[d - 1][s_index] = exp_time
+                diff = s0_time - time_list[d - 1][s0_index]
+            df.loc[0 if pd.isnull(df.index.max()) else df.index.max() + 1] = [recall, d, s0_time]
+
 
     df.sort_values(by=["difficulty", "retention"], inplace=True)
-    df.to_csv(proj_dir/"
+    df.to_csv(proj_dir/"expected_time.csv", index=False)
     # print("expected_repetitions.csv saved.")
 
     optimal_retention_list = np.zeros(10)
     df2 = pd.DataFrame()
     for d in range(1, d_range + 1):
         retention = df[df["difficulty"] == d]["retention"]
-
-        optimal_retention = retention.iat[
+        time = df[df["difficulty"] == d]["time"]
+        optimal_retention = retention.iat[time.argmin()]
         optimal_retention_list[d - 1] = optimal_retention
         df2 = df2.append(
-            pd.DataFrame({'retention': retention, 'expected
+            pd.DataFrame({'retention': retention, 'expected time': time, 'd': d, 'r': optimal_retention}))
 
-    fig = px.line(df2, x="retention", y="expected
+    fig = px.line(df2, x="retention", y="expected time", color='d', log_y=True)
 
     # print(f"\n-----suggested retention: {np.inner(difficulty_distribution_padding, optimal_retention_list):.2f}-----")
     suggested_retention_markdown = f"""# Suggested Retention: `{np.inner(difficulty_distribution_padding, optimal_retention_list):.2f}`"""
```
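What the rewritten loop computes: `time_list[d - 1][s_index]` holds the expected further review time (in seconds) to carry a card of difficulty `d` from a given stability to the terminal stability, and the sweep is a value-iteration relaxation, with each state taking the cheaper of its current value and `p_recall * recall_time + (1 - p_recall) * forget_time`, repeated until the start state improves by less than 0.1. A toy version of the same relaxation on a hypothetical three-state chain:

```python
import numpy as np

# Toy model of the relaxation in make_plot: state 2 is "learned" (cost 0),
# a successful review moves a card up one state at cost r_time, and a lapse
# knocks it back to state 0 at cost f_time. The real code indexes states by
# (difficulty, stability index) and derives p_recall from the interval.
r_time, f_time, p_recall = 8.0, 25.0, 0.9
time = np.array([200000.0, 200000.0, 0.0])  # init like time_list[:, :-1] = max_time

diff = np.inf
while diff > 0.1:
    before = time[0]
    for s in (1, 0):  # sweep from stronger states to weaker, as the real loop does
        exp_time = p_recall * (time[s + 1] + r_time) + (1 - p_recall) * (time[0] + f_time)
        if exp_time < time[s]:
            time[s] = exp_time
    diff = before - time[0]

print(round(time[0], 1))  # converged expected review time from the weakest state
```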
utilities.py CHANGED

```diff
@@ -62,6 +62,7 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
     df.drop(df[df['review_date'].dt.year < 2006].index, inplace=True)
     df.sort_values(by=['cid', 'id'], inplace=True, ignore_index=True)
     type_sequence = np.array(df['type'])
+    time_sequence = np.array(df['time'])
     df.to_csv(proj_dir / "revlog.csv", index=False)
     # print("revlog.csv saved.")
     df = df[(df['type'] == 0) | (df['type'] == 1)].copy()
@@ -152,7 +153,7 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
 
     df_out = df[df['r_history'].str.contains(r'^[1-4][^124]*$', regex=True)][
         ['r_history', 'avg_interval', 'avg_retention', 'stability', 'factor', 'group_cnt']]
-    return type_sequence, df_out
+    return type_sequence, time_sequence, df_out
 
 
 def train_model(proj_dir, progress=gr.Progress(track_tqdm=True)):
```
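On the new column itself: Anki's revlog `time` field records the answer duration in milliseconds, which is why `make_plot` divides the accumulated totals by 1000 before treating them as seconds. A minimal sketch of what the two added lines extract, on a hypothetical three-row revlog:

```python
import numpy as np
import pandas as pd

# Hypothetical revlog slice; 'type' follows Anki's scheme
# (0 = learning, 1 = review, 2 = relearning) and 'time' is in milliseconds.
df = pd.DataFrame({'type': [0, 1, 2], 'time': [12000, 8000, 30000]})

type_sequence = np.array(df['type'])  # consumed by make_plot's block/count tallies
time_sequence = np.array(df['time'])  # new in this commit: feeds r_time / f_time
print(type_sequence, time_sequence)
```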