JarrettYe committed on
Commit
48f4086
1 Parent(s): 0fdc9b3
Files changed (2)
  1. app.py +2 -2
  2. utilities.py +16 -7
app.py CHANGED
@@ -15,7 +15,7 @@ def get_w_markdown(w):
 # Updated Parameters
 Copy and paste these as shown in step 5 of the instructions:
 
-`var w = {w};`
+`{w}`
 
 Check out the Analysis tab for more detailed information."""
 
@@ -59,7 +59,7 @@ def anki_optimizer(file, timezone, next_day_starts_at, revlog_start_date, reques
 
 
 description = """
-# FSRS4Anki Optimizer App - v3.13.3
+# FSRS4Anki Optimizer App - v3.14.7
 Based on the [tutorial](https://medium.com/@JarrettYe/how-to-use-the-next-generation-spaced-repetition-algorithm-fsrs-on-anki-5a591ca562e2)
 of [Jarrett Ye](https://github.com/L-M-Sherlock). This application can give you personalized anki parameters without having to code.
 
 
utilities.py CHANGED
@@ -64,9 +64,9 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
     time_sequence = np.array(df['time'])
     df.to_csv(proj_dir / "revlog.csv", index=False)
     # print("revlog.csv saved.")
-    df = df[(df['type'] == 0) | (df['type'] == 1)].copy()
+    df = df[df['type'] != 3].copy()
     df['real_days'] = df['review_date'] - timedelta(hours=next_day_starts_at)
-    df['real_days'] = pd.DatetimeIndex(df['real_days'].dt.floor('D')).to_julian_date()
+    df['real_days'] = pd.DatetimeIndex(df['real_days'].dt.floor('D', ambiguous='infer', nonexistent='shift_forward')).to_julian_date()
     df.drop_duplicates(['cid', 'real_days'], keep='first', inplace=True)
     df['delta_t'] = df.real_days.diff()
     df.dropna(inplace=True)
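The two new `floor` arguments guard against daylight-saving edge cases: flooring a timezone-aware timestamp to midnight can land on a wall-clock time that occurs twice or not at all. A minimal sketch of the failure mode, with an illustrative timezone and date not taken from the app:

```python
import pandas as pd

# In America/Havana, clocks jump 00:00 -> 01:00 on 2022-03-13, so midnight
# does not exist that day; a plain .dt.floor('D') raises a
# NonExistentTimeError on such rows. The extra arguments resolve both
# nonexistent and ambiguous results instead of raising.
s = pd.Series(pd.to_datetime(["2022-03-13 08:00"])).dt.tz_localize("America/Havana")
print(s.dt.floor("D", ambiguous="infer", nonexistent="shift_forward"))
# -> 2022-03-13 01:00:00-04:00, shifted past the missing hour
```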
@@ -78,8 +78,14 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
 
     # code from https://github.com/L-M-Sherlock/anki_revlog_analysis/blob/main/revlog_analysis.py
     def get_feature(x):
+        last_kind = None
         for idx, log in enumerate(x.itertuples()):
+            if last_kind is not None and last_kind in (1, 2) and log.type == 0:
+                return x.iloc[:idx]
+            last_kind = log.type
             if idx == 0:
+                if log.type != 0:
+                    return x.iloc[:idx]
                 x.iloc[idx, col_idx['delta_t']] = 0
             if idx == x.shape[0] - 1:
                 break
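The new guards truncate a card's history at the first learn entry that follows a review or relearn entry (e.g. after the card was reset), and drop histories that do not begin with a learn entry. A toy run of the same pattern on invented data (in Anki's revlog, type 0 = learn, 1 = review, 2 = relearn):

```python
import pandas as pd

# Invented revlog fragment for one card: learn, two reviews, then a fresh
# learn entry signalling a reset. The loop mirrors the guard added above
# and keeps only the rows before the reset.
x = pd.DataFrame({"type": [0, 1, 1, 0, 1]})
last_kind = None
for idx, log in enumerate(x.itertuples()):
    if last_kind is not None and last_kind in (1, 2) and log.type == 0:
        x = x.iloc[:idx]  # cut at the reset
        break
    last_kind = log.type
print(x)  # rows 0-2 remain
```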
@@ -90,7 +96,7 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
         return x
 
     tqdm.pandas(desc='Saving Trainset')
-    df = df.groupby('cid', as_index=False).progress_apply(get_feature)
+    df = df.groupby('cid', as_index=False, group_keys=False).progress_apply(get_feature)
     df = df[df['id'] >= time.mktime(datetime.strptime(revlog_start_date, "%Y-%m-%d").timetuple()) * 1000]
     df["t_history"] = df["t_history"].map(lambda x: x[1:] if len(x) > 1 else x)
     df["r_history"] = df["r_history"].map(lambda x: x[1:] if len(x) > 1 else x)
@@ -108,16 +114,19 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
     df = df.drop(columns=['id', 'cid', 'usn', 'ivl', 'last_lvl', 'factor', 'time', 'type', 'create_date', 'review_date',
                           'real_days', 'r', 't_history'])
     df.drop_duplicates(inplace=True)
-    df = df[(df['retention'] < 1) & (df['retention'] > 0)]
+    df['retention'] = df['retention'].map(lambda x: max(min(0.99, x), 0.01))
 
     def cal_stability(group: pd.DataFrame) -> pd.DataFrame:
+        group_cnt = sum(group['total_cnt'])
+        if group_cnt < 10:
+            return pd.DataFrame()
+        group['group_cnt'] = group_cnt
         if group['i'].values[0] > 1:
             r_ivl_cnt = sum(group['delta_t'] * group['retention'].map(np.log) * pow(group['total_cnt'], 2))
             ivl_ivl_cnt = sum(group['delta_t'].map(lambda x: x ** 2) * pow(group['total_cnt'], 2))
             group['stability'] = round(np.log(0.9) / (r_ivl_cnt / ivl_ivl_cnt), 1)
         else:
             group['stability'] = 0.0
-        group['group_cnt'] = sum(group['total_cnt'])
         group['avg_retention'] = round(
             sum(group['retention'] * pow(group['total_cnt'], 2)) / sum(pow(group['total_cnt'], 2)), 3)
         group['avg_interval'] = round(
@@ -128,7 +137,7 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
         return group
 
     tqdm.pandas(desc='Calculating Stability')
-    df = df.groupby(by=['r_history']).progress_apply(cal_stability)
+    df = df.groupby(by=['r_history'], group_keys=False).progress_apply(cal_stability)
     # print("Stability calculated.")
     df.reset_index(drop=True, inplace=True)
     df.drop_duplicates(inplace=True)
@@ -143,7 +152,7 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
     df['factor'] = round(df['stability'] / df['last_stability'], 2)
     df = df[(df['i'] >= 2) & (df['group_cnt'] >= 100)]
     df['last_recall'] = df['r_history'].map(lambda x: x[-1])
-    df = df[df.groupby(['i', 'r_history'])['group_cnt'].transform(max) == df['group_cnt']]
+    df = df[df.groupby(['i', 'r_history'], group_keys=False)['group_cnt'].transform(max) == df['group_cnt']]
     df.to_csv(proj_dir / 'stability_for_analysis.tsv', sep='\t', index=None)
     # print("1:again, 2:hard, 3:good, 4:easy\n")
     # print(df[df['r_history'].str.contains(r'^[1-4][^124]*$', regex=True)][
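The last changed line keeps, within each (i, r_history) group, only the rows that carry the group's maximum group_cnt. A toy run of the transform idiom on invented values:

```python
import pandas as pd

# transform("max") broadcasts each group's maximum back onto its rows, so
# the comparison keeps exactly the best-supported row(s) per group.
df = pd.DataFrame({"r_history": ["3", "3", "3,3"], "group_cnt": [5, 9, 2]})
print(df[df.groupby("r_history")["group_cnt"].transform("max") == df["group_cnt"]])
```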
 