675 lines
27 KiB
Python
675 lines
27 KiB
Python
"""
|
|
fsrs.optimizer
|
|
---------
|
|
|
|
This module defines the optional Optimizer class.
|
|
"""
|
|
|
|
from fsrs.card import Card
|
|
from fsrs.review_log import ReviewLog, Rating
|
|
from fsrs.scheduler import (
|
|
Scheduler,
|
|
DEFAULT_PARAMETERS,
|
|
LOWER_BOUNDS_PARAMETERS,
|
|
UPPER_BOUNDS_PARAMETERS,
|
|
)
|
|
|
|
import math
|
|
from datetime import datetime, timezone
|
|
from copy import deepcopy
|
|
from random import Random
|
|
from statistics import mean
|
|
|
|
try:
|
|
import torch
|
|
from torch.nn import BCELoss
|
|
from torch import optim
|
|
import pandas as pd
|
|
from tqdm import tqdm
|
|
|
|
# weight clipping
|
|
LOWER_BOUNDS_PARAMETERS_TENSORS = torch.tensor(
|
|
LOWER_BOUNDS_PARAMETERS,
|
|
dtype=torch.float64,
|
|
)
|
|
|
|
UPPER_BOUNDS_PARAMETERS_TENSORS = torch.tensor(
|
|
UPPER_BOUNDS_PARAMETERS,
|
|
dtype=torch.float64,
|
|
)
|
|
|
|
# hyper parameters
|
|
num_epochs = 5
|
|
mini_batch_size = 512
|
|
learning_rate = 4e-2
|
|
max_seq_len = (
|
|
64 # up to the first 64 reviews of each card are used for optimization
|
|
)
|
|
|
|
class Optimizer:
|
|
"""
|
|
The FSRS optimizer.
|
|
|
|
Enables the optimization of FSRS scheduler parameters from existing review logs for more accurate interval calculations.
|
|
|
|
Attributes:
|
|
review_logs: A collection of previous ReviewLog objects from a user.
|
|
_revlogs_train: The collection of review logs, sorted and formatted for optimization.
|
|
"""
|
|
|
|
review_logs: tuple[ReviewLog, ...]
|
|
_revlogs_train: dict
|
|
|
|
def __init__(
|
|
self, review_logs: tuple[ReviewLog, ...] | list[ReviewLog]
|
|
) -> None:
|
|
"""
|
|
Initializes the Optimizer with a set of ReviewLogs. Also formats a copy of the review logs for optimization.
|
|
|
|
Note that the ReviewLogs provided by the user don't need to be in order.
|
|
"""
|
|
|
|
def _format_revlogs() -> dict:
|
|
"""
|
|
Sorts and converts the tuple of ReviewLog objects to a dictionary format for optimizing
|
|
"""
|
|
|
|
revlogs_train = {}
|
|
for review_log in self.review_logs:
|
|
# pull data out of current ReviewLog object
|
|
card_id = review_log.card_id
|
|
rating = review_log.rating
|
|
review_datetime = review_log.review_datetime
|
|
review_duration = review_log.review_duration
|
|
|
|
# if the card was rated Again, it was not recalled
|
|
recall = 0 if rating == Rating.Again else 1
|
|
|
|
# as a ML problem, [x, y] = [ [review_datetime, rating, review_duration], recall ]
|
|
datum = [[review_datetime, rating, review_duration], recall]
|
|
|
|
if card_id not in revlogs_train:
|
|
revlogs_train[card_id] = []
|
|
|
|
revlogs_train[card_id].append((datum))
|
|
revlogs_train[card_id] = sorted(
|
|
revlogs_train[card_id], key=lambda x: x[0][0]
|
|
) # keep reviews sorted
|
|
|
|
# sort the dictionary in order of when each card history starts
|
|
revlogs_train = dict(sorted(revlogs_train.items()))
|
|
|
|
return revlogs_train
|
|
|
|
self.review_logs = deepcopy(tuple(review_logs))
|
|
|
|
# format the ReviewLog data for optimization
|
|
self._revlogs_train = _format_revlogs()
|
|
|
|
def _compute_batch_loss(self, *, parameters: list[float]) -> float:
|
|
"""
|
|
Computes the current total loss for the entire batch of review logs.
|
|
"""
|
|
|
|
card_ids = list(self._revlogs_train.keys())
|
|
params = torch.tensor(parameters, dtype=torch.float64)
|
|
loss_fn = BCELoss()
|
|
scheduler = Scheduler(parameters=params)
|
|
step_losses = []
|
|
|
|
for card_id in card_ids:
|
|
card_review_history = self._revlogs_train[card_id][:max_seq_len]
|
|
|
|
for i in range(len(card_review_history)):
|
|
review = card_review_history[i]
|
|
|
|
x_date = review[0][0]
|
|
y_retrievability = review[1]
|
|
u_rating = review[0][1]
|
|
|
|
if i == 0:
|
|
card = Card(card_id=card_id, due=x_date)
|
|
|
|
y_pred_retrievability = scheduler.get_card_retrievability(
|
|
card=card, current_datetime=x_date
|
|
)
|
|
y_retrievability = torch.tensor(
|
|
y_retrievability, dtype=torch.float64
|
|
)
|
|
|
|
if card.last_review and (x_date - card.last_review).days > 0:
|
|
step_loss = loss_fn(y_pred_retrievability, y_retrievability)
|
|
step_losses.append(step_loss)
|
|
|
|
card, _ = scheduler.review_card(
|
|
card=card,
|
|
rating=u_rating,
|
|
review_datetime=x_date,
|
|
review_duration=None,
|
|
)
|
|
|
|
batch_loss = torch.sum(torch.stack(step_losses))
|
|
batch_loss = batch_loss.item() / len(step_losses)
|
|
|
|
return batch_loss
|
|
|
|
def compute_optimal_parameters(self, verbose: bool = False) -> list[float]:
|
|
"""
|
|
Computes a set of optimized parameters for the FSRS scheduler and returns it as a list of floats.
|
|
|
|
High level explanation of optimization:
|
|
---------------------------------------
|
|
FSRS is a many-to-many sequence model where the "State" at each step is a Card object at a given point in time,
|
|
the input is the time of the review and the output is the predicted retrievability of the card at the time of review.
|
|
|
|
Each card's review history can be thought of as a sequence, each review as a step and each collection of card review histories
|
|
as a batch.
|
|
|
|
The loss is computed by comparing the predicted retrievability of the Card at each step with whether the Card was actually
|
|
sucessfully recalled or not (0/1).
|
|
|
|
Finally, the card objects at each step in their sequences are updated using the current parameters of the Scheduler
|
|
as well as the rating given to that card by the user. The parameters of the Scheduler is what is being optimized.
|
|
"""
|
|
|
|
def _num_reviews() -> int:
|
|
"""
|
|
Computes how many Review-state reviews there are in the dataset.
|
|
Only the loss from Review-state reviews count for optimization and their number must
|
|
be computed in advance to properly initialize the Cosine Annealing learning rate scheduler.
|
|
"""
|
|
|
|
scheduler = Scheduler()
|
|
num_reviews = 0
|
|
# iterate through the card review histories
|
|
card_ids = list(self._revlogs_train.keys())
|
|
for card_id in card_ids:
|
|
card_review_history = self._revlogs_train[card_id][:max_seq_len]
|
|
|
|
# iterate through the current Card's review history
|
|
for i in range(len(card_review_history)):
|
|
review = card_review_history[i]
|
|
|
|
review_datetime = review[0][0]
|
|
rating = review[0][1]
|
|
|
|
# if this is the first review, create the Card object
|
|
if i == 0:
|
|
card = Card(card_id=card_id, due=review_datetime)
|
|
|
|
# only non-same-day reviews count
|
|
if (
|
|
card.last_review
|
|
and (review_datetime - card.last_review).days > 0
|
|
):
|
|
num_reviews += 1
|
|
|
|
card, _ = scheduler.review_card(
|
|
card=card,
|
|
rating=rating,
|
|
review_datetime=review_datetime,
|
|
review_duration=None,
|
|
)
|
|
|
|
return num_reviews
|
|
|
|
def _update_parameters(
|
|
*,
|
|
step_losses: list,
|
|
adam_optimizer: torch.optim.Adam,
|
|
params: torch.Tensor,
|
|
lr_scheduler: torch.optim.lr_scheduler.CosineAnnealingLR,
|
|
) -> None:
|
|
"""
|
|
Computes and updates the current FSRS parameters based on the step losses. Also updates the learning rate scheduler.
|
|
"""
|
|
|
|
# Backpropagate through the loss
|
|
mini_batch_loss = torch.sum(torch.stack(step_losses))
|
|
adam_optimizer.zero_grad() # clear previous gradients
|
|
mini_batch_loss.backward() # compute gradients
|
|
adam_optimizer.step() # Update parameters
|
|
|
|
# clamp the weights in place without modifying the computational graph
|
|
with torch.no_grad():
|
|
params.clamp_(
|
|
min=LOWER_BOUNDS_PARAMETERS_TENSORS,
|
|
max=UPPER_BOUNDS_PARAMETERS_TENSORS,
|
|
)
|
|
|
|
# update the learning rate
|
|
lr_scheduler.step()
|
|
|
|
# set local random seed for reproducibility
|
|
rng = Random(42)
|
|
|
|
card_ids = list(self._revlogs_train.keys())
|
|
|
|
num_reviews = _num_reviews()
|
|
|
|
if num_reviews < mini_batch_size:
|
|
return list(DEFAULT_PARAMETERS)
|
|
|
|
# Define FSRS Scheduler parameters as torch tensors with gradients
|
|
params = torch.tensor(
|
|
DEFAULT_PARAMETERS, requires_grad=True, dtype=torch.float64
|
|
)
|
|
|
|
loss_fn = BCELoss()
|
|
adam_optimizer = optim.Adam([params], lr=learning_rate)
|
|
lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(
|
|
optimizer=adam_optimizer,
|
|
T_max=math.ceil(num_reviews / mini_batch_size) * num_epochs,
|
|
)
|
|
|
|
best_params = None
|
|
best_loss = math.inf
|
|
# iterate through the epochs
|
|
for _ in tqdm(
|
|
range(num_epochs),
|
|
desc="Optimizing",
|
|
unit="epoch",
|
|
disable=(not verbose),
|
|
):
|
|
# randomly shuffle the order of which Card's review histories get computed first
|
|
# at the beginning of each new epoch
|
|
rng.shuffle(card_ids)
|
|
|
|
# initialize new scheduler with updated parameters each epoch
|
|
scheduler = Scheduler(parameters=params)
|
|
|
|
# stores the computed loss of each individual review
|
|
step_losses = []
|
|
|
|
# iterate through the card review histories (sequences)
|
|
for card_id in card_ids:
|
|
card_review_history = self._revlogs_train[card_id][:max_seq_len]
|
|
|
|
# iterate through the current Card's review history (steps)
|
|
for i in range(len(card_review_history)):
|
|
review = card_review_history[i]
|
|
|
|
# input
|
|
x_date = review[0][0]
|
|
# target
|
|
y_retrievability = review[1]
|
|
# update
|
|
u_rating = review[0][1]
|
|
|
|
# if this is the first review, create the Card object
|
|
if i == 0:
|
|
card = Card(card_id=card_id, due=x_date)
|
|
|
|
# predicted target
|
|
y_pred_retrievability = scheduler.get_card_retrievability(
|
|
card=card, current_datetime=x_date
|
|
)
|
|
y_retrievability = torch.tensor(
|
|
y_retrievability, dtype=torch.float64
|
|
)
|
|
|
|
# only compute step-loss on non-same-day reviews
|
|
if card.last_review and (x_date - card.last_review).days > 0:
|
|
step_loss = loss_fn(y_pred_retrievability, y_retrievability)
|
|
step_losses.append(step_loss)
|
|
|
|
# update the card's state
|
|
card, _ = scheduler.review_card(
|
|
card=card,
|
|
rating=u_rating,
|
|
review_datetime=x_date,
|
|
review_duration=None,
|
|
)
|
|
|
|
# take a gradient step after each mini-batch
|
|
if len(step_losses) == mini_batch_size:
|
|
_update_parameters(
|
|
step_losses=step_losses,
|
|
adam_optimizer=adam_optimizer,
|
|
params=params,
|
|
lr_scheduler=lr_scheduler,
|
|
)
|
|
|
|
# update the scheduler's with the new parameters
|
|
scheduler = Scheduler(parameters=params)
|
|
# clear the step losses for next batch
|
|
step_losses = []
|
|
|
|
# remove gradient history from tensor card parameters for next batch
|
|
card.stability = card.stability.detach()
|
|
card.difficulty = card.difficulty.detach()
|
|
|
|
# update params on remaining review logs
|
|
if len(step_losses) > 0:
|
|
_update_parameters(
|
|
step_losses=step_losses,
|
|
adam_optimizer=adam_optimizer,
|
|
params=params,
|
|
lr_scheduler=lr_scheduler,
|
|
)
|
|
|
|
# compute the current batch loss after each epoch
|
|
detached_params = [
|
|
x.detach().item() for x in list(params.detach())
|
|
] # convert to floats
|
|
with torch.no_grad():
|
|
epoch_batch_loss = self._compute_batch_loss(
|
|
parameters=detached_params
|
|
)
|
|
|
|
# if the batch loss is better with the current parameters, update the current best parameters
|
|
if epoch_batch_loss < best_loss:
|
|
best_loss = epoch_batch_loss
|
|
best_params = detached_params
|
|
|
|
return best_params
|
|
|
|
def _compute_probs_and_costs(self) -> dict[str, float]:
|
|
review_log_df = pd.DataFrame(
|
|
vars(review_log) for review_log in self.review_logs
|
|
)
|
|
|
|
review_log_df = review_log_df.sort_values(
|
|
by=["card_id", "review_datetime"], ascending=[True, True]
|
|
).reset_index(drop=True)
|
|
|
|
# dictionary to return
|
|
probs_and_costs_dict = {}
|
|
|
|
# compute the probabilities and costs of the first rating
|
|
first_reviews_df = review_log_df.loc[
|
|
~review_log_df["card_id"].duplicated(keep="first")
|
|
].reset_index(drop=True)
|
|
|
|
first_again_reviews_df = first_reviews_df.loc[
|
|
first_reviews_df["rating"] == Rating.Again
|
|
]
|
|
first_hard_reviews_df = first_reviews_df.loc[
|
|
first_reviews_df["rating"] == Rating.Hard
|
|
]
|
|
first_good_reviews_df = first_reviews_df.loc[
|
|
first_reviews_df["rating"] == Rating.Good
|
|
]
|
|
first_easy_reviews_df = first_reviews_df.loc[
|
|
first_reviews_df["rating"] == Rating.Easy
|
|
]
|
|
|
|
# compute the probability of the user clicking again/hard/good/easy given it's their first review
|
|
num_first_again = len(first_again_reviews_df)
|
|
num_first_hard = len(first_hard_reviews_df)
|
|
num_first_good = len(first_good_reviews_df)
|
|
num_first_easy = len(first_easy_reviews_df)
|
|
|
|
num_first_review = (
|
|
num_first_again + num_first_hard + num_first_good + num_first_easy
|
|
)
|
|
|
|
prob_first_again = num_first_again / num_first_review
|
|
prob_first_hard = num_first_hard / num_first_review
|
|
prob_first_good = num_first_good / num_first_review
|
|
prob_first_easy = num_first_easy / num_first_review
|
|
|
|
probs_and_costs_dict["prob_first_again"] = prob_first_again
|
|
probs_and_costs_dict["prob_first_hard"] = prob_first_hard
|
|
probs_and_costs_dict["prob_first_good"] = prob_first_good
|
|
probs_and_costs_dict["prob_first_easy"] = prob_first_easy
|
|
|
|
# compute the cost of the user clicking again/hard/good/easy on their first review
|
|
first_again_review_durations = list(
|
|
first_again_reviews_df["review_duration"]
|
|
)
|
|
first_hard_review_durations = list(first_hard_reviews_df["review_duration"])
|
|
first_good_review_durations = list(first_good_reviews_df["review_duration"])
|
|
first_easy_review_durations = list(first_easy_reviews_df["review_duration"])
|
|
|
|
avg_first_again_review_duration = (
|
|
mean(first_again_review_durations)
|
|
if first_again_review_durations
|
|
else 0
|
|
)
|
|
avg_first_hard_review_duration = (
|
|
mean(first_hard_review_durations) if first_hard_review_durations else 0
|
|
)
|
|
avg_first_good_review_duration = (
|
|
mean(first_good_review_durations) if first_good_review_durations else 0
|
|
)
|
|
avg_first_easy_review_duration = (
|
|
mean(first_easy_review_durations) if first_easy_review_durations else 0
|
|
)
|
|
|
|
probs_and_costs_dict["avg_first_again_review_duration"] = (
|
|
avg_first_again_review_duration
|
|
)
|
|
probs_and_costs_dict["avg_first_hard_review_duration"] = (
|
|
avg_first_hard_review_duration
|
|
)
|
|
probs_and_costs_dict["avg_first_good_review_duration"] = (
|
|
avg_first_good_review_duration
|
|
)
|
|
probs_and_costs_dict["avg_first_easy_review_duration"] = (
|
|
avg_first_easy_review_duration
|
|
)
|
|
|
|
# compute the probabilities and costs of non-first ratings
|
|
non_first_reviews_df = review_log_df.loc[
|
|
review_log_df["card_id"].duplicated(keep="first")
|
|
].reset_index(drop=True)
|
|
|
|
again_reviews_df = non_first_reviews_df.loc[
|
|
non_first_reviews_df["rating"] == Rating.Again
|
|
]
|
|
hard_reviews_df = non_first_reviews_df.loc[
|
|
non_first_reviews_df["rating"] == Rating.Hard
|
|
]
|
|
good_reviews_df = non_first_reviews_df.loc[
|
|
non_first_reviews_df["rating"] == Rating.Good
|
|
]
|
|
easy_reviews_df = non_first_reviews_df.loc[
|
|
non_first_reviews_df["rating"] == Rating.Easy
|
|
]
|
|
|
|
# compute the probability of the user clicking hard/good/easy given they correctly recalled the card
|
|
num_hard = len(hard_reviews_df)
|
|
num_good = len(good_reviews_df)
|
|
num_easy = len(easy_reviews_df)
|
|
|
|
num_recall = num_hard + num_good + num_easy
|
|
|
|
prob_hard = num_hard / num_recall
|
|
prob_good = num_good / num_recall
|
|
prob_easy = num_easy / num_recall
|
|
|
|
probs_and_costs_dict["prob_hard"] = prob_hard
|
|
probs_and_costs_dict["prob_good"] = prob_good
|
|
probs_and_costs_dict["prob_easy"] = prob_easy
|
|
|
|
again_review_durations = list(again_reviews_df["review_duration"])
|
|
hard_review_durations = list(hard_reviews_df["review_duration"])
|
|
good_review_durations = list(good_reviews_df["review_duration"])
|
|
easy_review_durations = list(easy_reviews_df["review_duration"])
|
|
|
|
avg_again_review_duration = (
|
|
mean(again_review_durations) if again_review_durations else 0
|
|
)
|
|
avg_hard_review_duration = (
|
|
mean(hard_review_durations) if hard_review_durations else 0
|
|
)
|
|
avg_good_review_duration = (
|
|
mean(good_review_durations) if good_review_durations else 0
|
|
)
|
|
avg_easy_review_duration = (
|
|
mean(easy_review_durations) if easy_review_durations else 0
|
|
)
|
|
|
|
probs_and_costs_dict["avg_again_review_duration"] = (
|
|
avg_again_review_duration
|
|
)
|
|
probs_and_costs_dict["avg_hard_review_duration"] = avg_hard_review_duration
|
|
probs_and_costs_dict["avg_good_review_duration"] = avg_good_review_duration
|
|
probs_and_costs_dict["avg_easy_review_duration"] = avg_easy_review_duration
|
|
|
|
return probs_and_costs_dict
|
|
|
|
def _simulate_cost(
|
|
self,
|
|
*,
|
|
desired_retention: float,
|
|
parameters: tuple[float, ...] | list[float],
|
|
num_cards_simulate: int,
|
|
probs_and_costs_dict: dict[str, float],
|
|
) -> float:
|
|
rng = Random(42)
|
|
|
|
# simulate from the beginning of 2025 till before the beginning of 2026
|
|
start_date = datetime(2025, 1, 1, 0, 0, 0, 0, timezone.utc)
|
|
end_date = datetime(2026, 1, 1, 0, 0, 0, 0, timezone.utc)
|
|
|
|
scheduler = Scheduler(
|
|
parameters=parameters,
|
|
desired_retention=desired_retention,
|
|
enable_fuzzing=False,
|
|
)
|
|
|
|
# unpack probs_and_costs_dict
|
|
prob_first_again = probs_and_costs_dict["prob_first_again"]
|
|
prob_first_hard = probs_and_costs_dict["prob_first_hard"]
|
|
prob_first_good = probs_and_costs_dict["prob_first_good"]
|
|
prob_first_easy = probs_and_costs_dict["prob_first_easy"]
|
|
|
|
avg_first_again_review_duration = probs_and_costs_dict[
|
|
"avg_first_again_review_duration"
|
|
]
|
|
avg_first_hard_review_duration = probs_and_costs_dict[
|
|
"avg_first_hard_review_duration"
|
|
]
|
|
avg_first_good_review_duration = probs_and_costs_dict[
|
|
"avg_first_good_review_duration"
|
|
]
|
|
avg_first_easy_review_duration = probs_and_costs_dict[
|
|
"avg_first_easy_review_duration"
|
|
]
|
|
|
|
prob_hard = probs_and_costs_dict["prob_hard"]
|
|
prob_good = probs_and_costs_dict["prob_good"]
|
|
prob_easy = probs_and_costs_dict["prob_easy"]
|
|
|
|
avg_again_review_duration = probs_and_costs_dict[
|
|
"avg_again_review_duration"
|
|
]
|
|
avg_hard_review_duration = probs_and_costs_dict["avg_hard_review_duration"]
|
|
avg_good_review_duration = probs_and_costs_dict["avg_good_review_duration"]
|
|
avg_easy_review_duration = probs_and_costs_dict["avg_easy_review_duration"]
|
|
|
|
simulation_cost = 0
|
|
for i in range(num_cards_simulate):
|
|
card = Card()
|
|
curr_date = start_date
|
|
while curr_date < end_date:
|
|
# the card is new
|
|
if curr_date == start_date:
|
|
rating = rng.choices(
|
|
[Rating.Again, Rating.Hard, Rating.Good, Rating.Easy],
|
|
weights=[
|
|
prob_first_again,
|
|
prob_first_hard,
|
|
prob_first_good,
|
|
prob_first_easy,
|
|
],
|
|
)[0]
|
|
|
|
if rating == Rating.Again:
|
|
simulation_cost += avg_first_again_review_duration
|
|
|
|
elif rating == Rating.Hard:
|
|
simulation_cost += avg_first_hard_review_duration
|
|
|
|
elif rating == Rating.Good:
|
|
simulation_cost += avg_first_good_review_duration
|
|
|
|
elif rating == Rating.Easy:
|
|
simulation_cost += avg_first_easy_review_duration
|
|
|
|
# the card is not new
|
|
else:
|
|
rating = rng.choices(
|
|
["recall", Rating.Again],
|
|
weights=[desired_retention, 1.0 - desired_retention],
|
|
)[0]
|
|
|
|
if rating == "recall":
|
|
# compute probability that the user chose hard/good/easy, GIVEN that they correctly recalled the card
|
|
rating = rng.choices(
|
|
[Rating.Hard, Rating.Good, Rating.Easy],
|
|
weights=[prob_hard, prob_good, prob_easy],
|
|
)[0]
|
|
|
|
if rating == Rating.Again:
|
|
simulation_cost += avg_again_review_duration
|
|
|
|
elif rating == Rating.Hard:
|
|
simulation_cost += avg_hard_review_duration
|
|
|
|
elif rating == Rating.Good:
|
|
simulation_cost += avg_good_review_duration
|
|
|
|
elif rating == Rating.Easy:
|
|
simulation_cost += avg_easy_review_duration
|
|
|
|
card, _ = scheduler.review_card(
|
|
card=card, rating=rating, review_datetime=curr_date
|
|
)
|
|
curr_date = card.due
|
|
|
|
total_knowledge = desired_retention * num_cards_simulate
|
|
simulation_cost = simulation_cost / total_knowledge
|
|
|
|
return simulation_cost
|
|
|
|
def compute_optimal_retention(
|
|
self, parameters: tuple[float, ...] | list[float]
|
|
) -> list[float]:
|
|
def _validate_review_logs() -> None:
|
|
if len(self.review_logs) < 512:
|
|
raise ValueError(
|
|
"Not enough ReviewLog's: at least 512 ReviewLog objects are required to compute optimal retention"
|
|
)
|
|
|
|
for review_log in self.review_logs:
|
|
if review_log.review_duration is None:
|
|
raise ValueError(
|
|
"ReviewLog.review_duration cannot be None when computing optimal retention"
|
|
)
|
|
|
|
_validate_review_logs()
|
|
|
|
NUM_CARDS_SIMULATE = 1000
|
|
DESIRED_RETENTIONS = [0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
|
|
|
|
probs_and_costs_dict = self._compute_probs_and_costs()
|
|
|
|
simulation_costs = []
|
|
for desired_retention in DESIRED_RETENTIONS:
|
|
simulation_cost = self._simulate_cost(
|
|
desired_retention=desired_retention,
|
|
parameters=parameters,
|
|
num_cards_simulate=NUM_CARDS_SIMULATE,
|
|
probs_and_costs_dict=probs_and_costs_dict,
|
|
)
|
|
simulation_costs.append(simulation_cost)
|
|
|
|
min_index = simulation_costs.index(min(simulation_costs))
|
|
optimal_retention = DESIRED_RETENTIONS[min_index]
|
|
|
|
return optimal_retention
|
|
|
|
except ImportError:
|
|
|
|
class Optimizer:
|
|
def __init__(self, *args, **kwargs) -> None:
|
|
raise ImportError(
|
|
'Optimizer is not installed.\nInstall it with: pip install "fsrs[optimizer]"'
|
|
)
|
|
|
|
|
|
__all__ = ["Optimizer"]
|