-
-
Save yangchenyun/4942386a91b203bea117 to your computer and use it in GitHub Desktop.
full sm-15 implementation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(require racket/flonum) | |
;; TODO: how to calculate the rep category instead of rep number? | |
;; repetition category - number that represents the repetition number | |
;; corresponding to a given interval in the theoretical learning process | |
;; in which the current status of the OF matrix is used. | |
;; SuperMemo will look at the optimum interval and | |
;; see how many times an item would need to be repeated to reach that | |
;; interval. That number is called the repetition category. | |
;; Grade scale: integer grades from MIN-GRADE (complete blackout) to
;; MAX-GRADE (perfect recall), as in SuperMemo.
(define MAX-GRADE 5)
(define MIN-GRADE 0)
;; A-Factor (absolute item difficulty) range: AF-STEP discrete column
;; values starting at MIN-AF (the most difficult items, see the comment
;; near "Item difficulty" below), spaced AF-STEP-LENGTH apart.
(define MIN-AF 1.2)
(define AF-STEP-LENGTH 0.3)
(define AF-STEP 20)
;; Largest representable A-Factor: 1.2 + 0.3 * 19 = 6.9.
(define MAX-AF (+ MIN-AF (* AF-STEP-LENGTH (- AF-STEP 1))))
;; Requested forgetting index: the target probability (10%) of having
;; forgotten an item at the moment of repetition.
(define REQUESTED-FI 0.1)
;; Upper bound on the repetition-category dimension of the matrices.
(define MAX-REP 20)
;; General Abstraction | |
;; item abstraction | |
;; (define (get-memory-lapse item)) | |
;; (define (get-repetition item)) | |
;; TODO: Note that the term first grade refers to the grade issued at first repetition (i.e. Repetitions=2), not at memorizing (i.e. Repetitions=1). The first issued grade does not affect the learning process except for determining which items enter the final drill | |
;; (define (get-first-grade item)) | |
;; (define (get-last-grade item)) | |
;; U-Factor - a number associated with each memorized element. It equals | |
;; to the ratio of the current interval and the previously used interval. | |
;; If the element has not been repeated yet, i.e. the previous interval | |
;; is not defined, U-Factor equals the first interval. The greater the | |
;; U-Factor the greater the increase of the interval between repetitions. | |
;; U-Factor of ITEM: the ratio of its current interval to its previous
;; interval (or the first interval when no previous one exists) -- see
;; the comment block above.  The item abstraction is not defined in this
;; file yet, so fail loudly instead of returning an unspecified value
;; (the original empty DEFINE body is a syntax error in Racket).
(define (get-ufactor item)
  (error 'get-ufactor
         "item abstraction not implemented yet; cannot compute U-Factor for ~a"
         item))
;; A point is two-dimensional: it holds an (x . y) coordinate pair.
;; A point is an (x . y) pair; these aliases document intent at call sites.
(define make-point cons)
(define x-of car)
(define y-of cdr)
;; A graph is a two-dimensional collection of *points*.
;; The point list held inside the graph's closure is the only mutable
;; data structure in the whole program.
;; It can be combined with a regression model to derive an estimated function.
;; Construct a graph: a message-passing closure over a mutable point
;; list.  Supported messages:
;;   'apply-regression-model -> unary proc: applies a model to the points
;;   'register-point         -> unary proc: prepends a point, returns 'success
;;   'set-points             -> unary proc: replaces the points, returns 'success
;;   'get-points             -> the current point list itself
(define (make-graph)
  (define pts '())
  (lambda (cmd)
    (case cmd
      ((apply-regression-model)
       (lambda (regression-model) (regression-model pts)))
      ((register-point)
       (lambda (pt)
         (set! pts (cons pt pts))
         'success))
      ((set-points)
       (lambda (replacement)
         (set! pts replacement)
         'success))
      ((get-points) pts)
      (else (error "graph: command not supported")))))
;; Convenience constructor: a fresh graph pre-loaded with POINTS.
(define (make-graph-with-points points)
  (define graph (make-graph))
  ((graph 'set-points) points)
  graph)
;; Forgetting curve is a graph of ufactor to retention | |
;; it is a closure which holds all the points | |
;; It will take some time before your first forgetting curves are | |
;; plotted. For that reason, the initial value of the RF matrix is taken | |
;; from the model of a less-than-average student. The model of average | |
;; student is not used because the convergence from poorer student | |
;; parameters upwards is faster than the convergence in the opposite | |
;; direction. | |
;; TODO: INIT-CURVE, used to calculate initial RF, open up SM-15 and copy and paste | |
;; by hand? | |
;; TODO: seed with the less-than-average-student forgetting-curve data
;; from SM-15 (see the comment above).  The original body mapped
;; MAKE-POINT over two quoted symbols ('ufactor-list, 'af-list), which
;; can only raise a contract error at runtime; fail with an explicit
;; message instead until the real data is supplied.
(define (init-curve)
  (error 'init-curve
         "initial forgetting-curve data not yet supplied (see TODO above)"))
;; Group of curves are a 2d-vector indexed by repetition category and AF | |
;; Group of forgetting curves, keyed first by repetition category
;; (1 .. MAX-REP - 1) and then by A-Factor column value; represented as
;; a two-level association list.  (The original wrapped this in a
;; LET/IF/SET! whose condition was always true; the value is identical.)
;; NOTE(review): each cell holds the SYMBOL 'init-curve, not the result
;; of calling (init-curve) -- presumably a placeholder; confirm before
;; treating cells as graph closures.
(define curves
  (map (lambda (rep)
         (cons rep
               (map (lambda (af) (cons af 'init-curve))
                    (range MIN-AF MAX-AF AF-STEP-LENGTH))))
       (range 1 MAX-REP)))
;; Look up the forgetting curve stored in CURVES for repetition category
;; REP and A-Factor column AFINDEX.  Keys are matched with a small
;; tolerance because the AF keys are flonums produced by RANGE.
;; Fixes from review: removed the dead COMP binding (the FLONUM?/FL=
;; selection was computed but never used) and the LIST parameter that
;; shadowed the builtin; an unmatched key now raises a descriptive
;; error instead of CDAR crashing on the empty list.
(define (curves-of rep afIndex)
  (define (close-enough? a b) (> 0.0000001 (abs (- a b))))
  ;; Association-list lookup with tolerant key comparison.
  (define (list-lookup alist index)
    (let ((hits (filter (lambda (elem) (close-enough? (car elem) index))
                        alist)))
      (when (null? hits)
        (error 'curves-of "no entry for key ~a" index))
      (cdar hits)))
  (list-lookup (list-lookup curves rep) afIndex))
;; Grades strictly above RECALL-THRESHOLD count as a successful recall.
;; NOTE(review): grade 3 itself is recorded as forgotten here -- confirm
;; against SuperMemo's pass/fail convention.
(define RECALL-THRESHOLD 3)
;; Record one repetition outcome on CURVE as a (ufactor . retention)
;; point, where retention is 100 for a recall and 0 for a lapse.
(define (add-record-to-curve curve ufactor grade)
  (define retention (if (> grade RECALL-THRESHOLD) 100 0))
  ((curve 'register-point) (make-point ufactor retention))
  'success)
;; TODO: each point is stored as remembered (100) or forgotten (0), but
;; the actual forgetting curve is expressed in terms of retention, so
;; the points need to be aggregated before running a regression model
;; on the graph.
;; Convert a forgetting index FI (a fraction in (0, 1)) to the average
;; retention over the inter-repetition interval: R = -FI / ln(1 - FI).
(define (fi-to-retention FI)
  (- (/ FI (log (- 1 FI)))))
;; NOTE: the inverse has no closed form, so it must be approximated numerically.
;; Inverse of FI-TO-RETENTION, computed numerically: find the FI in
;; (0, 1) whose average retention -FI/ln(1-FI) equals RETENTION.  That
;; retention function is strictly decreasing from 1 (FI -> 0) toward 0
;; (FI -> 1), so a plain bisection converges.  (The original body was
;; empty, which is a syntax error in Racket.)
(define (retention-to-fi retention)
  (define (retention-of fi) (- (/ fi (log (- 1 fi)))))
  (let loop ((lo 1e-12) (hi (- 1 1e-12)) (steps 0))
    (define mid (/ (+ lo hi) 2))
    (cond ((>= steps 200) mid)
          ;; retention too high at MID means FI is still too small.
          ((> (retention-of mid) retention) (loop mid hi (+ steps 1)))
          (else (loop lo mid (+ steps 1))))))
;; fit y = A * e^(Bx) | |
;; Least-squares fit of y = A * e^(B x) to POINTS, obtained by linear
;; regression on (x, ln y).  POINTS must have positive y values -- a
;; raw 0/100 forgetting-curve point yields (log 0) = -inf and poisons
;; the sums; aggregate to retention first (see the TODO above
;; FI-TO-RETENTION).  The result is a closure answering:
;;   'A, 'B  -> the fitted coefficients
;;   'x-to-y -> unary proc evaluating the fitted curve
;;   'y-to-x -> unary proc inverting the fitted curve
;; Fix from review: the original summed with REDUCE, which is unbound
;; in plain Racket (it lives in srfi/1); APPLY + is the built-in form.
(define (exponential-model points)
  (let* ((n (length points))
         (X (map x-of points))
         (Y (map y-of points))
         (logY (map log Y))
         (sqX (map sqr X))
         (sumlogY (apply + logY))
         (sumsqX (apply + sqX))
         (sumX (apply + X))
         (sumXlogY (apply + (map * X logY)))
         ;; Shared denominator of both normal-equation solutions.
         (denom (- (* n sumsqX) (sqr sumX)))
         (a (/ (- (* sumlogY sumsqX)
                  (* sumX sumXlogY))
               denom))
         (b (/ (- (* n sumXlogY)
                  (* sumX sumlogY))
               denom))
         (A (exp a))
         (B b))
    (lambda (cmd)
      (cond ((eq? cmd 'A) A)
            ((eq? cmd 'B) B)
            ((eq? cmd 'x-to-y)
             (lambda (x)
               (* A (exp (* B x)))))
            ((eq? cmd 'y-to-x)
             (lambda (y)
               (/ (- (log y) (log A))
                  B)))
            (else (error "exponential-model: cmd is not supported"))))))
;; fit y = a + bx | |
;; Least-squares fit of y = a + b x to POINTS.  The result is a closure
;; answering:
;;   'a, 'b  -> intercept and slope
;;   'x-to-y -> unary proc evaluating the fitted line
;;   'y-to-x -> unary proc inverting the fitted line
;; Fixes from review: SUMY was referenced but never bound (unbound
;; identifier), and REDUCE is unbound in plain Racket (srfi/1) -- both
;; sums now use the built-in APPLY +.
(define (linear-model points)
  (let* ((n (length points))
         (X (map x-of points))
         (Y (map y-of points))
         (sqX (map sqr X))
         (sumY (apply + Y))
         (sumsqX (apply + sqX))
         (sumX (apply + X))
         (sumXY (apply + (map * X Y)))
         ;; Shared denominator of both normal-equation solutions.
         (denom (- (* n sumsqX) (sqr sumX)))
         (a (/ (- (* sumY sumsqX)
                  (* sumX sumXY))
               denom))
         (b (/ (- (* n sumXY)
                  (* sumX sumY))
               denom)))
    (lambda (cmd)
      (cond ((eq? cmd 'a) a)
            ((eq? cmd 'b) b)
            ((eq? cmd 'x-to-y) (lambda (x) (+ a (* b x))))
            ((eq? cmd 'y-to-x) (lambda (y) (/ (- y a) b)))
            (else (error "linear-model: cmd is not supported"))))))
;; fit y = bx | |
;; Least-squares fit of y = b x (regression through the origin):
;; b = sum(x*y) / sum(x^2).  Same message protocol as LINEAR-MODEL,
;; minus 'a.  Fixes from review: REDUCE is unbound in plain Racket
;; (srfi/1), replaced with APPLY +; the unused N binding was removed.
(define (linear-model-through-origin points)
  (let* ((X (map x-of points))
         (Y (map y-of points))
         (sumsqX (apply + (map sqr X)))
         (sumXY (apply + (map * X Y)))
         (b (/ sumXY sumsqX)))
    (lambda (cmd)
      (cond ((eq? cmd 'b) b)
            ((eq? cmd 'x-to-y) (lambda (x) (* b x)))
            ((eq? cmd 'y-to-x) (lambda (y) (/ y b)))
            (else (error "linear-model-through-origin: cmd is not supported"))))))
;; TODO: abstraction for matrix | |
;; 7 RF matrix and Forgetting curves: | |
;; RFactor: the number which says at which U-Factor, the measured | |
;; forgetting index is approximated to be at REQUESTED-FI. | |
;; Individual entries of the RF matrix are computed from forgetting | |
;; curves approximated for each entry individually. | |
;; Each forgetting curve corresponds with a different value of the | |
;; repetition number and a different value of A-Factor (or memory lapses | |
;; in the case of the first repetition). | |
;; The value of the RF matrix entry corresponds to the moment in time | |
;; where the forgetting curve passes the knowledge retention point | |
;; derived from the requested forgetting index. | |
;; R-Factor for (REP, AFINDEX): the U-Factor at which the fitted
;; forgetting curve predicts retention has fallen to the level implied
;; by the requested forgetting index.
;; Fixes from review: 'apply-regression-model applies its argument to
;; the point list, so it needs an actual model procedure -- the quoted
;; symbol 'forgetting-model is not callable; and the fitted model
;; exposes 'y-to-x (retention -> U-Factor), not 'get-ufactor.
;; NOTE(review): the recorded points use retention on a 0..100 scale
;; while (- 1 REQUESTED-FI) is 0.9 -- confirm the intended scale, and
;; note CURVES currently stores placeholder symbols, not graphs.
(define (RF rep afIndex)
  (let* ((forgetting-curve (curves-of rep afIndex))
         (fitted ((forgetting-curve 'apply-regression-model)
                  exponential-model)))
    ((fitted 'y-to-x) (- 1 REQUESTED-FI))))
;; 8 TODO: Deriving OF matrix from the forgetting curves: | |
;; 1 fixed-point power approximation of the R-Factor decline along the RF matrix columns (the fixed point corresponds to second repetition at which the approximation curve passes through the A-Factor value), | |
;; 2 for all columns, computing D-Factor which expresses the decay constant of the power approximation, | |
;; 3 linear regression of D-Factor change across the RF matrix columns, and | |
;; 4 deriving the entire OF matrix from the slope and intercept of the straight line that makes up the best fit in the D-Factor graph. | |
;; Note that the first row of the OF matrix is computed in a different | |
;; way. It corresponds to the best-fit exponential curve obtained from | |
;; the first row of the RF matrix. | |
;; All the above steps are passed after each repetition. In other | |
;; words, the theoretically optimum value of the OF matrix is updated as | |
;; soon as *new forgetting curve data is collected*. | |
;; TODO: derive the OF (optimal factors) matrix entry for repetition
;; category REP and A-Factor column AFINDEX from the RF matrix, per
;; steps 1-4 in the comment above.  The original empty LAMBDA body is a
;; syntax error in Racket, so fail explicitly until implemented.
;; NOTE(review): NTH-OPTIMAL-INTERVAL calls (compute-OF) with no
;; arguments and then applies the result -- reconcile the signatures
;; when implementing.
(define compute-OF
  (lambda (rep afIndex)
    (error 'compute-OF "OF-matrix derivation not implemented yet")))
;; TODO: map an OF-matrix column index COL back to its A-Factor value
;; (pseudo-code AFactorFromOFColumn in the comment above CALC-AF).
;; The original empty body is a syntax error in Racket; fail explicitly
;; until implemented.
(define (get-AF-for-OF-column col)
  (error 'get-AF-for-OF-column "not implemented yet (column ~a)" col))
;; TODO: scan the OF-matrix columns (20 down to 1) for repetition
;; category REP and return the first column whose estimated O-Factor is
;; exceeded by TARGET-INTERVAL (see the pseudo-code above CALC-AF).
;; The original empty body is a syntax error in Racket; fail explicitly
;; until implemented.
(define (look-for-col-match rep target-interval)
  (error 'look-for-col-match "not implemented yet"))
;; 1 Optimum interval: Inter-repetition intervals are computed using the following formula: | |
;; I(1)=OF[1,L+1] | |
;; I(n)=I(n-1)*OF[n,AF] | |
;; where: | |
;; * OF - matrix of optimal factors, which is modified in the course of repetitions | |
;; * OF[1,L+1] - value of the OF matrix entry taken from the first row and the L+1 column | |
;; * OF[n,AF] - value of the OF matrix entry that corresponds with the n-th repetition, and with item difficulty AF | |
;; * L - number of times a given item has been forgotten (from "memory Lapses") | |
;; * AF - number that reflects absolute difficulty of a given item (from "Absolute difficulty Factor") | |
;; * I(n) - n-th inter-repetition interval for a given item | |
;; Note the first repetition here means the first repetition after forgetting | |
;; TODO: implement advancement spacing effect algorithm | |
;; 1. The actual interval is irrelevant to determine the optimal interval | |
;; 2. start with rep-1, it recursively multiplies ufactors to find the absolute days | |
;; Optimum inter-repetition interval for ITEM:
;;   I(1) = OF[1, L+1]
;;   I(n) = I(n-1) * OF[n, AF]
;; Fix from review: the recursive call computed (- 1 N), i.e. 1 - n,
;; which walks AWAY from the base case and never terminates for n > 1;
;; the recurrence requires (- N 1).
;; NOTE(review): (compute-OF) is invoked with no arguments although it
;; is defined with two, CALC-AF is called as (calc-AF item n
;; optimal-factor-matrix) although it is defined with two parameters,
;; and OPTIMAL-FACTOR-MATRIX is not defined in this file -- reconcile
;; these once the OF matrix is implemented.
(define (nth-optimal-interval item)
  (let* ((L (get-memory-lapse item))
         (n (get-repetition item))
         (OF (compute-OF))
         (AF (calc-AF item n optimal-factor-matrix)))
    (define (I n)
      (if (= n 1)
          (OF 1 (+ 1 L))
          (* (I (- n 1))
             (OF n AF))))
    (I n)))
;; 9 Item difficulty: | |
;; ..the higher it is, the easier the item. | |
;; The most difficult items have A-Factors equal to 1.2. | |
;; It is worth noting that AF=OF[2,AF]. In other words, AF denotes the | |
;; optimum interval increase factor after the second repetition. This is | |
;; also equivalent with the highest interval increase factor for a given | |
;; item. | |
;; The initial value of A-Factor is derived from the first grade obtained | |
;; by the item, and the correlation graph of the first grade and A-Factor | |
;; (G-AF graph). | |
;; TODO: This graph is updated after each repetition in which a new A-Factor | |
;; value is estimated and correlated with the item's first grade, and old | |
;; A-Factor estimation is removed from the graph. | |
;; Subsequent approximations of the real A-Factor value are done after | |
;; each repetition by using grades, OF matrix, and a correlation graph | |
;; that shows the correspondence of the grade with the expected | |
;; forgetting index (FI-G graph). | |
;; The grade used to compute the initial A-Factor is normalized, | |
;; i.e. adjusted for the difference between the actually used interval | |
;; and the optimum interval for the forgetting index equal 10% | |
;; Initial: Assume a linear relationship between grade and AF | |
;; Initial G-AF correlation: assume a linear relationship between the
;; first grade and A-Factor, anchored at (MAX-GRADE, MAX-AF) and
;; (MIN-GRADE, MIN-AF).
(define INIT-G-AF-points (list (make-point MAX-GRADE MAX-AF)
                               (make-point MIN-GRADE MIN-AF)))
;; Mutable correlation graph: first grade -> estimated A-Factor.
(define G-AF-graph (make-graph-with-points INIT-G-AF-points))
;; Record one (grade . A-Factor) correlation point on the G-AF graph.
;; Fix from review: the original evaluated (G-AF-graph) -- invoking the
;; graph closure with zero arguments, an arity error -- instead of
;; sending it the 'register-point message.
(define (add-entry-to-G-AF-graph grade AF)
  ((G-AF-graph 'register-point)
   (make-point grade AF)))
;; Refresh the G-AF graph after a repetition with the newly estimated
;; A-Factor for the item's first grade.
;; TODO (per the comment above): the item's previous A-Factor estimate
;; should also be removed from the graph; currently points only
;; accumulate.
(define (update-G-AF-graph first-grade new-AF)
  (add-entry-to-G-AF-graph first-grade new-AF))
;; Estimate an item's initial A-Factor from its first grade via an
;; exponential fit of the G-AF correlation graph.
;; Fix from review: the original wrote ((estimated-func ...)) as a bare
;; application, so ESTIMATED-FUNC was never bound (unbound-identifier
;; error); bind it with LET, matching CALC-ESTIMATED-FI below.
(define (calc-initial-AF first-grade)
  (let ((estimated-func
         (((G-AF-graph 'apply-regression-model) exponential-model)
          'x-to-y)))
    (estimated-func first-grade)))
;; The FI-G graph is updated after each repetition by using the expected forgetting index and actual grade scores. | |
;; The expected forgetting index can easily be derived from the interval used between repetitions and the optimum interval computed from the OF matrix. | |
;; The higher the value of the expected forgetting index, the lower the grade. | |
;; From the grade and the FI-G graph, we can compute the estimated forgetting index | |
;; Notes: | |
;; Expected forgetting index is the expected probability of failing to recall a given element if the | |
;; repetition takes place at a given moment. | |
;; Estimated forgetting index is value of the forgetting index derived | |
;; from the scored grade on the basis of the FI-G graph. | |
;; Initial FI-G correlation: grades fall linearly from MAX-GRADE at a
;; forgetting index of 0 to MIN-GRADE at 100.  Note the FI axis is on a
;; 0..100 percent scale on this graph.
(define INIT-FI-G-points (list (make-point 0 MAX-GRADE)
                               (make-point 100 MIN-GRADE)))
;; Mutable correlation graph: expected forgetting index -> grade.
(define FI-G-graph (make-graph-with-points INIT-FI-G-points))
;; Record one (forgetting-index . grade) correlation point on the FI-G
;; graph.  Fix from review: the original evaluated (FI-G-graph) --
;; invoking the graph closure with zero arguments, an arity error --
;; instead of sending it the 'register-point message.
(define (add-entry-to-FI-G-graph forgetting-index grade)
  ((FI-G-graph 'register-point)
   (make-point forgetting-index grade)))
;; After a repetition, add the (expected-FI, grade) pair to the FI-G
;; graph.  Inverting the pseudo-code below
;;   OptimumInterval := UsedInterval * ln(0.9) / ln(1 - EstimatedFI/100)
;; gives  ExpectedFI = 1 - exp(UsedInterval * ln(0.9) / OptimumInterval).
;; Fixes from review:
;;  * the original DIVIDED by (log 0.9) instead of multiplying,
;;    contradicting its own pseudo-code;
;;  * the point was registered on the G-AF graph instead of FI-G.
;; NOTE(review): this yields FI as a 0..1 fraction while the graph's
;; seed points use a 0..100 scale -- confirm which scale is intended.
(define (update-FI-G-graph grade used-interval optimum-interval)
  (let ((expected-FI (- 1 (exp (/ (* used-interval (log 0.9))
                                  optimum-interval)))))
    (add-entry-to-FI-G-graph expected-FI grade)))
;; Derive the estimated forgetting index from a scored GRADE by
;; inverting an exponential fit of the FI-G graph ('y-to-x maps a grade
;; back to a forgetting index).
(define (calc-estimated-FI grade)
  (define fi-of-grade
    (((FI-G-graph 'apply-regression-model) exponential-model)
     'y-to-x))
  (fi-of-grade grade))
;; 11 Computing A-Factors: | |
;; From (1) the estimated forgetting index, (2) length of the interval and (3) the OF matrix, we can easily compute the most accurate value of A-Factor. | |
;; Note that A-Factor serves as an index to the OF matrix, while the | |
;; estimated forgetting index allows one to find the column of the OF | |
;; matrix for which the optimum interval corresponds with the actually | |
;; used interval corrected for the deviation of the estimated forgetting | |
;; index from the requested forgetting index. | |
;; function GetNewAFactorFromEstimatedFI(EstimatedFI); | |
;; OptimumInterval:=(UsedInterval*ln(0.9)/ln(1-EstimatedFI/100)); | |
;; OFactor:=OptimumInterval/UsedInterval*UFactor; | |
;; for col:=20 downto 1 do {scan columns of the OF matrix} | |
;; if OFactor>EstimatedOFactor(col,RepetitionsCategory) then | |
;; break; | |
;; Result:=AFactorFromOFColumn(col); {new value of A-Factor} | |
;; At each repetition, a weighted average is taken of the old A-Factor | |
;; and the new estimated value of the A-Factor. The newly obtained | |
;; A-Factor is used in indexing the OF matrix when computing the new | |
;; optimum inter-repetition interval | |
;; Compute the item's A-Factor after a repetition.
;; Repetition 1: fall back to the G-AF correlation graph
;; (CALC-INITIAL-AF).  Later repetitions: per the pseudo-code above,
;; normalize the used interval for the deviation of the estimated FI
;; from the requested forgetting index, derive the estimated O-Factor,
;; and map the matching OF-matrix column back to an A-Factor.
;; NOTE(review): GET-GRADE and GET-REPETITION are not defined in this
;; file (only sketched in the commented-out item abstraction above).
;; NOTE(review): the pseudo-code uses EstimatedFI/100, but here
;; ESTIMATED-FI feeds (log (- 1 estimated-FI)) directly, so it must be
;; a 0..1 fraction -- confirm against CALC-ESTIMATED-FI's scale.
;; NOTE(review): NTH-OPTIMAL-INTERVAL calls this as
;; (calc-AF item n optimal-factor-matrix), which does not match this
;; two-argument signature.
(define (calc-AF item used-interval)
  (let ((n (get-repetition item))
        (grade (get-grade item))
        (ufactor (get-ufactor item)))
    (if (= n 1)
        (calc-initial-AF grade)
        ;; ESTIMATED-OPTIMUM-INTERVAL is the pseudo-code's OFactor:
        ;; (normalized-interval / used-interval) * ufactor.
        (let* ((estimated-FI (calc-estimated-FI grade))
               (normalized-interval (/ (* used-interval (log 0.9))
                                       (log (- 1 estimated-FI))))
               (estimated-optimum-interval (* normalized-interval
                                              (/ 1 used-interval)
                                              ufactor)))
          (get-AF-for-OF-column
           (look-for-col-match n estimated-optimum-interval))))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment