问题 并行化R中的滚动窗口回归


我正在运行滚动回归,非常类似于以下代码:

library(PerformanceAnalytics)
library(quantmod)
data(managers)

FL <- as.formula(Next(HAM1)~HAM1+HAM2+HAM3+HAM4)
MyRegression <- function(df,FL) {
  df <- as.data.frame(df)
  model <- lm(FL,data=df[1:30,])
  predict(model,newdata=df[31,])
}

system.time(Result <- rollapply(managers, 31, FUN="MyRegression",FL,
    by.column = FALSE, align = "right", na.pad = TRUE))

我有一些额外的处理器,所以我试图找到一种方法来并行化滚动窗口。如果这是一个非滚动回归,我可以使用apply系列函数轻松地并行化它...


1230
2018-04-13 15:52


起源



答案:


显而易见的是使用 lm.fit() 代替 lm() 所以你不会因处理公式等而产生所有开销。

更新: 所以,当我说 明显 我的意思是说 实施起来很明显但很难实现

经过一番摆弄,我想出了这个

library(PerformanceAnalytics)
library(quantmod)
data(managers)

第一阶段是要意识到模型矩阵可以预先构建,所以我们这样做并将其转换回Zoo对象以供使用 rollapply()

mmat2 <- model.frame(Next(HAM1) ~ HAM1 + HAM2 + HAM3 + HAM4, data = managers, 
                     na.action = na.pass)
mmat2 <- cbind.data.frame(mmat2[,1], Intercept = 1, mmat2[,-1])
mmatZ <- as.zoo(mmat2)

现在我们需要一个将要使用的功能 lm.fit() 在不必每次迭代创建设计矩阵的情况下进行繁重的工作:

MyRegression2 <- function(Z) {
    ## store value we want to predict for
    pred <- Z[31, -1, drop = FALSE]
    ## get rid of any rows with NA in training data
    Z <- Z[1:30, ][!rowSums(is.na(Z[1:30,])) > 0, ]
    ## Next() would lag and leave NA in row 30 for response
    ## but we precomputed model matrix, so drop last row still in Z
    Z <- Z[-nrow(Z),]
    ## fit the model
    fit <- lm.fit(Z[, -1, drop = FALSE], Z[,1])
    ## get things we need to predict, in case pivoting turned on in lm.fit
    p <- fit$rank
    p1 <- seq_len(p)
    piv <- fit$qr$pivot[p1]
    ## model coefficients
    beta <- fit$coefficients
    ## this gives the predicted value for row 31 of data passed in
    drop(pred[, piv, drop = FALSE] %*% beta[piv])
}

时间比较:

> system.time(Result <- rollapply(managers, 31, FUN="MyRegression",FL,
+                                 by.column = FALSE, align = "right", 
+                                 na.pad = TRUE))
   user  system elapsed 
  0.925   0.002   1.020 
> 
> system.time(Result2 <- rollapply(mmatZ, 31, FUN = MyRegression2,
+                                  by.column = FALSE,  align = "right",
+                                  na.pad = TRUE))
   user  system elapsed 
  0.048   0.000   0.05

这比原版提供了相当合理的改进。现在检查生成的对象是否相同:

> all.equal(Result, Result2)
[1] TRUE

请享用!


9
2018-04-13 15:57



@Zach我当然假设你知道你在这里做了什么 - 试图获得一步预测? - Gavin Simpson
@Gavin Simpson是的,这就是我在做的事情。我也试图将其并行化。 - Zach
@Zach - 刚刚发布了一个包含实现我的代码的更新 lm.fit() 建议。这样做比我赞赏的要复杂一点。 - Gavin Simpson
@Gavin Simpson:这是一个非常健康的加速,谢谢。 - Zach
@Gavin Simpson:如果我想使用其他回归函数,例如 glm 要么 glmnet?我是否能够实现类似的东西,或者您的方法是否仅针对线性回归进行了优化? - Zach


答案:


显而易见的是使用 lm.fit() 代替 lm() 所以你不会因处理公式等而产生所有开销。

更新: 所以,当我说 明显 我的意思是说 实施起来很明显但很难实现

经过一番摆弄,我想出了这个

library(PerformanceAnalytics)
library(quantmod)
data(managers)

第一阶段是要意识到模型矩阵可以预先构建,所以我们这样做并将其转换回Zoo对象以供使用 rollapply()

mmat2 <- model.frame(Next(HAM1) ~ HAM1 + HAM2 + HAM3 + HAM4, data = managers, 
                     na.action = na.pass)
mmat2 <- cbind.data.frame(mmat2[,1], Intercept = 1, mmat2[,-1])
mmatZ <- as.zoo(mmat2)

现在我们需要一个将要使用的功能 lm.fit() 在不必每次迭代创建设计矩阵的情况下进行繁重的工作:

MyRegression2 <- function(Z) {
    ## store value we want to predict for
    pred <- Z[31, -1, drop = FALSE]
    ## get rid of any rows with NA in training data
    Z <- Z[1:30, ][!rowSums(is.na(Z[1:30,])) > 0, ]
    ## Next() would lag and leave NA in row 30 for response
    ## but we precomputed model matrix, so drop last row still in Z
    Z <- Z[-nrow(Z),]
    ## fit the model
    fit <- lm.fit(Z[, -1, drop = FALSE], Z[,1])
    ## get things we need to predict, in case pivoting turned on in lm.fit
    p <- fit$rank
    p1 <- seq_len(p)
    piv <- fit$qr$pivot[p1]
    ## model coefficients
    beta <- fit$coefficients
    ## this gives the predicted value for row 31 of data passed in
    drop(pred[, piv, drop = FALSE] %*% beta[piv])
}

时间比较:

> system.time(Result <- rollapply(managers, 31, FUN="MyRegression",FL,
+                                 by.column = FALSE, align = "right", 
+                                 na.pad = TRUE))
   user  system elapsed 
  0.925   0.002   1.020 
> 
> system.time(Result2 <- rollapply(mmatZ, 31, FUN = MyRegression2,
+                                  by.column = FALSE,  align = "right",
+                                  na.pad = TRUE))
   user  system elapsed 
  0.048   0.000   0.05

这比原版提供了相当合理的改进。现在检查生成的对象是否相同:

> all.equal(Result, Result2)
[1] TRUE

请享用!


9
2018-04-13 15:57



@Zach我当然假设你知道你在这里做了什么 - 试图获得一步预测? - Gavin Simpson
@Gavin Simpson是的,这就是我在做的事情。我也试图将其并行化。 - Zach
@Zach - 刚刚发布了一个包含实现我的代码的更新 lm.fit() 建议。这样做比我赞赏的要复杂一点。 - Gavin Simpson
@Gavin Simpson:这是一个非常健康的加速,谢谢。 - Zach
@Gavin Simpson:如果我想使用其他回归函数,例如 glm 要么 glmnet?我是否能够实现类似的东西,或者您的方法是否仅针对线性回归进行了优化? - Zach


新答案

我写了一个包, rollRegres,这样做更快。比它快〜58倍 加文辛普森的回答。这是一个例子

# simulate data
set.seed(101)
n <- 10000
wdth <- 50
X <- matrix(rnorm(10 * n), n, 10)
y <- drop(X %*% runif(10)) + rnorm(n)
Z <- cbind(y, X)

# assign other function
lm_version <- function(Z, width = wdth) {
  pred <- Z[width + 1, -1, drop = FALSE]
  ## fit the model
  Z <- Z[-nrow(Z), ]
  fit <- lm.fit(Z[, -1, drop = FALSE], Z[,1])
  ## get things we need to predict, in case pivoting turned on in lm.fit
  p <- fit$rank
  p1 <- seq_len(p)
  piv <- fit$qr$pivot[p1]
  ## model coefficients
  beta <- fit$coefficients
  ## this gives the predicted value for next obs
  drop(pred[, piv, drop = FALSE] %*% beta[piv])
}

# show that they yield the same
library(rollRegres) # the new package
library(zoo)
all.equal(
  rollapply(Z, wdth + 1, FUN = lm_version,
            by.column = FALSE,  align = "right", fill = NA_real_),
  roll_regres.fit(
    x = X, y = y, width = wdth, do_compute = "1_step_forecasts"
    )$one_step_forecasts)
#R [1] TRUE

# benchmark
library(compiler)
lm_version <- cmpfun(lm_version)
microbenchmark::microbenchmark(
  newnew = roll_regres.fit(
    x = X, y = y, width = wdth, do_compute = "1_step_forecasts"),
  prev   = rollapply(Z, wdth + 1, FUN = lm_version,
                     by.column = FALSE,  align = "right", fill = NA_real_),
  times = 10)
#R Unit: milliseconds
#R   expr       min        lq      mean    median        uq       max neval
#R newnew  10.27279  10.48929  10.91631  11.04139  11.13877  11.87121    10
#R   prev 555.45898 565.02067 582.60309 582.22285 602.73091 605.39481    10

老答案

您可以通过更新分解来缩短运行时间。这产生了一个 frm1 每次迭代的成本而不是 frm1 其中n是窗口宽度。下面是一个比较两者的代码。在C ++中使用LINPACK可能要快得多 dchud 和 dchdd R不包括在内,所以你必须写一个包来这样做。此外,我记得读过你可能会比LINPACK更快地完成其他实现 dchud 和 dchdd 对于R更新

library(SamplerCompare) # for LINPACK `chdd` and `chud`
roll_forcast <- function(X, y, width){
  n <- nrow(X)
  p <- ncol(X)
  out <- rep(NA_real_, n)

  is_first <- TRUE
  i <- width 
  while(i < n){
    if(is_first){
      is_first <- FALSE
      qr. <- qr(X[1:width, ])
      R <- qr.R(qr.)

      # Use X^T for the rest
      X <- t(X)

      XtY <- drop(tcrossprod(y[1:width], X[, 1:width]))
    } else {
      x_new <- X[, i]
      x_old <- X[, i - width]

      # update R 
      R <- .Fortran(
        "dchud", R, p, p, x_new, 0., 0L, 0L, 
        0., 0., numeric(p), numeric(p), 
        PACKAGE = "SamplerCompare")[[1]]

      # downdate R
      R <- .Fortran(
        "dchdd", R, p, p, x_old, 0., 0L, 0L, 
        0., 0., numeric(p), numeric(p), integer(1),
        PACKAGE = "SamplerCompare")[[1]]

      # update XtY
      XtY <- XtY + y[i] * x_new - y[i - width] * x_old
    }

    coef. <- .Internal(backsolve(R, XtY, p, TRUE, TRUE))
    coef. <- .Internal(backsolve(R, coef., p, TRUE, FALSE))

    i <- i + 1
    out[i] <- X[, i] %*% coef.
  }

  out
}

# simulate data
set.seed(101)
n <- 10000
wdth = 50
X <- matrix(rnorm(10 * n), n, 10)
y <- drop(X %*% runif(10)) + rnorm(n)
Z <- cbind(y, X)

# assign other function
lm_version <- function(Z, width = wdth) {
  pred <- Z[width + 1, -1, drop = FALSE]
  ## fit the model
  Z <- Z[-nrow(Z), ]
  fit <- lm.fit(Z[, -1, drop = FALSE], Z[,1])
  ## get things we need to predict, in case pivoting turned on in lm.fit
  p <- fit$rank
  p1 <- seq_len(p)
  piv <- fit$qr$pivot[p1]
  ## model coefficients
  beta <- fit$coefficients
  ## this gives the predicted value for row 31 of data passed in
  drop(pred[, piv, drop = FALSE] %*% beta[piv])
}

# show that they yield the same
library(zoo)
all.equal(
  rollapply(Z, wdth + 1, FUN = lm_version,  
            by.column = FALSE,  align = "right", fill = NA_real_),
  roll_forcast(X, y, wdth))
#R> [1] TRUE

# benchmark
library(compiler)
roll_forcast <- cmpfun(roll_forcast)
lm_version <- cmpfun(lm_version)
microbenchmark::microbenchmark(
  new =  roll_forcast(X, y, wdth),
  prev = rollapply(Z, wdth + 1, FUN = lm_version,  
                   by.column = FALSE,  align = "right", fill = NA_real_), 
  times = 10)
#R> Unit: milliseconds
#R> expr      min       lq     mean   median       uq      max neval cld
#R>  new 113.7637 115.4498 129.6562 118.6540 122.4930 230.3414    10  a 
#R> prev 639.6499 674.1677 682.1996 678.6195 686.8816 763.8034    10   b

1
2018-02-18 22:41



很酷的主意,谢谢! - Zach