## PTL estimation demo: compare non-transfer, Trans-lasso, PTL, HPTL
library(MASS)
library(glmnet)
library(ncvreg)
library(DoubleML)
library(mlr3)
library(mlr3learners)
library(PartialTL)

p <- 200  # nuisance dimension including zeros
n_t <- 40
n_s <- 2000

nonzero.Beta_t <- c(0.3, 0.9, 0.8, 0.7, seq(0.1,0.9, length.out = 16))
nonzero.Gamma_t <- seq(0.1,0.9, length.out = 10)
p.nonzero <- length(nonzero.Beta_t)

Beta_t <- matrix(c(nonzero.Beta_t, rep(0, p - p.nonzero)))
Gamma_t <- matrix(c(nonzero.Gamma_t, rep(0, p - p.nonzero)))

# source model: h = 0.05, s = 0.05
nonzero.Beta_s <- c(0.35, 0.85, 0.75, 0.7, seq(0.1,0.9, length.out = 16))
nonzero.Gamma_s <- seq(0.05,0.85, length.out = 10)

Beta_s <- matrix(c(nonzero.Beta_s, rep(0, p - p.nonzero)))
Gamma_s <- matrix(c(nonzero.Gamma_s, rep(0, p - p.nonzero)))

causal_t <- c(-0.8)
causal_s <- c(0.8)
q <- length(causal_t)  # causal dimension

# 'learner$task_type' must be 'regr'
lgr::get_logger("mlr3")$set_threshold("warn")
learner_la = lrn("regr.cv_glmnet")
ml_f = learner_la$clone()
ml_g = learner_la$clone()

sim <- 5
hat.rho.non.trans_lasso <- matrix(0, nrow = q, ncol = sim)
hat.rho.non.trans_DML <- matrix(0, nrow = q, ncol = sim)
hat.rho.Trans_lasso <- matrix(0, nrow = q, ncol = sim)
hat.rho.PTL <- matrix(0, nrow = q, ncol = sim)
hat.rho.HPTL <- matrix(0, nrow = q, ncol = sim)
for (s in 1:sim) {
  cat('iteration:', s)
  cat('\n')
  ############################ target data ################################
  dgp_t <- DGP(n_t, q, p, p.nonzero, causal_t, Beta_t, Gamma_t,
               mu = rep(10, p), sigma = 0.5, f_func = f_0, g_func = g_0)
  D_t <- dgp_t$D
  X_t <- dgp_t$X
  Y_t <- dgp_t$Y
  ## nontrans_lasso fit
  lasso.cv_t <- cv.glmnet(cbind(D_t, X_t), Y_t, intercept = FALSE, nfolds = 5)
  hat.rho.non.trans_lasso[,s] <- matrix(coef(lasso.cv_t))[1+q,]
  ## nontrans_DML fit
  dml_data_t <- double_ml_data_from_matrix(X = X_t, y = Y_t, d = D_t)
  obj_dml_plr_t <- DoubleMLPLR$new(dml_data_t, ml_l = ml_f, ml_m = ml_g)
  hat.rho.non.trans_DML[, s] <- obj_dml_plr_t$fit()$all_coef

  ############################ source data ################################
  dgp_s <- DGP(n_s, q, p, p.nonzero, causal_s, Beta_s, Gamma_s,
               mu = rep(15, p), sigma = 0.5, f_func = f_k, g_func = g_k)
  D_s <- dgp_s$D
  X_s <- dgp_s$X
  Y_s <- dgp_s$Y
  ## Partial Trans-lasso fit
  # step1: initialization
  lasso.cv_s <- cv.glmnet(cbind(D_s, X_s), Y_s, intercept = FALSE, nfolds = 5)
  rho.and.beta.hat_s <- matrix(coef(lasso.cv_s)[-1,])
  # step2: de-biasing
  Y_t.new <- Y_t - cbind(D_t, X_t)%*%rho.and.beta.hat_s
  # adaptive weighting
  weighting <- c(0, rep(1,p))
  lasso.cv_t.new <- cv.glmnet(cbind(D_t, X_t), Y_t.new, intercept = FALSE,
                              nfolds = 5, penalty.factor = weighting)
  delta.hat <- coef(lasso.cv_t.new)[-1,]
  hat.rho.Trans_lasso[,s] <- rho.and.beta.hat_s[1] + delta.hat[1]
  ## PTL fit
  ptl <- fit_PTL(D_t, X_t, Y_t, D_s, X_s, Y_s,
                 ml_f, ml_g, fold = 5L)
  hat.rho.PTL[, s] <- ptl$hat_rho_PTL
  ## HPTL fit
  D_s_all <- rbind(D_s, D_s)
  X_s_all <- rbind(X_s, X_s)
  Y_s_all <- rbind(Y_s, Y_s)
  source_sizes <- c(n_s, n_s)
  module_sizes <- c(50, 150)
  hptl <- fit_HPTL(D_t, X_t, Y_t, D_s_all, X_s_all, Y_s_all,
                   source_sizes, module_sizes, ml_f, ml_g, fold = 5L)
  hat.rho.HPTL[, s] <- hptl$hat_rho_HPTL
}
RMSE.non.trans_lasso <- round(RMSE(causal_t, hat.rho.non.trans_lasso),4)
RMSE.non.trans_DML <- round(RMSE(causal_t, hat.rho.non.trans_DML),4)
RMSE.Trans_lasso <- round(RMSE(causal_t, hat.rho.Trans_lasso),4)
RMSE.PTL <- round(RMSE(causal_t, hat.rho.PTL),4)
RMSE.HPTL <- round(RMSE(causal_t, hat.rho.HPTL),4)
rbind(RMSE.non.trans_lasso, RMSE.non.trans_DML, RMSE.Trans_lasso, RMSE.PTL, RMSE.HPTL)
