# The Linear Regression Model: Null Hypothesis Significance Testing {#linreg-estimation-nhst-example}

Data

See `jeksterslabRdatarepo::wages.matrix` (a list with elements `X` and `y`) for the data set used in this example.

# Outcome vector (wages) from the packaged data set.
y <- jeksterslabRdatarepo::wages.matrix[["y"]]
# Regressor matrix; its last column (age) is dropped before estimation.
X <- jeksterslabRdatarepo::wages.matrix[["X"]]
X <- X[, -ncol(X)]
# Preview the first rows of the regressors and of the outcome.
head(X)
#>      constant gender race union education experience
#> [1,]        1      1    0     0        12         20
#> [2,]        1      0    0     0         9          9
#> [3,]        1      0    0     0        16         15
#> [4,]        1      0    1     1        14         38
#> [5,]        1      1    1     0        16         19
#> [6,]        1      1    0     0        12          4
head(y)
#>      wages
#> [1,] 11.55
#> [2,]  5.00
#> [3,] 12.00
#> [4,]  7.00
#> [5,] 21.15
#> [6,]  6.92

Original Data

# Dimensions of the design matrix: k columns (the constant included) and
# n rows.
k <- ncol(X)
n <- nrow(X)
# The estimates are stored under the same names as the functions that produce
# them; R still resolves the names to the functions when they are used in
# call position.
sehatbetahat <- sehatbetahat(X = X, y = y)
betahat <- betahat(X = X, y = y)
# Route 1: inference from the precomputed estimates and standard errors.
result1 <- nhst(betahat = betahat, sehatbetahat = sehatbetahat, n = n, k = k)
# Route 2: inference straight from the data.
result2 <- betahatinference(X = X, y = y)

Scaled Data

# Standardized slopes and their standard errors, computed once so they can be
# handed to the inference function in different combinations below.
slopeshatprime <- slopeshatprime(
  X = X,
  y = y
)
sehatslopeshatprimetb <- sehatslopeshatprimetb(
  X = X,
  y = y
)
# Variant 1: both the slopes and their standard errors are supplied.
results1_scaled <- .slopeshatprimeinference(
  slopeshatprime = slopeshatprime,
  sehatslopeshatprime = sehatslopeshatprimetb,
  n = n
)
# Variant 2: only the standard errors are supplied, together with the data.
results2_scaled <- .slopeshatprimeinference(
  sehatslopeshatprime = sehatslopeshatprimetb,
  X = X,
  y = y
)
# Variant 3a: only the slopes are supplied, together with the data and the
# standard error type. This result used to be assigned to `results3_scaled`
# and was immediately overwritten by the next call; it now has its own name
# so that it is no longer silently discarded.
# NOTE(review): this variant is not compared against lm() anywhere yet.
results3a_scaled <- .slopeshatprimeinference(
  slopeshatprime = slopeshatprime,
  sehatslopeshatprimetype = "textbook",
  X = X,
  y = y
)
# Variant 3: only the data and the standard error type are supplied.
results3_scaled <- .slopeshatprimeinference(
  X = X,
  y = y,
  sehatslopeshatprimetype = "textbook"
)
# Variant 4: same arguments as variant 3, through the function without the
# leading dot.
results4_scaled <- slopeshatprimeinference(
  X = X,
  y = y,
  sehatslopeshatprimetype = "textbook"
)

lm() function

# Reference fit with lm(); the coefficient table of its summary is pulled out
# once and then split into its four columns.
lmobj <- lm(
  wages ~ gender + race + union + education + experience,
  data = jeksterslabRdatarepo::wages
)
lm_coeftable <- summary(lmobj)[["coefficients"]]
lm_p <- lm_coeftable[, "Pr(>|t|)"]
lm_t <- lm_coeftable[, "t value"]
lm_se <- lm_coeftable[, "Std. Error"]
lm_coef <- lm_coeftable[, "Estimate"]

lm() function - scaled data

# Reference fit with lm() on the data after scale() has been applied to every
# column.
lmscaledobj <- lm(
  wages ~ gender + race + union + education + experience,
  data = as.data.frame(scale(jeksterslabRdatarepo::wages))
)
# Coefficient table without its first row (the intercept), so that only the
# slopes remain; as.vector() strips the row names from each column.
lmscaled_slopes <- summary(lmscaledobj)[["coefficients"]][-1, , drop = FALSE]
lmscaled_coef <- as.vector(lmscaled_slopes[, "Estimate"])
lmscaled_se <- as.vector(lmscaled_slopes[, "Std. Error"])
lmscaled_t <- as.vector(lmscaled_slopes[, "t value"])
lmscaled_p <- as.vector(lmscaled_slopes[, "Pr(>|t|)"])
# Pull each column out of the two result objects as a plain vector, grouped by
# column so that the pairs compared in the tests sit next to each other.
# Coefficients.
result_coef1 <- as.vector(result1[, "coef"])
result_coef2 <- as.vector(result2[, "coef"])
# Standard errors.
result_se1 <- as.vector(result1[, "se"])
result_se2 <- as.vector(result2[, "se"])
# t statistics.
result_t1 <- as.vector(result1[, "t"])
result_t2 <- as.vector(result2[, "t"])
# p-values.
result_p1 <- as.vector(result1[, "p"])
result_p2 <- as.vector(result2[, "p"])
context("Test linreg-estimation-linreg")
test_that("coef.", {
  # expect_equivalent() compares only its first two arguments; further
  # positional arguments are forwarded to the comparison function instead of
  # being compared. Each vector therefore gets its own length check.
  expect_length(result_coef1, length(lm_coef))
  expect_length(result_coef2, length(lm_coef))
  # Element-wise comparison against the lm() estimates; names are ignored.
  for (i in seq_along(lm_coef)) {
    expect_equivalent(
      result_coef1[i],
      lm_coef[i]
    )
    expect_equivalent(
      result_coef2[i],
      lm_coef[i]
    )
  }
})
#> Test passed 🎉
test_that("se.", {
  # expect_equivalent() compares only its first two arguments; further
  # positional arguments are forwarded to the comparison function instead of
  # being compared. Each vector therefore gets its own length check.
  expect_length(result_se1, length(lm_se))
  expect_length(result_se2, length(lm_se))
  # Element-wise comparison against the lm() standard errors; names are
  # ignored.
  for (i in seq_along(lm_se)) {
    expect_equivalent(
      result_se1[i],
      lm_se[i]
    )
    expect_equivalent(
      result_se2[i],
      lm_se[i]
    )
  }
})
#> Test passed 🎉
test_that("t.", {
  # expect_equivalent() compares only its first two arguments; further
  # positional arguments are forwarded to the comparison function instead of
  # being compared. Each vector therefore gets its own length check.
  expect_length(result_t1, length(lm_t))
  expect_length(result_t2, length(lm_t))
  # Element-wise comparison against the lm() t statistics; names are ignored.
  for (i in seq_along(lm_t)) {
    expect_equivalent(
      result_t1[i],
      lm_t[i]
    )
    expect_equivalent(
      result_t2[i],
      lm_t[i]
    )
  }
})
#> Test passed 😸
test_that("p.", {
  # expect_equivalent() compares only its first two arguments; further
  # positional arguments are forwarded to the comparison function instead of
  # being compared. Each vector therefore gets its own length check.
  expect_length(result_p1, length(lm_p))
  expect_length(result_p2, length(lm_p))
  # Element-wise comparison against the lm() p-values; names are ignored.
  for (i in seq_along(lm_p)) {
    expect_equivalent(
      result_p1[i],
      lm_p[i]
    )
    expect_equivalent(
      result_p2[i],
      lm_p[i]
    )
  }
})
#> Test passed 😀
# Pull each column out of the four scaled result objects as a plain vector,
# grouped by column so that the vectors compared in each test sit together.
# Standardized coefficients.
results_scaled_coef1 <- as.vector(results1_scaled[, "coef"])
results_scaled_coef2 <- as.vector(results2_scaled[, "coef"])
results_scaled_coef3 <- as.vector(results3_scaled[, "coef"])
results_scaled_coef4 <- as.vector(results4_scaled[, "coef"])
# Standard errors.
results_scaled_se1 <- as.vector(results1_scaled[, "se"])
results_scaled_se2 <- as.vector(results2_scaled[, "se"])
results_scaled_se3 <- as.vector(results3_scaled[, "se"])
results_scaled_se4 <- as.vector(results4_scaled[, "se"])
# t statistics.
results_scaled_t1 <- as.vector(results1_scaled[, "t"])
results_scaled_t2 <- as.vector(results2_scaled[, "t"])
results_scaled_t3 <- as.vector(results3_scaled[, "t"])
results_scaled_t4 <- as.vector(results4_scaled[, "t"])
# p-values.
results_scaled_p1 <- as.vector(results1_scaled[, "p"])
results_scaled_p2 <- as.vector(results2_scaled[, "p"])
results_scaled_p3 <- as.vector(results3_scaled[, "p"])
results_scaled_p4 <- as.vector(results4_scaled[, "p"])
test_that("scaled coef.", {
  # expect_equivalent() compares only its first two arguments; further
  # positional arguments are forwarded to the comparison function instead of
  # being compared. Each vector therefore gets its own length check.
  expect_length(results_scaled_coef1, length(lmscaled_coef))
  expect_length(results_scaled_coef2, length(lmscaled_coef))
  expect_length(results_scaled_coef3, length(lmscaled_coef))
  expect_length(results_scaled_coef4, length(lmscaled_coef))
  # Element-wise comparison against the slopes of the lm() fit on scaled data.
  for (i in seq_along(lmscaled_coef)) {
    expect_equivalent(
      results_scaled_coef1[i],
      lmscaled_coef[i]
    )
    expect_equivalent(
      results_scaled_coef2[i],
      lmscaled_coef[i]
    )
    expect_equivalent(
      results_scaled_coef3[i],
      lmscaled_coef[i]
    )
    expect_equivalent(
      results_scaled_coef4[i],
      lmscaled_coef[i]
    )
  }
})
#> Test passed 🎉
test_that("scaled se.", {
  # expect_equivalent() compares only its first two arguments; further
  # positional arguments are forwarded to the comparison function instead of
  # being compared. Each vector therefore gets its own length check.
  expect_length(results_scaled_se1, length(lmscaled_se))
  expect_length(results_scaled_se2, length(lmscaled_se))
  expect_length(results_scaled_se3, length(lmscaled_se))
  expect_length(results_scaled_se4, length(lmscaled_se))
  # Element-wise comparison against the slope standard errors of the lm() fit
  # on scaled data.
  for (i in seq_along(lmscaled_se)) {
    expect_equivalent(
      results_scaled_se1[i],
      lmscaled_se[i]
    )
    expect_equivalent(
      results_scaled_se2[i],
      lmscaled_se[i]
    )
    expect_equivalent(
      results_scaled_se3[i],
      lmscaled_se[i]
    )
    expect_equivalent(
      results_scaled_se4[i],
      lmscaled_se[i]
    )
  }
})
#> Test passed 🥇
test_that("scaled t.", {
  # expect_equivalent() compares only its first two arguments; further
  # positional arguments are forwarded to the comparison function instead of
  # being compared. Each vector therefore gets its own length check.
  expect_length(results_scaled_t1, length(lmscaled_t))
  expect_length(results_scaled_t2, length(lmscaled_t))
  expect_length(results_scaled_t3, length(lmscaled_t))
  expect_length(results_scaled_t4, length(lmscaled_t))
  # Element-wise comparison against the slope t statistics of the lm() fit on
  # scaled data.
  for (i in seq_along(lmscaled_t)) {
    expect_equivalent(
      results_scaled_t1[i],
      lmscaled_t[i]
    )
    expect_equivalent(
      results_scaled_t2[i],
      lmscaled_t[i]
    )
    expect_equivalent(
      results_scaled_t3[i],
      lmscaled_t[i]
    )
    expect_equivalent(
      results_scaled_t4[i],
      lmscaled_t[i]
    )
  }
})
#> Test passed 😀
test_that("scaled p.", {
  # expect_equivalent() compares only its first two arguments; further
  # positional arguments are forwarded to the comparison function instead of
  # being compared. Each vector therefore gets its own length check.
  expect_length(results_scaled_p1, length(lmscaled_p))
  expect_length(results_scaled_p2, length(lmscaled_p))
  expect_length(results_scaled_p3, length(lmscaled_p))
  expect_length(results_scaled_p4, length(lmscaled_p))
  # Element-wise comparison against the slope p-values of the lm() fit on
  # scaled data.
  for (i in seq_along(lmscaled_p)) {
    expect_equivalent(
      results_scaled_p1[i],
      lmscaled_p[i]
    )
    expect_equivalent(
      results_scaled_p2[i],
      lmscaled_p[i]
    )
    expect_equivalent(
      results_scaled_p3[i],
      lmscaled_p[i]
    )
    expect_equivalent(
      results_scaled_p4[i],
      lmscaled_p[i]
    )
  }
})
#> Test passed 😀