#!/usr/bin/Rscript
# Load the data set. The first column of the file holds the row names and the
# table is transposed after reading, so each row of the file becomes a column
# of `data` (the script relies on the columns `Arrays` and `Costs`).
data <- data.frame(
  t(read.table("ex43_microarray_processing.data", header = TRUE, row.names = 1))
)

# Fit Costs as a linear function of Arrays. Naming the variables in the formula
# and passing `data =` (rather than writing `data$...` inside the formula)
# yields clean coefficient names and keeps the model usable with predict().
fm <- lm(Costs ~ Arrays, data = data)

# Plot with explicit axes so the scatter plot always has Arrays on x and Costs
# on y, matching the fitted line regardless of the column order in the file.
plot(Costs ~ Arrays, data = data, pch = 19)
abline(fm, col = "red")

# Coefficient table, residual standard error and R^2 of the fit.
print(summary(fm))

# Answer a): print the fitted cost equation, intercept first, then the slope
# as the multiplier of n. Coefficients are taken by position from the model.
cat(
  "a) Use linear regression to estimate the cost of processing a single array.\n\n  ",
  "cost(n) =", coef(fm)[[1]], "+ n *", coef(fm)[[2]],
  "\n\n",
  sep = " "
)

# Answer b): interpretation of the regression coefficients.
# The slope is the marginal cost per additional array; its size relative to 1
# depends on the units and says nothing about economies of scale. The average
# cost per array falls with volume because the fixed cost (intercept) is
# spread over more arrays.
cat("b) Interpret each component of the regression equation. What does the y-intercept mean in the
   context of this problem? What does the slope mean in the context of this problem? How can you
   use this information to get a more complete picture of the cost of microarry processing?

   intercept: estimated fixed costs, incurred regardless of the number of arrays processed
   slope: estimated variable cost, i.e. the additional cost of processing one more array
   together: the average cost per array is intercept / n + slope, so it decreases towards
             the slope as more arrays are processed, because the fixed costs are spread out.

")

# Estimated total cost of processing `n` arrays according to the fitted model
# `fm`: intercept + n * slope. `n` may be a numeric vector. Coefficients are
# extracted with `[[` so the result does not carry the misleading name
# "(Intercept)".
estimated_cost <- function(n) {
  coef(fm)[[1]] + n * coef(fm)[[2]]
}

# Answer c): point prediction for 643 arrays. The expected error is reported as
# the residual standard error of the fit (sigma(fm), which divides by the
# residual degrees of freedom n - 2); sd(resid(fm)) divides by n - 1 and
# therefore understates the typical error.
cat("c) How much will it cost to process 643 arrays in one month? What error do you expect\n",
    "  for your prediction?\n\n  ",
    "cost(643) =", round(estimated_cost(643)),
    "( +/-", round(sigma(fm)), "[= residual standard error])\n\n")