#!/usr/bin/Rscript

# Exercise 43: linear regression of microarray processing costs.
#
# Reads "ex43_microarray_processing.data", fits Costs ~ Arrays with lm(),
# plots the observations with the fitted line and prints the answers to
# parts a) to c) of the exercise.

# The first column of the file supplies the row names; transposing with t()
# turns those named rows into columns, of which `Costs` and `Arrays` are used
# below.
data <- data.frame(
  t(read.table("ex43_microarray_processing.data", header = TRUE, row.names = 1))
)

# Fit via the `data` argument (instead of `data$...` inside the formula) so
# the coefficients are named after the columns and the model stays usable
# with predict(fm, newdata = ...).
fm <- lm(Costs ~ Arrays, data = data)

# Plot with an explicit formula so the axes always match the fitted model,
# independent of the column order in the input file.
plot(Costs ~ Arrays, data = data, pch = 19)
abline(fm, col = "red")

print(summary(fm))

cat(
  "a) Use linear regression to estimate the cost of processing a single array.\n\n ",
  "cost(n) =", coef(fm)[1], "+", "n *", coef(fm)[2], "\n\n"
)

# cat() joins its arguments with a single space, so splitting the text over
# several arguments prints exactly the same output as one long string.
cat(
  "b) Interpret each component of the regression equation. What does the",
  "y-intercept mean in the context of this problem? What does the slope",
  "mean in the context of this problem? How can you use this information",
  "to get a more complete picture of the cost of microarry processing?",
  "intercept: fixed costs, regardless of the amount of processed arrays",
  "slope: slope is lower than 1, so if more arrays get processed then it",
  "is less expensive for each array. "
)

# Estimated cost of processing `n` arrays according to the fitted line.
#
# n: number of arrays (numeric; may be a vector).
# Returns intercept + n * slope, using the coefficients of the global
# model `fm`.
estimated_cost <- function(n) {
  coefs <- coef(fm)
  coefs[1] + n * coefs[2]
}

# NOTE(review): the reported error is the plain standard deviation of the
# residuals (n - 1 denominator), as the printed label says. It is not the
# residual standard error of the model (sigma(fm)) nor a prediction interval;
# confirm which one the exercise expects.
cat(
  "c) How much will it cost to process 643 arrays in one month? What error do you expect\n",
  " for your prediction?\n\n ",
  "cost(643) =", round(estimated_cost(643)),
  "( +/-", round(sd(resid(fm))), "[= standard deviation])\n\n"
)