## ANALYZING RESPONSE SURFACE DATA ##
# This sample script has been written by Rebecca Clark. Please feel free to share with others. If you have any questions, you can contact me at: r11clark@gmail.com
################### RESPONSE SURFACE STATISTICS ######################
# Load the response surface package, rsm. For more information, including a reference manual and useful vignettes, see: http://cran.r-project.org/web/packages/rsm/index.html
library(rsm)
library(car)
## Two ways of entering data - from a file, or manually. The manual entry method is at the end of this script - run that first, then come back up here.
# Set the working directory
setwd("~/Documents/RebeccaClark/R Manuals/SampleScripts")
# Read in your data
eggdataex <- read.table("eggdata.csv", header=TRUE, sep=",", colClasses=c("factor", "factor", "factor", "numeric", "numeric", "numeric"))
head(eggdataex)
levels(eggdataex$Morph)
# R uses Type I sums of squares, in which the order of factors will affect statistical calculations for analysis of variance and linear regressions. See ?options() and ?contrasts() for more information on the defaults. Also, view the current defaults are with:
getOption("contrasts")
# This indicates that defaults are "contr.treatment" and "contr.poly" for unordered and ordered factors, respectively.
# You can use this step to adjust the ANOVA (unordered) contrasts to Type III, if preferred. Alternatively, use the Anova() function from the 'car' package.
options(contrasts=c("contr.helmert", "contr.poly"))
# A preliminary example of how R calculates and presents simple linear models. Here, the model includes the terms 'protein,' 'carbohydrate,' the 'protein-by-carbohydrate' interaction, and 'Morph' as factors.
eslm <- lm(eggs~protein+carbohydrate+protein*carbohydrate+Morph, data=eggdataex)
# Compare the output of the following summary functions:
summary(eslm)
anova(eslm)
Anova(eslm, type="III")
drop1(eslm, ~., test="F")
# Now, using rsm. The first step is to "code" continuous factors so they are all on equivalent scales, from -1 to 1. This is important for ensuring that all factors contribute evenly to the shape of the response surface. If you examine the structure of this function, it should be clear how this results in appropriate restructuring. Note that x1 is the 'coded' version of 'protein,' and x2 is the coded version of 'carbohydrate.'
# Coding data for use with rsm:
ecd <- coded.data(eggdataex, x1~(protein-mean(eggdataex$protein))/(mean(eggdataex$protein)-min(eggdataex$protein)), x2~(carbohydrate-mean(eggdataex$carbohydrate))/(mean(eggdataex$carbohydrate)-min(eggdataex$carbohydrate)))
head(ecd)
#It's possible to use the rsm() function from the rsm package to generate the response surface. However, the overall format of the output may not be desirable (see below):
ers <- rsm(eggs~SO(x1, x2)+Morph, data=ecd)
summary(ers)
# However, you can obtain the same statistical output with lm:
egglm <- lm(eggs~x1+x2+x1*x2+I(x1^2)+I(x2^2)+Morph, data=ecd)
summary(egglm)
anova(egglm)
# But with a couple of important distinctions. The output from rsm() includes an estimate of the stationary point of the response surface, in the original units. It also includes both the linear regression table and an ANOVA table that tests the significance of including the first-order terms (FO), two-way interaction between terms (TW), and partial quadratic terms (PQ), as well as other effects (e.g., Morph in this case).
# This model still isn't quite what's desired for a full test of the "Morph" factor. Here is a slightly more complex model including two-way and three-way interactions between Morph, protein, and carbohydrate:
egglm2 <- lm(eggs~x1+x2+I(x1^2)+x1*x2+I(x2^2)+Morph*x1*x2+Morph+Morph*x1+Morph*x2, data=ecd)
summary(egglm2)
drop1(egglm2, ~., test="F")
# To test for the significance of "Morph," now generate a reduced model where the 'Morph' term is completely dropped. You can also use the 'summary()' and 'anova()' functions/etc. to view output of this step, if desired.
egglm3 <- lm(eggs~x1+x2+x1*x2+I(x1^2)+I(x2^2), data=ecd)
# Now, to test for the significance of morph (aka check the partial F-test):
anova(egglm2, egglm3)
# Here, the partial F-test is significant, indicating there's a Morph-by-diet interaction effect on lifetime egg production; it is best to retain the model containing the 'Morph' terms.
#################### RESPONSE SURFACE FIGURES #####################
library(fields)
# A response surface for just the LW crickets, since "Morph" was significant. To standardize the axes, first check the data range for the Z variable of interest (i.e. 'eggs'). Note that I have gone back to using the original data frame for this, because I want the axes to match my original units for protein and carbohydrate.
range(eggdataex$eggs)
# Often, you may wish to adjust the limits further, based on the distribution of the data (so the range doesn't extend from absolute max and min); it can be informative to check a histogram, too:
hist(eggdataex$eggs)
# Here, a scale from 0 to ~2000 may be appropriate
# Subset the data based on the factor of interest:
eggdataexLW <- subset(eggdataex, Morph=="LW")
# Use the Tps (thin-plate splines) function to estimate the response surface:
surf.te.LW <- Tps(cbind(eggdataexLW$protein, eggdataexLW$carbohydrate),eggdataexLW$eggs, lambda=0.01)
# You may generate several warnings, particularly if there's no peak present in the middle of your response surface. You can generally ignore these, given that your statistical analysis is not based off of the nonparametric thin-plate splines analysis.
# Produce the surface and then map contour lines onto it:
surf.te.outLW <- predictSurface(surf.te.LW)
image(surf.te.outLW, col=tim.colors(128), lwd=5, las=1, font.lab=2, cex.lab=1.3, mgp=c(2.7, 0.5, 0), font.axis=1, lab=c(5,5, 6), xlab=expression("Protein content"), ylab=expression("Carbohydrate content"), main="LW crickets - eggs produced over the first five weeks of adulthood", xlim=c(0,38), ylim=c(0,38), zlim=c(0,2000))
##### Decorative Options #####
# Add contour lines
contour(surf.te.outLW, lwd=2, labcex=1, add=T)
# Add in the treatment points
points(eggdataexLW$protein, eggdataexLW$carbohydrate, pch=21)
# Add in lines, e.g. a line for isocaloric intake:
abline(a=0, b=1, lty=1, lwd=1.5)
# Use par() to examine the default graphing parameters if you wish to make adjustments to anything in the image() argument.
eggdataex<-structure(list(Run = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1", "2"), class = "factor"), Cage = structure(c(1L, 12L, 23L, 34L, 45L, 56L, 67L, 74L, 75L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 57L, 58L, 59L, 60L, 61L, 62L, 1L, 12L, 23L, 34L, 45L, 56L, 67L, 74L, 75L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 35L, 36L, 38L, 40L, 41L, 42L, 43L, 44L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 68L, 69L, 70L, 71L, 72L, 73L), .Label = c("1", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "2", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "3", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "4", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "5", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "6", "60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "7", "71", "72", "73", "74", "75", "76", "8", "9"), class = "factor"), Morph = structure(c(2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("SW", "LW"), class = "factor"), protein = c(4, 23, 28.75, 16.25, 9.75, 28, 28, 18, 16.25, 13, 27, 13, 9, 14, 13, 8, 8, 28.75, 9, 23, 4, 9, 28.75, 9, 18, 9, 14, 23, 16.25, 8, 9.75, 16.25, 23, 4, 13, 27, 17.25, 17.25, 28, 9.75, 4, 18, 17.25, 13, 8, 27, 18, 27, 23, 28, 14, 4, 8, 9.75, 14, 9.75, 17.25, 17.25, 28.75, 14, 27, 16.25, 18, 4, 9, 28, 16.25, 8, 16.25, 27, 23, 28.75, 9, 8, 9, 28, 28.75, 28, 16.25, 18, 9.75, 16.25, 4, 14, 9.75, 18, 28, 13, 23, 27, 13, 27, 4, 8, 18, 27, 18, 9, 9.75, 17.25, 16.25, 27, 14, 18, 9.75, 28.75, 9, 17.25, 4, 28, 17.25, 8, 28.75, 13, 8, 4, 17.25, 9.75, 14, 23, 17.25, 28.75, 23, 23, 14, 8, 9.75, 14, 23, 28.75, 23, 23, 14, 28.75, 28), carbohydrate = c(17, 19, 23.75, 36.25, 21.75, 14, 14, 24, 36.25, 29, 36, 29, 12, 7, 29, 34, 34, 23.75, 12, 19, 17, 12, 23.75, 12, 24, 12, 7, 19, 36.25, 34, 21.75, 36.25, 19, 17, 29, 36, 14.25, 14.25, 14, 21.75, 17, 24, 14.25, 29, 34, 36, 24, 36, 19, 14, 7, 17, 34, 21.75, 7, 21.75, 14.25, 14.25, 23.75, 7, 36, 36.25, 24, 17, 12, 14, 36.25, 34, 36.25, 36, 19, 23.75, 12, 34, 12, 14, 23.75, 14, 36.25, 24, 21.75, 36.25, 17, 7, 21.75, 24, 14, 29, 19, 36, 29, 36, 17, 34, 24, 36, 24, 12, 21.75, 14.25, 36.25, 36, 7, 24, 21.75, 23.75, 12, 14.25, 17, 14, 14.25, 34, 23.75, 29, 34, 17, 14.25, 21.75, 7, 19, 14.25, 23.75, 19, 19, 7, 34, 21.75, 7, 19, 23.75, 19, 19, 7, 23.75, 14), eggs = c(427, 1567, 1024, 441, 473, 664, 193, 1566, 1222, 520, 1298, 1325, 979, 1458, 893, 220, 767, 1626, 1088, 1717, 536, 985, 418, 607, 1195, 668, 681, 957, 428, 5, 816, 404, 2046, 210, 106, 637, 1153, 1534, 656, 1005, 149, 1894, 1071, 1342, 42, 1413, 956, 565, 1370, 1104, 807, 204, 250, 347, 904, 337, 1339, 880, 1919, 952, 880, 1038, 239, 276, 522, 890, 555, 10, 1010, 823, 1100, 1732, 279, 120, 712, 1622, 1373, 2370, 598, 2594, 53, 1169, 178, 875, 137, 326, 1920, 684, 1040, 1397, 97, 637, 639, 218, 500, 1409, 506, 555, 731, 1454, 537, 1034, 397, 357, 685, 1985, 736, 243, 107, 952, 1375, 130, 979, 216, 255, 188, 688, 810, 668, 1319, 484, 1198, 971, 1090, 46, 630, 1339, 773, 1216, 1428, 954, 1841, 632, 1693, 1542)), .Names = c("Run", "Cage", "Morph", "protein", "carbohydrate", "eggs"), row.names = c(NA, -135L), class = "data.frame")