# SAMPLE DOL CALCULATION PROCESS IN R
# Note that this process is set up for a data file where the frequencies of different tasks (not behaviors) performed by a given individual are all on a single line. If you wanted to calculate the DoL for a set of different colonies, it's probably possible to create a loop to do so, but I haven't yet reached the point where I'm comfortable enough with the syntax for writing a loop for this situation. An alternative approach would be to use the script below iteratively for each colony or unit of interest.
# Set working directory
setwd("/Users/username/Documents/RebeccaClark/R Manuals/SampleScripts/DivisionOfLabor")
dir()
# Load any packages used
library(plyr)
# Load data
doldata<-read.table("ClarkDOLData.csv", header=TRUE, sep=",")
doldata
# It's often a good idea to specify what the column headings are, and what they mean, so when you come back in a month (or two years) you know what you're looking at.
# Columns: Ind, BC, T, G, FP, FOR
# Meaning: Individual ant, Brood Care, Trash/Debris Removal, Allogrooming, Food-Processing, Foraging
# Create a data frame containing only the numeric values (no other informatioin), then use the prop.table() function to "normalize" your counts (convert them into the proportion of observations of a given task performed by a given individual out of the total number of observations made across all tasks and individuals)
# Using subscripting to create the data frame:
doldata2<-data.frame(doldata[,2:6])
# Normalize counts
PiTx<-prop.table(doldata2)
# Check your work
PiTx
# Now calculate the proportion of the total observations that consist of a given task (across all of the individuals).
SumPt<-colSums(PiTx)
# This calculation is then also put into a matrix so it will match up for a subsequent step. You'll have to adjust the numbers of rows and columns to match up with your total number of individuals (rows) and tasks (columns). If you print your "prop.table()" data frame, the rows should be numbered.
PtTx<-matrix(SumPt, nrow=4, ncol=5, byrow=TRUE)
PtTx
# Calculate the proportion of times that, e.g., individual 1 was observed performing task 1, out of the total proportion of times that task one was performed.
PiPt<-data.frame(PiTx/PtTx)
PiPt
# Create a function to calculate the components of the Shannon Diversity Index (pi*log10(pi))
H<-function(x) x*log10(x)
# Double-checking it:
H(PiPt)
# Calculate components for calculating the Shannon diversity index (H(tasks))
# (Question for myself) Is this the joint probability/mutual entropy between individuals and tasks?
negpilogpi<-data.frame(-H(PiPt))
# Convert the NaN's to zeros. The NaN's won't affect the subsequent steps anyway, but it might be useful to know how to do this for other purposes.
negpilogpi[is.na(negpilogpi)]<-0
negpilogpi
# For each task, calculate the total H value
HiTx<-colSums(negpilogpi)
HiTx
# Calculate the proportion of total observations that were made on a given individual.
SumPi<-rowSums(PiTx)
SumPi
# Calculate the proportion of the total observations that were made on a given task.
SumPt<-colSums(PiTx)
SumPt
# Calculate the diversity index for the proportion of observations that were made on a given individual.
HSumPi<-(-H(SumPi))
HSumPi
# Total this value to get Hi
Hi<-sum(HSumPi)
Hi
# To obtain Ht, calculate the diversity index for the proportion of observations that were made on a given task.
HSumPt<-(-H(SumPt))
HSumPt
# Total this value to get Ht
Ht<-sum(HSumPt)
Ht
# To obtain Hit, multiply each HiTx value by its corresponding PtTx (but you'll have to recalculate PtTx as a single row), and add together
Hit<-(HiTx*SumPt)
sum(Hit)
# To obtain I, subtract Hit from Hi
I<-c(Hi-sum(Hit))
I
# To obtain DOLti, divide I by Hi
# NOTE: This will be sensitive to changes in the number of individuals, but insensitive to changes in the number of tasks.
DOLti<-c(I/Hi)
DOLti
# To obtain DOLit, divide I by Ht
# NOTE: This will be sensitive to changes in the number of tasks, but insensitive to changes in the number of individuals. Most likely, this is the index that will be most useful to you.
DOLit<-c(I/Ht)
DOLit