# Quantifying abdominal pigmentaiton # Omid Saleh Ziabari # Last updated: January 12th, 2017 # setting our working directory and loading our libraries setwd("~/local/path/to/folder") libs<-list("data.table","reshape2","plyr","stringr","ggplot2","dplyr") lapply(libs,require,character.only = TRUE) # DEINFE: filepath to profiles pathdir<-"~/path/to/profile/folderProfiles/" # ImageJ appends the profile name with "_profile.csv" so we specify to select only those files within the directory specified in "list.files" list.files<-list.files(pathdir) prof.files<-list.files[grep("profile",list.files)] # The function "reader" is used to take every indiviudal profile file name, separate factor id's from the file name, and return a data.frame of the profile data. reader<-function(profile){ path<-paste(pathdir,profile,sep="") # writes the directory path to variable "path" csv<-read.csv(file=path) # reads the "...profile.csv" and writes it to "csv" # following lines identify the identity factors of the sample id, the profile name includes the sample id seperated by "_', # so the lines below simply take the substring to match the particular sample id element session<-as.factor(gsub('\\_.*', '', profile)) # This needs to extract everything before the second underscore sample<-as.factor(gsub("^.*_ *(.*) *_T.*$", "\\1", profile)) # assumes sample id is letters and numbers, no special characters tergite<-as.factor(gsub("^.*_ *(.*) *_p.*$", "\\1", profile)) n<-nrow(csv) # gives us the number of rows in our profile data, "csv" data<-data.frame(csv,rep(session,n),rep(sample,n),rep(tergite,n)) # takes the factor id's extracted above, and repeats it for every row, creating our "data" of the profile colnames(data)<-c(colnames(csv),"Session","Sample","Tergite") return(data) } data<-ldply(prof.files,reader) # the function "reader" is then applied to every profile within our directory variable: "prof.files" # generic function to quickly combine column names to make an ID tag addPrimaryKey <-function(df, cols){ indivID<-apply(df[,cols], 1, function(x) paste(x, collapse="")) df<-cbind(df,indivID) return(df) } raw<-data.table(addPrimaryKey(data,3:ncol(data))) # add columns of factors together, from column 3 to end calib<-0.4616 # this variable is the spatial callibration (um/px) of the images taken, it's used to convert the pixel values to actual calibrated distance in um. When acquiring images, at that same magnification, we image a stage micrometer and obtain the resolution individual.list<-as.list(unique(raw$indivID)) # writes the unique values of "indivID", the specimen id, to a variable "individual.list" # the function "adjustments" converts pixel value to distance in um, and converts grayscale pixel value to "pigment value" adjustments<-function(individual){ # adjusting pixel intensity to pigmentation intensity (an inversion for an 8-bit image (0-255), where we take 255 - x, when x is the grayscale pixel value) this.fly<-raw[indivID==individual] this.fly$Value<-(255)-this.fly$Value # adjusting posterior:anterior orinetation this.fly$X<-(length(this.fly$X)+1) - this.fly$X # this reverses the x-axis # adjusting pixel unit to μm this.fly$X<-this.fly$X/calib # calib was defined globally, as measured with a stage micrometer setkey(this.fly,X) return(this.fly) } adj.data<-data.table(ldply(individual.list,adjustments)) #applies function "adjustments" to every element in "individual.list" # the function "spline.der.er" takes an individual and calculates the cubic spline, 1st-, and 2nd-derivatives spline.der.er<-function(x){ individual<-subset(adj.data,adj.data$indivID==x) spline<-smooth.spline(individual$Value,spar=.80) der1<-predict(spline,deriv=1) der2<-predict(spline,deriv=2) individual$spline.y<-spline$y individual$der1<-der1$y individual$der2<-der2$y return(individual) } data<-data.table(ldply(individual.list,spline.der.er)) # "spline.der.er" is applied for every indiviudal in "individual.list" # the function "coord" is the major function in extracting the coordinates of the profile spline curve, # using the 1st- and 2nd-derivatives to pinpoint major characteristics of the curve in order to obtain the metrics of pigmentation coord<-function(each){ # The first section of "coords" starts from the anterior end and checks every point along the 1st derivative of the spline for an the point, i, that has i-1<0 (negative number:"neg") and i>0 (positive number,"pos") indicating a zero ("neg.pos") neg.pos<-vector() for(i in seq(2,length(each$der1)-1)){ if(each$der1[i-1]<0 && each$der1[i]>0) neg.pos[i]<-i } neg.pos<-neg.pos[!is.na(neg.pos)] # this line removes all the NA's in "neg.pos" neg.pos.x<-each$X[neg.pos] # takes the indices from "neg.pos" and extracts the "each$X" distance value for that point # if "neg.pos" is empty (meaning no zeros were found), this second filter step checks the 2nd derivative, but instead reverses the order of the search from the lowest value to the anterior edge, checking for # the zero that satisfies i-1<0 (neg) and i>0 (pos), but again, the loop runs backwards. if(is.logical(neg.pos)) { least<-each[each$der2==min(each$der2),]$X*calib for(i in least:2){ if(each$der2[i-1]<0 && each$der2[i]>0) neg.pos[i]<-i } } neg.pos<-neg.pos[!is.na(neg.pos)] # removes NA's from "neg.pos" of the second filter neg.pos<-tail(neg.pos,1) # obtains the first instance (if you're coming from the posterior) or the last instance (if you're coming from the anterior) neg.pos.x<-each$X[neg.pos] # if still no such "neg.pos" zero is found, then it's flagged with an NA if(is.logical(neg.pos)) { neg.pos<-1 # so when we search for pos.neg in the next section, we defaul to 1 if nothing is found8 neg.pos.x<-NA } neg.pos.y<-each$spline.y[neg.pos] # neg.pos.y is the spline.y value (pigment value) at index "neg.pos" if(length(neg.pos.y)>1){ neg.pos.y<-NA } # note that the coordinate (neg.pos.x,neg.pos.y) gives us the pigment value at the anterior side of the tergite, we don't use the first point of the anterior edge because some flies have an additional pigment band # and so, this ensures that the point used to compare pigment values is the "lightest" anterior section # This next section of "coords" is much simpler, identifying the posterior edge (very distinct) pos.neg<-vector() for(i in seq(neg.pos,length(each$der1)-1)){ # we start our search from the neg.pos to save time if(each$der1[i-1]>0 && each$der1[i]<0) # trying to find the zero that satisfies i-1>0 ("pos") and i<0 ("neg") to find the index "pog.neg" pos.neg[i]<-i } pos.neg<-pos.neg[!is.na(pos.neg)] pos.neg<-pos.neg[1] pos.neg.x<-each$X[pos.neg] # there is an implicit assumption by made by the macro that if there is no distinct zero of the 1st derivative, it is because the tergites were really close together and there was no intermembrane to cause the hard contrast # in which case the user selects the posterior edge as closely as they can, and thus below the loop takes the last indice to correspond to the posterior edge. if(is.logical(pos.neg)) { pos.neg<-length(each$X)-1 pos.neg.x<-tail(each$X,1) } pos.neg.y<-each$spline.y[pos.neg] # if pos.neg.y is < 1 ,meaning it hasn't found any indicies for whatever reason, it is flagged as NA if(length(pos.neg.y)>1){ pos.neg.y<-NA } # the last section of "coords" identifies the inflection point of the spline immediately anterior to the posterior edge to extract the index of the gradient band ending. grad.band<-vector() for(i in seq(pos.neg,neg.pos)){ # the loop starts at the pos.neg index to the neg.pos index, working backwards if(each$der2[i]>0 && each$der2[i+1]<0) # the inflection point of the spline corresponds to the zero of the second derivative, unlike the identification of the other zeros, because we're counting down # for the loop, and coming in anatomically reversed, if we following along the loop, we're looking for a positive-negative zero. But depending on your reference frame, it can differ. grad.band[i]<-i } grad.band<-grad.band[!is.na(grad.band)] # removing teh NAs from the grad.band grad.band<-tail(grad.band,1) grad.band<-each$X[grad.band] if(is.logical(grad.band)) { grad.band<-NA # flagging any errors with NA } # "coord" returns "coord" which is a vector of the indices coord<-c(neg.pos.x,pos.neg.x,grad.band,neg.pos.y,pos.neg.y) return(coord) } # this function is an intermediate into looping our "coords" function over our entire dataset, and uses the local functional environment to allocate variable space assembly.coords<-function(individual.list.uno){ individual<-subset(data,data$indivID==unlist(individual.list.uno)) # print(unique(individual$indivID)) splined<-individual$spline.y deriv<-individual$der1 deriv2<-individual$der2 coordinate<-coord(individual) return(coordinate) } dataset<-lapply(individual.list,assembly.coords) # this returns a list of vectors of the indices from each "coord" returned # The "chek" function is a casual plotting function that randomly samples along the specimens using unique(indiv) and returns a ggplot that combines the spline, 1st-, and 2nd derivative, and also draws intercept lines at the # vector returned from "coord", helps in visualizing the data; note that it uses the multiplot function from here: http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/ chek<-function(dat){ guess<-sample(unique(dat$indivID),1) doo<-subset(dat,dat$indivID==guess) a<-ggplot(doo,aes(X,spline.y))+geom_point()+geom_vline(xintercept = coord(doo)[1:3]) b<-ggplot(doo,aes(X,der1))+geom_point()+geom_vline(xintercept = coord(doo)[1:3])+geom_hline(yintercept = 0) c<-ggplot(doo,aes(X,der2))+geom_point()+geom_vline(xintercept = coord(doo)[1:3])+geom_hline(yintercept = 0) multiplot(a,b,c) print(guess) } # This last block of code combines the id factors from data, the "tergaldata" which is a structured organization of the dataset from above, and names the columns accordingly. index<-grep("spline.y",colnames(data))-1 # select factor columns df<-unique(data[,3:index,with=F]) df$indivID <- paste(df$Sample, df$Tergite, sep='-') # gets rid of the session component of indivID to allow duplicated control images to be identified rownames(df)<-NULL tergaldata<-ldply(dataset,function(x) data.table(matrix(x,ncol=5))) colnames(tergaldata)<-c("neg.pos.x","pos.neg.x","grad.band","neg.pos.y","pos.neg.y") metrics<-cbind(df,tergaldata) # "metrics" is the final data variable where we calculate the aspects of pigmentation and trait values metrics$pigment.intensity<-metrics$pos.neg.y-metrics$neg.pos.y metrics$W.band<-metrics$pos.neg.x-metrics$grad.band metrics$W.tergite<-metrics$pos.neg.x # by definition colnames(metrics)<-c("Session","Sample","Tergite","id","ant.edge.ish","pos.edge","grad.band","Pmin","Pmax","pigment.intensity","W.band","W.tergite") metrics<-metrics[,c("Session","Sample","Tergite","id","Pmax","Pmin","W.band","W.tergite"),with=F] # subsetting relevant data, excluding factor id's like replicates # This next section is the "daisy chain" correction correction<-function(dataframe){ dt<-dataframe dt$corr.Pmax<-dt$Pmax dt$corr.duplicate<-rep("0",nrow(dt)) dt$corr.Pmin<-dt$Pmin # this line orders sessions directly session.order<-sort(as.character(unique(dt$Session))) for(i in 2:length(session.order)){ current.session<-dt[Session==session.order[i]] previous.session<-dt[Session==session.order[i-1]] # omitted looksie-back because the dt$corr.Pmax outside of the loop above has taken care of the first session of the chain overlap.specimens<-intersect(current.session$id,previous.session$id) pmax.si<-mean(subset(current.session,id %in% overlap.specimens)$corr.Pmax) pmax.si_1<-mean(subset(previous.session,id %in% overlap.specimens)$corr.Pmax) pmin.si<-mean(subset(current.session,id %in% overlap.specimens)$corr.Pmin) pmin.si_1<-mean(subset(previous.session,id %in% overlap.specimens)$corr.Pmin) pmax.diff<-pmax.si_1-pmax.si pmin.diff<-pmin.si_1-pmin.si print(pmax.diff) # as an ex it works dt[Session==session.order[i]]$corr.Pmax<-current.session$corr.Pmax+pmax.diff dt[Session==session.order[i]]$corr.Pmin<-current.session$corr.Pmin+pmin.diff # marking overlap specimens and the corresponding sessions theyre from, for now debug dt[id %in% overlap.specimens]$corr.duplicate<-rep(paste(LETTERS[i-1],session.order[i-1],"|",session.order[i],sep=""),nrow(dt[id %in% overlap.specimens][Session==session.order[i]])) } dt$corr.pigment.intensity<-dt$corr.Pmin-dt$corr.Pmax return(dt) } # subset out tergites and apply the correction. metrics.t3<-metrics[Tergite=="T3"] metrics.t4<-metrics[Tergite=="T4"] corr.metrics<-as.data.table(rbind(correction(metrics.t3),correction(metrics.t4))) corr.metrics<-corr.metrics[order(Session),] corr.metrics<-corr.metrics[!duplicated(corr.metrics$id),] # removes the duplicated tergites