library(ggplot2)
library(dplyr)

# ---- Load tracking data ------------------------------------------------------
data <- read.csv(
  file = "D:/E Drive/heritability_collective/Paper data/speed_gen_4.csv",
  header = TRUE
)
# Identifier-like columns are handled as strings from here on.
data$cell_line <- as.character(data$cell_line)
data$outcome <- as.character(data$outcome)
data$family <- as.character(data$family)
data$TID <- as.character(data$TID)

# ---- First and last record of every track ------------------------------------
# `test` gets exactly two rows per (family, TID): the first and the last row of
# that track, in file order. The pairing loop below relies on this layout
# (odd row = start of track, even row = end of track).
# Pieces are collected in a list and bound once instead of growing a data
# frame with rbind() on every iteration.
track_ends <- list()
for (fam_id in unique(data$family)) {
  fam_rows <- data[data$family == fam_id, ]
  for (tid in unique(fam_rows$TID)) {
    track <- fam_rows[fam_rows$TID == tid, ]
    track_ends[[length(track_ends) + 1L]] <-
      rbind(head(track, n = 1), tail(track, n = 1))
  }
}
test <- if (length(track_ends) > 0) do.call(rbind, track_ends) else data[0, ]
test <- as.data.frame(test)

# ---- Parent / offspring table ------------------------------------------------
# One row per track whose `parent` is non-zero, holding the parent's and the
# offspring's summary values side by side.
#   total_dist        <- `length` at the end of the track
#   net_dist          <- `d2s` at the end of the track
#   confine_ratio     <- net_dist / total_dist
#   total_time        <- (end time - start time) / 3600
#                        (presumably seconds -> hours; TODO confirm units)
#   curvlinear_speed  <- total_dist / total_time
#   straight_line_speed <- net_dist / total_time
# Rows are built with cbind(), so every value is stored as character here;
# numeric columns are converted back after the table is assembled.
pair_rows <- list()
for (fam_id in unique(test$family)) {
  fam <- test[test$family == fam_id, ]
  for (j in seq(2, nrow(fam), 2)) {
    if (fam$parent[j] != 0) {
      # Start/end rows of the parent track (row 2 is the end-of-track record).
      parent <- fam[fam$TID == fam$parent[j], ]

      parent_len <- parent$length[2]
      parent_d2s <- parent$d2s[2]
      parent_hours <- (parent$time[2] - parent$time[1]) / 3600

      child_len <- fam$length[j]
      child_d2s <- fam$d2s[j]
      child_hours <- (fam$time[j] - fam$time[j - 1]) / 3600

      pair_rows[[length(pair_rows) + 1L]] <- cbind(
        fam$cell_line[j], fam$media[j], fam$family[j],
        parent$generation[2], parent$TID[2],
        parent_len, parent_d2s, parent_d2s / parent_len,
        parent_hours, parent_len / parent_hours, parent_d2s / parent_hours,
        parent$outcome[2],
        fam$generation[j], fam$TID[j],
        child_len, child_d2s, child_d2s / child_len,
        child_hours, child_len / child_hours, child_d2s / child_hours,
        fam$outcome[j]
      )
    }
  }
}
test1 <- if (length(pair_rows) > 0) {
  do.call(rbind, pair_rows)
} else {
  matrix(character(0), nrow = 0, ncol = 21)
}
test1 <- as.data.frame(test1)
colnames(test1) <- c(
  "cell_line", "media", "family",
  "parent_gen", "parent_tid", "parent_total_dist", "parent_net_dist",
  "parent_confine_ratio", "parent_total_time", "parent_curvlinear_speed",
  "parent_straight_line_speed", "parent_outcome",
  "offspring_gen", "offspring_tid", "offspring_total_dist",
  "offspring_net_dist", "offspring_confine_ratio", "offspring_total_time",
  "offspring_curvlinear_speed", "offspring_straight_line_speed",
  "offspring_outcome"
)
# ---- Helpers for the parent/offspring section --------------------------------

# Histogram of `offspring_total_time`, one facet per cell line.
division_time_histogram <- function(df, x_label, title) {
  ggplot(df, aes(offspring_total_time, colour = cell_line)) +
    geom_histogram(binwidth = 1) +
    facet_wrap(~cell_line, scales = "free") +
    labs(x = x_label) +
    ggtitle(title) +
    scale_colour_discrete(name = "Cell Line")
}

# Parent time vs. (averaged) offspring time with a per-cell-line lm fit.
parent_offspring_scatter <- function(df, title) {
  ggplot(
    df,
    aes(x = parent_total_time, y = offspring_avg_time_taken, colour = cell_line)
  ) +
    geom_point() +
    geom_smooth(method = lm, se = FALSE) +
    facet_wrap(~cell_line, scales = "free") +
    labs(x = "Parent Time Taken", y = "Offspring Time Taken") +
    scale_colour_discrete(name = "Cell Line") +
    ggtitle(title)
}

# Cook's distance plot for `model`; points above 4x the mean distance are
# labelled with their row name (label drawn one unit to the right).
plot_cooks_distance <- function(model, label) {
  d <- cooks.distance(model)
  plot(d)
  title(main = label)
  abline(h = 0.5)
  text(
    x = seq_along(d) + 1, y = d,
    labels = ifelse(d > 4 * mean(d, na.rm = TRUE), names(d), "")
  )
}

# Rows of `rows` belonging to each (family, parent_tid) listed in `groups`,
# in the order of `groups`. Returns a zero-row frame when `groups` is empty.
collect_offspring <- function(rows, groups) {
  picked <- lapply(seq_len(nrow(groups)), function(k) {
    rows[(rows$family == groups$family[k]) &
      (rows$parent_tid == groups$parent_tid[k]), ]
  })
  if (length(picked) == 0) rows[0, ] else do.call(rbind, picked)
}

# ---- Type clean-up -----------------------------------------------------------
test1[] <- lapply(test1, as.character)
numeric_cols <- c(
  "parent_total_dist", "parent_net_dist", "parent_confine_ratio",
  "parent_total_time", "parent_straight_line_speed",
  "parent_curvlinear_speed",
  "offspring_total_dist", "offspring_net_dist", "offspring_confine_ratio",
  "offspring_total_time", "offspring_straight_line_speed",
  "offspring_curvlinear_speed"
)
test1[numeric_cols] <- lapply(test1[numeric_cols], as.numeric)

# ---- Filtering ---------------------------------------------------------------
check <- test1[test1$parent_outcome == "mos", ]
# Families 1065 and 2429 are excluded by hand.
test1 <- test1[(test1$family != 1065) & (test1$family != 2429), ]
# Keep only offspring whose outcome is none of "", "mos", "d into 3", "died".
test2 <- test1[(test1$offspring_outcome != "") &
  (test1$offspring_outcome != "mos") &
  (test1$offspring_outcome != "d into 3") &
  (test1$offspring_outcome != "died"), ]

# ---- Total-time histograms by generation -------------------------------------
division_time_histogram(
  test2[test2$offspring_gen == 2, ],
  "Offspring Total Time", "Generation 2: Total Time Histogram"
)
division_time_histogram(
  test2[test2$offspring_gen == 3, ],
  "Offspring Total Time", "Generation 3: Total Time Histogram"
)

# ---- Media effect on offspring total time (t-test per cell line) -------------
for (gen in c(2, 3)) {
  for (cl in unique(test2$cell_line)) {
    print(cl)
    gen_rows <- test2[(test2$cell_line == cl) & (test2$offspring_gen == gen), ]
    print(t.test(offspring_total_time ~ media, data = gen_rows))
  }
}

# NOTE(review): the two plots below show `offspring_total_time`, but their axis
# label and title say "Curvlinear Speed", and the second title omits the media.
# Strings are left unchanged; confirm the intended wording.
division_time_histogram(
  test2[(test2$offspring_gen == 2) & (test2$media == 0.5), ],
  "Offspring Curvlinear Speed",
  "Generation 2: Curvlinear Speed Histogram : Media 0.5"
)
division_time_histogram(
  test2[(test2$offspring_gen == 2) & (test2$media == 10), ],
  "Offspring Curvlinear Speed",
  "Generation 2: Curvlinear Speed Histogram"
)

# ---- Generation 2 parents -> generation 3 offspring --------------------------
gen2_3 <- test2[test2$parent_gen == 2, ]
fam_size <- count(gen2_3, family, parent_tid)
one_fam <- fam_size[fam_size$n == 1, ]
non_one_fam <- fam_size[fam_size$n != 1, ]

# Parents with a single retained offspring / with more than one.
gen2_3_one <- collect_offspring(gen2_3, one_fam)
gen2_3_both <- collect_offspring(gen2_3, non_one_fam)

# For sibling pairs the mean of rows (i, i + 1) is stored on the first row of
# the pair; the second row stays NA so each parent contributes one data point.
gen2_3_both$offspring_avg_time_taken <- rep(NA_real_, nrow(gen2_3_both))
if (nrow(gen2_3_both) > 0) {
  first <- seq(1, nrow(gen2_3_both), by = 2)
  gen2_3_both$offspring_avg_time_taken[first] <-
    (gen2_3_both$offspring_total_time[first] +
      gen2_3_both$offspring_total_time[first + 1]) / 2
}
gen2_3_one$offspring_avg_time_taken <- gen2_3_one$offspring_total_time

gen2_3 <- rbind(gen2_3_both, gen2_3_one)
gen2_3 <- gen2_3[gen2_3$family != 614, ]
gen2_3_0.5 <- gen2_3[gen2_3$media == 0.5, ]
gen2_3_10 <- gen2_3[gen2_3$media == 10, ]
# Reset row names to 1..n so Cook's-distance labels index these tables.
rownames(gen2_3_0.5) <- NULL
rownames(gen2_3_10) <- NULL

parent_offspring_scatter(
  gen2_3_0.5, "Generation 2-3: Parent - Offspring : Media = 0.5 "
)
parent_offspring_scatter(
  gen2_3_10, "Generation 2-3: Parent - Offspring : Media = 10 "
)

# ---- Influential points, media 0.5 -------------------------------------------
a <- unique(gen2_3_0.5$cell_line)
a <- a[a != "MDA"]
for (cl in a) {
  plot_cooks_distance(
    lm(offspring_avg_time_taken ~ parent_total_time,
      data = gen2_3_0.5[gen2_3_0.5$cell_line == cl, ]
    ),
    cl
  )
}
# Row flagged from the plots above, and the family removed because of it.
influ <- c(596)
gen2_3_0.5[influ, ]
fam <- c(753)
gen2_3_0.5 <- gen2_3_0.5[!(gen2_3_0.5$family %in% fam), ]

pa1 <- parent_offspring_scatter(
  gen2_3_0.5, "Generation 2-3: Parent - Offspring : Media = 0.5 "
)

# ---- Influential points, media 10 --------------------------------------------
a <- unique(gen2_3_10$cell_line)
for (cl in a) {
  plot_cooks_distance(
    lm(offspring_avg_time_taken ~ parent_total_time,
      data = gen2_3_10[gen2_3_10$cell_line == cl, ]
    ),
    cl
  )
}
influ <- c(49)
gen2_3_10[influ, ]
fam <- c(1175)
gen2_3_10 <- gen2_3_10[!(gen2_3_10$family %in% fam), ]
# ---- Helpers for the regression / sister / cousin section --------------------

# Cook's distance plot for `model`; points above 4x the mean distance are
# labelled with their row name (label drawn one unit to the right).
plot_cooks_distance <- function(model, label) {
  d <- cooks.distance(model)
  plot(d)
  title(main = label)
  abline(h = 0.5)
  text(
    x = seq_along(d) + 1, y = d,
    labels = ifelse(d > 4 * mean(d, na.rm = TRUE), names(d), "")
  )
}

# For every cell line: residuals-vs-predictor plot followed by the standard
# plot.lm() diagnostics. The model call is built with bquote() so the formula
# itself (not the word "formula") appears in the plot.lm() caption.
plot_residual_diagnostics <- function(df, formula, predictor, cell_lines,
                                      x_label, title) {
  for (cl in cell_lines) {
    rows <- df[df$cell_line == cl, ]
    model <- eval(bquote(lm(.(formula), data = rows)))
    plot(rows[[predictor]], resid(model),
      ylab = "Residuals", xlab = x_label, main = title
    )
    plot(model, main = cl)
  }
}

# One 1 x 8 row for the `output` table:
# N (= residual df + 2), adjusted R^2, slope, slope p-value,
# slope std. error * sqrt(N), cell line, media, label.
# Slope statistics are looked up by name; if the predictor was dropped from the
# fit they are NA rather than values read from the wrong cell of the matrix.
regression_row <- function(model, predictor, cell_line, media, label) {
  fit <- summary(model)
  n_obs <- fit$df[2] + 2
  slope <- c(
    "Estimate" = NA_real_, "Std. Error" = NA_real_, "Pr(>|t|)" = NA_real_
  )
  if (predictor %in% rownames(fit$coefficients)) {
    slope <- fit$coefficients[predictor, ]
  }
  cbind(
    n_obs, fit$adj.r.squared, slope[["Estimate"]], slope[["Pr(>|t|)"]],
    slope[["Std. Error"]] * (n_obs^0.5), cell_line, media, label
  )
}

# Fit `formula` per cell line on `df` and append one row per fit to `output`.
append_regressions <- function(output, df, formula, predictor, cell_lines,
                               media, label) {
  for (cl in cell_lines) {
    model <- lm(formula, data = df[df$cell_line == cl, ])
    output <- rbind(output, regression_row(model, predictor, cl, media, label))
  }
  output
}

# sister1 vs. sister2 scatter with a per-cell-line lm fit.
sister_scatter <- function(df, title) {
  ggplot(df, aes(x = sister1, y = sister2, colour = cell_line)) +
    geom_point() +
    geom_smooth(method = lm, se = FALSE) +
    facet_wrap(~cell_line, scales = "free") +
    labs(x = "sister1", y = "sister2") +
    scale_colour_discrete(name = "Cell Line") +
    ggtitle(title)
}

# Adds `time_taken` to a table laid out as (start row, end row) pairs:
# end time / 3600 - start time / 3600, stored on the end row; start rows are NA.
add_time_taken <- function(tracks) {
  tracks$time_taken <- rep(NA_real_, nrow(tracks))
  if (nrow(tracks) > 0) {
    start <- seq(1, nrow(tracks), by = 2)
    tracks$time_taken[start + 1] <-
      tracks$time[start + 1] / 3600 - tracks$time[start] / 3600
  }
  tracks
}

# ---- Parent-offspring plot, media 10 (after outlier removal) -----------------
pa2 <- ggplot(
  gen2_3_10,
  aes(x = parent_total_time, y = offspring_avg_time_taken, colour = cell_line)
) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE) +
  facet_wrap(~cell_line, scales = "free") +
  labs(x = "Parent Time Taken", y = "Offspring Time Taken") +
  scale_colour_discrete(name = "Cell Line") +
  ggtitle("Generation 2-3: Parent - Offspring : Media = 10 ")

# Rows that carry an averaged offspring value. Defined before first use, and
# filtered with a logical mask: `x[-which(cond), ]` would drop every row when
# nothing matches.
gen2_3_0.5_na <- gen2_3_0.5[!is.na(gen2_3_0.5$offspring_avg_time_taken), ]
gen2_3_10_na <- gen2_3_10[!is.na(gen2_3_10$offspring_avg_time_taken), ]

# ---- HeLa, media 10: fit with prediction band --------------------------------
hela <- gen2_3_10_na[gen2_3_10_na$cell_line == "HeLa", ]
model1 <- lm(offspring_avg_time_taken ~ parent_total_time, data = hela)
temp_var <- predict(model1, interval = "prediction")
new_df <- cbind(hela, temp_var)
ggplot(new_df, aes(x = parent_total_time, y = offspring_avg_time_taken)) +
  geom_point() +
  geom_line(aes(y = lwr), color = "red", linetype = "dashed") +
  geom_line(aes(y = upr), color = "red", linetype = "dashed") +
  geom_smooth(method = lm, se = TRUE)
summary(model1)

# ---- Residual diagnostics, parent-offspring ----------------------------------
a <- unique(gen2_3_0.5$cell_line)
a <- a[a != "MDA"]
plot_residual_diagnostics(
  gen2_3_0.5_na, offspring_avg_time_taken ~ parent_total_time,
  "parent_total_time", a,
  "Parent Total Time", "Parent Offspring Generation 2 - 3: Media 0.5"
)
a <- unique(gen2_3_10_na$cell_line)
plot_residual_diagnostics(
  gen2_3_10_na, offspring_avg_time_taken ~ parent_total_time,
  "parent_total_time", a,
  "Parent Total Time", "Parent Offspring Generation 2 - 3: Media 10"
)

# ---- Regression summary table: parent-offspring ------------------------------
output <- array(data = NA, c(0, 8))
output <- append_regressions(
  output, gen2_3_0.5, offspring_avg_time_taken ~ parent_total_time,
  "parent_total_time", unique(gen2_3$cell_line),
  0.5, "Generation 2-3: Parent - Offspring"
)
output <- append_regressions(
  output, gen2_3_10, offspring_avg_time_taken ~ parent_total_time,
  "parent_total_time", unique(gen2_3$cell_line),
  10, "Generation 2-3: Parent - Offspring"
)

# ---- Sister-sister pairs (generations 2 and 3) -------------------------------
gen2 <- test[(test$generation == 2) & (test$outcome != "") &
  (test$outcome != "mos") & (test$outcome != "d into 3") &
  (test$outcome != "died"), ]
gen3 <- test[(test$generation == 3) & (test$outcome != "") &
  (test$outcome != "mos") & (test$outcome != "d into 3") &
  (test$outcome != "died"), ]
gen2 <- add_time_taken(gen2)
gen3 <- add_time_taken(gen3)

# Keep only the end-of-track rows (the ones holding `time_taken`).
gen2_na <- gen2[!is.na(gen2$time_taken), ]
gen3_na <- gen3[!is.na(gen3$time_taken), ]

fam_size2 <- count(gen2_na, family)
one_fam2 <- fam_size2[fam_size2$n > 1, ]
fam_size3 <- count(gen3_na, family, parent)
one_fam3 <- fam_size3[fam_size3$n > 1, ]

gen2_both <- gen2_na[gen2_na$family %in% one_fam2$family, ]
sister_groups <- lapply(seq_len(nrow(one_fam3)), function(k) {
  gen3_na[(gen3_na$family == one_fam3$family[k]) &
    (gen3_na$parent == one_fam3$parent[k]), ]
})
gen3_both <- if (length(sister_groups) > 0) {
  do.call(rbind, sister_groups)
} else {
  gen3_na[0, ]
}
gen2_3_both_ss <- rbind(gen2_both, gen3_both)

# Consecutive rows (i, i + 1) are treated as one sister pair.
n_ss <- nrow(gen2_3_both_ss)
first <- if (n_ss > 0) seq(1, n_ss, by = 2) else integer(0)
gen2_3_ss <- data.frame(
  cell_line = as.character(gen2_3_both_ss$cell_line[first]),
  media = as.character(gen2_3_both_ss$media[first]),
  family = as.character(gen2_3_both_ss$family[first]),
  generation = as.character(gen2_3_both_ss$generation[first]),
  sister1 = as.numeric(gen2_3_both_ss$time_taken[first]),
  sister2 = as.numeric(gen2_3_both_ss$time_taken[first + 1]),
  stringsAsFactors = FALSE
)

gen2_3_ss <- gen2_3_ss[gen2_3_ss$family != 614, ]
gen2_3_ss_0.5 <- gen2_3_ss[gen2_3_ss$media == 0.5, ]
gen2_3_ss_10 <- gen2_3_ss[gen2_3_ss$media == 10, ]
# Reset row names to 1..n so Cook's-distance labels index these tables.
rownames(gen2_3_ss_0.5) <- NULL
rownames(gen2_3_ss_10) <- NULL

sister_scatter(gen2_3_ss_0.5, "Generation 2 & 3: sister - sister : Media = 0.5 ")
sister_scatter(gen2_3_ss_10, "Generation 2 & 3: sister - sister : Media = 10 ")

# ---- Influential points, sisters, media 0.5 ----------------------------------
a <- unique(gen2_3_ss_0.5$cell_line)
a <- a[a != "MDA"]
for (cl in a) {
  plot_cooks_distance(
    lm(sister1 ~ sister2, data = gen2_3_ss_0.5[gen2_3_ss_0.5$cell_line == cl, ]),
    cl
  )
}
influ <- c(54, 252)
gen2_3_ss_0.5[influ, ]
fam <- c(749, 152)
gen2_3_ss_0.5 <- gen2_3_ss_0.5[!(gen2_3_ss_0.5$family %in% fam), ]

# The smoother uses the same (sister1, sister2) mapping as the points.
pa3 <- sister_scatter(
  gen2_3_ss_0.5, "Generation 2 & 3: sister - sister : Media = 0.5 "
)

# ---- Influential points, sisters, media 10 -----------------------------------
a <- unique(gen2_3_ss_10$cell_line)
for (cl in a) {
  plot_cooks_distance(
    lm(sister1 ~ sister2, data = gen2_3_ss_10[gen2_3_ss_10$cell_line == cl, ]),
    cl
  )
}
# The flagged indices are row names of gen2_3_ss_10, so inspect that table.
influ1 <- c(108, 222, 3, 304, 215)
gen2_3_ss_10[influ1, ]
fam1 <- c(541, 950, 10, 1200, 942)
gen2_3_ss_10 <- gen2_3_ss_10[!(gen2_3_ss_10$family %in% fam1), ]

pa4 <- sister_scatter(
  gen2_3_ss_10, "Generation 2 & 3: sister - sister : Media = 10 "
)

# ---- Residual diagnostics, sisters -------------------------------------------
# NOTE(review): the x values are `sister2` but the axis label reads "sister 1";
# the label is left unchanged, confirm which is intended.
a <- unique(gen2_3_ss_0.5$cell_line)
a <- a[a != "MDA"]
plot_residual_diagnostics(
  gen2_3_ss_0.5, sister1 ~ sister2, "sister2", a,
  "sister 1", "sister sister Generation 2 & 3: Media 0.5"
)
a <- unique(gen2_3_ss_10$cell_line)
plot_residual_diagnostics(
  gen2_3_ss_10, sister1 ~ sister2, "sister2", a,
  "sister 1", "sister sister Generation 2 & 3: Media 10"
)

# ---- Regression summary table: sisters ---------------------------------------
output <- append_regressions(
  output, gen2_3_ss_0.5, sister1 ~ sister2, "sister2",
  unique(gen2_3_ss$cell_line), 0.5, "Generation 2 & 3: sister - sister"
)
output <- append_regressions(
  output, gen2_3_ss_10, sister1 ~ sister2, "sister2",
  unique(gen2_3_ss$cell_line), 10, "Generation 2 & 3: sister - sister"
)

# ---- Cousin-cousin pairs (generation 3) --------------------------------------
gen3_na <- gen3[!is.na(gen3$time_taken), ]
fam_one <- count(gen3_na, family)
fam_no_one <- fam_one[fam_one$n > 1, ]
fam_two <- fam_one[fam_one$n == 2, ]
fam_two_par <- count(gen3_na, family, parent)
fam2 <- fam_two_par[fam_two_par$family %in% fam_two$family, ]
# Families whose only two generation-3 cells share a parent: not wanted here.
fam22 <- fam2[fam2$n == 2, ]
fam_use <- fam_no_one[!(fam_no_one$family %in% fam22$family), ]
gen3_use <- gen3_na[gen3_na$family %in% fam_use$family, ]

# One row per family: cell_line, media, cousin1, cousin2, family.
#   2 cells: one value per row.
#   4 cells: mean of rows 1-2 vs. mean of rows 3-4.
#   3 cells: split by `parent`; the side with two cells is averaged.
# Rows are built with cbind(), so values are stored as character here.
cousin_rows <- list()
for (fam_id in unique(gen3_use$family)) {
  fam_cells <- gen3_use[gen3_use$family == fam_id, ]
  n_cells <- nrow(fam_cells)
  cousin_row <- NULL
  if (n_cells == 2) {
    cousin_row <- cbind(
      fam_cells$cell_line[1], fam_cells$media[1],
      fam_cells$time_taken[1], fam_cells$time_taken[2],
      fam_cells$family[1]
    )
  } else if (n_cells == 4) {
    cousin_row <- cbind(
      fam_cells$cell_line[1], fam_cells$media[1],
      (fam_cells$time_taken[1] + fam_cells$time_taken[2]) / 2,
      (fam_cells$time_taken[3] + fam_cells$time_taken[4]) / 2,
      fam_cells$family[1]
    )
  } else if (n_cells == 3) {
    parents <- unique(fam_cells$parent)
    kids1 <- fam_cells[fam_cells$parent == parents[1], ]
    kids2 <- fam_cells[fam_cells$parent == parents[2], ]
    if (nrow(kids1) > nrow(kids2)) {
      cousin_row <- cbind(
        kids1$cell_line[1], kids1$media[1],
        (kids1$time_taken[1] + kids1$time_taken[2]) / 2,
        kids2$time_taken[1],
        fam_cells$family[1]
      )
    } else if (nrow(kids1) < nrow(kids2)) {
      cousin_row <- cbind(
        kids1$cell_line[1], kids1$media[1],
        kids1$time_taken[1],
        (kids2$time_taken[1] + kids2$time_taken[2]) / 2,
        fam_cells$family[1]
      )
    }
  }
  if (!is.null(cousin_row)) {
    cousin_rows[[length(cousin_rows) + 1L]] <- cousin_row
  }
}
gen3_cc <- if (length(cousin_rows) > 0) {
  do.call(rbind, cousin_rows)
} else {
  array(data = NA, c(0, 5))
}
# ---- Helpers for the cousin / concordance section ----------------------------

# Cook's distance plot for `model`; points above 4x the mean distance are
# labelled with their row name (label drawn one unit to the right).
plot_cooks_distance <- function(model, label) {
  d <- cooks.distance(model)
  plot(d)
  title(main = label)
  abline(h = 0.5)
  text(
    x = seq_along(d) + 1, y = d,
    labels = ifelse(d > 4 * mean(d, na.rm = TRUE), names(d), "")
  )
}

# cousin1 vs. cousin2 scatter with a per-cell-line lm fit.
cousin_scatter <- function(df, x_label, y_label, title) {
  ggplot(df, aes(x = cousin1, y = cousin2, colour = cell_line)) +
    geom_point() +
    geom_smooth(method = lm, se = FALSE) +
    facet_wrap(~cell_line, scales = "free") +
    labs(x = x_label, y = y_label) +
    scale_colour_discrete(name = "Cell Line") +
    ggtitle(title)
}

# One 1 x 8 row for the `output` table:
# N (= residual df + 2), adjusted R^2, slope, slope p-value,
# slope std. error * sqrt(N), cell line, media, label.
# Slope statistics are looked up by name; if the predictor was dropped from the
# fit they are NA rather than values read from the wrong cell of the matrix.
regression_row <- function(model, predictor, cell_line, media, label) {
  fit <- summary(model)
  n_obs <- fit$df[2] + 2
  slope <- c(
    "Estimate" = NA_real_, "Std. Error" = NA_real_, "Pr(>|t|)" = NA_real_
  )
  if (predictor %in% rownames(fit$coefficients)) {
    slope <- fit$coefficients[predictor, ]
  }
  cbind(
    n_obs, fit$adj.r.squared, slope[["Estimate"]], slope[["Pr(>|t|)"]],
    slope[["Std. Error"]] * (n_obs^0.5), cell_line, media, label
  )
}

# 1 when the ordering of the two mothers' times matches the ordering of their
# daughters' times, 0 otherwise. The branch order reproduces the tie behaviour
# of four sequential `if` statements where the last matching one wins.
# Missing inputs make `if` raise an error.
slower_mum_slower_daughter <- function(mum1, mum2, daughter1, daughter2) {
  if ((mum2 >= mum1) && (daughter2 >= daughter1)) {
    1
  } else if ((mum2 >= mum1) && (daughter1 >= daughter2)) {
    0
  } else if ((mum1 >= mum2) && (daughter2 >= daughter1)) {
    0
  } else {
    1
  }
}

# ---- Cousin table clean-up ---------------------------------------------------
gen3_cc <- as.data.frame(gen3_cc)
colnames(gen3_cc) <- c("cell_line", "media", "cousin1", "cousin2", "family")
gen3_cc[] <- lapply(gen3_cc, as.character)
gen3_cc$cousin1 <- as.numeric(gen3_cc$cousin1)
gen3_cc$cousin2 <- as.numeric(gen3_cc$cousin2)

gen3_cc <- gen3_cc[gen3_cc$family != 614, ]
gen3_cc_0.5 <- gen3_cc[gen3_cc$media == 0.5, ]
gen3_cc_10 <- gen3_cc[gen3_cc$media == 10, ]
# Reset row names to 1..n so Cook's-distance labels index these tables.
rownames(gen3_cc_0.5) <- NULL
rownames(gen3_cc_10) <- NULL

# NOTE(review): axis labels say "Speed" although cousin1/cousin2 are the values
# stored in the cousin table built upstream; labels are left unchanged.
cousin_scatter(
  gen3_cc_0.5, "Cousin 1 Speed", "Cousin 2 Speed",
  "Generation 3: Cousin - Cousin : Media = 0.5 "
)
cousin_scatter(
  gen3_cc_10, "Cousin 1 Speed", "Cousin 2 Speed",
  "Generation 3: Cousin - Cousin : Media = 10 "
)

# ---- Influential points, cousins, media 0.5 ----------------------------------
a <- unique(gen3_cc_0.5$cell_line)
a <- a[a != "MDA"]
for (cl in a) {
  plot_cooks_distance(
    lm(cousin1 ~ cousin2, data = gen3_cc_0.5[gen3_cc_0.5$cell_line == cl, ]),
    cl
  )
}
influ <- c(19, 67)
gen3_cc_0.5[influ, ]
fam <- c(198, 1011)
gen3_cc_0.5 <- gen3_cc_0.5[!(gen3_cc_0.5$family %in% fam), ]

# The smoother uses the same (cousin1, cousin2) mapping as the points.
pa5 <- cousin_scatter(
  gen3_cc_0.5, "cousin1", "cousin2",
  "Generation 2 & 3: cousin - cousin : Media = 0.5 "
)

# ---- Influential points, cousins, media 10 -----------------------------------
a <- unique(gen3_cc_10$cell_line)
for (cl in a) {
  plot_cooks_distance(
    lm(cousin1 ~ cousin2, data = gen3_cc_10[gen3_cc_10$cell_line == cl, ]),
    cl
  )
}
influ <- c(128, 61)
gen3_cc_10[influ, ]
fam <- c(1236, 850)
gen3_cc_10 <- gen3_cc_10[!(gen3_cc_10$family %in% fam), ]

pa6 <- cousin_scatter(
  gen3_cc_10, "cousin1", "cousin2",
  "Generation 2 & 3: cousin - cousin : Media = 10 "
)

# ---- Regression summary table: cousins ---------------------------------------
for (cl in unique(gen3_cc$cell_line)) {
  model <- lm(cousin1 ~ cousin2, data = gen3_cc_0.5[gen3_cc_0.5$cell_line == cl, ])
  output <- rbind(
    output,
    regression_row(model, "cousin2", cl, 0.5, "Generation 3: cousin - cousin")
  )
}
for (cl in unique(gen3_cc$cell_line)) {
  model <- lm(cousin1 ~ cousin2, data = gen3_cc_10[gen3_cc_10$cell_line == cl, ])
  output <- rbind(
    output,
    regression_row(model, "cousin2", cl, 10, "Generation 3: cousin - cousin")
  )
}

# ---- Write results -----------------------------------------------------------
output <- as.data.frame(output)
colnames(output) <- c(
  "N", "R^2", "beta", "signif", "beta_standard_devation",
  "cell_line", "media", "plot"
)
write.csv(
  output,
  file = "D:/E Drive/heritability_collective/Paper data/heritability_division_time_divided.csv"
)

# ggplot objects are printed explicitly: auto-printing does not happen when the
# script is run through source(), which would leave the PDF empty.
pdf("Speed - Parent Off Spring regression Division Times Divided.pdf")
print(pa1)
print(pa2)
print(pa3)
print(pa4)
print(pa5)
print(pa6)
dev.off()

# ---- Mother/daughter ordering concordance ------------------------------------
# For each family, compare two generation-2 mothers and their daughters.
# The two sides are chosen by the number of offspring rows in the family:
#   2 rows: row 1 vs. row 2
#   4 rows: rows 1-2 vs. rows 3-4
#   3 rows: split by `parent_tid`
# and the first row of each side supplies the values.
ss_com <- gen2_3[gen2_3$family %in% gen3_use$family, ]
concordance_rows <- list()
for (fam_id in unique(ss_com$family)) {
  fam_rows <- ss_com[ss_com$family == fam_id, ]
  n_offspring <- nrow(fam_rows)
  if (n_offspring == 2) {
    side1 <- fam_rows[1, ]
    side2 <- fam_rows[2, ]
  } else if (n_offspring == 4) {
    side1 <- fam_rows[c(1, 2), ]
    side2 <- fam_rows[c(3, 4), ]
  } else if (n_offspring == 3) {
    parents <- unique(fam_rows$parent_tid)
    side1 <- fam_rows[fam_rows$parent_tid == parents[1], ]
    side2 <- fam_rows[fam_rows$parent_tid == parents[2], ]
  } else {
    next
  }
  result <- slower_mum_slower_daughter(
    side1$parent_total_time[1], side2$parent_total_time[1],
    side1$offspring_avg_time_taken[1], side2$offspring_avg_time_taken[1]
  )
  concordance_rows[[length(concordance_rows) + 1L]] <- cbind(
    side1$cell_line[1], side1$media[1], result, n_offspring
  )
}
ss_daughter <- if (length(concordance_rows) > 0) {
  do.call(rbind, concordance_rows)
} else {
  array(data = NA, c(0, 4))
}
ss_daughter <- as.data.frame(ss_daughter)
colnames(ss_daughter) <- c("cell_line", "media", "result", "number_of_offspring")
ss_daughter[] <- lapply(ss_daughter, as.character)
ss_daughter$result <- as.numeric(ss_daughter$result)
ss_daughter$number_of_offspring <- as.numeric(ss_daughter$number_of_offspring)

# Percentage of concordant families and total offspring per cell line x media.
# Values are stored as character (rows are built with cbind()).
a <- unique(ss_daughter$cell_line)
b <- unique(ss_daughter$media)
summary_rows <- list()
for (cl in a) {
  for (med in b) {
    combo <- ss_daughter[(ss_daughter$cell_line == cl) &
      (ss_daughter$media == med), ]
    pct <- (sum(combo$result) / nrow(combo)) * 100
    summary_rows[[length(summary_rows) + 1L]] <-
      cbind(cl, med, pct, sum(combo$number_of_offspring))
  }
}
test3 <- if (length(summary_rows) > 0) {
  do.call(rbind, summary_rows)
} else {
  array(data = NA, c(0, 4))
}
test3 <- as.data.frame(test3)
colnames(test3) <- c(
  "cell_line", "media", "slower_mum_slower_daughter_percentage",
  "number_of_offspring"
)

# ---- Division time per cell line (media 10, generations other than 1 and 4) --
data1 <- test[(test$generation != 1) & (test$generation != 4) &
  (test$outcome != "") & (test$outcome != "mos") &
  (test$outcome != "d into 3") & (test$outcome != "died"), ]
data1_10 <- data1[data1$media == 10, ]
data1_0.5 <- data1[data1$media == 0.5, ]

a <- unique(data1$cell_line)

# Rows are consumed as (start, end) pairs; div_time is
# end time / 3600 - start time / 3600 (presumably seconds -> hours; TODO
# confirm units). Cell lines with no media-10 rows contribute nothing instead
# of making seq(1, 0, 2) fail.
div_time_parts <- lapply(a, function(cl) {
  rows <- data1_10[data1_10$cell_line == cl, ]
  if (nrow(rows) == 0) {
    return(NULL)
  }
  start <- seq(1, nrow(rows), by = 2)
  data.frame(
    cell_line = as.character(rows$cell_line[start]),
    generation = as.character(rows$generation[start]),
    div_time = rows$time[start + 1] / 3600 - rows$time[start] / 3600,
    stringsAsFactors = FALSE
  )
})
div_time_parts <- Filter(Negate(is.null), div_time_parts)
data2 <- if (length(div_time_parts) > 0) {
  do.call(rbind, div_time_parts)
} else {
  data.frame(
    cell_line = character(0), generation = character(0),
    div_time = numeric(0), stringsAsFactors = FALSE
  )
}

# Mean and standard deviation of division time for each cell line.
for (cl in a) {
  cl_rows <- data2[data2$cell_line == cl, ]
  print(cl)
  print(mean(cl_rows$div_time))
  print(sd(cl_rows$div_time))
}

# Number of distinct families per cell line.
# NOTE(review): the cell lines are taken from gen2_3_0.5 but the families are
# counted in gen2_3_10 -- confirm this mix is intended.
a <- unique(gen2_3_0.5$cell_line)
for (cl in a) {
  print(cl)
  cl_rows <- gen2_3_10[gen2_3_10$cell_line == cl, ]
  print(length(unique(cl_rows$family)))
}