########################################################################################## ### Code to simulate drift for the WQS population ### ########################################################################################## # T. Beissinger # Questions may be directed to: beissinger@ucdavis.edu #### Function to simulate entire WQS experiment, from cycle 2 as starting pop to cycle 5 as #### ending pop. Demographics of the WQS selection protocol are incorporated. wqsFullSim <- function(initial.freq){ ### Begin parameters Hardy <- c(initial.freq^2,2*initial.freq*(1-initial.freq), (1-initial.freq)^2) # hardy weinberg genotype frequencies # S1.probs <- c(Hardy[1]+Hardy[2]/4,Hardy[2]/2,Hardy[3]+Hardy[2]/4) # genotype frequencies after 1 generation of selfing C2.inds <- 200 C3.S1s <- 186 C3.S2s <- 462 C4.S1s <- 300 C4.S2s <- 881 C5.S1sa <- 200 C5.S1sb <- 100 C5.S2s <- 188 ### End parameters ### Make C2 according to known allele frequency pool <- c(rep(2,Hardy[1]*C2.inds),rep(1,Hardy[2]*C2.inds),rep(0,Hardy[3]*C2.inds)) ### Make C3 (C2 provides starting freqs) pool <- sample(pool,C3.S1s,replace=F) # sample 186 inds to self pool[which(pool == 1)] <- sample(c(0,1,2),length(which(pool == 1)),replace=T,prob = c(0.25,0.5,0.25)) # self to make S1 families pool <- rep(pool,3) # repeat families 3 times because 2-3 S2 families were made from each S1 pool[which(pool == 1)] <- sample(c(0,1,2),length(which(pool == 1)),replace=T,prob = c(0.25,0.5,0.25)) # self to make S2 families pool <- sample(pool,size=C3.S2s,replace=F) # Select S2 families pool <- sample(pool,20,replace=F) # Select 20 S2 families cur.freq <- sum(pool)/(2*length(pool)) ## split gametes p1 <- pool/2 p2 <- pool/2 p1[which(p1 == 0.5)] <- 1 p2[which(p2 == 0.5)] <- 0 pool <- c(p1,p2) ## gametes split gam1 <- sample(pool,200,replace=T) # Recombine allele 1 gam2 <- sample(pool,200,replace=T) # Recombine allele 2 pool <- gam1+gam2 # Recombine genotypes ### Make C4 pool <- sample(pool,C4.S1s,replace=T) ### sample inds to self pool[which(pool == 1)] <- sample(c(0,1,2),length(which(pool == 1)),replace=T,prob = c(0.25,0.5,0.25)) # self to make S1 families pool <- rep(pool,3) # repeat families 3 times because 2-3 S2 families were made from each S1 pool[which(pool == 1)] <- sample(c(0,1,2),length(which(pool == 1)),replace=T,prob = c(0.25,0.5,0.25)) # self to make S2 families pool <- sample(pool,C4.S2s,replace=F) # Select S2 families pool <- sample(pool,20,replace=F) # Select 20 S2 families cur.freq <- sum(pool)/(2*length(pool)) ## split gametes p1 <- pool/2 p2 <- pool/2 p1[which(p1 == 0.5)] <- 1 p2[which(p2 == 0.5)] <- 0 pool <- c(p1,p2) ## recombine gam1 <- sample(pool,200,replace=T) # Recombine allele 1 gam2 <- sample(pool,200,replace=T) # Recombine allele 2 pool <- gam1+gam2 # Recombine genotypes ### Make C5 pool[which(pool == 1)] <- sample(c(0,1,2),length(which(pool == 1)),replace=T,prob = c(0.25,0.5,0.25)) # self to make S1 families pool <- sample(pool,C5.S1sb,replace=F) # select 100 S1 families pool <- rep(pool,2) # select two of each S1 family pool[which(pool == 1)] <- sample(c(0,1,2),length(which(pool == 1)),replace=T,prob = c(0.25,0.5,0.25)) # self to make S2 families pool <- sample(pool,C5.S2s,replace=T) # Evaluate 188 S2 families pool <- sample(pool,20,replace=F) # Select 20 S2 families cur.freq <- sum(pool)/(2*length(pool)) ## split gametes p1 <- pool/2 p2 <- pool/2 p1[which(p1 == 0.5)] <- 1 p2[which(p2 == 0.5)] <- 0 pool <- c(p1,p2) ## recombine gam1 <- sample(pool,200,replace=T) # Recombine allele 1 gam2 <- sample(pool,200,replace=T) # Recombine allele 2 pool <- gam1+gam2 # Recombine genotypes cur.freq <- sum(pool)/(2*length(pool)) return(cur.freq) } ####################################################################################### #### Simulate Fst, based on true C2 allele frequency distribution ####### ####################################################################################### ### Load data Gen <- read.csv("../DATA/genoWQS_UPDATED.csv",header=T,stringsAsFactors=F) # Write a function to compute allele frequencies for a given set of rows. 'start' and # 'end' correspond to the rows that house individuals from the desired population CalcFreqs <- function(Gen=Gen,start,end){ sub <- Gen[start:end,] freq <- c() for(i in 2:ncol(sub)){ print(i) temp <- unlist(strsplit(unlist(sub[,i]),split="")) freq[i-1] <- length(which(temp=="A")) / sum(table(temp)) } return(freq) } ### Calculate C2 allele frequencies from data C2Freqs <- CalcFreqs(Gen=Gen,start=17,end=179) ### Load Fst calculator #source("../FstAnalysis/vectorFst.R") #Instead of sourcing separate file, function is included at bottom of this script. ### Do 1,000 simulations permResEff <- c() for(s in 1:1000){ print(s) C2Trunk <- sample(C2Freqs,17590,replace=F) # subsample C2Freqs to make new C0 distribution C5Freqs.sim <- c() ### Simulate C5 freqs for(i in 1:length(C2Trunk)){ C5Freqs.sim[i] <- wqsFullSim(C2Trunk[i]) } Fst.sim.obs <- vectorFst(C2Trunk,C5Freqs.sim,simple=F ) # calculate Fst permResEff[s] <- max(Fst.sim.obs,na.rm=T) # record highest Fst from sim } ### Determine permutation-test-like alpha = 0.05 quantile(permResEff,0.95) ### Determine permutation-test-like alpha = 0.01 quantile(permResEff,0.99) ### The function below will calculate Fst. Inputs are as follows: ### pop1Freqs: Vector of allele frequencies for each locus in pop1 ### pop2Freqs: Vector of allele frequencies for each locus in pop2 ### pop3Freqs: Vector of allele frequencies for each locus in pop3, if there ### is a pop3. If not, leave blank. ### simple: Should the simplest formula for Fst be used? If T, the ### formula used is Fst = s^2 / (mean(p) * (1- mean(p)) . ### if false, a better formula which corrects for small number ### of populations is used: Weir and Cockerham, 1984 vectorFst <- function(pop1Freqs,pop2Freqs,pop3Freqs=NULL,simple=T){ if(simple==T){ if(is.null(pop3Freqs)){ mean <- (pop1Freqs+pop2Freqs) / 2 var <- (pop1Freqs-mean)^2 + (pop2Freqs-mean)^2 Fst <- var / (mean*(1-mean)) } else { mean <- (pop1Freqs+pop2Freqs+pop3Freqs) / 3 var <- ( (pop1Freqs-mean)^2 + (pop2Freqs-mean)^2 + (pop3Freqs-mean)^2 ) /2 Fst <- var / (mean*(1-mean)) } } else if(simple==F){ if(is.null(pop3Freqs)){ mean <- (pop1Freqs+pop2Freqs) / 2 var <- (pop1Freqs-mean)^2 + (pop2Freqs-mean)^2 Fst <- var / {(mean*(1-mean)) + var/2} } else { mean <- (pop1Freqs+pop2Freqs+pop3Freqs) / 3 var <- ( (pop1Freqs-mean)^2 + (pop2Freqs-mean)^2 + (pop3Freqs-mean)^2 ) /2 Fst <- var / { (mean*(1-mean)) +var / 3} } } return(Fst) }