Posted By

hbeale on 04/19/12
Tagged

Versions (?)

Last Edited at 04/19/12 06:29am
Statistics

Viewed 1297 times
Favorited by 0 user(s)
Related snippets

frequently used convenience functions

/ Published in: R
as you'd guess, these are R functions that I use frequently, posted for the benefit of people I share code with can
Expand | Embed | Plain Text
Copy this code and paste it in your HTML
## these are functions that I sometimes use in my code
## they are intended to be able to be run on any computer
## with R installed.
 
 
#################################
###
### Settings
###
#################################
options(stringsAsFactors=FALSE)
 
 
#################################
###
### Aliases for frequently used functions
###
#################################
s <- base::summary;
h <- utils::head;
n <- base::names;
as.dataframe<-base::data.frame;
 
 
#################################
###
### FUNCTION: sourceDir
###
#################################
# sources any *.R files in a given directory
#
# Args: path 
# trace is a boolean to print file names as they are sourced
#
# Returns: nothing
# 
# this function is copied from the "source" help text
# 
sourceDir <- function(path, trace = TRUE, ...) {
    for (nm in list.files(path, pattern = "\\.[Rr]$")) {
       if(trace) cat(nm,":")           
       source(file.path(path, nm), ...)
       if(trace) cat("\n")
    }
 }
 
 
#################################
###
### FUNCTION: write.txt
###
#################################
# write.table with my most frequently used settings
#
# Args: 
# ... (vector of items to paste)
#
# Returns:
# 
write.txt<-function (x, file="", quote=FALSE, sep="\t", row.names=FALSE, ...){
 
	write.table(x, file, quote=quote, sep=sep, row.names=row.names, ...)
 
	}
 
 
#################################
###
### FUNCTION: read.txt
###
#################################
# read.table with my most frequently used settings
#
# Args: 
# ... (vector of items to paste)
#
# Returns:
# data frame
# 
read.txt<-function (file="", sep="\t", header=TRUE,row.names=NULL, ...){
 
	read.table(file, sep= sep, header= header, row.names= row.names, ...)
 
	}
 
 
#################################
###
### FUNCTION: pasteNS
###
#################################
# Paste arguments with no spaces between them
#
# Args: 
# ... (vector of items to paste)
#
# Returns:
# character vector result of paste
# 
pasteNS<-function (...){
 
	paste(..., sep="")
 
	}
 
 
#################################
###
### FUNCTION: convertToComplement
###
#################################
# Converts string into its DNA complement
#
# Args: 
# string
# fGenotype (boolean; if TRUE, orders the string alphabetically)
#
# Returns:
# string
# 
convertToComplement<-function(x,fGenotype=TRUE){
	allComps<-NULL
	for (nucString in x){
 
		bases=c("A","C","G","T")
		xx<-unlist(strsplit(toupper(nucString),NULL))
		thisComp=unlist(lapply(xx,function(bbb){
				if(bbb=="A") compString<-"T"		
				if(bbb=="C") compString<-"G"		
				if(bbb=="G") compString<-"C"		
				if(bbb=="T") compString<-"A"
				if(!bbb %in% bases) compString<-"N"
 
				return(compString)
			}))
		if(fGenotype) thisComp = thisComp [order(thisComp)]
		thisComp =paste(thisComp,collapse="")
		allComps=c(allComps, thisComp)
	}
	return(allComps)	
}
 
#################################
###
### FUNCTION: convertColsToNumeric
###
#################################
#   Converts specified columns to numeric
#
# Args: 
# data frame, column indices
#
# Returns:
# original DF with specified columns changed
# 
convertColsToNumeric<-function(dfConv,colIndices){
	for (i in colIndices){
		dfConv[,i]=as.numeric(dfConv[,i])
		}
		return(dfConv)
		}
 
#################################
###
### FUNCTION: printNumberedVector
###
#################################
# 
# prints a vector to screen, one item per line with each line numbered
#
# Input: a vector
#
# Returns:
# nothing
# 
##
printNumberedVector<-function(someVector){
cat(paste(1:length(someVector ), someVector,"\n"))
}
 
 
 
#################################
###
### FUNCTION: cnvSummary
###
#################################
# 
# creates a summary of cnvs identified by the cnv-seq package
# writes the summary to a file
# returns the files
#
# Input: cnvFileName, outputFileName
#
# Returns:
# nothing
# 
##
summarizeCNV<-function(cnvFileName,outputFileName){
 
## for testing
## cnvFileName ="*CNV.hits.log2-0.6.pvalue-0.001.minw-4.cnv"
if (! file.exists(outputFileName)) {
	library(cnv)
	cnvFileData <- read.delim(cnvFileName)
	if (sum(cnvFileData $cnv!=0)){
		cnvSummary<-NULL
		for (i in seq(max(min(cnvFileData$cnv), 1), max(cnvFileData$cnv))) {
	        sub <- subset(cnvFileData, cnv == i)
	        start <- ceiling(mean(c(min(sub$start), min(sub$position))))
	        end <- floor(mean(c(max(sub$end), max(sub$position))))
	        cnvSummary <- rbind(cnvSummary ,c(paste("CNVR_", i, sep = ""), unique(sub$chromosome), start, end, end - start + 1, unique(sub$cnv.log2), unique(sub$cnv.p.value)))#, sep = "\t", file = file, 
	    }
		colnames(cnvSummary)=   c("cnv", "chromosome", "start", "end", "size", "log2",  "p.value")
		cnvSummary=data.frame(cnvSummary)
		for (i in 3:7){
			cnvSummary[,i]=as.numeric(cnvSummary[,i])
		}
	} else {
		return(NULL)
	}
	write.table(cnvSummary, outputFileName, row.names=FALSE, quote=FALSE, sep="\t")
	} else { 
	cnvSummary =read.table(outputFileName, header=TRUE, sep="\t")
	}
return(cnvSummary)
}
 
 
 
#################################
###
### FUNCTION: fixRowNamesColumn
###
#################################
# 
# moves data in "Row.names" column to rownames
#
# Args: df
#
# Returns:
# dataframe
# 
##
 
 
 
fixRowNamesColumn<-function(mergeResultsDF){
 
	#mergeResultsDF= popByAgeWide
	rownames(mergeResultsDF)= mergeResultsDF$Row.names
	mergeResultsDF= mergeResultsDF[,2:ncol(mergeResultsDF)]
	}
 
 
#################################
###
### FUNCTION: getLabelsAndMidPointsOfGroups
###
#################################
# 
# useful for labeling axes and creating 
#
# Args: df, valname, byname, passFUN, fnDesc
# Example Vals
	# df= creatDF
	# valname="creat"
	# byname="breed"
	# passFUN=length
	# fnDesc="count"
	# fReplaceValName=FALSE  ## the default is to append
#
# Returns:
# dataframe
# 
##
 
 
getLabelsAndMidPointsOfGroups<- function(charVec,alternatingColors=c("grey", "black")){
 
plotAnnotation<-data.frame(values= charVec)
previousVal=	 c("", charVec[1:(length(charVec )-1)])
switchPoint= charVec != previousVal
groupBreaks=data.frame(start=which(switchPoint))
groupBreaks$length= c(groupBreaks $start[2:nrow(groupBreaks)],length(charVec)+1)-groupBreaks$start
groupBreaks$end= groupBreaks$start+   (groupBreaks$length-1)
groupBreaks$midpoint=round(rowMeans(groupBreaks[,c("start","end")]),0)
groupBreaks$label= plotAnnotation$values[groupBreaks$midpoint]
 
 
groupBreaks$color= alternatingColors[1]
groupBreaks$color[ c(T,F) ]=alternatingColors[2]
 
 
plotAnnotation$color=as.character(unlist(apply(groupBreaks,1,function(x){
	    rep(x["color"],x["length"])
	    })))
 
plotAnnotation$labelnames= ""
 
plotAnnotation$labelnames[groupBreaks$midpoint]=plotAnnotation$value[groupBreaks$midpoint]
list(labelsDF=plotAnnotation, breaksDF=groupBreaks)
}
 
 
 
#################################
###
### FUNCTION: prettyAggregate
###
#################################
# wrapper for aggregate
# renames columns based on input
#
# Args: df, valname, byname, passFUN, fnDesc
# Example Vals
	# df= creatDF
	# valname="creat"
	# byname="breed"
	# passFUN=length
	# fnDesc="count"
	# fReplaceValName=FALSE  ## the default is to append
#
# Returns:
# dataframe
# 
##
 
prettyAggregate<-function(df, valname, byname, passFUN, fnDesc, fReplaceValName=FALSE){
	if (fReplaceValName) {
		agColName=fnDesc
	} else {
		agColName=pasteNS(valname,"_", fnDesc)
	}
	result=aggregate(df[, valname],by=list(df[, byname]), FUN= passFUN)
	colnames(result)=c(byname, agColName)
	return(result)
	}
 
 
 
#################################
###
### FUNCTION: cbindList/rbindList
###
#################################
# tries to return a dataframe from a list of bind-able vectors or dataframes
# based on first example in Reduce help
#
# Args: 
# List
#
# Returns:
# dataframe
# 
##
 
cbindList <- function(x) Reduce("cbind", x)
rbindList <-function(x) Reduce("rbind",x)
 
 
#################################
###
### FUNCTION: bindRepeat
###
#################################
# returns a dataframe composed of n 
# copies of the vector
 
# Args: 
# vector2Rep - vector to be repeated
# n - times to repeat
## abandoned:
# fBindAsColumns - optional, boolean
#
# Returns:
# dataframe
# 
##
bindRepeat <- function(vector2Rep,n){
	a <- list()
	 for(i in 1:n) a[[i]] <- vector2Rep
 	rbindList(a)
}
 
 
 
#################################
###
### FUNCTION: inMB
###
#################################
# Convert a number of bases to megabases
#
# Args: 
# number(s)
#
# Returns:
# vector
# 
##
inMB<-function(bases){
	bases/1000000
	}
 
 
 
 
#################################
###
### FUNCTION: textListToVec
###
#################################
# Convert a chunk of text (one item per line) to a vector
# Just type textListToVec(" and then paste in the lines and type ")
#
# Args: 
# newline-separated list
#
# Returns:
# vector
# 
##
 
 
textListToVec <-function(a) strsplit(a,"\n")[[1]]
 
 
 
#################################
###
### FUNCTION: tableDF
###
#################################
# performs the table function
# but returns a well-formatted data.frame
#
# Args: 
# valVector
#
# Returns:
# dataframe
# 
##
 
tableDF<-function(valVector=c("a", "a", "b")){
	t=data.frame(as.matrix(table(valVector)))
	t2=data.frame(value=row.names(t), frequency=t[,1])
	row.names(t2)=row.names(t)#	colnames(t)=c("frequency")
	return(t2)
	}
 
 
 
 
#################################
###
### FUNCTION: interleave
###
#################################
# Interleave two vectorsAdd timestamp and data to plot
#
# Args: 
# v1
# v2
#
# Returns:
# vector
# 
##
 
interleave <- function(v1,v2)
{
ord1 <- 2*(1:length(v1))-1
ord2 <- 2*(1:length(v2))
c(v1,v2)[order(c(ord1,ord2))]
} 
 
 
 
#################################
###
### FUNCTION: stampPlot
###
#################################
# Add timestamp and data to plot
#
# Args: 
# desc.txt=""
# fOuter=FALSE
#
# Returns:
# nothinbg
# 
##
 
stampPlot=function(desc.txt="",fOuter=FALSE	){
	if(fOuter) {
		pAdj=-2
		} else {
		pAdj=-0.6
		}
mtext(paste(desc.txt, hbTimeStamp()),side=4,outer= fOuter,padj= pAdj,cex=.8)
}
 
 
 
#################################
###
### FUNCTION: snap
###
#################################
# like head, but also limits the number of columns. default is 6 columns 6 rows.
 
 snap <-function (df,rowLim=6,colLim=6) {
	# troubleshooting
		#age=18:29
		#height=runif(12,62,74)
		#df =data.frame(age=age,height=height)
 
 
	if (is(df)[1]=="data.frame" | is(df)[1]=="matrix"){
		nrowDF =nrow(df)
		ncolDF =ncol(df)
		if (rowLim<nrowDF) nrowDF=rowLim
		if (colLim<ncolDF) ncolDF=colLim
	print(df[1:nrowDF,1:ncolDF])	
		} else {
		print("cannot parse input")
			}
	}
 
 
#################################
###
### FUNCTION: getAllListSubItemsByIndex
###
#################################
# get the nth sub item from each item
#
# Args: 
# theList, list to pull from
# theIndex, the index of desired items
#
# Returns:
# vector or results
# 
getAllListSubItemsByIndex <-function (theList, theIndex){
 
paste(lapply(theList,function(x) x=x[[theIndex]]))
 
 
}
 
 
 
#################################
###
### FUNCTION: greaterOf
###
#################################
# get the greater of two items
#
# Args: 
# values to compare
#
# Returns:
# greater value
# 
greaterOf <- function (x,y) {
 
	if (x>y | y==x){
		x
	} else {
		if (y>x) {
			y
		} else {
			NA
		}
	}
 
}
 
 
#################################
###
### FUNCTION: lesserOf
###
#################################
# get the lesser of two items
#
# Args: 
# values to compare
#
# Returns:
# lesser value
# 
lesserOf <-  function (x,y) {
 
	if (x<y | y==x ){
		x
	} else {
		if (y<x) {
			y
		} else {
				NA
		}
	}
 
}
 
 
 
### FUNCTION: isBetween
###
#################################
# is one value between two others
#
# Args: 
# 3 values
#
# Returns:
# TRUE or FALSE
#
isBetween <- function (x,y,z) {
 
	x>y & x<z	
 
}
 
 
#################################
###
### FUNCTION: initialCap
###
#################################
# Converts each string in a vector to initial upper case
#
# Args: 
# wordsToConvert: vector of genes symbols
#
# Returns:
# vector with changed words
# 
initialCap <- function(wordsToConvert) {
 
	return_list<-NULL
	for (i in 1:length(wordsToConvert)){
		r<-tolower(wordsToConvert[i])
	    s <- strsplit(r, " ")[[1]]
    	return_list[i]=paste(toupper(substring(s, 1,1)), substring(s, 2), sep="", collapse=" ")
       }
       return(return_list)
}
 
#################################
###
### FUNCTION: allIdentical
###
#################################
# tests whether each item in a vector is identical
#
# Args: 
# vectorToTest: vector 
#
# Returns:
# TRUE or FALSE
# 
 
 
allIdentical <- function(vectorToTest){
	sum(! vectorToTest %in% vectorToTest[1])}
 
 
#################################
###
### FUNCTION: set_up_plot
###
#################################
# Determines axis  values from data
# adds a time stamp
#
# Args: 
# x & y data
# fPlotFromZero
# fPlotEvenAxes
 
# Returns:
# plot
# 
 
 
set_up_plot <-function(x,y,fPlotFromZero=TRUE, fPlotEvenAxes=FALSE,fDateStamp=TRUE,stampText="",fOuter=FALSE,xAxLabel=colnames(x), yAxLabel=colnames(y), ...){
#print(names(x))
#	if 
# xAxName=colnames(x)
# yAxLabel=colnames(y)
if(fPlotEvenAxes){
	x=c(min(c(x,y)),max(c(x,y)))
	y=x
	}
if (fPlotFromZero) {
	plot_y_min=0
	plot_x_min=0
} else {
	plot_y_min=min(y)
	plot_x_min=min(x)
} 
 
	plot_y_max=max(y)
	plot_x_max=max(x)
 
	plot(c(plot_x_min, plot_x_max),c(plot_y_min, plot_y_max), xlab= xAxLabel , ylab= yAxLabel , type="n", ... )
 
	if (fDateStamp) stampPlot(stampText,fOuter= fOuter)
 
	 }
 
 
 
 
 
#################################
###
### FUNCTION: hbTimeStamp
###
#################################
# Get formatted time and date
#
# Args (all optional): 
# sepYMD 
# sepHMS
# sepDateTime
 
# Returns:
# character vector
# 
 
 
hbTimeStamp<-function(sepYMD="-", sepHMS=".", sepDateTime="_", dateVars=c("Y","m","d"), timeVars=c("I", "M", "S")){
	dateSpec=paste(paste("%", dateVars, sep=""), collapse= sepYMD)
	timeSpec=paste(paste("%", timeVars, sep=""), collapse= sepHMS)
 
	paste(format(Sys.time(), dateSpec),paste(format(Sys.time(), timeSpec),substring(format(Sys.time(), "%r"),10,11),sep=""),sep= sepDateTime)
	}
 
 
#################################
###
### FUNCTION: breakPlotIntoPages
###
#################################
# Breaks data into page-size chunks and calls a plotting function for each chunk
#
# Args: 
# dataframe with plot data
# optional: desired rows per page
 
# Returns:
# multiple plots
# 
 
breakPlotIntoPages<-function(multipagePlotData,rowsPerPage=40,varList){
 
#if (! "myPlot" %in% varList) {
#	print("the 'myPlot' function must exist")
#	return()
#	}
 
plotRows=nrow(multipagePlotData)
dataBreaks=unique(c(seq(from=1,to=plotRows,by=rowsPerPage),plotRows))
breakCount=length(dataBreaks)
penultimateBreakCount= breakCount-1
lastPageRowCount= dataBreaks[breakCount ]-dataBreaks[penultimateBreakCount]
 
 
for (i in 1:(length(dataBreaks)-1)){
	brokenData=multipagePlotData[dataBreaks[i]:dataBreaks[i+1],]
	xLimVals=c(0,rowsPerPage*1.6)
	barPos=myPlot(brokenData,xLimVals,1)
	}
 
}
 
 
 
## a useful function: rev() for strings
strReverse <- function(x)
        sapply(lapply(strsplit(x, NULL), rev), paste, collapse="")
##strReverse(c("abc", "Statistics"))
 
 
 
#make transparenbt
 
 
 
 
################################
##
## FUNCTION: makeTransparent
 
##
################################
#Add transparency to colors
 
# Args: 
# vector of colors in name or hex format, e.g. grey or #FFCDFF
# desiredPctTranparent
 
# Returns:
# same colors with transparency added
 
#
 
makeTransparent<-function(colorVector=c("#FFCDFF", "#C0108C", "#CB7600"), desiredPctTranparent=50){
 
#colorVector=c("grey", "blue")
if (substr(colorVector[1],0,1)!="#") {
	colorVector =rgb(t(col2rgb(colorVector)/255))
	} 
s=seq(0,255,length.out=21)
transparencyCodes=data.frame(pctTransparent=100*(1-(s/255)))
t2=unlist(lapply(s,function(x) rgb(0,100,0,x,maxColorValue=255)))
transparencyCodes$hexSuffix= substring(t2,8,9)
paste(rgb(t(col2rgb(colorVector)),maxColorValue=255), transparencyCodes[transparencyCodes$pctTransparent== as.character(desiredPctTranparent),2],sep="")
 
}
 
 
 
 
#################################
###
### FUNCTION: plotMultipleCors
###
#################################
# Plots multiple vectors against 
# a single vector and returns 
# correlations
 
#
# Args: 
# plotData 
## plot data should have x values in first col, and any number of cols after that can be Y
## column names determine legend text
# vColors -- a vector of colors the length of each Y col in plotData
# ylabText -- text for Y label
# legendPos -- legend position, like "topleft"
# fNormalizeY -- normalize each Y value so the max val is 100 
 
# Returns:
# plot
# 
 
plotMultipleCors <-function(plotData, vColors="",ylabText="", xlabText ="Breed average creatinine level",legendPos="topleft",fNormalizeY=TRUE,...){
## plot data should have x values in first col, and any number of cols after that can be Y
#test plotData= gAcdDesc[,c("mean","se","sd","Height","Weight")]
 library(plotrix)
 x= plotData[,1]
 if (fNormalizeY) {
		plotData[,2:ncol(plotData)]=apply(plotData[,2:ncol(plotData)],2,function(y){
# 			100*y/max(y)
 			rescale(y,c(0,100))
 			})
 		}
 
 maxY=max(plotData[,2:ncol(plotData)],na.rm=TRUE)
 set_up_plot( x,0: maxY,xAxLabel= xlabText, yAxLabel=ylabText ,...)
 
 
# if (vColors =""){
 	nColorsNeeded= ncol(plotData)-1
 	pal1=brewer.pal(8,"Set1")
	vColors=pal1[rep(1:length(pal1), nColorsNeeded)[1: nColorsNeeded]]
#	}
 
	plotSettings=data.frame(dataName =colnames(plotData[,2:ncol(plotData)]),color= vColors)
	plotSettings$notNA=apply(plotData[,2:ncol(plotData)],2,function(x){
		sum(!is.na(x))
		})
 	plotSettings$legendText= plotSettings$dataName
	plotSettings$pch=0:(nrow(plotSettings)-1)
 for (i in 1:nrow(plotSettings)){
 	y=plotData[,(i+1)]
	points(x,y,col= plotSettings$color[i],pch=plotSettings$pch[i])
 abline(lm(y ~ x),col= plotSettings$color[i])
 rsq <-cor(x,y, use="complete.obs")
  plotSettings$corr_value[i]  <- format(c(rsq, 0.123456789), digits=2)[1]
 ##text(mean(x),mean(y),substitute(paste("R"^{2}, " = "*x),list(x= corr_value) ),cex=1,col= vColors[i-1])
 ##legend(210, 110, bquote(r^2 ==.(format(summary(regression)$adj.r.squared,digits=3)))) 
	print(plotSettings$color[i])
  	 } #end for loop
  legend(legendPos,	legend=pasteNS(plotSettings$legendText,", Rsq=",plotSettings$corr_value, ", n=",plotSettings$notNA), pch= plotSettings$pch, col= plotSettings$color)
 
 # colnames(plotData[,2:ncol(plotData)]), lty=1, col=vColors)
 
  	 } ## end function
 
#################################
###
### FUNCTION: plotCors
###
#################################
# Plots multiple vectors against 
# a single vector and returns 
# correlations
 
#
# Args: 
# plotData 
## plot data should have x values in first col, and any number of cols after that can be Y
## column names determine legend text
# vColors -- a vector of colors the length of each Y col in plotData
# ylabText -- text for Y label
# legendPos -- legend position, like "topleft"
# fNormalizeY -- normalize each Y value so the max val is 100 
 
# Returns:
# plot
# 
 
plotCors<-function(plotData, vColors="",ylabText=colnames(plotData)[2], xlabText =colnames(plotData)[1],legendPos="topleft",fNormalizeY=TRUE,...){
## plot data should have x values in first col, and any number of cols after that can be Y
#test plotData= gAcdDesc[,c("mean","se","sd","Height","Weight")]
#plotData=na.omit(plotData)
 x= plotData[,1]
 	if (fNormalizeY) {
 		plotData[,2]=100*plotData[,2]/max(plotData[,2])
 		}
y=plotData[,2]
 
 set_up_plot( x,y,xAxLabel= xlabText, yAxLabel=ylabText ,...)
 
	points(x,y)#,col= plotSettings$color[i])
 abline(lm(y ~ x)) #,col= plotSettings$color[i])
 rsq <-cor(x,y, use="complete.obs")
  rsqTxt  <- format(c(rsq, 0.123456789), digits=2)[1]
 ##text(mean(x),mean(y),substitute(paste("R"^{2}, " = "*x),list(x= corr_value) ),cex=1,col= vColors[i-1])
 ##legend(210, 110, bquote(r^2 ==.(format(summary(regression)$adj.r.squared,digits=3)))) 
	#print(plotSettings$color[i])
 title(sub=pasteNS("Rsq=", rsqTxt ))
 
 # colnames(plotData[,colSpec]), lty=1, col=vColors)
 
  	 } ## end function
 
 
 
 
 
#################################
###
### FUNCTION: groupSNPsIntoLoci
###
#################################
# 
# find groups of nearby SNPs
#
# Args: 
# hitListSnpChrID 	any kind of chromosome identifier
# hitListSnpPos 	position of snp, either a number of a snp id like chr15.44226659
# maxInterSNPRange	the maximum acceptable distance between SNPs
#
# Returns:
# data.frame vector result of paste
# data frame contains columns - c("chr", "minPos", "maxPos", "minPVal", "snpCount", "locID", "locusSize")
# 
# find windows in snps
groupSNPsIntoLoci<-function(hitListSnpChrID, hitListSnpPos, hitListSnpPVals, maxInterSNPRange=1E6){
#test hitListSnpChrID=
# hitListSnpChrID =KCsize_candidates$chrChar
# hitListSnpPos=KCsize_candidates$pos
# hitListSnpPVals=KCsize_candidates$KC.LOGP	
if (sum(grepl("chr", hitListSnpPos))>1){
	hitListSnpPos=as.numeric(gsub("^[^\\.]*\\.","", hitListSnpPos))	
}
 
 
hitList=data.frame(cbind(hitListSnpChrID, hitListSnpPos, hitListSnpPVals))
colnames(hitList)=c("chr","pos","pval")
hitList$pos=as.numeric(hitList$pos)
hitList$pval =as.numeric(hitList$pval)
# set up variables and record info for first row
#assocLocus columns: chr, minPos, minPos, minPVal, snpCount
assocLocus<-NULL
hitList= hitList[order(hitList$chr, hitList$pos),]
currAssocLocus= data.frame(c(hitList[1,c("chr","pos","pos","pval")]))
colnames(currAssocLocus)=c("chr", "minPos", "maxPos", "minPVal")
currAssocLocus$snpCount[1]=1
#round(hitList[1,"pos"]/1E6,0)
hitList$locusID=NA
 
currAssocLocus$locID[1]=pasteNS(hitList[1,"chr"],".",round(hitList[1,"pos"]/1E6,0),"Mb")
for (i in 2:nrow(hitList)){
	# check each snp
	if (hitList[i,"chr"]==currAssocLocus$chr[1] & (abs(hitList[i,"pos"]-currAssocLocus$minPos[1])<maxInterSNPRange | abs(hitList[i,"pos"]-currAssocLocus$minPos[1])<maxInterSNPRange)){
		# they are on the same chromosome and within maxInterSNPRange of each other
	 	# extend the current Association Locus values (update currAssocLocus)
	 	currAssocLocus$snpCount[1]=currAssocLocus$snpCount[1]+1
 		if (hitList[i,"pval"]<currAssocLocus$minPVal[1]) currAssocLocus$minPVal[1]=hitList[i,"pval"]
 		if (hitList[i,"pos"]<currAssocLocus$minPos[1]) currAssocLocus$minPos[1]=hitList[i,"pos"]
		if (hitList[i,"pos"]>currAssocLocus$maxPos[1]) currAssocLocus$maxPos[1]=hitList[i,"pos"]
		if (i==nrow(hitList)) 	assocLocus<-rbind(assocLocus, currAssocLocus) ## add this last locus to the list
 
	} else {
		assocLocus<-rbind(assocLocus, currAssocLocus)		
		currAssocLocus[1,c("chr", "minPos", "maxPos", "minPVal")]=c(hitList[i,c("chr","pos","pos","pval")])
		currAssocLocus$snpCount[1]=1
		currAssocLocus$locID[1]=pasteNS(hitList[i,"chr"],".",round(hitList[i,"pos"]/1E6,0),"Mb")
	}
	hitList$locusID[i]=currAssocLocus$locID[1]
}
 
assocLocus=data.frame(assocLocus)
assocLocus$locusSize= assocLocus$maxPos-assocLocus$minPos+1
return(assocLocus)
}
Report this snippet Tweet
Comments

Subscribe to comments
Comment:
You need to login to post a comment.