/
Intro to R & Bioconductor

Intro to R & Bioconductor

Class Material

Connect to O2

# Open an SSH session to the O2 login host.
#   -Y                      enable trusted X11 forwarding
#   -L $PORT:127.0.0.1:$PORT  forward local port $PORT to port $PORT on the remote side
# $PORT must already hold a port number; replace <your_HMS_ID> with your own ID.
ssh -Y -L $PORT:127.0.0.1:$PORT <your_HMS_ID>@o2.hms.harvard.edu

Managing your R packages on O2

Create an R Personal Library

# Create a library in your HOME ("~") directory.
# -p makes this safe to re-run when the directory already exists.
mkdir -p ~/R-4.1.1

# Create an .Renviron file that sets R_LIBS_USER to that directory.
# NOTE: ">" overwrites any existing ~/.Renviron; use ">>" to append instead.
echo 'R_LIBS_USER="~/R-4.1.1"' > "$HOME/.Renviron"

Running RStudio on O2

For more in-depth details, please visit our wiki page - RStudio on O2.

Load RStudio modules

# Load the modules used to run RStudio on O2, one command per line
# (fused onto one line, "module" and "load" would be taken as module names).
module load rstudio_launcher/1.0
module load gcc/6.2.0
module load R/4.1.1
module load git/2.9.5

Launch RStudio

# $PORT is the number used while connecting to O2
# Requests an interactive job: 2 hours (-t 0-2:00), 1 core (-c 1), 2G of memory,
# with a tunnel on $PORT, then starts RStudio_launcher.sh on that port.
srun -t 0-2:00 --pty -p interactive -c 1 --mem=2G \
  --tunnel $PORT:$PORT \
  RStudio_launcher.sh $PORT

intro.R

# intro.R
# Walkthrough of basic R: assignment, core data types, indexing, type
# conversion, apply(), arithmetic operators, data import/export, saving the
# workspace, and a first ggplot2 figure. Meant to be run top to bottom.

# Variable assignment
myX <- 5
myX

# Create a vector
myvector <- c(3, 5, 7)
myvector

# Data Type: List
# A list can hold elements of different types. Note that c(3, "Tp53", 7)
# would NOT create a list: c() coerces every element to character.
mylist <- list(3, "Tp53", 7)
mylist

# Data Type: Factors
gender <- c("male", "male", "female", "female", "female")
gender <- factor(gender)
gender

# Data Type: Matrix
mymatrix <- matrix(1:6, nrow = 3, ncol = 2)
mymatrix

# Data Type: Dataframe
mydataframe <- as.data.frame(mymatrix)
mydataframe

# Data Type: Dataframe: Columns name
names(mydataframe) <- c("col1", "col2")
mydataframe

# Data Type: Dataframe: Rows name
# Uses the values of the first column as row names.
row.names(mydataframe) <- mydataframe[, 1]
mydataframe

# Data Wrangling: Indexing
mymatrix <- matrix(1:6, nrow = 3, ncol = 2)
mymatrix

# Indexing: Returns element in row 1 and column 2
mymatrix[1, 2]

# Indexing: return all elements in row 1
mymatrix[1, ]

# Indexing: return all elements in column 1
mymatrix[, 1]

# Data Wrangling: Change Data Type
myvector <- c(3, 5, 7)
myvector
myvector <- as.character(myvector)
myvector

# Data Wrangling: Apply function - row sums (MARGIN = 1 means rows)
apply(mymatrix, 1, sum)

# Data Wrangling: Apply function - column sums (MARGIN = 2 means columns)
apply(mymatrix, 2, sum)

# Operators: Arithmetic
3 + 5   # Addition
3 - 1   # Subtraction
5 * 5   # Multiplication
10 / 2  # Division
3^2     # Exponent
9 %% 2  # Modulo (remainder)

# Data import from text file
# mydata <- read.table(file = "MyFile", header = TRUE, sep = ",")
# head(mydata)

# Data import from Excel
# install.packages("xlsx")  # Install package if needed
# library("xlsx")
# Read in the 1st worksheet
# mydata <- read.xlsx("myexcel.xlsx", sheetIndex = 1)
# Read worksheet named "mysheet"
# mydata <- read.xlsx("myexcel.xlsx", sheetName = "mysheet")

# Data export
# write.table(x = mydataframe, file = "MyFile.txt", sep = "\t")

# Workspace: save image
save.image(file = "~/2021-06-15-my-project.RData")

# Exercise: Import Data
mdata <- "/n/groups/rc-training/introR/Rcoursetestdata1.csv"
mydf <- read.table(mdata, header = TRUE, row.names = 1, sep = ",")
head(mydf)

# Exercise: Basic Stats
summary(mydf)

# Exercise: Transposing Data
mymatrix <- as.matrix(mydf)
myTmatrix <- t(mymatrix)               # t as for transpose
myTdf <- as.data.frame(myTmatrix)      # as data frame again

# Exercise: to explore later (ggplot2)
## Install and Load ggplot2
# Install only if needed, so re-running the script does not reinstall.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library("ggplot2")

## Load some data
data("midwest", package = "ggplot2")

## Plotting
gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) +
  labs(
    y = "Population",
    x = "Area",
    title = "Scatterplot",
    caption = "Source: midwest"
  )
gg

Installing R Packages

Comprehensive R Archive Network (CRAN)

# Install the BiocManager package from CRAN.
install.packages(pkgs = "BiocManager")

Bioconductor

# Install the edgeR package through BiocManager.
BiocManager::install(pkgs = "edgeR")

GitHub

## the devtools package is required
# install.packages("devtools")

# to install the latest dev version
# (qualified with devtools:: so it works without calling library(devtools))
devtools::install_github("tidyverse/ggplot2")

# Alternatively, you can specify the version with @vX.X.X
# devtools::install_github("tidyverse/ggplot2@v0.9.1")

Related content