R
Updated: May 22, 2026 | Categories: Languages, Stats
Printed from:
R Programming Cheatsheet
Language Overview
- Statistical computing and data analysis language
- Developed by Ross Ihaka and Robert Gentleman at the University of Auckland
- Open-source (GPL-2/GPL-3), primarily used for statistical analysis, graphics, and data science
- Supports functional, object-oriented, and imperative programming paradigms
- Current stable release: R 4.5.x series (2025)
Basic Syntax
r
# Single-line comment
print("Hello, R!") # Printing to console
x <- 10 # Assignment operator
x |> sqrt() # Native pipe (R >= 4.1)
Data Types (Primitive Types, Collection Types)
Primitive Types
r
# Numeric (double)
x <- 42.5
is.numeric(x) # TRUE
# Integer
y <- 42L
is.integer(y) # TRUE
# Character
name <- "R Programming"
is.character(name) # TRUE
# Logical
is_true <- TRUE
is.logical(is_true) # TRUE
# Complex
z <- 3 + 2i
is.complex(z) # TRUE
# Raw
r <- charToRaw("R")
is.raw(r) # TRUE
Collection Types
r
# Vector (homogeneous, atomic)
numeric_vector <- c(1, 2, 3, 4, 5)
character_vector <- c("a", "b", "c")
# List (heterogeneous)
mixed_list <- list(1, "a", TRUE, 3.14)
# Factor (categorical variable)
gender <- factor(c("male", "female", "male"))
# Matrix / Array
m <- matrix(1:12, nrow = 3, ncol = 4)
a <- array(1:24, dim = c(2, 3, 4))
# Data Frame
# Note: as of R 4.0.0, stringsAsFactors defaults to FALSE
df <- data.frame(
name = c("Alice", "Bob", "Charlie"),
age = c(25, 30, 35)
)
# Tibble (modern data frame from the tibble package)
# library(tibble); tibble(name = ..., age = ...)
Variables and Constants
r
# Variable assignment
x <- 10 # Preferred method
y = 20 # Also works (allowed at top level)
x <<- 30 # Super-assignment (parent environment)
# R has no true constants; use UPPER_CASE by convention
PI <- 3.14159
# `lockBinding()` can make a binding read-only
e <- new.env()
e$K <- 273.15
lockBinding("K", e)
Operators
Arithmetic Operators
r
a <- 10
b <- 3
a + b # Addition
a - b # Subtraction
a * b # Multiplication
a / b # Division
a %% b # Modulo
a %/% b # Integer division
a ^ b # Exponentiation
Comparison Operators
r
x == y # Equal to
x != y # Not equal to
x < y # Less than
x > y # Greater than
x <= y # Less than or equal to
x >= y # Greater than or equal to
Logical Operators
r
TRUE & FALSE # Vectorised AND
TRUE | FALSE # Vectorised OR
TRUE && FALSE # Scalar AND (errors on length > 1 since R 4.3)
TRUE || FALSE # Scalar OR (errors on length > 1 since R 4.3)
!TRUE # Logical NOT
isTRUE(x); isFALSE(x)
Pipe Operators
r
# Native pipe (base R, requires R >= 4.1; placeholder `_` since 4.2)
mtcars |> subset(cyl == 4) |> head()
mtcars |> lm(mpg ~ wt, data = _)
# magrittr pipe (still common via dplyr/tidyverse)
library(magrittr)
mtcars %>% subset(cyl == 4) %>% head()
Control Structures
Conditional Statements
r
# If-else
if (condition) {
# code
} else if (another_condition) {
# code
} else {
# code
}
# Vectorised choice
ifelse(x > 0, "positive", "non-positive")
# Switch statement
switch(x,
"a" = "First option",
"b" = "Second option",
"default"
)
Loops
r
# For loop
for (i in seq_len(5)) {
print(i)
}
# While loop
while (condition) {
# code
}
# Repeat loop
repeat {
# code
if (condition) break
}
Functions
Basic Functions
r
# Function definition
my_function <- function(x, y = 10) {
x + y # last expression is returned
}
# Traditional anonymous function
square <- function(x) x^2
# Shorthand lambda syntax (R >= 4.1)
square <- \(x) x^2
sapply(1:5, \(x) x^2)
Advanced Function Concepts
r
# Ellipsis for variable arguments
variable_args <- function(...) {
args <- list(...)
print(args)
}
# Functional programming with apply family
lapply(list(1, 2, 3), \(x) x^2)
sapply(list(1, 2, 3), \(x) x^2)
vapply(1:3, \(x) x^2, numeric(1)) # type-safe
# purrr equivalents
# purrr::map(1:3, \(x) x^2); purrr::map_dbl(1:3, \(x) x^2)
Data Structures
Vectors
r
# Creating vectors
v1 <- c(1, 2, 3)
v2 <- seq(1, 10, by = 2)
v3 <- rep(1, times = 5)
# Vector operations (the shorter vector is recycled; R warns when the lengths are not multiples, as here: 3 vs 5)
v1 + v2
length(v1)
Data Frames
r
# Creating data frames
df <- data.frame(
name = c("Alice", "Bob"),
age = c(25, 30)
)
# Subsetting
df$name
df[1, 2] # First row, second column
df[df$age > 25, ]
subset(df, age > 25)
Object-Oriented Programming
R supports several OO systems:
S3 Classes
r
# S3 object: a plain list tagged with a class attribute
person <- list(name = "John", age = 30)
class(person) <- "Person"
# print() method for class "Person"; `...` keeps the signature compatible
# with the print() generic. Returns `x` invisibly so the call can be piped
# or assigned without printing twice.
print.Person <- function(x, ...) {
  cat("Name:", x$name, "Age:", x$age, "\n")
  invisible(x)
}
S4 Classes
r
# S4 class with two numeric slots (`slots =` supersedes the legacy representation())
setClass("Point", slots = c(x = "numeric", y = "numeric"))
# A generic named "norm" masks base::norm(); `...` lets extra arguments pass through
setGeneric("norm", function(obj, ...) standardGeneric("norm"))
# Fallback: anything that is not a Point is still handled by base::norm()
setMethod("norm", "ANY", function(obj, ...) base::norm(obj, ...))
# Euclidean length of a Point
setMethod("norm", "Point", function(obj, ...) sqrt(obj@x^2 + obj@y^2))
R6 Classes (reference semantics, via the R6 package)
r
library(R6)
Counter <- R6Class("Counter",
public = list(
n = 0,
inc = function() { self$n <- self$n + 1; invisible(self) }
)
)
S7 (modern OO system from R-core/Posit)
r
# install.packages("S7")
library(S7)
Range <- new_class("Range",
properties = list(lo = class_double, hi = class_double)
)
r <- Range(lo = 0, hi = 1)
Error Handling
r
# Try-catch with conditions
tryCatch({
# code that might throw an error
}, error = function(e) {
message("An error occurred: ", conditionMessage(e))
}, warning = function(w) {
message("A warning occurred: ", conditionMessage(w))
}, finally = {
# cleanup code
})
# Modern condition handling with rlang
# rlang::abort(), rlang::warn(), rlang::inform()
File I/O
r
# Reading files (base)
read.csv("data.csv")
read.table("data.txt")
# Writing files (base)
write.csv(df, "output.csv", row.names = FALSE)
write.table(df, "output.txt")
# Faster / modern alternatives
# readr::read_csv("data.csv"); readr::write_csv(df, "out.csv")
# data.table::fread("data.csv"); data.table::fwrite(df, "out.csv")
# arrow::read_parquet("data.parquet")
Common Libraries and Frameworks
Data Manipulation
- dplyr: Data transformation (grammar of data manipulation)
- tidyr: Data tidying / reshaping
- data.table: High-performance data manipulation
- arrow: Apache Arrow / Parquet I/O and analytics
- duckdb / duckplyr: In-process analytical SQL engine
Data Visualization
- ggplot2: Layered grammar of graphics
- plotly: Interactive plots
- lattice: Trellis-style statistical plotting
- echarts4r, highcharter: Interactive charting
Statistical Analysis & Modelling
- stats: Built-in statistical functions
- car: Companion to Applied Regression
- lme4 / glmmTMB: Mixed-effects models
- survival: Survival analysis
- tidymodels: Tidy modelling framework (parsnip, recipes, workflows, rsample, tune, yardstick)
Web & Apps
- shiny: Reactive web apps
- plumber: REST APIs in R
- httr2: Modern HTTP client (successor to httr)
Package Management
r
# Install package from CRAN
install.packages("ggplot2")
# Load package
library(ggplot2)
# Check installed packages
installed.packages()
# Install from GitHub (via pak — recommended modern installer)
# install.packages("pak")
pak::pak("tidyverse/dplyr")
# Reproducible project libraries
# renv::init(); renv::snapshot(); renv::restore()
Best Practices
- Use meaningful, snake_case variable and function names
- Prefer `<-` over `=` for assignment
- Vectorize operations instead of using explicit loops
- Leverage functional programming (apply family, purrr)
- Use a pipe (`|>` in base R or `%>%` from magrittr) for readability
- Pin dependencies with `renv` for reproducibility
- Lint and style with `lintr` and `styler`
- Profile and optimize only after measuring
Testing
r
# Using testthat (3rd edition)
library(testthat)
test_that("Addition works", {
expect_equal(2 + 2, 4)
})
# Snapshot tests
# expect_snapshot(print(model))
# Property-based / parameterised checks
# Other useful packages: tinytest, hedgehog, vdiffr (for ggplot2)
Reproducible Research
r
# R Markdown
library(rmarkdown)
render("report.Rmd")
# Quarto (next-generation, multi-language successor to R Markdown)
# quarto::quarto_render("report.qmd")
# Workflow orchestration
# library(targets); tar_make()
Performance Optimization
- Vectorize wherever possible
- Avoid growing objects in a loop; pre-allocate
- Use specialized data structures (`data.table`, `arrow`, matrices)
- Profile with `Rprof()`, `profvis::profvis()`, or `bench::mark()`
- Parallelize with `parallel`, `future`/`future.apply`, or `mirai`
- Push heavy work down to C/C++ via `Rcpp` or `cpp11`
- For very large data, use `arrow`, `duckdb`, or `polars` (R binding)
Resources for Further Learning
- R Project (r-project.org)
- Posit (posit.co) — makers of RStudio, formerly RStudio PBC
- CRAN (cran.r-project.org) and Bioconductor (bioconductor.org)
- "R for Data Science" (2e) by Hadley Wickham, Mine Çetinkaya-Rundel & Garrett Grolemund
- "Advanced R" (2e) by Hadley Wickham
- Posit Cloud, R-bloggers, and the Posit Community forum
Continue Learning
Discover more cheatsheets to boost your productivity