Cassandra
Printed from:
Cassandra CQL (Cassandra Query Language) Cheatsheet
1. Connection and Basic Commands
Connect to Cassandra
12345678# Using cqlsh
cqlsh [hostname] [port]
cqlsh localhost 9042 # Default connection
# Show cluster information
SHOW VERSION;
SHOW HOST;
Authentication (if enabled)
123456-- Connect with username and password
cqlsh -u [username] -p [password]
-- Connect using SSL
cqlsh --ssl
2. Keyspace Operations
Create Keyspace
123456789101112131415161718-- Simple replication (development/single-DC only; not recommended for production)
CREATE KEYSPACE my_keyspace
WITH replication = {
'class': 'SimpleStrategy',
'replication_factor': 3
};
-- Network topology replication (recommended for production)
CREATE KEYSPACE my_keyspace
WITH replication = {
'class': 'NetworkTopologyStrategy',
'datacenter1': 3,
'datacenter2': 2
};
-- Use keyspace
USE my_keyspace;
Alter Keyspace
123456ALTER KEYSPACE my_keyspace
WITH replication = {
'class': 'NetworkTopologyStrategy',
'datacenter1': 3
};
Drop Keyspace
12DROP KEYSPACE my_keyspace;
3. Table Operations and Column Families
Create Table
1234567891011121314151617CREATE TABLE users (
user_id UUID PRIMARY KEY,
username TEXT,
email TEXT,
age INT,
registration_date TIMESTAMP
);
-- Composite Primary Key
CREATE TABLE user_posts (
user_id UUID,
post_id TIMEUUID,
content TEXT,
likes INT,
PRIMARY KEY ((user_id), post_id)
) WITH CLUSTERING ORDER BY (post_id DESC);
Alter Table
123456789-- Add column
ALTER TABLE users ADD phone_number TEXT;
-- Drop column
ALTER TABLE users DROP phone_number;
-- Rename a primary key column
ALTER TABLE users RENAME user_id TO id;
Drop Table
12345DROP TABLE users;
-- Remove all rows but keep the table schema
TRUNCATE users;
4. CQL Data Types and Collections
Primitive Types
text/varchar: UTF-8 encoded stringascii: ASCII-only stringint: 32-bit signed integerbigint: 64-bit signed integersmallint,tinyint: 16-bit and 8-bit signed integersfloat,double: Floating-point numbersdecimal: Variable-precision decimalboolean: true/falsedate,time,timestamp: Date/time valuesduration: Months/days/nanoseconds durationuuid: Unique identifiertimeuuid: Time-based unique identifier (v1)blob: Arbitrary bytesinet: IPv4 or IPv6 addressvector<type, dimension>: Fixed-length float vector (Cassandra 5.0+, used with Storage-Attached Indexes for ANN search)
Collection Types
12345678910111213141516171819-- List
CREATE TABLE example (
id UUID PRIMARY KEY,
tags list<text>
);
-- Set (unique elements)
tags set<text>
-- Map (key-value pairs)
preferences map<text, text>
-- Frozen collection (stored as a single immutable value)
addresses frozen<list<text>>
-- Inserting collections
INSERT INTO example (id, tags) VALUES
(uuid(), ['tag1', 'tag2', 'tag3']);
5. CRUD Operations
Insert
12345678INSERT INTO users (user_id, username, email)
VALUES (uuid(), 'johndoe', 'john@example.com')
USING TTL 86400; -- Optional time-to-live (seconds)
-- Conditional insert (lightweight transaction)
INSERT INTO users (user_id, username) VALUES (uuid(), 'johndoe')
IF NOT EXISTS;
Select
1234567891011121314151617181920212223-- Basic select
SELECT * FROM users;
-- Filtering
SELECT * FROM users WHERE username = 'johndoe';
-- Pagination
SELECT * FROM users LIMIT 10;
-- Per-partition limit
SELECT * FROM user_posts PER PARTITION LIMIT 5;
-- Grouping (within a partition)
SELECT user_id, COUNT(*) FROM user_posts
WHERE user_id = ? GROUP BY user_id;
-- Complex queries
SELECT username, email
FROM users
WHERE user_id IN (uuid1, uuid2)
AND age > 25
ALLOW FILTERING;
Update
123456789101112131415UPDATE users
SET email = 'new_email@example.com',
age = 30
WHERE user_id = uuid();
-- Increment counter
UPDATE user_stats
SET login_count = login_count + 1
WHERE user_id = uuid();
-- Conditional update (lightweight transaction)
UPDATE users SET email = 'new@example.com'
WHERE user_id = ?
IF email = 'old@example.com';
Delete
1234567DELETE FROM users WHERE user_id = uuid();
-- Delete specific columns
DELETE email, age
FROM users
WHERE user_id = uuid();
6. Primary Keys, Partition Keys, and Clustering Keys
12345678910-- Composite Partition Key
CREATE TABLE sensor_data (
datacenter text,
rack text,
sensor_id uuid,
reading_time timeuuid,
temperature float,
PRIMARY KEY ((datacenter, rack), sensor_id, reading_time)
) WITH CLUSTERING ORDER BY (sensor_id ASC, reading_time DESC);
7. Indexing
Secondary Indexes
123456-- Legacy 2i secondary index
CREATE INDEX ON users (email);
-- Query with indexed column
SELECT * FROM users WHERE email = 'john@example.com';
Storage-Attached Indexes (SAI) — Cassandra 5.0+
12345678910111213141516171819-- Preferred index implementation in 5.0; supports text, numeric, and vector columns
CREATE INDEX users_email_sai ON users (email) USING 'sai';
-- Case-insensitive matching
CREATE INDEX ON users (email) USING 'sai'
WITH OPTIONS = {'case_sensitive': 'false', 'normalize': 'true'};
-- Vector similarity index for ANN queries
CREATE TABLE products (
id uuid PRIMARY KEY,
name text,
embedding vector<float, 1536>
);
CREATE INDEX ON products(embedding) USING 'sai';
SELECT id, name FROM products
ORDER BY embedding ANN OF [0.12, 0.34, ...]
LIMIT 10;
Materialized Views
1234567-- Materialized views remain experimental; enable with
-- materialized_views_enabled: true in cassandra.yaml
CREATE MATERIALIZED VIEW users_by_email AS
SELECT * FROM users
WHERE email IS NOT NULL AND user_id IS NOT NULL
PRIMARY KEY (email, user_id);
8. Time-to-Live (TTL) and Timestamps
12345678-- Insert with TTL (seconds)
INSERT INTO users (user_id, username)
VALUES (uuid(), 'tempuser')
USING TTL 3600; -- Expires in 1 hour
-- Get TTL and write timestamp of a column
SELECT TTL(username), WRITETIME(username) FROM users;
9. Consistency Levels
1234567-- Consistency levels: ONE, TWO, THREE, QUORUM, ALL,
-- LOCAL_ONE, LOCAL_QUORUM, EACH_QUORUM, ANY (writes), SERIAL, LOCAL_SERIAL
CONSISTENCY QUORUM;
-- Inspect the current consistency level
CONSISTENCY;
Note: per-query
USING CONSISTENCYis not valid CQL. Set the level for the cqlsh session withCONSISTENCY <level>;, or specify it per request through a driver's statement API.
10. User-Defined Types and Functions
123456789101112131415-- User-Defined Type
CREATE TYPE address (
street text,
city text,
zip_code text
);
-- User-Defined Function (UDFs are disabled by default;
-- set user_defined_functions_enabled: true in cassandra.yaml)
CREATE FUNCTION fahrenheit_to_celsius(f double)
RETURNS NULL ON NULL INPUT
RETURNS double
LANGUAGE java AS
'return (f - 32.0) * 5/9;';
Note: Scripted UDFs (
LANGUAGE javascript) and thenashornengine were removed in Cassandra 4.1. Java UDFs remain supported but are sandboxed and off by default.
11. Batch Operations
123456789101112-- Logged batch (atomic across partitions; use sparingly)
BEGIN BATCH
INSERT INTO users (user_id, username) VALUES (uuid1, 'user1');
INSERT INTO user_stats (user_id, login_count) VALUES (uuid1, 1);
APPLY BATCH;
-- Unlogged batch (single partition only; better performance)
BEGIN UNLOGGED BATCH
INSERT INTO user_posts (user_id, post_id, content) VALUES (?, ?, ?);
INSERT INTO user_posts (user_id, post_id, content) VALUES (?, ?, ?);
APPLY BATCH;
12. Performance Optimization
Best Practices
- Design for horizontal scalability and query-first modeling
- Keep partitions bounded (target < 100 MB and < 100k rows per partition)
- Choose partition keys that distribute load evenly
- Avoid multi-partition reads; denormalize instead
- Prefer SAI over legacy 2i indexes on Cassandra 5.0+
- Use
ALLOW FILTERINGsparingly — only for bounded, ad-hoc queries - Restrict logged batches to multi-table writes that need atomicity
Denormalization Example
123456789-- Denormalized user_timeline for faster reads
CREATE TABLE user_timeline (
user_id uuid,
post_id timeuuid,
content text,
author_name text,
PRIMARY KEY ((user_id), post_id)
) WITH CLUSTERING ORDER BY (post_id DESC);
13. Time-Series and IoT Data Patterns
12345678910111213141516171819-- Bucketed IoT sensor data to keep partitions bounded
CREATE TABLE sensor_readings (
device_id uuid,
day date,
reading_time timeuuid,
temperature float,
humidity float,
PRIMARY KEY ((device_id, day), reading_time)
) WITH CLUSTERING ORDER BY (reading_time DESC)
AND default_time_to_live = 2592000; -- 30 days
-- Time Window Compaction Strategy is well-suited for TTL'd time-series
ALTER TABLE sensor_readings
WITH compaction = {
'class': 'TimeWindowCompactionStrategy',
'compaction_window_unit': 'DAYS',
'compaction_window_size': 1
};
14. Vector Search (Cassandra 5.0+)
123456789101112131415-- Store embeddings and run approximate nearest-neighbor (ANN) search
CREATE TABLE docs (
id uuid PRIMARY KEY,
content text,
embedding vector<float, 768>
);
CREATE INDEX ON docs(embedding) USING 'sai'
WITH OPTIONS = {'similarity_function': 'cosine'};
SELECT id, content
FROM docs
ORDER BY embedding ANN OF ?
LIMIT 5;
Additional Tips
- Use
TRACING ON;to understand query performance - Monitor with
nodetool status,nodetool tablestats, andnodetool tpstats - Use the virtual
system_views/system_virtual_schemakeyspaces for runtime metrics - Java 11 and Java 17 are the supported runtimes for Cassandra 5.0; Java 8 is no longer supported
Note: Cassandra is a distributed, high-availability NoSQL database optimized for write-heavy workloads and linear scalability. As of the 5.0 GA line, headline additions include Storage-Attached Indexes (SAI), native vector search, the vector data type, Unified Compaction Strategy, and Java 17 support; Apache Thrift and the legacy cassandra-cli were removed in 4.x and remain gone.
Continue Learning
Discover more cheatsheets to boost your productivity