Snowflake SQL Cheatsheet: Mastering Cloud Data Warehousing

1. Connection and Basic Setup

SnowSQL CLI Installation

Bash

1
2
3
4
5
6
# Install SnowSQL (download installer from Snowflake)
# https://docs.snowflake.com/en/user-guide/snowsql-install-config

# Connect to Snowflake
snowsql -a <account_identifier> -u <username>

Note: The snowflake-snowsql pip package is no longer the recommended install path. Use the official SnowSQL installer or Homebrew (brew install --cask snowflake-snowsql). For newer scripting workflows, consider the Snowflake CLI (snow), which is the modern successor for managing apps, Snowpark, and Streamlit.

Web UI Connection

Snowsight is the default web UI: https://app.snowflake.com
Legacy Classic Console has been deprecated and retired
Support for SSO, MFA, key-pair auth, OAuth, and federated authentication
MFA is enforced by default for password-based human logins (rolled out through 2025)

Programmatic Connectors

Python

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Python (snowflake-connector-python)
import snowflake.connector

conn = snowflake.connector.connect(
    account='your_account',
    user='your_username',
    password='your_password',
    warehouse='your_warehouse',
    database='your_database',
    schema='your_schema',
    role='your_role'
)

# Key-pair authentication (recommended over passwords)
conn = snowflake.connector.connect(
    account='your_account',
    user='your_username',
    private_key_file='/path/to/rsa_key.p8',
    warehouse='your_warehouse'
)

2. Database, Schema, and Table Operations

Database Management

SQL

1
2
3
4
5
6
7
8
9
10
11
-- Create database
CREATE DATABASE my_database;
CREATE DATABASE IF NOT EXISTS my_database;

-- Switch database context
USE DATABASE my_database;

-- Drop database (recoverable via UNDROP within retention window)
DROP DATABASE IF EXISTS my_database;
UNDROP DATABASE my_database;

Schema Management

SQL

1
2
3
4
5
6
7
8
9
10
11
12
13
-- Create schema
CREATE SCHEMA my_schema;
CREATE SCHEMA IF NOT EXISTS my_schema;

-- Managed access schema (centralized grant management)
CREATE SCHEMA secure_schema WITH MANAGED ACCESS;

-- Switch schema context
USE SCHEMA my_schema;

-- Drop schema
DROP SCHEMA IF EXISTS my_schema;

Table Creation

SQL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
-- Basic table creation
CREATE TABLE employees (
    id INT,
    name VARCHAR(100),
    salary DECIMAL(10,2),
    hire_date DATE
);

-- Table with constraints
CREATE TABLE departments (
    dept_id INT PRIMARY KEY,
    dept_name VARCHAR(50) NOT NULL,
    location VARCHAR(100)
);

-- Transient and temporary tables (no Fail-safe, lower storage cost)
CREATE TRANSIENT TABLE staging_events (event_id INT, payload VARIANT);
CREATE TEMPORARY TABLE session_scratch (id INT);

-- Iceberg table (Snowflake-managed, Parquet on your own cloud storage)
CREATE ICEBERG TABLE orders_iceberg (
    order_id INT,
    order_ts TIMESTAMP_NTZ,
    amount NUMBER(12,2)
)
CATALOG = 'SNOWFLAKE'
EXTERNAL_VOLUME = 'my_ext_volume'
BASE_LOCATION = 'orders/';

-- Dynamic table (declarative, auto-refreshing pipeline)
CREATE DYNAMIC TABLE daily_orders
TARGET_LAG = '5 minutes'
WAREHOUSE = etl_wh
AS
SELECT DATE_TRUNC('day', order_ts) AS day, SUM(amount) AS total
FROM orders
GROUP BY 1;

3. Snowflake-Specific Data Types

Semi-Structured Types

SQL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
-- VARIANT: Flexible JSON-like storage (max 128 MB compressed per value)
CREATE TABLE product_catalog (
    product_id INT,
    product_details VARIANT
);

-- Example VARIANT insertion
INSERT INTO product_catalog
SELECT 1,
    PARSE_JSON('{
        "name": "Laptop",
        "specs": {"ram": "16GB", "storage": "512GB"}
    }');

-- OBJECT and ARRAY types
CREATE TABLE customer_info (
    customer_id INT,
    addresses ARRAY,
    preferences OBJECT
);

-- Structured (typed) OBJECT, ARRAY, and MAP — generally available
CREATE TABLE typed_orders (
    order_id INT,
    items ARRAY(OBJECT(sku VARCHAR, qty NUMBER)),
    tags MAP(VARCHAR, VARCHAR)
);

-- VECTOR type for embeddings / similarity search
CREATE TABLE doc_embeddings (
    doc_id INT,
    embedding VECTOR(FLOAT, 768)
);

4. Standard SQL Operations

SELECT Operations

SQL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
-- Basic SELECT
SELECT column1, column2
FROM table_name
WHERE condition;

-- Aggregations
SELECT
    department,
    AVG(salary) as avg_salary,
    COUNT(*) as employee_count
FROM employees
GROUP BY department;

-- QUALIFY filters on window function results
SELECT id, dept, salary,
       ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) AS rn
FROM employees
QUALIFY rn <= 3;

INSERT, UPDATE, DELETE

SQL

1
2
3
4
5
6
7
8
9
10
11
12
13
-- INSERT
INSERT INTO employees (id, name, salary)
VALUES (1, 'John Doe', 75000);

-- UPDATE
UPDATE employees
SET salary = salary * 1.1
WHERE performance_rating > 4;

-- DELETE
DELETE FROM employees
WHERE termination_date IS NOT NULL;

MERGE (Upsert)

SQL

1
2
3
4
5
6
7
8
9
10
11
MERGE INTO target_table t
USING source_table s
ON t.id = s.id
WHEN MATCHED THEN
    UPDATE SET
        t.column1 = s.column1,
        t.column2 = s.column2
WHEN NOT MATCHED THEN
    INSERT (id, column1, column2)
    VALUES (s.id, s.column1, s.column2);

5. Semi-Structured Data Handling

JSON Processing

SQL

1
2
3
4
5
6
7
8
9
10
11
12
-- Extract JSON values
SELECT
    product_details:name::STRING as product_name,
    product_details:specs.ram::STRING as ram
FROM product_catalog;

-- Flatten JSON arrays
SELECT
    f.value:name::STRING as category_name
FROM product_catalog,
LATERAL FLATTEN(input => product_details:categories) f;

Other Format Support

SQL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
-- Create table from Parquet using schema inference
CREATE TABLE parquet_data
USING TEMPLATE (
    SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*))
    FROM TABLE(
        INFER_SCHEMA(
            LOCATION=>'@my_stage/data.parquet',
            FILE_FORMAT=>'parquet_format'
        )
    )
);

-- Load semi-structured data directly with COPY
COPY INTO product_catalog
FROM @my_stage/products.json
FILE_FORMAT = (TYPE = JSON)
MATCH_BY_COLUMN_NAME = CASE_INSENSITIVE;

6. Time Travel and Data Retention

SQL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
-- Query historical data
SELECT * FROM employees
AT (TIMESTAMP => '2025-01-01 12:00:00'::TIMESTAMP);

-- Query by query ID or relative offset
SELECT * FROM employees BEFORE (STATEMENT => '01abc...');
SELECT * FROM employees AT (OFFSET => -60*5);  -- 5 minutes ago

-- Restore table to previous state via clone
CREATE TABLE employees_restored
CLONE employees
AT (TIMESTAMP => '2025-01-01 12:00:00'::TIMESTAMP);

-- Configure time travel retention (1 day Standard, up to 90 days Enterprise+)
ALTER TABLE employees
SET DATA_RETENTION_TIME_IN_DAYS = 14;

7. Cloning and Zero-Copy Cloning

SQL

1
2
3
4
5
6
7
8
9
10
11
12
-- Zero-copy clone (metadata-only, no data duplication)
CREATE TABLE employees_test
CLONE employees;

-- Clone databases or schemas (entire object trees)
CREATE DATABASE prod_clone CLONE production;

-- Clone with specific timestamp
CREATE TABLE employees_historical
CLONE employees
AT (TIMESTAMP => '2025-06-01 00:00:00'::TIMESTAMP);

8. Streams, Tasks, and Dynamic Tables

SQL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
-- Create stream to track row-level changes (CDC)
CREATE STREAM employees_stream
ON TABLE employees;

-- Append-only and insert-only stream variants
CREATE STREAM events_append_only ON TABLE events APPEND_ONLY = TRUE;

-- Create task for ETL (serverless or warehouse-backed)
CREATE TASK load_processed_employees
WAREHOUSE = my_warehouse
SCHEDULE = '5 MINUTE'
WHEN SYSTEM$STREAM_HAS_DATA('employees_stream')
AS
INSERT INTO processed_employees
SELECT * FROM employees_stream;

-- Serverless task (Snowflake-managed compute)
CREATE TASK serverless_load
USER_TASK_MANAGED_INITIAL_WAREHOUSE_SIZE = 'XSMALL'
SCHEDULE = '10 MINUTE'
AS
CALL my_etl_proc();

-- Activate task
ALTER TASK load_processed_employees RESUME;

-- Dynamic Tables (declarative alternative to streams+tasks)
CREATE DYNAMIC TABLE customer_summary
TARGET_LAG = '1 minute'
WAREHOUSE = etl_wh
REFRESH_MODE = AUTO
AS
SELECT customer_id, COUNT(*) AS order_count
FROM orders
GROUP BY customer_id;

9. User-Defined Functions and Stored Procedures

SQL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
-- Scalar SQL UDF
CREATE OR REPLACE FUNCTION tax_calculator(salary FLOAT)
RETURNS FLOAT
LANGUAGE SQL
AS
$$
    salary * 0.25
$$;

-- Python UDF (Snowpark)
CREATE OR REPLACE FUNCTION normalize_text(s STRING)
RETURNS STRING
LANGUAGE PYTHON
RUNTIME_VERSION = '3.11'
HANDLER = 'run'
AS
$$
def run(s):
    return s.strip().lower() if s else None
$$;

-- Stored Procedure in Snowflake Scripting
CREATE OR REPLACE PROCEDURE update_salaries(percentage FLOAT)
RETURNS STRING
LANGUAGE SQL
AS
$$
BEGIN
    UPDATE employees
    SET salary = salary * (1 + :percentage);

    RETURN 'Salaries updated successfully';
END;
$$;

-- Snowpark Python stored procedure
CREATE OR REPLACE PROCEDURE etl_job()
RETURNS STRING
LANGUAGE PYTHON
RUNTIME_VERSION = '3.11'
PACKAGES = ('snowflake-snowpark-python')
HANDLER = 'main'
AS
$$
def main(session):
    df = session.table('raw_events').filter("status = 'ok'")
    df.write.mode('overwrite').save_as_table('clean_events')
    return 'done'
$$;

10. Performance Optimization

Clustering

SQL

1
2
3
4
5
6
7
8
9
10
11
-- Create clustered table (automatic clustering maintains order)
CREATE TABLE large_sales (
    sale_date DATE,
    product_id INT,
    amount DECIMAL(10,2)
)
CLUSTER BY (sale_date, product_id);

-- Inspect clustering health
SELECT SYSTEM$CLUSTERING_INFORMATION('large_sales');

Materialized Views and Search Optimization

SQL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
CREATE MATERIALIZED VIEW monthly_sales AS
SELECT
    DATE_TRUNC('month', sale_date) AS month,
    SUM(amount) AS total_sales
FROM sales
GROUP BY 1;

-- Search Optimization Service for point-lookup / equality / substring queries
ALTER TABLE large_sales ADD SEARCH OPTIMIZATION
    ON EQUALITY(product_id), SUBSTRING(product_name);

-- Query Acceleration Service (offload scan-heavy work to serverless compute)
ALTER WAREHOUSE my_warehouse
SET ENABLE_QUERY_ACCELERATION = TRUE
    QUERY_ACCELERATION_MAX_SCALE_FACTOR = 8;

11. Resource Management

Warehouse Scaling

SQL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
-- Create warehouse with auto-suspend and multi-cluster scaling
CREATE WAREHOUSE my_warehouse
WITH
    WAREHOUSE_SIZE = 'MEDIUM'
    AUTO_SUSPEND = 60
    AUTO_RESUME = TRUE
    MIN_CLUSTER_COUNT = 1
    MAX_CLUSTER_COUNT = 4
    SCALING_POLICY = 'STANDARD'
    INITIALLY_SUSPENDED = TRUE;

-- Resize warehouse (supports up to 6X-LARGE)
ALTER WAREHOUSE my_warehouse
SET WAREHOUSE_SIZE = 'LARGE';

-- Resource monitors guard credit consumption
CREATE RESOURCE MONITOR monthly_cap
WITH CREDIT_QUOTA = 1000
     FREQUENCY = MONTHLY
     START_TIMESTAMP = IMMEDIATELY
     TRIGGERS
        ON 80 PERCENT DO NOTIFY
        ON 100 PERCENT DO SUSPEND;

12. Security Features

Role-Based Access Control

SQL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
-- Create role
CREATE ROLE data_analyst;

-- Grant privileges
GRANT SELECT ON DATABASE my_database TO ROLE data_analyst;
GRANT USAGE ON WAREHOUSE my_warehouse TO ROLE data_analyst;

-- Row access policy
CREATE ROW ACCESS POLICY region_policy AS (region STRING)
RETURNS BOOLEAN ->
    CURRENT_ROLE() = 'ADMIN'
    OR EXISTS (SELECT 1 FROM user_regions WHERE user_name = CURRENT_USER() AND region = region);

ALTER TABLE sales ADD ROW ACCESS POLICY region_policy ON (region);

-- Dynamic data masking
CREATE MASKING POLICY mask_email AS (val STRING) RETURNS STRING ->
    CASE WHEN CURRENT_ROLE() IN ('PII_ROLE') THEN val
         ELSE REGEXP_REPLACE(val, '.+@', '***@') END;

ALTER TABLE customers MODIFY COLUMN email SET MASKING POLICY mask_email;

-- Network policies and authentication policies
CREATE NETWORK POLICY corp_only ALLOWED_IP_LIST = ('203.0.113.0/24');

13. Cost Optimization Strategies

Use auto-suspend (30–60 sec) and auto-resume on warehouses
Right-size warehouses; multi-cluster for concurrency, not bigger sizes
Leverage zero-copy cloning instead of physical copies
Use transient/temporary tables when Fail-safe is not needed
Tune DATA_RETENTION_TIME_IN_DAYS to actual recovery needs
Use Search Optimization, clustering, and materialized views selectively
Monitor with SNOWFLAKE.ACCOUNT_USAGE and ORGANIZATION_USAGE views
Set Resource Monitors to cap credit spend
Prefer Dynamic Tables over hand-rolled streams+tasks where possible
Consider Snowpark-optimized warehouses only for memory-bound workloads

SQL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
-- Create share (Secure Data Sharing — no data copy)
CREATE SHARE sales_data_share;

-- Add database/schema/objects to share
GRANT USAGE ON DATABASE sales_database TO SHARE sales_data_share;
GRANT USAGE ON SCHEMA sales_database.public TO SHARE sales_data_share;
GRANT SELECT ON ALL TABLES IN SCHEMA sales_database.public TO SHARE sales_data_share;

-- Grant share to another account
ALTER SHARE sales_data_share ADD ACCOUNTS = other_account;

-- Snowflake Marketplace listings and private listings publish data and apps
-- via the Provider Studio in Snowsight.

15. Snowflake Cortex (Built-in AI/ML)

SQL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
-- LLM functions (model availability varies by region)
SELECT SNOWFLAKE.CORTEX.COMPLETE(
    'llama3.1-70b',
    'Summarize this support ticket: ' || ticket_text
) AS summary
FROM tickets;

SELECT SNOWFLAKE.CORTEX.SENTIMENT(review_text) AS score FROM reviews;
SELECT SNOWFLAKE.CORTEX.TRANSLATE(text, 'en', 'es') FROM docs;

-- Embeddings + vector similarity for RAG
SELECT doc_id,
       VECTOR_COSINE_SIMILARITY(
           embedding,
           SNOWFLAKE.CORTEX.EMBED_TEXT_768('snowflake-arctic-embed-m', :query)
       ) AS score
FROM doc_embeddings
ORDER BY score DESC
LIMIT 5;

-- Cortex Analyst (natural-language to SQL) and Cortex Search (hybrid retrieval)
-- are exposed via REST APIs and Snowsight.

Best Practices

Right-size warehouses and use auto-suspend/auto-resume
Prefer key-pair or OAuth authentication; enforce MFA for human users
Leverage semi-structured and structured VARIANT/OBJECT/ARRAY types
Implement RBAC with least privilege; use row/column policies for sensitive data
Use Time Travel, zero-copy cloning, and UNDROP for safe recovery
Optimize storage and queries with clustering, materialized views, and Search Optimization
Use Dynamic Tables for declarative pipelines; Streams+Tasks when fine control is needed
Monitor cost via ACCOUNT_USAGE views and Resource Monitors
Consider Iceberg tables for open-format interoperability with your data lake
Pin Snowpark/Python runtime versions explicitly for reproducibility

Note: Always refer to the latest Snowflake documentation for the most up-to-date information and best practices.

Snowflake

Continue Learning