Skip to content

Snowflake

You can ingest data into Snowflake via one of the other connectors.

S3 -> Snowflake

  1. Setup an S3 connector. We will ingest data from this S3 connector into Snowflake.

  2. Log into your Snowflake account.

  3. Run the following queries from Snowflake:

    sql
    -- Create JSON file format in Snowflake
    CREATE FILE FORMAT hp_json_format
    TYPE = JSON
    STRIP_OUTER_ARRAY = TRUE
    ENABLE_OCTAL = FALSE
    ALLOW_DUPLICATE = FALSE
    STRIP_NULL_VALUES = TRUE;
    
    -- Create a stage, replacing YOUR_BUCKET_NAME and Credentials
    CREATE OR REPLACE STAGE HONEYPOT_STAGE
    URL = 's3://YOUR_BUCKET_NAME/'
    CREDENTIALS = (
        AWS_KEY_ID = '...',
        AWS_SECRET_KEY = '...'
    )
    FILE_FORMAT = (FORMAT_NAME = 'hp_json_format')
    DIRECTORY = (ENABLE = true, AUTO_REFRESH = false);
    
    -- Optionally, if your bucket already has data in it,
    -- confirm that you can select from the stage
    SELECT 
        METADATA$FILENAME as file_name,
        METADATA$FILE_ROW_NUMBER as row_number,
        $1 as data
    FROM @HONEYPOT_STAGE
    LIMIT 10;
    
    -- Create a destination table
    CREATE TABLE honeypot_events (
        -- Core Event Fields
        org_id VARCHAR,
        honeypot_name VARCHAR,
        event_id VARCHAR,
        events ARRAY,
        event_name VARCHAR,
        event_properties VARIANT,
        event_time TIMESTAMP,
        
        -- Network & Location Data
        ip_address VARCHAR,
        ip_address_connecting VARCHAR,
        ip_address_forwarded_for VARCHAR,
        asn NUMBER,
        as_organization VARCHAR,
        city_name VARCHAR,
        country_name VARCHAR,
        country_code VARCHAR,
        continent VARCHAR,
        region_name VARCHAR,
        region_code VARCHAR,
        latitude FLOAT,
        longitude FLOAT,
        timezone VARCHAR,
        connection_type NUMBER,
        headers VARIANT,
        
        -- Identity & Session Management
        identity VARCHAR,
        identity_type VARCHAR,
        identities ARRAY,
        browser_fingerprint VARCHAR,
        handprint_id VARCHAR,
        device_id VARCHAR,
        device_id_details VARIANT,
        session_id VARCHAR,
        ref VARCHAR,
        
        -- Device & Browser Information
        referer VARCHAR,
        ua VARCHAR,
        browser_name VARCHAR,
        browser_version VARCHAR,
        browser_major VARCHAR,
        engine_name VARCHAR,
        engine_version VARCHAR,
        os_name VARCHAR,
        os_version VARCHAR,
        device_vendor VARCHAR,
        device_model VARCHAR,
        device_type VARCHAR,
        cpu_architecture VARCHAR,
        
        -- Performance Metrics
        performance_page_load FLOAT,
        performance_dom_ready FLOAT,
        performance_dns FLOAT,
        performance_tcp FLOAT,
        performance_ttfb FLOAT,
        
        -- Screen & Display
        screen_width NUMBER,
        screen_height NUMBER,
        screen_color_depth NUMBER,
        screen_pixel_depth NUMBER,
        screen_orientation_angle NUMBER,
        screen_orientation_type VARCHAR,
        screen_inner_width NUMBER,
        screen_inner_height NUMBER,
        screen_outer_width NUMBER,
        screen_outer_height NUMBER,
        
        -- Language & Localization
        languages_supported ARRAY,
        languages_preferred VARCHAR,
        timezone_browser VARCHAR,
        timezone_browser_offset_hours FLOAT,
        
        -- Privacy Settings
        browser_privacy_enabled BOOLEAN,
        cookies_enabled BOOLEAN,
        cookie_test_passed BOOLEAN,
        gpc_enabled BOOLEAN,
        
        -- Security & Risk Assessment
        verified_bot_category VARCHAR,
        unverified_bot_category VARCHAR,
        current_tags ARRAY,
        session_tags ARRAY,
        tag_metadata VARIANT,
        
        -- Session Analytics
        last_event_name VARCHAR,
        last_event_date TIMESTAMP,
        seconds_since_last_event FLOAT,
        session_start_date TIMESTAMP,
        seconds_since_session_start FLOAT,
        session_total_event_count NUMBER,
        session_ips ARRAY,
        session_ip_count NUMBER,
        
        -- Historical Context
        last_identity VARIANT,
        last_ip_address VARCHAR,
        last_latitude FLOAT,
        last_longitude FLOAT,
        last_connection_type NUMBER,
        last_asn NUMBER,
        last_as_organization VARCHAR,
        last_city_name VARCHAR,
        last_country_name VARCHAR,
        last_country_code VARCHAR,
        last_region_name VARCHAR,
        last_timezone_browser VARCHAR,
        last_timezone_ip VARCHAR,
        distance_traveled_miles FLOAT,
        
        -- Smart Property Intelligence
        phone_intel VARIANT,
        address_intel VARIANT,
        email_domain VARCHAR,
        
        -- Wallet Intelligence
        wallet_intel VARIANT,
        
        -- Geofencing & Compliance
        geofence_vpn_rescreen_frequency_seconds NUMBER,
        geofenced BOOLEAN,
        geofence_triggered BOOLEAN,
        redirect_url VARCHAR,
        redirect_auto BOOLEAN,
        geofence_test_only BOOLEAN,
        geo_challenge_status NUMBER,
        geofence_event_count NUMBER,
        
        -- Behaviors
        behaviors VARIANT,
        
        -- Additional fields
        address_to_ip_distance FLOAT,
        is_api_call BOOLEAN,
        file_name VARCHAR,
        loaded_at TIMESTAMP,
        
        -- Primary key for deduplication
        PRIMARY KEY (event_id)
    );
    
    -- Task with PRIMARY KEY handling
    CREATE OR REPLACE TASK LOAD_HONEYPOT_EVENTS
    WAREHOUSE = YOUR_WAREHOUSE
    SCHEDULE = '5 MINUTE'
    AS
    COPY INTO honeypot_events
    FROM @HONEYPOT_STAGE
    PATTERN = '.*[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{3}Z\.json'
    MATCH_BY_COLUMN_NAME = CASE_INSENSITIVE
    ON_ERROR = CONTINUE;  -- This is crucial with PRIMARY KEY
    
    ALTER TASK load_honeypot_events RESUME;
    
    SELECT DISTINCT file_name, loaded_at 
    FROM honeypot_events 
    ORDER BY loaded_at DESC;

Kafka -> Snowflake

Coming soon.