Unified Host and Network Dataset

Data modifications

We modified the data in the following way:

Graph schema

import xgt

# Open a connection to the xGT server (no explicit connection arguments).
con = xgt.Connection()

# 'Devices' vertex frame: a single TEXT column, 'device', which is also
# the key that edge frames use to attach their endpoints.
device_schema = [['device', xgt.TEXT]]
devices = con.create_vertex_frame(
    name='Devices',
    schema=device_schema,
    key='device',
)
import xgt

# Open a connection to the xGT server (no explicit connection arguments).
con = xgt.Connection()

# Column layout of a netflow record. Every column is an integer except the
# two device columns, which hold the TEXT keys of the edge endpoints.
netflow_schema = [
    ['epoch_time', xgt.INT],
    ['duration', xgt.INT],
    ['src_device', xgt.TEXT],
    ['dst_device', xgt.TEXT],
    ['protocol', xgt.INT],
    ['src_port', xgt.INT],
    ['dst_port', xgt.INT],
    ['src_packets', xgt.INT],
    ['dst_packets', xgt.INT],
    ['src_bytes', xgt.INT],
    ['dst_bytes', xgt.INT],
]

# 'Netflow' edge frame: each edge runs between two 'Devices' vertices,
# from the vertex named by 'src_device' to the one named by 'dst_device'.
netflow = con.create_edge_frame(
    name='Netflow',
    schema=netflow_schema,
    source='Devices',
    target='Devices',
    source_key='src_device',
    target_key='dst_device',
)
import xgt

# Open a connection to the xGT server (no explicit connection arguments).
con = xgt.Connection()

# Column layout of a host event record.
host_event_schema = [
    ['epoch_time', xgt.INT],
    ['event_id', xgt.INT],
    ['log_host', xgt.TEXT],
    ['user_name', xgt.TEXT],
    ['domain_name', xgt.TEXT],
    ['logon_id', xgt.INT],
    ['process_name', xgt.TEXT],
    ['process_id', xgt.INT],
    ['parent_process_name', xgt.TEXT],
    ['parent_process_id', xgt.INT],
]

# 'HostEvents' edge frame over 'Devices'. Both endpoint keys are 'log_host',
# so each edge is keyed to the same 'Devices' vertex at both ends.
host_events = con.create_edge_frame(
    name='HostEvents',
    schema=host_event_schema,
    source='Devices',
    target='Devices',
    source_key='log_host',
    target_key='log_host',
)
import xgt

# Open a connection to the xGT server (no explicit connection arguments).
con = xgt.Connection()

# 'AuthEvents' edge frame: authentication events as edges between two
# 'Devices' vertices.
auth_events = con.create_edge_frame(
    name='AuthEvents',
    schema=[['epoch_time', xgt.INT],
            ['event_id', xgt.INT],
            ['log_host', xgt.TEXT],
            ['logon_type', xgt.INT],
            ['logon_typeDescription', xgt.TEXT],
            ['username', xgt.TEXT],
            ['domain_name', xgt.TEXT],
            ['logon_id', xgt.INT],
            ['subject_username', xgt.TEXT],
            ['subject_domain_name', xgt.TEXT],
            ['subject_logon_id', xgt.TEXT],
            ['status', xgt.TEXT],
            ['source', xgt.TEXT],
            ['service_name', xgt.TEXT],
            ['destination', xgt.TEXT],
            ['authentication_package', xgt.TEXT],
            ['failure_reason', xgt.TEXT],
            ['process_name', xgt.TEXT],
            ['process_id', xgt.INT],
            ['parent_process_name', xgt.TEXT],
            ['parent_process_id', xgt.INT]],
    source='Devices',
    target='Devices',
    # Endpoints are taken from the schema's 'source' and 'destination'
    # columns; both hold keys into the 'Devices' vertex frame.
    source_key='source',
    target_key='destination')
# Load data into the edge frames. This assumes the `netflow`, `host_events`
# and `auth_events` edge frame objects were created earlier.
#
# Every URL below is commented out, so each tuple is empty as written.
# Uncomment the days you want before running.

# Netflow records (nf_day-XX.csv) go into the Netflow edge frame.
netflow.load( (
    #'https://datasets.trovares.com/LANL/xgt/nf_day-08.csv',
    #'https://datasets.trovares.com/LANL/xgt/nf_day-09.csv',
    #'https://datasets.trovares.com/LANL/xgt/nf_day-10.csv',
    #'https://datasets.trovares.com/LANL/xgt/nf_day-11.csv',
) )

# Host event records (wls_day-XX_1v.csv) go into the HostEvents edge frame.
host_events.load( (
    #'https://datasets.trovares.com/LANL/xgt/wls_day-08_1v.csv',
    #'https://datasets.trovares.com/LANL/xgt/wls_day-09_1v.csv',
    #'https://datasets.trovares.com/LANL/xgt/wls_day-10_1v.csv',
    #'https://datasets.trovares.com/LANL/xgt/wls_day-11_1v.csv',
) )

# Authentication event records (wls_day-XX_2v.csv) go into the AuthEvents
# edge frame.
auth_events.load( (
    #'https://datasets.trovares.com/LANL/xgt/wls_day-08_2v.csv',
    #'https://datasets.trovares.com/LANL/xgt/wls_day-09_2v.csv',
    #'https://datasets.trovares.com/LANL/xgt/wls_day-10_2v.csv',
    #'https://datasets.trovares.com/LANL/xgt/wls_day-11_2v.csv',
) )