Unified Host and Network Dataset

Data modifications

We modified the data in the following way:

Graph schema

import xgt
# Open a connection using xgt.Connection's default arguments.
con = xgt.Connection()

# 'Device' vertex frame: a single TEXT column, 'device', which also serves
# as the frame's key.
device_schema = [['device', xgt.TEXT]]
dev = con.create_vertex_frame(
    name='Device',
    schema=device_schema,
    key='device',
)
import xgt
# Open a connection using xgt.Connection's default arguments.
con = xgt.Connection()

# Column layout of the 'NETFLOW' edge frame. Only the two device columns
# are TEXT; every other column is declared as INT.
netflow_schema = [
    ['epochtime', xgt.INT],
    ['duration', xgt.INT],
    ['srcDevice', xgt.TEXT],
    ['dstDevice', xgt.TEXT],
    ['protocol', xgt.INT],
    ['srcPort', xgt.INT],
    ['dstPort', xgt.INT],
    ['srcPackets', xgt.INT],
    ['dstPackets', xgt.INT],
    ['srcBytes', xgt.INT],
    ['dstBytes', xgt.INT],
]

# Edges go from a 'Device' vertex to a 'Device' vertex; the 'srcDevice'
# column is the source key and the 'dstDevice' column is the target key.
nf = con.create_edge_frame(
    'NETFLOW',
    schema=netflow_schema,
    source='Device',
    target='Device',
    source_key='srcDevice',
    target_key='dstDevice',
)
import xgt
# Open a connection using xgt.Connection's default arguments.
con = xgt.Connection()

# Column layout of the 'HOST_EVENT' edge frame.
host_event_schema = [
    ['epochtime', xgt.INT],
    ['eventID', xgt.INT],
    ['logHost', xgt.TEXT],
    ['userName', xgt.TEXT],
    ['domainName', xgt.TEXT],
    ['logonID', xgt.INT],
    ['processName', xgt.TEXT],
    ['processID', xgt.INT],
    ['parentProcessName', xgt.TEXT],
    ['parentProcessID', xgt.INT],
]

# Source and target are both 'Device' and both keyed on the same 'logHost'
# column, so each edge starts and ends at the same device vertex.
host_event = con.create_edge_frame(
    'HOST_EVENT',
    schema=host_event_schema,
    source='Device',
    target='Device',
    source_key='logHost',
    target_key='logHost',
)
import xgt
# Open a connection using xgt.Connection's default arguments.
con = xgt.Connection()

# Column layout of the 'AUTH_EVENT' edge frame.
auth_event_schema = [
    ['epochtime', xgt.INT],
    ['eventID', xgt.INT],
    ['logHost', xgt.TEXT],
    ['logonType', xgt.INT],
    ['logonTypeDescription', xgt.TEXT],
    ['userName', xgt.TEXT],
    ['domainName', xgt.TEXT],
    ['logonID', xgt.INT],
    ['subjectUserName', xgt.TEXT],
    ['subjectDomainName', xgt.TEXT],
    # NOTE(review): declared TEXT although 'logonID' above is INT --
    # confirm against the data files that this is intended.
    ['subjectLogonID', xgt.TEXT],
    ['status', xgt.TEXT],
    ['src', xgt.TEXT],
    ['serviceName', xgt.TEXT],
    ['destination', xgt.TEXT],
    ['authenticationPackage', xgt.TEXT],
    ['failureReason', xgt.TEXT],
    ['processName', xgt.TEXT],
    ['processID', xgt.INT],
    ['parentProcessName', xgt.TEXT],
    ['parentProcessID', xgt.INT],
]

# Edges go from the 'Device' named in the 'src' column to the 'Device'
# named in the 'logHost' column.
auth_event = con.create_edge_frame(
    'AUTH_EVENT',
    schema=auth_event_schema,
    source='Device',
    target='Device',
    source_key='src',
    target_key='logHost',
)
# Requires the edge frame objects `nf`, `host_event` and `auth_event` to
# have been created already.

# Each frame is loaded from a tuple of CSV URLs covering several days.
# Every URL below is commented out, so each tuple is empty as written;
# uncomment the days you want before running.
netflow_urls = (
    # 'https://datasets.trovares.com/LANL/xgt/nf_day-08.csv',
    # 'https://datasets.trovares.com/LANL/xgt/nf_day-09.csv',
    # 'https://datasets.trovares.com/LANL/xgt/nf_day-10.csv',
    # 'https://datasets.trovares.com/LANL/xgt/nf_day-11.csv',
)
nf.load(netflow_urls)

host_event_urls = (
    # 'https://datasets.trovares.com/LANL/xgt/wls_day-08_1v.csv',
    # 'https://datasets.trovares.com/LANL/xgt/wls_day-09_1v.csv',
    # 'https://datasets.trovares.com/LANL/xgt/wls_day-10_1v.csv',
    # 'https://datasets.trovares.com/LANL/xgt/wls_day-11_1v.csv',
)
host_event.load(host_event_urls)

auth_event_urls = (
    # 'https://datasets.trovares.com/LANL/xgt/wls_day-08_2v.csv',
    # 'https://datasets.trovares.com/LANL/xgt/wls_day-09_2v.csv',
    # 'https://datasets.trovares.com/LANL/xgt/wls_day-10_2v.csv',
    # 'https://datasets.trovares.com/LANL/xgt/wls_day-11_2v.csv',
)
auth_event.load(auth_event_urls)