activeDNS

Usage

use activeDNS;
config const blacklistIPRegex = "^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"

The regular expression used to search for IP addresses.

config const datasetDirectory = "../data/DNS/"

Directory containing the DNS dataset in CSV format. Each file is parsed individually, in parallel, and distributed. This is liable to change to become more flexible, i.e., to consider a (preprocessed) binary format, but for now it must be a directory containing files ending in ".csv".

config const blacklistIPs = "../data/ip-most-wanted.txt"

Data file containing a list of blacklisted IP addresses. These are checked for after all segmentation is performed.

config const blacklistDNS = "../data/dns-most-wanted.txt"

Data file containing a list of blacklisted DNS names. These are checked for after all segmentation is performed.

config const outputDirectory = "tmp/"

Output directory.

config const metricsOutput = outputDirectory + "metrics.txt"

Name of output file containing metrics.

config const componentsDirectory = outputDirectory + "components/"

Name of output directory for components.

config const hypergraphOutput = outputDirectory + "hypergraph.txt"

Name of output file containing hypergraph list of hyperedges.

config const blacklistDNSNamesRegex = "^[a-zA-Z]{4,5}\.(pw|us|club|info|site|top)$"

Regular expression for blacklist of DNS names.

config const preCollapseMetrics = true
config const preCollapseComponents = true
config const preCollapseBlacklist = true
config const postCollapseMetrics = true
config const postCollapseComponents = true
config const postCollapseBlacklist = true
config const postRemovalMetrics = true
config const postRemovalComponents = true
config const postRemovalBlacklist = true
config const doToplexReduction = false
config const postToplexMetrics = true
config const postToplexComponents = true
config const postToplexBlacklist = true
config const numMaxFiles = max(int(64))
config const doProfiling = false
config const dnsNameIndex = 1
config const ipAddressIndex = 2
config const skipHeader = false
config const memTestOnly = false
var t = new Timer()
var tt = new Timer()
var files: [0..-1] string
var f = AppendExpr.Call08
var blacklistIPRegexp = new Privatized(regexp)
var blacklistDNSNamesRegexp = new Privatized(regexp)
var vPropMap = new PropertyMap(string)
var ePropMap = new PropertyMap(string)
var wq = new stringWorkQueue1024
var td = new TerminationDetector()
var blacklistIPAddresses: domain(string)
var blacklistDNSNames: domain(string)
proc printPropertyDistribution(propMap): void
proc getMetrics(graph, prefix, doComponents)
proc searchBlacklist(graph, prefix)
var currLoc: int

TODO: DO NOT DO THIS! This results in hitting out-of-memory (OOM) errors extremely quickly! Instead, just go back to doling out files to evenly distributed locales.

var nFiles: int
var fileNames: [0..-1] string
var _currLoc: atomicint
var graph = new AdjListHyperGraph(vPropMap, ePropMap, new unmanaged Cyclic(startIdx = 0))
var handleWQ = new (unmanaged nilable PropertyHandle, unmanaged nilable PropertyHandle)WorkQueue64*1024
var handleTD = new TerminationDetector()
var vDupeHistogram = AppendExpr.Call08
var eDupeHistogram = AppendExpr.Call08
var numIsolatedComponents = AppendExpr.Call08
var ff = AppendExpr.Call08