Training & Evading ML based IDS

profilejordan1st!
p5.zip

CS6262_p5/Polymorphic_blend/Makefile

# Link the decoder stub with the embedded payload into a 32-bit executable.
a.out: shellcode.o payload.o
	gcc -g3 -m32 shellcode.o payload.o -o a.out

# Assemble the decoder stub as 32-bit code with debug information.
shellcode.o: shellcode.S
	gcc -g3 -c shellcode.S -m32 -o shellcode.o

# Wrap the raw payload bytes in an ELF32 object file. objcopy's binary input
# mode defines the _binary_payload_bin_start symbol that shellcode.S refers to.
payload.o: payload.bin
	objcopy -I binary -O elf32-i386 -B i386 payload.bin payload.o

CS6262_p5/Polymorphic_blend/task1.py

#!/usr/bin/env python2
"""Task 1 driver: assemble the blended payload and write it to ``output``.

Steps, all delegated to helpers star-imported from substitution.py and
padding.py:

1. read the attack body and the artificial-profile payload from their pcaps;
2. build a substitution table from the two byte-frequency distributions;
3. substitute the attack body, which also yields the XOR table needed to
   undo the substitution;
4. prepend the decoder read from ``shellcode.bin``;
5. pad until the payload is as long as the artificial-profile payload;
6. write the result to the file ``output``.
"""
import struct
from collections import Counter

from substitution import *
from padding import *

ARTIFICIAL_PATH = "artificial-profile.pcap"
ATTACKBODY_PATH = "YOUR_GTID.pcap"  # replace the file name by the one you downloaded

# CHECK: 128 can be some other number (greater than and multiple of 4) per your attack trace length
XOR_BLOCK_LENGTH = 128


def _pad_with_nul(items, target_length):
    """Append NUL characters to *items* in place until it holds target_length entries."""
    while len(items) < target_length:
        items.append(chr(0))


if __name__ == '__main__':
    # Read in source pcap file and extract tcp payload
    attack_payload = getAttackBodyPayload(ATTACKBODY_PATH)
    artificial_payload = getArtificialPayload(ARTIFICIAL_PATH)

    # Generate substitution table based on byte frequency in file
    substitution_table = getSubstitutionTable(artificial_payload, attack_payload)

    # Substitution table will be used to encrypt attack body and generate
    # corresponding xor_table which will be used to decrypt the attack body
    (xor_table, adjusted_attack_body) = substitute(attack_payload, substitution_table)

    # For xor operation, both lists should be a multiple of 4 long
    _pad_with_nul(xor_table, XOR_BLOCK_LENGTH)
    _pad_with_nul(adjusted_attack_body, XOR_BLOCK_LENGTH)

    # Read in decryptor binary to append at the start of payload
    with open("shellcode.bin", mode='rb') as shellcode_file:
        shellcode_content = shellcode_file.read()

    # Prepare byte list for payload (one single-character string per byte)
    b_list = list(shellcode_content)

    # Raw payload will be constructed by encrypted attack body and xor_table
    raw_payload = b_list + adjusted_attack_body + xor_table
    # NOTE: padding() must append at least one item per call, otherwise this
    # loop never terminates.
    while len(raw_payload) < len(artificial_payload):
        padding(artificial_payload, raw_payload)

    # Write prepared payload to Output file and test against your PAYL model.
    # The payload is arbitrary binary data, so the file is opened in binary
    # mode to rule out any newline translation.
    with open("output", "wb") as result_file:
        result_file.write(''.join(raw_payload))

CS6262_p5/Polymorphic_blend/shellcode.bin

CS6262_p5/Polymorphic_blend/shellcode.S

/*
 * Decoder stub.
 *
 * XORs 31 32-bit words read from two regions of the embedded payload
 * (_binary_payload_bin_start and the same address + 134), stores the
 * 124 (0x7c) resulting bytes in a stack buffer, writes that buffer with
 * int $0x80 / eax=4, then exits with int $0x80 / eax=1.
 */
.extern _binary_payload_bin_start
.globl main
main:
    jmp call
start:
    /*popl %ebx get address of substituted attack body and xor table */
    /* The pop above is disabled; the payload address is taken from the
       linker symbol instead. */
    movl $_binary_payload_bin_start, %ebx
    /* copy the address to ecx */
    movl %ebx, %ecx
    /* ecx = payload start + 134.
       NOTE(review): presumably the offset of the xor table inside the
       payload; it has to agree with the layout task1.py writes - confirm. */
    addl $134, %ecx
    movl $0x0,%edx                  /* edx = word counter */
    movl $0x0,%eax
    subl $0x8c,%esp                 /* reserve 0x8c (140) bytes of stack for the result */
loop:
    cmpl $0x1f,%edx
    jge run                         /* leave the loop once edx >= 0x1f (31 words = 0x7c bytes) */
    movl (%ecx),%eax
    xor (%ebx),%eax                 /* eax = word at (ecx) XOR word at (ebx) */
    movl %eax,(%esp)
    add $0x4,%ebx                   /* move pointer to adjusted attack body */
    add $0x4,%ecx                   /* move pointer to xor table */
    add $0x4,%esp                   /* move pointer to result */
    add $0x1,%edx
    jmp loop
run:
    /* print out decrypted code */
    leal -0x7c(%esp),%ecx           /* ecx = start of the 0x7c bytes just written */
    movl $0x7c,%edx                 /* edx = number of bytes to write */
    movl $0x1,%ebx                  /* ebx = 1 (file descriptor) */
    xorl %eax,%eax
    movl $0x4,%eax                  /* eax = 4 (Linux i386 write) */
    int $0x80
    /*quit*/
    xorl %eax,%eax
    inc %al                         /* eax = 1 (Linux i386 exit) */
    int $0x80
call:
    call start
    /* NOTE(review): .string has no operand here; the original may have
       carried a string literal that was lost - confirm against the source. */
    .string

CS6262_p5/Polymorphic_blend/padding.py

#!/usr/bin/env python2 import struct import math import random from frequency import * from collections import Counter def padding(artificial_payload, raw_payload): padding = "" # Get frequency of raw_payload and artificial profile payload artificial_frequency = frequency(artificial_payload) raw_payload_frequency = frequency(raw_payload) # To simplify padding, you only need to find the maximum frequency difference for each byte in raw_payload and artificial_payload, and pad that byte at the end of the raw_payload. Note: only consider the differences when artificial profile has higher frequency. # Depending upon the difference, call raw_payload.append # Your code here ...

CS6262_p5/Polymorphic_blend/substitution.py

#!/usr/bin/env python2
import struct
import math
import dpkt
import socket
from collections import Counter

from frequency import *


def substitute(attack_payload, subsitution_table):
    """Template: apply the substitution table to the attack payload.

    The finished implementation has to produce the substituted payload and an
    xor_table such that
    (encrypted attack payload) XOR (xor_table) = (original attack payload).
    As provided, both returned lists are empty.

    :param attack_payload: the original attack body.
    :param subsitution_table: table produced by getSubstitutionTable(); its
        format is left to the implementer.
    :returns: tuple ``(xor_table, result)`` of two lists.
    """
    # Using the substitution table you generated to encrypt attack payload
    # Note that you also need to generate a xor_table which will be used to
    # decrypt the attack_payload
    b_attack_payload = bytearray(attack_payload)
    result = []
    xor_table = []
    # Based on your implementation of substitution table, please prepare
    # result and xor_table as output
    return (xor_table, result)


def getSubstitutionTable(artificial_payload, attack_payload):
    """Template: build the table used by substitute().

    The table is meant to replace the most frequent byte in the attack body
    by the most frequent byte in the artificial profile, one by one.

    NOTE: as provided, ``substitution_table`` is never assigned, so calling
    this function raises NameError until the marked section is filled in.
    """
    # Note that the frequency for each byte is provided below in dictionary
    # format. Please check frequency.py for more details
    artificial_frequency = frequency(artificial_payload)
    attack_frequency = frequency(attack_payload)
    sorted_artificial_frequency = sorting(artificial_frequency)
    sorted_attack_frequency = sorting(attack_frequency)
    # Your code here ...
    # You may implement substitution table in your way. Just make sure it can
    # be used in substitute(attack_payload, subsitution_table)
    return substitution_table


def getAttackBodyPayload(path):
    """Return the first non-empty TCP payload sent to 192.150.11.111 in a pcap.

    Trailing whitespace is stripped from the returned payload.

    :param path: path of the pcap file to read.
    :returns: the payload, or None when no packet matches.
    """
    # pcap files are binary; the with-block also closes the capture, which the
    # previous bare open() never did.
    with open(path, 'rb') as f:
        pcap = dpkt.pcap.Reader(f)
        for ts, buf in pcap:
            eth = dpkt.ethernet.Ethernet(buf)
            ip = eth.data
            if socket.inet_ntoa(ip.dst) == "192.150.11.111":  # verify if the dst IP from your attack payload is same
                tcp = ip.data
                if tcp.data == "":
                    continue
                return tcp.data.rstrip()
    return None


def getArtificialPayload(path):
    """Return the first non-empty TCP payload with source port 80 in a pcap.

    :param path: path of the pcap file to read.
    :returns: the payload, or None when no packet matches.
    """
    with open(path, 'rb') as f:
        pcap = dpkt.pcap.Reader(f)
        for ts, buf in pcap:
            eth = dpkt.ethernet.Ethernet(buf)
            ip = eth.data
            tcp = ip.data
            if tcp.sport == 80 and len(tcp.data) > 0:  # MODIFY THE PORT NUMBERS FOR IRC TRAFFIC (Similar to what you did in read_pcap.py)
                return tcp.data
    return None

CS6262_p5/Polymorphic_blend/artificial-profile.pcap

CS6262_p5/Polymorphic_blend/frequency.py

#!/usr/bin/env python2
import struct
from collections import Counter


def sorting(dictFrequency):
    """Return the (symbol, frequency) pairs ordered by descending frequency.

    Pairs with equal frequency keep the relative order in which the
    dictionary yields them.

    :param dictFrequency: mapping of symbol -> frequency.
    :returns: list of ``(symbol, frequency)`` tuples.
    """
    # A key function replaces the former cmp-style comparator, which only
    # exists in Python 2; the resulting order is the same.
    return sorted(dictFrequency.items(), key=lambda item: item[1], reverse=True)


def frequency(payload):
    """Return the relative frequency of each symbol in *payload*.

    :param payload: any iterable of hashable symbols, e.g. a string.
    :returns: dict mapping each distinct symbol to its share of the payload,
        rounded to three decimals; an empty dict for an empty payload.
    """
    counts = Counter(payload)
    # float() keeps the division below a true division under Python 2.
    total = float(sum(counts.values()))
    return {symbol: round(count / total, 3) for symbol, count in counts.items()}

CS6262_p5/PAYL/wrapper.py

'''
Wrapper script for OMSCS CS 6262 Project 4
run as : python wrapper.py
'''
import read_pcap as dpr
import random as rn
import sys
import analysis

# Payload lengths the split below treats specially.
MIN_PAYLOAD_LENGTH = 0
MAX_PAYLOAD_LENGTH = 1460
# Test samples of this length are moved into the training set.
# NOTE(review): the meaning of 705 is not documented in this file.
SWAP_PAYLOAD_LENGTH = 705

attack_file = None
# check which mode the program is being run in
len_of_args = len(sys.argv)
if len_of_args == 1:
    print('\n\tAttack data not provided, training and testing model based on pcap files in \'data/\' folder alone.')
    print('\tTo provide attack data, run the code as: python wrapper.py <attack-data-file-name>')
else:
    print('\n\tAttack data provided, as command line argument \''+sys.argv[1]+'\'')
    attack_file = sys.argv[1]
print('---------------------------------------------')

payloads = dpr.getPayloadStrings()

# The split loop below can only finish when a payload of one of these lengths
# exists at all; fail with a message instead of retrying forever.
if not any(len(p) in (MIN_PAYLOAD_LENGTH, MAX_PAYLOAD_LENGTH) for p in payloads):
    sys.exit('Error: no payload of length ' + str(MIN_PAYLOAD_LENGTH) + ' or '
             + str(MAX_PAYLOAD_LENGTH) + ' found, cannot build the training set')

min_length = 0
max_length = 0
while min_length == 0 and max_length == 0:
    # shuffle the data to randomly pick samples; reshuffling on every attempt
    # is what lets a retry produce a different split
    rn.shuffle(payloads)

    # This is where we decide what the split ratio is
    split_ratio = 0.75
    split_index = int(len(payloads)*split_ratio)
    training = payloads[0:split_index+1]
    test = payloads[split_index+1:len(payloads)]

    # we need at least one min and max length samples in the training data set
    for x in training:
        if len(x) == MIN_PAYLOAD_LENGTH:
            min_length = 1
        if len(x) == MAX_PAYLOAD_LENGTH:
            max_length = 1

    # Swap every test sample of the special length with an ordinary training
    # sample so that this length is always represented in training.
    for j in range(0, len(test)):
        if len(test[j]) == SWAP_PAYLOAD_LENGTH:
            for i in range(0, len(training)):
                if len(training[i]) not in (MIN_PAYLOAD_LENGTH, MAX_PAYLOAD_LENGTH, SWAP_PAYLOAD_LENGTH):
                    training[i], test[j] = test[j], training[i]
                    # One swap per test sample is enough. Reassigning the loop
                    # variable, as was done before, does not stop a for loop.
                    break

# Simple sanity check
if len(payloads) != len(test)+len(training) or split_ratio >= 1.0:
    sys.exit()

# To better understand the behaviour of the model with different parameters,
# we typically let the parameters iterate over a range.
# range(threshold_for_mahalanobis_lower, threshold_for_mahalanobis_upper+1, 50)
# is the range over which the mahalanobis threshold iterates.
# The smoothing factor iterates over the integers
# range(smoothing_factor_lower, smoothing_factor_upper+1); each value is
# divided by 10.0 before use, i.e. the effective step is 0.1.
# For each such combination of mahalanobis threshold and smoothing factor, the
# model is generated with these parameters.
smoothing_factor_lower = 0
smoothing_factor_upper = 30
threshold_for_mahalanobis_lower = 0
threshold_for_mahalanobis_upper = 10000

for smoothing_factor in range(smoothing_factor_lower, smoothing_factor_upper+1):
    if smoothing_factor == 0:
        # distance_and_clustering.give_mahalanobis_distance() raises for a
        # zero smoothing factor, so this value cannot be evaluated.
        continue
    for mahabs in range(threshold_for_mahalanobis_lower, threshold_for_mahalanobis_upper+1, 50):
        print('Smoothing Factor: '+str(smoothing_factor/10.0))
        print('Threshold for Mahalanobis Distance: '+str(mahabs))
        analysis.train_and_test(training, test, attack_file, smoothing_factor/10.0, mahabs, verbose = "False")
        #analysis.train_and_test(training, test, attack_file, smoothing_factor/10.0, mahabs, verbose = "True")
        print('---------------------------------------------')

CS6262_p5/PAYL/analysis.py

'''
Trains Model and Tests samples.
Dependency : distance_and_clustering.py
'''
import sys, os, string
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import distance_and_clustering as dc
import read_pcap as dpr


def is_ascii(string1):
    '''
    Return 1 when every character of string1 has a code point below 256,
    otherwise print the first offending character with its code point and
    return 0.
    '''
    for c in string1:
        if ord(c) >= 256:
            print('"'+c+'" '+str(ord(c)))
            return 0
    return 1


def get_freq_from_ascii_string(ascii_string):
    '''
    Return a list of 256 counters; entry i is the number of characters in
    ascii_string whose code point is i.
    '''
    freq_array1 = [0]*256
    for c in ascii_string:
        freq_array1[ord(c)] += 1
    return freq_array1


def _percentage(count, total):
    ''' Return count as a percentage of total; 0.0 when total is 0. '''
    if total == 0:
        return 0.0
    return (count/float(total))*100.0


def _score_payload(payload, training_length_dict, feature_vector, min_length, smoothing_factor):
    '''
    Return the distance between payload and the model trained for payloads of
    the same length, or sys.maxint when that length was never seen in
    training. Exits the program when the payload fails is_ascii().
    '''
    if is_ascii(payload) == 0:
        sys.exit('Error: File contains non-ascii characters! Exiting')
    # now, check if the length has been encountered or not
    if len(payload) not in training_length_dict:
        return sys.maxint
    new_frequency_distribution = get_freq_from_ascii_string(payload)
    # Models are addressed as feature_vector[min_length - length]: index 0 for
    # the shortest length and negative (from-the-end) indices for all others.
    averaged_feature_vector = feature_vector[min_length-len(payload)]
    return dc.give_mahalanobis_distance(averaged_feature_vector, new_frequency_distribution, smoothing_factor)


def get_mahabs_distance(pool, training_length_dict, averaged_feature_vector, new_frequency_distribution, smoothing_factor):
    '''
    Score every payload in pool against averaged_feature_vector and return the
    distance of the LAST payload (sys.maxint when its length is not a key of
    training_length_dict, or when pool is empty).

    The new_frequency_distribution argument is ignored; the distribution is
    recomputed from each payload.

    This function used to read the undefined names feature_vector and
    min_length, which raised NameError; it now uses the model that is passed
    in as averaged_feature_vector.
    '''
    mahabs_distance = sys.maxint
    for str2 in pool:
        if is_ascii(str2) == 0:
            sys.exit('Error: File contains non-ascii characters! Exiting')
        if len(str2) in training_length_dict:
            new_frequency_distribution = get_freq_from_ascii_string(str2)
            mahabs_distance = dc.give_mahalanobis_distance(averaged_feature_vector, new_frequency_distribution, smoothing_factor)
        else:
            mahabs_distance = sys.maxint
    return mahabs_distance


# if verbose is set to True, the graphs are generated
def train_and_test(training, test, attack_file, smoothing_factor, threshold_for_classification, verbose = "False"):
    '''
    Train one model per payload length, score the test samples and, when
    attack_file is given, score the attack payloads as well. All results are
    printed; nothing is returned.

    training / test : lists of payload strings (training must be non-empty)
    attack_file : None, a path ending in .pcap (read with read_pcap.readPcap)
                  or any other path (read with read_pcap.read_attack_data)
    smoothing_factor : passed through to give_mahalanobis_distance
    threshold_for_classification : a sample fits the model when its distance
                                   is <= this value
    verbose : the STRING "True" enables the plots and per-sample distances
    '''
    all_ascii = [str(i) for i in range(256)]
    all_ascii_int = list(range(256))
    matplotlib.rcParams.update({'font.size': 10})

    # sort the files by length
    training_length_dict = {}
    min_length = len(training[0])
    max_length = len(training[0])
    for payload in training:
        if is_ascii(payload) == 0:
            sys.exit('Error: File contains non-ascii characters! Exiting')
        payload_length = len(payload)
        training_length_dict.setdefault(payload_length, []).append(payload)
        min_length = min(min_length, payload_length)
        max_length = max(max_length, payload_length)

    # one <mean, stddev> table per possible length, initialised to zeros
    feature_vector = [np.zeros((256, 2)) for _ in range(max_length-min_length+1)]

    print('Training the Model')
    # process the sorted files and store the models by the length of the files
    for key in sorted(training_length_dict):
        # frequency array for each length group
        freq_array_per_length = [get_freq_from_ascii_string(argu) for argu in training_length_dict[key]]
        stddev_array_per_length = np.std(freq_array_per_length, axis=0)
        mean_array_per_length = np.mean(freq_array_per_length, axis=0)
        feature_vector[min_length-key] = np.vstack((mean_array_per_length, stddev_array_per_length)).T

        if verbose == "True":
            # plotting the mean array
            plt.xticks(all_ascii_int, all_ascii)
            plt.bar(all_ascii_int, mean_array_per_length)
            plt.title('Mean frequency of each of the acsii characters for length '+str(key))
            plt.show()
            # plotting the std dev array
            plt.xticks(all_ascii_int, all_ascii)
            plt.bar(all_ascii_int, stddev_array_per_length)
            plt.title('Std Dev of freq of each of the ascii characters for Length '+str(key))
            plt.show()

    print('Testing the Model')
    true_positive = 0
    false_negative = 0
    for str2 in test:
        mahabs_distance = _score_payload(str2, training_length_dict, feature_vector, min_length, smoothing_factor)
        if mahabs_distance <= threshold_for_classification:
            true_positive = true_positive + 1
        else:
            false_negative = false_negative + 1
        if verbose == "True":
            print(str(mahabs_distance))
    print('Total Number of testing samples: '+str(len(test)))
    print('Percentage of True positives: '+str(_percentage(true_positive, len(test))))

    # Attack data loading
    if attack_file is None:
        print('\nExiting now')
        return
    if attack_file.lower().endswith('.pcap'):
        attack = dpr.readPcap(attack_file, "True")
    else:
        attack = dpr.read_attack_data(attack_file)

    print('--------------------------------------')
    if not attack:
        # Nothing to analyse; the code below would otherwise fail on attack[0].
        print('No attack payload found in '+attack_file)
        return
    print('Analysing attack data, of length '+str(len(attack[0])))
    false_positive = 0
    true_negative = 0
    for str2 in attack:
        mahabs_distance = _score_payload(str2, training_length_dict, feature_vector, min_length, smoothing_factor)
        if mahabs_distance <= threshold_for_classification:
            false_positive = false_positive + 1
            print('Calculated distance of '+ str(mahabs_distance)+' is lesser than the threshold of '+
                  str(threshold_for_classification)+'. It fits the model. ')
        else:
            true_negative = true_negative + 1
            print('Calculated distance of '+ str(mahabs_distance)+' is greater than the threshold of '+
                  str(threshold_for_classification) +'. It doesn\'t fit the model. ')
    print('Total number of True Negatives: '+str(_percentage(true_negative, len(attack))))
    print('Total number of False Positives: '+str(_percentage(false_positive, len(attack)))+'\n')

CS6262_p5/PAYL/distance_and_clustering.py

'''
Code for calculating various distances and clustering the models
'''
import numpy as np
import scipy.spatial.distance as dist
import math


def give_mahalanobis_distance(averaged_feature_vector, new_frequency_distribution, smoothing_factor):
    '''
    Return sum over the 256 byte values of |mean - freq| / (stddev + smoothing_factor).

    averaged_feature_vector : 256*2 array representing <mean, stddev> pairs for
                              each of the 256 ASCII characters (column 0 is the
                              mean, column 1 the deviation)
    new_frequency_distribution : 256*1 array representing the frequencies of
                                 each of the 256 ASCII characters
    smoothing_factor : single scalar value, must not be zero

    Raises Exception when smoothing_factor is zero. A negative deviation is
    printed but still used.
    '''
    if smoothing_factor == 0:
        raise Exception("Smoothing factor cannot be zero")
    distance = 0
    for n in range(256):
        xi = averaged_feature_vector[n][0]
        yi = new_frequency_distribution[n]
        sigi = averaged_feature_vector[n][1]
        if sigi < 0:
            print(sigi)
        distance = distance + (abs(xi-yi)/(sigi+smoothing_factor))
    return distance


# Takes as argument two 1-D list and gives distance as double
def manhattan_distance(model_i, model_k):
    '''
    Return the city-block (L1) distance between two vectors.

    model_i : 256*1 array representing the average frequency values of each of
              the 256 ASCII characters
    model_k : 256*1 array representing the average frequency values of each of
              the 256 ASCII characters
    '''
    return dist.cityblock(np.array(model_i), np.array(model_k))


def weighted_variance(variance1, variance2, mean1, mean2, size1, size2):
    '''
    Return the variance of the union of two sample groups, given each group's
    variance, mean and size (all single scalar values).
    '''
    size = float(size1 + size2)
    term1 = size1 * (variance1 + (mean1*mean1))
    term2 = size2 * (variance2 + (mean2*mean2))
    weighted_mean = ((size1*mean1)+(size2*mean2)) / size
    return ((term1 + term2) / size) - (weighted_mean*weighted_mean)


def weighted_sd(sd1, sd2, mean1, mean2, size1, size2):
    '''
    Return the standard deviation of the union of two sample groups, given
    each group's standard deviation, mean and size (all single scalar values).
    '''
    variance = weighted_variance(sd1 * sd1, sd2 * sd2, mean1, mean2, size1, size2)
    # Rounding can push a mathematically-zero variance slightly below zero,
    # which would make math.sqrt raise ValueError.
    return math.sqrt(max(variance, 0.0))


def merge_update(model_i, model_k, ni, nk):
    '''
    Merge two models in place and return them as a tuple (model_i, model_k).

    There can be multiple implementations of this. We merge and update based
    on weighted average frequency where weight being the number of samples
    each model has. After the call both models hold the same values.

    model_i : 256*2 feature array where each tuple is <freq, stddev> for length i
    model_k : 256*2 feature array where each tuple is <freq, stddev> for length k
    ni : single scalar value representing the number of samples for length i
    nk : single scalar value representing the number of samples for length k

    When both sample counts are zero the models are returned unchanged.
    '''
    if ni == 0 and nk == 0:
        return model_i, model_k
    # float() keeps the mean a true division under Python 2 as well.
    n = float(ni + nk)
    # Every row is merged; the previous range(0, 255) left the last byte
    # value (255) untouched.
    for j in range(len(model_i)):
        avg_frequency_i = model_i[j][0]
        avg_frequency_k = model_k[j][0]
        avg_cumulative_mean = ((avg_frequency_i * ni) + (avg_frequency_k * nk)) / n
        avg_cumulative_stddev = weighted_sd(model_i[j][1], model_k[j][1],
                                            avg_frequency_i, avg_frequency_k, ni, nk)
        model_i[j][0] = avg_cumulative_mean
        model_k[j][0] = avg_cumulative_mean
        model_i[j][1] = avg_cumulative_stddev
        model_k[j][1] = avg_cumulative_stddev
    return model_i, model_k


def cluster(threshold, models, lengthwise_sample_numbers):
    '''
    Merge runs of neighbouring models and return the (modified) models list.

    Starting from model i, each following model k is merged into it for as
    long as manhattan_distance(models[i][0], models[k][0]) < threshold; the
    first model that is not merged starts the next run.

    threshold : single scalar value - when two models should be merged
    models : (range of payload length)*256*2, sorted by payload length
    lengthwise_sample_numbers : (range of payload length)*1 - number of
                                samples recorded for each length while training

    Model that remains will be a compact model and will not have all the
    lengths. For the lengths not found, look for the largest predecessor.
    '''
    i = 0
    # iterate over each of the models
    while i < len(models):
        # for each model, search through nearby models and update
        k = i+1
        while k < len(models) and manhattan_distance(models[i][0], models[k][0]) < threshold:
            ni = lengthwise_sample_numbers[i]
            nk = lengthwise_sample_numbers[k]
            models[i], models[k] = merge_update(models[i], models[k], ni, nk)
            k = k+1
        # k is the first model that was NOT merged, so it seeds the next run.
        # The previous "i = k+1" skipped it and it was never compared with
        # its successors.
        i = k
    return models

CS6262_p5/PAYL/SETUP.txt

--------------------------------------------------------------------------------- PREREQUISITES --------------------------------------------------------------------------------- The commands in the following instructions are for Linux only. If you use some other OS, please look for equivalent commands online. 1) Python version 2 - You need to have Python version 2 for this. We have tested it on 2.7.11, but it should work on other Python 2 versions too. How to check your Python version? python --version If you do not have version 2 - either install it alongside other versions or have a VM with version 2 (this is less messy). 2) Python modules required (with the versions we used in brackets, but it should work with whatever the latest versions are) - - numpy (1.11.0 and above) - scipy (0.18.1) - dpkt (1.8.8) - matplotlib (1.5.1) How to install? sudo pip install <module name> To install all of them in one go: sudo pip install numpy scipy dpkt matplotlib How to check a module's version? pip freeze | grep <module name> ----------------------------------------------------------------------------------- HOW TO RUN THE CODE ----------------------------------------------------------------------------------- Code is in the PAYL directory. :) To run the code given: python wrapper.py Once you run it, it will take some time to generate the model. Based on the values passed, it will compute the false negative and true positive rates. Before modifying the code, run the original code given to see how it works and whether your setup is correct. Now, read the project description to understand what the project task is, then modify and run the code. <<Optional>> Set verbose = "True" (a string, in the train_and_test call in wrapper.py) to get the graphs for each model to be generated. This will not be graded but can help you understand the generated models better. 
----------------------------------------------------------------------------------- CODE COMPONENTS ----------------------------------------------------------------------------------- 1) wrapper.py - It is the entry point for the project code. It calls all other functions and modules for the project. 2) analysis.py - It trains the model and tests it. 3) distance_and_clustering.py - This module computes different distances and clusters the models. 4) read_pcap.py - It parses the pcap data and makes it ready for the wrapper to use. ----------------------------------------------------------------------------------- DATA ----------------------------------------------------------------------------------- We have 5 pcap files in the code/data directory. We have already written a script that reads the files and gives the data to the model. All the best!!! Contact us or post on Piazza if you run into issues.

CS6262_p5/PAYL/read_pcap.py

import dpkt


def readPcap(fileName, port_neutral = "False"):
    """Return the TCP payloads found in a pcap file as a list of strings.

    A packet is kept when its source or destination port is 6667, or, when
    port_neutral is the STRING "True", for any port. Packets that cannot be
    parsed down to a TCP segment are skipped.

    :param fileName: path of the pcap file.
    :param port_neutral: "True" to accept every port; anything else keeps the
        port filter.
    :returns: list of ``str(payload)`` values, in capture order.
    """
    payload_list = []
    # pcap files are binary; the with-block also closes the capture, which the
    # previous bare open() never did.
    with open(fileName, "rb") as f:
        pcap = dpkt.pcap.Reader(f)
        for ts, buf in pcap:
            try:
                eth = dpkt.ethernet.Ethernet(buf)
                ip = eth.data
                tcp = ip.data
                #Read http payload
                #if(tcp.sport==80 or tcp.dport==80):
                # MODIFY THE PORT NUMBER FOR HTTP
                if tcp.sport == 6667 or tcp.dport == 6667 or port_neutral == "True":
                    payload_list.append(str(tcp.data))
            except Exception:
                # Best effort: skip packets without the expected layers. Unlike
                # a bare except, this lets KeyboardInterrupt/SystemExit through.
                continue
    return payload_list


def getPayloadStrings():
    """Return the payloads of the training capture(s) as one flat list."""
    payload_list = []
    list1 = readPcap('IRC_data/all_irc_data.pcap')  # MODIFY THE PATH FOR HTTP TRAFFIC
    payload_list.extend(list1)
    # HTTP: FOR HTTP PROTOCOL, YOU NEED TO SELECT ALL THE PCAPS ONE BY ONE FROM
    # HTTP_data FOLDER AND EXTEND ONE BY AS SHOWN BELOW
    # (Just uncomment the lines below for HTTP :))
    #list1 = readPcap('data/HTTPtext_V1.pcap')
    #list2 = readPcap('data/HTTPtext_V2.pcap')
    #list3 = readPcap('data/modified_new3_simple_http.pcap')
    #list4 = readPcap('data/modified_new4_simple_http.pcap')
    #list5 = readPcap('data/modified_new5_simple_http.pcap')
    #list6 = readPcap('data/modified_new6_simple_http.pcap')
    #list7 = readPcap('data/modified_new_simple_http.pcap')
    #payload_list.extend(list1)
    #payload_list.extend(list2)
    #payload_list.extend(list3)
    #payload_list.extend(list4)
    #payload_list.extend(list5)
    #payload_list.extend(list6)
    #payload_list.extend(list7)
    return payload_list


def read_attack_data(filename):
    """Return a one-element list holding the whole content of *filename*."""
    with open(filename) as attack_file:
        return [attack_file.read()]

CS6262_p5/PAYL/artificial-profile.pcap

CS6262_p5/PAYL/HTTP_data/modified_new3_simple_http.pcap

CS6262_p5/PAYL/HTTP_data/HTTPtext_V2.pcap

CS6262_p5/PAYL/HTTP_data/HTTPtext_V1.pcap

CS6262_p5/PAYL/HTTP_data/modified_new_simple_http.pcap

CS6262_p5/PAYL/HTTP_data/modified_new6_simple_http.pcap

CS6262_p5/PAYL/HTTP_data/modified_new5_simple_http.pcap

CS6262_p5/PAYL/HTTP_data/modified_new4_simple_http.pcap

CS6262_p5/PAYL/IRC_data/all_irc_data.pcap