Explore ATT&CK Data Sources#

Goals:#

Access ATT&CK data sources in STIX format via a public TAXII server
Learn to interact with ATT&CK data all at once
Explore and identify patterns in the data retrieved
Learn more about ATT&CK data sources

Import ATT&CK API Client#

from attackcti import attack_client

Import Extra Libraries#

import json

import numpy as np
# The cells below refer to the module as `pandas.<name>`; bind that name
# explicitly instead of relying on the wildcard import to expose it.
import pandas
from pandas import *

import altair as alt
alt.renderers.enable('default')

import itertools

import logging
logging.getLogger('taxii2client').setLevel(logging.CRITICAL)

Initialize ATT&CK Client Class#

lift = attack_client()

Get All Techniques#

all_techniques = lift.get_techniques()

Convert Techniques to Dataframe and Update Techniques Objects#

Normalizing semi-structured JSON data into a flat table via pandas.json_normalize

Reference: https://pandas.pydata.org/docs/reference/api/pandas.json_normalize.html

# Flatten every technique object into a plain dict, tagging it with its matrix
# and its ATT&CK technique ID, then load the result into a dataframe.
temp_list = []
for t in all_techniques:
    # Both the matrix name and the technique ID are read from the first
    # external reference of the technique.
    primary_ref = t['external_references'][0]
    extra_props = {
        'matrix': primary_ref['source_name'],
        'technique_id': primary_ref['external_id'],
    }
    if 'x_mitre_data_sources' in t:
        # Entries have the form "<data source>: <data component>"; keep only
        # the data source part. sorted() removes the run-to-run ordering
        # differences that list(set(...)) would introduce.
        extra_props['x_mitre_data_sources'] = sorted(
            {ds.split(':')[0] for ds in t['x_mitre_data_sources']}
        )
    # Apply all property changes in a single new version of the object.
    t = t.new_version(**extra_props)
    temp_list.append(json.loads(t.serialize()))

techniques = pandas.json_normalize(temp_list)
# Shorter, analysis-friendly column names used by every later cell.
techniques = techniques.rename(columns={
    'x_mitre_platforms': 'platform',
    'kill_chain_phases': 'tactic',
    'name': 'technique',
    'x_mitre_data_sources': 'data_sources',
})

techniques = techniques.reindex(['matrix','platform','tactic','technique','technique_id','data_sources'], axis=1)
techniques.head()

	matrix	platform	tactic	technique	technique_id	data_sources
0	mitre-attack	[macOS]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Resource Forking	T1564.009	[Command, Process, File]
1	mitre-attack	[Windows, Linux, macOS]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Downgrade Attack	T1562.010	[Process, Command]
2	mitre-attack	[macOS]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Login Items	T1547.015	[Process, File]
3	mitre-attack	[macOS, Linux, Windows]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Reflective Code Loading	T1620	[Module, Process, Script]
4	mitre-attack	[IaaS]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Cloud Storage Object Discovery	T1619	[Cloud Storage]

print('A total of ',len(techniques),' techniques')

A total of  736  techniques

Techniques Per Matrix#

Using altair python library we can start showing a few charts stacking the number of techniques with or without data sources. Reference: https://altair-viz.github.io/

data = techniques
data_2 = data.groupby(['matrix'])['technique'].count()
data_3 = data_2.to_frame().reset_index()
data_3

	matrix	technique
0	mitre-attack	566
1	mitre-ics-attack	78
2	mitre-mobile-attack	92

alt.Chart(data_3).mark_bar().encode(x='technique', y='matrix', color='matrix').properties(height = 200)

Techniques With and Without Data Sources#

# Number of techniques whose data_sources cell is missing vs. populated.
missing_ds_count = techniques['data_sources'].isna().sum()
present_ds_count = techniques['data_sources'].notna().sum()

data_source_distribution = pandas.DataFrame({
    'Techniques': ['Without DS', 'With DS'],
    'Count of Techniques': [missing_ds_count, present_ds_count],
})

# One bar per category, with the count drawn as a label just above each bar.
bars = (
    alt.Chart(data_source_distribution)
    .mark_bar()
    .encode(x='Techniques', y='Count of Techniques', color='Techniques')
    .properties(width=200, height=300)
)
text = bars.mark_text(
    align='center', baseline='middle', dx=0, dy=-5
).encode(text='Count of Techniques')
bars + text

What is the distribution of techniques based on ATT&CK Matrix?

# NOTE: `data` is a second name for `techniques` (no copy is made), so the
# Count_DS and Ind_DS columns added below also appear on `techniques`.
data = techniques
# Length of each data_sources list; the value is NaN where the cell is missing.
data['Count_DS'] = data['data_sources'].str.len()
# NaN > 0 evaluates to False, so techniques with no data sources are labelled
# 'Without DS'.
data['Ind_DS'] = np.where(data['Count_DS']>0,'With DS','Without DS')
# Number of techniques per (matrix, with/without data sources) pair.
data_2 = data.groupby(['matrix','Ind_DS'])['technique'].count()
# Turn the grouped Series back into a flat dataframe for charting.
data_3 = data_2.to_frame().reset_index()
data_3

	matrix	Ind_DS	technique
0	mitre-attack	With DS	520
1	mitre-attack	Without DS	46
2	mitre-ics-attack	With DS	63
3	mitre-ics-attack	Without DS	15
4	mitre-mobile-attack	Without DS	92

alt.Chart(data_3).mark_bar().encode(x='technique', y='Ind_DS', color='matrix').properties(height = 200)

What are those mitre-attack techniques without data sources?

data[(data['matrix']=='mitre-attack') & (data['Ind_DS']=='Without DS')][0:5]

	matrix	platform	tactic	technique	technique_id	data_sources	Count_DS	Ind_DS
58	mitre-attack	[PRE]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Vulnerabilities	T1588.006	NaN	NaN	Without DS
66	mitre-attack	[PRE]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Purchase Technical Data	T1597.002	NaN	NaN	Without DS
67	mitre-attack	[PRE]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Threat Intel Vendors	T1597.001	NaN	NaN	Without DS
68	mitre-attack	[PRE]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Search Closed Sources	T1597	NaN	NaN	Without DS
69	mitre-attack	[PRE]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Scan Databases	T1596.005	NaN	NaN	Without DS

Techniques without data sources#

techniques_without_data_sources=techniques[techniques.data_sources.isnull()].reset_index(drop=True)

techniques_without_data_sources.head()

	matrix	platform	tactic	technique	technique_id	data_sources	Count_DS	Ind_DS
0	mitre-attack	[PRE]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Vulnerabilities	T1588.006	NaN	NaN	Without DS
1	mitre-attack	[PRE]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Purchase Technical Data	T1597.002	NaN	NaN	Without DS
2	mitre-attack	[PRE]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Threat Intel Vendors	T1597.001	NaN	NaN	Without DS
3	mitre-attack	[PRE]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Search Closed Sources	T1597	NaN	NaN	Without DS
4	mitre-attack	[PRE]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Scan Databases	T1596.005	NaN	NaN	Without DS

print('There are ',techniques['data_sources'].isna().sum(),' techniques without data sources (',"{0:.0%}".format(techniques['data_sources'].isna().sum()/len(techniques)),' of ',len(techniques),' techniques)')

There are  153  techniques without data sources ( 21%  of  736  techniques)

Techniques With Data Sources#

techniques_with_data_sources=techniques[techniques.data_sources.notnull()].reset_index(drop=True)

techniques_with_data_sources.head()

	matrix	platform	tactic	technique	technique_id	data_sources	Count_DS	Ind_DS
0	mitre-attack	[macOS]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Resource Forking	T1564.009	[Command, Process, File]	3.0	With DS
1	mitre-attack	[Windows, Linux, macOS]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Downgrade Attack	T1562.010	[Process, Command]	2.0	With DS
2	mitre-attack	[macOS]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Login Items	T1547.015	[Process, File]	2.0	With DS
3	mitre-attack	[macOS, Linux, Windows]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Reflective Code Loading	T1620	[Module, Process, Script]	3.0	With DS
4	mitre-attack	[IaaS]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Cloud Storage Object Discovery	T1619	[Cloud Storage]	1.0	With DS

print('There are ',techniques['data_sources'].notna().sum(),' techniques with data sources (',"{0:.0%}".format(techniques['data_sources'].notna().sum()/len(techniques)),' of ',len(techniques),' techniques)')

There are  583  techniques with data sources ( 79%  of  736  techniques)

Grouping Techniques With Data Sources By Matrix#

Let’s create a graph to represent the number of techniques per matrix:

# Techniques (with data sources) counted per matrix; computed once and reused
# for both columns of the chart dataframe.
per_matrix_counts = techniques_with_data_sources.groupby(['matrix'])['matrix'].count()

matrix_distribution = pandas.DataFrame({
    'Matrix': list(per_matrix_counts.keys()),
    'Count of Techniques': per_matrix_counts.tolist(),
})

# Horizontal bars with the count written next to the end of each bar.
bars = (
    alt.Chart(matrix_distribution)
    .mark_bar()
    .encode(y='Matrix', x='Count of Techniques')
    .properties(width=300, height=100)
)
text = bars.mark_text(
    align='center', baseline='middle', dx=10, dy=0
).encode(text='Count of Techniques')
bars + text

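Most of the techniques with data sources belong to the mitre-attack matrix, which is the main Enterprise matrix; as the counts above show, the rest belong to mitre-ics-attack, and no mitre-mobile-attack technique has data sources. Reference: https://attack.mitre.org/matrices/enterprise/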

Grouping Techniques With Data Sources by Platform#

First, we need to split the platform column values because a technique might be mapped to more than one platform

# In attributes we indicate the name of the columns that we need to split
attributes_1 = ['platform']

techniques_platform = techniques_with_data_sources
for a in attributes_1:
    # explode() emits one row per element of the list stored in column "a",
    # repeating the values of the other columns. It replaces the slower
    # row-by-row apply/stack/join construction and yields the same rows.
    techniques_platform = techniques_platform.explode(a).reset_index(drop=True)

# Let's re-arrange the columns from general to specific
techniques_platform_2 = techniques_platform.reindex(
    ['matrix', 'platform', 'tactic', 'technique', 'technique_id', 'data_sources'],
    axis=1,
)

We can now show techniques with data sources mapped to one platform at a time

techniques_platform_2.head()

	matrix	platform	tactic	technique	technique_id	data_sources
0	mitre-attack	macOS	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Resource Forking	T1564.009	[Command, Process, File]
1	mitre-attack	Windows	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Downgrade Attack	T1562.010	[Process, Command]
2	mitre-attack	Linux	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Downgrade Attack	T1562.010	[Process, Command]
3	mitre-attack	macOS	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Downgrade Attack	T1562.010	[Process, Command]
4	mitre-attack	macOS	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Login Items	T1547.015	[Process, File]

Let’s create a visualization to show the number of techniques grouped by platform:

# Rows per platform in the exploded frame, i.e. techniques mapped to each
# platform; computed once and reused for both columns.
per_platform_counts = techniques_platform_2.groupby(['platform'])['platform'].count()

platform_distribution = pandas.DataFrame({
    'Platform': list(per_platform_counts.keys()),
    'Count of Techniques': per_platform_counts.tolist(),
})

# One coloured bar per platform, labelled with its count.
bars = (
    alt.Chart(platform_distribution, height=300)
    .mark_bar()
    .encode(x='Platform', y='Count of Techniques', color='Platform')
    .properties(width=200)
)
text = bars.mark_text(
    align='center', baseline='middle', dx=0, dy=-5
).encode(text='Count of Techniques')
bars + text

In the bar chart above we can see that there are more techniques with data sources mapped to the Windows platform.

Defense Evasion and Persistence are the tactics with the highest number of techniques with data sources

Grouping Techniques With Data Sources by Data Source#

We need to split the data source column values because a technique might be mapped to more than one data source:

# In attributes we indicate the name of the columns that we need to split
attributes_3 = ['data_sources']

techniques_data_source = techniques_with_data_sources
for a in attributes_3:
    # explode() emits one row per element of the list stored in column "a",
    # repeating the values of the other columns. It replaces the slower
    # row-by-row apply/stack/join construction and yields the same rows.
    techniques_data_source = techniques_data_source.explode(a).reset_index(drop=True)

# Let's re-arrange the columns from general to specific
techniques_data_source_2 = techniques_data_source.reindex(
    ['matrix', 'platform', 'tactic', 'technique', 'technique_id', 'data_sources'],
    axis=1,
)

# We are going to edit some names inside the dataframe to improve the consistency:
techniques_data_source_3 = techniques_data_source_2.replace(
    ['Process monitoring', 'Application logs'],
    ['Process Monitoring', 'Application Logs'],
)

We can now show techniques with data sources mapped to one data source at a time

techniques_data_source_3.head()

	matrix	platform	tactic	technique	technique_id	data_sources
0	mitre-attack	[macOS]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Resource Forking	T1564.009	Command
1	mitre-attack	[macOS]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Resource Forking	T1564.009	Process
2	mitre-attack	[macOS]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Resource Forking	T1564.009	File
3	mitre-attack	[Windows, Linux, macOS]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Downgrade Attack	T1562.010	Process
4	mitre-attack	[Windows, Linux, macOS]	[{'kill_chain_name': 'mitre-attack', 'phase_na...	Downgrade Attack	T1562.010	Command

Let’s create a visualization to show the number of techniques grouped by data sources:

# Rows per data source in the exploded frame, i.e. techniques mapped to each
# data source; computed once and reused for both columns.
per_source_counts = techniques_data_source_3.groupby(['data_sources'])['data_sources'].count()

data_source_distribution = pandas.DataFrame({
    'Data Source': list(per_source_counts.keys()),
    'Count of Techniques': per_source_counts.tolist(),
})

# One coloured bar per data source, labelled with its count.
bars = (
    alt.Chart(data_source_distribution, width=800, height=300)
    .mark_bar()
    .encode(x='Data Source', y='Count of Techniques', color='Data Source')
    .properties(width=1200)
)
text = bars.mark_text(
    align='center', baseline='middle', dx=0, dy=-5
).encode(text='Count of Techniques')
bars + text

A few interesting things from the bar chart above:

Process Monitoring, File Monitoring, and Process Command-line parameters are the Data Sources with the highest number of techniques
There are some data source names that include string references to Windows such as PowerShell, Windows and wmi

Most Relevant Groups Of Data Sources Per Technique#

Number Of Data Sources Per Technique#

Although identifying the data sources with the highest number of techniques is a good start, they usually do not work alone. You might be collecting Process Monitoring already but you might be still missing a lot of context from a data perspective.

# Number of data sources per technique. Rows are grouped by technique_id
# (with the name carried along) because technique names are not unique:
# grouping by name alone would merge distinct techniques that share a name
# and inflate their data source counts.
sources_per_technique = techniques_data_source_3.groupby(
    ['technique_id', 'technique']
).size()

data_source_distribution_2 = pandas.DataFrame({
    'Techniques': sources_per_technique.index.get_level_values('technique').tolist(),
    'Count of Data Sources': sources_per_technique.tolist(),
})

# How many techniques have 1, 2, 3, ... data sources.
techniques_per_count = data_source_distribution_2.groupby(
    ['Count of Data Sources']
)['Count of Data Sources'].count()

data_source_distribution_3 = pandas.DataFrame({
    'Number of Data Sources': list(techniques_per_count.keys()),
    'Count of Techniques': techniques_per_count.tolist(),
})

bars = (
    alt.Chart(data_source_distribution_3)
    .mark_bar()
    .encode(x='Number of Data Sources', y='Count of Techniques')
    .properties(width=500)
)
text = bars.mark_text(
    align='center', baseline='middle', dx=0, dy=-5
).encode(text='Count of Techniques')
bars + text

The image above shows the number of data sources needed per technique according to ATT&CK:

There are 71 techniques that require 3 data sources as enough context to validate the detection of them according to ATT&CK
Only one technique has 12 data sources
One data source only applies to 19 techniques

Let’s create subsets of data sources with the data source column defining and using a python function:

# https://stackoverflow.com/questions/26332412/python-recursive-function-to-display-all-subsets-of-given-set
def subs(l):
    """Return every non-empty combination of the items in ``l``.

    The result is a list of lists ordered by combination size (1 up to
    ``len(l)``); within one size the order is the one produced by
    ``itertools.combinations``. An empty input yields an empty list.
    """
    sizes = range(1, len(l) + 1)
    return [
        list(group)
        for size in sizes
        for group in itertools.combinations(l, size)
    ]

Before applying the function, we need to use lowercase data sources names and sort data sources names to improve consistency:

# Work on an independent copy so that columns added to `df` later do not
# trigger SettingWithCopyWarning and never touch techniques_with_data_sources.
df = techniques_with_data_sources[['data_sources']].copy()

# Lowercase and sort every list of data source names. The column is rebuilt
# with apply() because values assigned to the rows yielded by iterrows() are
# not guaranteed to be written back to the dataframe.
df['data_sources'] = df['data_sources'].apply(
    lambda names: sorted(name.lower() for name in names)
)

df.head()

	data_sources
0	[command, file, process]
1	[command, process]
2	[file, process]
3	[module, process, script]
4	[cloud storage]

Let’s apply the function and split the subsets column:

df['subsets']=df['data_sources'].apply(subs)

<ipython-input-32-9765a9dc0b2f>:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['subsets']=df['data_sources'].apply(subs)

df.head()

	data_sources	subsets
0	[command, file, process]	[[command], [file], [process], [command, file]...
1	[command, process]	[[command], [process], [command, process]]
2	[file, process]	[[file], [process], [file, process]]
3	[module, process, script]	[[module], [process], [script], [module, proce...
4	[cloud storage]	[[cloud storage]]

We need to split the subsets column values:

# Column whose list-valued cells are split into one row per element.
attributes_4 = ['subsets']

techniques_with_data_sources_preview = df
for a in attributes_4:
    # explode() flattens one level only: each row receives a single subset
    # (itself a list) while the data_sources value is repeated. It replaces
    # the slower row-by-row apply/stack/join construction.
    techniques_with_data_sources_preview = (
        techniques_with_data_sources_preview.explode(a).reset_index(drop=True)
    )

techniques_with_data_sources_subsets = techniques_with_data_sources_preview.reindex(
    ['data_sources', 'subsets'], axis=1
)

techniques_with_data_sources_subsets.head()

	data_sources	subsets
0	[command, file, process]	[command]
1	[command, file, process]	[file]
2	[command, file, process]	[process]
3	[command, file, process]	[command, file]
4	[command, file, process]	[command, process]

Let’s add three columns to analyse the dataframe: subsets_name (changing lists to strings), subsets_number_elements (number of data sources per subset) and number_data_sources_per_technique

techniques_with_data_sources_subsets['subsets_name']=techniques_with_data_sources_subsets['subsets'].apply(lambda x: ','.join(map(str, x)))
techniques_with_data_sources_subsets['subsets_number_elements']=techniques_with_data_sources_subsets['subsets'].str.len()
techniques_with_data_sources_subsets['number_data_sources_per_technique']=techniques_with_data_sources_subsets['data_sources'].str.len()

techniques_with_data_sources_subsets.head()

	data_sources	subsets	subsets_name	subsets_number_elements	number_data_sources_per_technique
0	[command, file, process]	[command]	command	1	3
1	[command, file, process]	[file]	file	1	3
2	[command, file, process]	[process]	process	1	3
3	[command, file, process]	[command, file]	command,file	2	3
4	[command, file, process]	[command, process]	command,process	2	3

As described above, we need to find groups of data sources, so we are going to filter out all the subsets with only one data source:

subsets = techniques_with_data_sources_subsets

subsets_ok=subsets[subsets.subsets_number_elements != 1]

subsets_ok.head()

	data_sources	subsets	subsets_name	subsets_number_elements	number_data_sources_per_technique
3	[command, file, process]	[command, file]	command,file	2	3
4	[command, file, process]	[command, process]	command,process	2	3
5	[command, file, process]	[file, process]	file,process	2	3
6	[command, file, process]	[command, file, process]	command,file,process	3	3
9	[command, process]	[command, process]	command,process	2	2

Finally, we calculate the most relevant groups of data sources (Top 15):

subsets_graph = subsets_ok.groupby(['subsets_name'])['subsets_name'].count().to_frame(name='subsets_count').sort_values(by='subsets_count',ascending=False)[0:15]

subsets_graph

	subsets_count
subsets_name
command,process	206
command,file	131
file,process	118
command,file,process	90
command,windows registry	60
process,windows registry	59
command,process,windows registry	53
application log,network traffic	48
command,network traffic	45
network traffic,process	44
module,process	44
file,network traffic	39
file,windows registry	37
file,process,windows registry	33
command,module	31

subsets_graph_2 = pandas.DataFrame({
    'Data Sources': list(subsets_graph.index),
    'Count of Techniques': subsets_graph['subsets_count'].tolist()})

bars = alt.Chart(subsets_graph_2).mark_bar().encode(x ='Data Sources', y ='Count of Techniques', color='Data Sources').properties(width=500)
text = bars.mark_text(align='center',baseline='middle',dx= 0,dy=-5).encode(text='Count of Techniques')
bars + text

The group (command, process) is the group of data sources with the highest number of techniques. This group of data sources is suggested to hunt 206 techniques

Let’s Split all the Information About Techniques With Data Sources Defined: Matrix, Platform, Tactic and Data Source#

Let’s split all the relevant columns of the dataframe:

# In attributes we indicate the name of the columns that we need to split
attributes = ['platform', 'tactic', 'data_sources']

techniques_data = techniques_with_data_sources
for a in attributes:
    # Columns are exploded one after another so the result holds every
    # platform x tactic x data source combination of each technique.
    # explode() emits one row per element of the list in column "a" and
    # replaces the slower row-by-row apply/stack/join construction.
    techniques_data = techniques_data.explode(a).reset_index(drop=True)

# Let's re-arrange the columns from general to specific
techniques_data_2 = techniques_data.reindex(
    ['matrix', 'platform', 'tactic', 'technique', 'technique_id', 'data_sources'],
    axis=1,
)

# We are going to edit some names inside the dataframe to improve the consistency:
techniques_data_3 = techniques_data_2.replace(
    ['Process monitoring', 'Application logs'],
    ['Process Monitoring', 'Application Logs'],
)

techniques_data_3.head()

	matrix	platform	tactic	technique	technique_id	data_sources
0	mitre-attack	macOS	{'kill_chain_name': 'mitre-attack', 'phase_nam...	Resource Forking	T1564.009	Command
1	mitre-attack	macOS	{'kill_chain_name': 'mitre-attack', 'phase_nam...	Resource Forking	T1564.009	Process
2	mitre-attack	macOS	{'kill_chain_name': 'mitre-attack', 'phase_nam...	Resource Forking	T1564.009	File
3	mitre-attack	Windows	{'kill_chain_name': 'mitre-attack', 'phase_nam...	Downgrade Attack	T1562.010	Process
4	mitre-attack	Windows	{'kill_chain_name': 'mitre-attack', 'phase_nam...	Downgrade Attack	T1562.010	Command

Do you remember the data source names with a reference to Windows? After splitting the dataframe by platforms, tactics and data sources, are there any macOS or Linux techniques that consider Windows data sources? Let’s identify those rows:

# After splitting the rows of the dataframe, some rows pair a Windows-related
# data source with a platform such as Linux or macOS. We flag those rows.

# Substrings (matched case-insensitively) that mark a data source as Windows-related.
windows_keywords = ['windows', 'powershell', 'wmi']

# True where the row's platform is one of the non-Windows platforms under test.
on_other_platform = techniques_data_3['platform'].isin(['Linux', 'macOS'])

# One boolean Series per keyword; "== True" turns a missing match result into False.
keyword_hits = [
    techniques_data_3['data_sources'].str.contains(keyword, case=False) == True
    for keyword in windows_keywords
]
# True where the data source name contains at least one of the keywords.
windows_data_source = np.logical_or.reduce(keyword_hits)

# We add a column "Validation" to "techniques_data_3": "NO OK" for the flagged
# rows, and "OK" (the default) for every other row.
techniques_data_3['Validation'] = np.where(
    on_other_platform & windows_data_source, 'NO OK', 'OK'
)

What is the inconsistent data?

techniques_analysis_data_no_ok = techniques_data_3[techniques_data_3.Validation == 'NO OK']
# Finally, we are filtering all the values with NO OK

techniques_analysis_data_no_ok.head()

	matrix	platform	tactic	technique	technique_id	data_sources	Validation
31	mitre-attack	Linux	{'kill_chain_name': 'mitre-attack', 'phase_nam...	System Language Discovery	T1614.001	Windows Registry	NO OK
34	mitre-attack	macOS	{'kill_chain_name': 'mitre-attack', 'phase_nam...	System Language Discovery	T1614.001	Windows Registry	NO OK
68	mitre-attack	macOS	{'kill_chain_name': 'mitre-attack', 'phase_nam...	Code Signing Policy Modification	T1553.006	Windows Registry	NO OK
335	mitre-attack	Linux	{'kill_chain_name': 'mitre-attack', 'phase_nam...	Run Virtual Instance	T1564.006	Windows Registry	NO OK
340	mitre-attack	macOS	{'kill_chain_name': 'mitre-attack', 'phase_nam...	Run Virtual Instance	T1564.006	Windows Registry	NO OK

print('There are ',len(techniques_analysis_data_no_ok),' rows with inconsistent data')

There are  85  rows with inconsistent data

What is the impact of this inconsistent data from a platform and data sources perspective?

df = techniques_with_data_sources

# Columns whose list-valued cells are split into one row per element.
attributes = ['platform', 'data_sources']

for a in attributes:
    # explode() emits one row per element of the list in column "a" and
    # replaces the slower row-by-row apply/stack/join construction.
    df = df.explode(a).reset_index(drop=True)

df_2 = df.reindex(
    ['matrix', 'platform', 'tactic', 'technique', 'technique_id', 'data_sources'],
    axis=1,
)
df_3 = df_2.replace(
    ['Process monitoring', 'Application logs'],
    ['Process Monitoring', 'Application Logs'],
)

# A data source counts as Windows-related when its name contains any of these
# substrings (case-insensitive); "== True" turns a missing result into False.
conditions = [
    (df_3['data_sources'].str.contains('windows', case=False) == True),
    (df_3['data_sources'].str.contains('powershell', case=False) == True),
    (df_3['data_sources'].str.contains('wmi', case=False) == True),
]

choices = ['Windows', 'Windows', 'Windows']

df_3['Validation'] = np.select(conditions, choices, default='Other')
df_3['Num_Tech'] = 1
# Keep only the Windows-related data sources and count the distinct techniques
# per (data source, platform) pair.
df_4 = df_3[df_3.Validation == 'Windows']
df_5 = df_4.groupby(['data_sources', 'platform'])['technique'].nunique()
df_6 = df_5.to_frame().reset_index()

# Normalised stacked bars: the share of each platform within every data source.
alt.Chart(df_6).mark_bar().encode(
    x=alt.X('technique', stack="normalize"),
    y='data_sources',
    color='platform',
).properties(height=200)

There are techniques that consider Windows Error Reporting, Windows Registry, and Windows event logs as data sources and that also consider platforms like Linux and macOS. We do not need to consider these rows because those data sources can only be managed in a Windows environment. These are the techniques that we should not consider in our database:

techniques_analysis_data_no_ok[['technique','data_sources']].drop_duplicates().sort_values(by='data_sources',ascending=True)

	technique	data_sources
2558	Event Triggered Execution	WMI
31	System Language Discovery	Windows Registry
3911	Input Capture	Windows Registry
3818	Indicator Removal on Host	Windows Registry
3543	Two-Factor Authentication Interception	Windows Registry
3374	Browser Extensions	Windows Registry
3123	Service Stop	Windows Registry
3108	Inhibit System Recovery	Windows Registry
2700	Create or Modify System Process	Windows Registry
2560	Event Triggered Execution	Windows Registry
2519	Boot or Logon Autostart Execution	Windows Registry
2241	Abuse Elevation Control Mechanism	Windows Registry
2084	Unsecured Credentials	Windows Registry
2007	Subvert Trust Controls	Windows Registry
4103	Boot or Logon Initialization Scripts	Windows Registry
1815	Keylogging	Windows Registry
1692	Adversary-in-the-Middle	Windows Registry
1396	Impair Defenses	Windows Registry
1350	Disable or Modify Tools	Windows Registry
1322	Disable or Modify System Firewall	Windows Registry
1313	Install Root Certificate	Windows Registry
1177	Hide Artifacts	Windows Registry
951	System Services	Windows Registry
831	Hijack Execution Flow	Windows Registry
730	Hidden Users	Windows Registry
415	Indicator Blocking	Windows Registry
348	Hidden File System	Windows Registry
335	Run Virtual Instance	Windows Registry
68	Code Signing Policy Modification	Windows Registry
1740	Modify Authentication Process	Windows Registry
4286	OS Credential Dumping	Windows Registry

Without considering this inconsistent data, the final dataframe is:

techniques_analysis_data_ok = techniques_data_3[techniques_data_3.Validation == 'OK']
techniques_analysis_data_ok.head()

	matrix	platform	tactic	technique	technique_id	data_sources	Validation
0	mitre-attack	macOS	{'kill_chain_name': 'mitre-attack', 'phase_nam...	Resource Forking	T1564.009	Command	OK
1	mitre-attack	macOS	{'kill_chain_name': 'mitre-attack', 'phase_nam...	Resource Forking	T1564.009	Process	OK
2	mitre-attack	macOS	{'kill_chain_name': 'mitre-attack', 'phase_nam...	Resource Forking	T1564.009	File	OK
3	mitre-attack	Windows	{'kill_chain_name': 'mitre-attack', 'phase_nam...	Downgrade Attack	T1562.010	Process	OK
4	mitre-attack	Windows	{'kill_chain_name': 'mitre-attack', 'phase_nam...	Downgrade Attack	T1562.010	Command	OK

print('There are ',len(techniques_analysis_data_ok),' rows of data that you can play with')

There are  4703  rows of data that you can play with

Getting Techniques by Data Sources#

This function gets techniques’ information that includes specific data sources

data_source = 'PROCESS'

results = lift.get_techniques_by_data_sources(data_source)

len(results)

type(results)

list

results2 = lift.get_techniques_by_data_sources('pRoceSS','commAnd')

len(results2)

results2[1]

AttackPattern(type='attack-pattern', id='attack-pattern--824add00-99a1-4b15-9a2d-6c5683b7b497', created_by_ref='identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5', created='2021-10-08T14:06:28.212Z', modified='2021-10-15T00:48:06.723Z', name='Downgrade Attack', description='Adversaries may downgrade or use a version of system features that may be outdated, vulnerable, and/or does not support updated security controls such as logging. For example, [PowerShell](https://attack.mitre.org/techniques/T1059/001) versions 5+ includes Script Block Logging (SBL) which can record executed script content. However, adversaries may attempt to execute a previous version of PowerShell that does not support SBL with the intent to [Impair Defenses](https://attack.mitre.org/techniques/T1562) while running malicious scripts that may have otherwise been detected.(Citation: CrowdStrike BGH Ransomware 2021)(Citation: Mandiant BYOL 2018)\n\nAdversaries may downgrade and use less-secure versions of various features of a system, such as [Command and Scripting Interpreter](https://attack.mitre.org/techniques/T1059)s or even network protocols that can be abused to enable [Adversary-in-the-Middle](https://attack.mitre.org/techniques/T1557).(Citation: Praetorian TLS Downgrade Attack 2014)', kill_chain_phases=[KillChainPhase(kill_chain_name='mitre-attack', phase_name='defense-evasion')], revoked=False, external_references=[ExternalReference(source_name='mitre-attack', url='https://attack.mitre.org/techniques/T1562/010', external_id='T1562.010'), ExternalReference(source_name='CrowdStrike BGH Ransomware 2021', description='Falcon Complete Team. (2021, May 11). Response When Minutes Matter: Rising Up Against Ransomware. Retrieved October 8, 2021.', url='https://www.crowdstrike.com/blog/how-falcon-complete-stopped-a-big-game-hunting-ransomware-attack/'), ExternalReference(source_name='Mandiant BYOL 2018', description='Kirk, N. (2018, June 18). 
Bring Your Own Land (BYOL) – A Novel Red Teaming Technique. Retrieved October 8, 2021.', url='https://www.mandiant.com/resources/bring-your-own-land-novel-red-teaming-technique'), ExternalReference(source_name='Praetorian TLS Downgrade Attack 2014', description='Praetorian. (2014, August 19). Man-in-the-Middle TLS Protocol Downgrade Attack. Retrieved October 8, 2021.', url='https://www.praetorian.com/blog/man-in-the-middle-tls-ssl-protocol-downgrade-attack/')], object_marking_refs=['marking-definition--fa42a846-8d90-4e51-bc29-71d5b4802168'], x_mitre_data_sources=['Command: Command Execution', 'Process: Process Metadata', 'Process: Process Creation'], x_mitre_detection='Monitor for commands or other activity that may be indicative of attempts to abuse older or deprecated technologies (ex: <code>powershell –v 2</code>). Also monitor for other abnormal events, such as execution of and/or processes spawning from a version of a tool that is not expected in the environment.', x_mitre_is_subtechnique=True, x_mitre_permissions_required=['User'], x_mitre_platforms=['Windows', 'Linux', 'macOS'], x_mitre_version='1.0')

ATTACK Python Client

Explore ATT&CK Data Sources

Contents