diff --git a/core/dom.py b/core/dom.py
index 8e9b9dbc..21378abc 100644
--- a/core/dom.py
+++ b/core/dom.py
@@ -8,7 +8,6 @@ def dom(response):
     sources = r'''document\.(URL|documentURI|URLUnencoded|baseURI|cookie|referrer)|location\.(href|search|hash|pathname)|window\.name|history\.(pushState|replaceState)(local|session)Storage'''
     sinks = r'''eval|evaluate|execCommand|assign|navigate|getResponseHeaderopen|showModalDialog|Function|set(Timeout|Interval|Immediate)|execScript|crypto.generateCRMFRequest|ScriptElement\.(src|text|textContent|innerText)|.*?\.onEventName|document\.(write|writeln)|.*?\.innerHTML|Range\.createContextualFragment|(document|window)\.location'''
     scripts = re.findall(r'(?i)(?s)<script[^>]*>(.*?)</script>', response)
-    sinkFound, sourceFound = False, False
     for script in scripts:
         script = script.split('\n')
         num = 1
@@ -32,7 +31,6 @@ def dom(response):
                                 for part in parts:
                                     if source in part:
                                         controlledVariables.add(re.search(r'[a-zA-Z$_][a-zA-Z0-9$_]+', part).group().replace('$', '\$'))
-                                        sourceFound = True
                             line = line.replace(source, yellow + source + end)
                 for controlledVariable in controlledVariables:
                     allControlledVariables.add(controlledVariable)
@@ -46,13 +44,12 @@ def dom(response):
                         sink = newLine[grp.start():grp.end()].replace(' ', '')
                         if sink:
                             line = line.replace(sink, red + sink + end)
-                            sinkFound = True
                 if line != newLine:
                     highlighted.append('%-3s %s' % (str(num), line.lstrip(' ')))
                 num += 1
         except MemoryError:
             pass
-    if sinkFound and sourceFound:
+    if (yellow and red) in highlighted:
         return highlighted
     else:
         return []
diff --git a/core/filterChecker.py b/core/filterChecker.py
index 827c2160..5d29b6e5 100644
--- a/core/filterChecker.py
+++ b/core/filterChecker.py
@@ -2,32 +2,44 @@
 
 
 def filterChecker(url, params, headers, GET, delay, occurences, timeout, encoding):
-    positions = occurences.keys()
+    positions = {}
     sortedEfficiencies = {}
     # adding < > to environments anyway because they can be used in all contexts
     environments = set(['<', '>'])
-    for i in range(len(positions)):
+    for i in range(len(occurences)):
         sortedEfficiencies[i] = {}
-    for i in occurences:
-        occurences[i]['score'] = {}
-        context = occurences[i]['context']
-        if context == 'comment':
+    for i, occurence in zip(range(len(occurences)), occurences.values()):
+        environments.add(occurence['context'][1])
+        location = occurence['context'][0]
+        try:
+            attributeName = list(occurence['context'][3].keys())[0]
+            attributeValue = list(occurence['context'][3].values())[0]
+        except AttributeError:
+            attributeName = occurence['context'][3]
+        positions[str(i)] = occurence['position']
+        if location == 'comment':
             environments.add('-->')
-        elif context == 'script':
-            environments.add(occurences[i]['details']['quote'])
+        elif location == 'script':
             environments.add('</scRipT/>')
-        elif context == 'attribute':
-            if occurences[i]['details']['type'] == 'value':
-                if occurences[i]['details']['name'] == 'srcdoc':  # srcdoc attribute accepts html data with html entity encoding
-                    environments.add('&lt;')  # so let's add the html entity
-                    environments.add('&gt;')  # encoded versions of < and >
-            if occurences[i]['details']['quote']:
-                environments.add(occurences[i]['details']['quote'])
+        elif attributeName == 'srcdoc':  # srcdoc attribute accepts html data with html entity encoding
+            environments.add('&lt;')  # so let's add the html entity
+            environments.add('&gt;')  # encoded versions of < and >
+
     for environment in environments:
-        if environment:
+        if environment == '':
+            efficiencies = [100 for i in range(len(occurences))]
+        else:
             efficiencies = checker(
                 url, params, headers, GET, delay, environment, positions, timeout, encoding)
-            efficiencies.extend([0] * (len(occurences) - len(efficiencies)))
-            for occurence, efficiency in zip(occurences, efficiencies):
-                occurences[occurence]['score'][environment] = efficiency
+            if len(efficiencies) < len(occurences):
+                for i in range(len(occurences) - len(efficiencies)):
+                    efficiencies.append(0)
+        for i, efficiency in zip(range(len(efficiencies)), efficiencies):
+            try:
+                sortedEfficiencies[i][environment] = efficiency
+            except:
+                sortedEfficiencies[i] = {}
+                sortedEfficiencies[i][environment] = efficiency
+    for efficiency, occurence in zip(sortedEfficiencies.values(), occurences.values()):
+        occurence['score'] = efficiency
     return occurences
diff --git a/core/generator.py b/core/generator.py
index 353532c4..67490d38 100644
--- a/core/generator.py
+++ b/core/generator.py
@@ -9,120 +9,116 @@ def generator(occurences, response):
     vectors = {11: set(), 10: set(), 9: set(), 8: set(), 7: set(),
                6: set(), 5: set(), 4: set(), 3: set(), 2: set(), 1: set()}
     for i in occurences:
-        context = occurences[i]['context']
+        context = occurences[i]['context'][0]
+        breaker = occurences[i]['context'][1]
+        special = occurences[i]['context'][2]
+        try:
+            attributeName = list(occurences[i]['context'][3].keys())[0]
+            attributeValue = list(occurences[i]['context'][3].values())[0]
+        except AttributeError:
+            attributeName = occurences[i]['context'][3]
+        if special not in badTags:
+            special = ''
+        elif context == 'attribute':
+            special = ''
+        else:
+            special = '</' + special + '/>'
         if context == 'html':
             lessBracketEfficiency = occurences[i]['score']['<']
             greatBracketEfficiency = occurences[i]['score']['>']
+            breakerEfficiency = occurences[i]['score'][breaker]
+            if breaker == '\'' or breaker == '"':
+                breaker = ''
+                breakerEfficiency = 100
             ends = ['//']
-            badTag = occurences[i]['details']['badTag'] if 'badTag' in occurences[i]['details'] else ''
             if greatBracketEfficiency == 100:
                 ends.append('>')
-            if lessBracketEfficiency:
+            if lessBracketEfficiency == breakerEfficiency == 100:
                 payloads = genGen(fillings, eFillings, lFillings,
-                                  eventHandlers, tags, functions, ends, badTag)
+                                  eventHandlers, tags, functions, ends, breaker, special)
                 for payload in payloads:
                     vectors[10].add(payload)
         elif context == 'attribute':
             found = False
-            tag = occurences[i]['details']['tag']
-            Type = occurences[i]['details']['type']
-            quote = occurences[i]['details']['quote'] or ''
-            attributeName = occurences[i]['details']['name']
-            attributeValue = occurences[i]['details']['value']
-            quoteEfficiency = occurences[i]['score'][quote] if quote in occurences[i]['score'] else 100
+            breakerEfficiency = occurences[i]['score'][breaker]
             greatBracketEfficiency = occurences[i]['score']['>']
             ends = ['//']
             if greatBracketEfficiency == 100:
                 ends.append('>')
-            if greatBracketEfficiency == 100 and quoteEfficiency == 100:
+            if greatBracketEfficiency == 100 and breakerEfficiency == 100:
                 payloads = genGen(fillings, eFillings, lFillings,
-                                  eventHandlers, tags, functions, ends)
+                                  eventHandlers, tags, functions, ends, breaker, special)
                 for payload in payloads:
-                    payload = quote + '>' + payload
+                    if breaker:
+                        payload = payload.replace(breaker, breaker + '>')
+                    else:
+                        payload = '>' + payload
                     found = True
-                    vectors[9].add(payload)
-            if quoteEfficiency == 100:
+                    vectors[6].add(payload)
+            if breakerEfficiency == 100:
                 for filling in fillings:
                     for function in functions:
-                        vector = quote + filling + r('autofocus') + \
-                            filling + r('onfocus') + '=' + quote + function
+                        vector = breaker + filling + 'auTOfOcuS' + \
+                            filling + 'OnFoCUs' + '=' + breaker + function
                         found = True
-                        vectors[8].add(vector)
-            if quoteEfficiency == 90:
+                        vectors[6].add(vector)
+            if breakerEfficiency == 90:
                 for filling in fillings:
                     for function in functions:
-                        vector = '\\' + quote + filling + r('autofocus') + filling + \
-                            r('onfocus') + '=' + function + filling + '\\' + quote
+                        vector = '\\' + breaker + filling + 'auTOfOcuS' + filling + \
+                            'OnFoCUs' + '=' + function + filling + '\\' + breaker
                         found = True
-                        vectors[7].add(vector)
-            if Type == 'value':
-                if attributeName == 'srcdoc':
-                    if occurences[i]['score']['&lt;']:
-                        if occurences[i]['score']['&gt;']:
-                            del ends[:]
-                            ends.append('%26gt;')
-                        payloads = genGen(
-                            fillings, eFillings, lFillings, eventHandlers, tags, functions, ends)
-                        for payload in payloads:
-                            found = True
-                            vectors[9].add(payload.replace('<', '%26lt;'))
-                elif attributeName == 'href' and attributeValue == xsschecker:
-                    for function in functions:
+                        vectors[6].add(vector)
+            if attributeName == 'srcdoc':
+                if occurences[i]['score']['&lt;']:
+                    if occurences[i]['score']['&gt;']:
+                        del ends[:]
+                        ends.append('%26gt;')
+                    payloads = genGen(
+                        fillings, eFillings, lFillings, eventHandlers, tags, functions, ends, '', '')
+                    for payload in payloads:
                         found = True
-                        vectors[10].add(r('javascript:') + function)
-                elif attributeName.startswith('on'):
-                    closer = jsContexter(attributeValue)
-                    quote = ''
-                    for char in attributeValue.split(xsschecker)[1]:
-                        if char in ['\'', '"', '`']:
-                            quote = char
-                            break
+                        vectors[9].add(payload.replace('<', '%26lt;'))
+            if attributeName.startswith('on'):
+                closer = jsContexter(attributeValue)
+                breaker = ''
+                for char in attributeValue.split(xsschecker)[1]:
+                    if char in ['\'', '"', '`']:
+                        breaker = char
+                        break
+                if closer:
                     suffix = '//\\'
                     for filling in jFillings:
                         for function in functions:
-                            vector = quote + closer + filling + function + suffix
+                            vector = breaker + closer + filling + function + suffix
                             if found:
                                 vectors[7].add(vector)
                             else:
                                 vectors[9].add(vector)
-                    if quoteEfficiency > 83:
-                        suffix = '//'
-                        for filling in jFillings:
-                            for function in functions:
-                                if '=' in function:
-                                    function = '(' + function + ')'
-                                if quote == '':
-                                    filling = ''
-                                vector = '\\' + quote + closer + filling + function + suffix
-                                if found:
-                                    vectors[7].add(vector)
-                                else:
-                                    vectors[9].add(vector)
-                elif tag in ('script', 'iframe', 'embed', 'object'):
-                    if attributeName in ('src', 'iframe', 'embed') and attributeValue == xsschecker:
-                        payloads = ['//15.rs', '\\/\\\\\\/\\15.rs']
-                        for payload in payloads:
-                            vectors[10].add(payload)
-                    elif tag == 'object' and attributeName == 'data' and attributeValue == xsschecker:
+                elif breakerEfficiency > 83:
+                    suffix = '//'
+                    for filling in jFillings:
                         for function in functions:
-                            found = True
-                            vectors[10].add(r('javascript:') + function)
-                    elif quoteEfficiency == greatBracketEfficiency == 100:
-                        payloads = genGen(fillings, eFillings, lFillings,
-                                          eventHandlers, tags, functions, ends)
-                        for payload in payloads:
-                            payload = quote + '>' + r('</script/>') + payload
-                            found = True
-                            vectors[11].add(payload)
+                            if '=' in function:
+                                function = '(' + function + ')'
+                            if breaker == '':
+                                filling = ''
+                            vector = '\\' + breaker + closer + filling + function + suffix
+                            if found:
+                                vectors[7].add(vector)
+                            else:
+                                vectors[9].add(vector)
+
         elif context == 'comment':
             lessBracketEfficiency = occurences[i]['score']['<']
             greatBracketEfficiency = occurences[i]['score']['>']
+            breakerEfficiency = occurences[i]['score'][breaker]
             ends = ['//']
             if greatBracketEfficiency == 100:
                 ends.append('>')
-            if lessBracketEfficiency == 100:
+            if lessBracketEfficiency == breakerEfficiency == 100:
                 payloads = genGen(fillings, eFillings, lFillings,
-                                  eventHandlers, tags, functions, ends)
+                                  eventHandlers, tags, functions, ends, breaker, special)
                 for payload in payloads:
                     vectors[10].add(payload)
         elif context == 'script':
@@ -134,26 +130,27 @@ def generator(occurences, response):
             else:
                 continue
             closer = jsContexter(script)
-            quote = occurences[i]['details']['quote']
             scriptEfficiency = occurences[i]['score']['</scRipT/>']
             greatBracketEfficiency = occurences[i]['score']['>']
-            breakerEfficiency = 100
-            if quote:
-                breakerEfficiency = occurences[i]['score'][quote]
+            breakerEfficiency = occurences[i]['score'][breaker]
             ends = ['//']
             if greatBracketEfficiency == 100:
                 ends.append('>')
             if scriptEfficiency == 100:
                 breaker = r('</script/>')
                 payloads = genGen(fillings, eFillings, lFillings,
-                                  eventHandlers, tags, functions, ends)
+                                  eventHandlers, tags, functions, ends, breaker, special)
                 for payload in payloads:
                     vectors[10].add(payload)
             if closer:
                 suffix = '//\\'
+                if not breaker:
+                    closer = closer[1:]
+                if breakerEfficiency != 100:
+                    breaker = ''
                 for filling in jFillings:
                     for function in functions:
-                        vector = quote + closer + filling + function + suffix
+                        vector = breaker + closer + filling + function + suffix
                         vectors[7].add(vector)
             elif breakerEfficiency > 83:
                 suffix = '//'
@@ -161,9 +158,9 @@
                 for function in functions:
                     if '=' in function:
                         function = '(' + function + ')'
-                    if quote == '':
+                    if breaker == '':
                         filling = ''
-                    vector = '\\' + quote + closer + filling + function + suffix
+                    vector = '\\' + breaker + closer + filling + function + suffix
                     vectors[6].add(vector)
             index += 1
     return vectors
diff --git a/core/htmlParser.py b/core/htmlParser.py
index 60e51409..a1d8815e 100644
--- a/core/htmlParser.py
+++ b/core/htmlParser.py
@@ -1,7 +1,7 @@
 import re
 
 from core.config import badTags, xsschecker
-from core.utils import isBadContext, equalize, escaped
+from core.utils import isBadContext
 
 
 def htmlParser(response, encoding):
@@ -9,82 +9,104 @@ def htmlParser(response, encoding):
     response = response.text  # response content
     if encoding:  # if the user has specified an encoding, encode the probe in that
         response = response.replace(encoding(xsschecker), xsschecker)
-    reflections = response.count(xsschecker)
-    position_and_context = {}
-    environment_details = {}
-    clean_response = re.sub(r'<!--[.\s\S]*?-->', '', response)
-    script_checkable = clean_response
-    for i in range(reflections):
-        occurence = re.search(r'(?i)(?s)<script[^>]*>.*?(%s).*?</script>' % xsschecker, script_checkable)
-        if occurence:
-            thisPosition = occurence.start(1)
-            position_and_context[thisPosition] = 'script'
-            environment_details[thisPosition] = {}
-            environment_details[thisPosition]['details'] = {'quote' : ''}
-            for i in range(len(occurence.group())):
-                currentChar = occurence.group()[i]
-                if currentChar in ('\'', '`', '"') and not escaped(i, occurence.group()):
-                    environment_details[thisPosition]['details']['quote'] = currentChar
-                elif currentChar in (')', ']', '}', '}') and not escaped(i, occurence.group()):
-                    break
-            script_checkable = script_checkable.replace(xsschecker, '', 1)
-    if len(position_and_context) < reflections:
-        attribute_context = re.finditer(r'<[^>]*?(%s)[^>]*?>' % xsschecker, clean_response)
-        for occurence in attribute_context:
-            match = occurence.group(0)
-            thisPosition = occurence.start(1)
-            parts = re.split(r'\s', match)
-            tag = parts[0][1:]
-            for part in parts:
-                if xsschecker in part:
-                    Type, quote, name, value = '', '', '', ''
-                    if '=' in part:
-                        quote = re.search(r'=([\'`"])?', part).group(1)
-                        name_and_value = part.split('=')[0], '='.join(part.split('=')[1:])
-                        if xsschecker == name_and_value[0]:
-                            Type = 'name'
-                        else:
-                            Type = 'value'
-                            name = name_and_value[0]
-                            value = name_and_value[1].rstrip('>').rstrip(quote).lstrip(quote)
-                    else:
-                        Type = 'flag'
-                    position_and_context[thisPosition] = 'attribute'
-                    environment_details[thisPosition] = {}
-                    environment_details[thisPosition]['details'] = {'tag' : tag, 'type' : Type, 'quote' : quote, 'value' : value, 'name' : name}
-    if len(position_and_context) < reflections:
-        html_context = re.finditer(xsschecker, clean_response)
-        for occurence in html_context:
-            thisPosition = occurence.start()
-            if thisPosition not in position_and_context:
-                position_and_context[occurence.start()] = 'html'
-                environment_details[thisPosition] = {}
-                environment_details[thisPosition]['details'] = {}
-    if len(position_and_context) < reflections:
-        comment_context = re.finditer(r'<!--[\s\S]*?(%s)[\s\S]*?-->' % xsschecker, response)
-        for occurence in comment_context:
-            thisPosition = occurence.start(1)
-            position_and_context[thisPosition] = 'comment'
-            environment_details[thisPosition] = {}
-            environment_details[thisPosition]['details'] = {}
-    database = {}
-    for i in sorted(position_and_context):
-        database[i] = {}
-        database[i]['position'] = i
-        database[i]['context'] = position_and_context[i]
-        database[i]['details'] = environment_details[i]['details']
+    tags = []  # tags in which the input is reflected
+    locations = []  # contexts in which the input is reflected
+    attributes = []  # attribute names
+    environments = []  # strings needed to break out of the context
+    positions = []  # postions of all the reflections of the xsschecker
+    for match in re.finditer(xsschecker, response):
+        positions.append(match.start())
-
-    bad_contexts = re.finditer(r'(?s)(?i)<(style|template|textarea|title|noembed|noscript)>[.\s\S]*(%s)[.\s\S]*</\1>' % xsschecker, response)
+# It finds the contexts of the reflections
+
+    parts = response.split(xsschecker)
+    # remove first element since it doesn't contain xsschecker
+    parts.remove(parts[0])
+    # add xsschecker in front of all elements
+    parts = [xsschecker + s for s in parts]
+    for part in parts:  # iterate over the parts
+        deep = part.split('>')
+        if ''
+            break
+        num += 1
+    if '<' not in response:
+        if rawResponse.headers['Content-Type'].startswith('text/html'):
+            location = 'html'
+    locations.append(location)  # add location to locations list
+
+    bad_contexts = re.finditer(r'''(?s)(?i)<(style|template|textarea|title|noembed|noscript)>[.\s\S]*(%s)[.\s\S]*</\1>''' % xsschecker, response)
     non_executable_contexts = []
     for each in bad_contexts:
         non_executable_contexts.append([each.start(), each.end(), each.group(1)])
-
-    if non_executable_contexts:
-        for key in database.keys():
-            position = database[key]['position']
-            badTag = isBadContext(position, non_executable_contexts)
-            if badTag:
-                database[key]['details']['badTag'] = badTag
-            else:
-                database[key]['details']['badTag'] = ''
-    return database
+# Finds the "environment" of reflections. is it within double quotes? Which tag contains the reflection?
+    num = 0  # dummy value to keep record of occurence being processed
+    # find xsschecker in response and return matches
+    for occ in re.finditer(xsschecker, response, re.IGNORECASE):
+        # convert "xsschecker to EOF" into a list
+        toLook = list(response[occ.end():])
+        for loc in range(len(toLook)):  # interate over the chars
+            if toLook[loc] in ('\'', '"', '`'):  # if the char is a quote
+                environments.append(toLook[loc])  # add it to environments list
+                tokens = response.split('<')
+                goodTokens = []  # tokens which contain xsschecker
+                for token in tokens:  # iterate over tokens
+                    if xsschecker in token:  # if xsschecker is in token
+                        goodTokens.append(token)  # add it to goodTokens list
+                        # attributes and their values are generally seperated with space so...
+                        attrs = token.split(' ')
+                        for attr in attrs:  # iterate over the attribute
+                            if xsschecker in attr:  # is xsschecker in this attribute?
+                                # alright, this is the one we need
+                                attributeName = attr.split('=')[0]
+                                attributeValue = ''.join(attr.split('=')[1:])
+                                if attributeValue.startswith('\'') or attributeValue.startswith('"'):
+                                    attributeValue = attributeValue[1:-1]
+                                attributes.append({attributeName:attributeValue})
+                                break
+                try:
+                    # finds the tag "inside" which input is refelcted
+                    tag = re.search(r'\w+', goodTokens[num]).group()
+                except IndexError:
+                    try:
+                        # finds the tag "inside" which input is refelcted
+                        tag = re.search(r'\w+', goodTokens[num - 1]).group()
+                    except IndexError:
+                        tag = 'null'
+                tags.append(tag)  # add the tag to the tags list
+                break
+            else:  # if we encounter a closing angular brackt
+                # check if the next character to it is a / to make sure its a closing tag
+                badContext = isBadContext(positions[num], non_executable_contexts)
+                if badContext:
+                    environments.append('')
+                else:
+                    environments.append('')
+                tags.append('')
+                attributes.append('')
+                break
+            loc += 1
+        num += 1
+    occurences = {}  # a dict to store all the collected information about the reflections
+    for i, loc, env, tag, attr, position in zip(range(len(locations)), locations, environments, tags, attributes, positions):
+        occurences[i] = {}
+        occurences[i]['position'] = position
+        if loc == 'comment':  # if context is html comment
+            env = '-->'  # add --> as environment as we need this to break out
+        occurences[i]['context'] = [loc, env, tag, attr]
+    return [occurences, positions]
diff --git a/core/photon.py b/core/photon.py
index 9a7b1ae9..53f9fc56 100644
--- a/core/photon.py
+++ b/core/photon.py
@@ -54,19 +54,16 @@ def rec(target):
         for link in matches:  # iterate over the matches
             # remove everything after a "#" to deal with in-page anchors
             link = link.split('#')[0]
-            if link.endswith(('.pdf', '.png', '.jpg', '.jpeg', '.xls', '.xml', '.docx', '.doc')):
-                pass
+            if link[:4] == 'http':
+                if link.startswith(main_url):
+                    storage.add(link)
+            elif link[:2] == '//':
+                if link.split('/')[2].startswith(host):
+                    storage.add(schema + link)
+            elif link[:1] == '/':
+                storage.add(main_url + link)
             else:
-                if link[:4] == 'http':
-                    if link.startswith(main_url):
-                        storage.add(link)
-                elif link[:2] == '//':
-                    if link.split('/')[2].startswith(host):
-                        storage.add(schema + link)
-                elif link[:1] == '/':
-                    storage.add(main_url + link)
-                else:
-                    storage.add(main_url + '/' + link)
+                storage.add(main_url + '/' + link)
     for x in range(level):
         urls = storage - processed  # urls to crawl = all urls - urls that have been crawled
         # for url in urls:
diff --git a/core/utils.py b/core/utils.py
index c987741f..2d59cee8 100644
--- a/core/utils.py
+++ b/core/utils.py
@@ -131,7 +131,7 @@ def flattenParams(currentParam, params, payload):
     return '?' + '&'.join(flatted)
 
 
-def genGen(fillings, eFillings, lFillings, eventHandlers, tags, functions, ends, badTag=None):
+def genGen(fillings, eFillings, lFillings, eventHandlers, tags, functions, ends, breaker, special):
     vectors = []
     r = randomUpper  # randomUpper randomly converts chars of a string to uppercase
     for tag in tags:
@@ -150,10 +150,7 @@ def genGen(fillings, eFillings, lFillings, eventHandlers, tags, functions, ends,
                                     if tag == 'd3v' or tag == 'a':
                                         if '>' in ends:
                                             end = '>'  # we can't use // as > with "a" or "d3v" tag
-                                    breaker = ''
-                                    if badTag:
-                                        breaker = '</' + randomUpper(badTag) + '>'
-                                    vector = breaker + '<' + r(tag) + filling + r(
+                                    vector = r(breaker) + special + '<' + r(tag) + filling + r(
                                         eventHandler) + eFilling + '=' + eFilling + function + lFilling + end + bait
                                     vectors.append(vector)
     return vectors
@@ -180,8 +177,6 @@ def getParams(url, data, GET):
         parts = data.split('&')
         for part in parts:
             each = part.split('=')
-            if len(each) < 2:
-                each.append('')
             try:
                 params[each[0]] = each[1]
             except IndexError:
@@ -217,20 +212,22 @@ def js_extractor(response):
 
 
 def handle_anchor(parent_url, url):
+    if parent_url.count('/') > 2:
+        replacable = re.search(r'/[^/]*?$', parent_url).group()
+        if replacable != '/':
+            parent_url = parent_url.replace(replacable, '')
     scheme = urlparse(parent_url).scheme
     if url[:4] == 'http':
         return url
     elif url[:2] == '//':
         return scheme + ':' + url
-    elif url.startswith('/'):
-        host = urlparse(parent_url).netloc
-        scheme = urlparse(parent_url).scheme
-        parent_url = scheme + '://' + host
-        return parent_url + url
-    elif parent_url.endswith('/'):
+    elif url[:1] == '/':
         return parent_url + url
     else:
-        return parent_url + '/' + url
+        if parent_url.endswith('/') or url.startswith('/'):
+            return parent_url + url
+        else:
+            return parent_url + '/' + url
 
 
 def deJSON(data):
@@ -256,21 +253,3 @@ def isBadContext(position, non_executable_contexts):
             badContext = each[2]
             break
     return badContext
-
-def equalize(array, number):
-    if len(array) < number:
-        array.append('')
-
-def escaped(position, string):
-    usable = string[:position][::-1]
-    match = re.search(r'^\\*', usable)
-    if match:
-        match = match.group()
-        if len(match) == 1:
-            return True
-        elif len(match) % 2 == 0:
-            return False
-        else:
-            return True
-    else:
-        return False
diff --git a/modes/crawl.py b/modes/crawl.py
index 57efe2b3..81ffb307 100644
--- a/modes/crawl.py
+++ b/modes/crawl.py
@@ -41,8 +41,9 @@ def crawl(scheme, host, main_url, form, blindXSS, blindPayload, headers, delay,
                 paramsCopy[paramName] = xsschecker
                 response = requester(
                     url, paramsCopy, headers, GET, delay, timeout)
-                occurences = htmlParser(response, encoding)
-                positions = occurences.keys()
+                parsedResponse = htmlParser(response, encoding)
+                occurences = parsedResponse[0]
+                positions = parsedResponse[1]
                 efficiencies = filterChecker(
                     url, paramsCopy, headers, GET, delay, occurences, timeout, encoding)
                 vectors = generator(occurences, response.text)
diff --git a/modes/scan.py b/modes/scan.py
index e2030f84..c16aeccc 100644
--- a/modes/scan.py
+++ b/modes/scan.py
@@ -67,9 +67,11 @@ def scan(target, paramData, encoding, headers, delay, timeout, skipDOM, find, sk
             else:
                 paramsCopy[paramName] = xsschecker
             response = requester(url, paramsCopy, headers, GET, delay, timeout)
-            occurences = htmlParser(response, encoding)
-            positions = occurences.keys()
+            parsedResponse = htmlParser(response, encoding)
+            occurences = parsedResponse[0]
             logger.debug('Scan occurences: {}'.format(occurences))
+            positions = parsedResponse[1]
+            logger.debug('Scan positions: {}'.format(positions))
             if not occurences:
                 logger.error('No reflection found')
                 continue