Skip to content

Commit

Permalink
Distinguish betwen strings and attributes (#388)
Browse files Browse the repository at this point in the history
Much of the code is now based on converting attributes and/or identifier
nodes into strings to identify whether that string matches a suspicious
call as part of a Rule.

However, the code needs to distinguish between a string representing an
attribute/identifier and a true regular string.

To do this, a convenience utils class was added to detect true strings
from tree-sitter node text. Luckily they appear different because they
have extra quotes.

This should fix some critical false positive/negative cases where an
identifier assignment was to a string and not a suspicious function.

Signed-off-by: Eric Brown <[email protected]>
  • Loading branch information
ericwb authored Mar 26, 2024
1 parent 47eff81 commit 0e3a5ac
Show file tree
Hide file tree
Showing 13 changed files with 127 additions and 56 deletions.
27 changes: 25 additions & 2 deletions precli/core/argument.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright 2023 Secure Saurce LLC
# Copyright 2024 Secure Saurce LLC
from tree_sitter import Node

from precli.core import utils
from precli.parsers import tokens


Expand All @@ -17,6 +18,8 @@ def __init__(
self._position = position
self._value = value
self._ident_node = Argument._get_func_ident(self._node)
self._is_str = utils.is_str(value)
self._value_str = utils.to_str(value) if self._is_str else None

@staticmethod
def _get_func_ident(node: Node) -> Node:
Expand Down Expand Up @@ -78,12 +81,32 @@ def position(self) -> int:
"""
return self._position

@property
def is_str(self) -> bool:
"""
True if the value is a true string.
:return: if value is a string
:rtype: bool
"""
return self._is_str

@property
def value(self):
"""
The value of the argument
The value of the argument.
:return: value of argument
:rtype: object
"""
return self._value

@property
def value_str(self) -> str:
"""
The value as a true string.
:return: value as string
:rtype: str
"""
return self._value_str
46 changes: 46 additions & 0 deletions precli/core/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright 2024 Secure Saurce LLC


def is_str(value) -> bool:
"""
True if the value is a tree-sitter node string.
:return: if value is a string
:rtype: bool
"""
if isinstance(value, str) and (
value.startswith('b"""')
or value.startswith("b'''")
or value.startswith('b"')
or value.startswith("b'")
or value.startswith('"""')
or value.startswith("'''")
or value.startswith('"')
or value.startswith("'")
):
return True
return False


def to_str(value: str) -> str:
"""
Converts a tree-sitter node string value to a
true string.
:return: value as string
:rtype: str
"""
if isinstance(value, str):
value_str = value
bytestr = False
if value_str and value_str[0] == "b":
value_str = value_str[1:]
bytestr = True
if value_str.startswith('"""') or value_str.startswith("'''"):
value_str = value_str[3:-3]
elif value_str.startswith('"') or value_str.startswith("'"):
value_str = value_str[1:-1]
if bytestr is True:
value_str = bytes(value_str, encoding="utf-8")

return value_str
41 changes: 11 additions & 30 deletions precli/parsers/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,13 @@ def unchain(self, node: Node, result: list):
if child.type != tokens.ARGUMENT_LIST:
self.unchain(child, result)

def join_symbol(self, nodetext: str, symbol: Symbol):
if isinstance(symbol.value, str):
value = nodetext.replace(symbol.name, symbol.value, 1)
else:
value = symbol.value
return value

def resolve(self, node: Node, default=None):
"""
Resolve the given node into its liternal value.
Expand All @@ -362,33 +369,18 @@ def resolve(self, node: Node, default=None):
nodetext = node.children[0].text.decode()
symbol = self.get_qual_name(node.children[0])
if symbol is not None:
if isinstance(symbol.value, str):
value = nodetext.replace(
symbol.name, symbol.value, 1
)
else:
value = symbol.value
value = self.join_symbol(nodetext, symbol)
case tokens.ATTRIBUTE:
result = []
self.unchain(node, result)
nodetext = ".".join(result)
symbol = self.get_qual_name(node)
if symbol is not None:
if isinstance(symbol.value, str):
value = nodetext.replace(
symbol.name, symbol.value, 1
)
else:
value = symbol.value
value = self.join_symbol(nodetext, symbol)
case tokens.IDENTIFIER:
symbol = self.get_qual_name(node)
if symbol is not None:
if isinstance(symbol.value, str):
value = nodetext.replace(
symbol.name, symbol.value, 1
)
else:
value = symbol.value
value = self.join_symbol(nodetext, symbol)
case tokens.KEYWORD_ARGUMENT:
keyword = node.named_children[0].text.decode()
kwvalue = node.named_children[1]
Expand All @@ -407,18 +399,7 @@ def resolve(self, node: Node, default=None):
value += (self.resolve(child),)
case tokens.STRING:
# TODO: handle f-strings? (f"{a}")
bytestr = False
if nodetext and nodetext[0] == "b":
nodetext = nodetext[1:]
bytestr = True
if nodetext.startswith('"""') or nodetext.startswith(
"'''"
):
value = nodetext[3:-3]
elif nodetext.startswith('"') or nodetext.startswith("'"):
value = nodetext[1:-1]
if bytestr is True:
value = bytes(value, encoding="utf-8")
value = nodetext
case tokens.INTEGER:
# TODO: hex, octal, binary
try:
Expand Down
6 changes: 3 additions & 3 deletions precli/rules/python/stdlib/argparse_sensitive_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ def analyze_call(self, context: dict, call: Call) -> Result:
action = call.get_argument(name="action")

if (
"--password" in [arg0.value, arg1.value]
or "--api-key" in [arg0.value, arg1.value]
) and action.value == "store":
"--password" in [arg0.value_str, arg1.value_str]
or "--api-key" in [arg0.value_str, arg1.value_str]
) and action.value_str == "store":
return Result(
rule_id=self.id,
location=Location(node=call.node),
Expand Down
12 changes: 6 additions & 6 deletions precli/rules/python/stdlib/hashlib_weak_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,19 +141,19 @@ def analyze_call(self, context: dict, call: Call) -> Result:
dklen=None
)
"""
hash_name = call.get_argument(position=0, name="hash_name").value
argument = call.get_argument(position=0, name="hash_name")

if isinstance(hash_name, str) and hash_name.lower() in WEAK_HASHES:
if argument.is_str and argument.value_str.lower() in WEAK_HASHES:
return Result(
rule_id=self.id,
location=Location(node=call.function_node),
message=self.message.format(hash_name),
message=self.message.format(argument.value_str),
)
elif call.name_qualified in ["hashlib.new"]:
# hashlib.new(name, data=b'', **kwargs)
name = call.get_argument(position=0, name="name").value
argument = call.get_argument(position=0, name="name")

if isinstance(name, str) and name.lower() in WEAK_HASHES:
if argument.is_str and argument.value_str.lower() in WEAK_HASHES:
used_for_security = call.get_argument(
name="usedforsecurity", default=Argument(None, True)
).value
Expand All @@ -162,5 +162,5 @@ def analyze_call(self, context: dict, call: Call) -> Result:
return Result(
rule_id=self.id,
location=Location(node=call.function_node),
message=self.message.format(name),
message=self.message.format(argument.value_str),
)
26 changes: 16 additions & 10 deletions precli/rules/python/stdlib/hmac_weak_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,26 +113,32 @@ def analyze_call(self, context: dict, call: Call) -> Result:
if call.name_qualified in ["hmac.new"]:
# hmac.new(key, msg=None, digestmod='')
argument = call.get_argument(position=2, name="digestmod")
digestmod = argument.value

if (
isinstance(digestmod, str) and digestmod.lower() in WEAK_HASHES
) or digestmod in HASHLIB_WEAK_HASHES:
if argument.is_str and argument.value_str.lower() in WEAK_HASHES:
return Result(
rule_id=self.id,
location=Location(node=argument.node),
message=self.message.format(digestmod),
message=self.message.format(argument.value_str),
)
if argument.value in HASHLIB_WEAK_HASHES:
return Result(
rule_id=self.id,
location=Location(node=argument.node),
message=self.message.format(argument.value),
)
elif call.name_qualified in ["hmac.digest"]:
# hmac.digest(key, msg, digest)
argument = call.get_argument(position=2, name="digest")
digest = argument.value

if (
isinstance(digest, str) and digest.lower() in WEAK_HASHES
) or digest in HASHLIB_WEAK_HASHES:
if argument.is_str and argument.value_str.lower() in WEAK_HASHES:
return Result(
rule_id=self.id,
location=Location(node=argument.node),
message=self.message.format(argument.value_str),
)
if argument.value in HASHLIB_WEAK_HASHES:
return Result(
rule_id=self.id,
location=Location(node=argument.node),
message=self.message.format(digest),
message=self.message.format(argument.value),
)
5 changes: 4 additions & 1 deletion precli/rules/python/stdlib/http_server_unrestricted_bind.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def run(server_class: HTTPServer, handler_class: BaseHTTPRequestHandler):
_New in version 0.3.14_
""" # noqa: E501
from precli.core import utils
from precli.core.call import Call
from precli.core.location import Location
from precli.core.result import Result
Expand Down Expand Up @@ -94,7 +95,9 @@ def analyze_call(self, context: dict, call: Call) -> Result:
arg = call.get_argument(position=0, name="server_address")
server_address = arg.value

if isinstance(server_address, tuple) and server_address[0] in (
if isinstance(server_address, tuple) and utils.to_str(
server_address[0]
) in (
"",
INADDR_ANY,
IN6ADDR_ANY,
Expand Down
5 changes: 4 additions & 1 deletion precli/rules/python/stdlib/http_url_secret.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,10 @@ def analyze_call(self, context: dict, call: Call) -> Result:
return

argument = call.get_argument(position=1, name="url")
url = argument.value
if argument.is_str is False:
return

url = argument.value_str
split_url = urlsplit(url)
query = split_url.query
params = parse_qs(query)
Expand Down
3 changes: 2 additions & 1 deletion precli/rules/python/stdlib/socket_unrestricted_bind.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
_New in version 0.3.14_
""" # noqa: E501
from precli.core import utils
from precli.core.call import Call
from precli.core.location import Location
from precli.core.result import Result
Expand Down Expand Up @@ -90,7 +91,7 @@ def analyze_call(self, context: dict, call: Call) -> Result:
arg = call.get_argument(position=0, name="address")
address = arg.value

if isinstance(address, tuple) and address[0] in (
if isinstance(address, tuple) and utils.to_str(address[0]) in (
"",
INADDR_ANY,
IN6ADDR_ANY,
Expand Down
5 changes: 4 additions & 1 deletion precli/rules/python/stdlib/socketserver_unrestricted_bind.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def handle(self):
_New in version 0.3.14_
""" # noqa: E501
from precli.core import utils
from precli.core.call import Call
from precli.core.location import Location
from precli.core.result import Result
Expand Down Expand Up @@ -116,7 +117,9 @@ def analyze_call(self, context: dict, call: Call) -> Result:
arg = call.get_argument(position=0, name="server_address")
server_address = arg.value

if isinstance(server_address, tuple) and server_address[0] in (
if isinstance(server_address, tuple) and utils.to_str(
server_address[0]
) in (
"",
INADDR_ANY,
IN6ADDR_ANY,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def run(server_class: DocXMLRPCServer, handler_class: DocXMLRPCRequestHandler):
_New in version 0.3.14_
""" # noqa: E501
from precli.core import utils
from precli.core.call import Call
from precli.core.location import Location
from precli.core.result import Result
Expand Down Expand Up @@ -94,7 +95,7 @@ def analyze_call(self, context: dict, call: Call) -> Result:
arg = call.get_argument(position=0, name="addr")
addr = arg.value

if isinstance(addr, tuple) and addr[0] in (
if isinstance(addr, tuple) and utils.to_str(addr[0]) in (
"",
INADDR_ANY,
IN6ADDR_ANY,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# level: NONE
hashlib = "hashlib"
hashlib.md5()
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def test_rule_meta(self):
"hashlib_blake2s.py",
"hashlib_md4.py",
"hashlib_md5.py",
"hashlib_md5_as_identifier.py",
"hashlib_md5_usedforsecurity_true.py",
"hashlib_new_blake2b.py",
"hashlib_new_blake2s.py",
Expand Down

0 comments on commit 0e3a5ac

Please sign in to comment.