From a0fda96cc1ee722457eba43e063575ec5c935762 Mon Sep 17 00:00:00 2001 From: Andre Brait Date: Tue, 6 Feb 2024 15:17:14 +0100 Subject: [PATCH] pfBlockerNG: EasyList and Python mode improvements * Remove support for Python 2 * Fix TypeError on DHCP staticmap check (General) * Fix detection of EasyLists by pre-scanning the file (General) * Fix empty response when blocking alternating A and AAAA records (Python) * Support parsing and applying EasyList exclusions (Python) * Support parsing and applying EasyList regular expressions (Python) * Temporary workaround for cache hit invalidation (Python) * Add debug mode (Python) * Add tracing mode (Python) (manual, no GUI, not development only) * Improve logging and error handling (Python) * Make I/O operations asynchronous (Python) --- .../usr/local/pkg/pfblockerng/pfb_unbound.py | 1370 ++++++++++------- .../usr/local/pkg/pfblockerng/pfblockerng.inc | 552 +++++-- .../usr/local/pkg/pfblockerng/pfblockerng.sh | 262 +++- .../www/pfblockerng/pfblockerng_alerts.php | 4 +- .../www/pfblockerng/pfblockerng_dnsbl.php | 11 + .../local/www/pfblockerng/pfblockerng_log.php | 24 +- 6 files changed, 1535 insertions(+), 688 deletions(-) diff --git a/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/pkg/pfblockerng/pfb_unbound.py b/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/pkg/pfblockerng/pfb_unbound.py index fb4f9737a736..326ffc79dd28 100644 --- a/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/pkg/pfblockerng/pfb_unbound.py +++ b/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/pkg/pfblockerng/pfb_unbound.py @@ -19,6 +19,8 @@ # limitations under the License. 
from datetime import datetime +from functools import wraps +import traceback import logging import time import csv @@ -29,14 +31,7 @@ global pfb pfb = {} -if sys.version_info < (2, 8): - from ConfigParser import ConfigParser - pfb['py_v3'] = False -else: - from configparser import ConfigParser - pfb['py_v3'] = True - -from collections import defaultdict +from configparser import ConfigParser # Import additional python modules try: @@ -72,25 +67,65 @@ pfb['mod_sqlite3_e'] = e pass +try: + from concurrent.futures import ThreadPoolExecutor + pfb['async_io'] = True + pfb['async_io_executor'] = ThreadPoolExecutor(max_workers=1) +except Exception as e: + pfb['async_io'] = False + pfb['async_io_executor_e'] = e -def init_standard(id, env): - global pfb, rcodeDB, dataDB, zoneDB, regexDB, hstsDB, whiteDB, excludeDB, excludeAAAADB, excludeSS, dnsblDB, noAAAADB, gpListDB, safeSearchDB, feedGroupIndexDB, maxmindReader - - if not register_inplace_cb_reply(inplace_cb_reply, env, id): - log_info('[pfBlockerNG]: Failed register_inplace_cb_reply') - return False +def exception_logger(func): + @wraps(func) + def _log(*args, **kwargs): + try: + return func(*args, **kwargs) + except: + log_err('[pfBlockerNG]: Exception caught in Python module. 
Check the error log for details.') + sys.stderr.write("[pfBlockerNG]: Exception caught: \n\t{}".format('\t'.join(traceback.format_exc().splitlines(True)))) + raise + return _log + +def traced(func): + # This is mostly targeted at developers making changes to pfBlockerNG, so no UI is exposed + @wraps(func) + def _log(*args, **kwargs): + # Change this to False to enable logging + if True: + return func(*args, **kwargs) + + # Early check to prevent getting the name and locals if not needed + debug('Function call (func={}): args={}, kwargs={}', func.__name__, args, kwargs) + try: + result = func(*args, **kwargs) + debug('Function call (func={}) result: {}', func.__name__, result) + return result + except: + debug('Exception caught (func={}): \n\t{}', func.__name__, '\t'.join(traceback.format_exc().splitlines(True))) + raise - if not register_inplace_cb_reply_cache(inplace_cb_reply_cache, env, id): - log_info('[pfBlockerNG]: Failed register_inplace_cb_reply_cache') - return False + return _log - if not register_inplace_cb_reply_local(inplace_cb_reply_local, env, id): - log_info('[pfBlockerNG]: Failed register_inplace_cb_reply_local') - return False +def init_standard(id, env): + try: + bootstrap_logging() + except: + message = 'Exception caught\n\t{}\n'.format(timestamp, '\t'.join(traceback.format_exc().splitlines(True))) + log_err('[pfBlockerNG]: {}'.format(message)) + with open('/var/log/pfblockerng/py_error.log', 'a') as error_log: + timestamp = datetime.now().strftime("%b %-d %H:%M:%S") + error_log.write('{}|ERROR| {}'.format(timestamp, message)) + raise + init(id, env) - if not register_inplace_cb_reply_servfail(inplace_cb_reply_servfail, env, id): - log_info('[pfBlockerNG]: Failed register_inplace_cb_reply_servfail') - return False +def bootstrap_logging(): + global pfb + # Clear debug file + debug_logfile = '/var/log/pfblockerng/py_debug.log' + if os.path.isfile(debug_logfile): + os.remove(debug_logfile) + # Touch the file + open(debug_logfile, 'w').close() # 
Store previous error message to avoid repeating pfb['p_err'] = '' @@ -100,12 +135,25 @@ class log_stderr(object): def __init__(self, logger): self.logger = logger self.linebuf = '' + if pfb['async_io']: + self.executor = pfb['async_io_executor'] + else: + self.executor = None - def write(self, msg): + def _write(self, msg): if msg != pfb['p_err']: - self.logger.log(logging.ERROR, msg.rstrip()) + msg = msg.rstrip() + self.logger.log(logging.ERROR, msg) + _debug('[ERROR LOG]: {}', msg) pfb['p_err'] = msg + def write(self, msg): + if self.executor is not None: + self.executor.submit(self._write, msg) + else: + self._write(msg) + + # Create python error logfile logfile = '/var/log/pfblockerng/py_error.log' @@ -119,6 +167,27 @@ def write(self, msg): os.remove(logfile) sys.stderr = log_stderr(logging.getLogger('pfb_stderr')) +@traced +@exception_logger +def init(id, env): + global pfb, rcodeDB, dataDB, wildcardDataDB, zoneDB, regexDataDB, regexDB, hstsDB, whiteDB, wildcardWhiteDB, regexWhiteDB, excludeAAAADB, excludeSS, block_cache, exclusion_cache, noAAAADB, gpListDB, safeSearchDB, maxmindReader, segmentSizeDB + + if not register_inplace_cb_reply(inplace_cb_reply, env, id): + log_info('[pfBlockerNG]: Failed register_inplace_cb_reply') + return False + + if not register_inplace_cb_reply_cache(inplace_cb_reply_cache, env, id): + log_info('[pfBlockerNG]: Failed register_inplace_cb_reply_cache') + return False + + if not register_inplace_cb_reply_local(inplace_cb_reply_local, env, id): + log_info('[pfBlockerNG]: Failed register_inplace_cb_reply_local') + return False + + if not register_inplace_cb_reply_servfail(inplace_cb_reply_servfail, env, id): + log_info('[pfBlockerNG]: Failed register_inplace_cb_reply_servfail') + return False + # Validate write access to log files for l_file in ('dnsbl', 'dns_reply', 'unified'): lfile = '/var/log/pfblockerng/' + l_file + '.log' @@ -146,25 +215,20 @@ def write(self, msg): if not pfb['mod_sqlite3']: sys.stderr.write("[pfBlockerNG]: 
Failed to load python module 'sqlite3': {}" .format(pfb['mod_sqlite3_e'])) + if not pfb['async_io']: + sys.stderr.write("[pfBlockerNG]: Failed to create I/O Thread Pool Executor: {}" .format(pfb['async_io_executor_e'])) + # Initialize default settings pfb['dnsbl_ipv4'] = '' pfb['dnsbl_ipv6'] = '' - pfb['dataDB'] = False - pfb['zoneDB'] = False - pfb['hstsDB'] = False - pfb['whiteDB'] = False - pfb['regexDB'] = False - pfb['whiteDB'] = False - pfb['gpListDB'] = False - pfb['noAAAADB'] = False pfb['python_idn'] = False pfb['python_ipv6'] = False pfb['python_hsts'] = False pfb['python_reply'] = False pfb['python_cname'] = False - pfb['safeSearchDB'] = False pfb['group_policy'] = False pfb['python_enable'] = False + pfb['python_debug'] = False pfb['python_nolog'] = False pfb['python_control'] = False pfb['python_maxmind'] = False @@ -200,21 +264,39 @@ def write(self, msg): 'insurance', 'meet', 'new', 'page', 'play', 'search', 'youtube') # Initialize dicts/lists - dataDB = defaultdict(list) - zoneDB = defaultdict(list) - dnsblDB = defaultdict(list) - safeSearchDB = defaultdict(list) - feedGroupIndexDB = defaultdict(list) - - regexDB = defaultdict(str) - whiteDB = defaultdict(str) - hstsDB = defaultdict(str) - gpListDB = defaultdict(str) - noAAAADB = defaultdict(str) - feedGroupDB = defaultdict(str) - excludeDB = [] - excludeAAAADB = [] - excludeSS = [] + dataDB = dict() + wildcardDataDB = dict() + regexDataDB = dict() + whiteDB = dict() + wildcardWhiteDB = dict() + regexWhiteDB = dict() + zoneDB = dict() + safeSearchDB = dict() + segmentSizeDB = {'wildcardDataDB': pow(2, 32), 'wildcardWhiteDB': pow(2, 32), 'zoneDB': pow(2, 32)} + + regexDB = dict() + hstsDB = set() + gpListDB = set() + noAAAADB = dict() + excludeAAAADB = set() + excludeSS = set() + + exclusion_cache = dict() + block_cache = dict() + + # String deduplication for in-memory databases + # Less invasive than String interning, gets collected at the end of initialization + _stringDeduplicationDB = dict() + def 
dedup(str_val, default=None): + if not str_val: + return default if default else str_val + + cached = _stringDeduplicationDB.get(str_val) + if cached: + return cached + + _stringDeduplicationDB[str_val] = str_val + return str_val # Read pfb_unbound.ini settings if os.path.isfile(pfb['pfb_unbound.ini']): @@ -228,6 +310,8 @@ def write(self, msg): if config.has_section('MAIN'): if config.has_option('MAIN', 'python_enable'): pfb['python_enable'] = config.getboolean('MAIN', 'python_enable') + if config.has_option('MAIN', 'python_debug'): + pfb['python_debug'] = config.getboolean('MAIN', 'python_debug') if config.has_option('MAIN', 'python_ipv6'): pfb['python_ipv6'] = config.getboolean('MAIN', 'python_ipv6') if config.has_option('MAIN', 'python_reply'): @@ -238,14 +322,13 @@ def write(self, msg): pfb['python_hsts'] = config.getboolean('MAIN', 'python_hsts') if config.has_option('MAIN', 'python_idn'): pfb['python_idn'] = config.getboolean('MAIN', 'python_idn') - if config.has_option('MAIN', 'python_tld_seg'): - pfb['python_tld_seg'] = config.getint('MAIN', 'python_tld_seg') if config.has_option('MAIN', 'python_tld'): pfb['python_tld'] = config.getboolean('MAIN', 'python_tld') if config.has_option('MAIN', 'python_tlds'): - pfb['python_tlds'] = config.get('MAIN', 'python_tlds').split(',') + pfb['python_tlds'] = dict.fromkeys(config.get('MAIN', 'python_tlds').split(',')) if config.has_option('MAIN', 'dnsbl_ipv4'): pfb['dnsbl_ipv4'] = config.get('MAIN', 'dnsbl_ipv4') + pfb['dnsbl_ipv4_to_6'] = '::{}'.format(pfb['dnsbl_ipv4']) if config.has_option('MAIN', 'python_nolog'): pfb['python_nolog'] = config.getboolean('MAIN', 'python_nolog') if config.has_option('MAIN', 'python_cname'): @@ -254,7 +337,7 @@ def write(self, msg): pfb['python_control'] = config.getboolean('MAIN', 'python_control') if pfb['python_ipv6']: - pfb['dnsbl_ipv6'] = '::' + pfb['dnsbl_ipv4'] + pfb['dnsbl_ipv6'] = pfb['dnsbl_ipv4_to_6'] else: pfb['dnsbl_ipv6'] = '::' @@ -269,46 +352,66 @@ def write(self, msg): if 
pfb['python_enable']: + debug('Python mode enabled') + + regex_translation = str.maketrans({'.': r'\.', '*': r'.*'}) + # Enable the Blacklist functions (IDN) if pfb['python_idn']: pfb['python_blacklist'] = True + debug('Python IDN enabled') + debug('Python Blacklist enabled. Reason: IDN') # Enable the Blacklist functions (TLD Allow) - if pfb['python_tld'] and pfb['python_tlds'] != '': + if pfb['python_tld'] and pfb['python_tlds']: pfb['python_blacklist'] = True + debug('Python TLD Allow enabled: {}', list(pfb['python_tlds'].keys())) + debug('Python Blacklist enabled. Reason: TLD Allow') # Collect user-defined Regex patterns if config.has_section('REGEX'): regex_config = config.items('REGEX') if regex_config: + debug('REGEX configuration section found') r_count = 1 for name, pattern in regex_config: try: - regexDB[name] = re.compile(pattern) - pfb['regexDB'] = True - pfb['python_blacklist'] = True + entry = {'key': pattern, 'log': '1', 'feed': name, 'group': 'DNSBL_Regex', 'b_type': 'Python', 'regex': re.compile(pattern, re.IGNORECASE)} + regexDB[pattern] = entry + debug('Parsed user REGEX: {}: {}', pattern, entry) except Exception as e: - sys.stderr.write("[pfBlockerNG]: Regex [ {} ] compile error pattern [ {} ] on line #{}: {}" .format(name, pattern, r_count, e)) + sys.stderr.write("[pfBlockerNG]: Regex [ {} ] compile error pattern [ {} ] on line #{}: {}" .format(name, pattern, r_count, e)) pass r_count += 1 + if regexDB: + pfb['python_blacklist'] = True + debug('Python Blacklist enabled. 
Reason: REGEX') + # Collect user-defined no AAAA domains if config.has_section('noAAAA'): noaaaa_config = config.items('noAAAA') if noaaaa_config: + debug('noAAAA configuration section found') try: for row, line in noaaaa_config: - data = line.rstrip('\r\n').split(',') + line = line.rstrip('\r\n') + debug('Parsing no-AAAA domain: {}', line) + data = line.split(',') if data and len(data) == 2: - if data[1] == '1': - wildcard = True - else: - wildcard = False - noAAAADB[data[0]] = wildcard + domain_name = data[0].lower() + wildcard = data[1] == '1' + + debug('Parsed no-AAAA domain: {}, wildcard={}', domain_name, wildcard) + + # if both wildcard and non-wildcard entries are found, keep the wildcard only + if wildcard: + noAAAADB[domain_name] = True + elif domain_name not in noAAAADB: + noAAAADB[domain_name] = False else: sys.stderr.write("[pfBlockerNG]: Failed to parse: noAAAA: row:{} line:{}" .format(row, line)) - pfb['noAAAADB'] = True except Exception as e: sys.stderr.write("[pfBlockerNG]: Failed to load no AAAA domain list: {}" .format(e)) pass @@ -317,11 +420,12 @@ def write(self, msg): if config.has_section('GP_Bypass_List'): gp_bypass_list = config.items('GP_Bypass_List') if gp_bypass_list: + debug('GP_Bypass_List configuration section found') try: for row, line in gp_bypass_list: - gpListDB[line.rstrip('\r\n')] = 0 - - pfb['gpListDB'] = True + value = line.rstrip('\r\n') + debug('Parsed Group Policy Bypass entry: {}', value) + gpListDB.add(line) except Exception as e: sys.stderr.write("[pfBlockerNG]: Failed to load GP Bypass List: {}" .format(e)) pass @@ -331,47 +435,40 @@ def write(self, msg): try: with open(pfb['pfb_py_ss']) as csv_file: csv_reader = csv.reader(csv_file, delimiter=',') + debug('SafeSearch Redirection file found: {}', pfb['pfb_py_ss']) for row in csv_reader: if row and len(row) == 3: - safeSearchDB[row[0]] = {'A': row[1], 'AAAA': row[2]} + domain_name = row[0].lower() + entry = {'A': row[1], 'AAAA': row[2]} + debug('Parsed SafeSearch 
Redirection entry: {}: {}', domain_name, entry) + safeSearchDB[domain_name] = entry else: sys.stderr.write("[pfBlockerNG]: Failed to parse: {}: {}" .format(pfb['pfb_py_ss'], row)) - pfb['safeSearchDB'] = True except Exception as e: - sys.stderr.write("[pfBlockerNG]: Failed to load: {}: {}" .format(pfb['pfb_py_zone'], e)) + sys.stderr.write("[pfBlockerNG]: Failed to load: {}: {}" .format(pfb['pfb_py_ss'], e)) pass - # While reading 'data|zone' CSV files: Replace 'Feed/Group' pairs with an index value (Memory performance) - feedGroup_index = 0 - # Zone dicts if os.path.isfile(pfb['pfb_py_zone']): try: with open(pfb['pfb_py_zone']) as csv_file: csv_reader = csv.reader(csv_file, delimiter=',') + debug('Zone Blacklist file found: {}', pfb['pfb_py_zone']) for row in csv_reader: - if row and len(row) == 6: + if row and len(row) >= 6: # Query Feed/Group/index - isInFeedGroupDB = feedGroupDB.get(row[4] + row[5]) - - # Add Feed/Group/index - if isInFeedGroupDB is None: - feedGroupDB[row[4] + row[5]] = feedGroup_index - feedGroupIndexDB[feedGroup_index] = {'feed': row[4], 'group': row[5]} - final_index = feedGroup_index - feedGroup_index += 1 - - # Use existing Feed/Group/index - else: - final_index = isInFeedGroupDB - - zoneDB[row[1]] = {'log': row[3], 'index': final_index} + domain_name = dedup(row[1]) + entry = {'key': domain_name, 'log': dedup(row[3]), 'feed': dedup(row[4], default='Unknown'), 'group': dedup(row[5], default='Unknown'), 'b_type': 'TLD'}; + debug('Parsed Zone Blacklist entry: {}', entry) + zoneDB[domain_name] = entry + segmentSizeDB['zoneDB'] = min(segmentSizeDB['zoneDB'], domain_name.count('.') + 1) else: sys.stderr.write("[pfBlockerNG]: Failed to parse: {}: {}" .format(pfb['pfb_py_zone'], row)) - pfb['zoneDB'] = True - pfb['python_blacklist'] = True + if zoneDB: + pfb['python_blacklist'] = True + debug('Python Blacklist enabled. 
Reason: Zone Blacklist') except Exception as e: sys.stderr.write("[pfBlockerNG]: Failed to load: {}: {}" .format(pfb['pfb_py_zone'], e)) pass @@ -381,50 +478,89 @@ def write(self, msg): try: with open(pfb['pfb_py_data']) as csv_file: csv_reader = csv.reader(csv_file, delimiter=',') + debug('Blacklist data file found: {}', pfb['pfb_py_data']) for row in csv_reader: - if row and len(row) == 6: - # Query Feed/Group/index - isInFeedGroupDB = feedGroupDB.get(row[4] + row[5]) - - # Add Feed/Group/index - if isInFeedGroupDB is None: - feedGroupDB[row[4] + row[5]] = feedGroup_index - feedGroupIndexDB[feedGroup_index] = {'feed': row[4], 'group': row[5]} - final_index = feedGroup_index - feedGroup_index += 1 - - # Use existing Feed/Group/index + if row and (len(row) == 6 or len(row) == 7): + if len(row) == 7 and row[6] == '2': + expression = row[1] + try: + python_regex = r'(?:^|\.){}$'.format(expression.translate(regex_translation)) + entry = {'key': expression, 'log': dedup(row[3]), 'feed': dedup(row[4], default='Unknown'), 'group': dedup(row[5], default='Unknown'), 'b_type': 'DNSBL', 'regex': re.compile(python_regex, re.IGNORECASE)} + debug('Parsed Blacklist entry (Regex): {}', entry) + regexDataDB[expression] = entry + except Exception as e: + sys.stderr.write("[pfBlockerNG]: Failed to parse regex in file {}: {}: {}".format(pfb['pfb_py_data'], expression, e)) + pass + elif len(row) == 7 and row[6] == '1': + domain_name = dedup(row[1]) + entry = {'key': domain_name, 'log': dedup(row[3]), 'feed': dedup(row[4], default='Unknown'), 'group': dedup(row[5], default='Unknown'), 'b_type': 'DNSBL'} + debug('Parsed Blacklist entry (Wildcard): {}', entry) + wildcardDataDB[domain_name] = entry + segmentSizeDB['wildcardDataDB'] = min(segmentSizeDB['wildcardDataDB'], domain_name.count('.') + 1) else: - final_index = isInFeedGroupDB + domain_name = dedup(row[1]) + entry = {'key': domain_name, 'log': dedup(row[3]), 'feed': dedup(row[4], default='Unknown'), 'group': dedup(row[5], 
default='Unknown'), 'b_type': 'DNSBL'} + debug('Parsed Blacklist entry (Domain): {}', entry) + dataDB[domain_name] = entry - dataDB[row[1]] = {'log': row[3], 'index': final_index} else: - sys.stderr.write("[pfBlockerNG]: Failed to parse: {}: {}" .format(pfb['pfb_py_data'], row)) + sys.stderr.write("[pfBlockerNG]: Failed to parse: {}: {}".format(pfb['pfb_py_data'], row)) - pfb['dataDB'] = True - pfb['python_blacklist'] = True + if dataDB or wildcardDataDB or regexDataDB: + pfb['python_blacklist'] = True + debug('Python Blacklist enabled. Reason: Blacklist data') except Exception as e: - sys.stderr.write("[pfBlockerNG]: Failed to load: {}: {}" .format(pfb['pfb_py_data'], e)) + sys.stderr.write("[pfBlockerNG]: Failed to load: {}: {}".format(pfb['pfb_py_data'], e)) pass - # Clear temporary Feed/Group/Index list - feedGroupDB.clear() - if pfb['python_blacklist']: - # Collect user-defined Whitelist + # TODO: separate user whitelist and DNSBL exclusions + # Collect whitelists and DNSBL exclusions if os.path.isfile(pfb['pfb_py_whitelist']): try: with open(pfb['pfb_py_whitelist']) as csv_file: csv_reader = csv.reader(csv_file, delimiter=',') + debug('User-defined whitelist data file found: {}', pfb['pfb_py_whitelist']) for row in csv_reader: - if row and len(row) == 2: - if row[1] == '1': - wildcard = True + if row and (len(row) == 2 or len(row) == 7): + if len(row) == 2: + domain_name = dedup(row[0]) + entry = {'key': domain_name, 'log': '1', 'feed': 'DNSBL_WHITELIST', 'group': 'USER'} + + if row[1] == '1': + debug('Parsed Whitelist entry (Wildcard): {}', entry) + wildcardWhiteDB[domain_name] = entry + segmentSizeDB['wildcardWhiteDB'] = min(segmentSizeDB['wildcardWhiteDB'], domain_name.count('.') + 1) + else: + debug('Parsed Whitelist entry (Domain): {}', entry) + whiteDB[domain_name] = entry + else: - wildcard = False - whiteDB[row[0]] = wildcard - pfb['whiteDB'] = True + + if row[6] == '2': + expression = row[1] + try: + python_regex = 
r'(?:^|\.){}$'.format(expression.translate(regex_translation)) + entry = {'key': expression, 'log': dedup(row[3]), 'feed': dedup(row[4], default='Unknown'), 'group': dedup(row[5], default='Unknown'), 'regex': re.compile(python_regex, re.IGNORECASE)} + debug('Parsed Whitelist entry (Regex): {}', entry) + regexWhiteDB[expression] = entry + except Exception as e: + sys.stderr.write("[pfBlockerNG]: Failed to parse regex in file {}: {}: {}".format(pfb['pfb_py_whitelist'], expression, e)) + pass + else: + if row[6] == '1': + domain_name = dedup(row[1]) + entry = {'key': domain_name, 'log': dedup(row[3]), 'feed': dedup(row[4], default='Unknown'), 'group': dedup(row[5], default='Unknown')} + debug('Parsed Whitelist entry (Wildcard): {}', entry) + wildcardWhiteDB[domain_name] = entry + segmentSizeDB['wildcardWhiteDB'] = min(segmentSizeDB['wildcardWhiteDB'], domain_name.count('.') + 1) + else: + domain_name = dedup(row[1]) + entry = {'key': domain_name, 'log': dedup(row[3]), 'feed': dedup(row[4], default='Unknown'), 'group': dedup(row[5], default='Unknown')} + debug('Parsed Whitelist entry (Domain): {}', entry) + whiteDB[domain_name] = entry + else: sys.stderr.write("[pfBlockerNG]: Failed to parse: {}: {}" .format(pfb['pfb_py_whitelist'], row)) @@ -436,9 +572,11 @@ def write(self, msg): if pfb['python_hsts'] and os.path.isfile(pfb['pfb_py_hsts']): try: with open(pfb['pfb_py_hsts']) as hsts: + debug('HSTS data found: {}', pfb['python_hsts']) for line in hsts: - hstsDB[line.rstrip('\r\n')] = 0 - pfb['hstsDB'] = True + value = line.rstrip('\r\n') + debug('Parsed HSTS entry: {}', value) + hstsDB.add(value) except Exception as e: sys.stderr.write("[pfBlockerNG]: Failed to load: {}: {}" .format(pfb['pfb_py_hsts'], e)) pass @@ -446,6 +584,7 @@ def write(self, msg): # Validate SQLite3 database connections if pfb['mod_sqlite3']: + debug('Connecting to SQLite databases') # Enable Resolver query statistics for i in range(2): try: @@ -460,6 +599,8 @@ def write(self, msg): # Enable DNSBL 
statistics if pfb['python_blacklist']: + + debug('Enabling DNSBL statistics') for i in range(2): try: if write_sqlite(2, '', False): @@ -473,6 +614,8 @@ def write(self, msg): # Open MaxMind db reader for DNS Reply GeoIP logging if pfb['mod_maxminddb'] and pfb['python_reply'] and os.path.isfile(pfb['maxminddb']): + + debug('Open MaxMind database for DNS Reply GeoIP logging') try: maxmindReader = maxminddb.open_database(pfb['maxminddb']) pfb['python_maxmind'] = True @@ -482,19 +625,27 @@ def write(self, msg): else: log_info('[pfBlockerNG]: Failed to load ini configuration. Ini file missing.') - log_info('[pfBlockerNG]: init_standard script loaded') + debug('------------------------------------------------') + debug('Initialization complete. Summary of parsed data:') + debug('------------------------------------------------') + debug('DNSBL count (Zone): {}', len(zoneDB)) + debug('DNSBL count (Domain): {}', len(dataDB)) + debug('DNSBL count (Wildcard): {}', len(wildcardDataDB)) + debug('DNSBL count (Regex): {}', len(regexDataDB)) + debug('DNSBL count (User Regex): {}', len(regexDB)) + debug('Whitelist count (Domain): {}', len(whiteDB)) + debug('Whitelist count (Wildcard): {}', len(wildcardWhiteDB)) + debug('Whitelist count (Regex): {}', len(regexWhiteDB)) + debug('No-AAAA count: {}', len(noAAAADB)) + debug('Group Policy count: {}', len(gpListDB)) + debug('Safe Search count: {}', len(safeSearchDB)) + debug('HSTS count: {}', len(hstsDB)) + debug('------------------------------------------------') - -def pfb_regex_match(q_name): - global regexDB - - if q_name: - for k,r in regexDB.items(): - if r.search(q_name): - return k - return False + log_info('[pfBlockerNG]: init_standard script loaded') +@traced def get_q_name_qstate(qstate): q_name = '' try: @@ -507,7 +658,7 @@ def get_q_name_qstate(qstate): pass return is_unknown(q_name) - +@traced def get_q_name_qinfo(qinfo): q_name = '' try: @@ -518,12 +669,12 @@ def get_q_name_qinfo(qinfo): pass return is_unknown(q_name) - 
+@traced def get_q_ip(qstate): q_ip = '' try: - if qstate and qstate.mesh_info.reply_list: + if qstate: reply_list = qstate.mesh_info.reply_list while reply_list: if reply_list.query_reply: @@ -535,22 +686,24 @@ def get_q_ip(qstate): pass return is_unknown(q_ip) - +@traced def get_q_ip_comm(kwargs): q_ip = '' try: - if kwargs and kwargs is not None and ('pfb_addr' in kwargs): - q_ip = kwargs['pfb_addr'] - elif kwargs and kwargs is not None and kwargs['repinfo'] and kwargs['repinfo'].addr: - q_ip = kwargs['repinfo'].addr + if kwargs: + q_ip = kwargs.get('pfb_addr') + if not q_ip: + repinfo = kwargs.get('repinfo') + if repinfo: + q_ip = repinfo.addr except Exception as e: for a in e: sys.stderr.write("[pfBlockerNG]: Failed get_q_ip_comm: {}" .format(a)) pass return is_unknown(q_ip) - +@traced def get_q_type(qstate, qinfo): q_type = '' if qstate and qstate.qinfo.qtype_str: @@ -559,7 +712,7 @@ def get_q_type(qstate, qinfo): q_type = qinfo.qtype_str return is_unknown(q_type) - +@traced def get_o_type(qstate, rep): o_type = '' if qstate: @@ -567,63 +720,46 @@ def get_o_type(qstate, rep): o_type = qstate.return_msg.rep.rrsets[0].rk.type_str elif qstate.qinfo.qtype_str: o_type = qstate.qinfo.qtype_str - elif rep is not None and rep.rrsets[0] is not None and rep.rrsets[0].rk is not None: + elif rep and rep.rrsets[0] and rep.rrsets[0].rk: o_type = rep.rrsets[0].rk.type_str return is_unknown(o_type) - +@traced def get_rep_ttl(rep): ttl = '' if rep and rep.ttl: ttl = rep.ttl return str(is_unknown(ttl)).replace('Unknown', 'Unk') - +@traced def get_tld(qstate): tld = '' if qstate and qstate.qinfo and len(qstate.qinfo.qname_list) > 1: tld = qstate.qinfo.qname_list[-2] return tld - +@traced def convert_ipv4(x): - global pfb - ipv4 = '' if x: - if pfb['py_v3']: - ipv4 = "{}.{}.{}.{}" .format(x[2], x[3], x[4], x[5]) - else: - ipv4 = "{}.{}.{}.{}" .format(ord(x[2]), ord(x[3]), ord(x[4]), ord(x[5])) + ipv4 = "{}.{}.{}.{}" .format(x[2], x[3], x[4], x[5]) return is_unknown(ipv4) - 
+@traced def convert_ipv6(x): - global pfb - ipv6 = '' if x: - if pfb['py_v3']: - ipv6 = "{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}" \ - .format(x[2],x[3],x[4],x[5],x[6],x[7],x[8],x[9],x[10],x[11],x[12],x[13],x[14],x[15],x[16],x[17]) - else: - ipv6 = "{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}" \ - .format(ord(x[2]),ord(x[3]),ord(x[4]),ord(x[5]),ord(x[6]),ord(x[7]),ord(x[8]),ord(x[9]),ord(x[10]), \ - ord(x[11]),ord(x[12]),ord(x[13]),ord(x[14]),ord(x[15]),ord(x[16]),ord(x[17])) + ipv6 = "{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}:{:02x}{:02x}" \ + .format(x[2],x[3],x[4],x[5],x[6],x[7],x[8],x[9],x[10],x[11],x[12],x[13],x[14],x[15],x[16],x[17]) return is_unknown(ipv6) - +@traced def convert_other(x): - global pfb - final = '' if x: for i in x[3:]: - if pfb['py_v3']: - val = i - else: - val = ord(i) + val = i if val == 0: i = '|' @@ -638,14 +774,13 @@ def convert_other(x): elif val <= 33 or val > 126: continue else: - if pfb['py_v3']: - i = chr(i) + i = chr(i) final += i final = final.strip('.|') return is_unknown(final) - +@traced def is_unknown(x): try: if not x or x is None: @@ -656,7 +791,7 @@ def is_unknown(x): pass return x - +@traced def write_sqlite(db, groupname, update): global pfb @@ -745,53 +880,61 @@ def write_sqlite(db, groupname, update): return True -def get_details_dnsbl(m_type, qinfo, qstate, rep, kwargs): - global pfb, rcodeDB, dnsblDB, noAAAADB, maxmindReader +def write_sqlite_async(db, groupname, update): + if pfb['async_io']: + pfb['async_io_executor'].submit(write_sqlite, db, groupname, update) + else: + write_sqlite(db, groupname, update) - if qstate and qstate is not None: - q_name = get_q_name_qstate(qstate) - elif qinfo and qinfo is not None: - q_name = get_q_name_qinfo(qinfo) +@traced +def format_b_type(b_type, q_type, isCNAME): + if isCNAME: + return 
'{}_CNAME_{}'.format(b_type, q_type) else: - return True + return '{}_{}'.format(b_type, q_type) + +@traced +def get_details_dnsbl(q_name, q_ip, q_type, isCNAME): + global pfb, block_cache # Increment totalqueries counter if pfb['sqlite3_resolver_con']: - write_sqlite(1, '', True) + write_sqlite_async(1, '', True) # Determine if event is a 'reply' or DNSBL block - isDNSBL = dnsblDB.get(q_name) - if isDNSBL is not None: + cached_block = block_cache.get(q_name) + if cached_block: + + block_result = cached_block['entry'] + if not block_result: + # Negative cached result, skip it + return True # If logging is disabled, do not log blocked DNSBL events (Utilize DNSBL Webserver) except for Python nullblock events - if pfb['python_nolog'] and not isDNSBL['b_ip'] in ('0.0.0.0', '::'): + if pfb['python_nolog'] and not block_result['b_ip'] in ('0.0.0.0', '::'): return True # Increment dnsblgroup counter - if pfb['sqlite3_dnsbl_con'] and isDNSBL['group'] != '': - write_sqlite(2, isDNSBL['group'], True) + if pfb['sqlite3_dnsbl_con'] and block_result['group'] != '': + write_sqlite_async(2, block_result['group'], True) dupEntry = '+' - lastEvent = dnsblDB.get('last-event') - if lastEvent is not None: - if str(lastEvent) == str(isDNSBL): - dupEntry = '-' - else: - dnsblDB['last-event'] = isDNSBL + lastEvent = block_cache.get('last-event') + if lastEvent and lastEvent == cached_block: + dupEntry = '-' else: - dnsblDB['last-event'] = isDNSBL + block_cache['last-event'] = cached_block # Skip logging - if isDNSBL['log'] == '2': + if block_result['log'] == '2': return True - m_type = isDNSBL['b_type'] - - q_ip = get_q_ip_comm(kwargs) + q_ip = is_unknown(q_ip) if q_ip == 'Unknown': q_ip = '127.0.0.1' - for i in range(2): + timestamp = 'TIME_UNAVAILABLE' + for _ in range(2): try: timestamp = datetime.now().strftime("%b %-d %H:%M:%S") except TypeError: @@ -799,29 +942,68 @@ def get_details_dnsbl(m_type, qinfo, qstate, rep, kwargs): continue break - csv_line = ','.join('{}'.format(v) for v 
in ('DNSBL-python', timestamp, q_name, q_ip, isDNSBL['p_type'], isDNSBL['b_type'], isDNSBL['group'], isDNSBL['b_eval'], isDNSBL['feed'], dupEntry)) - log_entry(csv_line, '/var/log/pfblockerng/dnsbl.log') - log_entry(csv_line, '/var/log/pfblockerng/unified.log') + b_type = format_b_type(block_result['b_type'], q_type, isCNAME) - return True + csv_line = ','.join(str(v) for v in ('DNSBL-python', timestamp, q_name, q_ip, block_result['p_type'], b_type, block_result['group'], block_result['b_eval'], block_result['feed'], dupEntry)) + if pfb['async_io']: + executor = pfb['async_io_executor'] + executor.submit(log_entry, csv_line, '/var/log/pfblockerng/dnsbl.log') + executor.submit(log_entry, csv_line, '/var/log/pfblockerng/unified.log') + else: + log_entry(csv_line, '/var/log/pfblockerng/dnsbl.log') + log_entry(csv_line, '/var/log/pfblockerng/unified.log') + return True def log_entry(line, log): for i in range(1,5): try: with open(log, 'a') as append_log: - append_log.write(line + '\n') - except Exception as e: + append_log.write(line) + append_log.write('\n') + break + except: if i == 4: - sys.stderr.write("[pfBlockerNG]: log_entry: {}: {}" .format(i, e)) - time.sleep(0.25) + sys.stderr.write("[pfBlockerNG]: Exception caught in log_entry(line='{}', log='{}'): \n\t{}".format(line, log, '\t'.join(traceback.format_exc().splitlines(True)))) + else: + time.sleep(0.25) pass continue - break +def _debug(format_str, *args): + global pfb + if pfb.get('python_debug') and isinstance(format_str, str): + with open('/var/log/pfblockerng/py_debug.log', 'a') as append_log: + timestamp = datetime.now().strftime("%b %-d %H:%M:%S") + append_log.write('{}|DEBUG: {}\n'.format(timestamp, format_str.format(*args) if args else format_str)) +# Helper function for using async I/O +def __debug(format_str, *args): + for i in range(1,5): + try: + _debug(format_str, *args) + break + except: + if i == 4: + sys.stderr.write("[pfBlockerNG]: Exception caught in _debug(format_str='{}', args={}): 
\n\t{}".format(format_str, args, '\t'.join(traceback.format_exc().splitlines(True)))) + else: + time.sleep(0.25) + pass + continue + +def debug(format_str, *args): + global pfb + # validate before to avoid additional costs for non-debug calls + if pfb.get('python_debug') and isinstance(format_str, str): + if pfb['async_io']: + executor = pfb['async_io_executor'] + executor.submit(__debug, format_str, *args) + else: + __debug(format_str, *args) + +@traced def get_details_reply(m_type, qinfo, qstate, rep, kwargs): - global pfb, rcodeDB, dnsblDB, noAAAADB, maxmindReader + global pfb, rcodeDB, block_cache, noAAAADB, maxmindReader if qstate and qstate is not None: q_name = get_q_name_qstate(qstate) @@ -855,7 +1037,7 @@ def get_details_reply(m_type, qinfo, qstate, rep, kwargs): # Increment totalqueries counter (Don't include the Resolver DNS requests) if pfb['sqlite3_resolver_con'] and q_ip != '127.0.0.1': - write_sqlite(1, '', True) + write_sqlite_async(1, '', True) # Do not log Replies, if disabled if not pfb['python_reply']: @@ -881,10 +1063,7 @@ def get_details_reply(m_type, qinfo, qstate, rep, kwargs): if pfb['mod_ipaddress']: r_addr = convert_ipv6(x) try: - if pfb['py_v3']: - r_addr = ipaddress.ip_address(r_addr).compressed - else: - r_addr = ipaddress.ip_address(unicode(r_addr)).compressed + r_addr = ipaddress.ip_address(r_addr).compressed except Exception as e: sys.stderr.write("[pfBlockerNG]: Failed to compress IPv6: {}, {}" .format(r_addr, e)) pass @@ -924,28 +1103,20 @@ def get_details_reply(m_type, qinfo, qstate, rep, kwargs): q_name = 'NS' # Determine if domain was noAAAA blocked - if r_addr == 'NXDOMAIN' and q_type == 'AAAA' and noAAAADB.get(q_name) is not None: + if r_addr == 'NXDOMAIN' and q_type == 'AAAA' and q_name in noAAAADB: r_addr = 'noAAAA' if pfb['python_maxmind'] and r_addr not in ('', 'Unknown', 'NXDOMAIN', 'NODATA', 'DNSSEC', 'SOA', 'NS'): try: - if pfb['py_v3']: - version = ipaddress.ip_address(r_addr).version - else: - version = 
ipaddress.ip_address(unicode(r_addr)).version - + version = ipaddress.ip_address(r_addr).version except Exception as e: version = '' pass if version != '': try: - if pfb['py_v3']: - isPrivate = ipaddress.ip_address(r_addr).is_private - isLoopback = ipaddress.ip_address(r_addr).is_loopback - else: - isPrivate = ipaddress.ip_address(unicode(r_addr)).is_private - isLoopback = ipaddress.ip_address(unicode(r_addr)).is_loopback + isPrivate = ipaddress.ip_address(r_addr).is_private + isLoopback = ipaddress.ip_address(r_addr).is_loopback if isPrivate: iso_code = 'prv' @@ -996,14 +1167,20 @@ def get_details_reply(m_type, qinfo, qstate, rep, kwargs): continue break - csv_line = ','.join('{}'.format(v) for v in ('DNS-reply', timestamp, m_type, o_type, q_type, ttl, q_name, q_ip, r_addr, iso_code)) - log_entry(csv_line, '/var/log/pfblockerng/dns_reply.log') - log_entry(csv_line, '/var/log/pfblockerng/unified.log') + csv_line = ','.join(str(v) for v in ('DNS-reply', timestamp, m_type, o_type, q_type, ttl, q_name, q_ip, r_addr, iso_code)) + if pfb['async_io']: + executor = pfb['async_io_executor'] + executor.submit(log_entry, csv_line, '/var/log/pfblockerng/dns_reply.log') + executor.submit(log_entry, csv_line, '/var/log/pfblockerng/unified.log') + else: + log_entry(csv_line, '/var/log/pfblockerng/dns_reply.log') + log_entry(csv_line, '/var/log/pfblockerng/unified.log') return True # Is sleep duration valid +@traced def python_control_duration(duration): try: @@ -1018,6 +1195,7 @@ def python_control_duration(duration): return False # Is thread still active +@traced def python_control_thread(tname): global threads @@ -1031,6 +1209,7 @@ def python_control_thread(tname): return False # Python_control Start Thread +@traced def python_control_start_thread(tname, fcall, arg1, arg2): global threads @@ -1046,7 +1225,8 @@ def python_control_start_thread(tname, fcall, arg1, arg2): # Python_control sleep timer -def python_control_sleep(duration, arg): +@traced +def 
python_control_sleep(duration): global pfb try: @@ -1059,57 +1239,207 @@ def python_control_sleep(duration, arg): # Python_control Add Bypass IP for specified duration +@traced def python_control_addbypass(duration, b_ip): global pfb, gpListDB try: time.sleep(duration) - if gpListDB.get(b_ip) is not None: - gpListDB.pop(b_ip) + if b_ip in gpListDB: + gpListDB.remove(b_ip) return True except Exception as e: sys.stderr.write("[pfBlockerNG] python_control_addbypass: {}" .format(e)) pass return False +@traced +@exception_logger def inplace_cb_reply(qinfo, qstate, rep, rcode, edns, opt_list_out, region, **kwargs): get_details_reply('reply-x', qinfo, qstate, rep, kwargs) return True +@traced +@exception_logger def inplace_cb_reply_cache(qinfo, qstate, rep, rcode, edns, opt_list_out, region, **kwargs): get_details_reply('cache', qinfo, qstate, rep, kwargs) return True +@traced +@exception_logger def inplace_cb_reply_local(qinfo, qstate, rep, rcode, edns, opt_list_out, region, **kwargs): get_details_reply('local', qinfo, qstate, rep, kwargs) return True +@traced +@exception_logger def inplace_cb_reply_servfail(qinfo, qstate, rep, rcode, edns, opt_list_out, region, **kwargs): get_details_reply('servfail', qinfo, qstate, rep, kwargs) return True +@traced +@exception_logger def deinit(id): global pfb, maxmindReader if pfb['python_maxmind']: maxmindReader.close() + if pfb['async_io']: + pfb['async_io_executor'].shutdown() + log_info('[pfBlockerNG]: pfb_unbound.py script exiting') return True +@traced +@exception_logger def inform_super(id, qstate, superqstate, qdata): return True +@traced +def lookup(db, name, try_www=False, tld_limit=1, filter=None): + debug('Checking DB for: {}', name) + + entry = db.get(name) + if entry and (not filter or filter(entry)): + return (entry, name) + + if try_www: + if name.startswith('www.'): + name = name[4:] + entry = db.get(name) + if entry and (not filter or filter(entry)): + return (entry, name) + else: + www_name = 'www.{}'.format(name) + 
entry = db.get(www_name) + if entry and (not filter or filter(entry)): + return (entry, www_name) + + if tld_limit > 0: + q = name.split('.', 1)[-1] + for _ in range(q.count('.') + 1, tld_limit - 1, -1): + entry = db.get(q) + if entry and (not filter or filter(entry)): + return (entry, q) + q = q.split('.', 1)[-1] + + return (None, None) + +@traced +def regex_lookup(db, name, filter=None): + if name: + for entry in db.values(): + if entry['regex'].search(name): + if not filter or filter(entry): + return (entry, name) + return (None, None) + +@traced +def block_lookup(q_name, tld): + global pfb, dataDB, wildcardDataDB, zoneDB, regexDataDB, regexDB, segmentSizeDB + + result = None # the raw entry found in the queried dictionary + match = None # the actual value which caused the match (e.g. the TLD, www.domain, etc.) + + # Allow only approved TLDs + if tld and pfb['python_tld'] and tld not in pfb['python_tlds'] and q_name != pfb['dnsbl_ipv4'] and q_name != pfb['dnsbl_ipv4_to_6']: + debug('Domain TLD not found in TLD Allow list: {}: {}', q_name, tld) + result = {'key': q_name, 'log': '1', 'feed': 'TLD_Allow', 'group': 'DNSBL_TLD_Allow', 'b_type': 'Python'} + match = q_name + + # Block IDN or 'xn--' Domains + elif pfb['python_idn'] and (q_name.startswith('xn--') or '.xn--' in q_name): + debug("Blocked IDN or 'xn--': {}", q_name) + result = {'key': q_name, 'log': '1', 'feed': 'IDN', 'group': 'DNSBL_IDN', 'b_type': 'Python'} + match = q_name + + # Block via Regex + elif regexDB: + debug('Checking REGEX DB for: {}', q_name) + (result, match) = regex_lookup(regexDB, q_name) + + # Determine if domain is in DNSBL 'data|zone' database + if not result and pfb['python_blocking']: + + # Determine if domain is in DNSBL 'data' database (log to dnsbl.log) + if dataDB: + debug('Checking Blacklist DB (Domain) for: {}', q_name) + (result, match) = lookup(dataDB, q_name, tld_limit=0) + + # Determine TLD segment matches + if not result and wildcardDataDB: + debug('Checking Blacklist DB 
(Wildcard) for: {}', q_name) + (result, match) = lookup(wildcardDataDB, q_name, tld_limit=segmentSizeDB['wildcardDataDB']) + + # Determine if domain is in DNSBL 'zone' database (log to dnsbl.log) + if not result and zoneDB: + debug('Checking Zone DB for: {}', q_name) + (result, match) = lookup(zoneDB, q_name, tld_limit=segmentSizeDB['zoneDB']) + + # Block via Domain Name Regex + if not result and regexDataDB: + debug('Checking Blacklist DB (Regex) for: {}', q_name) + (result, match) = regex_lookup(regexDataDB, q_name) + + # Set log data, if we got a match + if result: + debug('Found Blacklist entry for: {} (matching: {}): {}', q_name, match, result) + + if not result: + # Validate other python methods, if not blocked via DNSBL zone/data + debug('Domain not blacklisted: {}', q_name) + + + return (result, match) + +@traced +def whitelist_lookup(q_name, user_only=False): + global pfb, whiteDB, wildcardWhiteDB, regexWhiteDB, segmentSizeDB + + result = None # the raw entry found in the queried dictionary + match = None # the actual value which caused the match (e.g. the TLD, www.domain, etc.) 
+ filter = None + + # Check only user-defined whitelist entries + if user_only: + filter = (lambda x: x['group'] == 'USER') + + # Validate domain in DNSBL Whitelist + if whiteDB: + debug('Checking whitelist: {}', q_name) + (result, match) = lookup(whiteDB, q_name, try_www=True, tld_limit=0, filter=filter) + + # Determine TLD segment matches + if not result and wildcardWhiteDB: + debug('Checking Whitelist DB (Wildcard) for: {}', q_name) + (result, match) = lookup(wildcardWhiteDB, q_name, tld_limit=segmentSizeDB['wildcardWhiteDB'], filter=filter) + + # Allow via Domain Name Regex + if not result and regexWhiteDB: + debug('Checking Whitelist DB (Regex) for: {}', q_name) + (result, match) = regex_lookup(regexWhiteDB, q_name, filter=filter) + + # Set log data, if we got a match + if result: + debug('Found Whitelist entry for: {} (matching: {}): {}', q_name, match, result) + + return (result, match) + +@traced +@exception_logger def operate(id, event, qstate, qdata): - global pfb, threads, dataDB, zoneDB, hstsDB, whiteDB, excludeDB, excludeAAAADB, excludeSS, dnsblDB, noAAAADB, gpListDB, safeSearchDB, feedGroupIndexDB + global pfb, threads, dataDB, zoneDB, wildcardDataDB, regexDataDB, hstsDB, whiteDB, wildcardWhiteDB, regexWhiteDB, excludeAAAADB, excludeSS, block_cache, exclusion_cache, noAAAADB, gpListDB, safeSearchDB, feedGroupDB, segmentSizeDB qstate_valid = False try: - if qstate is not None and qstate.qinfo.qtype is not None: + if qstate and qstate.qinfo.qtype: qstate_valid = True q_type = qstate.qinfo.qtype + q_type_str = qstate.qinfo.qtype_str q_name_original = get_q_name_qstate(qstate).lower() q_ip = get_q_ip(qstate) + debug('[{}]: q_type={}, q_ip={}', q_name_original, q_type_str, q_ip) else: sys.stderr.write("[pfBlockerNG] qstate is not None and qstate.qinfo.qtype is not None") except Exception as e: @@ -1119,37 +1449,14 @@ def operate(id, event, qstate, qdata): if (event == MODULE_EVENT_NEW) or (event == MODULE_EVENT_PASS): # no AAAA validation - if qstate_valid 
and q_type == RR_TYPE_AAAA and pfb['noAAAADB'] and q_name_original not in excludeAAAADB: - isin_noAAAA = False - - # Determine full domain match - isnoAAAA = noAAAADB.get(q_name_original) - if isnoAAAA is not None: - isin_noAAAA = True - - # Wildcard verification of domain - if not isin_noAAAA: - q = q_name_original.split('.', 1) - q = q[-1] - - # Validate to 2nd level TLD only - for x in range(q.count('.'), 0, -1): - isnoAAAA = noAAAADB.get(q) - - # Determine if domain is a wildcard whitelist entry - if isnoAAAA is not None and isnoAAAA: - isin_noAAAA = True - - # Add sub-domain to noAAAA DB - noAAAADB[q_name_original] = True + if qstate_valid and q_type == RR_TYPE_AAAA and noAAAADB and q_name_original not in excludeAAAADB: - break - else: - q = q.split('.', 1) - q = q[-1] + debug('[{}]: checking no-AAAA DB', q_name_original) + (isnoAAAA, isnoAAAA_match) = lookup(noAAAADB, q_name_original) # Create FQDN Reply Message (AAAA -> A) - if isin_noAAAA: + if isnoAAAA: + debug('[{}]: domain found in no-AAAA DB (matching: {}). Creating FQDN Reply Message (AAAA -> A)', q_name_original, isnoAAAA_match) msg = DNSMessage(qstate.qinfo.qname_str, RR_TYPE_A, RR_CLASS_IN, PKT_QR | PKT_RA) if msg is None or not msg.set_return_msg(qstate): qstate.ext_state[id] = MODULE_ERROR @@ -1162,27 +1469,20 @@ def operate(id, event, qstate, qdata): # Add domain to excludeAAAADB to skip subsequent no AAAA validation else: - excludeAAAADB.append(q_name_original) + debug('[{}]: domain added to AAAA exclusion DB', q_name_original) + excludeAAAADB.add(q_name_original) # SafeSearch Redirection validation - if qstate_valid and pfb['safeSearchDB']: + if qstate_valid and safeSearchDB: # Determine if domain has been previously validated if q_name_original not in excludeSS: - isSafeSearch = safeSearchDB.get(q_name_original) - - # Validate 'www.' Domains - if isSafeSearch is None and not q_name_original.startswith('www.'): - isSafeSearch = safeSearchDB.get('www.' 
+ q_name_original) - - # TODO: See CNAME message below - #if isSafeSearch is None and q_name_original != 'safe.duckduckgo.com' and q_name_original.endswith('duckduckgo.com'): - # isSafeSearch = safeSearchDB.get('duckduckgo.com') - #if isSafeSearch is None and q_name_original != 'safesearch.pixabay.com' and q_name_original.endswith('pixabay.com'): - # isSafeSearch = safeSearchDB.get('pixabay.com') + debug('[{}]: checking Safe Search DB', q_name_original) + (isSafeSearch, isSafeSearch_match) = lookup(safeSearchDB, q_name_original, try_www=True, tld_limit=-1) - if isSafeSearch is not None: + if isSafeSearch: + debug('[{}]: domain found in Safe Search DB (matching: {}): {}', q_name_original, isSafeSearch_match, isSafeSearch) ss_found = False if isSafeSearch['A'] == 'nxdomain': @@ -1192,14 +1492,18 @@ def operate(id, event, qstate, qdata): # TODO: Wait for Unbound code changes to allow for this functionality, using local-zone/local-data entries for CNAMES for now elif isSafeSearch['A'] == 'cname': - if isSafeSearch['AAAA'] is not None and isSafeSearch['AAAA'] != '': + if isSafeSearch['AAAA']: if q_type == RR_TYPE_A: + answer = "{} 3600 IN CNAME {}".format(qstate.qinfo.qname_str, isSafeSearch['AAAA']) + debug('[{}]: answer: {}', q_name_original, answer) cname_msg = DNSMessage(qstate.qinfo.qname_str, RR_TYPE_A, RR_CLASS_IN, PKT_QR | PKT_RD | PKT_RA) - cname_msg.answer.append("{} 3600 IN CNAME {}" .format(qstate.qinfo.qname_str, isSafeSearch['AAAA'])) + cname_msg.answer.append(answer) ss_found = True elif q_type == RR_TYPE_AAAA: + answer = "{} 3600 IN CNAME {}".format(qstate.qinfo.qname_str, isSafeSearch['AAAA']) + debug('[{}]: answer: {}', q_name_original, answer) cname_msg = DNSMessage(qstate.qinfo.qname_str, RR_TYPE_AAAA, RR_CLASS_IN, PKT_QR | PKT_RD | PKT_RA) - cname_msg.answer.append("{} 3600 IN CNAME {}" .format(qstate.qinfo.qname_str, isSafeSearch['AAAA'])) + cname_msg.answer.append(answer) ss_found = True if ss_found: @@ -1213,13 +1517,17 @@ def operate(id, event, 
qstate, qdata): qstate.ext_state[id] = MODULE_RESTART_NEXT return True else: - if (q_type == RR_TYPE_A and isSafeSearch['A'] != '') or (q_type == RR_TYPE_AAAA and isSafeSearch['AAAA'] == ''): + if (q_type == RR_TYPE_A and isSafeSearch['A']) or (q_type == RR_TYPE_AAAA and not isSafeSearch['AAAA']): + answer = "{} 300 IN {} {}".format(qstate.qinfo.qname_str, 'A', isSafeSearch['A']) + debug('[{}]: answer: {}', q_name_original, answer) msg = DNSMessage(qstate.qinfo.qname_str, RR_TYPE_A, RR_CLASS_IN, PKT_QR | PKT_RA) - msg.answer.append("{} 300 IN {} {}" .format(qstate.qinfo.qname_str, 'A', isSafeSearch['A'])) + msg.answer.append(answer) ss_found = True - elif q_type == RR_TYPE_AAAA and isSafeSearch['AAAA'] != '': + elif q_type == RR_TYPE_AAAA and isSafeSearch['AAAA']: + answer = "{} 300 IN {} {}".format(qstate.qinfo.qname_str, 'AAAA', isSafeSearch['AAAA']) + debug('[{}]: answer: {}', q_name_original, answer) msg = DNSMessage(qstate.qinfo.qname_str, RR_TYPE_AAAA, RR_CLASS_IN, PKT_QR | PKT_RA) - msg.answer.append("{} 300 IN {} {}" .format(qstate.qinfo.qname_str, 'AAAA', isSafeSearch['AAAA'])) + msg.answer.append(answer) ss_found = True if ss_found: @@ -1235,13 +1543,15 @@ def operate(id, event, qstate, qdata): # Add domain to excludeSS to skip subsequent SafeSearch validation else: - excludeSS.append(q_name_original) + debug('[{}]: domain added to Safe Search exclusion DB', q_name_original) + excludeSS.add(q_name_original) # Python_control - Receive TXT commands from pfSense local IP if qstate_valid and q_type == RR_TYPE_TXT and q_name_original.startswith('python_control.'): control_rcd = False if pfb['python_control'] and q_ip == '127.0.0.1': + debug('[{}]: Python Control', q_name_original) control_command = q_name_original.split('.') if (len(control_command) >= 2): @@ -1252,7 +1562,7 @@ def operate(id, event, qstate, qdata): pfb['python_blacklist'] = False # If duration specified, disable DNSBL Blocking for specified time in seconds - if pfb['mod_threading'] and 
len(control_command) == 3 and control_command[2] != '': + if pfb['mod_threading'] and len(control_command) == 3 and control_command[2]: # Validate Duration argument duration = python_control_duration(control_command[2]) @@ -1262,7 +1572,7 @@ def operate(id, event, qstate, qdata): if not python_control_thread('sleep'): # Start Thread - if not python_control_start_thread('sleep', python_control_sleep, duration, None): + if not python_control_start_thread('sleep', python_control_sleep, duration): control_rcd = False control_msg = 'Python_control: DNSBL disabled: Thread failed' else: @@ -1281,62 +1591,60 @@ def operate(id, event, qstate, qdata): elif control_command[1] == 'addbypass' or control_command[1] == 'removebypass': b_ip = (control_command[2]).replace('-', '.') - if pfb['py_v3']: - isIPValid = ipaddress.ip_address(b_ip) - else: - isIPValid = ipaddress.ip_address(unicode(b_ip)) + isIPValid = ipaddress.ip_address(b_ip) if isIPValid: - if not pfb['gpListDB']: - pfb['gpListDB'] = True control_rcd = True if control_command[1] == 'addbypass': control_msg = "Python_control: Add bypass for IP: [ {} ]" .format(b_ip) # If duration specified, disable DNSBL Blocking for specified time in seconds - if pfb['mod_threading'] and len(control_command) == 4 and control_command[3] != '': + if pfb['mod_threading'] and len(control_command) == 4 and control_command[3]: # Validate Duration argument duration = python_control_duration(control_command[3]) if duration: # Ensure thread is not active - if not python_control_thread('addbypass' + b_ip): + if not python_control_thread('addbypass {}'.format(b_ip)): # Start Thread - if not python_control_start_thread('addbypass' + b_ip, python_control_addbypass, duration, b_ip): + if not python_control_start_thread('addbypass {}'.format(b_ip), python_control_addbypass, duration, b_ip): control_rcd = False - control_msg = "Python_control: Add bypass for IP: [ {} ] thread failed" .format(b_ip) + control_msg = "Python_control: Add bypass for IP: [ 
{} ] thread failed".format(b_ip) else: - control_msg = "{} for {} second(s)" .format(control_msg, duration) + control_msg = "{} for {} second(s)".format(control_msg, duration) else: control_rcd = False - control_msg = "Python_control: Add bypass for IP: [ {} ]: Previous call still in progress" .format(b_ip) + control_msg = "Python_control: Add bypass for IP: [ {} ]: Previous call still in progress".format(b_ip) else: control_rcd = False - control_msg = "Python_control: Add bypass for IP: [ {} ]: duration [ {} ] out of range (1-3600sec)" .format(b_ip, control_command[3]) + control_msg = "Python_control: Add bypass for IP: [ {} ]: duration [ {} ] out of range (1-3600sec)".format(b_ip, control_command[3]) else: # Add bypass called without duration if control_rcd: - gpListDB[b_ip] = 0 + gpListDB.add(b_ip) elif control_command[1] == 'removebypass': - if gpListDB.get(b_ip) is not None: - control_msg = "Python_control: Remove bypass for IP: [ {} ]" .format(b_ip) - gpListDB.pop(b_ip) + if b_ip in gpListDB: + control_msg = "Python_control: Remove bypass for IP: [ {} ]".format(b_ip) + gpListDB.remove(b_ip) else: - control_msg = "Python_control: IP not in Group Policy: [ {} ]" .format(b_ip) + control_msg = "Python_control: IP not in Group Policy: [ {} ]".format(b_ip) if control_rcd: q_reply = 'python_control' else: if control_msg == '': - control_msg = "Python_control: Command not authorized! [ {} ]" .format(q_name_original) + control_msg = "Python_control: Command not authorized! [ {} ]".format(q_name_original) q_reply = 'python_control_fail' + answer = '{}. 0 IN TXT "{}"'.format(q_reply, control_msg) + debug('[{}]: answer: {}', q_name_original, answer) + txt_msg = DNSMessage(qstate.qinfo.qname_str, RR_TYPE_TXT, RR_CLASS_IN, PKT_QR | PKT_RA) - txt_msg.answer.append("{}. 
0 IN TXT \"{}\"" .format(q_reply, control_msg)) + txt_msg.answer.append(answer) if txt_msg is None or not txt_msg.set_return_msg(qstate): qstate.ext_state[id] = MODULE_ERROR @@ -1352,24 +1660,25 @@ def operate(id, event, qstate, qdata): # Group Policy - Bypass DNSBL Validation bypass_dnsbl = False - if pfb['gpListDB']: - q_ip = get_q_ip(qstate) + if gpListDB: + debug('[{}]: checking Group Policy DB', q_name_original) - if q_ip != 'Unknown': - isgpBypass = gpListDB.get(q_ip) - - if isgpBypass is not None: - bypass_dnsbl = True + q_ip = get_q_ip(qstate) + if q_ip != 'Unknown' and q_ip in gpListDB: + debug('[{}]: bypassing DNSBL due to Group Policy match for IP {}', q_name_original, q_ip) + bypass_dnsbl = True # Create list of Domain/CNAMES to be evaluated validate = [] # Skip 'in-addr.arpa' domains - if not q_name_original.endswith('.in-addr.arpa') and not bypass_dnsbl: + if not bypass_dnsbl and not q_name_original.endswith('.in-addr.arpa'): validate.append(q_name_original) # DNSBL CNAME Validation if pfb['python_cname'] and qstate.return_msg: + debug('[{}]: adding CNAMEs for validation', q_name_original) + r = qstate.return_msg.rep if r.an_numrrsets > 1: for i in range (0, r.an_numrrsets): @@ -1383,256 +1692,241 @@ def operate(id, event, qstate, qdata): if domain != 'Unknown': validate.append(domain) + debug('[{}]: validating domain names: {}', q_name_original, validate) + isCNAME = False - for val_counter, q_name in enumerate(validate, start=1): - if val_counter > 1: - isCNAME = True + block_result = None # the raw dictionary entry + block_match = None # the value that caused the match (e.g. TLD, www.domain, etc.) 
+ block_name = None # the q_name that caused the match - # Determine if domain has been previously validated - if q_name not in excludeDB: - - q_type_str = qstate.qinfo.qtype_str - isFound = False - log_type = False - isInWhitelist = False - isInHsts = False - b_type = 'Python' - p_type = 'Python' - feed = 'Unknown' - group = 'Unknown' - - # print "v0: " + q_name - - # Determine if domain was previously DNSBL blocked - isDomainInDNSBL = dnsblDB.get(q_name) - if isDomainInDNSBL is None: - tld = get_tld(qstate) - - # Determine if domain is in DNSBL 'data|zone' database - if pfb['python_blocking']: - - # Determine if domain is in DNSBL 'data' database (log to dnsbl.log) - isDomainInData = False - if pfb['dataDB']: - isDomainInData = dataDB.get(q_name) - if isDomainInData is not None: - #print q_name + ' data: ' + str(isDomainInData) - isFound = True - log_type = isDomainInData['log'] - - # Collect Feed/Group - feedGroup = feedGroupIndexDB.get(isDomainInData['index']) - if feedGroup is not None: - feed = feedGroup['feed'] - group = feedGroup['group'] - - b_type = 'DNSBL' - b_eval = q_name - - # Determine if domain is in DNSBL 'zone' database (log to dnsbl.log) - if not isFound and pfb['zoneDB']: - q = q_name - for x in range(q.count('.') +1, 0, -1): - isDomainInZone = zoneDB.get(q) - if isDomainInZone is not None: - #print q_name + ' zone: ' + str(isDomainInZone) - isFound = True - log_type = isDomainInZone['log'] - - # Collect Feed/Group - feedGroup = feedGroupIndexDB.get(isDomainInZone['index']) - if feedGroup is not None: - feed = feedGroup['feed'] - group = feedGroup['group'] - - b_type = 'TLD' - b_eval = q - break - else: - q = q.split('.', 1) - q = q[-1] - - # Validate other python methods, if not blocked via DNSBL zone/data - if not isFound: - - # Allow only approved TLDs - if pfb['python_tld'] and tld != '' and q_name not in (pfb['dnsbl_ipv4'], '::' + pfb['dnsbl_ipv4']) and tld not in pfb['python_tlds']: - isFound = True - feed = 'TLD_Allow' - group = 
'DNSBL_TLD_Allow' - - # Block IDN or 'xn--' Domains - if not isFound and pfb['python_idn'] and (q_name.startswith('xn--') or '.xn--' in q_name): - isFound = True - feed = 'IDN' - group = 'DNSBL_IDN' - - # Block via Regex - if not isFound and pfb['regexDB']: - isRegexMatch = pfb_regex_match(q_name) - #print q_name + ' regex: ' + str(isRegexMatch) - if isRegexMatch: - isFound = True - feed = isRegexMatch - group = 'DNSBL_Regex' - - if isFound: - b_eval = q_name - log_type = '1' - - # Validate domain in DNSBL Whitelist - if isFound and pfb['whiteDB']: - # print q_name + ' w' - - # Create list of Domain/CNAMES to be validated against Whitelist - whitelist_validate = [] - whitelist_validate.append(q_name) - - if isCNAME: - whitelist_validate.append(q_name_original) - - for w_q_name in whitelist_validate: - - # Determine full domain match - isDomainInWhitelist = whiteDB.get(w_q_name) - if isDomainInWhitelist is not None: - isInWhitelist = True - elif w_q_name.startswith('www.'): - isDomainInWhitelist = whiteDB.get(w_q_name[4:]) - if isDomainInWhitelist is not None: - isInWhitelist = True - - # Determine TLD segment matches - if not isInWhitelist: - q = w_q_name.split('.', 1) - q = q[-1] - for x in range(q.count('.') +1, 0, -1): - if x >= pfb['python_tld_seg']: - isDomainInWhitelist = whiteDB.get(q) - - # Determine if domain is a wildcard whitelist entry - if isDomainInWhitelist is not None and isDomainInWhitelist: - isInWhitelist = True - break - else: - q = q.split('.', 1) - q = q[-1] - - # Add domain to excludeDB to skip subsequent blacklist validation - if not isFound or isInWhitelist: - #print "Add to Pass: " + q_name - excludeDB.append(q_name) - - # Domain to be blocked and is not whitelisted - if isFound and not isInWhitelist: - - # Determine if domain is in HSTS database (Null blocking) - if pfb['hstsDB']: - #print q_name + ' hsts:' - - # Determine if TLD is in HSTS database - if tld in pfb['hsts_tlds']: - isInHsts = True - p_type = 'HSTS_TLD' - #print q_name + " 
HSTS" - else: - q = q_name - for x in range(q.count('.') +1, 0, -2): - # print q_name + ' validate: ' + q - isDomainInHsts = hstsDB.get(q) - if isDomainInHsts is not None: - #print q_name + " q: " + q + " HSTS blacklist" - isInHsts = True - if q_type_str in pfb['rr_types2']: - p_type = 'HSTS_' + q_type_str - else: - p_type = 'HSTS' - break - else: - q = q.split('.', 1) - q = q[-1] + whitelist_result = None # the raw dictionary entry + whitelist_match = None # the value that caused the match (e.g. TLD, www.domain, etc.) + whitelist_name = None # the q_name that caused the match - # print q_name + ' break' + is_cached_block = False + is_cached_exclusion = False - # Determine blocked IP type (DNSBL VIP vs Null Blocking) - if not isInHsts: - # A/AAAA RR_Types - if q_type_str in pfb['rr_types2']: - if log_type: - b_ip = pfb['dnsbl_ip'][q_type_str][log_type] - else: - b_ip = pfb['dnsbl_ip'][q_type_str]['0'] + tld = get_tld(qstate) - # All other RR_Types (use A RR_Type) - else: - if log_type: - b_ip = pfb['dnsbl_ip']['A'][log_type] - else: - b_ip = pfb['dnsbl_ip']['A']['0'] + debug('[{}]: got TLD: {}', q_name_original, tld) - # print q_name + ' ' + str(qstate.qinfo.qtype) + ' ' + q_type_str + for val_counter, q_name in enumerate(validate, start=1): - else: + q_block_result = None # the raw dictionary entry for this q_name + q_block_match = None # the value that caused the match for this q_name (e.g. TLD, www.domain, etc.) 
+ + # Determine if domain was previously blocked + debug('[{}]: checking block cache for domain name: {}', q_name_original, q_name) + cached_block = block_cache.get(q_name) + if cached_block: + cached_block_entry = cached_block['entry'] + if cached_block_entry: + (q_block_result, q_block_match) = (cached_block_entry, cached_block_entry['b_eval']) + debug('[{}]: found domain name in block cache: {} (matching: {}): {}', q_name_original, q_name, q_block_match, q_block_result) + else: + debug('[{}]: found negative result for domain name in block cache: {}', q_name_original, q_name) + else: + (q_block_result, q_block_match) = block_lookup(q_name, tld) + + if q_block_result: + debug('[{}]: domain blocked: {} (matching: {}): {}', q_name_original, q_name, q_block_match, q_block_result) + (block_result, block_match, block_name, is_cached_block) = (q_block_result, q_block_match, q_name, cached_block is not None) + if val_counter > 1: + isCNAME = True + if block_result['b_type'] == 'Python': + # This is the type of blocking with the highest precedence, so skip all other checks + break + elif not cached_block: + # If there is a future match, this is eventually replaced by the actual match + debug('[{}]: adding negative result to block cache: {}', q_name_original, q_name) + block_cache[q_name] = {'entry': None} + + if block_result: + for val_counter, q_name in enumerate(validate, start=1): + + q_whitelist_result = None # the raw dictionary entry for this q_name + q_whitelist_match = None # the value that caused the match for this q_name (e.g. TLD, www.domain, etc.) 
+ + # Determine if domain has been previously excluded + debug('[{}]: checking exclusion cache for domain name: {}', q_name_original, q_name) + cached_exclusion = exclusion_cache.get(q_name) + if cached_exclusion: + cached_exclusion_entry = cached_exclusion['entry'] + if cached_exclusion_entry: + (q_whitelist_result, q_whitelist_match) = cached_exclusion_entry + debug('[{}]: domain found in exclusion cache: {} (matching: {}): {}', q_name_original, q_name, q_whitelist_match, q_whitelist_result) + else: + debug('[{}]: found negative result for domain name in exclusion cache: {}', q_name_original, q_name) + else: + # Only user-defined exclusions ("whitelist") have priority over 'Python' + # Do not bother checking whitelist entries that do not take precedence + (q_whitelist_result, q_whitelist_match) = whitelist_lookup(q_name, user_only=(block_result['b_type'] == 'Python')) + + if q_whitelist_result: + debug('[{}]: domain excluded: {} (matching: {}): {}', q_name_original, q_name, q_whitelist_match, q_whitelist_result) + (whitelist_result, whitelist_match, whitelist_name, is_cached_exclusion) = (q_whitelist_result, q_whitelist_match, q_name, cached_exclusion is not None) + if whitelist_result['group'] == 'USER': + # This is the type of exclusion with the highest precedence, so skip all other checks + break + elif not cached_exclusion: + # If there is a future match, this is eventually replaced by the actual match + debug('[{}]: adding negative result to exclusion cache: {}', q_name_original, q_name) + exclusion_cache[q_name] = {'entry': None} + + # Exclusion has higher precendence than block, except for block of type Python (which means either user-defined block, regex block, etc.) + # User-defined exclusion ("whitelist") has the highest precedence, though. 
+ # Whitelist (User) > Block (Python) > Exclusion (Lists) > Block (Lists) + # While the filtering above should have gotten rid of this, protect against "bad" cached results + # This is unlikely to be necessary, but the current logic is too messy to be 100% sure, so let's be defensive here + # TODO: remove double-check when this chain of checks gets refactored and caching restructured + if block_result and whitelist_result: + if block_result['b_type'] != 'Python' or whitelist_result['group'] == 'USER': + debug('[{}]: exclusion has priority over block entry. Block: {} (matching: {}): {}. Exclusion: {} (matching: {}): {}.', \ + q_name_original, block_name, block_match, block_result, whitelist_name, whitelist_match, whitelist_result) + + # Clear block result + (block_result, block_match, block_name) = (None, None, None) + + if not is_cached_exclusion: + + # Cache for all validated CNAMEs + for q_name in validate: + + # Skip positive entries already present - except for the whitelisted domain itself + if q_name != whitelist_name: + cached_exclusion = exclusion_cache.get(q_name) + if cached_exclusion and cached_exclusion['entry']: + continue + + debug('[{}]: adding entry to exclusion cache: {} (matching: {}): {}', q_name_original, q_name, whitelist_match, whitelist_result) + exclusion_cache[q_name] = {'entry': (whitelist_result, whitelist_match)} + else: + debug('[{}]: block has priority over exclusion entry. Block: {} (matching: {}): {}. 
Exclusion: {} (matching: {}): {}.', \ + q_name_original, block_name, block_match, block_result, whitelist_name, whitelist_match, whitelist_result) + + + if block_result and not is_cached_block: + + p_type = 'Python' + + # Determine if domain is in HSTS database (Null blocking) + if hstsDB: + debug('[{}]: checking HSTS for: {}', q_name_original, block_name) + + # Determine if TLD is in HSTS database + if tld in pfb['hsts_tlds']: + debug('[{}]: found TLD in HSTS: {}: {}', q_name_original, block_name, tld) + p_type = 'HSTS_TLD' + else: + q = q_name + for _ in range(q.count('.') + 1, 0, -2): + if q in hstsDB: + debug('[{}]: found HSTS blacklist entry: {}: {}', q_name_original, block_name, q) if q_type_str in pfb['rr_types2']: - b_ip = pfb['dnsbl_ip'][q_type_str]['0'] + p_type = 'HSTS_{}'.format(q_type_str) else: - b_ip = pfb['dnsbl_ip']['A']['0'] + p_type = 'HSTS' + break + else: + q = q.split('.', 1)[-1] + + (b_type, log_type, key, feed, group, b_eval) = \ + (block_result['b_type'], block_result['log'], block_result['key'], block_result['feed'], block_result['group'], block_match) + + # Cache for all validated CNAMEs + for q_name in validate: + # Skip positive entries already present - except for the blocked domain itself + if q_name != block_name: + cached_block = block_cache.get(q_name) + if cached_block and cached_block['entry']: + continue - # Add 'CNAME' suffix to Block type (CNAME Validation) - if isCNAME: - b_type = b_type + '_CNAME' - q_name = q_name_original + # Add domain to dict for get_details_dnsbl function + entry = {'q_name': q_name, 'b_type': b_type, 'p_type': p_type, 'key': key, 'log': log_type, 'feed': feed, 'group': group, 'b_eval': b_eval} + debug('[{}]: adding entry to block cache: {}: {}', q_name_original, q_name, entry) + block_cache[q_name] = {'entry': entry} - # Add q_type to b_type (Block type) - b_type = b_type + '_' + q_type_str + # Replace block result reference with cached reference + if q_name == block_name: + block_result = entry - # 
Skip subsequent DNSBL validation for domain, and add domain to dict for get_details_dnsbl function - dnsblDB[q_name] = {'qname': q_name, 'b_type': b_type, 'p_type': p_type, 'b_ip': b_ip, 'log': log_type, 'feed': feed, 'group': group, 'b_eval': b_eval } - # Skip subsequent DNSBL validation for original domain (CNAME validation), and add domain to dict for get_details_dnsbl function - if isCNAME and dnsblDB.get(q_name_original) is None: - dnsblDB[q_name_original] = {'qname': q_name_original, 'b_type': b_type, 'p_type': p_type, 'b_ip': b_ip, 'log': log_type, 'feed': feed, 'group': group, 'b_eval': b_eval } + # Add domain data to block cache for Reports tab + write_sqlite_async(3, '', [format_b_type(b_type, q_type_str, isCNAME), q_name, group, b_eval, feed]) - # Add domain data to DNSBL cache for Reports tab - write_sqlite(3, '', [b_type, q_name, group, b_eval, feed]) + # Use previously blocked domain details + if block_result: - # Use previously blocked domain details + (q_name, p_type, log_type, feed, group, b_eval) = \ + (block_result['q_name'], block_result['p_type'], block_result['log'], block_result['feed'], block_result['group'], block_result['b_eval']) + + # Determine blocked IP type (DNSBL VIP vs Null Blocking) + if p_type.startswith('HSTS'): + if q_type_str in pfb['rr_types2']: + b_ip = pfb['dnsbl_ip'][q_type_str]['0'] else: - b_ip = isDomainInDNSBL['b_ip'] - b_type = isDomainInDNSBL['b_type'] - isFound = True - # print "v: " + q_name + b_ip = pfb['dnsbl_ip']['A']['0'] + else: + # A/AAAA RR_Types + if q_type_str in pfb['rr_types2']: + if log_type: + b_ip = pfb['dnsbl_ip'][q_type_str][log_type] + else: + b_ip = pfb['dnsbl_ip'][q_type_str]['0'] - if isFound and not isInWhitelist: + # All other RR_Types (use A RR_Type) + else: + if log_type: + b_ip = pfb['dnsbl_ip']['A'][log_type] + else: + b_ip = pfb['dnsbl_ip']['A']['0'] - # Default RR_TYPE ANY -> A - if q_type == RR_TYPE_ANY: - q_type = RR_TYPE_A - q_type_str = 'A' + # Default RR_TYPE ANY -> A + if q_type == 
RR_TYPE_ANY: + q_type = RR_TYPE_A + q_type_str = 'A' - # print q_name + ' Blocked ' + b_ip + ' ' + q_type_str + debug('[{}]: blocked: {}, b_ip={}, q_type={}', q_name_original, q_name, b_ip, q_type_str) - # Create FQDN Reply Message - msg = DNSMessage(qstate.qinfo.qname_str, q_type, RR_CLASS_IN, PKT_QR | PKT_RA) - msg.answer.append("{}. 60 IN {} {}" .format(q_name, q_type_str, b_ip)) + # Create FQDN Reply Message + answer = "{}. 60 IN {} {}".format(q_name, q_type_str, b_ip) + debug('[{}]: answer: {}', q_name_original, answer) + msg = DNSMessage(qstate.qinfo.qname_str, q_type, RR_CLASS_IN, PKT_QR | PKT_RA) + msg.answer.append(answer) - msg.set_return_msg(qstate) - if msg is None or not msg.set_return_msg(qstate): - qstate.ext_state[id] = MODULE_ERROR - return True + if not msg.set_return_msg(qstate): + qstate.ext_state[id] = MODULE_ERROR + return True - # Log entry - kwargs = {'pfb_addr': q_ip} - if qstate.return_msg: - get_details_dnsbl('dnsbl', None, qstate, qstate.return_msg.rep, kwargs) - else: - get_details_dnsbl('dnsbl', None, qstate, None, kwargs) + # Log entry + get_details_dnsbl(q_name_original, q_ip, q_type_str, isCNAME) - qstate.return_rcode = RCODE_NOERROR - qstate.return_msg.rep.security = 2 - qstate.ext_state[id] = MODULE_FINISHED - return True + qstate.return_rcode = RCODE_NOERROR + qstate.return_msg.rep.security = 2 + qstate.ext_state[id] = MODULE_FINISHED + return True + + # Cache negative block response after analysing precedence, etc. 
+ # This is a workaround for caching negative block matches when caused by a positive exclusion match + # This works, but it is honestly horrible and we should refactor this ASAP + # TODO: refactor this entire chain to make caching more straightforward, maybe use lru_cache or similar strategy + else: + # Cache for all validated CNAMEs + for q_name in validate: + + # Check existing entries + cached_block = block_cache.get(q_name) + + # Skip positive entries already present + if cached_block and cached_block['entry']: + continue + elif not cached_block: + debug('[{}]: adding negative result to block cache: {}', q_name_original, q_name) + block_cache[q_name] = {'entry': None} + + + debug('[{}]: passed through', q_name_original) if (event == MODULE_EVENT_NEW) or (event == MODULE_EVENT_PASS): qstate.ext_state[id] = MODULE_WAIT_MODULE diff --git a/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/pkg/pfblockerng/pfblockerng.inc b/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/pkg/pfblockerng/pfblockerng.inc index f489d9369db0..df3dc385c5f2 100644 --- a/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/pkg/pfblockerng/pfblockerng.inc +++ b/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/pkg/pfblockerng/pfblockerng.inc @@ -750,6 +750,7 @@ function pfb_global() { $pfb['dnsbl_python'] = $pfb['dnsblconfig']['pfb_python']; // DNSBL Resolver python integration $pfb['dnsbl_mode'] = $pfb['dnsblconfig']['dnsbl_mode']; // DNSBL Mode (Unbound/python mode) + $pfb['dnsbl_py_debug'] = $pfb['dnsblconfig']['pfb_py_debug']; // DNSBL Resolver python debug mode $pfb['dnsbl_py_reply'] = $pfb['dnsblconfig']['pfb_py_reply']; // DNSBL Resolver python DNS Reply logging $pfb['dnsbl_py_block'] = $pfb['dnsblconfig']['pfb_py_block']; // DNSBL Resolver python blocking mode $pfb['dnsbl_hsts'] = $pfb['dnsblconfig']['pfb_hsts']; // DNSBL Resolver python block HSTS via Null Block mode @@ -1228,8 +1229,6 @@ function pfb_logger($log, $logtype) { case 6: @file_put_contents("{$pfb['errlog']}", 
"{$elog}", FILE_APPEND); break; - default: - break; } } @@ -2197,7 +2196,7 @@ function pfb_unbound_dnsbl($mode) { $log = "\nAdding DNSBL Unbound mode (Resolver adv. setting)"; } - // To be removed when SafeSearch CNAME python mode has been fixed + // TODO: to be removed when SafeSearch CNAME python mode has been fixed elseif ($pfb['safesearch_enable'] !== 'Disable') { $pfbupdate = TRUE; $unbound_custom .= "\n{$unbound_include}"; @@ -2218,7 +2217,7 @@ function pfb_unbound_dnsbl($mode) { unset($custom[$key]); } - // To be removed when SafeSearch CNAME python mode has been fixed + // TODO: to be removed when SafeSearch CNAME python mode has been fixed elseif ($pfb['safesearch_enable'] !== 'Disable') { // } @@ -2242,7 +2241,7 @@ function pfb_unbound_dnsbl($mode) { $log = "\nAdding DNSBL Unbound mode (Resolver adv. setting)"; } - // To be removed when SafeSearch CNAME python mode has been fixed + // TODO: to be removed when SafeSearch CNAME python mode has been fixed elseif ($pfb['safesearch_enable'] !== 'Disable') { $pfbupdate = TRUE; $unbound_custom = "{$unbound_include}"; @@ -2295,7 +2294,7 @@ function pfb_unbound_dnsbl($mode) { $unbound = TRUE; } - // To be removed when SafeSearch CNAME python mode has been fixed + // TODO: to be removed when SafeSearch CNAME python mode has been fixed elseif ($pfb['safesearch_enable'] !== 'Disable') { $unbound = TRUE; } @@ -2360,7 +2359,7 @@ function pfb_unbound_dnsbl($mode) { $conf[] = "\nserver:include: {$pfb['dnsbl_file']}.*conf\n"; } - // To be removed when SafeSearch CNAME python mode has been fixed + // TODO: to be removed when SafeSearch CNAME python mode has been fixed elseif ($pfb['safesearch_enable'] !== 'Disable') { $u_update = TRUE; $u_msg .= " Added DNSBL SafeSearch CNAME mode\n"; @@ -2413,29 +2412,45 @@ function pfb_unbound_python_whitelist($mode='') { global $pfb; pfb_global(); - $dnsbl_whitelist = ''; + $dnsbl_whitelist = array(); $dnsbl_white = pfbng_text_area_decode($pfb['dnsblconfig']['suppression'], TRUE, FALSE, 
TRUE); if (!empty($dnsbl_white)) { foreach ($dnsbl_white as $key => $line) { + $line = trim($line); if (!empty($line)) { if (substr($line, 0, 4) == 'www.') { $line = substr($line, 4); } - // Minimize the python whitelist queries to the smallest tld segment count - if (!isset($tld_segments)) { - $tld_segments = (substr_count($line, '.') +1); - } - $tld_segments = @min((array((substr_count($line, '.') +1), $tld_segments) ?: 1)); - + // 0 = literal + // 1 = wildcard if (substr($line, 0, 1) == '.') { $line = ltrim($line, '.'); - $dnsbl_whitelist .= "{$line},1\n"; + $dnsbl_whitelist[] = ",{$line},,1,DNSBL_WHITELIST,USER,1\n"; } else { - $dnsbl_whitelist .= "{$line},0\n"; + $dnsbl_whitelist[] = ",{$line},,1,DNSBL_WHITELIST,USER,0\n"; + } + } + } + } + + // TODO: move exclusions to their own file + $exclusions_files = glob("{$pfb['dnsdir']}/*.exclusions"); + foreach($exclusions_files as $file) { + if (($handle = @fopen($file, 'r')) !== FALSE) { + while (($line = @fgets($handle)) !== FALSE) { + $line = trim($line); + if (!empty($line)) { + $alias = substr(basename($file), 0, -11); + $csvline = str_getcsv($line); + $type = $csvline[1]; // 0 = literal, 1 = wildcard, 2 = regex + $dnsbl_whitelist[] = ",{$csvline[0]},,1,DNSBL_EXCLUSION,{$alias},{$type}\n"; } } } + if ($handle) { + @fclose($handle); + } } if ($mode == 'alerts') { @@ -2493,7 +2508,7 @@ function pfb_unbound_python($mode) { $dnsbl_whitelist = pfb_unbound_python_whitelist(); // Compare previous DNSBL Whitelist to new Whitelist - $pfb_py_whitelist_ex = @file_get_contents($pfb['unbound_py_wh']); + $pfb_py_whitelist_ex = @file($pfb['unbound_py_wh'], FILE_SKIP_EMPTY_LINES); if ($dnsbl_whitelist !== $pfb_py_whitelist_ex) { $pfbpython = TRUE; @file_put_contents($pfb['unbound_py_wh'], $dnsbl_whitelist, LOCK_EX); @@ -2506,8 +2521,10 @@ function pfb_unbound_python($mode) { $pfbpython = TRUE; } - if (!isset($tld_segments)) { - $tld_segments = '1'; + + $python_debug = 'off'; + if ($pfb['dnsbl_py_debug'] == 'on') { + 
$python_debug = 'on'; } $python_ipv6 = 'off'; @@ -2585,12 +2602,12 @@ function pfb_unbound_python($mode) { [MAIN] dnsbl_ipv4 = {$pfb['dnsbl_vip']} python_enable = {$python_enable} +python_debug = {$python_debug} python_ipv6 = {$python_ipv6} python_reply = {$python_reply} python_blocking = {$python_blocking} python_hsts = {$python_hsts} python_idn = {$python_idn} -python_tld_seg = {$tld_segments} python_tld = {$python_tld} python_tlds = {$python_tlds} python_nolog = {$python_nolog} @@ -2912,7 +2929,6 @@ function tld_analysis() { if ($pfb['dnsbl_py_blacklist']) { $tld_cnt++; $pfb_found = TRUE; - $tld_segments = @max((array((substr_count($tld, '.') +1), $tld_segments) ?: 1)); pfb_logger("{$tld}|", 1); @fwrite($p_zone, ",{$tld},,1,DNSBL_TLD,DNSBL_TLD\n"); @@ -3079,8 +3095,10 @@ function tld_analysis() { $dparts = explode('.', $domain); $dcnt = count($dparts); $tld = end($dparts); - $d_info = $eparts[2]; // Logging Type/Header/Alias group details + $d_info = $eparts[2]; // ",logging_type,header,alias_group_details(,entry_type)?" (0 = literal, 1 = wildcard, 2 = regex) $dfound = ''; + $tparts = explode(',', $d_info); + $t_type = count($tparts) > 4 ? 
intval(end($tparts)) : 0; } // DNSBL Unbound blocking mode @@ -3151,10 +3169,17 @@ function tld_analysis() { // DNSBL python blocking mode if ($pfb['dnsbl_py_blacklist']) { - @fwrite($p_zone, ",{$dfound},{$d_info}"); + if ($t_type === 0) { + // Only type 0 (literal) may be treated as a "zone" + @fwrite($p_zone, ",{$dfound},{$d_info}"); - // TLD remove files - See below for description - @fwrite($p_tsp, ".{$dfound},,\n"); + // TLD remove files - See below for description + @fwrite($p_tsp, ".{$dfound},,\n"); + } else { + // Type 1 (wildcard) is already a "zone" + // Type 2 (regex) is a special case + @fwrite($p_data, ",{$dfound},{$d_info}"); + } } else { $ipv6_dnsbl = ''; @@ -3423,6 +3448,7 @@ function pfb_unbound_clear_work_files() { "{$pfb['dnsbldir']}/unbound.bk", "{$pfb['dnsbldir']}/unbound.tmp", "{$pfb['dnsbl_file']}.bk", + "{$pfb['dnsbl_file']}.ex", "{$pfb['dnsbl_file']}.tsp", "{$pfb['dnsbl_file']}.sync", "/tmp/dnsbl_remove*", @@ -3441,9 +3467,9 @@ function pfb_update_unbound($mode, $pfbupdate, $pfbpython) { global $g, $pfb; if ($mode == 'enabled') { - $ext = '.bk'; + $ext = array( '.bk', '.ex' ); } else { - $ext = '.*'; // Remove all DNSBL Unbound files + $ext = array( '.*' ); // Remove all DNSBL Unbound files } // Execute TLD analysis, if configured @@ -3477,7 +3503,9 @@ function pfb_update_unbound($mode, $pfbupdate, $pfbpython) { // When pfBlockerNG is disabled and 'keep blocklists' is disabled. 
if ($pfb['enable'] == '' && $pfb['keep'] == '' && !$pfb['install']) { - unlink_if_exists("{$pfb['dnsbl_file']}{$ext}"); + foreach ($ext as $e) { + unlink_if_exists("{$pfb['dnsbl_file']}{$e}"); + } } // Disable DNSBL @@ -3580,6 +3608,7 @@ function pfb_update_unbound($mode, $pfbupdate, $pfbpython) { // Python blocking mode enabled else { + // TODO: improve this logic once exclusions are moved to their own file $tld_cnt = @file_get_contents($pfb['unbound_py_count']); $dnsbl_cnt = $dnsbl_cnt - $tld_cnt; $final_cnt = exec("/usr/bin/find {$pfb['unbound_py_data']} {$pfb['unbound_py_zone']} -type f 2>/dev/null | xargs cat | {$pfb['grep']} -c ^ 2>&1"); @@ -3653,6 +3682,7 @@ function pfblockerng_top1m() { } // Collect Domain TLD + $line = strtolower($line); $csvline = str_getcsv($line); $tld = substr($csvline[1], strrpos($csvline[1], '.') + 1); @@ -5307,7 +5337,7 @@ function pfb_collect_localhosts() { // Collect static DHCP hostnames/IPs foreach (config_get_path('dhcpd', []) as $dhcp) { - if (is_array($dhcp['staticmap'])) { + if (isset($dhcp['staticmap']) && is_array($dhcp['staticmap'])) { foreach ($dhcp['staticmap'] as $smap) { $local_hosts[$smap['ipaddr']] = strtolower("{$smap['hostname']}"); } @@ -5316,7 +5346,7 @@ function pfb_collect_localhosts() { // Collect static DHCPv6 hostnames/IPs foreach (config_get_path('dhcpdv6', []) as $dhcpv6) { - if (is_array($dhcpv6['staticmap'])) { + if (isset($dhcpv6['staticmap']) && is_array($dhcpv6['staticmap'])) { foreach ($dhcpv6['staticmap'] as $smap) { $local_hosts[$smap['ipaddrv6']] = strtolower("{$smap['hostname']}"); } @@ -6966,6 +6996,30 @@ function pfb_clear_contents() { } +function convert_idn($line) { + global $pfb; + + // Convert IDN (Unicode domains) to ASCII (punycode) + if (!ctype_print($line)) { + + // Convert encodings to UTF-8 + $line = mb_convert_encoding($line, 'UTF-8', + mb_detect_encoding($line, 'UTF-8, ASCII, ISO-8859-1')); + + $log = "\n IDN converted: [ {$line} ]\t"; + $line = idn_to_ascii($line); + if 
(!empty($line)) { + pfb_logger("{$log} [ {$line} ]", 1); + return $line; + } else { + return ''; + } + } + + return $line; +} + + // Main pfBlockerNG function function sync_package_pfblockerng($cron='') { global $g, $config, $pfb, $pfbarr; @@ -7602,6 +7656,11 @@ function sync_package_pfblockerng($cron='') { } if ($pfb['enable'] == 'on' && $pfb['dnsbl'] == 'on' && !$pfb['save'] && !$dnsbl_error) { + + $postprocess = FALSE; // Marker to determine whether postprocessing is needed (i.e. any files add anything to the whitelist) + $postprocess_dnsbl = array(); // Files that need postprocessing due to whitelisting + $alias_postprocessing_data = array(); // Data used for processing DNSBL after the loop over the lists + if ((config_get_path('installedpackages/pfblockerngdnsbl/config') != null) || (config_get_path('installedpackages/pfblockerngblacklist/blacklist_enable') == 'Enable')) { @@ -7759,7 +7818,8 @@ function sync_package_pfblockerng($cron='') { $line = str_replace('"', '', strstr($host, '"', False)); $host_ip = trim(str_replace('A ', '', strstr($line, 'A ', FALSE))); - $domain = strstr($line, ' ', TRUE); + $domain = strtolower(strstr($line, ' ', TRUE)); + if (substr($domain, 0, 4) == 'www.') { $domain = substr($domain, 4); } @@ -7885,6 +7945,7 @@ function sync_package_pfblockerng($cron='') { // Python mode create a CSV list of SafeSearch hosts if ($pfb['dnsbl_py_blacklist'] && !empty($safesearch_hosts)) { foreach ($safesearch_hosts as $host => $data) { + $host = strtolower($host); if (isset($data['nxdomain'])) { $line = "{$host},nxdomain,nxdomain\n"; } else { @@ -7947,6 +8008,8 @@ function sync_package_pfblockerng($cron='') { $wildcard = TRUE; } + $line = strtolower($line); + // Remove 'www.' 
prefix if (substr($line, 0, 4) == 'www.') { $line = substr($line, 4); @@ -8330,39 +8393,105 @@ function sync_package_pfblockerng($cron='') { $liteparser = TRUE; } - // Variables for Easylists - $easylist = $validate_header = FALSE; + // Variables for Easylist-style lists $e_replace = array( '||', '.^', '^' ); + // Variables for parsing Easylist-style lists + $easylist = FALSE; + $run_once = $csv_parser = FALSE; $csv_type = ''; $ipcount = $ip_cnt = 0; + // First check if this is an EasyList + if (($fhandle = @fopen("{$file_dwn}.orig", 'r')) !== FALSE) { + + // Variables for checking whether it's an Easylist-style list + $validated_header = FALSE; + + while (($line = @fgets($fhandle)) !== FALSE) { + + $line = trim($line); + + // Validate EasyList/AdBlock/uBlock/ADGuard Feeds + if (!$validated_header) { + if (strpos($line, '[Adblock Plus ') !== FALSE || + strpos($line, '[Adblock Plus]') !== FALSE || + strpos($line, '[uBlock Origin') !== FALSE || + strpos($line, '! Title: AdGuard') !== FALSE) { + $easylist = $validated_header = TRUE; + break; + } + // Skip exclamation comment lines and other headers + // Needs to be done separately because of AdGuard + elseif (substr($line, 0, 1) === '!' || + substr($line, 0, 1) === '[') { + continue; + } + // Some real content found, so stop checking for the header + else { + $validated_header = TRUE; + } + } + + // Skip exclamation comment lines and headers + if (substr($line, 0, 1) === '!' 
|| + substr($line, 0, 1) === '[') { + continue; + } + + // Checks for EasyLists without headers + if (substr($line, 0, 2) === '||' || + substr($line, 0, 4) === '@@||') { + $easylist = TRUE; + break; + } + + // Probably EasyList-style domain + if (substr($line, 0, 1) === '|' || + substr($line, 0, 1) === '@') { + + // EasyLists are the only syntax that accepts ^ + if (strpos($line, '^') !== FALSE) { + $easylist = TRUE; + break; + } + + // EasyList exclusion + if (substr($line, 0, 2) === '@@') { + $easylist = TRUE; + break; + } + + // Remove all pipes + $line = trim($line, '|'); + $line = trim(str_replace($e_replace, '', $line)); + + // Skip empty lines + if (empty($line)) { + continue; + } + + // This is very likely an EasyList + $easylist = TRUE; + break; + } + } + } + if ($fhandle) { + @fclose($fhandle); + } + // Parse downloaded file for Domain names if (($fhandle = @fopen("{$file_dwn}.orig", 'r')) !== FALSE) { - if (($dhandle = @fopen("{$pfbfolder}/{$header}.bk", 'w')) !== FALSE) { + if (($dhandle = @fopen("{$pfbfolder}/{$header}.bk", 'w')) !== FALSE && + ($ehandle = @fopen("{$pfbfolder}/{$header}.ex", 'w')) !== FALSE) { + while (($line = @fgets($fhandle)) !== FALSE) { // Collect original line $oline = $line; - // Validate EasyList/AdBlock/uBlock/ADGuard Feeds - if (!$validate_header) { - if (strpos($line, '[Adblock Plus ') !== FALSE || - strpos($line, '[Adblock Plus]') !== FALSE || - strpos($line, '[uBlock Origin') !== FALSE || - strpos($line, '! 
Title: AdGuard') !== FALSE) { - $easylist = $validate_header = TRUE; - continue; - } - elseif (substr($line, 0, 1) === '!') { - continue; - } - else { - $validate_header = TRUE; - } - } - // Remove any '^M' characters if (strpos($line, "\r") !== FALSE) { $line = rtrim($line, "\x00..\x1F"); @@ -8370,18 +8499,138 @@ function sync_package_pfblockerng($cron='') { // Remove invalid charaters $line = trim($line, " \t\n\r\0\x0B\xC2\xA0"); + + // Trim all whitespace characters + $line = trim($line); + + if (empty($line)) { + continue; + } + + // Skip exclamations comment lines and headers + if (substr($line, 0, 1) === '!' || + substr($line, 0, 1) === '[') { + continue; + } + // Make it case-insensitive + $line = strtolower($line); + + // Check for EasyList-style domain blocks and exclusions if ($easylist) { - if (substr($line, 0, 2) !== '||' || - substr($line, -1) !== '^' || - strpos($line, '$') !== FALSE || - strpos($line, '*') !== FALSE || - strpos($line, '/') !== FALSE) { + + // Invalid for either blocking or excluding -- skip it + if (strpos($line, '$') !== FALSE || + strpos($line, '/') !== FALSE || + strpos($line, '?') !== FALSE || + strpos($line, '~') !== FALSE || + strpos($line, '=') !== FALSE || + strpos($line, '&') !== FALSE || + strpos($line, ';') !== FALSE || + strpos($line, '#') !== FALSE || + strpos($line, ',') !== FALSE || + strpos($line, ':') !== FALSE) { continue; } - $lite = TRUE; - $line = str_replace($e_replace, '', $line); + $startpos = 0; + $endpos = -1; + $is_easylist_exclusion = FALSE; + $is_regex = FALSE; + + // Exclusion + if (substr($line, 0, 2) === '@@') { + // Skip for Unbound mode -- not compatible with exclusions + if(!$pfb['dnsbl_py_blacklist']) { + continue; + } + + $startpos = 2; + $is_easylist_exclusion = TRUE; + } + + // Not sure if within spec, but some domains might end with a pipe + if (substr($line, -1) === '|') { + $endpos = -2; + } + + // Not a domain name entry -- skip it + if (substr($line, $startpos, 2) !== '||' || + 
substr($line, $endpos, 1) !== '^') { + continue; + } + + $line = trim(str_replace($e_replace, '', substr($line, $startpos, $endpos))); + + // Skip empty lines + if (empty($line)) { + continue; + } + + // Regex + if (strpos($line, '*') !== FALSE) { + // Skip for Unbound mode -- not compatible with regular expressions + if (!$pfb['dnsbl_py_blacklist']) { + continue; + } + + $is_regex = TRUE; + } + + // Convert IDN (Unicode domains) to ASCII (punycode) + $line = convert_idn($line); + if (empty($line)) { + // Record failed parsed line + pfb_parsed_fail($header, '', $oline, $pfb['dnsbl_parse_err']); + continue; + } + + // Special handling for whitelist entries + if ($is_easylist_exclusion) { + + if ($is_regex) { + // 2 = regular expression flag + $exclusion_data = "{$line},2\n"; + } + else { + // Remove leading/trailing dots + $line = trim(trim($line), '.'); + + // Skip empty lines + if (empty($line)) { + continue; + } + + // Domain Validation + if (empty(pfb_filter($line, PFB_FILTER_DOMAIN, 'DNSBL_Download'))) { + + // Log invalid Domains + if (!isset($dnsbl_skip[$line])) { + pfb_parsed_fail($header, $line, $oline, $pfb['dnsbl_parse_err']); + } + continue; + } + + // 1 = wildcard + $exclusion_data = "{$line},1\n"; + } + + + $postprocess = TRUE; + @fwrite($ehandle, $exclusion_data); + continue; + } + + // Convert to regular expression for usage with Python + // Otherwise, let the normal processing continue + if ($is_regex) { + // 2 = regular expression flag + $domain_data = ",{$line},,{$logging_type},{$header},{$alias},2\n"; + + $postprocess = TRUE; + @fwrite($dhandle, $domain_data); + continue; + } } else { // If 'tab' character found, replace with whitespace @@ -8560,9 +8809,19 @@ function sync_package_pfblockerng($cron='') { continue; } } + $line = trim($line); - if (!$easylist) { + if (empty($line)) { + continue; + } + + if ($easylist) { + + // Only lite parsing required for EasyLists + $lite = TRUE; + } + else { // Typical Host Feed format - Remove characters 
before space if (!$rev_format && strpos($line, ' ') !== FALSE) { @@ -8624,8 +8883,13 @@ function sync_package_pfblockerng($cron='') { if (strpos($line, ':') !== FALSE) { $line = preg_replace("/:[0-9]{1,5}$/", '', $line); } + + $line = trim($line); + + if (empty($line)) { + continue; + } } - $line = trim($line); // Collect any IPs found in domain feed if (is_ipaddrv4($line)) { @@ -8641,27 +8905,20 @@ function sync_package_pfblockerng($cron='') { } // Convert IDN (Unicode domains) to ASCII (punycode) - if (!ctype_print($line)) { - - // Convert encodings to UTF-8 - $line = mb_convert_encoding($line, 'UTF-8', - mb_detect_encoding($line, 'UTF-8, ASCII, ISO-8859-1')); - - $log = "\n IDN converted: [ {$line} ]\t"; - $line = idn_to_ascii($line); - if (!empty($line)) { - pfb_logger("{$log} [ {$line} ]", 1); - } - else { - // Record failed parsed line - pfb_parsed_fail($header, '', $oline, $pfb['dnsbl_parse_err']); - continue; - } + $line = convert_idn($line); + if (empty($line)) { + // Record failed parsed line + pfb_parsed_fail($header, '', $oline, $pfb['dnsbl_parse_err']); + continue; } // Remove leading/trailing dots $line = trim(trim($line), '.'); + if (empty($line)) { + continue; + } + // Domain Validation if (empty(pfb_filter($line, PFB_FILTER_DOMAIN, 'DNSBL_Download'))) { @@ -8682,17 +8939,18 @@ function sync_package_pfblockerng($cron='') { // For DNSBL python, save domain and Logging type if ($pfb['dnsbl_py_blacklist']) { - $domain_data = ',' . strtolower($line) - . ",,{$logging_type},{$header},{$alias}\n"; + // 0 = literal flag + // 1 = wildcard + $wildcard = $easylist ? '1' : '0'; + $postprocess |= $easylist; + $domain_data = ",{$line},,{$logging_type},{$header},{$alias},{$wildcard}\n"; } else { $ipv6_dnsbl = "\n"; if ($pfb['dnsbl_v6'] == 'on' && !$pfb['dnsbl_tld']) { - $ipv6_dnsbl = " local-data: \"" . strtolower($line) - . 
" 60 IN AAAA {$sinkhole_type6}\"\n"; + $ipv6_dnsbl = " local-data: \"{$line} 60 IN AAAA {$sinkhole_type6}\"\n"; } - $domain_data = "local-data: \"" . strtolower($line) - . " 60 IN A {$sinkhole_type4}\"{$ipv6_dnsbl}"; + $domain_data = "local-data: \"{$line} 60 IN A {$sinkhole_type4}\"{$ipv6_dnsbl}"; } @fwrite($dhandle, $domain_data); } @@ -8700,6 +8958,9 @@ function sync_package_pfblockerng($cron='') { if ($dhandle) { @fclose($dhandle); } + if ($ehandle) { + @fclose($ehandle); + } } if ($fhandle) { @fclose($fhandle); @@ -8719,6 +8980,11 @@ function sync_package_pfblockerng($cron='') { unlink_if_exists("{$pfbfolder}/{$header}_v4.ip"); } + // Add to the list for postprocessing + if (!empty($domain_data)) { + $postprocess_dnsbl[] = $header_esc; + } + // Validate feed with Unbound-checkconf if (!empty($domain_data)) { $conf = "server:\n"; @@ -8739,15 +9005,13 @@ function sync_package_pfblockerng($cron='') { $pfb_alexa = 'on'; } - // DNSBL python requires a different deduplication process - $dup_mode = ''; + // Call script to process DNSBL 'De-Duplication / Whitelisting / TOP1M Whitelisting' if ($pfb['dnsbl_py_blacklist']) { - $dup_mode = 'python'; + exec("{$pfb['script']} dnsbl_scrub_python {$header_esc} {$pfb_alexa} unused {$elog}"); + } else { + exec("{$pfb['script']} dnsbl_scrub_unbound {$header_esc} {$pfb_alexa} unused {$elog}"); } - // Call script to process DNSBL 'De-Duplication / Whitelisting / TOP1M Whitelisting' - exec("{$pfb['script']} dnsbl_scrub {$header_esc} {$pfb_alexa} {$dup_mode} {$elog}"); - if ($ip_cnt > 0) { pfb_logger(" IPv4 count={$ip_cnt}\n", 1); } @@ -8760,6 +9024,23 @@ function sync_package_pfblockerng($cron='') { } unlink_if_exists("{$pfb['dnsbldir']}/check.conf"); } + elseif ($pfb['dnsbl_py_blacklist'] && !empty($exclusion_data)) { + pfb_logger(".\n", 1); + + // Bypass TOP1M whitelist, if user configured + $pfb_alexa = 'Disabled'; + if ($pfb['dnsbl_alexa'] == 'on' && + $list['filter_alexa'] == 'on' && + 
file_exists("{$pfb['dbdir']}/pfbalexawhitelist.txt")) { + $pfb_alexa = 'on'; + } + + // Call script to process DNSBL 'De-Duplication / TOP1M Whitelisting' + exec("{$pfb['script']} dnsbl_scrub_python {$header_esc} {$pfb_alexa} unused {$elog}"); + + unlink_if_exists("{$pfbfolder}/{$header}.bk"); + $result = array('unbound-checkconf: no errors'); + } else { $log = "\n No Domains Found! Ensure only domain based Feeds are used for DNSBL!\n"; pfb_logger("{$log}", 1); @@ -8769,12 +9050,14 @@ function sync_package_pfblockerng($cron='') { @copy("{$file_dwn}.orig", "/tmp/Error_{$header}_{$ts}.orig"); unlink_if_exists("{$pfbfolder}/{$header}.bk"); + unlink_if_exists("{$pfbfolder}/{$header}.ex"); $result = array('unbound-checkconf: no errors'); } // If parse error found, use previously downloaded file if available if (!$pfb['dnsbl_py_blacklist'] && !preg_grep("/unbound-checkconf: no errors/", $result)) { unlink_if_exists("{$pfbfolder}/{$header}.bk"); + unlink_if_exists("{$pfbfolder}/{$header}.ex"); pfb_logger("\n DNSBL FAIL - Skipped! Use previous data, if found:\n", 2); $log = htmlspecialchars(implode("\n", $result)); @@ -8794,39 +9077,98 @@ function sync_package_pfblockerng($cron='') { @rename("{$pfbfolder}/{$header}.bk", "{$pfbfolder}/{$header}.txt"); } + // Rename processed whitelist file to final location + if (file_exists("{$pfbfolder}/{$header}.ex")) { + @rename("{$pfbfolder}/{$header}.ex", "{$pfbfolder}/{$header}.exclusions"); + } + // Create empty placeholder file if (!file_exists("{$pfbfolder}/{$header}.txt")) { touch("{$pfbfolder}/{$header}.txt"); } - $list_cnt = exec("{$pfb['grep']} -c ^ " . 
escapeshellarg("{$pfbfolder}/{$header}.txt")); - $alias_cnt = $alias_cnt + $list_cnt; + // Create empty placeholder whitelist file + if (!file_exists("{$pfbfolder}/{$header}.exclusions")) { + touch("{$pfbfolder}/{$header}.exclusions"); + } + + // Save DNSBL feed info for postprocessing and alias information + $alias_postprocessing_data[] = array( + 'header' => $header, + 'alias' => $alias, + 'lists_dnsbl_current' => $lists_dnsbl_current, + 'pfbfolder' => $pfbfolder + ); // Remove update file indicator unlink_if_exists("{$pfbfolder}/{$header}.update"); } } } - - // If changes found update DNSBL alias and TLD disabled, call function to update DNSBL alias - if ($pfb['aliasupdate'] && !$pfb['dnsbl_tld']) { - dnsbl_alias_update('update', $alias, $pfbfolder, $lists_dnsbl_current, $alias_cnt); - } - - // Collect Alias/Feeds for post TLD function - if ($pfb['dnsbl_tld']) { - if (!is_array($pfb['tld_update'][$alias])) { - $pfb['tld_update'][$alias] = array(); - } - $pfb['tld_update'][$alias]['feeds'] = $lists_dnsbl_current; - $pfb['tld_update'][$alias]['count'] = $alias_cnt; - } } else { dnsbl_alias_update('disabled', $alias, '', '', ''); } } + } + + if ($pfb['dnsbl_py_blacklist'] && $postprocess) { + $log = "\n===[ DNSBL Postprocess ]============================================\n"; + pfb_logger("{$log}", 1); + + // Consolidate all exclusions + exec("{$pfb['script']} dnsbl_py_assemble_exclusions_file unused unused unused {$elog}"); + + exec("{$pfb['script']} dnsbl_py_assemble_redundants_file unused unused unused {$elog}"); + + // Process Whitelists + foreach ($postprocess_dnsbl as $header_esc) { + + $log = "\n[ ${header_esc} ]${logtab} Processing downloaded whitelist entries\n"; + pfb_logger("{$log}", 1); + + // Ignore TOP1M whitelist at this stage + $pfb_alexa = 'Disabled'; + + // Call script to process DNSBL Whitelisting + exec("{$pfb['script']} dnsbl_py_remove_excluded {$header_esc} {$pfb_alexa} unused {$elog}"); + + $log = "\n[ ${header_esc} ]${logtab} Removing 
redundant DNSBL entries\n"; + pfb_logger("{$log}", 1); + + exec("{$pfb['script']} dnsbl_py_remove_redundant {$header_esc} unused unused {$elog}"); + } + + exec("{$pfb['script']} dnsbl_py_cleanup_exclusions_file unused unused unused {$elog}"); + + exec("{$pfb['script']} dnsbl_py_cleanup_redundants_file unused unused unused {$elog}"); + } + $alias_cnt = 0; + // Process alias information + foreach ($alias_postprocessing_data as $item) { + + $header = $item['header']; + $alias = $item['alias']; + $lists_dnsbl_current = $item['lists_dnsbl_current']; + $pfbfolder = $item['pfbfolder']; + + $list_cnt = exec("{$pfb['grep']} -c ^ " . escapeshellarg("{$pfbfolder}/{$header}.txt")); + $alias_cnt = $alias_cnt + $list_cnt; + + // If changes found update DNSBL alias and TLD disabled, call function to update DNSBL alias + if ($pfb['aliasupdate'] && !$pfb['dnsbl_tld']) { + dnsbl_alias_update('update', $alias, $pfbfolder, $lists_dnsbl_current, $alias_cnt); + } + + // Collect Alias/Feeds for post TLD function + if ($pfb['dnsbl_tld']) { + if (!is_array($pfb['tld_update'][$alias])) { + $pfb['tld_update'][$alias] = array(); + } + $pfb['tld_update'][$alias]['feeds'] = $lists_dnsbl_current; + $pfb['tld_update'][$alias]['count'] = $alias_cnt; + } } // Remove any unused DNSBL aliases @@ -8918,7 +9260,7 @@ function sync_package_pfblockerng($cron='') { if (!empty($lists_dnsbl_all)) { pfb_logger("\n------------------------------------------------------------------------\n", 1); - pfb_logger('Assembling DNSBL database...', 1); + pfb_logger("\nAssembling DNSBL database...", 1); unlink_if_exists("{$pfb['dnsbl_file']}.raw"); $pfb_output = @fopen("{$pfb['dnsbl_file']}.raw", 'w'); foreach ($lists_dnsbl_all as $current_list) { diff --git a/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/pkg/pfblockerng/pfblockerng.sh b/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/pkg/pfblockerng/pfblockerng.sh index 28eb9ed00dc0..b389dee56697 100644 --- 
a/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/pkg/pfblockerng/pfblockerng.sh +++ b/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/pkg/pfblockerng/pfblockerng.sh @@ -81,6 +81,9 @@ tempmatchfile=/tmp/pfbtemp8_$rvar domainmaster=/tmp/pfbtemp9_$rvar asntemp=/tmp/pfbtemp10_$rvar +dnsbl_exclusions=/tmp/dnsbl_exclusions +dnsbl_cleanup_wildcards=/tmp/dnsbl_cleanup_wildcards + dnsbl_tld_remove=/tmp/dnsbl_tld_remove dnsbl_add=/tmp/dnsbl_add @@ -361,10 +364,8 @@ duplicate() { emptyfiles # Call emptyfiles function } - - -# Function for DNSBL (De-Duplication, Whitelist, and Alexa Whitelist) -dnsbl_scrub() { +# DNSBL scrubbing for Unbound mode (De-Duplication, Whitelist, and Alexa Whitelist) +dnsbl_scrub_unbound() { counto="$(grep -c ^ ${pfbdomain}${alias}.bk)" alexa_enable="${max}" @@ -379,15 +380,7 @@ dnsbl_scrub() { # Only execute awk command, if master domain file contains data. query_size="$(grep -c ^ ${domainmaster})" if [ "${query_size}" -gt 0 ]; then - - # Unbound blocking mode dedup - if [ "${dedup}" == '' ]; then - awk 'FNR==NR{a[$2];next}!($2 in a)' "${domainmaster}" "${pfbdomain}${alias}.bk2" > "${pfbdomain}${alias}.bk" - - # Unbound python blocking mode dedup - else - awk -F',' 'FNR==NR{a[$2];next}!($2 in a)' "${domainmaster}" "${pfbdomain}${alias}.bk2" > "${pfbdomain}${alias}.bk" - fi + awk 'FNR==NR{a[$2];next}!($2 in a)' "${domainmaster}" "${pfbdomain}${alias}.bk2" > "${pfbdomain}${alias}.bk" fi rm -f "${domainmaster}" @@ -406,20 +399,11 @@ dnsbl_scrub() { countw="$((countf - countx))" if [ "${countw}" -gt 0 ]; then - if [ "${dedup}" == '' ]; then - data="$(awk 'FNR==NR{a[$0];next}!($0 in a)' ${pfbdomain}${alias}.bk2 ${pfbdomain}${alias}.bk | \ - cut -d '"' -f2 | cut -d ' ' -f1 | sort | uniq | tr '\n' '|')" - else - data="$(awk 'FNR==NR{a[$0];next}!($0 in a)' ${pfbdomain}${alias}.bk2 ${pfbdomain}${alias}.bk | \ - cut -d ',' -f2 | sort | uniq | tr '\n' '|')" - fi + data="$(awk 'FNR==NR{a[$0];next}!($0 in a)' ${pfbdomain}${alias}.bk2 ${pfbdomain}${alias}.bk 
| \ + cut -d '"' -f2 | cut -d ' ' -f1 | sort | uniq | tr '\n' '|')" if [ -z "${data}" ]; then - if [ "${dedup}" == '' ]; then - data="$(cut -d '"' -f2 ${pfbdomain}${alias}.bk | cut -d ' ' -f1 | sort | uniq | tr '\n' '|')" - else - data="$(cut -d ',' -f2 ${pfbdomain}${alias}.bk | sort | uniq | tr '\n' '|')" - fi + data="$(cut -d '"' -f2 ${pfbdomain}${alias}.bk | cut -d ' ' -f1 | sort | uniq | tr '\n' '|')" fi echo " Whitelist: ${data}" @@ -437,20 +421,11 @@ dnsbl_scrub() { counta="$((countf - countx))" if [ "${counta}" -gt 0 ]; then - if [ "${dedup}" == '' ]; then - data="$(awk 'FNR==NR{a[$0];next}!($0 in a)' ${pfbdomain}${alias}.bk2 ${pfbdomain}${alias}.bk | \ - cut -d '"' -f2 | cut -d ' ' -f1 | sort | uniq | tr '\n' '|')" - else - data="$(awk 'FNR==NR{a[$0];next}!($0 in a)' ${pfbdomain}${alias}.bk2 ${pfbdomain}${alias}.bk | \ - cut -d ',' -f2 | sort | uniq | tr '\n' '|')" - fi + data="$(awk 'FNR==NR{a[$0];next}!($0 in a)' ${pfbdomain}${alias}.bk2 ${pfbdomain}${alias}.bk | \ + cut -d '"' -f2 | cut -d ' ' -f1 | sort | uniq | tr '\n' '|')" if [ -z "${data}" ]; then - if [ "${dedup}" == '' ]; then - data="$(cut -d '"' -f2 ${pfbdomain}${alias}.bk | cut -d ' ' -f1 | sort | uniq | tr '\n' '|')" - else - data="$(cut -d ',' -f2 ${pfbdomain}${alias}.bk | sort | uniq | tr '\n' '|')" - fi + data="$(cut -d '"' -f2 ${pfbdomain}${alias}.bk | cut -d ' ' -f1 | sort | uniq | tr '\n' '|')" fi echo " TOP1M Whitelist: ${data}" @@ -470,6 +445,194 @@ dnsbl_scrub() { echo ' ----------------------------------------------------------------------' } +# DNSBL scrubbing for Python mode (De-Duplication, Whitelist, and Alexa Whitelist) +# Does not print exactly which lines are removed by the whitelist processing, since +# it is spammy and this step will be removed in favor of zero-downtime reloads anyway. +# If desired, users can see the contents of the final whitelist file in the logs tab. 
+dnsbl_scrub_python() { + + counto="$(grep -c ^ ${pfbdomain}${alias}.bk)" + alexa_enable="${max}" + + # Process De-Duplication + sort "${pfbdomain}${alias}.bk" | uniq > "${pfbdomain}${alias}.bk2" + countu="$(grep -c ^ ${pfbdomain}${alias}.bk2)" + + if [ -d "${pfbdomain}" ] && [ "$(ls -A ${pfbdomain}*.txt 2>/dev/null)" ]; then + find "${pfbdomain}"*.txt ! -name "${alias}.txt" | xargs cat > "${domainmaster}" + + # Only execute awk command, if master domain file contains data. + query_size="$(grep -c ^ ${domainmaster})" + if [ "${query_size}" -gt 0 ]; then + # Consider both the domain name (2nd column) and type of entry (7th column) when checking for duplicates + awk -F',' 'FNR==NR{a[$2, $7];next}!(($2, $7) in a)' "${domainmaster}" "${pfbdomain}${alias}.bk2" > "${pfbdomain}${alias}.bk" + fi + + rm -f "${domainmaster}" + else + mv -f "${pfbdomain}${alias}.bk2" "${pfbdomain}${alias}.bk" + fi + + countf="$(grep -c ^ ${pfbdomain}${alias}.bk)" + countd="$((countu - countf))" + rm -f "${pfbdomain}${alias}.bk2" + + # Sort and count the Easylist exclusions + rm -f "${pfbdomain}${alias}.ex2" + if [ -s "${pfbdomain}${alias}.ex" ]; then + sort "${pfbdomain}${alias}.ex" | uniq > "${pfbdomain}${alias}.ex2" + mv -f "${pfbdomain}${alias}.ex2" "${pfbdomain}${alias}.ex" + countex="$(grep -c ^ ${pfbdomain}${alias}.ex)" + else + countex=0 + fi + + # Remove Whitelisted Domains and Sub-Domains, if configured + if [ -s "${pfbdnsblsuppression}" ] && [ -s "${pfbdomain}${alias}.bk" ]; then + /usr/local/bin/ggrep -vF -f "${pfbdnsblsuppression}" "${pfbdomain}${alias}.bk" > "${pfbdomain}${alias}.bk2" + countx="$(grep -c ^ ${pfbdomain}${alias}.bk2)" + countw="$((countf - countx))" + + if [ "${countw}" -gt 0 ]; then + mv -f "${pfbdomain}${alias}.bk2" "${pfbdomain}${alias}.bk" + fi + else + countw=0 + fi + + # Process TOP1M Whitelist + if [ "${alexa_enable}" == "on" ] && [ -s "${pfbalexa}" ] && [ -s "${pfbdomain}${alias}.bk" ]; then + countf="$(grep -c ^ ${pfbdomain}${alias}.bk)" + 
/usr/local/bin/ggrep -vFi -f "${pfbalexa}" "${pfbdomain}${alias}.bk" > "${pfbdomain}${alias}.bk2" + countx="$(grep -c ^ ${pfbdomain}${alias}.bk2)" + counta="$((countf - countx))" + + if [ "${counta}" -gt 0 ]; then + mv -f "${pfbdomain}${alias}.bk2" "${pfbdomain}${alias}.bk" + fi + else + counta=0 + fi + + countf="$(grep -c ^ ${pfbdomain}${alias}.bk)" + rm -f "${pfbdomain}${alias}.bk2" + + echo ' ----------------------------------------------------------------------' + printf "%-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n" ' Orig.' 'Unique' '# Dups' '# Excl' '# White' '# TOP1M' 'Final' + echo ' ----------------------------------------------------------------------' + printf "%-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n" " ${counto}" "${countu}" "${countd}" "${countex}" "${countw}" "${counta}" "${countf}" + echo ' ----------------------------------------------------------------------' +} + +# Process all downloaded exclusion, converted them to GNU grep regexes and put them into a master file +dnsbl_py_assemble_exclusions_file() { + # Downloaded exclusion lists are always either regular expressions or wildcards + # Convert all of them to regular expressions for GNU grep + if [ "$(ls -A ${pfbdomain}*.exclusions 2>/dev/null)" ]; then + find "${pfbdomain}"*.exclusions | xargs cat | sort | uniq | cut -d',' -f1 | \ + sed 's/\./\\./g' | sed 's/*/[^,[:space:]]*/g' | sed 's/^/[,.]/' | sed 's/$/,,/' > "${dnsbl_exclusions}" + fi +} + +# Remove excluded Domains and Sub-Domains using the file assembled above +dnsbl_py_remove_excluded() { + + counto="$(grep -c ^ ${pfbdomain}${alias}.txt)" + + # Process DNSBL file + if [ -s "${pfbdomain}${alias}.txt" ] && [ -s "${dnsbl_exclusions}" ]; then + + # Only execute if exclusion temp file contains data + query_size="$(grep -c ^ ${dnsbl_exclusions})" + if [ "${query_size}" -gt 0 ]; then + countf="$(grep -c ^ ${pfbdomain}${alias}.txt)" + /usr/local/bin/ggrep -v -f "${dnsbl_exclusions}" "${pfbdomain}${alias}.txt" > 
"${pfbdomain}${alias}.bk" + countx="$(grep -c ^ ${pfbdomain}${alias}.bk)" + countex="$((countf - countx))" + + if [ "${countex}" -gt 0 ]; then + mv -f "${pfbdomain}${alias}.bk" "${pfbdomain}${alias}.txt" + else + rm -f "${pfbdomain}${alias}.bk" + fi + else + countex=0 + fi + else + countex=0 + fi + + countf="$(grep -c ^ ${pfbdomain}${alias}.txt)" + + echo ' ------------------------------' + printf "%-10s %-10s %-10s\n" ' Orig.' '# Removed' 'Final' + echo ' ------------------------------' + printf "%-10s %-10s %-10s\n" " ${counto}" "${countex}" "${countf}" + echo ' ------------------------------' +} + +# Remove the assembled exclusions file +dnsbl_py_cleanup_exclusions_file() { + rm -f "${dnsbl_exclusions}" +} + +# Process all DNSBL files looking for wildcard entries and put them all in a master file +dnsbl_py_assemble_redundants_file() { + if [ "$(ls -A ${pfbdomain}*.txt 2>/dev/null)" ]; then + find "${pfbdomain}"*.txt | xargs cat | grep ',1$' | sort | uniq | cut -d',' -f2 | sed -E 's/^(.*)$/,\1,,\n.\1,,/' > "${dnsbl_cleanup_wildcards}" + fi +} + +# Remove all exact-match and regex entries that would be redundant given an existing wildcard entry +# e.g. 
google.com and *.google.com are both redundant if a wildcard entry google.com exists +dnsbl_py_remove_redundant() { + + counto="$(grep -c ^ ${pfbdomain}${alias}.txt)" + + # Process DNSBL file + if [ -s "${pfbdomain}${alias}.txt" ] && [ -s "${dnsbl_cleanup_wildcards}" ]; then + + # Only execute if wildcard temp file contains data + query_size="$(grep -c ^ ${dnsbl_cleanup_wildcards})" + if [ "${query_size}" -gt 0 ]; then + countf="$(grep -c ^ ${pfbdomain}${alias}.txt)" + + # backup wildcard entries, as they are surely going to be removed (obviously they all exist in the wildcard temp file) + grep ',1$' "${pfbdomain}${alias}.txt" > "${pfbdomain}${alias}.bk2" + + # remove all redundant entries + /usr/local/bin/ggrep -vF -f "${dnsbl_cleanup_wildcards}" "${pfbdomain}${alias}.txt" > "${pfbdomain}${alias}.bk" + + countw="$(grep -c ^ ${pfbdomain}${alias}.bk2)" + countx="$(grep -c ^ ${pfbdomain}${alias}.bk)" + countrd="$((countf - (countx + countw)))" + + if [ "${countrd}" -gt 0 ]; then + cat "${pfbdomain}${alias}.bk" "${pfbdomain}${alias}.bk2" | sort > "${pfbdomain}${alias}.txt" + fi + + rm -f "${pfbdomain}${alias}.bk" + rm -f "${pfbdomain}${alias}.bk2" + else + countrd=0 + fi + else + countrd=0 + fi + + countf="$(grep -c ^ ${pfbdomain}${alias}.txt)" + + echo ' ------------------------------' + printf "%-10s %-10s %-10s\n" ' Orig.' 
'# Removed' 'Final' + echo ' ------------------------------' + printf "%-10s %-10s %-10s\n" " ${counto}" "${countrd}" "${countf}" + echo ' ------------------------------' +} + +# Remove the assembled wildcards +dnsbl_py_cleanup_redundants_file() { + rm -f "${dnsbl_cleanup_wildcards}" +} # Function to process TLD domaintld() { @@ -1311,8 +1474,29 @@ case "${1}" in continent) duplicate ;; - dnsbl_scrub) - dnsbl_scrub + dnsbl_scrub_unbound) + dnsbl_scrub_unbound + ;; + dnsbl_scrub_python) + dnsbl_scrub_python + ;; + dnsbl_py_assemble_exclusions_file) + dnsbl_py_assemble_exclusions_file + ;; + dnsbl_py_remove_excluded) + dnsbl_py_remove_excluded + ;; + dnsbl_py_cleanup_exclusions_file) + dnsbl_py_cleanup_exclusions_file + ;; + dnsbl_py_assemble_redundants_file) + dnsbl_py_assemble_redundants_file + ;; + dnsbl_py_remove_redundant) + dnsbl_py_remove_redundant + ;; + dnsbl_py_cleanup_redundants_file) + dnsbl_py_cleanup_redundants_file ;; domaintld) domaintld diff --git a/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/www/pfblockerng/pfblockerng_alerts.php b/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/www/pfblockerng/pfblockerng_alerts.php index 2c2fcfa4fd23..a1ac996be165 100644 --- a/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/www/pfblockerng/pfblockerng_alerts.php +++ b/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/www/pfblockerng/pfblockerng_alerts.php @@ -1177,7 +1177,7 @@ } if ($pfb['dnsbl_py_blacklist']) { - @file_put_contents($pfb['unbound_py_data'], ",{$entry},,1,,\n", FILE_APPEND); + @file_put_contents($pfb['unbound_py_data'], ",{$entry},,1,DNSBL,USER\n", FILE_APPEND); $dnsbl_py_changes = TRUE; } else { $domain_esc = escapeshellarg($entry); @@ -1200,7 +1200,7 @@ } if ($pfb['dnsbl_py_blacklist']) { - @file_put_contents($pfb['unbound_py_zone'], ",{$entry},,1,,\n", FILE_APPEND); + @file_put_contents($pfb['unbound_py_zone'], ",{$entry},,1,DNSBL_TLD,USER\n", FILE_APPEND); $dnsbl_py_changes = TRUE; } else { $domain_esc = escapeshellarg($entry); diff 
--git a/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/www/pfblockerng/pfblockerng_dnsbl.php b/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/www/pfblockerng/pfblockerng_dnsbl.php index 45faf3ebcd65..8da09618a63f 100644 --- a/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/www/pfblockerng/pfblockerng_dnsbl.php +++ b/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/www/pfblockerng/pfblockerng_dnsbl.php @@ -79,6 +79,7 @@ $pconfig['pfb_cname'] = $pfb['dconfig']['pfb_cname'] ?: ''; $pconfig['pfb_noaaaa'] = $pfb['dconfig']['pfb_noaaaa'] ?: ''; $pconfig['pfb_gp'] = $pfb['dconfig']['pfb_gp'] ?: ''; +$pconfig['pfb_py_debug'] = $pfb['dconfig']['pfb_py_debug'] ?: ''; $pconfig['pfb_pytld'] = $pfb['dconfig']['pfb_pytld'] ?: ''; $pconfig['pfb_pytld_sort'] = $pfb['dconfig']['pfb_pytld_sort'] ?: ''; $pconfig['pfb_pytlds_gtld'] = explode(',', $pfb['dconfig']['pfb_pytlds_gtld']) ?: $default_tlds; @@ -586,6 +587,7 @@ $pfb['dconfig']['pfb_cname'] = pfb_filter($_POST['pfb_cname'], PFB_FILTER_ON_OFF, 'dnsbl') ?: ''; $pfb['dconfig']['pfb_noaaaa'] = pfb_filter($_POST['pfb_noaaaa'], PFB_FILTER_ON_OFF, 'dnsbl') ?: ''; $pfb['dconfig']['pfb_gp'] = pfb_filter($_POST['pfb_gp'], PFB_FILTER_ON_OFF, 'dnsbl') ?: ''; + $pfb['dconfig']['pfb_py_debug'] = pfb_filter($_POST['pfb_py_debug'], PFB_FILTER_ON_OFF, 'dnsbl') ?: ''; $pfb['dconfig']['pfb_pytld'] = pfb_filter($_POST['pfb_pytld'], PFB_FILTER_ON_OFF, 'dnsbl') ?: ''; $pfb['dconfig']['pfb_pytld_sort'] = pfb_filter($_POST['pfb_pytld_sort'], PFB_FILTER_ON_OFF, 'dnsbl') ?: ''; @@ -2591,6 +2593,14 @@ 'on' ))->setHelp('Enable the Python Group Policy functionality to allow certain Local LAN IPs to bypass DNSBL'); +$section->addInput(new Form_Checkbox( + 'pfb_py_debug', + gettext('Python Debug Log') . '(py)', + 'Enable', + $pconfig['pfb_py_debug'] === 'on' ? true:false, + 'on' +))->setHelp('Enable logging of debug information. 
This can potentially increase CPU and memory usage and should only be enabled for troubleshooting'); + $form->add($section); $section = new Form_Section('Python Group Policy', 'Python_Group_Policy', COLLAPSIBLE|SEC_CLOSED); @@ -3230,6 +3240,7 @@ function enable_python() { hideCheckbox('pfb_noaaaa', !python); hideCheckbox('pfb_cname', !python); hideCheckbox('pfb_gp', !python); + hideCheckbox('pfb_py_debug', !python); hideInput('pfb_regex_list', !python); hideInput('pfb_noaaaa_list', !python); hideInput('pfb_gp_bypass_list', !python); diff --git a/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/www/pfblockerng/pfblockerng_log.php b/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/www/pfblockerng/pfblockerng_log.php index 28002d4004f3..f068138bbb9f 100644 --- a/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/www/pfblockerng/pfblockerng_log.php +++ b/net/pfSense-pkg-pfBlockerNG-devel/files/usr/local/www/pfblockerng/pfblockerng_log.php @@ -77,9 +77,21 @@ function getlogs($logdir, $log_extentions = array('log')) { $pfb_logtypes = array( 'defaultlogs' => array('name' => 'Log Files', 'logdir' => "{$pfb['logdir']}/", - 'logs' => array('pfblockerng.log', 'error.log', 'ip_block.log', 'ip_permit.log', 'ip_match.log', - 'dnsbl.log', 'unified.log', 'extras.log', 'dnsbl_parsed_error.log', 'dns_reply.log', - 'py_error.log', 'maxmind_ver', 'wizard.log'), + 'logs' => array( + 'pfblockerng.log', + 'error.log', + 'ip_block.log', + 'ip_permit.log', + 'ip_match.log', + 'dnsbl.log', + 'unified.log', + 'extras.log', + 'dnsbl_parsed_error.log', + 'dns_reply.log', + 'py_error.log', + 'py_debug.log', + 'maxmind_ver', + 'wizard.log'), 'download' => TRUE, 'clear' => TRUE ), @@ -109,7 +121,7 @@ function getlogs($logdir, $log_extentions = array('log')) { 'clear' => TRUE ), 'dnsbl' => array('name' => 'DNSBL Files', - 'ext' => array('txt', 'ip'), + 'ext' => array('txt', 'ip', 'exclusions'), 'txt' => 'dnsbl', 'logdir' => "{$pfb['dnsdir']}/", 'download' => TRUE, @@ -312,6 +324,10 @@ function 
pfb_validate_filepath($validate, $pfb_logtypes) { $fp = @fopen("{$s_logfile}", 'r+'); @ftruncate($fp, 0); @fclose($fp); + } elseif (strpos($s_logfile, 'py_debug.log') !== FALSE) { + $fp = @fopen("{$s_logfile}", 'r+'); + @ftruncate($fp, 0); + @fclose($fp); } else { unlink_if_exists($s_logfile);