From 3ed5425e0b294a6863e542ca5ce29887b4e45204 Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Fri, 19 Dec 2014 13:53:39 -0500 Subject: [PATCH 1/5] remove useless --- ckanext/datajson/datajsonvalidator.py | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/ckanext/datajson/datajsonvalidator.py b/ckanext/datajson/datajsonvalidator.py index c7973368..48d15fde 100644 --- a/ckanext/datajson/datajsonvalidator.py +++ b/ckanext/datajson/datajsonvalidator.py @@ -1,10 +1,5 @@ import re -# from the iso8601 package, plus ^ and $ on the edges -ISO8601_REGEX = re.compile(r"^([0-9]{4})(-([0-9]{1,2})(-([0-9]{1,2})" - r"((.)([0-9]{2}):([0-9]{2})(:([0-9]{2})(\.([0-9]+))?)?" - r"(Z|(([-+])([0-9]{2}):([0-9]{2})))?)?)?)?$") - TEMPORAL_REGEX_1 = re.compile( r'^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?' r'|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]' @@ -265,7 +260,7 @@ def do_validation(doc, errors_array): # rights # Required-If-Applicable # TODO move to warnings # if item.get("accessLevel") != "public": - # check_string_field(item, "rights", 1, dataset_name, errs) + # check_string_field(item, "rights", 1, dataset_name, errs) # spatial # Required-If-Applicable # TODO: There are more requirements than it be a string. @@ -431,23 +426,6 @@ def check_string_field(obj, field_name, min_length, dataset_name, errs): return True -def check_date_field(obj, field_name, dataset_name, errs): - # checks that a required date field exists and looks like a date - if not check_required_field(obj, field_name, (str, unicode), dataset_name, errs): - return False - elif len(obj[field_name].strip()) == 0: - add_error(errs, 10, "Missing Required Fields", "The '%s' field is present but empty." % field_name, - dataset_name) - return False - else: - if not ISO8601_REGEX.match(obj[field_name]): - add_error(errs, 5, "Invalid Required Field Value", - "The '%s' field has an invalid ISO 8601 date or date-time value: \"%s\"." % ( - field_name, obj[field_name]), dataset_name) - return False - return True - - def check_url_field(required, obj, field_name, dataset_name, errs): # checks that a required or optional field, if specified, looks like a URL if not required and (field_name not in obj or obj[field_name] is None): return True # not required, so OK From e07ad6cea74609ef085334f49d58401dcc7872b5 Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Fri, 19 Dec 2014 13:56:25 -0500 Subject: [PATCH 2/5] refactoring --- ckanext/datajson/datajsonvalidator.py | 28 +++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/ckanext/datajson/datajsonvalidator.py b/ckanext/datajson/datajsonvalidator.py index 48d15fde..8c66655a 100644 --- a/ckanext/datajson/datajsonvalidator.py +++ b/ckanext/datajson/datajsonvalidator.py @@ -102,11 +102,11 @@ def do_validation(doc, errors_array): dataset_name = "dataset %d" % (i + 1) # title - if check_string_field(item, "title", 1, dataset_name, errs): + if check_required_string_field(item, "title", 1, dataset_name, errs): dataset_name = '"%s"' % item.get("title", "").strip() # accessLevel # required - if check_string_field(item, "accessLevel", 3, dataset_name, errs): + if check_required_string_field(item, "accessLevel", 3, dataset_name, errs): if item["accessLevel"] not in ("public", "restricted public", "non-public"): add_error(errs, 5, "Invalid Required Field Value", "The field 'accessLevel' had an invalid value: \"%s\"" % item["accessLevel"], @@ -132,10 +132,10 @@ def do_validation(doc, errors_array): if check_required_field(item, "contactPoint", dict, dataset_name, errs): cp = item["contactPoint"] # contactPoint - fn # required - check_string_field(cp, "fn", 1, dataset_name, errs) + check_required_string_field(cp, "fn", 1, dataset_name, errs) # contactPoint - hasEmail # required - if check_string_field(cp, "hasEmail", 9, dataset_name, errs): + if check_required_string_field(cp, "hasEmail", 9, dataset_name, errs): import lepl.apps.rfc3696 email_validator = lepl.apps.rfc3696.Email() @@ -146,10 +146,10 @@ def do_validation(doc, errors_array): dataset_name) # description # required - check_string_field(item, "description", 1, dataset_name, errs) + check_required_string_field(item, "description", 1, dataset_name, errs) # identifier #required - if check_string_field(item, "identifier", 1, dataset_name, errs): + if check_required_string_field(item, "identifier", 1, dataset_name, errs): if item["identifier"] in seen_identifiers: add_error(errs, 5, "Invalid Required Field Value", "The dataset identifier \"%s\" is used more than once." % item["identifier"], @@ -170,7 +170,7 @@ def do_validation(doc, errors_array): "A keyword in the keyword array was an empty string.", dataset_name) # modified # required - if check_string_field(item, "modified", 1, dataset_name, errs): + if check_required_string_field(item, "modified", 1, dataset_name, errs): if not MODIFIED_REGEX_1.match(item['modified']) \ and not MODIFIED_REGEX_2.match(item['modified']) \ and not MODIFIED_REGEX_3.match(item['modified']): @@ -190,7 +190,7 @@ def do_validation(doc, errors_array): # publisher # required if check_required_field(item, "publisher", dict, dataset_name, errs): # publisher - name # required - check_string_field(item["publisher"], "name", 1, dataset_name, errs) + check_required_string_field(item["publisher"], "name", 1, dataset_name, errs) # Required-If-Applicable @@ -217,7 +217,7 @@ def do_validation(doc, errors_array): # distribution - mediaType # Required-If-Applicable if 'downloadURL' in dt: - if check_string_field(dt, "mediaType", 1, distribution_name, errs): + if check_required_string_field(dt, "mediaType", 1, distribution_name, errs): if not IANA_MIME_REGEX.match(dt["mediaType"]): add_error(errs, 5, "Invalid Field Value", "The distribution mediaType \"%s\" is invalid. " @@ -244,15 +244,15 @@ def do_validation(doc, errors_array): # distribution - description # optional if dt.get("description") is not None: - check_string_field(dt, "description", 1, distribution_name, errs) + check_required_string_field(dt, "description", 1, distribution_name, errs) # distribution - format # optional if dt.get("format") is not None: - check_string_field(dt, "format", 1, distribution_name, errs) + check_required_string_field(dt, "format", 1, distribution_name, errs) # distribution - title # optional if dt.get("title") is not None: - check_string_field(dt, "title", 1, distribution_name, errs) + check_required_string_field(dt, "title", 1, distribution_name, errs) # license # Required-If-Applicable check_url_field(False, item, "license", dataset_name, errs) @@ -307,7 +307,7 @@ def do_validation(doc, errors_array): # isPartOf # optional if item.get("isPartOf"): - check_string_field(item, "isPartOf", 1, dataset_name, errs) + check_required_string_field(item, "isPartOf", 1, dataset_name, errs) # issued # optional if item.get("issued") is not None: @@ -410,7 +410,7 @@ def check_required_field(obj, field_name, data_type, dataset_name, errs): return True -def check_string_field(obj, field_name, min_length, dataset_name, errs): +def check_required_string_field(obj, field_name, min_length, dataset_name, errs): # checks that a required field exists, is typed as a string, and has a minimum length if not check_required_field(obj, field_name, (str, unicode), dataset_name, errs): return False From 0aeafc70b0eb9bf1d74a0dc7f0ff554a3abd3796 Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Fri, 19 Dec 2014 14:07:15 -0500 Subject: [PATCH 3/5] error message fix --- ckanext/datajson/datajsonvalidator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/datajson/datajsonvalidator.py b/ckanext/datajson/datajsonvalidator.py index 8c66655a..9185e6a0 100644 --- a/ckanext/datajson/datajsonvalidator.py +++ b/ckanext/datajson/datajsonvalidator.py @@ -199,7 +199,7 @@ def do_validation(doc, errors_array): pass # not required elif not isinstance(item["dataQuality"], bool): add_error(errs, 50, "Invalid Field Value (Optional Fields)", - "The field 'theme' must be true or false, " + "The field 'dataQuality' must be true or false, " "as a JSON boolean literal (not the string \"true\" or \"false\").", dataset_name) From 1fb42a37d86da44a02b3ad0fddde11e330d08131 Mon Sep 17 00:00:00 2001 From: ykhadilkar Date: Tue, 13 Jan 2015 19:05:33 -0500 Subject: [PATCH 4/5] Creating combined JSON ... https://github.com/GSA/enterprise-data-inventory/issues/26 --- ckanext/datajson/plugin.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index d750916b..33a264a6 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -217,15 +217,16 @@ def make_edi(owner_org): logger.addHandler(eh) # Build the data.json file. - packages = get_all_group_packages(group_id=owner_org) + packages = get_packages(owner_org) + output = [] for pkg in packages: - if pkg['owner_org'] == owner_org: - datajson_entry = make_datajson_entry(pkg) - if datajson_entry and is_valid(datajson_entry): - output.append(datajson_entry) - else: - logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), pkg.get('title', None)) + #if pkg['owner_org'] == owner_org: + datajson_entry = make_datajson_entry(pkg) + if datajson_entry and is_valid(datajson_entry): + output.append(datajson_entry) + else: + logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), pkg.get('title', None)) # Get the error log eh.flush() @@ -247,18 +248,15 @@ def make_pdl(owner_org): eh.setFormatter(formatter) logger.addHandler(eh) - # Build the data.json file. - packages = get_all_group_packages(group_id=owner_org) + packages = get_packages(owner_org) output = [] #Create data.json only using public datasets, datasets marked non-public are not exposed for pkg in packages: extras = dict([(x['key'], x['value']) for x in pkg['extras']]) try: - if pkg['owner_org'] == owner_org \ - and not (re.match(r'[Nn]on-public', extras['public_access_level'])): - + if not (re.match(r'[Nn]on-public', extras['public_access_level'])): datajson_entry = make_datajson_entry(pkg) if datajson_entry and is_valid(datajson_entry): output.append(datajson_entry) @@ -280,6 +278,20 @@ def make_pdl(owner_org): #return json.dumps(output) return write_zip(output, error, zip_name='pdl') +def get_packages(owner_org): + # Build the data.json file. + packages = get_all_group_packages(group_id=owner_org) + #get packages for sub-agencies. + sub_agency = model.Group.get(owner_org) + if 'sub-agencies' in sub_agency.extras.col.target: + sub_agencies = sub_agency.extras.col.target['sub-agencies'].value + sub_agencies_list = sub_agencies.split(",") + for sub in sub_agencies_list: + sub_packages = get_all_group_packages(group_id=sub) + for sub_package in sub_packages: + packages.append(sub_package) + + return packages def get_all_group_packages(group_id): """ From b6154f2091064552413f3a0d54eb424e5f514ce2 Mon Sep 17 00:00:00 2001 From: ykhadilkar Date: Wed, 14 Jan 2015 18:21:32 -0500 Subject: [PATCH 5/5] Github issues # 26 - Bug fix - checking if extra field is 'active' --- ckanext/datajson/plugin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index 33a264a6..7987ec81 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -283,7 +283,8 @@ def get_packages(owner_org): packages = get_all_group_packages(group_id=owner_org) #get packages for sub-agencies. sub_agency = model.Group.get(owner_org) - if 'sub-agencies' in sub_agency.extras.col.target: + if 'sub-agencies' in sub_agency.extras.col.target and \ + sub_agency.extras.col.target['sub-agencies'].state == 'active': sub_agencies = sub_agency.extras.col.target['sub-agencies'].value sub_agencies_list = sub_agencies.split(",") for sub in sub_agencies_list: