diff --git a/ckanext/datajson/datajsonvalidator.py b/ckanext/datajson/datajsonvalidator.py
index ea84cbb2..9185e6a0 100644
--- a/ckanext/datajson/datajsonvalidator.py
+++ b/ckanext/datajson/datajsonvalidator.py
@@ -1,10 +1,5 @@
 import re
 
-# from the iso8601 package, plus ^ and $ on the edges
-ISO8601_REGEX = re.compile(r"^([0-9]{4})(-([0-9]{1,2})(-([0-9]{1,2})"
-                           r"((.)([0-9]{2}):([0-9]{2})(:([0-9]{2})(\.([0-9]+))?)?"
-                           r"(Z|(([-+])([0-9]{2}):([0-9]{2})))?)?)?)?$")
-
 TEMPORAL_REGEX_1 = re.compile(
     r'^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?'
     r'|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]'
@@ -107,11 +102,11 @@ def do_validation(doc, errors_array):
         dataset_name = "dataset %d" % (i + 1)
 
         # title
-        if check_string_field(item, "title", 1, dataset_name, errs):
+        if check_required_string_field(item, "title", 1, dataset_name, errs):
             dataset_name = '"%s"' % item.get("title", "").strip()
 
         # accessLevel # required
-        if check_string_field(item, "accessLevel", 3, dataset_name, errs):
+        if check_required_string_field(item, "accessLevel", 3, dataset_name, errs):
             if item["accessLevel"] not in ("public", "restricted public", "non-public"):
                 add_error(errs, 5, "Invalid Required Field Value",
                           "The field 'accessLevel' had an invalid value: \"%s\"" % item["accessLevel"],
@@ -137,10 +132,10 @@ def do_validation(doc, errors_array):
         if check_required_field(item, "contactPoint", dict, dataset_name, errs):
             cp = item["contactPoint"]
             # contactPoint - fn # required
-            check_string_field(cp, "fn", 1, dataset_name, errs)
+            check_required_string_field(cp, "fn", 1, dataset_name, errs)
 
             # contactPoint - hasEmail # required
-            if check_string_field(cp, "hasEmail", 9, dataset_name, errs):
+            if check_required_string_field(cp, "hasEmail", 9, dataset_name, errs):
                 import lepl.apps.rfc3696
 
                 email_validator = lepl.apps.rfc3696.Email()
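
Note on the hasEmail check in the hunk above: RFC 3696 validation is delegated to the lepl package, whose Email() factory returns a callable matcher. A minimal standalone sketch of how such a validator behaves; the mailto: stripping and the sample addresses are illustrative assumptions, not code from this patch:

    import lepl.apps.rfc3696

    # Email() returns a predicate: truthy for a syntactically valid address.
    email_validator = lepl.apps.rfc3696.Email()

    # data.json publishes hasEmail values like "mailto:user@agency.gov",
    # so a caller would strip the scheme before validating.
    has_email = "mailto:open-data@example.gov"  # hypothetical field value
    address = has_email.split("mailto:", 1)[-1]
    print(email_validator(address))          # True for a well-formed address
    print(email_validator("not an email"))   # False
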
@@ -151,10 +146,10 @@ def do_validation(doc, errors_array):
                               dataset_name)
 
         # description # required
-        check_string_field(item, "description", 1, dataset_name, errs)
+        check_required_string_field(item, "description", 1, dataset_name, errs)
 
         # identifier #required
-        if check_string_field(item, "identifier", 1, dataset_name, errs):
+        if check_required_string_field(item, "identifier", 1, dataset_name, errs):
             if item["identifier"] in seen_identifiers:
                 add_error(errs, 5, "Invalid Required Field Value", "The dataset identifier \"%s\" is used more than once."
                           % item["identifier"],
@@ -175,7 +170,7 @@ def do_validation(doc, errors_array):
                               "A keyword in the keyword array was an empty string.", dataset_name)
 
         # modified # required
-        if check_string_field(item, "modified", 1, dataset_name, errs):
+        if check_required_string_field(item, "modified", 1, dataset_name, errs):
             if not MODIFIED_REGEX_1.match(item['modified']) \
                     and not MODIFIED_REGEX_2.match(item['modified']) \
                     and not MODIFIED_REGEX_3.match(item['modified']):
@@ -195,7 +190,7 @@ def do_validation(doc, errors_array):
         # publisher # required
         if check_required_field(item, "publisher", dict, dataset_name, errs):
             # publisher - name # required
-            check_string_field(item["publisher"], "name", 1, dataset_name, errs)
+            check_required_string_field(item["publisher"], "name", 1, dataset_name, errs)
 
         # Required-If-Applicable
 
@@ -222,7 +217,7 @@ def do_validation(doc, errors_array):
 
                 # distribution - mediaType # Required-If-Applicable
                 if 'downloadURL' in dt:
-                    if check_string_field(dt, "mediaType", 1, distribution_name, errs):
+                    if check_required_string_field(dt, "mediaType", 1, distribution_name, errs):
                         if not IANA_MIME_REGEX.match(dt["mediaType"]):
                             add_error(errs, 5, "Invalid Field Value",
                                       "The distribution mediaType \"%s\" is invalid. "
@@ -249,15 +244,15 @@ def do_validation(doc, errors_array):
 
                 # distribution - description # optional
                 if dt.get("description") is not None:
-                    check_string_field(dt, "description", 1, distribution_name, errs)
+                    check_required_string_field(dt, "description", 1, distribution_name, errs)
 
                 # distribution - format # optional
                 if dt.get("format") is not None:
-                    check_string_field(dt, "format", 1, distribution_name, errs)
+                    check_required_string_field(dt, "format", 1, distribution_name, errs)
 
                 # distribution - title # optional
                 if dt.get("title") is not None:
-                    check_string_field(dt, "title", 1, distribution_name, errs)
+                    check_required_string_field(dt, "title", 1, distribution_name, errs)
 
         # license # Required-If-Applicable
         check_url_field(False, item, "license", dataset_name, errs)
@@ -265,7 +260,7 @@ def do_validation(doc, errors_array):
         # rights # Required-If-Applicable
         # TODO move to warnings
         # if item.get("accessLevel") != "public":
-        # check_string_field(item, "rights", 1, dataset_name, errs)
+        # check_string_field(item, "rights", 1, dataset_name, errs)
 
         # spatial # Required-If-Applicable
         # TODO: There are more requirements than it be a string.
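
Note on the modified check in the @@ -175 hunk above: a value passes if any one of three precompiled patterns matches, which the chained not-A-and-not-B-and-not-C condition expresses. A standalone sketch of the same any-of dispatch; the patterns below are deliberately simplified stand-ins, since the real MODIFIED_REGEX_* definitions at the top of datajsonvalidator.py are not shown in this hunk:

    import re

    # Simplified stand-ins for the module's full ISO 8601 expressions.
    MODIFIED_REGEX_1 = re.compile(r'^\d{4}-\d{2}-\d{2}$')              # date
    MODIFIED_REGEX_2 = re.compile(r'^\d{4}-\d{2}-\d{2}T[0-9:.Z+-]+$')  # date-time
    MODIFIED_REGEX_3 = re.compile(r'^R\d*/')                           # repeating interval

    def modified_value_ok(value):
        # Equivalent to the diff's test: fail only when no pattern matches.
        return any(rx.match(value)
                   for rx in (MODIFIED_REGEX_1, MODIFIED_REGEX_2, MODIFIED_REGEX_3))

    print(modified_value_ok("2016-01-15"))    # True
    print(modified_value_ok("last Tuesday"))  # False
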
@@ -312,7 +307,7 @@ def do_validation(doc, errors_array):
 
         # isPartOf # optional
         if item.get("isPartOf"):
-            check_string_field(item, "isPartOf", 1, dataset_name, errs)
+            check_required_string_field(item, "isPartOf", 1, dataset_name, errs)
 
         # issued # optional
         if item.get("issued") is not None:
@@ -415,7 +410,7 @@ def check_required_field(obj, field_name, data_type, dataset_name, errs):
     return True
 
 
-def check_string_field(obj, field_name, min_length, dataset_name, errs):
+def check_required_string_field(obj, field_name, min_length, dataset_name, errs):
     # checks that a required field exists, is typed as a string, and has a minimum length
     if not check_required_field(obj, field_name, (str, unicode), dataset_name, errs):
         return False
@@ -431,23 +426,6 @@ def check_string_field(obj, field_name, min_length, dataset_name, errs):
     return True
 
-def check_date_field(obj, field_name, dataset_name, errs):
-    # checks that a required date field exists and looks like a date
-    if not check_required_field(obj, field_name, (str, unicode), dataset_name, errs):
-        return False
-    elif len(obj[field_name].strip()) == 0:
-        add_error(errs, 10, "Missing Required Fields", "The '%s' field is present but empty." % field_name,
-                  dataset_name)
-        return False
-    else:
-        if not ISO8601_REGEX.match(obj[field_name]):
-            add_error(errs, 5, "Invalid Required Field Value",
-                      "The '%s' field has an invalid ISO 8601 date or date-time value: \"%s\"." % (
-                          field_name, obj[field_name]), dataset_name)
-            return False
-    return True
-
-
 def check_url_field(required, obj, field_name, dataset_name, errs):
     # checks that a required or optional field, if specified, looks like a URL
     if not required and (field_name not in obj or obj[field_name] is None):
         return True  # not required, so OK
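
Note on the rename above: check_required_string_field makes the helper's contract explicit. It verifies presence, string type, and a minimum stripped length, records any failure via add_error, and returns a boolean so call sites can guard follow-up checks. A usage sketch against the definition above, assuming errs is the dict accumulator the module's add_error expects:

    errs = {}  # assumed accumulator shape; add_error populates it
    item = {"title": "   ", "accessLevel": "public"}

    # False: "title" is present but all whitespace, so it fails min_length.
    check_required_string_field(item, "title", 1, "dataset 1", errs)

    # False: "description" is absent entirely.
    check_required_string_field(item, "description", 1, "dataset 1", errs)

    # True: only then is it safe to inspect the value, which is why the
    # call sites above use the helper as the guard of an if-statement.
    if check_required_string_field(item, "accessLevel", 3, "dataset 1", errs):
        assert item["accessLevel"] in ("public", "restricted public", "non-public")
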
diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py
index d750916b..7987ec81 100644
--- a/ckanext/datajson/plugin.py
+++ b/ckanext/datajson/plugin.py
@@ -217,15 +217,16 @@ def make_edi(owner_org):
     logger.addHandler(eh)
 
     # Build the data.json file.
-    packages = get_all_group_packages(group_id=owner_org)
+    packages = get_packages(owner_org)
+
     output = []
     for pkg in packages:
-        if pkg['owner_org'] == owner_org:
-            datajson_entry = make_datajson_entry(pkg)
-            if datajson_entry and is_valid(datajson_entry):
-                output.append(datajson_entry)
-            else:
-                logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), pkg.get('title', None))
+        #if pkg['owner_org'] == owner_org:
+        datajson_entry = make_datajson_entry(pkg)
+        if datajson_entry and is_valid(datajson_entry):
+            output.append(datajson_entry)
+        else:
+            logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), pkg.get('title', None))
 
     # Get the error log
     eh.flush()
@@ -247,18 +248,15 @@ def make_pdl(owner_org):
     eh.setFormatter(formatter)
     logger.addHandler(eh)
 
-    # Build the data.json file.
-    packages = get_all_group_packages(group_id=owner_org)
+    packages = get_packages(owner_org)
 
     output = []
 
     #Create data.json only using public datasets, datasets marked non-public are not exposed
     for pkg in packages:
         extras = dict([(x['key'], x['value']) for x in pkg['extras']])
         try:
-            if pkg['owner_org'] == owner_org \
-                    and not (re.match(r'[Nn]on-public', extras['public_access_level'])):
-
+            if not (re.match(r'[Nn]on-public', extras['public_access_level'])):
                 datajson_entry = make_datajson_entry(pkg)
                 if datajson_entry and is_valid(datajson_entry):
                     output.append(datajson_entry)
@@ -280,6 +278,21 @@ def make_pdl(owner_org):
     #return json.dumps(output)
     return write_zip(output, error, zip_name='pdl')
 
+def get_packages(owner_org):
+    # Build the data.json file.
+    packages = get_all_group_packages(group_id=owner_org)
+    #get packages for sub-agencies.
+    sub_agency = model.Group.get(owner_org)
+    if 'sub-agencies' in sub_agency.extras.col.target and \
+            sub_agency.extras.col.target['sub-agencies'].state == 'active':
+        sub_agencies = sub_agency.extras.col.target['sub-agencies'].value
+        sub_agencies_list = sub_agencies.split(",")
+        for sub in sub_agencies_list:
+            sub_packages = get_all_group_packages(group_id=sub)
+            for sub_package in sub_packages:
+                packages.append(sub_package)
+
+    return packages
 
 def get_all_group_packages(group_id):
     """
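
Note on get_packages: the new helper reads a comma-separated 'sub-agencies' group extra and folds each sub-group's packages into the parent's list. A standalone sketch of that expansion with the CKAN lookups stubbed out, so the list handling can be tested in isolation; collect_group_ids and the fetch callable are illustrative names, not part of the patch:

    def collect_group_ids(owner_org, sub_agencies_value):
        # sub_agencies_value is the raw extra, e.g. "sub-org-1,sub-org-2";
        # None or "" means the group has no active 'sub-agencies' extra.
        group_ids = [owner_org]
        if sub_agencies_value:
            group_ids.extend(s.strip() for s in sub_agencies_value.split(","))
        return group_ids

    def get_packages_sketch(owner_org, sub_agencies_value, fetch):
        # fetch stands in for get_all_group_packages(group_id=...).
        packages = []
        for group_id in collect_group_ids(owner_org, sub_agencies_value):
            packages.extend(fetch(group_id))
        return packages

    index = {"parent": [{"id": "a"}], "sub1": [{"id": "b"}], "sub2": [{"id": "c"}]}
    pkgs = get_packages_sketch("parent", "sub1,sub2", lambda g: index.get(g, []))
    assert [p["id"] for p in pkgs] == ["a", "b", "c"]

One behavior the sketch makes easy to probe: a dataset that belongs to both the parent group and a listed sub-agency is appended twice, so deduplicating by package id may be worth considering before serializing data.json.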