From 7f7d490c3a44ac7f54326c64cbfcd7d9cc856111 Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Fri, 20 Feb 2015 16:17:49 -0500 Subject: [PATCH 01/22] Renaming plugin to JsonExport --- ckanext/datajson/__init__.py | 2 +- ckanext/datajson/build_datajsonld.py | 8 +++---- ckanext/datajson/plugin.py | 34 ++++++++++++++-------------- setup.py | 2 +- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/ckanext/datajson/__init__.py b/ckanext/datajson/__init__.py index f20145e9..d5261f69 100644 --- a/ckanext/datajson/__init__.py +++ b/ckanext/datajson/__init__.py @@ -6,6 +6,6 @@ import pkgutil __path__ = pkgutil.extend_path(__path__, __name__) -from plugin import DataJsonPlugin +from plugin import JsonExportPlugin from harvester_datajson import DataJsonHarvester from harvester_cmsdatanavigator import CmsDataNavigatorHarvester diff --git a/ckanext/datajson/build_datajsonld.py b/ckanext/datajson/build_datajsonld.py index 9f0c073d..fb88f6dc 100644 --- a/ckanext/datajson/build_datajsonld.py +++ b/ckanext/datajson/build_datajsonld.py @@ -4,10 +4,10 @@ from sqlalchemy.util import OrderedDict def dataset_to_jsonld(dataset): - from plugin import DataJsonPlugin + from plugin import JsonExportPlugin ret = OrderedDict([ - ("@id", DataJsonPlugin.site_url + "/dataset/" + dataset["identifier"]), + ("@id", JsonExportPlugin.site_url + "/dataset/" + dataset["identifier"]), ("@type", "dcat:Dataset"), ]) @@ -20,9 +20,9 @@ def dataset_to_jsonld(dataset): return ret def distribution_to_jsonld(distribution): - from plugin import DataJsonPlugin + from plugin import JsonExportPlugin ret = OrderedDict([ - ("@id", DataJsonPlugin.site_url + "/resource/" + distribution["identifier"]), + ("@id", JsonExportPlugin.site_url + "/resource/" + distribution["identifier"]), ("@type", "dcat:Distribution"), ]) apply_jsonld_metadata_mapping(distribution, ret) diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index 99d5d403..c755c377 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -38,7 +38,7 @@ def get_validator(): from build_datajsonld import dataset_to_jsonld -class DataJsonPlugin(p.SingletonPlugin): +class JsonExportPlugin(p.SingletonPlugin): p.implements(p.interfaces.IConfigurer) p.implements(p.interfaces.IRoutes, inherit=True) @@ -48,14 +48,14 @@ def update_config(self, config): # to know how to set the paths. 
# TODO commenting out enterprise data inventory for right now - # DataJsonPlugin.route_edata_path = config.get("ckanext.enterprisedatajson.path", "/enterprisedata.json") - DataJsonPlugin.route_enabled = config.get("ckanext.datajson.url_enabled", "True") == 'True' - DataJsonPlugin.route_path = config.get("ckanext.datajson.path", "/data.json") - DataJsonPlugin.route_ld_path = config.get("ckanext.datajsonld.path", - re.sub(r"\.json$", ".jsonld", DataJsonPlugin.route_path)) - DataJsonPlugin.ld_id = config.get("ckanext.datajsonld.id", config.get("ckan.site_url")) - DataJsonPlugin.ld_title = config.get("ckan.site_title", "Catalog") - DataJsonPlugin.site_url = config.get("ckan.site_url") + # JsonExportPlugin.route_edata_path = config.get("ckanext.enterprisedatajson.path", "/enterprisedata.json") + JsonExportPlugin.route_enabled = config.get("ckanext.datajson.url_enabled", "True") == 'True' + JsonExportPlugin.route_path = config.get("ckanext.datajson.path", "/data.json") + JsonExportPlugin.route_ld_path = config.get("ckanext.datajsonld.path", + re.sub(r"\.json$", ".jsonld", JsonExportPlugin.route_path)) + JsonExportPlugin.ld_id = config.get("ckanext.datajsonld.id", config.get("ckan.site_url")) + JsonExportPlugin.ld_title = config.get("ckan.site_title", "Catalog") + JsonExportPlugin.site_url = config.get("ckan.site_url") # Adds our local templates directory. It's smart. It knows it's # relative to the path of *this* file. Wow. @@ -65,13 +65,13 @@ def before_map(self, m): return m def after_map(self, m): - if DataJsonPlugin.route_enabled: + if JsonExportPlugin.route_enabled: # /data.json and /data.jsonld (or other path as configured by user) - m.connect('datajson', DataJsonPlugin.route_path, controller='ckanext.datajson.plugin:DataJsonController', + m.connect('datajson', JsonExportPlugin.route_path, controller='ckanext.datajson.plugin:DataJsonController', action='generate_json') # TODO commenting out enterprise data inventory for right now - # m.connect('enterprisedatajson', DataJsonPlugin.route_edata_path, controller='ckanext.datajson.plugin:DataJsonController', action='generate_enterprise') - #m.connect('datajsonld', DataJsonPlugin.route_ld_path, controller='ckanext.datajson.plugin:DataJsonController', action='generate_jsonld') + # m.connect('enterprisedatajson', JsonExportPlugin.route_edata_path, controller='ckanext.datajson.plugin:DataJsonController', action='generate_enterprise') + #m.connect('datajsonld', JsonExportPlugin.route_ld_path, controller='ckanext.datajson.plugin:DataJsonController', action='generate_jsonld') # TODO DWC update action # /data/{org}/data.json @@ -112,11 +112,11 @@ def generate_output(self, format): ("foaf", "http://xmlns.com/foaf/0.1/"), ]) ), - ("@id", DataJsonPlugin.ld_id), + ("@id", JsonExportPlugin.ld_id), ("@type", "dcat:Catalog"), - ("dcterms:title", DataJsonPlugin.ld_title), - ("rdfs:label", DataJsonPlugin.ld_title), - ("foaf:homepage", DataJsonPlugin.site_url), + ("dcterms:title", JsonExportPlugin.ld_title), + ("rdfs:label", JsonExportPlugin.ld_title), + ("foaf:homepage", JsonExportPlugin.site_url), ("dcat:dataset", [dataset_to_jsonld(d) for d in data]), ]) diff --git a/setup.py b/setup.py index 4431576f..ae35f925 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ entry_points=\ """ [ckan.plugins] - datajson=ckanext.datajson:DataJsonPlugin + datajson=ckanext.datajson:JsonExportPlugin datajson_harvest=ckanext.datajson:DataJsonHarvester cmsdatanav_harvest=ckanext.datajson:CmsDataNavigatorHarvester """, From d4d7cbd2a62aa096e8c2f54bf12e9889b03bab9e Mon Sep 17 
00:00:00 2001 From: Alex Perfilov Date: Wed, 25 Feb 2015 15:34:40 -0500 Subject: [PATCH 02/22] merge schemas --- ckanext/datajson/plugin.py | 2 +- .../federal-v1.1/catalog.json | 0 .../federal-v1.1/dataset.json | 10 +- .../pod_schema/non-federal-v1.1/catalog.json | 58 ++ .../non-federal-v1.1/dataset-non-federal.json | 569 ++++++++++++++++++ .../pod_schema/non-federal/single_entry.json | 415 +++++++++++++ ckanext/datajson/pod_schema/single_entry.json | 416 +++++++++++++ .../schema/1_0_final/single_entry.json | 207 ------- 8 files changed, 1464 insertions(+), 213 deletions(-) rename ckanext/datajson/{schema => pod_schema}/federal-v1.1/catalog.json (100%) rename ckanext/datajson/{schema => pod_schema}/federal-v1.1/dataset.json (99%) create mode 100644 ckanext/datajson/pod_schema/non-federal-v1.1/catalog.json create mode 100644 ckanext/datajson/pod_schema/non-federal-v1.1/dataset-non-federal.json create mode 100644 ckanext/datajson/pod_schema/non-federal/single_entry.json create mode 100644 ckanext/datajson/pod_schema/single_entry.json delete mode 100644 ckanext/datajson/schema/1_0_final/single_entry.json diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index c755c377..c2747c99 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -16,7 +16,7 @@ def get_validator(): import os from jsonschema import Draft4Validator, FormatChecker - schema_path = os.path.join(os.path.dirname(__file__), 'schema', 'federal-v1.1', 'dataset.json') + schema_path = os.path.join(os.path.dirname(__file__), 'pod_schema', 'federal-v1.1', 'dataset.json') with open(schema_path, 'r') as file: schema = json.loads(file.read()) return Draft4Validator(schema, format_checker=FormatChecker()) diff --git a/ckanext/datajson/schema/federal-v1.1/catalog.json b/ckanext/datajson/pod_schema/federal-v1.1/catalog.json similarity index 100% rename from ckanext/datajson/schema/federal-v1.1/catalog.json rename to ckanext/datajson/pod_schema/federal-v1.1/catalog.json diff --git a/ckanext/datajson/schema/federal-v1.1/dataset.json b/ckanext/datajson/pod_schema/federal-v1.1/dataset.json similarity index 99% rename from ckanext/datajson/schema/federal-v1.1/dataset.json rename to ckanext/datajson/pod_schema/federal-v1.1/dataset.json index 8fdfce22..06fb984c 100644 --- a/ckanext/datajson/schema/federal-v1.1/dataset.json +++ b/ckanext/datajson/pod_schema/federal-v1.1/dataset.json @@ -365,7 +365,7 @@ "title": { "title": "Title", "description": "Human-readable name of the asset. 
Should be in plain English and include sufficient detail to facilitate search and discovery.", - "type": "string", + "type": "string", "minLength": 1 } }, @@ -387,12 +387,12 @@ "enum": [ "vcard:Contact" ] - }, + }, "fn": { "title": "Contact Name", "description": "A full formatted name, eg Firstname Lastname", "type": "string", - "minLength": 1 + "minLength": 1 }, "hasEmail": { "title": "Email", @@ -446,7 +446,7 @@ { "type": "null" } - ] + ] }, "format": { "title": "Format", @@ -562,7 +562,7 @@ "title": "Publisher Name", "description": "A full formatted name, eg Firstname Lastname", "type": "string", - "minLength": 1 + "minLength": 1 }, "subOrganizationOf": { "title": "Parent Organization", diff --git a/ckanext/datajson/pod_schema/non-federal-v1.1/catalog.json b/ckanext/datajson/pod_schema/non-federal-v1.1/catalog.json new file mode 100644 index 00000000..95fcd75c --- /dev/null +++ b/ckanext/datajson/pod_schema/non-federal-v1.1/catalog.json @@ -0,0 +1,58 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "https://project-open-data.cio.gov/v1.1/schema/catalog.json#", + "title": "Project Open Data Catalog", + "description": "Validates an entire collection of common core metadata JSON objects. Agencies produce said collections in the form of Data.json files.", + "type": "object", + "dependencies": { + "@type": [ + "@context" + ] + }, + "required": [ + "conformsTo", + "dataset" + ], + "properties": { + "@context": { + "title": "Metadata Context", + "description": "URL or JSON object for the JSON-LD Context that defines the schema used", + "type": "string", + "format": "uri" + }, + "@id": { + "title": "Metadata Catalog ID", + "description": "IRI for the JSON-LD Node Identifier of the Catalog. This should be the URL of the data.json file itself.", + "type": "string", + "format": "uri" + }, + "@type": { + "title": "Metadata Context", + "description": "IRI for the JSON-LD data type. This should be dcat:Catalog for the Catalog", + "enum": [ + "dcat:Catalog" + ] + }, + "conformsTo": { + "description": "Version of Schema", + "title": "Version of Schema", + "enum": [ + "https://project-open-data.cio.gov/v1.1/schema" + ] + }, + "describedBy": { + "description": "URL for the JSON Schema file that defines the schema used", + "title": "Data Dictionary", + "type": "string", + "format": "uri" + }, + "dataset": { + "type": "array", + "items": { + "$ref": "dataset-non-federal.json", + "minItems": 1, + "uniqueItems": true + } + } + } +} \ No newline at end of file diff --git a/ckanext/datajson/pod_schema/non-federal-v1.1/dataset-non-federal.json b/ckanext/datajson/pod_schema/non-federal-v1.1/dataset-non-federal.json new file mode 100644 index 00000000..b0a7f846 --- /dev/null +++ b/ckanext/datajson/pod_schema/non-federal-v1.1/dataset-non-federal.json @@ -0,0 +1,569 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "https://project-open-data.cio.gov/v1.1/schema/dataset-non-federal.json#", + "title": "Project Open Data Dataset", + "description": "The metadata format for all federal open data. Validates a single JSON object entry (as opposed to entire Data.json catalog).", + "type": "object", + "required": [ + "title", + "description", + "publisher", + "contactPoint", + "identifier", + "accessLevel" + ], + "properties": { + "@type": { + "title": "Metadata Context", + "description": "IRI for the JSON-LD data type. 
This should be dcat:Dataset for each Dataset", + "enum": [ + "dcat:Dataset" + ] + }, + "accessLevel": { + "description": "The degree to which this dataset could be made publicly-available, regardless of whether it has been made available. Choices: public (Data asset is or could be made publicly available to all without restrictions), restricted public (Data asset is available under certain use restrictions), or non-public (Data asset is not available to members of the public)", + "title": "Public Access Level", + "enum": [ + "public", + "restricted public", + "non-public" + ] + }, + "rights": { + "title": "Rights", + "description": "This may include information regarding access or restrictions based on privacy, security, or other policies. This should also provide an explanation for the selected \"accessLevel\" including instructions for how to access a restricted file, if applicable, or explanation for why a \"non-public\" or \"restricted public\" data asset is not \"public,\" if applicable. Text, 255 characters.", + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength": 255 + }, + { + "type": "null" + } + ] + }, + "accrualPeriodicity": { + "title": "Frequency", + "description": "Frequency with which dataset is published.", + "anyOf": [ + { + "enum": [ + "irregular" + ] + }, + { + "type": "string", + "pattern": "^R\\/P(?:\\d+(?:\\.\\d+)?Y)?(?:\\d+(?:\\.\\d+)?M)?(?:\\d+(?:\\.\\d+)?W)?(?:\\d+(?:\\.\\d+)?D)?(?:T(?:\\d+(?:\\.\\d+)?H)?(?:\\d+(?:\\.\\d+)?M)?(?:\\d+(?:\\.\\d+)?S)?)?$" + }, + { + "type": "null" + } + ] + }, + "bureauCode": { + "title": "Bureau Code", + "description": "Federal agencies, combined agency and bureau code from OMB Circular A-11, Appendix C in the format of 015:010.", + "type": "array", + "items": { + "type": "string", + "pattern": "[0-9]{3}:[0-9]{2}" + }, + "minItems": 1, + "uniqueItems": true + }, + "contactPoint": { + "$ref": "#/definitions/vcard-non-federal" + }, + "describedBy": { + "title": "Data Dictionary", + "description": "URL to the data dictionary for the dataset or API. 
Note that documentation other than a data dictionary can be referenced using Related Documents as shown in the expanded fields.", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "describedByType": { + "title": "Data Dictionary Type", + "description": "The machine-readable file format (IANA Media Type or MIME Type) of the distribution’s describedBy URL", + "anyOf": [ + { + "pattern": "^[-\\w]+/[-\\w]+(\\.[-\\w]+)*([+][-\\w]+)?$", + "type": "string" + }, + { + "type": "null" + } + ] + }, + "conformsTo": { + "title": "Data Standard", + "description": "URI used to identify a standardized specification the dataset conforms to", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "dataQuality": { + "title": "Data Quality", + "description": "Whether the dataset meets the agency’s Information Quality Guidelines (true/false).", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": { + "title": "Description", + "description": "Human-readable description (e.g., an abstract) with sufficient detail to enable a user to quickly understand whether the asset is of interest.", + "type": "string", + "minLength": 1 + }, + "distribution": { + "title": "Distribution", + "description": "A container for the array of Distribution objects", + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "distribution.json", + "minItems": 1, + "uniqueItems": true + } + }, + { + "type": "null" + } + ] + }, + "identifier": { + "title": "Unique Identifier", + "description": "A unique identifier for the dataset or API as maintained within an Agency catalog or database.", + "type": "string", + "minLength": 1 + }, + "issued": { + "title": "Release Date", + "description": "Date of formal issuance.", + "anyOf": [ + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "null" + } + ] + }, + "keyword": { + "title": "Tags", + "description": "Tags (or keywords) help users discover your dataset; please include terms that would be used by technical and non-technical users.", + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1 + }, + "landingPage": { + "title": "Homepage URL", + "description": "Alternative landing page used to redirect user to a contextual, Agency-hosted “homepage” for the Dataset or API when selecting this resource from the Data.gov user interface.", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "language": { + "title": "Language", + "description": "The language of the dataset.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "pattern": "^(((([A-Za-z]{2,3}(-([A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-([A-Za-z]{4}))?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-([0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*(-(x(-[A-Za-z0-9]{1,8})+))?)|(x(-[A-Za-z0-9]{1,8})+)|((en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$" + } + }, + { + "type": "null" + } + ] + }, + "license": { + 
"title": "License", + "description": "The license dataset or API is published with. See Open Licenses for more information.", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "modified": { + "title": "Last Update", + "description": "Most recent date on which the dataset was changed, updated or modified.", + "anyOf": [ + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^(R\\d*\\/)?P(?:\\d+(?:\\.\\d+)?Y)?(?:\\d+(?:\\.\\d+)?M)?(?:\\d+(?:\\.\\d+)?W)?(?:\\d+(?:\\.\\d+)?D)?(?:T(?:\\d+(?:\\.\\d+)?H)?(?:\\d+(?:\\.\\d+)?M)?(?:\\d+(?:\\.\\d+)?S)?)?$" + }, + { + "type": "string", + "pattern": "^(R\\d*\\/)?([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\4([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\18[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?:\\d+(?:\\.\\d+)?Y)?(?:\\d+(?:\\.\\d+)?M)?(?:\\d+(?:\\.\\d+)?W)?(?:\\d+(?:\\.\\d+)?D)?(?:T(?:\\d+(?:\\.\\d+)?H)?(?:\\d+(?:\\.\\d+)?M)?(?:\\d+(?:\\.\\d+)?S)?)?$" + } + ] + }, + "primaryITInvestmentUII": { + "title": "Primary IT Investment UII", + "description": "For linking a dataset with an IT Unique Investment Identifier (UII)", + "anyOf": [ + { + "type": "string", + "pattern": "[0-9]{3}-[0-9]{9}" + }, + { + "type": "null" + } + ] + }, + "programCode": { + "title": "Program Code", + "description": "Federal agencies, list the primary program related to this data asset, from the Federal Program Inventory. Use the format of 015:001", + "type": "array", + "items": { + "type": "string", + "pattern": "[0-9]{3}:[0-9]{3}" + }, + "minItems": 1, + "uniqueItems": true + }, + "publisher": { + "$ref": "organization.json" + }, + "references": { + "title": "Related Documents", + "description": "Related documents such as technical information about a dataset, developer documentation, etc.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "format": "uri" + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "spatial": { + "title": "Spatial", + "description": "The range of spatial applicability of a dataset. 
Could include a spatial region like a bounding box or a named place.", + "anyOf": [ + { + "type": "string", + "minLength": 1 + }, + { + "type": "null" + } + ] + }, + "systemOfRecords": { + "title": "System of Records", + "description": "If the systems is designated as a system of records under the Privacy Act of 1974, provide the URL to the System of Records Notice related to this dataset.", + "anyOf": [ + { + "type": "string", + "minLength": 1 + }, + { + "type": "null" + } + ] + }, + "temporal": { + "title": "Temporal", + "description": "The range of temporal applicability of a dataset (i.e., a start and end date of applicability for the data).", + "anyOf": [ + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^(R\\d*\\/)?([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\4([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\18[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?:\\d+(?:\\.\\d+)?Y)?(?:\\d+(?:\\.\\d+)?M)?(?:\\d+(?:\\.\\d+)?W)?(?:\\d+(?:\\.\\d+)?D)?(?:T(?:\\d+(?:\\.\\d+)?H)?(?:\\d+(?:\\.\\d+)?M)?(?:\\d+(?:\\.\\d+)?S)?)?$" + }, + { + "type": "string", + "pattern": "^(R\\d*\\/)?P(?:\\d+(?:\\.\\d+)?Y)?(?:\\d+(?:\\.\\d+)?M)?(?:\\d+(?:\\.\\d+)?W)?(?:\\d+(?:\\.\\d+)?D)?(?:T(?:\\d+(?:\\.\\d+)?H)?(?:\\d+(?:\\.\\d+)?M)?(?:\\d+(?:\\.\\d+)?S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\4([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\18[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "null" + } + ] + }, + "isPartOf": { + "title": "Collection", + "description": "The collection of which the dataset is a subset", + "anyOf": [ + { + "type": "string", + "minLength": 1 + } + ] + }, + "theme": { + "title": "Category", + "description": "Main thematic category of the dataset.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "title": { + "title": "Title", + "description": "Human-readable name of the asset. Should be in plain English and include sufficient detail to facilitate search and discovery.", + "type": "string", + "minLength": 1 + } + }, + "definitions": { + "vcard-non-federal": { + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "https://project-open-data.cio.gov/v1.1/schema/vcard-non-federal.json#", + "title": "Project Open Data ContactPoint vCard", + "description": "A Dataset ContactPoint as a vCard object", + "type": "object", + "required": [ + "fn" + ], + "properties": { + "@type": { + "title": "Metadata Context", + "description": "IRI for the JSON-LD data type. 
This should be vcard:Contact for contactPoint", + "enum": [ + "vcard:Contact" + ] + }, + "fn": { + "title": "Contact Name", + "description": "A full formatted name, eg Firstname Lastname", + "type": "string", + "minLength": 1 + }, + "hasEmail": { + "title": "Email", + "description": "Email address for the contact", + "pattern": "^mailto:([\\w.-]+@[\\w.-]+\\.[\\w.-]+)?$", + "type": "string" + } + } + }, + "distribution": { + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "https://project-open-data.cio.gov/v1.1/schema/distribution.json#", + "title": "Project Open Data Distribution", + "description": "Validates an entire collection of common core metadata JSON objects. Agencies produce said collections in the form of Data.json files.", + "type": "object", + "dependencies": { + "downloadURL": { + "properties": { + "mediaType": { + "type": "string", + "pattern": "^[-\\w]+/[-\\w]+(\\.[-\\w]+)*([+][-\\w]+)?$" + } + }, + "required": [ + "mediaType" + ] + } + }, + "properties": { + "@type": { + "title": "Metadata Context", + "description": "IRI for the JSON-LD data type. This should be dcat:Distribution for each Distribution", + "enum": [ + "dcat:Distribution" + ] + }, + "downloadURL": { + "title": "Download URL", + "description": "URL providing direct access to a downloadable file of a dataset", + "type": "string", + "format": "uri" + }, + "mediaType": { + "title": "Media Type", + "description": "The machine-readable file format (IANA Media Type or MIME Type) of the distribution’s downloadURL", + "anyOf": [ + { + "pattern": "^[-\\w]+/[-\\w]+(\\.[-\\w]+)*([+][-\\w]+)?$", + "type": "string" + }, + { + "type": "null" + } + ] + }, + "format": { + "title": "Format", + "description": "A human-readable description of the file format of a distribution", + "anyOf": [ + { + "type": "string", + "minLength": 1 + }, + { + "type": "null" + } + ] + }, + "accessURL": { + "title": "Access URL", + "description": "URL providing indirect access to a dataset", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "description": { + "title": "Description", + "description": "Human-readable description of the distribution", + "anyOf": [ + { + "type": "string", + "minLength": 1 + }, + { + "type": "null" + } + ] + }, + "title": { + "title": "Title", + "description": "Human-readable name of the distribution", + "anyOf": [ + { + "type": "string", + "minLength": 1 + }, + { + "type": "null" + } + ] + }, + "conformsTo": { + "title": "Data Standard", + "description": "URL providing indirect access to a dataset", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "describedBy": { + "title": "Data Dictionary", + "description": "URL to the data dictionary for the distribution found at the downloadURL", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "describedByType": { + "title": "Data Dictionary Type", + "description": "The machine-readable file format (IANA Media Type or MIME Type) of the distribution’s describedBy URL", + "anyOf": [ + { + "pattern": "^[-\\w]+/[-\\w]+(\\.[-\\w]+)*([+][-\\w]+)?$", + "type": "string" + }, + { + "type": "null" + } + ] + } + } + }, + "organization": { + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "https://project-open-data.cio.gov/v1.1/schema/organization.json#", + "title": "Project Open Data Organization", + "description": "A Dataset Publisher Organization as a foaf:Agent object", + "type": "object", + "required": [ + "name" + ], + 
"properties": { + "@type": { + "title": "Metadata Context", + "description": "IRI for the JSON-LD data type. This should be org:Organization for each publisher", + "enum": [ + "org:Organization" + ] + }, + "name": { + "title": "Publisher Name", + "description": "A full formatted name, eg Firstname Lastname", + "type": "string", + "minLength": 1 + }, + "subOrganizationOf": { + "title": "Parent Organization", + "$ref": "organization.json" + } + } + } + } +} \ No newline at end of file diff --git a/ckanext/datajson/pod_schema/non-federal/single_entry.json b/ckanext/datajson/pod_schema/non-federal/single_entry.json new file mode 100644 index 00000000..4ab4b311 --- /dev/null +++ b/ckanext/datajson/pod_schema/non-federal/single_entry.json @@ -0,0 +1,415 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "http://project-open-data.github.io/schema/1_0_final/single_entry.json#", + "title": "Common Core Metadata Schema", + "description": "The metadata format for all federal open data. Validates a single JSON object entry (as opposed to entire Data.json catalog).", + "type": "object", + "required": ["title", "description", "license", "publisher", "contactPoint", "identifier", "accessLevel"], + "properties": { + "accessLevel": { + "description":"The degree to which this dataset could be made publicly-available, regardless of whether it has been made available. Choices: public (Data asset is or could be made publicly available to all without restrictions), restricted public (Data asset is available under certain use restrictions), or non-public (Data asset is not available to members of the public)", + "title": "Public Access Level", + "enum": ["public", "restricted public", "non-public"] + }, + "accessLevelComment": { + "title":"Access Level Comment", + "description":"An explanation for the selected \"accessLevel\" including instructions for how to access a restricted file, if applicable, or explanation for why a \"non-public\" or \"restricted public\" data assetis not \"public,\" if applicable. Text, 255 characters.", + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength":255 + }, + { + "type": "null" + } + ] + }, + "accrualPeriodicity": { + "title":"Frequency", + "description":"Frequency with which dataset is published.", + "anyOf": [ + { + "enum": ["Annual", "Bimonthly", "Semiweekly", "Daily", "Biweekly", "Semiannual", "Biennial", "Triennial", + "Three times a week", "Three times a month", "Continuously updated", "Monthly", "Quarterly", "Semimonthly", + "Three times a year", "Weekly", "Completely irregular"] + }, + { + "type": "null" + } + ] + }, + "bureauCode": { + "title":"Bureau Code", + "description":"Federal agencies, combined agency and bureau code from OMB Circular A-11, Appendix C in the format of 015:010.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "pattern": "[0-9]{3}:[0-9]{2}" + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "contactPoint": { + "title":"Contact Name", + "description":"Contact person’s name for the asset.", + "type": "string" + }, + "dataDictionary": { + "title":"Data Dictionary", + "description":"URL to the data dictionary for the dataset or API. 
Note that documentation other than a data dictionary can be referenced using Related Documents as shown in the expanded fields.", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "dataQuality": { + "title":"Data Quality", + "description":"Whether the dataset meets the agency’s Information Quality Guidelines (true/false).", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": { + "title" : "Description", + "description": "Human-readable description (e.g., an abstract) with sufficient detail to enable a user to quickly understand whether the asset is of interest.", + "type": "string" + }, + "distribution": { + "title":"Distribution", + "description":"Holds multiple download URLs for datasets composed of multiple files and/or file types", + "anyOf": [ + { + "type": "array", + "items": { + "type": "object", + "required": ["accessURL", "format"], + "properties": { + "accessURL": { + "title":"Download URL", + "description":"URL providing direct access to the downloadable distribution of a dataset.", + "type": "string", + "format": "uri" + }, + "format": { + "title":"Format", + "description":"The file format or API type of the distribution.", + "pattern": "^[-\\w]+/[-\\w]+(\\.[-\\w]+)*([+][-\\w]+)?$", + "type": "string" + } + } + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "identifier": { + "title":"Unique Identifier", + "description":"A unique identifier for the dataset or API as maintained within an Agency catalog or database.", + "type": "string", + "pattern": "[\\w]+" + }, + "issued": { + "title":"Release Date", + "description":"Date of formal issuance.", + "anyOf": [ + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": 
"^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "null" + } + ] + }, + "keyword": { + "title": "Tags", + "description": "Tags (or keywords) help users discover your dataset; please include terms that would be used by technical and non-technical users.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1 + }, + { + "type": "null" + } + ] + + }, + "landingPage": { + "title":"Homepage URL", + "description":"Alternative landing page used to redirect user to a contextual, Agency-hosted “homepage” for the Dataset or API when selecting this resource from the Data.gov user interface.", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "language": { + "title":"Language", + "description":"The language of the dataset.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "pattern": "^(((([A-Za-z]{2,3}(-([A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-([A-Za-z]{4}))?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-([0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*(-(x(-[A-Za-z0-9]{1,8})+))?)|(x(-[A-Za-z0-9]{1,8})+)|((en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$" + } + }, + { + "type": "null" + } + ] + }, + "license": { + "title":"License", + "description":"The license dataset or API is published with. 
See Open Licenses for more information.", + "type": "string", + "minLength": 1 + }, + "mbox": { + "title":"Contact Email", + "description":"Contact person’s email address.", + "anyOf": [ + { + "type": "string", + "format": "email" + }, + { + "type": "null" + }, + { + "type": "string" + } + ] + }, + "modified": { + "title": "Last Update", + "description": "Most recent date on which the dataset was changed, updated or modified.", + "anyOf": [ + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + } + ] + }, + "PrimaryITInvestmentUII": { + "title":"Primary IT Investment UII", + "description":"For linking a dataset with an IT Unique Investment Identifier (UII)", + "anyOf": [ + { + "type": "string", + "pattern": "[0-9]{3}-[0-9]{9}" + }, + { + "type": "null" + } + ] + }, + "programCode": { + "title":"Program Code", + "description":"Federal agencies, list the primary program related to this data asset, from the Federal Program Inventory. 
Use the format of 015:001", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "pattern": "[0-9]{3}:[0-9]{3}" + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "publisher": { + "title":"Publisher", + "description": "The publishing entity.", + "type": "string" + }, + "references": { + "title":"Related Documents", + "description":"Related documents such as technical information about a dataset, developer documentation, etc.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "format": "uri" + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "spatial": { + "title":"Spatial", + "description":"The range of spatial applicability of a dataset. Could include a spatial region like a bounding box or a named place.", + "anyOf": [ + { + "type": "string", + "minLength": 1 + }, + { + "type": "null" + } + ] + }, + "systemOfRecords": { + "title":"System of Records", + "description":"If the systems is designated as a system of records under the Privacy Act of 1974, provide the URL to the System of Records Notice related to this dataset.", + "anyOf": [ + { + "type": "string", + "minLength": 1 + }, + { + "type": "null" + } + ] + }, + "temporal": { + "title":"Temporal", + "description":"The range of temporal applicability of a dataset (i.e., a start and end date of applicability for the data).", + "anyOf": [ + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + 
"type": "string", + "pattern": "^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "null" + } + ] + }, + "theme": { + "title":"Category", + "description":"Main thematic category of the dataset.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "title": { + "title": "Title", + "description": "Human-readable name of the asset. Should be in plain English and include sufficient detail to facilitate search and discovery.", + "type": "string" + }, + "webService": { + "title":"Endpoint", + "description":"Endpoint of web service to access dataset.", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + } + } +} diff --git a/ckanext/datajson/pod_schema/single_entry.json b/ckanext/datajson/pod_schema/single_entry.json new file mode 100644 index 00000000..52dcda77 --- /dev/null +++ b/ckanext/datajson/pod_schema/single_entry.json @@ -0,0 +1,416 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "http://project-open-data.github.io/schema/1_0_final/single_entry.json#", + "title": "Common Core Metadata Schema", + "description": "The metadata format for all federal open data. Validates a single JSON object entry (as opposed to entire Data.json catalog).", + "type": "object", + "required": ["bureaucode", "programcode", "title", "description", "keyword", "modified", "publisher", "contactpoint", "mbox", "identifier", "accesslevel"], + "properties": { + "accesslevel": { + "description":"The degree to which this dataset could be made publicly-available, regardless of whether it has been made available. Choices: public (Data asset is or could be made publicly available to all without restrictions), restricted public (Data asset is available under certain use restrictions), or non-public (Data asset is not available to members of the public)", + "title": "Public Access Level", + "enum": ["public", "restricted public", "non-public"] + }, + "accesslevelcomment": { + "title":"Access Level Comment", + "description":"An explanation for the selected \"accesslevel\" including instructions for how to access a restricted file, if applicable, or explanation for why a \"non-public\" or \"restricted public\" data assetis not \"public,\" if applicable. 
Text, 255 characters.", + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength":255 + }, + { + "type": "null" + } + ] + }, + "accessurl": { + "title":"Download URL", + "description":"URL providing direct access to the downloadable distribution of a dataset.", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "accrualperiodicity": { + "title":"Frequency", + "description":"Frequency with which dataset is published.", + "anyOf": [ + { + "enum": ["Annual", "Bimonthly", "Semiweekly", "Daily", "Biweekly", "Semiannual", "Biennial", "Triennial", + "Three times a week", "Three times a month", "Continuously updated", "Monthly", "Quarterly", "Semimonthly", + "Three times a year", "Weekly", "Completely irregular"] + }, + { + "type": "null" + } + ] + }, + "bureaucode": { + "title":"Bureau Code", + "description":"Federal agencies, combined agency and bureau code from OMB Circular A-11, Appendix C in the format of 015:010.", + "type": "array", + "items": { + "type": "string", + "pattern": "[0-9]{3}:[0-9]{2}" + }, + "minItems": 1, + "uniqueItems": true + }, + "contactpoint": { + "title":"Contact Name", + "description":"Contact person’s name for the asset.", + "type": "string" + }, + "datadictionary": { + "title":"Data Dictionary", + "description":"URL to the data dictionary for the dataset or API. Note that documentation other than a data dictionary can be referenced using Related Documents as shown in the expanded fields.", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "dataquality": { + "title":"Data Quality", + "description":"Whether the dataset meets the agency’s Information Quality Guidelines (true/false).", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": { + "title" : "Description", + "description": "Human-readable description (e.g., an abstract) with sufficient detail to enable a user to quickly understand whether the asset is of interest.", + "type": "string" + }, + "distribution": { + "title":"Distribution", + "description":"Holds multiple download URLs for datasets composed of multiple files and/or file types", + "anyOf": [ + { + "type": "array", + "items": { + "type": "object", + "required": ["accessurl", "format"], + "properties": { + "accessurl": { + "title":"Download URL", + "description":"URL providing direct access to the downloadable distribution of a dataset.", + "type": "string", + "format": "uri" + }, + "format": { + "title":"Format", + "description":"The file format or API type of the distribution.", + "pattern": "^[-\\w]+/[-\\w]+(\\.[-\\w]+)*([+][-\\w]+)?$", + "type": "string" + } + } + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "format": { + "title":"Format", + "description":"The file format or API type of the distribution.", + "anyOf": [ + { + "type": "string", + "pattern": "^[-\\w]+/[-\\w]+(\\.[-\\w]+)*([+][-\\w]+)?$" + }, + { + "type": "null" + } + ] + }, + "identifier": { + "title":"Unique Identifier", + "description":"A unique identifier for the dataset or API as maintained within an Agency catalog or database.", + "type": "string", + "pattern": "[\\w]+" + }, + "issued": { + "title":"Release Date", + "description":"Date of formal issuance.", + "anyOf": [ + { + "type": "string", + "pattern": 
"^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "null" + } + ] + }, + "keyword": { + "title": "Tags", + "description": "Tags (or keywords) help users discover your dataset; please include terms that would be used by technical and non-technical users.", + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1 + }, + "landingpage": { + "title":"Homepage URL", + "description":"Alternative landing page used to redirect user to a contextual, Agency-hosted “homepage” for the Dataset or API when selecting this resource from the Data.gov user interface.", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "language": { + "title":"Language", + "description":"The language of the dataset.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "pattern": 
"^(((([A-Za-z]{2,3}(-([A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-([A-Za-z]{4}))?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-([0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*(-(x(-[A-Za-z0-9]{1,8})+))?)|(x(-[A-Za-z0-9]{1,8})+)|((en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$" + } + }, + { + "type": "null" + } + ] + }, + "license": { + "title":"License", + "description":"The license dataset or API is published with. See Open Licenses for more information.", + "anyOf": [ + { + "type": "string", + "minLength": 1 + }, + { + "type": "null" + } + ] + }, + "mbox": { + "title":"Contact Email", + "description":"Contact person’s email address.", + "type": "string", + "format": "email" + }, + "modified": { + "title": "Last Update", + "description": "Most recent date on which the dataset was changed, updated or modified.", + "anyOf": [ + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": 
"^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + } + ] + }, + "primaryitinvestmentuii": { + "title":"Primary IT Investment UII", + "description":"For linking a dataset with an IT Unique Investment Identifier (UII)", + "anyOf": [ + { + "type": "string", + "pattern": "[0-9]{3}-[0-9]{9}" + }, + { + "type": "null" + } + ] + }, + "programcode": { + "title":"Program Code", + "description":"Federal agencies, list the primary program related to this data asset, from the Federal Program Inventory. Use the format of 015:001", + "type": "array", + "items": { + "type": "string", + "pattern": "[0-9]{3}:[0-9]{3}" + }, + "minItems": 1, + "uniqueItems": true + }, + "publisher": { + "title":"Publisher", + "description": "The publishing entity.", + "type": "string" + }, + "references": { + "title":"Related Documents", + "description":"Related documents such as technical information about a dataset, developer documentation, etc.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "format": "uri" + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "spatial": { + "title":"Spatial", + "description":"The range of spatial applicability of a dataset. Could include a spatial region like a bounding box or a named place.", + "anyOf": [ + { + "type": "string", + "minLength": 1 + }, + { + "type": "null" + } + ] + }, + "systemofrecords": { + "title":"System of Records", + "description":"If the systems is designated as a system of records under the Privacy Act of 1974, provide the URL to the System of Records Notice related to this dataset.", + "anyOf": [ + { + "type": "string", + "minLength": 1 + }, + { + "type": "null" + } + ] + }, + "temporal": { + "title":"Temporal", + "description":"The range of temporal applicability of a dataset (i.e., a start and end date of applicability for the data).", + "anyOf": [ + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": 
"^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "null" + } + ] + }, + "theme": { + "title":"Category", + "description":"Main thematic category of the dataset.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "title": { + "title": "Title", + "description": "Human-readable name of the asset. Should be in plain English and include sufficient detail to facilitate search and discovery.", + "type": "string" + }, + "webservice": { + "title":"Endpoint", + "description":"Endpoint of web service to access dataset.", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + } + } +} diff --git a/ckanext/datajson/schema/1_0_final/single_entry.json b/ckanext/datajson/schema/1_0_final/single_entry.json deleted file mode 100644 index 4567f43c..00000000 --- a/ckanext/datajson/schema/1_0_final/single_entry.json +++ /dev/null @@ -1,207 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-04/schema#", - "id": "http://project-open-data.github.io/schema/1_0_final/single_entry.json#", - "title": "Common Core Metadata Schema", - "description": "The metadata format for all federal open data. Validates a single JSON object entry (as opposed to entire Data.json catalog).", - "type": "object", - "required": ["title", "description", "keyword", "modified", "publisher", "contactPoint", "mbox", "identifier", "accessLevel"], - "properties": { - "accessLevel": { - "description":"The degree to which this dataset could be made publicly-available, regardless of whether it has been made available. 
Choices: public (Data asset is or could be made publicly available to all without restrictions), restricted public (Data asset is available under certain use restrictions), or non-public (Data asset is not available to members of the public)", - "title": "Public Access Level", - "enum": ["public", "restricted public", "non-public"] - }, - "accessLevelComment": { - "title":"Access Level Comment", - "description":"An explanation for the selected \"accessLevel\" including instructions for how to access a restricted file, if applicable, or explanation for why a \"non-public\" or \"restricted public\" data assetis not \"public,\" if applicable. Text, 255 characters.", - "type": "string", - "maxLength":255 - }, - "accrualPeriodicity": { - "title":"Frequency", - "description":"Frequency with which dataset is published.", - "enum": ["Annual", "Bimonthly", "Semiweekly", "Daily", "Biweekly", "Semiannual", "Biennial", "Triennial", - "Three times a week", "Three times a month", "Continuously updated", "Monthly", "Quarterly", "Semimonthly", - "Three times a year", "Weekly", "Completely irregular"] - }, - "bureauCode": { - "title":"Bureau Code", - "description":"Federal agencies, combined agency and bureau code from OMB Circular A-11, Appendix C in the format of 015:010.", - "type": "array", - "items": { - "type": "string", - "pattern": "[0-9]{3}:[0-9]{2}" - }, - "minItems": 1, - "uniqueItems": true - }, - "contactPoint": { - "title":"Contact Name", - "description":"Contact person’s name for the asset.", - "type": "string" - }, - "dataDictionary": { - "title":"Data Dictionary", - "description":"URL to the data dictionary for the dataset or API. Note that documentation other than a data dictionary can be referenced using Related Documents as shown in the expanded fields.", - "type": "string", - "format": "uri" - }, - "dataQuality": { - "title":"Data Quality", - "description":"Whether the dataset meets the agency’s Information Quality Guidelines (true/false).", - "type": "boolean" - }, - "description": { - "title" : "Description", - "description": "Human-readable description (e.g., an abstract) with sufficient detail to enable a user to quickly understand whether the asset is of interest.", - "type": "string" - }, - "distribution": { - "title":"Distribution", - "description":"Holds multiple download URLs for datasets composed of multiple files and/or file types", - "type": "array", - "items": { - "type": "object", - "properties": { - "accessURL": { - "title":"Download URL", - "description":"URL providing direct access to the downloadable distribution of a dataset.", - "type": "string", - "format": "uri" - }, - "format": { - "title":"Format", - "description":"The file format or API type of the distribution.", - "pattern": "^[-\\w]+/[-\\w]+(\\.[-\\w]+)*([+][-\\w]+)?$", - "type": "string" - } - } - }, - "minItems": 1, - "uniqueItems": true - }, - "identifier": { - "title":"Unique Identifier", - "description":"A unique identifier for the dataset or API as maintained within an Agency catalog or database.", - "type": "string", - "pattern": "[\\w]+" - }, - "issued": { - "title":"Release Date", - "description":"Date of formal issuance.", - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - "keyword": { - "title": "Tags", - 
"description": "Tags (or keywords) help users discover your dataset; please include terms that would be used by technical and non-technical users.", - "type": "array", - "items": { - "type": "string" - }, - "minItems": 1, - "uniqueItems": true - }, - "landingPage": { - "title":"Homepage URL", - "description":"Alternative landing page used to redirect user to a contextual, Agency-hosted “homepage” for the Dataset or API when selecting this resource from the Data.gov user interface.", - "type": "string", - "format": "uri" - }, - "language": { - "title":"Language", - "description":"The language of the dataset.", - "type": "array", - "items": { - "type": "string", - "pattern": "^(((([A-Za-z]{2,3}(-([A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-([A-Za-z]{4}))?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-([0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*(-(x(-[A-Za-z0-9]{1,8})+))?)|(x(-[A-Za-z0-9]{1,8})+)|((en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$" - } - }, - "license": { - "title":"License", - "description":"The license dataset or API is published with. See Open Licenses for more information.", - "type": "string" - }, - "mbox": { - "title":"Contact Email", - "description":"Contact person’s email address.", - "type": "string", - "format": "email" - }, - "modified": { - "title": "Last Update", - "description": "Most recent date on which the dataset was changed, updated or modified.", - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - "PrimaryITInvestmentUII": { - "title":"Primary IT Investment UII", - "description":"For linking a dataset with an IT Unique Investment Identifier (UII)", - "type": "string" - }, - "programCode": { - "title":"Program Code", - "description":"Federal agencies, list the primary program related to this data asset, from the Federal Program Inventory. Use the format of 015:001", - "type": "array", - "items": { - "type": "string", - "pattern": "[0-9]{3}:[0-9]{3}" - }, - "minItems": 1, - "uniqueItems": true - }, - "publisher": { - "title":"Publisher", - "description": "The publishing entity.", - "type": "string" - }, - "references": { - "title":"Related Documents", - "description":"Related documents such as technical information about a dataset, developer documentation, etc.", - "type": "array", - "items": { - "type": "string", - "format": "uri" - }, - "minItems": 1, - "uniqueItems": true - }, - "spatial": { - "title":"Spatial", - "description":"The range of spatial applicability of a dataset. 
Could include a spatial region like a bounding box or a named place.", - "type": "string" - }, - "systemOfRecords": { - "title":"System of Records", - "description":"If the systems is designated as a system of records under the Privacy Act of 1974, provide the URL to the System of Records Notice related to this dataset.", - "type": "string" - }, - "temporal": { - "title":"Temporal", - "description":"The range of temporal applicability of a dataset (i.e., a start and end date of applicability for the data).", - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - "theme": { - "title":"Category", - "description":"Main thematic category of the dataset.", - "type": "array", - "items": { - "type": "string" - }, - "minItems": 1, - "uniqueItems": true - }, - "title": { - "title": "Title", - "description": "Human-readable name of the asset. Should be in plain English and include sufficient detail to facilitate search and discovery.", - "type": "string" - }, - "webService": { - "title":"Endpoint", - "description":"Endpoint of web service to access dataset.", - "type": "string", - "format": "uri" - } - } -} From 7d6f7acf8afb3256dbda140406d63b51ccafa1f4 Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Wed, 25 Feb 2015 16:10:32 -0500 Subject: [PATCH 03/22] renaming inventory datajson plugin to datajson_export --- ckanext/datajson/plugin.py | 43 +++++++++++++++++++++++--------------- setup.py | 3 ++- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index c2747c99..0daae0a9 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -1,13 +1,14 @@ -import ckan.plugins as p +import json +import logging +import StringIO +import ckan.plugins as p from ckan.lib.base import BaseController, render, c import ckan.model as model from pylons import request, response import ckan.lib.dictization.model_dictize as model_dictize -import json, re -import logging +import re from jsonschema.exceptions import best_match -import StringIO logger = logging.getLogger('datajson') @@ -38,6 +39,11 @@ def get_validator(): from build_datajsonld import dataset_to_jsonld +class DataJsonPlugin(p.SingletonPlugin): + p.implements(p.interfaces.IConfigurer) + p.implements(p.interfaces.IRoutes, inherit=True) + + class JsonExportPlugin(p.SingletonPlugin): p.implements(p.interfaces.IConfigurer) p.implements(p.interfaces.IRoutes, inherit=True) @@ -52,7 +58,7 @@ def update_config(self, config): JsonExportPlugin.route_enabled = config.get("ckanext.datajson.url_enabled", "True") == 'True' JsonExportPlugin.route_path = config.get("ckanext.datajson.path", "/data.json") JsonExportPlugin.route_ld_path = config.get("ckanext.datajsonld.path", - re.sub(r"\.json$", ".jsonld", JsonExportPlugin.route_path)) + re.sub(r"\.json$", ".jsonld", JsonExportPlugin.route_path)) JsonExportPlugin.ld_id = config.get("ckanext.datajsonld.id", config.get("ckan.site_url")) 
JsonExportPlugin.ld_title = config.get("ckan.site_title", "Catalog") JsonExportPlugin.site_url = config.get("ckan.site_url") @@ -71,7 +77,7 @@ def after_map(self, m): action='generate_json') # TODO commenting out enterprise data inventory for right now # m.connect('enterprisedatajson', JsonExportPlugin.route_edata_path, controller='ckanext.datajson.plugin:DataJsonController', action='generate_enterprise') - #m.connect('datajsonld', JsonExportPlugin.route_ld_path, controller='ckanext.datajson.plugin:DataJsonController', action='generate_jsonld') + # m.connect('datajsonld', JsonExportPlugin.route_ld_path, controller='ckanext.datajson.plugin:DataJsonController', action='generate_jsonld') # TODO DWC update action # /data/{org}/data.json @@ -147,7 +153,8 @@ def validator(self): e) + ". Try using JSONLint.com."])) except Exception as e: c.errors.append(( - "Internal Error", ["Something bad happened while trying to load and parse the file: " + unicode(e)])) + "Internal Error", + ["Something bad happened while trying to load and parse the file: " + unicode(e)])) if body: try: @@ -163,8 +170,8 @@ def generate_pdl(self): # DWC this is a hack, as I couldn't get to the request parameters. For whatever reason, the multidict was always empty match = re.match(r"/organization/([-a-z0-9]+)/data.json", request.path) - #If user is not editor or admin of the organization then don't allow pdl download - if p.toolkit.check_access('package_create', {'model': model,'user':c.user}, {'owner_org': match.group(1)}): + # If user is not editor or admin of the organization then don't allow pdl download + if p.toolkit.check_access('package_create', {'model': model, 'user': c.user}, {'owner_org': match.group(1)}): if match: # set content type (charset required or pylons throws an error) response.content_type = 'application/json; charset=UTF-8' @@ -179,8 +186,8 @@ def generate_edi(self): # DWC this is a hack, as I couldn't get to the request parameters. 
For whatever reason, the multidict was always empty match = re.match(r"/organization/([-a-z0-9]+)/edi.json", request.path) - #If user is not editor or admin of the organization then don't allow edi download - if p.toolkit.check_access('package_create', {'model': model,'user':c.user}, {'owner_org': match.group(1)}): + # If user is not editor or admin of the organization then don't allow edi download + if p.toolkit.check_access('package_create', {'model': model, 'user': c.user}, {'owner_org': match.group(1)}): if match: # set content type (charset required or pylons throws an error) response.content_type = 'application/json; charset=UTF-8' @@ -227,7 +234,7 @@ def make_edi(owner_org): output = [] for pkg in packages: - #if pkg['owner_org'] == owner_org: + # if pkg['owner_org'] == owner_org: datajson_entry = make_datajson_entry(pkg) if datajson_entry and is_valid(datajson_entry): output.append(datajson_entry) @@ -241,7 +248,7 @@ def make_edi(owner_org): logger.removeHandler(eh) stream.close() - #return json.dumps(output) + # return json.dumps(output) return write_zip(output, error, zip_name='edi') @@ -258,7 +265,7 @@ def make_pdl(owner_org): packages = get_packages(owner_org) output = [] - #Create data.json only using public datasets, datasets marked non-public are not exposed + # Create data.json only using public datasets, datasets marked non-public are not exposed for pkg in packages: extras = dict([(x['key'], x['value']) for x in pkg['extras']]) try: @@ -281,13 +288,14 @@ def make_pdl(owner_org): logger.removeHandler(eh) stream.close() - #return json.dumps(output) + # return json.dumps(output) return write_zip(output, error, zip_name='pdl') + def get_packages(owner_org): # Build the data.json file. packages = get_all_group_packages(group_id=owner_org) - #get packages for sub-agencies. + # get packages for sub-agencies. sub_agency = model.Group.get(owner_org) if 'sub-agencies' in sub_agency.extras.col.target and \ sub_agency.extras.col.target['sub-agencies'].state == 'active': @@ -300,6 +308,7 @@ def get_packages(owner_org): return packages + def get_all_group_packages(group_id): """ Gets all of the group packages, public or private, returning them as a list of CKAN's dictized packages. 
@@ -337,7 +346,7 @@ def write_zip(data, error=None, zip_name='data'): if data: zf.writestr('data.json', json.dumps(make_datajson_catalog(data), ensure_ascii=False).encode('utf8')) - #Write the error log + # Write the error log if error: zf.writestr('errorlog.txt', error.encode('utf8')) diff --git a/setup.py b/setup.py index ae35f925..09c5f542 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,8 @@ entry_points=\ """ [ckan.plugins] - datajson=ckanext.datajson:JsonExportPlugin + datajson=ckanext.datajson:DataJsonPlugin + datajson_export=ckanext.datajson:JsonExportPlugin datajson_harvest=ckanext.datajson:DataJsonHarvester cmsdatanav_harvest=ckanext.datajson:CmsDataNavigatorHarvester """, From 706e61722c337763fdc8ef907eded56c86f71550 Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Wed, 25 Feb 2015 16:50:50 -0500 Subject: [PATCH 04/22] revert requirements renamings --- ckanext/datajson/__init__.py | 1 + requirements.txt => pip-requirements.txt | 1 + 2 files changed, 2 insertions(+) rename requirements.txt => pip-requirements.txt (76%) diff --git a/ckanext/datajson/__init__.py b/ckanext/datajson/__init__.py index d5261f69..b68480c0 100644 --- a/ckanext/datajson/__init__.py +++ b/ckanext/datajson/__init__.py @@ -7,5 +7,6 @@ __path__ = pkgutil.extend_path(__path__, __name__) from plugin import JsonExportPlugin +from plugin import DataJsonPlugin from harvester_datajson import DataJsonHarvester from harvester_cmsdatanavigator import CmsDataNavigatorHarvester diff --git a/requirements.txt b/pip-requirements.txt similarity index 76% rename from requirements.txt rename to pip-requirements.txt index 441b63d5..4f5e07df 100644 --- a/requirements.txt +++ b/pip-requirements.txt @@ -1,3 +1,4 @@ pyyaml lepl jsonschema +rfc3987 \ No newline at end of file From 737dcfac0e46bc6f75652840ff77eb6aac931e07 Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Wed, 25 Feb 2015 17:20:56 -0500 Subject: [PATCH 05/22] get closer to catalog branch --- ckanext/datajson/build_datajson.py | 4 +- ckanext/datajson/plugin.py | 386 +++++++++++++++-------------- 2 files changed, 197 insertions(+), 193 deletions(-) diff --git a/ckanext/datajson/build_datajson.py b/ckanext/datajson/build_datajson.py index de7be113..17b1c8f5 100644 --- a/ckanext/datajson/build_datajson.py +++ b/ckanext/datajson/build_datajson.py @@ -13,7 +13,7 @@ # TODO this file is pretty sloppy, needs cleanup and redundancies removed -def make_datajson_catalog(datasets): +def make_datajson_export_catalog(datasets): catalog = OrderedDict([ ('conformsTo', 'https://project-open-data.cio.gov/v1.1/schema'), # requred ('describedBy', 'https://project-open-data.cio.gov/v1.1/schema/catalog.json'), # optional @@ -24,7 +24,7 @@ def make_datajson_catalog(datasets): return catalog -def make_datajson_entry(package): +def make_datajson_export_entry(package): # extras is a list of dicts [{},{}, {}]. 
For each dict, extract the key, value entries into a new dict extras = dict([(x['key'], x['value']) for x in package['extras']]) diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index 0daae0a9..6997079c 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -1,39 +1,24 @@ -import json import logging import StringIO +import json import ckan.plugins as p from ckan.lib.base import BaseController, render, c -import ckan.model as model from pylons import request, response -import ckan.lib.dictization.model_dictize as model_dictize import re +import ckan.model as model +import ckan.lib.dictization.model_dictize as model_dictize from jsonschema.exceptions import best_match -logger = logging.getLogger('datajson') - - -def get_validator(): - import os - from jsonschema import Draft4Validator, FormatChecker - - schema_path = os.path.join(os.path.dirname(__file__), 'pod_schema', 'federal-v1.1', 'dataset.json') - with open(schema_path, 'r') as file: - schema = json.loads(file.read()) - return Draft4Validator(schema, format_checker=FormatChecker()) - - logger.warn('Unable to create validator') - return None - -validator = get_validator() +logger = logging.getLogger('datajson') try: from collections import OrderedDict # 2.7 except ImportError: from sqlalchemy.util import OrderedDict -from build_datajson import make_datajson_entry, make_datajson_catalog +from build_datajson import make_datajson_export_entry, make_datajson_export_catalog # from build_enterprisedatajson import make_enterprisedatajson_entry from build_datajsonld import dataset_to_jsonld @@ -73,29 +58,30 @@ def before_map(self, m): def after_map(self, m): if JsonExportPlugin.route_enabled: # /data.json and /data.jsonld (or other path as configured by user) - m.connect('datajson', JsonExportPlugin.route_path, controller='ckanext.datajson.plugin:DataJsonController', + m.connect('datajson_export', JsonExportPlugin.route_path, + controller='ckanext.datajson.plugin:JsonExportController', action='generate_json') # TODO commenting out enterprise data inventory for right now - # m.connect('enterprisedatajson', JsonExportPlugin.route_edata_path, controller='ckanext.datajson.plugin:DataJsonController', action='generate_enterprise') - # m.connect('datajsonld', JsonExportPlugin.route_ld_path, controller='ckanext.datajson.plugin:DataJsonController', action='generate_jsonld') + # m.connect('enterprisedatajson', JsonExportPlugin.route_edata_path, controller='ckanext.datajson.plugin:JsonExportController', action='generate_enterprise') + # m.connect('datajsonld', JsonExportPlugin.route_ld_path, controller='ckanext.datajson.plugin:JsonExportController', action='generate_jsonld') # TODO DWC update action # /data/{org}/data.json m.connect('public_data_listing', '/organization/{org}/data.json', - controller='ckanext.datajson.plugin:DataJsonController', action='generate_pdl') + controller='ckanext.datajson.plugin:JsonExportController', action='generate_pdl') # TODO DWC update action # /data/{org}/edi.json m.connect('enterprise_data_inventory', '/organization/{org}/edi.json', - controller='ckanext.datajson.plugin:DataJsonController', action='generate_edi') + controller='ckanext.datajson.plugin:JsonExportController', action='generate_edi') # /pod/validate - # m.connect('datajsonvalidator', "/pod/validate", controller='ckanext.datajson.plugin:DataJsonController', action='validator') + # m.connect('datajsonvalidator', "/pod/validate", controller='ckanext.datajson.plugin:JsonExportController', action='validator') return m 
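With these routes wired up, the export endpoints can be exercised over plain HTTP. A minimal sketch in Python 2 to match the codebase (the host and organization names are hypothetical; the paths are the defaults set in update_config):

    import json
    import urllib2

    # Site-wide export, served by the 'datajson_export' route.
    datasets = json.load(urllib2.urlopen("http://ckan.example.gov/data.json"))

    # Per-organization listing, served by 'public_data_listing'. generate_pdl
    # responds with a zip attachment (data.json plus errorlog.txt) rather than
    # bare JSON, and only for editors/admins of the organization.
    pdl = urllib2.urlopen("http://ckan.example.gov/organization/example-org/data.json").read()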
-class DataJsonController(BaseController): +class JsonExportController(BaseController): def generate_output(self, format): # set content type (charset required or pylons throws an error) response.content_type = 'application/json; charset=UTF-8' @@ -106,7 +92,7 @@ def generate_output(self, format): # TODO special processing for enterprise # output - data = make_json() + data = self.make_json() if format == 'json-ld': # Convert this to JSON-LD. @@ -179,7 +165,7 @@ def generate_pdl(self): # allow caching of response (e.g. by Apache) del response.headers["Cache-Control"] del response.headers["Pragma"] - return make_pdl(match.group(1)) + return self.make_pdl(match.group(1)) return "Invalid organization id" def generate_edi(self): @@ -195,169 +181,187 @@ def generate_edi(self): # allow caching of response (e.g. by Apache) del response.headers["Cache-Control"] del response.headers["Pragma"] - return make_edi(match.group(1)) + return self.make_edi(match.group(1)) return "Invalid organization id" -def make_json(): - # Build the data.json file. - packages = p.toolkit.get_action("current_package_list_with_resources")(None, {}) - output = [] - # Create data.json only using public and public-restricted datasets, datasets marked non-public are not exposed - for pkg in packages: - extras = dict([(x['key'], x['value']) for x in pkg['extras']]) - try: - if not (re.match(r'[Nn]on-public', extras['public_access_level'])): - datajson_entry = make_datajson_entry(pkg) - if datajson_entry: - output.append(datajson_entry) - else: - logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), pkg.get('title', None)) - except KeyError: - logger.warn("Dataset id=[%s], title=[%s] missing required 'public_access_level' field", pkg.get('id', None), - pkg.get('title', None)) - pass - return output - - -def make_edi(owner_org): - # Error handler for creating error log - stream = StringIO.StringIO() - eh = logging.StreamHandler(stream) - eh.setLevel(logging.WARN) - formatter = logging.Formatter('%(asctime)s - %(message)s') - eh.setFormatter(formatter) - logger.addHandler(eh) - - # Build the data.json file. - packages = get_packages(owner_org) - - output = [] - for pkg in packages: - # if pkg['owner_org'] == owner_org: - datajson_entry = make_datajson_entry(pkg) - if datajson_entry and is_valid(datajson_entry): - output.append(datajson_entry) - else: - logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), pkg.get('title', None)) - - # Get the error log - eh.flush() - error = stream.getvalue() - eh.close() - logger.removeHandler(eh) - stream.close() - - # return json.dumps(output) - return write_zip(output, error, zip_name='edi') - - -def make_pdl(owner_org): - # Error handler for creating error log - stream = StringIO.StringIO() - eh = logging.StreamHandler(stream) - eh.setLevel(logging.WARN) - formatter = logging.Formatter('%(asctime)s - %(message)s') - eh.setFormatter(formatter) - logger.addHandler(eh) - - # Build the data.json file. 
- packages = get_packages(owner_org) - - output = [] - # Create data.json only using public datasets, datasets marked non-public are not exposed - for pkg in packages: - extras = dict([(x['key'], x['value']) for x in pkg['extras']]) - try: - if not (re.match(r'[Nn]on-public', extras['public_access_level'])): - datajson_entry = make_datajson_entry(pkg) - if datajson_entry and is_valid(datajson_entry): - output.append(datajson_entry) - else: - logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), pkg.get('title', None)) - - except KeyError: - logger.warn("Dataset id=[%s], title=['%s'] missing required 'public_access_level' field", - pkg.get('id', None), pkg.get('title', None)) - pass - - # Get the error log - eh.flush() - error = stream.getvalue() - eh.close() - logger.removeHandler(eh) - stream.close() - - # return json.dumps(output) - return write_zip(output, error, zip_name='pdl') - - -def get_packages(owner_org): - # Build the data.json file. - packages = get_all_group_packages(group_id=owner_org) - # get packages for sub-agencies. - sub_agency = model.Group.get(owner_org) - if 'sub-agencies' in sub_agency.extras.col.target and \ - sub_agency.extras.col.target['sub-agencies'].state == 'active': - sub_agencies = sub_agency.extras.col.target['sub-agencies'].value - sub_agencies_list = sub_agencies.split(",") - for sub in sub_agencies_list: - sub_packages = get_all_group_packages(group_id=sub) - for sub_package in sub_packages: - packages.append(sub_package) - - return packages - - -def get_all_group_packages(group_id): - """ - Gets all of the group packages, public or private, returning them as a list of CKAN's dictized packages. - """ - result = [] - for pkg_rev in model.Group.get(group_id).packages(with_private=True, context={'user_is_admin': True}): - result.append(model_dictize.package_dictize(pkg_rev, {'model': model})) - - return result - - -def is_valid(instance): - """ - Validates a data.json entry against the project open data's JSON schema. Log a warning message on validation error - """ - error = best_match(validator.iter_errors(instance)) - if error: - logger.warn("Validation failed, best guess of error = %s", error) - return False - return True - - -def write_zip(data, error=None, zip_name='data'): - """ - Data: a python object to write to the data.json - Error: unicode string representing the content of the error log. - zip_name: the name to use for the zip file - """ - import zipfile - - o = StringIO.StringIO() - zf = zipfile.ZipFile(o, mode='w') - - # Write the data file - if data: - zf.writestr('data.json', json.dumps(make_datajson_catalog(data), ensure_ascii=False).encode('utf8')) - - # Write the error log - if error: - zf.writestr('errorlog.txt', error.encode('utf8')) - - zf.close() - o.seek(0) - - binary = o.read() - o.close() - - response.content_type = 'application/octet-stream' - response.content_disposition = 'attachment; filename="%s.zip"' % zip_name - - return binary + def make_json(self): + # Build the data.json file. 
+ packages = p.toolkit.get_action("current_package_list_with_resources")(None, {}) + output = [] + # Create data.json only using public and public-restricted datasets, datasets marked non-public are not exposed + for pkg in packages: + extras = dict([(x['key'], x['value']) for x in pkg['extras']]) + try: + if not (re.match(r'[Nn]on-public', extras['public_access_level'])): + datajson_entry = make_datajson_export_entry(pkg) + if datajson_entry: + output.append(datajson_entry) + else: + logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), + pkg.get('title', None)) + except KeyError: + logger.warn("Dataset id=[%s], title=[%s] missing required 'public_access_level' field", + pkg.get('id', None), + pkg.get('title', None)) + pass + return output + + + def make_edi(self, owner_org): + # Error handler for creating error log + stream = StringIO.StringIO() + eh = logging.StreamHandler(stream) + eh.setLevel(logging.WARN) + formatter = logging.Formatter('%(asctime)s - %(message)s') + eh.setFormatter(formatter) + logger.addHandler(eh) + + # Build the data.json file. + packages = self.get_packages(owner_org) + + output = [] + for pkg in packages: + # if pkg['owner_org'] == owner_org: + datajson_entry = make_datajson_export_entry(pkg) + if datajson_entry and self.is_valid(datajson_entry): + output.append(datajson_entry) + else: + logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), pkg.get('title', None)) + + # Get the error log + eh.flush() + error = stream.getvalue() + eh.close() + logger.removeHandler(eh) + stream.close() + + # return json.dumps(output) + return self.write_zip(output, error, zip_name='edi') + + + def make_pdl(self, owner_org): + # Error handler for creating error log + stream = StringIO.StringIO() + eh = logging.StreamHandler(stream) + eh.setLevel(logging.WARN) + formatter = logging.Formatter('%(asctime)s - %(message)s') + eh.setFormatter(formatter) + logger.addHandler(eh) + + # Build the data.json file. + packages = self.get_packages(owner_org) + + output = [] + # Create data.json only using public datasets, datasets marked non-public are not exposed + for pkg in packages: + extras = dict([(x['key'], x['value']) for x in pkg['extras']]) + try: + if not (re.match(r'[Nn]on-public', extras['public_access_level'])): + datajson_entry = make_datajson_export_entry(pkg) + if datajson_entry and self.is_valid(datajson_entry): + output.append(datajson_entry) + else: + logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), + pkg.get('title', None)) + + except KeyError: + logger.warn("Dataset id=[%s], title=['%s'] missing required 'public_access_level' field", + pkg.get('id', None), pkg.get('title', None)) + pass + + # Get the error log + eh.flush() + error = stream.getvalue() + eh.close() + logger.removeHandler(eh) + stream.close() + + # return json.dumps(output) + return self.write_zip(output, error, zip_name='pdl') + + + def get_packages(self, owner_org): + # Build the data.json file. + packages = self.get_all_group_packages(group_id=owner_org) + # get packages for sub-agencies. 
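+        # ('sub-agencies' is a comma-separated list of group ids kept in the organization's extras)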
+        sub_agency = model.Group.get(owner_org)
+        if 'sub-agencies' in sub_agency.extras.col.target and \
+                sub_agency.extras.col.target['sub-agencies'].state == 'active':
+            sub_agencies = sub_agency.extras.col.target['sub-agencies'].value
+            sub_agencies_list = sub_agencies.split(",")
+            for sub in sub_agencies_list:
+                sub_packages = self.get_all_group_packages(group_id=sub)
+                for sub_package in sub_packages:
+                    packages.append(sub_package)
+
+        return packages
+
+
+    def get_all_group_packages(self, group_id):
+        """
+        Gets all of the group packages, public or private, returning them as a list of CKAN's dictized packages.
+        """
+        result = []
+        for pkg_rev in model.Group.get(group_id).packages(with_private=True, context={'user_is_admin': True}):
+            result.append(model_dictize.package_dictize(pkg_rev, {'model': model}))
+
+        return result
+
+
+    def is_valid(self, instance):
+        """
+        Validates a data.json entry against the project open data's JSON schema. Log a warning message on validation error
+        """
+        error = best_match(validator.iter_errors(instance))
+        if error:
+            logger.warn("Validation failed, best guess of error = %s", error)
+            return False
+        return True
+
+
+    def write_zip(self, data, error=None, zip_name='data'):
+        """
+        Data: a python object to write to the data.json
+        Error: unicode string representing the content of the error log.
+        zip_name: the name to use for the zip file
+        """
+        import zipfile
+
+        o = StringIO.StringIO()
+        zf = zipfile.ZipFile(o, mode='w')
+
+        # Write the data file
+        if data:
+            zf.writestr('data.json', json.dumps(make_datajson_export_catalog(data), ensure_ascii=False).encode('utf8'))
+
+        # Write the error log
+        if error:
+            zf.writestr('errorlog.txt', error.encode('utf8'))
+
+        zf.close()
+        o.seek(0)
+
+        binary = o.read()
+        o.close()
+
+        response.content_type = 'application/octet-stream'
+        response.content_disposition = 'attachment; filename="%s.zip"' % zip_name
+
+        return binary
+
+
+def get_validator():
+    import os
+    from jsonschema import Draft4Validator, FormatChecker
+
+    schema_path = os.path.join(os.path.dirname(__file__), 'pod_schema', 'federal-v1.1', 'dataset.json')
+    try:
+        with open(schema_path, 'r') as file:
+            schema = json.loads(file.read())
+        return Draft4Validator(schema, format_checker=FormatChecker())
+    except (IOError, ValueError):
+        # Without a readable, parseable schema file no validator can be built.
+        logger.warn('Unable to create validator')
+        return None
+
+validator = get_validator()
\ No newline at end of file
From 28208d33f07fd156ff690b2fe8da127482f1e8c2 Mon Sep 17 00:00:00 2001
From: Alex Perfilov
Date: Thu, 12 Mar 2015 15:40:13 -0400
Subject: [PATCH 06/22] replaced schema path to catalog structure

---
 ckanext/datajson/build_datajson.py | 843 ++++++++++--------
 ckanext/datajson/build_datajsonld.py | 29 +-
 ckanext/datajson/plugin.py | 14 +-
 .../pod_schema/federal-v1.1/dataset.json | 10 +-
 .../non-federal-v1.1/dataset-non-federal.json | 8 +-
 5 files changed, 517 insertions(+), 387 deletions(-)

diff --git a/ckanext/datajson/build_datajson.py b/ckanext/datajson/build_datajson.py
index 17b1c8f5..d4e472eb 100644
--- a/ckanext/datajson/build_datajson.py
+++ b/ckanext/datajson/build_datajson.py
@@ -11,299 +11,64 @@ log = logging.getLogger('datajson')
 
-# TODO this file is pretty sloppy, needs cleanup and redundancies removed
-
-def make_datajson_export_catalog(datasets):
-    catalog = OrderedDict([
-        ('conformsTo', 'https://project-open-data.cio.gov/v1.1/schema'),  # requred
-        ('describedBy', 'https://project-open-data.cio.gov/v1.1/schema/catalog.json'),  # optional
-        ('@context', 'https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld'),  # optional
-
('@type', 'dcat:Catalog'), # optional - ('dataset', datasets), # required - ]) - return catalog - - -def make_datajson_export_entry(package): - # extras is a list of dicts [{},{}, {}]. For each dict, extract the key, value entries into a new dict - extras = dict([(x['key'], x['value']) for x in package['extras']]) - - parent_dataset_id = extras.get('parent_dataset') - if parent_dataset_id: - parent = model.Package.get(parent_dataset_id) - parent_uid = parent.extras.col.target['unique_id'].value - if parent_uid: - parent_dataset_id = parent_uid - - # if resource format is CSV then convert it to text/csv - # Resource format has to be in 'csv' format for automatic datastore push. - for r in package["resources"]: - if r["format"].lower() == "csv": - r["format"] = "text/csv" - if r["format"].lower() == "json": - r["format"] = "application/json" - if r["format"].lower() == "pdf": - r["format"] = "application/pdf" - - try: - retlist = [ - ("@type", "dcat:Dataset"), # optional - - ("title", strip_if_string(package["title"])), # required - - # ("accessLevel", 'public'), # required - ("accessLevel", strip_if_string(extras.get('public_access_level'))), # required - - # ("accrualPeriodicity", "R/P1Y"), # optional - # ('accrualPeriodicity', 'accrual_periodicity'), - ('accrualPeriodicity', get_accrual_periodicity(extras.get('accrual_periodicity'))), # optional - - ("conformsTo", strip_if_string(extras.get('conforms_to'))), # optional - - # ('contactPoint', OrderedDict([ - # ("@type", "vcard:Contact"), - # ("fn", "Jane Doe"), - # ("hasEmail", "mailto:jane.doe@agency.gov") - # ])), # required - ('contactPoint', get_contact_point(extras, package)), # required - - ("dataQuality", strip_if_string(extras.get('data_quality'))), # required-if-applicable - - ("describedBy", strip_if_string(extras.get('data_dictionary'))), # optional - ("describedByType", strip_if_string(extras.get('data_dictionary_type'))), # optional - - ("description", strip_if_string(package["notes"])), # required - - # ("description", 'asdfasdf'), # required - - ("identifier", strip_if_string(extras.get('unique_id'))), # required - # ("identifier", 'asdfasdfasdf'), # required - - ("isPartOf", parent_dataset_id), # optional - ("issued", strip_if_string(extras.get('release_date'))), # optional - - # ("keyword", ['a', 'b']), # required - ("keyword", [t["display_name"] for t in package["tags"]]), # required - - ("landingPage", strip_if_string(extras.get('homepage_url'))), # optional - - ("license", strip_if_string(extras.get("license_new"))), # required-if-applicable - - ("modified", strip_if_string(extras.get("modified"))), # required - - ("primaryITInvestmentUII", strip_if_string(extras.get('primary_it_investment_uii'))), # optional - - # ('publisher', OrderedDict([ - # ("@type", "org:Organization"), - # ("name", "Widget Services") - # ])), # required - # ("publisher", get_publisher_tree(extras)), # required - ("publisher", get_publisher_tree_wrong_order(extras)), # required - - ("rights", strip_if_string(extras.get('access_level_comment'))), # required - - ("spatial", strip_if_string(package.get("spatial"))), # required-if-applicable - - ('systemOfRecords', strip_if_string(extras.get('system_of_records'))), # optional - - ("temporal", strip_if_string(extras.get('temporal'))), # required-if-applicable - - ("distribution", generate_distribution(package)), # required-if-applicable - - # ("distribution", - # #TODO distribution should hide any key/value pairs where value is "" or None (e.g. 
format) - # [ - # OrderedDict([ - # ("downloadURL", r["url"]), - # ("mediaType", r["formatReadable"]), - # ]) - # for r in package["resources"] - # ]) - ] - - for pair in [ - ('bureauCode', 'bureau_code'), # required - ('language', 'language'), # optional - ('programCode', 'program_code'), # required - ('references', 'related_documents'), # optional - ('theme', 'category'), # optional - ]: - split_multiple_entries(retlist, extras, pair) - - except KeyError as e: - log.warn("Invalid field detected for package with id=[%s], title=['%s']: '%s'", package.get('id'), - package.get('title'), e) - return - - # # TODO this is a lazy hack to make sure we don't have redundant fields when the free form key/value pairs are added - # extras_to_filter_out = ['publisher', 'contact_name', 'contact_email', 'unique_id', 'public_access_level', - # 'data_dictionary', 'bureau_code', 'program_code', 'access_level_comment', 'license_title', - # 'spatial', 'temporal', 'release_date', 'accrual_periodicity', 'language', 'granularity', - # 'data_quality', 'size', 'homepage_url', 'rss_feed', 'category', 'related_documents', - # 'system_of_records', 'system_of_records_none_related_to_this_dataset', 'tags', - # 'extrasRollup', 'format', 'accessURL', 'notes', 'publisher_1', 'publisher_2', 'publisher_3', - # 'publisher_4', 'publisher_5'] - # - # # Append any free extras (key/value pairs) that aren't part of common core but have been associated with the dataset - # # TODO really hackey, short on time, had to hardcode a lot of the names to remove. there's much better ways, maybe - # # generate a list of keys to ignore by calling a specific function to get the extras - # retlist_keys = [x for x, y in retlist] - # extras_keys = set(extras.keys()) - set(extras_to_filter_out) - # - # for key in extras_keys: - # convertedKey = underscore_to_camelcase(key) - # if convertedKey not in retlist_keys: - # retlist.append((convertedKey, extras[key])) - - # Remove entries where value is None, "", or empty list [] - striped_retlist = [(x, y) for x, y in retlist if y is not None and y != "" and y != []] - striped_retlist_keys = [x for x, y in striped_retlist] - - - # If a required metadata field was removed, return empty string - # for required_field in ["accessLevel", "bureauCode", "contactPoint", "description", "identifier", "keyword", - # "modified", "programCode", "publisher", "title"]: - # if required_field not in striped_retlist_keys: - # log.warn("Missing required field detected for package with id=[%s], title=['%s']: '%s'", - # package.get('id'), package.get('title'), required_field) - # return - - # When saved from UI DataQuality value is stored as "on" instead of True. - # Check if value is "on" and replace it with True. 
- striped_retlist_dict = OrderedDict(striped_retlist) - if striped_retlist_dict.get('dataQuality') == "on" \ - or striped_retlist_dict.get('dataQuality') == "true" \ - or striped_retlist_dict.get('dataQuality') == "True": - striped_retlist_dict['dataQuality'] = True - elif striped_retlist_dict.get('dataQuality') == "false" \ - or striped_retlist_dict.get('dataQuality') == "False": - striped_retlist_dict['dataQuality'] = False - - from datajsonvalidator import do_validation - - errors = [] - try: - do_validation([dict(striped_retlist_dict)], errors) - except Exception as e: - errors.append(("Internal Error", ["Something bad happened: " + unicode(e)])) - if len(errors) > 0: - for error in errors: - log.warn(error) - return - - return striped_retlist_dict - - -# used by get_accrual_periodicity -accrual_periodicity_dict = { - 'completely irregular': 'irregular', - 'decennial': 'R/P10Y', - 'quadrennial': 'R/P4Y', - 'annual': 'R/P1Y', - 'bimonthly': 'R/P2M', # or R/P0.5M - 'semiweekly': 'R/P3.5D', - 'daily': 'R/P1D', - 'biweekly': 'R/P2W', # or R/P0.5W - 'semiannual': 'R/P6M', - 'biennial': 'R/P2Y', - 'triennial': 'R/P3Y', - 'three times a week': 'R/P0.33W', - 'three times a month': 'R/P0.33M', - 'continuously updated': 'R/PT1S', - 'monthly': 'R/P1M', - 'quarterly': 'R/P3M', - 'semimonthly': 'R/P0.5M', - 'three times a year': 'R/P4M', - 'weekly': 'R/P1W' -} - - -def get_accrual_periodicity(frequency): - return accrual_periodicity_dict.get(str(frequency).lower().strip(), frequency) - - -def generate_distribution(package): - arr = [] - for r in package["resources"]: - resource = [("@type", "dcat:Distribution")] - rkeys = r.keys() - if 'url' in rkeys: - res_url = strip_if_string(r.get('url')) - if res_url: - if 'api' == r.get('resource_type') or 'accessurl' == r.get('resource_type'): - resource += [("accessURL", res_url)] - else: - resource += [("downloadURL", res_url)] - if 'format' in rkeys: - res_format = strip_if_string(r.get('format')) - if res_format: - resource += [("mediaType", res_format)] - else: - log.warn("Missing mediaType for resource in package ['%s']", package.get('id')) - else: - log.warn("Missing downloadURL for resource in package ['%s']", package.get('id')) - - # if 'accessURL_new' in rkeys: - # res_access_url = strip_if_string(r.get('accessURL_new')) - # if res_access_url: - # resource += [("accessURL", res_access_url)] - - if 'formatReadable' in rkeys: - res_attr = strip_if_string(r.get('formatReadable')) - if res_attr: - resource += [("format", res_attr)] - - if 'name' in rkeys: - res_attr = strip_if_string(r.get('name')) - if res_attr: - resource += [("title", res_attr)] - - if 'notes' in rkeys: - res_attr = strip_if_string(r.get('notes')) - if res_attr: - resource += [("description", res_attr)] - - if 'conformsTo' in rkeys: - res_attr = strip_if_string(r.get('conformsTo')) - if res_attr: - resource += [("conformsTo", res_attr)] - - if 'describedBy' in rkeys: - res_attr = strip_if_string(r.get('describedBy')) - if res_attr: - resource += [("describedBy", res_attr)] - - if 'describedByType' in rkeys: - res_attr = strip_if_string(r.get('describedByType')) - if res_attr: - resource += [("describedByType", res_attr)] - - striped_resource = [(x, y) for x, y in resource if y is not None and y != "" and y != []] - - arr += [OrderedDict(striped_resource)] - - return arr - - -def get_contact_point(extras, package): - for required_field in ["contact_name", "contact_email"]: - if required_field not in extras.keys(): - raise KeyError(required_field) - - email = 
strip_if_string(extras['contact_email']) - if email is None or '@' not in email: - raise KeyError(required_field) - fn = strip_if_string(extras['contact_name']) - if fn is None: - raise KeyError(required_field) - - contact_point = OrderedDict([ - ('@type', 'vcard:Contact'), # optional - ('fn', fn), # required - ('hasEmail', 'mailto:' + email), # required +def get_facet_fields(): + # Return fields that we'd like to add to default CKAN faceting. This really has + # nothing to do with exporting data.json but it's probably a common consideration. + facets = OrderedDict() + facets[ + # using "author" produces weird results because the Solr schema indexes it as "text" rather than "string" + "Agency"] = "Publishers" + # search facets remove spaces from field names + facets["SubjectArea1"] = "Subjects" + return facets + + +def make_datajson_entry(package): + return OrderedDict([ + ("title", package["title"]), + ("description", package["notes"]), + ("keyword", [t["display_name"] for t in package["tags"]]), + ("modified", extra(package, "Date Updated")), + ("publisher", package["author"]), + ("bureauCode", extra(package, "Bureau Code").split(" ") if extra(package, "Bureau Code") else None), + ("programCode", extra(package, "Program Code").split(" ") if extra(package, "Program Code") else None), + ("contactPoint", extra(package, "Contact Name")), + ("mbox", extra(package, "Contact Email")), + ("identifier", package["id"]), + ("accessLevel", extra(package, "Access Level", default="public")), + ("accessLevelComment", extra(package, "Access Level Comment")), + ("dataDictionary", extra(package, "Data Dictionary")), + ("accessURL", get_primary_resource(package).get("url", None)), + ("webService", get_api_resource(package).get("url", None)), + ("format", extension_to_mime_type(get_primary_resource(package).get("format", None)) ), + ("license", extra(package, "License Agreement")), + ("spatial", extra(package, "Geographic Scope")), + ("temporal", build_temporal(package)), + ("issued", extra(package, "Date Released")), + ("accrualPeriodicity", extra(package, "Publish Frequency")), + ("language", extra(package, "Language")), + ("PrimaryITInvestmentUII", extra(package, "PrimaryITInvestmentUII")), + ("granularity", "/".join( + x for x in [extra(package, "Unit of Analysis"), extra(package, "Geographic Granularity")] if x != None)), + ("dataQuality", extra(package, "Data Quality Met", default="true") == "true"), + ("theme", [s for s in ( + extra(package, "Subject Area 1"), extra(package, "Subject Area 2"), extra(package, "Subject Area 3")) if + s != None]), + ("references", [s for s in [extra(package, "Technical Documentation")] if s != None]), + ("landingPage", package["url"]), + ("systemOfRecords", extra(package, "System Of Records")), + ("distribution", + [ + OrderedDict([ + ("identifier", r["id"]), # NOT in POD standard, but useful for conversion to JSON-LD + ("accessURL", r["url"]), + ("format", r.get("mimetype", extension_to_mime_type(r["format"]))), + ]) + for r in package["resources"] + if r["format"].lower() not in ("api", "query tool", "widget") + ]), ]) - return contact_point def extra(package, key, default=None): @@ -314,72 +79,23 @@ def extra(package, key, default=None): return default -def get_publisher_tree_wrong_order(extras): - publisher = strip_if_string(extras.get('publisher')) - if publisher is None: - raise KeyError('publisher') - - organization_list = list() - organization_list.append([ - ('@type', 'org:Organization'), # optional - ('name', publisher), # required - ]) - - for i in range(1, 
6): - key = 'publisher_' + str(i) - if key in extras and extras[key] and strip_if_string(extras[key]): - organization_list.append([ - ('@type', 'org:Organization'), # optional - ('name', strip_if_string(extras[key])), # required - ]) - - size = len(organization_list) - - # [OSCIT, GSA] - # organization_list.reverse() - # [GSA, OSCIT] - - tree = False - for i in range(0, size): - if tree: - organization_list[i] += [('subOrganizationOf', OrderedDict(tree))] - tree = organization_list[i] - - return OrderedDict(tree) - - -def underscore_to_camelcase(value): - """ - Convert underscored strings to camel case, e.g. one_two_three to oneTwoThree - """ - - def camelcase(): - yield unicode.lower - while True: - yield unicode.capitalize - - c = camelcase() - return "".join(c.next()(x) if x else '_' for x in value.split("_")) - - -def get_best_resource(package, acceptable_formats): +def get_best_resource(package, acceptable_formats, unacceptable_formats=None): resources = list(r for r in package["resources"] if r["format"].lower() in acceptable_formats) - if len(resources) == 0: return {} - resources.sort(key=lambda r: acceptable_formats.index(r["format"].lower())) + if len(resources) == 0: + if unacceptable_formats: + # try at least any resource that's not unacceptable + resources = list(r for r in package["resources"] if r["format"].lower() not in unacceptable_formats) + if len(resources) == 0: + # there is no acceptable resource to show + return {} + else: + resources.sort(key=lambda r: acceptable_formats.index(r["format"].lower())) return resources[0] -def strip_if_string(val): - if isinstance(val, (str, unicode)): - val = val.strip() - if '' == val: - val = None - return val - - def get_primary_resource(package): # Return info about a "primary" resource. Select a good one. - return get_best_resource(package, ("csv", "xls", "xml", "text", "zip", "rdf")) + return get_best_resource(package, ("csv", "xls", "xml", "text", "zip", "rdf"), ("api", "query tool", "widget")) def get_api_resource(package): @@ -387,9 +103,422 @@ def get_api_resource(package): return get_best_resource(package, ("api", "query tool")) -def split_multiple_entries(retlist, extras, names): - found_element = string.strip(extras.get(names[1], "")) - if found_element: - retlist.append( - (names[0], [string.strip(x) for x in string.split(found_element, ',')]) - ) +def build_temporal(package): + # Build one dataset entry of the data.json file. 
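+    # More precisely: returns the dataset's ISO 8601 temporal range, e.g. "FY2012/FY2013", or None if both ends are unknown.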
+    temporal = ""
+    if extra(package, "Coverage Period Fiscal Year Start"):
+        temporal = "FY" + extra(package, "Coverage Period Fiscal Year Start").replace(" ", "T").replace("T00:00:00", "")
+    else:
+        temporal = extra(package, "Coverage Period Start", "Unknown").replace(" ", "T").replace("T00:00:00", "")
+    temporal += "/"
+    if extra(package, "Coverage Period Fiscal Year End"):
+        temporal += "FY" + extra(package, "Coverage Period Fiscal Year End").replace(" ", "T").replace("T00:00:00", "")
+    else:
+        temporal += extra(package, "Coverage Period End", "Unknown").replace(" ", "T").replace("T00:00:00", "")
+    if temporal == "Unknown/Unknown": return None
+    return temporal
+
+
+def extension_to_mime_type(file_ext):
+    if file_ext is None: return None
+    ext = {
+        "csv": "text/csv",
+        "xls": "application/vnd.ms-excel",
+        "xml": "application/xml",
+        "rdf": "application/rdf+xml",
+        "json": "application/json",
+        "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+        "text": "text/plain",
+        "feed": "application/rss+xml",
+    }
+    return ext.get(file_ext.lower(), "application/unknown")
+
+
+class JsonExportBuilder:
+    @staticmethod
+    def make_datajson_export_catalog(datasets):
+        catalog = OrderedDict([
+            ('conformsTo', 'https://project-open-data.cio.gov/v1.1/schema'),  # required
+            ('describedBy', 'https://project-open-data.cio.gov/v1.1/schema/catalog.json'),  # optional
+            ('@context', 'https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld'),  # optional
+            ('@type', 'dcat:Catalog'),  # optional
+            ('dataset', datasets),  # required
+        ])
+        return catalog
+
+    @staticmethod
+    def make_datajson_export_entry(package):
+        # extras is a list of dicts [{},{}, {}]. For each dict, extract the key, value entries into a new dict
+        extras = dict([(x['key'], x['value']) for x in package['extras']])
+
+        parent_dataset_id = extras.get('parent_dataset')
+        if parent_dataset_id:
+            parent = model.Package.get(parent_dataset_id)
+            parent_uid = parent.extras.col.target['unique_id'].value
+            if parent_uid:
+                parent_dataset_id = parent_uid
+
+        # if resource format is CSV then convert it to text/csv
+        # Resource format has to be in 'csv' format for automatic datastore push.
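+        # (e.g. "csv" -> "text/csv"; formats other than csv/json/pdf pass through unchanged)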
+ for r in package["resources"]: + if r["format"].lower() == "csv": + r["format"] = "text/csv" + if r["format"].lower() == "json": + r["format"] = "application/json" + if r["format"].lower() == "pdf": + r["format"] = "application/pdf" + + try: + retlist = [ + ("@type", "dcat:Dataset"), # optional + + ("title", JsonExportBuilder.strip_if_string(package["title"])), # required + + # ("accessLevel", 'public'), # required + ("accessLevel", JsonExportBuilder.strip_if_string(extras.get('public_access_level'))), # required + + # ("accrualPeriodicity", "R/P1Y"), # optional + # ('accrualPeriodicity', 'accrual_periodicity'), + ('accrualPeriodicity', JsonExportBuilder.get_accrual_periodicity(extras.get('accrual_periodicity'))), + # optional + + ("conformsTo", JsonExportBuilder.strip_if_string(extras.get('conforms_to'))), # optional + + # ('contactPoint', OrderedDict([ + # ("@type", "vcard:Contact"), + # ("fn", "Jane Doe"), + # ("hasEmail", "mailto:jane.doe@agency.gov") + # ])), # required + ('contactPoint', JsonExportBuilder.get_contact_point(extras, package)), # required + + ("dataQuality", JsonExportBuilder.strip_if_string(extras.get('data_quality'))), + # required-if-applicable + + ("describedBy", JsonExportBuilder.strip_if_string(extras.get('data_dictionary'))), # optional + ("describedByType", JsonExportBuilder.strip_if_string(extras.get('data_dictionary_type'))), # optional + + ("description", JsonExportBuilder.strip_if_string(package["notes"])), # required + + # ("description", 'asdfasdf'), # required + + ("identifier", JsonExportBuilder.strip_if_string(extras.get('unique_id'))), # required + # ("identifier", 'asdfasdfasdf'), # required + + ("isPartOf", parent_dataset_id), # optional + ("issued", JsonExportBuilder.strip_if_string(extras.get('release_date'))), # optional + + # ("keyword", ['a', 'b']), # required + ("keyword", [t["display_name"] for t in package["tags"]]), # required + + ("landingPage", JsonExportBuilder.strip_if_string(extras.get('homepage_url'))), # optional + + ("license", JsonExportBuilder.strip_if_string(extras.get("license_new"))), # required-if-applicable + + ("modified", JsonExportBuilder.strip_if_string(extras.get("modified"))), # required + + ("primaryITInvestmentUII", JsonExportBuilder.strip_if_string(extras.get('primary_it_investment_uii'))), + # optional + + # ('publisher', OrderedDict([ + # ("@type", "org:Organization"), + # ("name", "Widget Services") + # ])), # required + # ("publisher", get_publisher_tree(extras)), # required + ("publisher", JsonExportBuilder.get_publisher_tree_wrong_order(extras)), # required + + ("rights", JsonExportBuilder.strip_if_string(extras.get('access_level_comment'))), # required + + ("spatial", JsonExportBuilder.strip_if_string(package.get("spatial"))), # required-if-applicable + + ('systemOfRecords', JsonExportBuilder.strip_if_string(extras.get('system_of_records'))), # optional + + ("temporal", JsonExportBuilder.strip_if_string(extras.get('temporal'))), # required-if-applicable + + ("distribution", JsonExportBuilder.generate_distribution(package)), # required-if-applicable + + # ("distribution", + # #TODO distribution should hide any key/value pairs where value is "" or None (e.g. 
format) + # [ + # OrderedDict([ + # ("downloadURL", r["url"]), + # ("mediaType", r["formatReadable"]), + # ]) + # for r in package["resources"] + # ]) + ] + + for pair in [ + ('bureauCode', 'bureau_code'), # required + ('language', 'language'), # optional + ('programCode', 'program_code'), # required + ('references', 'related_documents'), # optional + ('theme', 'category'), # optional + ]: + JsonExportBuilder.split_multiple_entries(retlist, extras, pair) + + except KeyError as e: + log.warn("Invalid field detected for package with id=[%s], title=['%s']: '%s'", package.get('id'), + package.get('title'), e) + return + + # # TODO this is a lazy hack to make sure we don't have redundant fields when the free form key/value pairs are added + # extras_to_filter_out = ['publisher', 'contact_name', 'contact_email', 'unique_id', 'public_access_level', + # 'data_dictionary', 'bureau_code', 'program_code', 'access_level_comment', 'license_title', + # 'spatial', 'temporal', 'release_date', 'accrual_periodicity', 'language', 'granularity', + # 'data_quality', 'size', 'homepage_url', 'rss_feed', 'category', 'related_documents', + # 'system_of_records', 'system_of_records_none_related_to_this_dataset', 'tags', + # 'extrasRollup', 'format', 'accessURL', 'notes', 'publisher_1', 'publisher_2', 'publisher_3', + # 'publisher_4', 'publisher_5'] + # + # # Append any free extras (key/value pairs) that aren't part of common core but have been associated with the dataset + # # TODO really hackey, short on time, had to hardcode a lot of the names to remove. there's much better ways, maybe + # # generate a list of keys to ignore by calling a specific function to get the extras + # retlist_keys = [x for x, y in retlist] + # extras_keys = set(extras.keys()) - set(extras_to_filter_out) + # + # for key in extras_keys: + # convertedKey = underscore_to_camelcase(key) + # if convertedKey not in retlist_keys: + # retlist.append((convertedKey, extras[key])) + + # Remove entries where value is None, "", or empty list [] + striped_retlist = [(x, y) for x, y in retlist if y is not None and y != "" and y != []] + striped_retlist_keys = [x for x, y in striped_retlist] + + + # If a required metadata field was removed, return empty string + # for required_field in ["accessLevel", "bureauCode", "contactPoint", "description", "identifier", "keyword", + # "modified", "programCode", "publisher", "title"]: + # if required_field not in striped_retlist_keys: + # log.warn("Missing required field detected for package with id=[%s], title=['%s']: '%s'", + # package.get('id'), package.get('title'), required_field) + # return + + # When saved from UI DataQuality value is stored as "on" instead of True. + # Check if value is "on" and replace it with True. 
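+        # e.g. a stored dataQuality value of "on", "true", or "True" exports
+        # as the JSON boolean true, and "false"/"False" as false.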
+ striped_retlist_dict = OrderedDict(striped_retlist) + if striped_retlist_dict.get('dataQuality') == "on" \ + or striped_retlist_dict.get('dataQuality') == "true" \ + or striped_retlist_dict.get('dataQuality') == "True": + striped_retlist_dict['dataQuality'] = True + elif striped_retlist_dict.get('dataQuality') == "false" \ + or striped_retlist_dict.get('dataQuality') == "False": + striped_retlist_dict['dataQuality'] = False + + from datajsonvalidator import do_validation + + errors = [] + try: + do_validation([dict(striped_retlist_dict)], errors) + except Exception as e: + errors.append(("Internal Error", ["Something bad happened: " + unicode(e)])) + if len(errors) > 0: + for error in errors: + log.warn(error) + return + + return striped_retlist_dict + + + # used by get_accrual_periodicity + accrual_periodicity_dict = { + 'completely irregular': 'irregular', + 'decennial': 'R/P10Y', + 'quadrennial': 'R/P4Y', + 'annual': 'R/P1Y', + 'bimonthly': 'R/P2M', # or R/P0.5M + 'semiweekly': 'R/P3.5D', + 'daily': 'R/P1D', + 'biweekly': 'R/P2W', # or R/P0.5W + 'semiannual': 'R/P6M', + 'biennial': 'R/P2Y', + 'triennial': 'R/P3Y', + 'three times a week': 'R/P0.33W', + 'three times a month': 'R/P0.33M', + 'continuously updated': 'R/PT1S', + 'monthly': 'R/P1M', + 'quarterly': 'R/P3M', + 'semimonthly': 'R/P0.5M', + 'three times a year': 'R/P4M', + 'weekly': 'R/P1W' + } + + @staticmethod + def get_accrual_periodicity(frequency): + return JsonExportBuilder.accrual_periodicity_dict.get(str(frequency).lower().strip(), frequency) + + @staticmethod + def generate_distribution(package): + arr = [] + for r in package["resources"]: + resource = [("@type", "dcat:Distribution")] + rkeys = r.keys() + if 'url' in rkeys: + res_url = JsonExportBuilder.strip_if_string(r.get('url')) + if res_url: + if 'api' == r.get('resource_type') or 'accessurl' == r.get('resource_type'): + resource += [("accessURL", res_url)] + else: + resource += [("downloadURL", res_url)] + if 'format' in rkeys: + res_format = JsonExportBuilder.strip_if_string(r.get('format')) + if res_format: + resource += [("mediaType", res_format)] + else: + log.warn("Missing mediaType for resource in package ['%s']", package.get('id')) + else: + log.warn("Missing downloadURL for resource in package ['%s']", package.get('id')) + + # if 'accessURL_new' in rkeys: + # res_access_url = JsonExportBuilder.strip_if_string(r.get('accessURL_new')) + # if res_access_url: + # resource += [("accessURL", res_access_url)] + + if 'formatReadable' in rkeys: + res_attr = JsonExportBuilder.strip_if_string(r.get('formatReadable')) + if res_attr: + resource += [("format", res_attr)] + + if 'name' in rkeys: + res_attr = JsonExportBuilder.strip_if_string(r.get('name')) + if res_attr: + resource += [("title", res_attr)] + + if 'notes' in rkeys: + res_attr = JsonExportBuilder.strip_if_string(r.get('notes')) + if res_attr: + resource += [("description", res_attr)] + + if 'conformsTo' in rkeys: + res_attr = JsonExportBuilder.strip_if_string(r.get('conformsTo')) + if res_attr: + resource += [("conformsTo", res_attr)] + + if 'describedBy' in rkeys: + res_attr = JsonExportBuilder.strip_if_string(r.get('describedBy')) + if res_attr: + resource += [("describedBy", res_attr)] + + if 'describedByType' in rkeys: + res_attr = JsonExportBuilder.strip_if_string(r.get('describedByType')) + if res_attr: + resource += [("describedByType", res_attr)] + + striped_resource = [(x, y) for x, y in resource if y is not None and y != "" and y != []] + + arr += [OrderedDict(striped_resource)] + + return arr + + 
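+    # For illustration (values borrowed from the commented example above):
+    # extras {"contact_name": "Jane Doe", "contact_email": "jane.doe@agency.gov"}
+    # yield OrderedDict([("@type", "vcard:Contact"), ("fn", "Jane Doe"),
+    # ("hasEmail", "mailto:jane.doe@agency.gov")]).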
+    @staticmethod
+    def get_contact_point(extras, package):
+        for required_field in ["contact_name", "contact_email"]:
+            if required_field not in extras.keys():
+                raise KeyError(required_field)
+
+        # A missing or malformed value is raised as a KeyError so that
+        # make_datajson_export_entry logs it and skips the package.
+        email = JsonExportBuilder.strip_if_string(extras['contact_email'])
+        if email is None or '@' not in email:
+            raise KeyError('contact_email')
+
+        fn = JsonExportBuilder.strip_if_string(extras['contact_name'])
+        if fn is None:
+            raise KeyError('contact_name')
+
+        contact_point = OrderedDict([
+            ('@type', 'vcard:Contact'),  # optional
+            ('fn', fn),  # required
+            ('hasEmail', 'mailto:' + email),  # required
+        ])
+        return contact_point
+
+    @staticmethod
+    def extra(package, key, default=None):
+        # Retrieves the value of an extras field.
+        for extra in package["extras"]:
+            if extra["key"] == key:
+                return extra["value"]
+        return default
+
+    @staticmethod
+    def get_publisher_tree_wrong_order(extras):
+        publisher = JsonExportBuilder.strip_if_string(extras.get('publisher'))
+        if publisher is None:
+            raise KeyError('publisher')
+
+        organization_list = list()
+        organization_list.append([
+            ('@type', 'org:Organization'),  # optional
+            ('name', publisher),  # required
+        ])
+
+        for i in range(1, 6):
+            key = 'publisher_' + str(i)
+            if key in extras and extras[key] and JsonExportBuilder.strip_if_string(extras[key]):
+                organization_list.append([
+                    ('@type', 'org:Organization'),  # optional
+                    ('name', JsonExportBuilder.strip_if_string(extras[key])),  # required
+                ])
+
+        size = len(organization_list)
+
+        # [OSCIT, GSA]
+        # organization_list.reverse()
+        # [GSA, OSCIT]
+
+        # Fold the flat list into a nested subOrganizationOf tree.
+        tree = False
+        for i in range(0, size):
+            if tree:
+                organization_list[i] += [('subOrganizationOf', OrderedDict(tree))]
+            tree = organization_list[i]
+
+        return OrderedDict(tree)
+
+    @staticmethod
+    def underscore_to_camelcase(value):
+        """
+        Convert underscored strings to camel case, e.g. one_two_three to oneTwoThree
+        """
+
+        def camelcase():
+            yield unicode.lower
+            while True:
+                yield unicode.capitalize
+
+        c = camelcase()
+        return "".join(c.next()(x) if x else '_' for x in value.split("_"))
+
+    @staticmethod
+    def get_best_resource(package, acceptable_formats):
+        resources = list(r for r in package["resources"] if r["format"].lower() in acceptable_formats)
+        if len(resources) == 0: return {}
+        resources.sort(key=lambda r: acceptable_formats.index(r["format"].lower()))
+        return resources[0]
+
+    @staticmethod
+    def strip_if_string(val):
+        if isinstance(val, (str, unicode)):
+            val = val.strip()
+            if '' == val:
+                val = None
+        return val
+
+    @staticmethod
+    def get_primary_resource(package):
+        # Return info about a "primary" resource. Select a good one.
+        return JsonExportBuilder.get_best_resource(package, ("csv", "xls", "xml", "text", "zip", "rdf"))
+
+    @staticmethod
+    def get_api_resource(package):
+        # Return info about an API resource.
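+        # "query tool" resources are treated as API endpoints here; when both
+        # kinds are present, the tuple order makes get_best_resource prefer "api".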
+        return JsonExportBuilder.get_best_resource(package, ("api", "query tool"))
+
+    @staticmethod
+    def split_multiple_entries(retlist, extras, names):
+        found_element = extras.get(names[1], "").strip()
+        if found_element:
+            retlist.append(
+                (names[0], [x.strip() for x in found_element.split(',')])
+            )
diff --git a/ckanext/datajson/build_datajsonld.py b/ckanext/datajson/build_datajsonld.py
index fb88f6dc..8a223912 100644
--- a/ckanext/datajson/build_datajsonld.py
+++ b/ckanext/datajson/build_datajsonld.py
@@ -4,10 +4,10 @@ from sqlalchemy.util import OrderedDict
 def dataset_to_jsonld(dataset):
-    from plugin import JsonExportPlugin
+    from plugin import DataJsonPlugin
 
     ret = OrderedDict([
-        ("@id", JsonExportPlugin.site_url + "/dataset/" + dataset["identifier"]),
+        ("@id", DataJsonPlugin.site_url + "/dataset/" + dataset["identifier"]),
         ("@type", "dcat:Dataset"),
     ])
 
@@ -20,9 +20,9 @@ def dataset_to_jsonld(dataset):
     return ret
 
 def distribution_to_jsonld(distribution):
-    from plugin import JsonExportPlugin
+    from plugin import DataJsonPlugin
     ret = OrderedDict([
-        ("@id", JsonExportPlugin.site_url + "/resource/" + distribution["identifier"]),
+        ("@id", DataJsonPlugin.site_url + "/resource/" + distribution["identifier"]),
         ("@type", "dcat:Distribution"),
     ])
     apply_jsonld_metadata_mapping(distribution, ret)
@@ -33,14 +33,18 @@ def distribution_to_jsonld(distribution):
     "description": "dcterms:description",
     "keyword": "dcat:keyword",
     "modified": "dcterms:modified",
-    "publisher": "dcat:publisher",
-    "person": "foaf:Person",
+    "publisher": "dcterms:publisher",
+    "contactPoint": "dcat:contactPoint",
     "mbox": "foaf:mbox",
     "identifier": "dcterms:identifier",
+    "accessLevel": "pod:accessLevel",
+    "bureauCode": "pod:bureauCode",
+    "programCode": "pod:programCode",
+    "accessLevelComment": "pod:accessLevelComment",
     "dataDictionary": "dcat:dataDictionary",
     "accessURL": "dcat:accessURL",
-    "webService": "dcat:webService",
+    "webService": "pod:webService",
     "format": "dcterms:format", # must be a dcterms:MediaTypeOrExtent
     "license": "dcterms:license",
     "spatial": "dcterms:spatial", # must be a dcterms:Location entity
@@ -49,19 +53,16 @@ def distribution_to_jsonld(distribution):
     "issued": "dcterms:issued",
     "accrualPeriodicity": "dcterms:accrualPeriodicity", # must be a dcterms:Frequency
     "language": "dcat:language", # must be an IRI
-    "granularity": "dcat:granularity",
-    "dataQuality": "xsd:boolean",
+    "dataQuality": "pod:dataQuality",
     "theme": "dcat:theme",
     "references": "dcterms:references",
-    "size": "dcat:size",
     "landingPage": "dcat:landingPage",
-    "feed": "dcat:feed",
+    "systemOfRecords": "pod:systemOfRecords",
 }
 
 jsonld_metadata_datatypes = {
     "modified": "http://www.w3.org/2001/XMLSchema#dateTime",
     "issued": "http://www.w3.org/2001/XMLSchema#dateTime",
-    "size": "http://www.w3.org/2001/XMLSchema#decimal",
 }
 
 def apply_jsonld_metadata_mapping(data, newdict):
@@ -72,10 +73,6 @@ def apply_jsonld_metadata_mapping(data, newdict):
         # skip fields with no mapping to RDF
         if k not in jsonld_metadata_mapping: continue
 
-        # specially handle 'keyword' which in JSON is packed in a comma-separated field
-        if k == "keyword":
-            v = v.split(",")
-
         # specially handle literal fields with datatypes
         if k in jsonld_metadata_datatypes:
             # Convert ISO datetime format to xsd:dateTime format.
diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index 6997079c..1fd95b3f 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -18,7 +18,9 @@ except ImportError: from sqlalchemy.util import OrderedDict -from build_datajson import make_datajson_export_entry, make_datajson_export_catalog +from build_datajson import JsonExportBuilder + +from build_datajson import make_datajson_entry, get_facet_fields # from build_enterprisedatajson import make_enterprisedatajson_entry from build_datajsonld import dataset_to_jsonld @@ -194,7 +196,7 @@ def make_json(self): extras = dict([(x['key'], x['value']) for x in pkg['extras']]) try: if not (re.match(r'[Nn]on-public', extras['public_access_level'])): - datajson_entry = make_datajson_export_entry(pkg) + datajson_entry = JsonExportBuilder.make_datajson_export_entry(pkg) if datajson_entry: output.append(datajson_entry) else: @@ -223,7 +225,7 @@ def make_edi(self, owner_org): output = [] for pkg in packages: # if pkg['owner_org'] == owner_org: - datajson_entry = make_datajson_export_entry(pkg) + datajson_entry = JsonExportBuilder.make_datajson_export_entry(pkg) if datajson_entry and self.is_valid(datajson_entry): output.append(datajson_entry) else: @@ -258,7 +260,7 @@ def make_pdl(self, owner_org): extras = dict([(x['key'], x['value']) for x in pkg['extras']]) try: if not (re.match(r'[Nn]on-public', extras['public_access_level'])): - datajson_entry = make_datajson_export_entry(pkg) + datajson_entry = JsonExportBuilder.make_datajson_export_entry(pkg) if datajson_entry and self.is_valid(datajson_entry): output.append(datajson_entry) else: @@ -333,7 +335,9 @@ def write_zip(self, data, error=None, zip_name='data'): # Write the data file if data: - zf.writestr('data.json', json.dumps(make_datajson_export_catalog(data), ensure_ascii=False).encode('utf8')) + zf.writestr('data.json', + json.dumps(JsonExportBuilder.make_datajson_export_catalog(data), ensure_ascii=False).encode( + 'utf8')) # Write the error log if error: diff --git a/ckanext/datajson/pod_schema/federal-v1.1/dataset.json b/ckanext/datajson/pod_schema/federal-v1.1/dataset.json index 06fb984c..b9037fb8 100644 --- a/ckanext/datajson/pod_schema/federal-v1.1/dataset.json +++ b/ckanext/datajson/pod_schema/federal-v1.1/dataset.json @@ -77,7 +77,7 @@ "uniqueItems": true }, "contactPoint": { - "$ref": "vcard.json" + "$ref": "#/definitions/vcard" }, "describedBy": { "title": "Data Dictionary", @@ -143,7 +143,7 @@ { "type": "array", "items": { - "$ref": "distribution.json", + "$ref": "#/definitions/distribution", "minItems": 1, "uniqueItems": true } @@ -267,7 +267,7 @@ "uniqueItems": true }, "publisher": { - "$ref": "organization.json" + "$ref": "#/definitions/organization" }, "references": { "title": "Related Documents", @@ -397,7 +397,7 @@ "hasEmail": { "title": "Email", "description": "Email address for the contact", - "pattern": "^mailto:([\\w.-]+@[\\w.-]+\\.[\\w.-]+)?$", + "pattern": "^mailto:[\\w.-]+@[\\w.-]+\\.[\\w.-]+?$", "type": "string" } } @@ -566,7 +566,7 @@ }, "subOrganizationOf": { "title": "Parent Organization", - "$ref": "organization.json" + "$ref": "#" } } } diff --git a/ckanext/datajson/pod_schema/non-federal-v1.1/dataset-non-federal.json b/ckanext/datajson/pod_schema/non-federal-v1.1/dataset-non-federal.json index b0a7f846..3495512b 100644 --- a/ckanext/datajson/pod_schema/non-federal-v1.1/dataset-non-federal.json +++ b/ckanext/datajson/pod_schema/non-federal-v1.1/dataset-non-federal.json @@ -139,7 +139,7 @@ { "type": "array", "items": { - "$ref": 
"distribution.json", + "$ref": "#/definitions/distribution", "minItems": 1, "uniqueItems": true } @@ -263,7 +263,7 @@ "uniqueItems": true }, "publisher": { - "$ref": "organization.json" + "$ref": "#/definitions/organization" }, "references": { "title": "Related Documents", @@ -392,7 +392,7 @@ "hasEmail": { "title": "Email", "description": "Email address for the contact", - "pattern": "^mailto:([\\w.-]+@[\\w.-]+\\.[\\w.-]+)?$", + "pattern": "^mailto:[\\w.-]+@[\\w.-]+\\.[\\w.-]+?$", "type": "string" } } @@ -561,7 +561,7 @@ }, "subOrganizationOf": { "title": "Parent Organization", - "$ref": "organization.json" + "$ref": "#" } } } From 6a44b6896e8cb73a61f13dbed667b46d183bcffb Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Fri, 13 Mar 2015 12:17:14 -0400 Subject: [PATCH 07/22] Catalog changes integrated --- ckanext/datajson/build_datajson.py | 14 +- ckanext/datajson/harvester_base.py | 586 ++++++++++++++++-- .../datajson/harvester_cmsdatanavigator.py | 29 +- ckanext/datajson/harvester_datajson.py | 51 +- ckanext/datajson/parse_datajson.py | 225 ++++--- ckanext/datajson/plugin.py | 148 +++++ .../datajson/templates/html_rendition.html | 43 ++ 7 files changed, 942 insertions(+), 154 deletions(-) create mode 100644 ckanext/datajson/templates/html_rendition.html diff --git a/ckanext/datajson/build_datajson.py b/ckanext/datajson/build_datajson.py index d4e472eb..27c93d8b 100644 --- a/ckanext/datajson/build_datajson.py +++ b/ckanext/datajson/build_datajson.py @@ -16,9 +16,9 @@ def get_facet_fields(): # Return fields that we'd like to add to default CKAN faceting. This really has # nothing to do with exporting data.json but it's probably a common consideration. facets = OrderedDict() - facets[ - # using "author" produces weird results because the Solr schema indexes it as "text" rather than "string" - "Agency"] = "Publishers" + + # using "author" produces weird results because the Solr schema indexes it as "text" rather than "string" + facets["Agency"] = "Publishers" # search facets remove spaces from field names facets["SubjectArea1"] = "Subjects" return facets @@ -50,12 +50,13 @@ def make_datajson_entry(package): ("language", extra(package, "Language")), ("PrimaryITInvestmentUII", extra(package, "PrimaryITInvestmentUII")), ("granularity", "/".join( - x for x in [extra(package, "Unit of Analysis"), extra(package, "Geographic Granularity")] if x != None)), + x for x in [extra(package, "Unit of Analysis"), extra(package, "Geographic Granularity")] if + x is not None)), ("dataQuality", extra(package, "Data Quality Met", default="true") == "true"), ("theme", [s for s in ( extra(package, "Subject Area 1"), extra(package, "Subject Area 2"), extra(package, "Subject Area 3")) if - s != None]), - ("references", [s for s in [extra(package, "Technical Documentation")] if s != None]), + s is not None]), + ("references", [s for s in [extra(package, "Technical Documentation")] if s is not None]), ("landingPage", package["url"]), ("systemOfRecords", extra(package, "System Of Records")), ("distribution", @@ -105,7 +106,6 @@ def get_api_resource(package): def build_temporal(package): # Build one dataset entry of the data.json file. 
-    temporal = ""
     if extra(package, "Coverage Period Fiscal Year Start"):
         temporal = "FY" + extra(package, "Coverage Period Fiscal Year Start").replace(" ", "T").replace("T00:00:00", "")
     else:
diff --git a/ckanext/datajson/harvester_base.py b/ckanext/datajson/harvester_base.py
index 89d4ffd6..d1cf4de0 100644
--- a/ckanext/datajson/harvester_base.py
+++ b/ckanext/datajson/harvester_base.py
@@ -1,23 +1,40 @@
 from ckan.lib.base import c
 from ckan import model
+from ckan import plugins as p
 from ckan.model import Session, Package
 from ckan.logic import ValidationError, NotFound, get_action
 from ckan.lib.munge import munge_title_to_name
 from ckan.lib.search.index import PackageSearchIndex
+from ckan.lib.navl.dictization_functions import Invalid
+from ckan.lib.navl.validators import ignore_empty
 
 from ckanext.harvest.model import HarvestJob, HarvestObject, HarvestGatherError, \
-    HarvestObjectError
+    HarvestObjectError, HarvestObjectExtra
 from ckanext.harvest.harvesters.base import HarvesterBase
 
-import uuid, datetime, hashlib, urllib2, json, yaml
+import uuid, datetime, hashlib, urllib2, json, yaml, os
+
+from jsonschema.validators import Draft4Validator
+from jsonschema import FormatChecker
 
 import logging
 log = logging.getLogger("harvester")
 
+VALIDATION_SCHEMA = [
+    ('', 'Project Open Data (Federal)'),
+    ('non-federal', 'Project Open Data (Non-Federal)'),
+    ]
+
+def validate_schema(schema):
+    if schema not in [s[0] for s in VALIDATION_SCHEMA]:
+        raise Invalid('Unknown validation schema: {0}'.format(schema))
+    return schema
+
 class DatasetHarvesterBase(HarvesterBase):
     '''
     A Harvester for datasets.
     '''
+    _user_name = None
 
     # SUBCLASSES MUST IMPLEMENT
     #HARVESTER_VERSION = "1.0"
@@ -34,13 +51,49 @@ def validate_config(self, config):
         config_obj = yaml.load(config)
         return config
 
+    def load_config(self, harvest_source):
+        # Load the harvest source's configuration data. We expect it to be a YAML
+        # string. Unfortunately I went ahead of CKAN on this. The stock CKAN harvester
+        # only allows JSON in the configuration box. My fork is necessary for this
+        # to work: https://github.com/joshdata/ckanext-harvest
+
+        ret = {
+            "filters": { },  # map data.json field name to list of values one of which must be present
+            "defaults": { },  # map field name to value to supply as default if none exists, handled by the actual importer module, so the field names may be arbitrary
+        }
+
+        source_config = yaml.load(harvest_source.config)
+
+        try:
+            ret["filters"].update(source_config["filters"])
+        except TypeError:
+            pass
+        except KeyError:
+            pass
+
+        try:
+            ret["defaults"].update(source_config["defaults"])
+        except TypeError:
+            pass
+        except KeyError:
+            pass
+
+        return ret
+
+    def _get_user_name(self):
+        if not self._user_name:
+            user = p.toolkit.get_action('get_site_user')({'model': model, 'ignore_auth': True}, {})
+            self._user_name = user['name']
+
+        return self._user_name
+
     def context(self):
         # Reusing the dict across calls to action methods can be dangerous, so
         # create a new dict every time we need it.
         # Setting validate to False is critical for getting the harvester plugin
         # to set extra fields on the package during indexing (see ckanext/harvest/plugin.py
         # line 99, https://github.com/okfn/ckanext-harvest/blob/master/ckanext/harvest/plugin.py#L99).
- return { "user": "harvest", "ignore_auth": True, "validate": False } + return { "user": self._get_user_name(), "ignore_auth": True } # SUBCLASSES MUST IMPLEMENT def load_remote_catalog(self, harvest_job): @@ -49,6 +102,11 @@ def load_remote_catalog(self, harvest_job): # with a locally unique identifier string and a 'title' field. raise Exception("Not implemented") + def extra_schema(self): + return { + 'validator_schema': [ignore_empty, unicode, validate_schema], + } + def gather_stage(self, harvest_job): # The gather stage scans a remote resource (like a /data.json file) for # a list of datasets to import. @@ -56,32 +114,158 @@ def gather_stage(self, harvest_job): log.debug('In %s gather_stage (%s)' % (repr(self), harvest_job.source.url)) # Start gathering. - source = self.load_remote_catalog(harvest_job) - if len(source) == 0: return [] + try: + source_datasets, catalog_values = self.load_remote_catalog(harvest_job) + except ValueError as e: + self._save_gather_error("Error loading json content: %s." % (e), harvest_job) + return [] + + if len(source_datasets) == 0: return [] + + DATAJSON_SCHEMA = { + "https://project-open-data.cio.gov/v1.1/schema": '1.1', + } + + # schema version is default 1.0, or a valid one (1.1, ...) + schema_version = '1.0' + parent_identifiers = set() + child_identifiers = set() + catalog_extras = {} + if isinstance(catalog_values, dict): + schema_value = catalog_values.get('conformsTo', '') + if schema_value not in DATAJSON_SCHEMA.keys(): + self._save_gather_error('Error reading json schema value.' \ + ' The given value is %s.' % ('empty' if schema_value == '' + else schema_value), harvest_job) + return [] + schema_version = DATAJSON_SCHEMA.get(schema_value, '1.0') + + for dataset in source_datasets: + parent_identifier = dataset.get('isPartOf') + if parent_identifier: + parent_identifiers.add(parent_identifier) + child_identifiers.add(dataset.get('identifier')) + + # get a list of needed catalog values and put into hobj + catalog_fields = ['@context', '@id', 'conformsTo', 'describedBy'] + catalog_extras = dict(('catalog_'+k, v) + for (k, v) in catalog_values.iteritems() + if k in catalog_fields) # Loop through the packages we've already imported from this source # and go into their extra fields to get their source_identifier, # which corresponds to the remote catalog's 'identifier' field. # Make a mapping so we know how to update existing records. + # Added: mark all existing parent datasets. existing_datasets = { } + existing_parents = { } for hobj in model.Session.query(HarvestObject).filter_by(source=harvest_job.source, current=True): try: pkg = get_action('package_show')(self.context(), { "id": hobj.package_id }) except: # reference is broken continue - sid = self.find_extra(pkg, "source_identifier") + sid = self.find_extra(pkg, "identifier") + is_parent = self.find_extra(pkg, "collection_metadata") if sid: existing_datasets[sid] = pkg + if is_parent and pkg.get("state") == "active": + existing_parents[sid] = pkg + + # which parents has been demoted to child level? + existing_parents_demoted = set( + identifier for identifier in existing_parents.keys() \ + if identifier not in parent_identifiers) + + # if there is any new parents, we will have to harvest parents + # first, mark the status in harvest_source config, which + # triggers a children harvest_job after parents job is finished. + source = harvest_job.source + source_config = json.loads(source.config or '{}') + # run status: None, or parents_run, or children_run? 
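+        # For illustration: a fresh source with collections first runs as
+        # 'parents_run' (only parent records import), then the follow-up job
+        # runs as 'children_run' so child records can link to their parents'
+        # package ids; the flag is cleared once no new parents remain.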
+ run_status = source_config.get('datajson_collection') + if parent_identifiers: + for parent in parent_identifiers & child_identifiers: + self._save_gather_error("Collection identifier '%s' \ + cannot be isPartOf another collection." \ + % parent, harvest_job) + + new_parents = set(identifier for identifier in parent_identifiers \ + if identifier not in existing_parents.keys()) + if new_parents: + if not run_status: + # fresh start + run_status = 'parents_run' + source_config['datajson_collection'] = run_status + source.config = json.dumps(source_config) + source.save() + elif run_status == 'children_run': + # it means new parents are tried and failed. + # but skip some which have previously reported with + # parent_identifiers & child_identifiers + for parent in new_parents - \ + (parent_identifiers & child_identifiers): + self._save_gather_error("Collection identifier '%s' \ + not found. Records which are part of this \ + collection will not be harvested." \ + % parent, harvest_job) + else: + # run_status was parents_run, and did not finish. + # something wrong but not sure what happened. + # let's leave it as it is, let it run one more time. + pass + else: + # all parents are already in place. run it as usual. + run_status = None + elif run_status: + # need to clear run_status + run_status = None + source_config['datajson_collection'] = run_status + source.config = json.dumps(source_config) + source.save() # Create HarvestObjects for any records in the remote catalog. object_ids = [] seen_datasets = set() + unique_datasets = set() - for dataset in source: + filters = self.load_config(harvest_job.source)["filters"] + + for dataset in source_datasets: # Create a new HarvestObject for this dataset and save the # dataset metdata inside it for later. + + # Check the config's filters to see if we should import this dataset. + # For each filter, check that the value specified in the data.json file + # is among the permitted values in the filter specification. + matched_filters = True + for k, v in filters.items(): + if dataset.get(k) not in v: + matched_filters = False + if not matched_filters: + continue + + if parent_identifiers and new_parents \ + and dataset['identifier'] not in parent_identifiers \ + and dataset.get('isPartOf') in new_parents: + if run_status == 'parents_run': + # skip those whose parents still need to run. + continue + else: + # which is 'children_run'. + # error out since parents got issues. + self._save_gather_error( + "Record with identifier '%s': isPartOf '%s' points to \ + an erroneous record." % (dataset['identifier'], + dataset.get('isPartOf')), harvest_job) + continue + + # Some source contains duplicate identifiers. skip all except the first one + if dataset['identifier'] in unique_datasets: + self._save_gather_error("Duplicate entry ignored for identifier: '%s'." % (dataset['identifier']), harvest_job) + continue + unique_datasets.add(dataset['identifier']) # Get the package_id of this resource if we've already imported # it into our system. Otherwise, assign a brand new GUID to the @@ -96,7 +280,8 @@ def gather_stage(self, harvest_job): # in the package so we can avoid updating datasets that # don't look like they've changed. 
if pkg.get("state") == "active" \ - and self.find_extra(pkg, "source_hash") == self.make_upstream_content_hash(dataset, harvest_job.source): + and dataset['identifier'] not in existing_parents_demoted \ + and self.find_extra(pkg, "source_hash") == self.make_upstream_content_hash(dataset, harvest_job.source, catalog_extras, schema_version): continue else: pkg_id = uuid.uuid4().hex @@ -104,9 +289,22 @@ def gather_stage(self, harvest_job): # Create a new HarvestObject and store in it the GUID of the # existing dataset (if it exists here already) and the dataset's # metadata from the remote catalog file. + extras = [HarvestObjectExtra( + key='schema_version', value=schema_version)] + if dataset['identifier'] in parent_identifiers: + extras.append(HarvestObjectExtra( + key='is_collection', value=True)) + elif dataset.get('isPartOf'): + parent_pkg_id = existing_parents[dataset.get('isPartOf')]['id'] + extras.append(HarvestObjectExtra( + key='collection_pkg_id', value=parent_pkg_id)) + for k, v in catalog_extras.iteritems(): + extras.append(HarvestObjectExtra(key=k, value=v)) + obj = HarvestObject( guid=pkg_id, job=harvest_job, + extras=extras, content=json.dumps(dataset, sort_keys=True)) # use sort_keys to preserve field order so hashes of this string are constant from run to run obj.save() object_ids.append(obj.id) @@ -116,9 +314,14 @@ def gather_stage(self, harvest_job): if upstreamid in seen_datasets: continue # was just updated if pkg.get("state") == "deleted": continue # already deleted pkg["state"] = "deleted" - pkg["name"] = self.make_package_name(pkg["title"], pkg["id"], True) # try to prevent name clash by giving it a "deleted-" name log.warn('deleting package %s (%s) because it is no longer in %s' % (pkg["name"], pkg["id"], harvest_job.source.url)) get_action('package_update')(self.context(), pkg) + obj = HarvestObject( + guid=pkg_id, + job=harvest_job, + ) + obj.save() + object_ids.append(obj.id) return object_ids @@ -128,29 +331,217 @@ def fetch_stage(self, harvest_object): return True # SUBCLASSES MUST IMPLEMENT - def set_dataset_info(self, pkg, dataset, dataset_defaults): + def set_dataset_info(self, pkg, dataset, dataset_defaults, schema_version): # Sets package metadata on 'pkg' using the remote catalog's metadata # in 'dataset' and default values as configured in 'dataset_defaults'. raise Exception("Not implemented.") + # validate dataset against POD schema + # use a local copy. + def _validate_dataset(self, validator_schema, schema_version, dataset): + if validator_schema == 'non-federal': + if schema_version == '1.1': + file_path = 'pod_schema/non-federal-v1.1/dataset-non-federal.json' + else: + file_path = 'pod_schema/non-federal/single_entry.json' + else: + if schema_version == '1.1': + file_path = 'pod_schema/federal-v1.1/dataset.json' + else: + file_path = 'pod_schema/single_entry.json' + + with open(os.path.join( + os.path.dirname(__file__), file_path)) as json_file: + schema = json.load(json_file) + + msg = ";" + errors = Draft4Validator(schema, format_checker=FormatChecker()).iter_errors(dataset) + count = 0 + for error in errors: + count += 1 + msg = msg + " ### ERROR #" + str(count) + ": " + self._validate_readable_msg(error) + "; " + msg = msg.strip("; ") + if msg: + id = "Identifier: " + (dataset.get("identifier") if dataset.get("identifier") else "Unknown") + title = "Title: " + (dataset.get("title") if dataset.get("title") else "Unknown") + msg = id + "; " + title + "; " + str(count) + " Error(s) Found. " + msg + "." 
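+        # The assembled message reads like: "Identifier: abc-123; Title: Some
+        # Dataset; 2 Error(s) Found. ### ERROR #1: ...; ### ERROR #2: ..."
+        # (identifier and title here are hypothetical).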
+        return msg
+
+    # Make a jsonschema ValidationError readable.
+    def _validate_readable_msg(self, e):
+        msg = e.message.replace("u'", "'")
+        elem = ""
+        try:
+            if e.schema_path[0] == 'properties':
+                elem = e.schema_path[1]
+                elem = "'" + elem + "':"
+        except:
+            pass
+
+        return elem + msg
+
     def import_stage(self, harvest_object):
         # The import stage actually creates the dataset.
         log.debug('In %s import_stage' % repr(self))
 
-        # Get default values.
-        source_config = yaml.load(harvest_object.source.config)
-        dataset_defaults = None
-        try:
-            dataset_defaults = source_config["defaults"]
-        except TypeError:
-            pass
-        except KeyError:
-            pass
-        if not dataset_defaults: dataset_defaults = { }
-
-        # Get the metadata that we stored in the HarvestObject's content field.
+        if harvest_object.content is None:
+            return True
+
         dataset = json.loads(harvest_object.content)
+        schema_version = '1.0'  # default to '1.0'
+        is_collection = False
+        parent_pkg_id = ''
+        catalog_extras = {}
+        for extra in harvest_object.extras:
+            if extra.key == 'schema_version':
+                schema_version = extra.value
+            if extra.key == 'is_collection' and extra.value:
+                is_collection = True
+            if extra.key == 'collection_pkg_id' and extra.value:
+                parent_pkg_id = extra.value
+            if extra.key.startswith('catalog_'):
+                catalog_extras[extra.key] = extra.value
+
+        # If this dataset is part of a collection, check that the parent
+        # dataset exists. We don't support deeper hierarchies, so the check
+        # does not apply when is_collection is set.
+        if parent_pkg_id and not is_collection:
+            parent_pkg = None
+            try:
+                parent_pkg = get_action('package_show')(self.context(),
+                    { "id": parent_pkg_id })
+            except:
+                pass
+            if not parent_pkg:
+                parent_check_message = "isPartOf identifier '%s' not found." \
+                    % dataset.get('isPartOf')
+                self._save_object_error(parent_check_message, harvest_object,
+                    'Import')
+                return None
+
+        # Get default values.
+        dataset_defaults = self.load_config(harvest_object.source)["defaults"]
+
+        source_config = json.loads(harvest_object.source.config or '{}')
+        validator_schema = source_config.get('validator_schema')
+        if schema_version == '1.0' and validator_schema != 'non-federal':
+            lowercase_conversion = True
+        else:
+            lowercase_conversion = False
+
+        MAPPING = {
+            "title": "title",
+            "description": "notes",
+            "keyword": "tags",
+            "modified": "extras__modified",  # !
revision_timestamp + "publisher": "extras__publisher", # !owner_org + "contactPoint": {"fn":"maintainer", "hasEmail":"maintainer_email"}, + "identifier": "extras__identifier", # !id + "accessLevel": "extras__accessLevel", + + "bureauCode": "extras__bureauCode", + "programCode": "extras__programCode", + "rights": "extras__rights", + "license": "extras__license", # !license_id + "spatial": "extras__spatial", # Geometry not valid GeoJSON, not indexing + "temporal": "extras__temporal", + + "theme": "extras__theme", + "dataDictionary": "extras__dataDictionary", # !data_dict + "dataQuality": "extras__dataQuality", + "accrualPeriodicity":"extras__accrualPeriodicity", + "landingPage": "extras__landingPage", + "language": "extras__language", + "primaryITInvestmentUII": "extras__primaryITInvestmentUII", # !PrimaryITInvestmentUII + "references": "extras__references", + "issued": "extras__issued", + "systemOfRecords": "extras__systemOfRecords", + + "distribution": None, + } + + SKIP = ["accessURL", "webService", "format", "distribution"] # will go into pkg["resources"] + # also skip the processed_how key, it was added to indicate how we processed the dataset. + SKIP.append("processed_how"); + + SKIP_V1_1 = ["@type", "isPartOf", "distribution"] + SKIP_V1_1.append("processed_how"); + + if lowercase_conversion: + + mapping_processed = {} + for k,v in MAPPING.items(): + mapping_processed[k.lower()] = v + + skip_processed = [k.lower() for k in SKIP] + + dataset_processed = {'processed_how': ['lowercase']} + for k,v in dataset.items(): + if k.lower() in mapping_processed.keys(): + dataset_processed[k.lower()] = v + else: + dataset_processed[k] = v + + if 'distribution' in dataset and dataset['distribution'] is not None: + dataset_processed['distribution'] = [] + for d in dataset['distribution']: + d_lower = {} + for k,v in d.items(): + if k.lower() in mapping_processed.keys(): + d_lower[k.lower()] = v + else: + d_lower[k] = v + dataset_processed['distribution'].append(d_lower) + else: + dataset_processed = dataset + mapping_processed = MAPPING + skip_processed = SKIP + + if schema_version == '1.1': + mapping_processed = MAPPING_V1_1 + skip_processed = SKIP_V1_1 + + validate_message = self._validate_dataset(validator_schema, + schema_version, dataset_processed) + if validate_message: + self._save_object_error(validate_message, harvest_object, 'Import') + return None # We need to get the owner organization (if any) from the harvest # source dataset @@ -158,40 +549,111 @@ def import_stage(self, harvest_object): source_dataset = model.Package.get(harvest_object.source.id) if source_dataset.owner_org: owner_org = source_dataset.owner_org - + + + source_config = json.loads(harvest_object.source.config or '{}') + group_name = source_config.get('default_groups', '') + # Assemble basic information about the dataset. 
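+        # The source_hash extra recorded below is what gather_stage compares
+        # against on later runs to skip datasets that have not changed.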
+ pkg = { - "name": self.make_package_name(dataset["title"], harvest_object.guid, False), "state": "active", # in case was previously deleted "owner_org": owner_org, - "extras": [{ - "key": "source_url", - "value": harvest_object.source.url, + "groups": [{"name": group_name}], + "resources": [], + "extras": [ + { + "key": "resource-type", + "value": "Dataset", }, { - "key": "source_title", - "value": harvest_object.source.title, + "key": "source_hash", + "value": self.make_upstream_content_hash(dataset, harvest_object.source, catalog_extras, schema_version), }, { - "key": "source_identifier", - "value": dataset["identifier"], + "key": "source_datajson_identifier", + "value": True, }, { - "key": "source_hash", - "value": self.make_upstream_content_hash(dataset, harvest_object.source), + "key": "harvest_source_id", + "value": harvest_object.harvest_source_id, }, { - "key": "harvest_harvester_version", - "value": self.HARVESTER_VERSION, + "key": "harvest_object_id", + "value": harvest_object.id, }, { - "key": "harvest_last_updated", - "value": datetime.datetime.utcnow().isoformat(), - }] + "key": "harvest_source_title", + "value": harvest_object.source.title, + }, + { + "key": "source_schema_version", + "value": schema_version, + }, + ] } - + + extras = pkg["extras"] + unmapped = [] + + for key, value in dataset_processed.iteritems(): + if key in skip_processed: + continue + new_key = mapping_processed.get(key) + if not new_key: + unmapped.append(key) + continue + + # after schema 1.0+, we need to deal with multiple new_keys + new_keys = [] + values = [] + if isinstance(new_key, dict): # when schema is not 1.0 + _new_key_keys = new_key.keys() + new_keys = new_key.values() + values = [] + for _key in _new_key_keys: + values.append(value.get(_key)) + else: + new_keys.append(new_key) + values.append(value) + + if not any(item for item in values): + continue + + mini_dataset = dict(zip(new_keys, values)) + for mini_key, mini_value in mini_dataset.iteritems(): + if not mini_value: + continue + if mini_key.startswith('extras__'): + extras.append({"key": mini_key[8:], "value": mini_value}) + else: + pkg[mini_key] = mini_value + + # pick a fix number of unmapped entries and put into extra + if unmapped: + unmapped.sort() + del unmapped[100:] + for key in unmapped: + value = dataset_processed.get(key, "") + if value is not None: extras.append({"key": key, "value": value}) + + # if theme is geospatial/Geospatial, we tag it in metadata_type. + themes = self.find_extra(pkg, "theme") + if themes and ('geospatial' in [x.lower() for x in themes]): + extras.append({'key':'metadata_type', 'value':'geospatial'}) + + if is_collection: + extras.append({'key':'collection_metadata', 'value':'true'}) + elif parent_pkg_id: + extras.append( + {'key':'collection_package_id', 'value':parent_pkg_id} + ) + + for k, v in catalog_extras.iteritems(): + extras.append({'key':k, 'value':v}) + # Set specific information about the dataset. - self.set_dataset_info(pkg, dataset, dataset_defaults) + self.set_dataset_info(pkg, dataset_processed, dataset_defaults, schema_version) # Try to update an existing package with the ID set in harvest_object.guid. If that GUID # corresponds with an existing package, get its current metadata. 
@@ -209,7 +671,7 @@ def import_stage(self, harvest_object):
             for existing_res in existing_pkg.get("resources", []):
                 if res["url"] == existing_res["url"]:
                     res["id"] = existing_res["id"]
-
+            pkg['groups'] = existing_pkg['groups']
             existing_pkg.update(pkg) # preserve other fields that we're not setting, but clobber extras
             pkg = existing_pkg
 
@@ -217,6 +679,7 @@ def import_stage(self, harvest_object):
             pkg = get_action('package_update')(self.context(), pkg)
         else:
             # It doesn't exist yet. Create a new one.
+            pkg['name'] = self.make_package_name(dataset_processed["title"], harvest_object.guid)
             try:
                 pkg = get_action('package_create')(self.context(), pkg)
                 log.warn('created package %s (%s) from %s' % (pkg["name"], pkg["id"], harvest_object.source.url))
@@ -243,9 +706,16 @@ def import_stage(self, harvest_object):
 
         return True
 
-    def make_upstream_content_hash(self, datasetdict, harvest_source):
-        return hashlib.sha1(json.dumps(datasetdict, sort_keys=True)
-            + "|" + harvest_source.config + "|" + self.HARVESTER_VERSION).hexdigest()
+    def make_upstream_content_hash(self, datasetdict, harvest_source,
+                                   catalog_extras, schema_version='1.0'):
+        if schema_version == '1.0':
+            return hashlib.sha1(json.dumps(datasetdict, sort_keys=True)
+                                + "|" + harvest_source.config + "|"
+                                + self.HARVESTER_VERSION).hexdigest()
+        else:
+            return hashlib.sha1(json.dumps(datasetdict, sort_keys=True)
+                                + "|" + json.dumps(catalog_extras,
+                                                   sort_keys=True)).hexdigest()
 
     def find_extra(self, pkg, key):
         for extra in pkg["extras"]:
@@ -253,7 +723,7 @@ def import_stage(self, harvest_object):
             return extra["value"]
         return None
 
-    def make_package_name(self, title, exclude_existing_package, for_deletion):
+    def make_package_name(self, title, exclude_existing_package):
         '''
         Creates a URL friendly name from a title
 
@@ -261,13 +731,29 @@ def import_stage(self, harvest_object):
         '''
         name = munge_title_to_name(title).replace('_', '-')
-        if for_deletion: name = "deleted-" + name
         while '--' in name:
             name = name.replace('--', '-')
         name = name[0:90] # max length is 100
+
+        # Is this slug already in use (and if we're updating a package, is it in
+        # use by a different package?).
         pkg_obj = Session.query(Package).filter(Package.name == name).filter(Package.id != exclude_existing_package).first()
-        if pkg_obj:
-            return name + "-" + str(uuid.uuid4())[:5]
-        else:
+        if not pkg_obj:
+            # The name is available, so use it. Note that if we're updating an
+            # existing package we will be updating this package's URL, so incoming
+            # links may break.
            return name
-
+
+        if exclude_existing_package:
+            # The name is not available, and we're updating a package. Chances
+            # are the package's name already had some random string attached
+            # to it last time. Prevent spurious updates to the package's URL
+            # (choosing new random text) by just reusing the existing package's
+            # name.
+            pkg_obj = Session.query(Package).filter(Package.id == exclude_existing_package).first()
+            if pkg_obj: # the package may not exist yet because we may be passed the desired package GUID before a new package is instantiated
+                return pkg_obj.name
+
+        # Append some random text to the URL. Hope that with five characters
+        # there will be no collision.
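+        # e.g. "fiscal-year-2015-data" -> "fiscal-year-2015-data-3f9a1"
+        # (suffix hypothetical; it is the first five chars of a uuid4 string).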
+ return name + "-" + str(uuid.uuid4())[:5] diff --git a/ckanext/datajson/harvester_cmsdatanavigator.py b/ckanext/datajson/harvester_cmsdatanavigator.py index 8ae0bef6..f364683d 100644 --- a/ckanext/datajson/harvester_cmsdatanavigator.py +++ b/ckanext/datajson/harvester_cmsdatanavigator.py @@ -7,7 +7,7 @@ class CmsDataNavigatorHarvester(DatasetHarvesterBase): A Harvester for the CMS Data Navigator catalog. ''' - HARVESTER_VERSION = "0.9aj" # increment to force an update even if nothing has changed + HARVESTER_VERSION = "0.9al" # increment to force an update even if nothing has changed def info(self): return { @@ -27,23 +27,26 @@ def set_dataset_info(self, package, dataset, dataset_defaults): extra(package, "Agency", "Department of Health & Human Services") package["author"] = "Centers for Medicare & Medicaid Services" extra(package, "author_id", "http://healthdata.gov/id/agency/cms") + extra(package, "Bureau Code", "009:38") package["title"] = dataset["Name"].strip() package["notes"] = dataset.get("Description") package["url"] = dataset.get("Address") - extra(package, "Date Released", parsedate(dataset["HealthData"].get("DateReleased"))) - extra(package, "Date Updated", parsedate(dataset["HealthData"].get("DateUpdated"))) - extra(package, "Agency Program URL", dataset["HealthData"].get("AgencyProgramURL")) + + dataset_hd = dataset["HealthData"] + extra(package, "Date Released", parsedate(dataset_hd.get("DateReleased"))) + extra(package, "Date Updated", parsedate(dataset_hd.get("DateUpdated"))) + extra(package, "Agency Program URL", dataset_hd.get("AgencyProgramURL")) extra(package, "Subject Area 1", "Medicare") - extra(package, "Unit of Analysis", dataset["HealthData"].get("UnitOfAnalysis")) - extra(package, "Data Dictionary", dataset["HealthData"].get("DataDictionaryURL")) - extra(package, "Coverage Period", dataset["HealthData"].get("Coverage Period")) - extra(package, "Collection Frequency", dataset["HealthData"].get("Collection Frequency")) - extra(package, "Geographic Scope", dataset["HealthData"].get("GeographicScope")) - #extra(package, "Contact Person", dataset["HealthData"].get("ContactName")) # not in HHS schema - #extra(package, "Contact Email", dataset["HealthData"].get("ContactEmail")) # not in HHS schema - extra(package, "License Agreement", dataset["HealthData"].get("DataLicenseAgreementURL")) - + extra(package, "Unit of Analysis", dataset_hd.get("UnitOfAnalysis")) + extra(package, "Data Dictionary", dataset_hd.get("DataDictionaryURL")) + extra(package, "Coverage Period", dataset_hd.get("Coverage Period")) + extra(package, "Collection Frequency", dataset_hd.get("Collection Frequency")) + extra(package, "Geographic Scope", dataset_hd.get("GeographicScope")) + extra(package, "Contact Name", dataset_hd.get("GenericContactName", None) or dataset_hd.get("ContactName")) # 'X or Y' syntax returns Y if X is either None or the empty string + extra(package, "Contact Email", dataset_hd.get("GenericContactEmail", None) or dataset_hd.get("ContactEmail")) + extra(package, "License Agreement", dataset_hd.get("DataLicenseAgreementURL")) + from ckan.lib.munge import munge_title_to_name package["tags"] = [ { "name": munge_title_to_name(t["Name"]) } for t in dataset.get("Keywords", [])] diff --git a/ckanext/datajson/harvester_datajson.py b/ckanext/datajson/harvester_datajson.py index c72b3c90..67891c01 100644 --- a/ckanext/datajson/harvester_datajson.py +++ b/ckanext/datajson/harvester_datajson.py @@ -1,4 +1,6 @@ from ckanext.datajson.harvester_base import DatasetHarvesterBase +from 
parse_datajson import parse_datajson_entry + import urllib2, json @@ -7,7 +9,7 @@ class DataJsonHarvester(DatasetHarvesterBase): A Harvester for /data.json files. ''' - HARVESTER_VERSION = "0.9aj" # increment to force an update even if nothing has changed + HARVESTER_VERSION = "0.9al" # increment to force an update even if nothing has changed def info(self): return { @@ -17,10 +19,47 @@ def info(self): } def load_remote_catalog(self, harvest_job): - return json.load(urllib2.urlopen(harvest_job.source.url)) + req = urllib2.Request(harvest_job.source.url) + # todo: into config and across harvester + req.add_header('User-agent', 'Data.gov/2.0') + try: + datasets = json.load(urllib2.urlopen(req)) + except UnicodeDecodeError: + # try different encode + try: + datasets = json.load(urllib2.urlopen(req), 'cp1252') + except: + datasets = json.load(urllib2.urlopen(req), 'iso-8859-1') + except: + # remove BOM + datasets = json.loads(lstrip_bom(urllib2.urlopen(req).read())) + + # The first dataset should be for the data.json file itself. Check that + # it is, and if so rewrite the dataset's title because Socrata exports + # these items all with the same generic name that is confusing when + # harvesting a bunch from different sources. It should have an accessURL + # but Socrata fills the URL of these in under webService. + if isinstance(datasets, list) and len(datasets) > 0 and (datasets[0].get("accessURL") == harvest_job.source.url + or datasets[0].get("webService") == harvest_job.source.url) and \ + datasets[0].get("title") == "Project Open Data, /data.json file": + datasets[0]["title"] = "%s Project Open Data data.json File" % harvest_job.source.title + + catalog_values = None + if isinstance(datasets, dict): + # this is a catalog, not dataset array as in schema 1.0. + catalog_values = datasets.copy() + datasets = catalog_values.pop("dataset", []) + + return (datasets, catalog_values) - def set_dataset_info(self, pkg, dataset, dataset_defaults): - from parse_datajson import parse_datajson_entry - parse_datajson_entry(dataset, pkg, dataset_defaults) - + def set_dataset_info(self, pkg, dataset, dataset_defaults, schema_version): + parse_datajson_entry(dataset, pkg, dataset_defaults, schema_version) +# helper function to remove BOM +def lstrip_bom(str_): + from codecs import BOM_UTF8 + bom = BOM_UTF8 + if str_.startswith(bom): + return str_[len(bom):] + else: + return str_ diff --git a/ckanext/datajson/parse_datajson.py b/ckanext/datajson/parse_datajson.py index e624096c..493cd6de 100644 --- a/ckanext/datajson/parse_datajson.py +++ b/ckanext/datajson/parse_datajson.py @@ -1,81 +1,150 @@ +from ckan.lib.munge import munge_title_to_name + import re -def parse_datajson_entry(datajson, package, defaults): - package["title"] = datajson.get("title", defaults.get("Title")) - package["notes"] = datajson.get("description", defaults.get("Notes")) - package["tags"] = [ { "name": t } for t in - datajson.get("keyword", defaults.get("Tags", "")).split(",") if t.strip() != ""] - package["groups"] = [ { "name": g } for g in - defaults.get("Groups", [])] # the complexity of permissions makes this useless, CKAN seems to ignore - package["organization"] = datajson.get("organization", defaults.get("Organization")) - extra(package, "Group Name", defaults.get("Group Name")) # i.e. dataset grouping string - extra(package, "Date Updated", datajson.get("modified")) - extra(package, "Agency", defaults.get("Agency")) # i.e. federal department - package["publisher"] = datajson.get("publisher", defaults.get("Author")) # i.e. 
agency within HHS - extra(package, "author_id", defaults.get("author_id")) # i.e. URI for agency - extra(package, "Agency Program URL", defaults.get("Agency Program URL")) # i.e. URL for agency program - extra(package, "Contact Person", datajson.get("person")) # not in HHS schema - extra(package, "Contact Email", datajson.get("mbox")) # not in HHS schema - # "identifier" is handled by the harvester - extra(package, "Access Level", datajson.get("accessLevel")) # not in HHS schema - extra(package, "Data Dictionary", datajson.get("dataDictionary", defaults.get("Data Dictionary"))) - # accessURL is redundant with resources - # webService is redundant with resources - extra(package, "Format", datajson.get("format")) # not in HHS schema - extra(package, "License Agreement", datajson.get("license")) - #extra(package, "License Agreement Required", ...) - extra(package, "Geographic Scope", datajson.get("spatial")) - extra(package, "Temporal", datajson.get("temporal")) # HHS uses Coverage Period (FY) Start/End - extra(package, "Date Released", datajson.get("issued")) - #extra(package, "Collection Frequency", ...) - extra(package, "Publish Frequency", datajson.get("accrualPeriodicity")) # not in HHS schema - extra(package, "Language", datajson.get("language")) # not in HHS schema - extra(package, "Granularity", datajson.get("granularity")) # not in HHS schema - extra(package, "Data Quality Met", datajson.get("dataQuality")) # not in HHS schema - #extra(package, "Unit of Analysis", ...) - #extra(package, "Collection Instrument", ...) - extra(package, "Subject Area 1", datajson.get("theme", defaults.get("Subject Area 1"))) - extra(package, "Subject Area 2", defaults.get("Subject Area 2")) - extra(package, "Subject Area 2", defaults.get("Subject Area 3")) - extra(package, "Technical Documentation", datajson.get("references")) - extra(package, "Size", datajson.get("size")) # not in HHS schema - package["url"] = datajson.get("landingPage", datajson.get("webService", datajson.get("accessURL"))) - extra(package, "Feed", datajson.get("feed")) # not in HHS schema - extra(package, "System Of Records", datajson.get("systemOfRecords")) # not in HHS schema - package["resources"] = [ ] - for d in datajson.get("distribution", []): - for k in ("accessURL", "webService"): - if d.get(k, "").strip() != "": - r = { - "url": d[k], - "format": normalize_format(d.get("format", "Query Tool" if k == "webService" else "Unknown")), - } - extra(r, "Language", d.get("language")) - extra(r, "Size", d.get("size")) - - # work-around for Socrata-style formats array - try: - r["format"] = normalize_format(d["formats"][0]["label"]) - except: - pass - - r["name"] = r["format"] - - package["resources"].append(r) - +def parse_datajson_entry(datajson, package, defaults, schema_version): + # four fields need extra handling, which are + # 1.tag, 2.license, 3.maintainer_email, 4.publisher_hierarchy, + # 5.resources + + # 1. package["tags"] + package["tags"] = [ { "name": munge_title_to_name(t) } for t in + package.get("tags", "") if t.strip() != ""] + + # 2. 
package["license"]
+    licenses = {
+        'Creative Commons Attribution': 'cc-by',
+        'Creative Commons Attribution Share-Alike': 'cc-by-sa',
+        'Creative Commons CCZero': 'cc-zero',
+        'Creative Commons Non-Commercial (Any)': 'cc-nc',
+        'GNU Free Documentation License': 'gfdl',
+        'License Not Specified': 'notspecified',
+        'Open Data Commons Attribution License': 'odc-by',
+        'Open Data Commons Open Database License (ODbL)': 'odc-odbl',
+        'Open Data Commons Public Domain Dedication and License (PDDL)': 'odc-pddl',
+        'Other (Attribution)': 'other-at',
+        'Other (Non-Commercial)': 'other-nc',
+        'Other (Not Open)': 'other-closed',
+        'Other (Open)': 'other-open',
+        'Other (Public Domain)': 'other-pd',
+        'UK Open Government Licence (OGL)': 'uk-ogl',
+    }
+
+    if not datajson.get("license", ""):
+        package["license_id"] = licenses.get("License Not Specified", "")
+    elif licenses.get(datajson.get("license", ""), ""):
+        package["license_id"] = licenses.get(datajson.get("license", ""), "")
+
+    # 3. package["maintainer_email"]
+    if package.get("maintainer_email"):
+        package["maintainer_email"] = \
+            package.get("maintainer_email").replace("mailto:", "", 1)
+
+    # 4. extras-publisher and extras-publisher_hierarchy
+    if schema_version == '1.1':
+        publisher = find_extra(package, "publisher", {})
+        publisher_name = publisher.get("name", "")
+        set_extra(package, "publisher", publisher_name)
+        parent_publisher = publisher.get("subOrganizationOf", {})
+        publisher_hierarchy = []
+        while parent_publisher:
+            parent_name = parent_publisher.get("name", "")
+            parent_publisher = parent_publisher.get("subOrganizationOf", {})
+            publisher_hierarchy.append(parent_name)
+        if publisher_hierarchy:
+            publisher_hierarchy.reverse()
+            publisher_hierarchy.append(publisher_name)
+            publisher_hierarchy = " > ".join(publisher_hierarchy)
+            set_extra(package, "publisher_hierarchy", publisher_hierarchy)
+
+    # 5. package["resources"]
+    # If distribution is empty, assemble it from the root-level accessURL and
+    # format; but first guard against an ill-formatted distribution value.
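+    # e.g. a bare {"accessURL": ..., "format": ...} dict is wrapped in a
+    # one-element list; any other non-list value is treated as empty.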
+    distribution = datajson.get("distribution", [])
+    if isinstance(distribution, dict): distribution = [distribution]
+    if not isinstance(distribution, list): distribution = []
+
+    downloadurl_key = "downloadURL"
+    accessurl_key = "accessURL"
+    webservice_key = "webService"
+    if datajson.get("processed_how", []) and "lowercase" in datajson.get("processed_how", []):
+        accessurl_key = accessurl_key.lower()
+        webservice_key = webservice_key.lower()
+
+    if not distribution:
+        for url in (accessurl_key, webservice_key):
+            if datajson.get(url, "") and datajson.get(url, "").strip():
+                d = {
+                    url: datajson.get(url, ""),
+                    "format": datajson.get("format", ""),
+                    "mimetype": datajson.get("format", ""),
+                }
+                distribution.append(d)
+
+    datajson["distribution"] = distribution
+
+    for d in datajson.get("distribution", []):
+        downloadurl_value = d.get(downloadurl_key, "").strip()
+        accessurl_value = d.get(accessurl_key, "").strip()
+        webservice_value = d.get(webservice_key, "").strip()
+
+        which_value = (accessurl_value or webservice_value) if schema_version == '1.0' else (downloadurl_value or accessurl_value)
+
+        if which_value:
+            r = {}
+            r['url'] = which_value
+            r['format'] = d.get("format", "") if schema_version == '1.0' else d.get("format", d.get("mediaType", ""))
+            r['mimetype'] = d.get("format", "") if schema_version == '1.0' else d.get("mediaType", "")
+            r['description'] = d.get('description', '')
+            r['name'] = d.get('title', '')
+
+            # after schema 1.1+, we have some extra fields for resources
+            resource_extras = ['conformsTo', 'describedBy', 'describedByType']
+            for resource_extra_key in resource_extras:
+                resource_extra_value = d.get(resource_extra_key)
+                if resource_extra_value:
+                    r[resource_extra_key] = resource_extra_value
+
+            # after schema 1.1+, include accessURL if it is left over
+            if downloadurl_value and accessurl_value:
+                r['accessURL'] = accessurl_value
+
+            package["resources"].append(r)
+
 def extra(package, key, value):
-    if not value: return
-    package.setdefault("extras", []).append({ "key": key, "value": value })
-
-def normalize_format(format):
-    # Format should be a file extension. But sometimes Socrata outputs a MIME type.
-    format = format.lower()
-    m = re.match(r"((application|text)/(\S+))(; charset=.*)?", format)
-    if m:
-        if m.group(1) == "text/plain": return "Text"
-        if m.group(1) == "application/zip": return "ZIP"
-        if m.group(1) == "application/vnd.ms-excel": return "XLS"
-        if m.group(1) == "application/x-msaccess": return "Access"
-        return "Other"
-    if format == "text": return "Text"
-    return format.upper() # hope it's one of our formats by converting to uppercase
+    if not value: return
+    package.setdefault("extras", []).append({ "key": key, "value": value })
+
+def find_extra(pkg, key, default):
+    for extra in pkg["extras"]:
+        if extra["key"] == key:
+            ret = extra["value"]
+            break
+    else:
+        ret = default
+
+    return ret
+
+def set_extra(pkg, key, value):
+    for extra in pkg["extras"]:
+        if extra["key"] == key:
+            extra["value"] = value
+            break
+    else:
+        pkg["extras"].append({"key":key, "value":value})
+
+def normalize_format(format, raise_on_unknown=False):
+    if format is None: return
+    # Format should be a file extension. But sometimes Socrata outputs a MIME type.
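+    # Illustrative mappings of the logic below (hypothetical inputs, with the
+    # default raise_on_unknown=False):
+    #   "text/plain"      -> "Text"
+    #   "application/zip" -> "ZIP"
+    #   "application/pdf" -> "Other"  (MIME-shaped but not specially cased)
+    #   "csv"             -> "CSV"    (bare extension is upper-cased)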
+    format = format.lower()
+    m = re.match(r"((application|text)/(\S+))(; charset=.*)?", format)
+    if m:
+        if m.group(1) == "text/plain": return "Text"
+        if m.group(1) == "application/zip": return "ZIP"
+        if m.group(1) == "application/vnd.ms-excel": return "XLS"
+        if m.group(1) == "application/x-msaccess": return "Access"
+        if raise_on_unknown: raise ValueError() # caught & ignored by caller
+        return "Other"
+    if format == "text": return "Text"
+    if raise_on_unknown and "?" in format: raise ValueError() # weird value we should try to filter out; exception is caught & ignored by caller
+    return format.upper() # hope it's one of our formats by converting to uppercase
diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py
index 1fd95b3f..b86a8ebf 100644
--- a/ckanext/datajson/plugin.py
+++ b/ckanext/datajson/plugin.py
@@ -29,6 +29,154 @@ class DataJsonPlugin(p.SingletonPlugin):
     p.implements(p.interfaces.IConfigurer)
     p.implements(p.interfaces.IRoutes, inherit=True)
+    p.implements(p.interfaces.IFacets)
+
+    # IConfigurer
+
+    def update_config(self, config):
+        # Must use IConfigurer rather than IConfigurable because only IConfigurer
+        # is called before after_map, in which we need the configuration directives
+        # to know how to set the paths.
+        DataJsonPlugin.route_path = config.get("ckanext.datajson.path", "/data.json")
+        DataJsonPlugin.route_ld_path = config.get("ckanext.datajsonld.path",
+            re.sub(r"\.json$", ".jsonld", DataJsonPlugin.route_path))
+        DataJsonPlugin.ld_id = config.get("ckanext.datajsonld.id", config.get("ckan.site_url"))
+        DataJsonPlugin.ld_title = config.get("ckan.site_title", "Catalog")
+        DataJsonPlugin.site_url = config.get("ckan.site_url")
+
+        # Adds our local templates directory. It's smart. It knows it's
+        # relative to the path of *this* file. Wow.
+        p.toolkit.add_template_directory(config, "templates")
+
+    # IRoutes
+
+    def before_map(self, m):
+        return m
+
+    def after_map(self, m):
+        # /data.json and /data.jsonld (or other path as configured by user)
+        m.connect('datajson', DataJsonPlugin.route_path, controller='ckanext.datajson.plugin:DataJsonController',
+                  action='generate_json')
+        m.connect('datajsonld', DataJsonPlugin.route_ld_path, controller='ckanext.datajson.plugin:DataJsonController',
+                  action='generate_jsonld')
+
+        # /pod/validate
+        m.connect('datajsonvalidator', "/pod/validate", controller='ckanext.datajson.plugin:DataJsonController',
+                  action='validator')
+
+        # /pod/data-catalog
+        m.connect('datajsonhtml', "/pod/data-catalog", controller='ckanext.datajson.plugin:DataJsonController',
+                  action='show_html_rendition')
+
+        return m
+
+    # IFacets
+
+    def dataset_facets(self, facets, package_type):
+        # Add any facets specified in build_datajson.get_facet_fields() to the top
+        # of the facet list, and then put the CKAN default facets below that.
+        f = OrderedDict()
+        f.update(get_facet_fields())
+        f.update(facets)
+        return f
+
+    def group_facets(self, facets_dict, group_type, package_type):
+        return facets_dict
+
+    def organization_facets(self, facets_dict, organization_type, package_type):
+        return facets_dict
+
+
+class DataJsonController(BaseController):
+    def generate_output(self, format):
+        # set content type (charset required or pylons throws an error)
+        response.content_type = 'application/json; charset=UTF-8'
+
+        # allow caching of response (e.g. by Apache)
+        del response.headers["Cache-Control"]
+        del response.headers["Pragma"]
+
+        # output
+        data = self.make_json()
+
+        if format == 'json-ld':
+            # Convert this to JSON-LD.
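+            # Rough shape of the catalog envelope assembled below (sketch):
+            #   { "@context": { ...prefix map... },
+            #     "@id": <catalog id>, "@type": "dcat:Catalog",
+            #     "dcterms:title": ..., "dcat:dataset": [ ...datasets... ] }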
+            data = OrderedDict([
+                ("@context", OrderedDict([
+                    ("rdfs", "http://www.w3.org/2000/01/rdf-schema#"),
+                    ("dcterms", "http://purl.org/dc/terms/"),
+                    ("dcat", "http://www.w3.org/ns/dcat#"),
+                    ("foaf", "http://xmlns.com/foaf/0.1/"),
+                    ("pod", "http://project-open-data.github.io/schema/2013-09-20_1.0#"),
+                ])
+                ),
+                ("@id", DataJsonPlugin.ld_id),
+                ("@type", "dcat:Catalog"),
+                ("dcterms:title", DataJsonPlugin.ld_title),
+                ("rdfs:label", DataJsonPlugin.ld_title),
+                ("foaf:homepage", DataJsonPlugin.site_url),
+                ("dcat:dataset", [dataset_to_jsonld(d) for d in data]),
+            ])
+
+        return p.toolkit.literal(json.dumps(data, indent=2))
+
+    def make_json(self):
+        # Build the data.json file.
+        packages = p.toolkit.get_action("current_package_list_with_resources")(None, {})
+        return [make_datajson_entry(pkg) for pkg in packages if pkg["type"] == "dataset"]
+
+    def generate_json(self):
+        return self.generate_output('json')
+
+    def generate_jsonld(self):
+        return self.generate_output('json-ld')
+
+    def validator(self):
+        # Validates that a URL is a good data.json file.
+        if request.method == "POST" and "url" in request.POST and request.POST["url"].strip() != "":
+            c.source_url = request.POST["url"]
+            c.errors = []
+
+            import urllib, json
+            from datajsonvalidator import do_validation
+
+            body = None
+            try:
+                body = json.load(urllib.urlopen(c.source_url))
+            except IOError as e:
+                c.errors.append(("Error Loading File", ["The address could not be loaded: " + unicode(e)]))
+            except ValueError as e:
+                c.errors.append(("Invalid JSON", ["The file does not meet basic JSON syntax requirements: " + unicode(
+                    e) + ". Try using JSONLint.com."]))
+            except Exception as e:
+                c.errors.append((
+                    "Internal Error", ["Something bad happened while trying to load and parse the file: " + unicode(e)]))
+
+            if body:
+                try:
+                    do_validation(body, c.source_url, c.errors)
+                except Exception as e:
+                    c.errors.append(("Internal Error", ["Something bad happened: " + unicode(e)]))
+                if len(c.errors) == 0:
+                    c.errors.append(("No Errors", ["Great job!"]))
+
+        return render('datajsonvalidator.html')
+
+    def show_html_rendition(self):
+        # Shows an HTML rendition of the data.json file. Requests the file live
+        # from http://localhost/data.json.
+
+        import urllib, json
+
+        try:
+            c.catalog_data = json.load(urllib.urlopen("http://localhost/data.json"))
+        except:
+            c.catalog_data = []
+
+        c.catalog_data.sort(key=lambda x: x.get("modified"), reverse=True)
+
+        return render('html_rendition.html')
+
 class JsonExportPlugin(p.SingletonPlugin):
diff --git a/ckanext/datajson/templates/html_rendition.html b/ckanext/datajson/templates/html_rendition.html
new file mode 100644
index 00000000..96ff1ee7
--- /dev/null
+++ b/ckanext/datajson/templates/html_rendition.html
@@ -0,0 +1,43 @@
+{% extends "page.html" %}
+
+{% block subtitle %}Data Catalog (HTML Table Rendition){% endblock %}
+
+{% block breadcrumb_content %}
+{% endblock %}
+
+{% block primary %}
+
+<div class="module">
+  <div class="module-content">
+
+    <h1>Data Catalog</h1>
+
+    <p>Welcome to the {{g.site_title}}. There are several ways you may view & download the data catalog:</p>
+
+    <table class="table">
+      {% for item in c.catalog_data %}
+      <tr>
+        <td>
+          <h3>{{item.title}}</h3>
+          <p>{{item.description}}</p>
+          <p>
+            {% if item.accessURL %}<a href="{{item.accessURL}}">{{item.accessURL}}</a>
+            {% endif %}
+            Last Modified: {% if item.modified %}{{item.modified}}{% else %}unknown{% endif %}
+          </p>
+        </td>
+      </tr>
+      {% endfor %}
+    </table>
+
+  </div>
+</div>
+{% endblock %} + +{% block secondary %}{% endblock %} From 8a915a4124be1c698b93fcd300f5f6d5b927b997 Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Fri, 13 Mar 2015 13:06:33 -0400 Subject: [PATCH 08/22] Schema updated to latest POD changes / master branch --- ckanext/datajson/parse_datajson.py | 2 +- .../pod_schema/federal-v1.1/dataset.json | 163 +++- .../non-federal-v1.1/dataset-non-federal.json | 12 +- .../pod_schema/non-federal/single_entry.json | 844 ++++++++--------- ckanext/datajson/pod_schema/single_entry.json | 855 +++++++++--------- 5 files changed, 1032 insertions(+), 844 deletions(-) diff --git a/ckanext/datajson/parse_datajson.py b/ckanext/datajson/parse_datajson.py index 493cd6de..63c6f5f3 100644 --- a/ckanext/datajson/parse_datajson.py +++ b/ckanext/datajson/parse_datajson.py @@ -147,4 +147,4 @@ def normalize_format(format, raise_on_unknown=False): return "Other" if format == "text": return "Text" if raise_on_unknown and "?" in format: raise ValueError() # weird value we should try to filter out; exception is caught & ignored by caller - return format.upper() # hope it's one of our formats by converting to upprecase + return format.upper() # hope it's one of our formats by converting to upprecase \ No newline at end of file diff --git a/ckanext/datajson/pod_schema/federal-v1.1/dataset.json b/ckanext/datajson/pod_schema/federal-v1.1/dataset.json index b9037fb8..21b09dbe 100644 --- a/ckanext/datajson/pod_schema/federal-v1.1/dataset.json +++ b/ckanext/datajson/pod_schema/federal-v1.1/dataset.json @@ -62,6 +62,10 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, @@ -89,6 +93,10 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, @@ -102,6 +110,10 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, @@ -115,6 +127,10 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, @@ -127,6 +143,10 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, @@ -143,13 +163,25 @@ { "type": "array", "items": { - "$ref": "#/definitions/distribution", - "minItems": 1, - "uniqueItems": true + "anyOf": [ + { + "$ref": "#/definitions/distribution", + "minItems": 1, + "uniqueItems": true + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" + } + ] } }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, @@ -169,18 +201,30 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, "keyword": { "title": "Tags", "description": "Tags (or keywords) help users discover your dataset; please include terms that would be used by technical and non-technical users.", - "type": "array", - "items": { - "type": "string", - "minLength": 1 - }, - "minItems": 1 + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1 + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" + } + ] }, "landingPage": { "title": "Homepage URL", @@ -192,6 +236,10 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, @@ -208,6 +256,10 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, @@ -221,6 +273,10 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, @@ 
-239,6 +295,10 @@ { "type": "string", "pattern": "^(R\\d*\\/)?([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\4([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\18[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?:\\d+(?:\\.\\d+)?Y)?(?:\\d+(?:\\.\\d+)?M)?(?:\\d+(?:\\.\\d+)?W)?(?:\\d+(?:\\.\\d+)?D)?(?:T(?:\\d+(?:\\.\\d+)?H)?(?:\\d+(?:\\.\\d+)?M)?(?:\\d+(?:\\.\\d+)?S)?)?$" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, @@ -252,6 +312,10 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, @@ -276,14 +340,26 @@ { "type": "array", "items": { - "type": "string", - "format": "uri" + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" + } + ] }, "minItems": 1, "uniqueItems": true }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, @@ -331,6 +407,10 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, @@ -341,6 +421,9 @@ { "type": "string", "minLength": 1 + }, + { + "type": "null" } ] }, @@ -359,13 +442,17 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, "title": { "title": "Title", "description": "Human-readable name of the asset. Should be in plain English and include sufficient detail to facilitate search and discovery.", - "type": "string", + "type": "string", "minLength": 1 } }, @@ -387,18 +474,26 @@ "enum": [ "vcard:Contact" ] - }, + }, "fn": { "title": "Contact Name", "description": "A full formatted name, eg Firstname Lastname", "type": "string", - "minLength": 1 + "minLength": 1 }, "hasEmail": { "title": "Email", "description": "Email address for the contact", - "pattern": "^mailto:[\\w.-]+@[\\w.-]+\\.[\\w.-]+?$", - "type": "string" + "anyOf": [ + { + "pattern": "^mailto:[\\w\\_\\~\\!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\=\\:.-]+@[\\w.-]+\\.[\\w.-]+?$", + "type": "string" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" + } + ] } } }, @@ -432,8 +527,16 @@ "downloadURL": { "title": "Download URL", "description": "URL providing direct access to a downloadable file of a dataset", - "type": "string", - "format": "uri" + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" + } + ] }, "mediaType": { "title": "Media Type", @@ -445,8 +548,12 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } - ] + ] }, "format": { "title": "Format", @@ -471,6 +578,10 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, @@ -510,6 +621,10 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, @@ -523,6 +638,10 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] }, @@ -536,6 +655,10 @@ }, { "type": "null" + }, + { + "type": "string", + "pattern": "^(\\[\\[REDACTED).*?(\\]\\])$" } ] } @@ -562,7 +685,7 @@ "title": "Publisher Name", "description": "A full formatted name, eg Firstname Lastname", "type": "string", - "minLength": 1 + "minLength": 1 }, "subOrganizationOf": { "title": "Parent Organization", diff --git 
a/ckanext/datajson/pod_schema/non-federal-v1.1/dataset-non-federal.json b/ckanext/datajson/pod_schema/non-federal-v1.1/dataset-non-federal.json index 3495512b..b131a63b 100644 --- a/ckanext/datajson/pod_schema/non-federal-v1.1/dataset-non-federal.json +++ b/ckanext/datajson/pod_schema/non-federal-v1.1/dataset-non-federal.json @@ -361,7 +361,7 @@ "title": { "title": "Title", "description": "Human-readable name of the asset. Should be in plain English and include sufficient detail to facilitate search and discovery.", - "type": "string", + "type": "string", "minLength": 1 } }, @@ -382,17 +382,17 @@ "enum": [ "vcard:Contact" ] - }, + }, "fn": { "title": "Contact Name", "description": "A full formatted name, eg Firstname Lastname", "type": "string", - "minLength": 1 + "minLength": 1 }, "hasEmail": { "title": "Email", "description": "Email address for the contact", - "pattern": "^mailto:[\\w.-]+@[\\w.-]+\\.[\\w.-]+?$", + "pattern": "^mailto:[\\w\\_\\~\\!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\=\\:.-]+@[\\w.-]+\\.[\\w.-]+?$", "type": "string" } } @@ -441,7 +441,7 @@ { "type": "null" } - ] + ] }, "format": { "title": "Format", @@ -557,7 +557,7 @@ "title": "Publisher Name", "description": "A full formatted name, eg Firstname Lastname", "type": "string", - "minLength": 1 + "minLength": 1 }, "subOrganizationOf": { "title": "Parent Organization", diff --git a/ckanext/datajson/pod_schema/non-federal/single_entry.json b/ckanext/datajson/pod_schema/non-federal/single_entry.json index 4ab4b311..ddc53fd4 100644 --- a/ckanext/datajson/pod_schema/non-federal/single_entry.json +++ b/ckanext/datajson/pod_schema/non-federal/single_entry.json @@ -1,415 +1,445 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "id": "http://project-open-data.github.io/schema/1_0_final/single_entry.json#", - "title": "Common Core Metadata Schema", - "description": "The metadata format for all federal open data. Validates a single JSON object entry (as opposed to entire Data.json catalog).", - "type": "object", - "required": ["title", "description", "license", "publisher", "contactPoint", "identifier", "accessLevel"], - "properties": { - "accessLevel": { - "description":"The degree to which this dataset could be made publicly-available, regardless of whether it has been made available. Choices: public (Data asset is or could be made publicly available to all without restrictions), restricted public (Data asset is available under certain use restrictions), or non-public (Data asset is not available to members of the public)", - "title": "Public Access Level", - "enum": ["public", "restricted public", "non-public"] - }, - "accessLevelComment": { - "title":"Access Level Comment", - "description":"An explanation for the selected \"accessLevel\" including instructions for how to access a restricted file, if applicable, or explanation for why a \"non-public\" or \"restricted public\" data assetis not \"public,\" if applicable. 
Text, 255 characters.", - "anyOf": [ - { - "type": "string", - "minLength": 1, - "maxLength":255 - }, - { - "type": "null" - } - ] - }, - "accrualPeriodicity": { - "title":"Frequency", - "description":"Frequency with which dataset is published.", - "anyOf": [ - { - "enum": ["Annual", "Bimonthly", "Semiweekly", "Daily", "Biweekly", "Semiannual", "Biennial", "Triennial", - "Three times a week", "Three times a month", "Continuously updated", "Monthly", "Quarterly", "Semimonthly", - "Three times a year", "Weekly", "Completely irregular"] - }, - { - "type": "null" - } - ] - }, - "bureauCode": { - "title":"Bureau Code", - "description":"Federal agencies, combined agency and bureau code from OMB Circular A-11, Appendix C in the format of 015:010.", - "anyOf": [ - { - "type": "array", - "items": { - "type": "string", - "pattern": "[0-9]{3}:[0-9]{2}" - }, - "minItems": 1, - "uniqueItems": true - }, - { - "type": "null" - } - ] - }, - "contactPoint": { - "title":"Contact Name", - "description":"Contact person’s name for the asset.", - "type": "string" - }, - "dataDictionary": { - "title":"Data Dictionary", - "description":"URL to the data dictionary for the dataset or API. Note that documentation other than a data dictionary can be referenced using Related Documents as shown in the expanded fields.", - "anyOf": [ - { - "type": "string", - "format": "uri" - }, - { - "type": "null" - } - ] - }, - "dataQuality": { - "title":"Data Quality", - "description":"Whether the dataset meets the agency’s Information Quality Guidelines (true/false).", - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": { - "title" : "Description", - "description": "Human-readable description (e.g., an abstract) with sufficient detail to enable a user to quickly understand whether the asset is of interest.", - "type": "string" - }, - "distribution": { - "title":"Distribution", - "description":"Holds multiple download URLs for datasets composed of multiple files and/or file types", - "anyOf": [ - { - "type": "array", - "items": { - "type": "object", - "required": ["accessURL", "format"], - "properties": { - "accessURL": { - "title":"Download URL", - "description":"URL providing direct access to the downloadable distribution of a dataset.", - "type": "string", - "format": "uri" - }, - "format": { - "title":"Format", - "description":"The file format or API type of the distribution.", - "pattern": "^[-\\w]+/[-\\w]+(\\.[-\\w]+)*([+][-\\w]+)?$", - "type": "string" - } - } - }, - "minItems": 1, - "uniqueItems": true - }, - { - "type": "null" - } - ] - }, - "identifier": { - "title":"Unique Identifier", - "description":"A unique identifier for the dataset or API as maintained within an Agency catalog or database.", + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "http://project-open-data.github.io/schema/1_0_final/single_entry.json#", + "title": "Common Core Metadata Schema", + "description": "The metadata format for all federal open data. Validates a single JSON object entry (as opposed to entire Data.json catalog).", + "type": "object", + "required": [ + "title", + "description", + "license", + "publisher", + "contactPoint", + "identifier", + "accessLevel" + ], + "properties": { + "accessLevel": { + "description": "The degree to which this dataset could be made publicly-available, regardless of whether it has been made available. 
Choices: public (Data asset is or could be made publicly available to all without restrictions), restricted public (Data asset is available under certain use restrictions), or non-public (Data asset is not available to members of the public)", + "title": "Public Access Level", + "enum": [ + "public", + "restricted public", + "non-public" + ] + }, + "accessLevelComment": { + "title": "Access Level Comment", + "description": "An explanation for the selected \"accessLevel\" including instructions for how to access a restricted file, if applicable, or explanation for why a \"non-public\" or \"restricted public\" data assetis not \"public,\" if applicable. Text, 255 characters.", + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength": 255 + }, + { + "type": "null" + } + ] + }, + "accrualPeriodicity": { + "title": "Frequency", + "description": "Frequency with which dataset is published.", + "anyOf": [ + { + "enum": [ + "Annual", + "Bimonthly", + "Semiweekly", + "Daily", + "Biweekly", + "Semiannual", + "Biennial", + "Triennial", + "Three times a week", + "Three times a month", + "Continuously updated", + "Monthly", + "Quarterly", + "Semimonthly", + "Three times a year", + "Weekly", + "Completely irregular" + ] + }, + { + "type": "null" + } + ] + }, + "bureauCode": { + "title": "Bureau Code", + "description": "Federal agencies, combined agency and bureau code from OMB Circular A-11, Appendix C in the format of 015:010.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "pattern": "[0-9]{3}:[0-9]{2}" + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "contactPoint": { + "title": "Contact Name", + "description": "Contact person’s name for the asset.", + "type": "string" + }, + "dataDictionary": { + "title": "Data Dictionary", + "description": "URL to the data dictionary for the dataset or API. 
Note that documentation other than a data dictionary can be referenced using Related Documents as shown in the expanded fields.", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "dataQuality": { + "title": "Data Quality", + "description": "Whether the dataset meets the agency’s Information Quality Guidelines (true/false).", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": { + "title": "Description", + "description": "Human-readable description (e.g., an abstract) with sufficient detail to enable a user to quickly understand whether the asset is of interest.", + "type": "string" + }, + "distribution": { + "title": "Distribution", + "description": "Holds multiple download URLs for datasets composed of multiple files and/or file types", + "anyOf": [ + { + "type": "array", + "items": { + "type": "object", + "required": [ + "accessURL", + "format" + ], + "properties": { + "accessURL": { + "title": "Download URL", + "description": "URL providing direct access to the downloadable distribution of a dataset.", + "type": "string", + "format": "uri" + }, + "format": { + "title": "Format", + "description": "The file format or API type of the distribution.", + "pattern": "^[-\\w]+/[-\\w]+(\\.[-\\w]+)*([+][-\\w]+)?$", + "type": "string" + } + } + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "identifier": { + "title": "Unique Identifier", + "description": "A unique identifier for the dataset or API as maintained within an Agency catalog or database.", + "type": "string", + "pattern": "[\\w]+" + }, + "issued": { + "title": "Release Date", + "description": "Date of formal issuance.", + "anyOf": [ + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": 
"^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "null" + } + ] + }, + "keyword": { + "title": "Tags", + "description": "Tags (or keywords) help users discover your dataset; please include terms that would be used by technical and non-technical users.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1 + }, + { + "type": "null" + } + ] + }, + "landingPage": { + "title": "Homepage URL", + "description": "Alternative landing page used to redirect user to a contextual, Agency-hosted “homepage” for the Dataset or API when selecting this resource from the Data.gov user interface.", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "language": { + "title": "Language", + "description": "The language of the dataset.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "pattern": "^(((([A-Za-z]{2,3}(-([A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-([A-Za-z]{4}))?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-([0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*(-(x(-[A-Za-z0-9]{1,8})+))?)|(x(-[A-Za-z0-9]{1,8})+)|((en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$" + } + }, + { + "type": "null" + } + ] + }, + "license": { + "title": "License", + "description": "The license dataset or API is published with. 
See Open Licenses for more information.", + "type": "string", + "minLength": 1 + }, + "mbox": { + "title": "Contact Email", + "description": "Contact person’s email address.", + "anyOf": [ + { + "type": "string", + "format": "email" + }, + { + "type": "null" + }, + { + "type": "string" + } + ] + }, + "modified": { + "title": "Last Update", + "description": "Most recent date on which the dataset was changed, updated or modified.", + "anyOf": [ + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + } + ] + }, + "PrimaryITInvestmentUII": { + "title": "Primary IT Investment UII", + "description": "For linking a dataset with an IT Unique Investment Identifier (UII)", + "anyOf": [ + { + "type": "string", + "pattern": "[0-9]{3}-[0-9]{9}" + }, + { + "type": "null" + } + ] + }, + "programCode": { + "title": "Program Code", + "description": "Federal agencies, list the primary program related to this data asset, from the Federal Program Inventory. 
Use the format of 015:001", + "anyOf": [ + { + "type": "array", + "items": { "type": "string", - "pattern": "[\\w]+" - }, - "issued": { - "title":"Release Date", - "description":"Date of formal issuance.", - "anyOf": [ - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - }, - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - }, - { - "type": "string", - "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - }, - { - "type": "null" - } - ] - }, - "keyword": { - "title": "Tags", - "description": "Tags (or keywords) help users discover your dataset; please include terms that would be used by technical and non-technical users.", - "anyOf": [ - { - "type": "array", - "items": { - "type": "string", - "minLength": 1 - }, - "minItems": 1 - }, - { - "type": "null" - } - ] - - }, - "landingPage": { - "title":"Homepage URL", - "description":"Alternative landing page used to redirect user to a contextual, Agency-hosted “homepage” for the Dataset or API when selecting this resource from the Data.gov user interface.", - "anyOf": [ - { - "type": "string", - "format": "uri" - }, - { - "type": "null" - } - ] - }, - "language": { - 
"title":"Language", - "description":"The language of the dataset.", - "anyOf": [ - { - "type": "array", - "items": { - "type": "string", - "pattern": "^(((([A-Za-z]{2,3}(-([A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-([A-Za-z]{4}))?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-([0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*(-(x(-[A-Za-z0-9]{1,8})+))?)|(x(-[A-Za-z0-9]{1,8})+)|((en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$" - } - }, - { - "type": "null" - } - ] - }, - "license": { - "title":"License", - "description":"The license dataset or API is published with. See Open Licenses for more information.", + "pattern": "[0-9]{3}:[0-9]{3}" + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "publisher": { + "title": "Publisher", + "description": "The publishing entity.", + "type": "string" + }, + "references": { + "title": "Related Documents", + "description": "Related documents such as technical information about a dataset, developer documentation, etc.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "format": "uri" + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "spatial": { + "title": "Spatial", + "description": "The range of spatial applicability of a dataset. Could include a spatial region like a bounding box or a named place.", + "anyOf": [ + { + "type": "string", + "minLength": 1 + }, + { + "type": "null" + } + ] + }, + "systemOfRecords": { + "title": "System of Records", + "description": "If the systems is designated as a system of records under the Privacy Act of 1974, provide the URL to the System of Records Notice related to this dataset.", + "anyOf": [ + { + "type": "string", + "minLength": 1 + }, + { + "type": "null" + } + ] + }, + "temporal": { + "title": "Temporal", + "description": "The range of temporal applicability of a dataset (i.e., a start and end date of applicability for the data).", + "anyOf": [ + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": 
"^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "null" + } + ] + }, + "theme": { + "title": "Category", + "description": "Main thematic category of the dataset.", + "anyOf": [ + { + "type": "array", + "items": { "type": "string", "minLength": 1 + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "title": { + "title": "Title", + "description": "Human-readable name of the asset. Should be in plain English and include sufficient detail to facilitate search and discovery.", + "type": "string" + }, + "webService": { + "title": "Endpoint", + "description": "Endpoint of web service to access dataset.", + "anyOf": [ + { + "type": "string", + "format": "uri" }, - "mbox": { - "title":"Contact Email", - "description":"Contact person’s email address.", - "anyOf": [ - { - "type": "string", - "format": "email" - }, - { - "type": "null" - }, - { - "type": "string" - } - ] - }, - "modified": { - "title": "Last Update", - "description": "Most recent date on which the dataset was changed, updated or modified.", - "anyOf": [ - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - }, - { - "type": "string", - "pattern": 
"^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - }, - { - "type": "string", - "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - } - ] - }, - "PrimaryITInvestmentUII": { - "title":"Primary IT Investment UII", - "description":"For linking a dataset with an IT Unique Investment Identifier (UII)", - "anyOf": [ - { - "type": "string", - "pattern": "[0-9]{3}-[0-9]{9}" - }, - { - "type": "null" - } - ] - }, - "programCode": { - "title":"Program Code", - "description":"Federal agencies, list the primary program related to this data asset, from the Federal Program Inventory. Use the format of 015:001", - "anyOf": [ - { - "type": "array", - "items": { - "type": "string", - "pattern": "[0-9]{3}:[0-9]{3}" - }, - "minItems": 1, - "uniqueItems": true - }, - { - "type": "null" - } - ] - }, - "publisher": { - "title":"Publisher", - "description": "The publishing entity.", - "type": "string" - }, - "references": { - "title":"Related Documents", - "description":"Related documents such as technical information about a dataset, developer documentation, etc.", - "anyOf": [ - { - "type": "array", - "items": { - "type": "string", - "format": "uri" - }, - "minItems": 1, - "uniqueItems": true - }, - { - "type": "null" - } - ] - }, - "spatial": { - "title":"Spatial", - "description":"The range of spatial applicability of a dataset. 
Could include a spatial region like a bounding box or a named place.", - "anyOf": [ - { - "type": "string", - "minLength": 1 - }, - { - "type": "null" - } - ] - }, - "systemOfRecords": { - "title":"System of Records", - "description":"If the systems is designated as a system of records under the Privacy Act of 1974, provide the URL to the System of Records Notice related to this dataset.", - "anyOf": [ - { - "type": "string", - "minLength": 1 - }, - { - "type": "null" - } - ] - }, - "temporal": { - "title":"Temporal", - "description":"The range of temporal applicability of a dataset (i.e., a start and end date of applicability for the data).", - "anyOf": [ - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - }, - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - }, - { - "type": "string", - "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - }, - { - "type": "null" - } - ] - }, - "theme": { - "title":"Category", - "description":"Main thematic category of the dataset.", - "anyOf": [ - { - "type": "array", - "items": { - "type": "string", - "minLength": 1 - }, - "minItems": 1, - "uniqueItems": true - }, - { - "type": "null" 
- } - ] - }, - "title": { - "title": "Title", - "description": "Human-readable name of the asset. Should be in plain English and include sufficient detail to facilitate search and discovery.", - "type": "string" - }, - "webService": { - "title":"Endpoint", - "description":"Endpoint of web service to access dataset.", - "anyOf": [ - { - "type": "string", - "format": "uri" - }, - { - "type": "null" - } - ] + { + "type": "null" } + ] } + } } diff --git a/ckanext/datajson/pod_schema/single_entry.json b/ckanext/datajson/pod_schema/single_entry.json index 52dcda77..825203ad 100644 --- a/ckanext/datajson/pod_schema/single_entry.json +++ b/ckanext/datajson/pod_schema/single_entry.json @@ -1,416 +1,451 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "id": "http://project-open-data.github.io/schema/1_0_final/single_entry.json#", - "title": "Common Core Metadata Schema", - "description": "The metadata format for all federal open data. Validates a single JSON object entry (as opposed to entire Data.json catalog).", - "type": "object", - "required": ["bureaucode", "programcode", "title", "description", "keyword", "modified", "publisher", "contactpoint", "mbox", "identifier", "accesslevel"], - "properties": { - "accesslevel": { - "description":"The degree to which this dataset could be made publicly-available, regardless of whether it has been made available. Choices: public (Data asset is or could be made publicly available to all without restrictions), restricted public (Data asset is available under certain use restrictions), or non-public (Data asset is not available to members of the public)", - "title": "Public Access Level", - "enum": ["public", "restricted public", "non-public"] - }, - "accesslevelcomment": { - "title":"Access Level Comment", - "description":"An explanation for the selected \"accesslevel\" including instructions for how to access a restricted file, if applicable, or explanation for why a \"non-public\" or \"restricted public\" data assetis not \"public,\" if applicable. Text, 255 characters.", - "anyOf": [ - { - "type": "string", - "minLength": 1, - "maxLength":255 - }, - { - "type": "null" - } - ] - }, - "accessurl": { - "title":"Download URL", - "description":"URL providing direct access to the downloadable distribution of a dataset.", - "anyOf": [ - { - "type": "string", - "format": "uri" - }, - { - "type": "null" - } - ] - }, - "accrualperiodicity": { - "title":"Frequency", - "description":"Frequency with which dataset is published.", - "anyOf": [ - { - "enum": ["Annual", "Bimonthly", "Semiweekly", "Daily", "Biweekly", "Semiannual", "Biennial", "Triennial", - "Three times a week", "Three times a month", "Continuously updated", "Monthly", "Quarterly", "Semimonthly", - "Three times a year", "Weekly", "Completely irregular"] - }, - { - "type": "null" - } - ] - }, - "bureaucode": { - "title":"Bureau Code", - "description":"Federal agencies, combined agency and bureau code from OMB Circular A-11, Appendix C in the format of 015:010.", - "type": "array", - "items": { - "type": "string", - "pattern": "[0-9]{3}:[0-9]{2}" - }, - "minItems": 1, - "uniqueItems": true - }, - "contactpoint": { - "title":"Contact Name", - "description":"Contact person’s name for the asset.", - "type": "string" - }, - "datadictionary": { - "title":"Data Dictionary", - "description":"URL to the data dictionary for the dataset or API. 
Note that documentation other than a data dictionary can be referenced using Related Documents as shown in the expanded fields.", - "anyOf": [ - { - "type": "string", - "format": "uri" - }, - { - "type": "null" - } - ] - }, - "dataquality": { - "title":"Data Quality", - "description":"Whether the dataset meets the agency’s Information Quality Guidelines (true/false).", - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": { - "title" : "Description", - "description": "Human-readable description (e.g., an abstract) with sufficient detail to enable a user to quickly understand whether the asset is of interest.", - "type": "string" - }, - "distribution": { - "title":"Distribution", - "description":"Holds multiple download URLs for datasets composed of multiple files and/or file types", - "anyOf": [ - { - "type": "array", - "items": { - "type": "object", - "required": ["accessurl", "format"], - "properties": { - "accessurl": { - "title":"Download URL", - "description":"URL providing direct access to the downloadable distribution of a dataset.", - "type": "string", - "format": "uri" - }, - "format": { - "title":"Format", - "description":"The file format or API type of the distribution.", - "pattern": "^[-\\w]+/[-\\w]+(\\.[-\\w]+)*([+][-\\w]+)?$", - "type": "string" - } - } - }, - "minItems": 1, - "uniqueItems": true - }, - { - "type": "null" - } - ] - }, - "format": { - "title":"Format", - "description":"The file format or API type of the distribution.", - "anyOf": [ - { - "type": "string", - "pattern": "^[-\\w]+/[-\\w]+(\\.[-\\w]+)*([+][-\\w]+)?$" - }, - { - "type": "null" - } - ] - }, - "identifier": { - "title":"Unique Identifier", - "description":"A unique identifier for the dataset or API as maintained within an Agency catalog or database.", + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "http://project-open-data.github.io/schema/1_0_final/single_entry.json#", + "title": "Common Core Metadata Schema", + "description": "The metadata format for all federal open data. Validates a single JSON object entry (as opposed to entire Data.json catalog).", + "type": "object", + "required": [ + "bureaucode", + "programcode", + "title", + "description", + "keyword", + "modified", + "publisher", + "contactpoint", + "mbox", + "identifier", + "accesslevel" + ], + "properties": { + "accesslevel": { + "description": "The degree to which this dataset could be made publicly-available, regardless of whether it has been made available. Choices: public (Data asset is or could be made publicly available to all without restrictions), restricted public (Data asset is available under certain use restrictions), or non-public (Data asset is not available to members of the public)", + "title": "Public Access Level", + "enum": [ + "public", + "restricted public", + "non-public" + ] + }, + "accesslevelcomment": { + "title": "Access Level Comment", + "description": "An explanation for the selected \"accesslevel\" including instructions for how to access a restricted file, if applicable, or explanation for why a \"non-public\" or \"restricted public\" data assetis not \"public,\" if applicable. 
Text, 255 characters.", + "anyOf": [ + { + "type": "string", + "minLength": 1, + "maxLength": 255 + }, + { + "type": "null" + } + ] + }, + "accessurl": { + "title": "Download URL", + "description": "URL providing direct access to the downloadable distribution of a dataset.", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "accrualperiodicity": { + "title": "Frequency", + "description": "Frequency with which dataset is published.", + "anyOf": [ + { + "enum": [ + "Annual", + "Bimonthly", + "Semiweekly", + "Daily", + "Biweekly", + "Semiannual", + "Biennial", + "Triennial", + "Three times a week", + "Three times a month", + "Continuously updated", + "Monthly", + "Quarterly", + "Semimonthly", + "Three times a year", + "Weekly", + "Completely irregular" + ] + }, + { + "type": "null" + } + ] + }, + "bureaucode": { + "title": "Bureau Code", + "description": "Federal agencies, combined agency and bureau code from OMB Circular A-11, Appendix C in the format of 015:010.", + "type": "array", + "items": { + "type": "string", + "pattern": "[0-9]{3}:[0-9]{2}" + }, + "minItems": 1, + "uniqueItems": true + }, + "contactpoint": { + "title": "Contact Name", + "description": "Contact person’s name for the asset.", + "type": "string" + }, + "datadictionary": { + "title": "Data Dictionary", + "description": "URL to the data dictionary for the dataset or API. Note that documentation other than a data dictionary can be referenced using Related Documents as shown in the expanded fields.", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "dataquality": { + "title": "Data Quality", + "description": "Whether the dataset meets the agency’s Information Quality Guidelines (true/false).", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": { + "title": "Description", + "description": "Human-readable description (e.g., an abstract) with sufficient detail to enable a user to quickly understand whether the asset is of interest.", + "type": "string" + }, + "distribution": { + "title": "Distribution", + "description": "Holds multiple download URLs for datasets composed of multiple files and/or file types", + "anyOf": [ + { + "type": "array", + "items": { + "type": "object", + "required": [ + "accessurl", + "format" + ], + "properties": { + "accessurl": { + "title": "Download URL", + "description": "URL providing direct access to the downloadable distribution of a dataset.", + "type": "string", + "format": "uri" + }, + "format": { + "title": "Format", + "description": "The file format or API type of the distribution.", + "pattern": "^[-\\w]+/[-\\w]+(\\.[-\\w]+)*([+][-\\w]+)?$", + "type": "string" + } + } + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "format": { + "title": "Format", + "description": "The file format or API type of the distribution.", + "anyOf": [ + { + "type": "string", + "pattern": "^[-\\w]+/[-\\w]+(\\.[-\\w]+)*([+][-\\w]+)?$" + }, + { + "type": "null" + } + ] + }, + "identifier": { + "title": "Unique Identifier", + "description": "A unique identifier for the dataset or API as maintained within an Agency catalog or database.", + "type": "string", + "pattern": "[\\w]+" + }, + "issued": { + "title": "Release Date", + "description": "Date of formal issuance.", + "anyOf": [ + { + "type": "string", + "pattern": 
"^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "null" + } + ] + }, + "keyword": { + "title": "Tags", + "description": "Tags (or keywords) help users discover your dataset; please include terms that would be used by technical and non-technical users.", + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1 + }, + "landingpage": { + "title": "Homepage URL", + "description": "Alternative landing page used to redirect user to a contextual, Agency-hosted “homepage” for the Dataset or API when selecting this resource from the Data.gov user interface.", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "language": { + "title": "Language", + "description": "The language of the dataset.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string", + "pattern": 
"^(((([A-Za-z]{2,3}(-([A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-([A-Za-z]{4}))?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-([0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*(-(x(-[A-Za-z0-9]{1,8})+))?)|(x(-[A-Za-z0-9]{1,8})+)|((en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$" + } + }, + { + "type": "null" + } + ] + }, + "license": { + "title": "License", + "description": "The license dataset or API is published with. See Open Licenses for more information.", + "anyOf": [ + { + "type": "string", + "minLength": 1 + }, + { + "type": "null" + } + ] + }, + "mbox": { + "title": "Contact Email", + "description": "Contact person’s email address.", + "type": "string", + "format": "email" + }, + "modified": { + "title": "Last Update", + "description": "Most recent date on which the dataset was changed, updated or modified.", + "anyOf": [ + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": 
"^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + } + ] + }, + "primaryitinvestmentuii": { + "title": "Primary IT Investment UII", + "description": "For linking a dataset with an IT Unique Investment Identifier (UII)", + "anyOf": [ + { + "type": "string", + "pattern": "[0-9]{3}-[0-9]{9}" + }, + { + "type": "null" + } + ] + }, + "programcode": { + "title": "Program Code", + "description": "Federal agencies, list the primary program related to this data asset, from the Federal Program Inventory. Use the format of 015:001", + "type": "array", + "items": { + "type": "string", + "pattern": "[0-9]{3}:[0-9]{3}" + }, + "minItems": 1, + "uniqueItems": true + }, + "publisher": { + "title": "Publisher", + "description": "The publishing entity.", + "type": "string" + }, + "references": { + "title": "Related Documents", + "description": "Related documents such as technical information about a dataset, developer documentation, etc.", + "anyOf": [ + { + "type": "array", + "items": { "type": "string", - "pattern": "[\\w]+" - }, - "issued": { - "title":"Release Date", - "description":"Date of formal issuance.", - "anyOf": [ - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - }, - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - }, - { - "type": "string", - "pattern": 
"^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - }, - { - "type": "null" - } - ] - }, - "keyword": { - "title": "Tags", - "description": "Tags (or keywords) help users discover your dataset; please include terms that would be used by technical and non-technical users.", - "type": "array", - "items": { - "type": "string", - "minLength": 1 - }, - "minItems": 1 - }, - "landingpage": { - "title":"Homepage URL", - "description":"Alternative landing page used to redirect user to a contextual, Agency-hosted “homepage” for the Dataset or API when selecting this resource from the Data.gov user interface.", - "anyOf": [ - { - "type": "string", - "format": "uri" - }, - { - "type": "null" - } - ] - }, - "language": { - "title":"Language", - "description":"The language of the dataset.", - "anyOf": [ - { - "type": "array", - "items": { - "type": "string", - "pattern": "^(((([A-Za-z]{2,3}(-([A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-([A-Za-z]{4}))?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-([0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*(-(x(-[A-Za-z0-9]{1,8})+))?)|(x(-[A-Za-z0-9]{1,8})+)|((en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$" - } - }, - { - "type": "null" - } - ] - }, - "license": { - "title":"License", - "description":"The license dataset or API is published with. See Open Licenses for more information.", - "anyOf": [ - { - "type": "string", - "minLength": 1 - }, - { - "type": "null" - } - ] - }, - "mbox": { - "title":"Contact Email", - "description":"Contact person’s email address.", + "format": "uri" + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "spatial": { + "title": "Spatial", + "description": "The range of spatial applicability of a dataset. 
Could include a spatial region like a bounding box or a named place.", + "anyOf": [ + { + "type": "string", + "minLength": 1 + }, + { + "type": "null" + } + ] + }, + "systemofrecords": { + "title": "System of Records", + "description": "If the systems is designated as a system of records under the Privacy Act of 1974, provide the URL to the System of Records Notice related to this dataset.", + "anyOf": [ + { + "type": "string", + "minLength": 1 + }, + { + "type": "null" + } + ] + }, + "temporal": { + "title": "Temporal", + "description": "The range of temporal applicability of a dataset (i.e., a start and end date of applicability for the data).", + "anyOf": [ + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "string", + "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" + }, + { + "type": "string", + "pattern": "^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" + }, + { + "type": "null" + } + ] + }, + "theme": { + "title": "Category", + "description": "Main thematic category of the dataset.", + "anyOf": [ + { + "type": "array", + "items": { "type": "string", - "format": "email" - }, - "modified": { - "title": "Last Update", - "description": 
"Most recent date on which the dataset was changed, updated or modified.", - "anyOf": [ - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - }, - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - }, - { - "type": "string", - "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - } - ] - }, - "primaryitinvestmentuii": { - "title":"Primary IT Investment UII", - "description":"For linking a dataset with an IT Unique Investment Identifier (UII)", - "anyOf": [ - { - "type": "string", - "pattern": "[0-9]{3}-[0-9]{9}" - }, - { - "type": "null" - } - ] - }, - "programcode": { - "title":"Program Code", - "description":"Federal agencies, list the primary program related to this data asset, from the Federal Program Inventory. 
Use the format of 015:001", - "type": "array", - "items": { - "type": "string", - "pattern": "[0-9]{3}:[0-9]{3}" - }, - "minItems": 1, - "uniqueItems": true - }, - "publisher": { - "title":"Publisher", - "description": "The publishing entity.", - "type": "string" - }, - "references": { - "title":"Related Documents", - "description":"Related documents such as technical information about a dataset, developer documentation, etc.", - "anyOf": [ - { - "type": "array", - "items": { - "type": "string", - "format": "uri" - }, - "minItems": 1, - "uniqueItems": true - }, - { - "type": "null" - } - ] - }, - "spatial": { - "title":"Spatial", - "description":"The range of spatial applicability of a dataset. Could include a spatial region like a bounding box or a named place.", - "anyOf": [ - { - "type": "string", - "minLength": 1 - }, - { - "type": "null" - } - ] - }, - "systemofrecords": { - "title":"System of Records", - "description":"If the systems is designated as a system of records under the Privacy Act of 1974, provide the URL to the System of Records Notice related to this dataset.", - "anyOf": [ - { - "type": "string", - "minLength": 1 - }, - { - "type": "null" - } - ] - }, - "temporal": { - "title":"Temporal", - "description":"The range of temporal applicability of a dataset (i.e., a start and end date of applicability for the data).", - "anyOf": [ - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - }, - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?(\\/)P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - }, - { - "type": "string", - "pattern": "^P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" - }, - { - "type": "string", - "pattern": 
"^R\\d*\\/([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?\\/P(?=\\w*\\d)(?:\\d+Y|Y)?(?:\\d+M|M)?(?:\\d+W|W)?(?:\\d+D|D)?(?:T(?:\\d+H|H)?(?:\\d+M|M)?(?:\\d+(?:\\­.\\d{1,2})?S|S)?)?$" - }, - { - "type": "null" - } - ] - }, - "theme": { - "title":"Category", - "description":"Main thematic category of the dataset.", - "anyOf": [ - { - "type": "array", - "items": { - "type": "string", - "minLength": 1 - }, - "minItems": 1, - "uniqueItems": true - }, - { - "type": "null" - } - ] - }, - "title": { - "title": "Title", - "description": "Human-readable name of the asset. Should be in plain English and include sufficient detail to facilitate search and discovery.", - "type": "string" - }, - "webservice": { - "title":"Endpoint", - "description":"Endpoint of web service to access dataset.", - "anyOf": [ - { - "type": "string", - "format": "uri" - }, - { - "type": "null" - } - ] + "minLength": 1 + }, + "minItems": 1, + "uniqueItems": true + }, + { + "type": "null" + } + ] + }, + "title": { + "title": "Title", + "description": "Human-readable name of the asset. Should be in plain English and include sufficient detail to facilitate search and discovery.", + "type": "string" + }, + "webservice": { + "title": "Endpoint", + "description": "Endpoint of web service to access dataset.", + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" } + ] } + } } From 6d46c8ec856f64cd69c25a25466183c82f9dc875 Mon Sep 17 00:00:00 2001 From: ykhadilkar Date: Thu, 19 Mar 2015 17:04:31 -0400 Subject: [PATCH 09/22] Github # 152 - Enable flag to set a dataset as unpublished draft --- ckanext/datajson/templates/organization/read.html | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ckanext/datajson/templates/organization/read.html b/ckanext/datajson/templates/organization/read.html index fbcaca29..1c5d73ba 100644 --- a/ckanext/datajson/templates/organization/read.html +++ b/ckanext/datajson/templates/organization/read.html @@ -5,10 +5,12 @@ {% link_for _('Add Dataset'), controller='package', action='new', group=c.group_dict.id, class_='btn btn-primary', icon='plus-sign-alt' %} + {% endif %}
+
{% endblock %} From 882d1b86eb2c82cb244f5742298f8d39cd15c755 Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Thu, 19 Mar 2015 17:16:04 -0400 Subject: [PATCH 10/22] Do not export Drafts on PDL & EDI --- ckanext/datajson/plugin.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index b86a8ebf..7d60826e 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -372,7 +372,8 @@ def make_edi(self, owner_org): output = [] for pkg in packages: - # if pkg['owner_org'] == owner_org: + if pkg['publishing_status'] == 'Draft': + continue datajson_entry = JsonExportBuilder.make_datajson_export_entry(pkg) if datajson_entry and self.is_valid(datajson_entry): output.append(datajson_entry) @@ -405,6 +406,8 @@ def make_pdl(self, owner_org): output = [] # Create data.json only using public datasets, datasets marked non-public are not exposed for pkg in packages: + if pkg['publishing_status'] == 'Draft': + continue extras = dict([(x['key'], x['value']) for x in pkg['extras']]) try: if not (re.match(r'[Nn]on-public', extras['public_access_level'])): From 1f28edde3f9cc2fe5349ef1e462e951261f0afcf Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Thu, 19 Mar 2015 17:49:10 -0400 Subject: [PATCH 11/22] Export Draft datasets only button --- ckanext/datajson/plugin.py | 77 +++++++++++++++++++++++++++++++++----- 1 file changed, 67 insertions(+), 10 deletions(-) diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index 7d60826e..6ad09fc4 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -225,6 +225,11 @@ def after_map(self, m): m.connect('enterprise_data_inventory', '/organization/{org}/edi.json', controller='ckanext.datajson.plugin:JsonExportController', action='generate_edi') + # TODO DWC update action + # /data/{org}/edi.json + m.connect('enterprise_data_inventory', '/organization/{org}/draft.json', + controller='ckanext.datajson.plugin:JsonExportController', action='generate_draft') + # /pod/validate # m.connect('datajsonvalidator', "/pod/validate", controller='ckanext.datajson.plugin:JsonExportController', action='validator') @@ -334,6 +339,22 @@ def generate_edi(self): return self.make_edi(match.group(1)) return "Invalid organization id" + def generate_draft(self): + # DWC this is a hack, as I couldn't get to the request parameters. For whatever reason, the multidict was always empty + match = re.match(r"/organization/([-a-z0-9]+)/draft.json", request.path) + + # If user is not editor or admin of the organization then don't allow edi download + if p.toolkit.check_access('package_create', {'model': model, 'user': c.user}, {'owner_org': match.group(1)}): + if match: + # set content type (charset required or pylons throws an error) + response.content_type = 'application/json; charset=UTF-8' + + # allow caching of response (e.g. by Apache) + del response.headers["Cache-Control"] + del response.headers["Pragma"] + return self.make_draft(match.group(1)) + return "Invalid organization id" + def make_json(self): # Build the data.json file. @@ -358,6 +379,40 @@ def make_json(self): return output + def make_draft(self, owner_org): + # Error handler for creating error log + stream = StringIO.StringIO() + eh = logging.StreamHandler(stream) + eh.setLevel(logging.WARN) + formatter = logging.Formatter('%(asctime)s - %(message)s') + eh.setFormatter(formatter) + logger.addHandler(eh) + + # Build the data.json file. 
+ packages = self.get_packages(owner_org) + + output = [] + for pkg in packages: + extras = dict([(x['key'], x['value']) for x in pkg['extras']]) + if 'publishing_status' in extras.keys() and extras['publishing_status'] != 'Draft': + continue + datajson_entry = JsonExportBuilder.make_datajson_export_entry(pkg) + if datajson_entry and self.is_valid(datajson_entry): + output.append(datajson_entry) + else: + logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), pkg.get('title', None)) + + # Get the error log + eh.flush() + error = stream.getvalue() + eh.close() + logger.removeHandler(eh) + stream.close() + + # return json.dumps(output) + return self.write_zip(output, error, zip_name='edi') + + def make_edi(self, owner_org): # Error handler for creating error log stream = StringIO.StringIO() @@ -372,7 +427,8 @@ def make_edi(self, owner_org): output = [] for pkg in packages: - if pkg['publishing_status'] == 'Draft': + extras = dict([(x['key'], x['value']) for x in pkg['extras']]) + if 'publishing_status' in extras.keys() and extras['publishing_status'] == 'Draft': continue datajson_entry = JsonExportBuilder.make_datajson_export_entry(pkg) if datajson_entry and self.is_valid(datajson_entry): @@ -406,17 +462,18 @@ def make_pdl(self, owner_org): output = [] # Create data.json only using public datasets, datasets marked non-public are not exposed for pkg in packages: - if pkg['publishing_status'] == 'Draft': - continue extras = dict([(x['key'], x['value']) for x in pkg['extras']]) + if 'publishing_status' in extras.keys() and extras['publishing_status'] == 'Draft': + continue try: - if not (re.match(r'[Nn]on-public', extras['public_access_level'])): - datajson_entry = JsonExportBuilder.make_datajson_export_entry(pkg) - if datajson_entry and self.is_valid(datajson_entry): - output.append(datajson_entry) - else: - logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), - pkg.get('title', None)) + if re.match(r'[Nn]on-public', extras['public_access_level']): + continue + datajson_entry = JsonExportBuilder.make_datajson_export_entry(pkg) + if datajson_entry and self.is_valid(datajson_entry): + output.append(datajson_entry) + else: + logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), + pkg.get('title', None)) except KeyError: logger.warn("Dataset id=[%s], title=['%s'] missing required 'public_access_level' field", From 37c0fffd3d39a8ceb283ce93c6b2b7af0c24e87a Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Thu, 19 Mar 2015 23:32:58 -0400 Subject: [PATCH 12/22] Errors.json --- ckanext/datajson/build_datajson.py | 28 +++++++++++++++++---- ckanext/datajson/datajsonvalidator.py | 2 +- ckanext/datajson/plugin.py | 36 +++++++++++++++++++++------ 3 files changed, 52 insertions(+), 14 deletions(-) diff --git a/ckanext/datajson/build_datajson.py b/ckanext/datajson/build_datajson.py index 27c93d8b..8c765229 100644 --- a/ckanext/datajson/build_datajson.py +++ b/ckanext/datajson/build_datajson.py @@ -257,9 +257,18 @@ def make_datajson_export_entry(package): JsonExportBuilder.split_multiple_entries(retlist, extras, pair) except KeyError as e: - log.warn("Invalid field detected for package with id=[%s], title=['%s']: '%s'", package.get('id'), - package.get('title'), e) - return + log.warn("Missing Required Field for package with id=[%s], title=['%s']: '%s'" % ( + package.get('id'), package.get('title'), e)) + + errors = ['Missing Required Field', ["%s" % e]] + errors_dict = OrderedDict([ + ('id', package.get('id')), + ('name', package.get('name')), + 
('title', package.get('title')), + ('errors', errors), + ]) + + return errors_dict # # TODO this is a lazy hack to make sure we don't have redundant fields when the free form key/value pairs are added # extras_to_filter_out = ['publisher', 'contact_name', 'contact_email', 'unique_id', 'public_access_level', @@ -315,7 +324,15 @@ def make_datajson_export_entry(package): if len(errors) > 0: for error in errors: log.warn(error) - return + + errors_dict = OrderedDict([ + ('id', package.get('id')), + ('name', package.get('name')), + ('title', package.get('title')), + ('errors', errors), + ]) + + return errors_dict return striped_retlist_dict @@ -443,7 +460,8 @@ def extra(package, key, default=None): def get_publisher_tree_wrong_order(extras): publisher = JsonExportBuilder.strip_if_string(extras.get('publisher')) if publisher is None: - raise KeyError('publisher') + return None + # raise KeyError('publisher') organization_list = list() organization_list.append([ diff --git a/ckanext/datajson/datajsonvalidator.py b/ckanext/datajson/datajsonvalidator.py index b1102a0b..cd61d6a1 100644 --- a/ckanext/datajson/datajsonvalidator.py +++ b/ckanext/datajson/datajsonvalidator.py @@ -397,7 +397,7 @@ def check_required_field(obj, field_name, data_type, dataset_name, errs): add_error(errs, 10, "Missing Required Fields", "The '%s' field is missing." % field_name, dataset_name) return False elif obj[field_name] is None: - add_error(errs, 10, "Missing Required Fields", "The '%s' field is set to null." % field_name, dataset_name) + add_error(errs, 10, "Missing Required Fields", "The '%s' field is empty." % field_name, dataset_name) return False elif not isinstance(obj[field_name], data_type): add_error(errs, 5, "Invalid Required Field Value", diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index 6ad09fc4..d04cfe5d 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -109,7 +109,7 @@ def generate_output(self, format): ("foaf", "http://xmlns.com/foaf/0.1/"), ("pod", "http://project-open-data.github.io/schema/2013-09-20_1.0#"), ]) - ), + ), ("@id", DataJsonPlugin.ld_id), ("@type", "dcat:Catalog"), ("dcterms:title", DataJsonPlugin.ld_title), @@ -150,7 +150,8 @@ def validator(self): e) + ". Try using JSONLint.com."])) except Exception as e: c.errors.append(( - "Internal Error", ["Something bad happened while trying to load and parse the file: " + unicode(e)])) + "Internal Error", + ["Something bad happened while trying to load and parse the file: " + unicode(e)])) if body: try: @@ -178,7 +179,6 @@ def show_html_rendition(self): return render('html_rendition.html') - class JsonExportPlugin(p.SingletonPlugin): p.implements(p.interfaces.IConfigurer) p.implements(p.interfaces.IRoutes, inherit=True) @@ -258,7 +258,7 @@ def generate_output(self, format): ("dcat", "http://www.w3.org/ns/dcat#"), ("foaf", "http://xmlns.com/foaf/0.1/"), ]) - ), + ), ("@id", JsonExportPlugin.ld_id), ("@type", "dcat:Catalog"), ("dcterms:title", JsonExportPlugin.ld_title), @@ -391,12 +391,17 @@ def make_draft(self, owner_org): # Build the data.json file. 
packages = self.get_packages(owner_org) + errors_json = [] + output = [] for pkg in packages: extras = dict([(x['key'], x['value']) for x in pkg['extras']]) if 'publishing_status' in extras.keys() and extras['publishing_status'] != 'Draft': continue datajson_entry = JsonExportBuilder.make_datajson_export_entry(pkg) + if 'errors' in datajson_entry.keys(): + errors_json.append(datajson_entry) + datajson_entry = None if datajson_entry and self.is_valid(datajson_entry): output.append(datajson_entry) else: @@ -410,7 +415,7 @@ def make_draft(self, owner_org): stream.close() # return json.dumps(output) - return self.write_zip(output, error, zip_name='edi') + return self.write_zip(output, error, errors_json, zip_name='edi') def make_edi(self, owner_org): @@ -426,11 +431,15 @@ def make_edi(self, owner_org): packages = self.get_packages(owner_org) output = [] + errors_json = [] for pkg in packages: extras = dict([(x['key'], x['value']) for x in pkg['extras']]) if 'publishing_status' in extras.keys() and extras['publishing_status'] == 'Draft': continue datajson_entry = JsonExportBuilder.make_datajson_export_entry(pkg) + if 'errors' in datajson_entry.keys(): + errors_json.append(datajson_entry) + datajson_entry = None if datajson_entry and self.is_valid(datajson_entry): output.append(datajson_entry) else: @@ -444,7 +453,7 @@ def make_edi(self, owner_org): stream.close() # return json.dumps(output) - return self.write_zip(output, error, zip_name='edi') + return self.write_zip(output, error, errors_json, zip_name='edi') def make_pdl(self, owner_org): @@ -460,6 +469,7 @@ def make_pdl(self, owner_org): packages = self.get_packages(owner_org) output = [] + errors_json = [] # Create data.json only using public datasets, datasets marked non-public are not exposed for pkg in packages: extras = dict([(x['key'], x['value']) for x in pkg['extras']]) @@ -469,6 +479,9 @@ def make_pdl(self, owner_org): if re.match(r'[Nn]on-public', extras['public_access_level']): continue datajson_entry = JsonExportBuilder.make_datajson_export_entry(pkg) + if 'errors' in datajson_entry.keys(): + errors_json.append(datajson_entry) + datajson_entry = None if datajson_entry and self.is_valid(datajson_entry): output.append(datajson_entry) else: @@ -488,7 +501,7 @@ def make_pdl(self, owner_org): stream.close() # return json.dumps(output) - return self.write_zip(output, error, zip_name='pdl') + return self.write_zip(output, error, errors_json, zip_name='pdl') def get_packages(self, owner_org): @@ -530,7 +543,7 @@ def is_valid(self, instance): return True - def write_zip(self, data, error=None, zip_name='data'): + def write_zip(self, data, error=None, errors_json=None, zip_name='data'): """ Data: a python object to write to the data.json Error: unicode string representing the content of the error log. 
@@ -546,6 +559,13 @@ def write_zip(self, data, error=None, zip_name='data'): zf.writestr('data.json', json.dumps(JsonExportBuilder.make_datajson_export_catalog(data), ensure_ascii=False).encode( 'utf8')) + # Write empty.json if nothing to return + else: + zf.writestr('empty.json', '') + + # Errors in json format + if errors_json: + zf.writestr('errors.json', json.dumps(errors_json).encode('utf8')) # Write the error log if error: From 7851f56a713fde309c8fc24e89e011727735aeca Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Fri, 20 Mar 2015 10:21:56 -0400 Subject: [PATCH 13/22] Draft export fix --- ckanext/datajson/plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index d04cfe5d..2fa5ae49 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -396,7 +396,7 @@ def make_draft(self, owner_org): output = [] for pkg in packages: extras = dict([(x['key'], x['value']) for x in pkg['extras']]) - if 'publishing_status' in extras.keys() and extras['publishing_status'] != 'Draft': + if 'publishing_status' not in extras.keys() or extras['publishing_status'] != 'Draft': continue datajson_entry = JsonExportBuilder.make_datajson_export_entry(pkg) if 'errors' in datajson_entry.keys(): From d7a8627ffecf1468259facb6cb139a843b1b5d98 Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Fri, 20 Mar 2015 10:48:04 -0400 Subject: [PATCH 14/22] Draft filename changed --- ckanext/datajson/plugin.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index 2fa5ae49..4acdac63 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -415,7 +415,7 @@ def make_draft(self, owner_org): stream.close() # return json.dumps(output) - return self.write_zip(output, error, errors_json, zip_name='edi') + return self.write_zip(output, error, errors_json, zip_name='draft') def make_edi(self, owner_org): @@ -554,9 +554,13 @@ def write_zip(self, data, error=None, errors_json=None, zip_name='data'): o = StringIO.StringIO() zf = zipfile.ZipFile(o, mode='w') + data_file_name = 'data.json' + if 'draft' == zip_name: + data_file_name = 'draft_data.json' + # Write the data file if data: - zf.writestr('data.json', + zf.writestr(data_file_name, json.dumps(JsonExportBuilder.make_datajson_export_catalog(data), ensure_ascii=False).encode( 'utf8')) # Write empty.json if nothing to return From d9093cd0c45380834f6e50f070ff2716ccb16b62 Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Fri, 20 Mar 2015 10:58:47 -0400 Subject: [PATCH 15/22] Export metadata_modified if extras[modified] not available --- ckanext/datajson/build_datajson.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ckanext/datajson/build_datajson.py b/ckanext/datajson/build_datajson.py index 8c765229..705a7e15 100644 --- a/ckanext/datajson/build_datajson.py +++ b/ckanext/datajson/build_datajson.py @@ -214,7 +214,9 @@ def make_datajson_export_entry(package): ("license", JsonExportBuilder.strip_if_string(extras.get("license_new"))), # required-if-applicable - ("modified", JsonExportBuilder.strip_if_string(extras.get("modified"))), # required + ("modified", + JsonExportBuilder.strip_if_string(extras.get("modified", package.get("metadata_modified")))), + # required ("primaryITInvestmentUII", JsonExportBuilder.strip_if_string(extras.get('primary_it_investment_uii'))), # optional From d6519e81424f4023d467df3930eeae16a3e9f93d Mon Sep 17 00:00:00 2001 From: Alex Perfilov 
Date: Fri, 20 Mar 2015 12:56:49 -0400 Subject: [PATCH 16/22] Json error log update --- ckanext/datajson/plugin.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index 4acdac63..112dabf7 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -237,6 +237,8 @@ def after_map(self, m): class JsonExportController(BaseController): + _errors_json = [] + def generate_output(self, format): # set content type (charset required or pylons throws an error) response.content_type = 'application/json; charset=UTF-8' @@ -355,7 +357,6 @@ def generate_draft(self): return self.make_draft(match.group(1)) return "Invalid organization id" - def make_json(self): # Build the data.json file. packages = p.toolkit.get_action("current_package_list_with_resources")(None, {}) @@ -375,10 +376,17 @@ def make_json(self): logger.warn("Dataset id=[%s], title=[%s] missing required 'public_access_level' field", pkg.get('id', None), pkg.get('title', None)) + + errors = ['Missing Required Field', ['public_access_level']] + self._errors_json.append(OrderedDict([ + ('id', pkg.get('id')), + ('name', pkg.get('name')), + ('title', pkg.get('title')), + ('errors', errors), + ])) pass return output - def make_draft(self, owner_org): # Error handler for creating error log stream = StringIO.StringIO() @@ -417,7 +425,6 @@ def make_draft(self, owner_org): # return json.dumps(output) return self.write_zip(output, error, errors_json, zip_name='draft') - def make_edi(self, owner_org): # Error handler for creating error log stream = StringIO.StringIO() @@ -455,7 +462,6 @@ def make_edi(self, owner_org): # return json.dumps(output) return self.write_zip(output, error, errors_json, zip_name='edi') - def make_pdl(self, owner_org): # Error handler for creating error log stream = StringIO.StringIO() @@ -491,6 +497,13 @@ def make_pdl(self, owner_org): except KeyError: logger.warn("Dataset id=[%s], title=['%s'] missing required 'public_access_level' field", pkg.get('id', None), pkg.get('title', None)) + errors = ['Missing Required Field', ['public_access_level']] + self._errors_json.append(OrderedDict([ + ('id', pkg.get('id')), + ('name', pkg.get('name')), + ('title', pkg.get('title')), + ('errors', errors), + ])) pass # Get the error log @@ -503,7 +516,6 @@ def make_pdl(self, owner_org): # return json.dumps(output) return self.write_zip(output, error, errors_json, zip_name='pdl') - def get_packages(self, owner_org): # Build the data.json file. packages = self.get_all_group_packages(group_id=owner_org) @@ -520,7 +532,6 @@ def get_packages(self, owner_org): return packages - def get_all_group_packages(self, group_id): """ Gets all of the group packages, public or private, returning them as a list of CKAN's dictized packages. @@ -531,7 +542,6 @@ def get_all_group_packages(self, group_id): return result - def is_valid(self, instance): """ Validates a data.json entry against the project open data's JSON schema. 
Log a warning message on validation error @@ -542,7 +552,6 @@ def is_valid(self, instance): return False return True - def write_zip(self, data, error=None, errors_json=None, zip_name='data'): """ Data: a python object to write to the data.json @@ -567,6 +576,12 @@ def write_zip(self, data, error=None, errors_json=None, zip_name='data'): else: zf.writestr('empty.json', '') + if self._errors_json: + if errors_json: + errors_json += self._errors_json + else: + errors_json = self._errors_json + # Errors in json format if errors_json: zf.writestr('errors.json', json.dumps(errors_json).encode('utf8')) From 37537e0b15b9c9e87526a3035ab499294e8fded8 Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Fri, 20 Mar 2015 13:30:56 -0400 Subject: [PATCH 17/22] Organization name in errors.json --- ckanext/datajson/build_datajson.py | 10 ++++++++++ ckanext/datajson/plugin.py | 22 ++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/ckanext/datajson/build_datajson.py b/ckanext/datajson/build_datajson.py index 705a7e15..0c4acca3 100644 --- a/ckanext/datajson/build_datajson.py +++ b/ckanext/datajson/build_datajson.py @@ -133,8 +133,10 @@ def extension_to_mime_type(file_ext): } return ext.get(file_ext.lower(), "application/unknown") +currentPackageOrg = None class JsonExportBuilder: + @staticmethod def make_datajson_export_catalog(datasets): catalog = OrderedDict([ @@ -148,6 +150,8 @@ def make_datajson_export_entry(package): @staticmethod def make_datajson_export_entry(package): + global currentPackageOrg + currentPackageOrg = None # extras is a list of dicts [{},{}, {}]. For each dict, extract the key, value entries into a new dict extras = dict([(x['key'], x['value']) for x in package['extras']]) @@ -267,6 +271,7 @@ def make_datajson_export_entry(package): ('id', package.get('id')), ('name', package.get('name')), ('title', package.get('title')), + ('organization', currentPackageOrg), ('errors', errors), ]) @@ -331,6 +336,7 @@ def make_datajson_export_entry(package): ('id', package.get('id')), ('name', package.get('name')), ('title', package.get('title')), + ('organization', currentPackageOrg), ('errors', errors), ]) @@ -460,11 +466,14 @@ def extra(package, key, default=None): @staticmethod def get_publisher_tree_wrong_order(extras): + global currentPackageOrg publisher = JsonExportBuilder.strip_if_string(extras.get('publisher')) if publisher is None: return None # raise KeyError('publisher') + currentPackageOrg = publisher + organization_list = list() organization_list.append([ ('@type', 'org:Organization'), # optional @@ -478,6 +487,7 @@ def get_publisher_tree_wrong_order(extras): ('@type', 'org:Organization'), # optional ('name', JsonExportBuilder.strip_if_string(extras[key])), # required ]) + currentPackageOrg = extras[key] size = len(organization_list) diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index 112dabf7..f6641677 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -378,10 +378,22 @@ def make_json(self): pkg.get('title', None)) errors = ['Missing Required Field', ['public_access_level']] + + currentPackageOrg = None + + if 'publisher' in extras and extras['publisher']: + currentPackageOrg = JsonExportBuilder.strip_if_string(extras['publisher']) + + for i in range(1, 6): + key = 'publisher_' + str(i) + if key in extras and extras[key] and JsonExportBuilder.strip_if_string(extras[key]): + currentPackageOrg = JsonExportBuilder.strip_if_string(extras[key]) + self._errors_json.append(OrderedDict([ ('id', pkg.get('id')), ('name', 
pkg.get('name')), ('title', pkg.get('title')), + ('organization', currentPackageOrg), ('errors', errors), ])) pass @@ -498,10 +510,20 @@ def make_pdl(self, owner_org): logger.warn("Dataset id=[%s], title=['%s'] missing required 'public_access_level' field", pkg.get('id', None), pkg.get('title', None)) errors = ['Missing Required Field', ['public_access_level']] + + currentPackageOrg = None + if 'publisher' in extras and extras['publisher']: + currentPackageOrg = JsonExportBuilder.strip_if_string(extras['publisher']) + for i in range(1, 6): + key = 'publisher_' + str(i) + if key in extras and extras[key] and JsonExportBuilder.strip_if_string(extras[key]): + currentPackageOrg = JsonExportBuilder.strip_if_string(extras[key]) + self._errors_json.append(OrderedDict([ ('id', pkg.get('id')), ('name', pkg.get('name')), ('title', pkg.get('title')), + ('organization', currentPackageOrg), ('errors', errors), ])) pass From 252ff335976dcda3d004cfdb2f99d0d5adbc465e Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Fri, 20 Mar 2015 13:47:20 -0400 Subject: [PATCH 18/22] Add organization title to errorlog.txt too --- ckanext/datajson/build_datajson.py | 4 ++-- ckanext/datajson/plugin.py | 26 +++++++++++++++++++------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/ckanext/datajson/build_datajson.py b/ckanext/datajson/build_datajson.py index 0c4acca3..8a78ada0 100644 --- a/ckanext/datajson/build_datajson.py +++ b/ckanext/datajson/build_datajson.py @@ -263,8 +263,8 @@ def make_datajson_export_entry(package): JsonExportBuilder.split_multiple_entries(retlist, extras, pair) except KeyError as e: - log.warn("Missing Required Field for package with id=[%s], title=['%s']: '%s'" % ( - package.get('id'), package.get('title'), e)) + log.warn("Missing Required Field for package with id=[%s], title=['%s'], organization=['%s']: '%s'" % ( + package.get('id'), package.get('title'), currentPackageOrg, e)) errors = ['Missing Required Field', ["%s" % e]] errors_dict = OrderedDict([ diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index f6641677..00fa4f2a 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -373,12 +373,6 @@ def make_json(self): logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), pkg.get('title', None)) except KeyError: - logger.warn("Dataset id=[%s], title=[%s] missing required 'public_access_level' field", - pkg.get('id', None), - pkg.get('title', None)) - - errors = ['Missing Required Field', ['public_access_level']] - currentPackageOrg = None if 'publisher' in extras and extras['publisher']: @@ -389,6 +383,14 @@ def make_json(self): if key in extras and extras[key] and JsonExportBuilder.strip_if_string(extras[key]): currentPackageOrg = JsonExportBuilder.strip_if_string(extras[key]) + logger.warn( + "Dataset id=[%s], title=[%s], organization=[%s] missing required 'public_access_level' field", + pkg.get('id', None), + pkg.get('title', None), + currentPackageOrg) + + errors = ['Missing Required Field', ['public_access_level']] + self._errors_json.append(OrderedDict([ ('id', pkg.get('id')), ('name', pkg.get('name')), @@ -425,7 +427,17 @@ def make_draft(self, owner_org): if datajson_entry and self.is_valid(datajson_entry): output.append(datajson_entry) else: - logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), pkg.get('title', None)) + currentPackageOrg = None + + if 'publisher' in extras and extras['publisher']: + currentPackageOrg = JsonExportBuilder.strip_if_string(extras['publisher']) + + for i in 
range(1, 6): + key = 'publisher_' + str(i) + if key in extras and extras[key] and JsonExportBuilder.strip_if_string(extras[key]): + currentPackageOrg = JsonExportBuilder.strip_if_string(extras[key]) + logger.warn("Dataset id=[%s], title=[%s], organization=[%s] omitted\n", pkg.get('id', None), + pkg.get('title', None), currentPackageOrg) # Get the error log eh.flush() From 5b871326829f9fb8b70a26ad6222aabdf2eb65b9 Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Fri, 20 Mar 2015 13:55:23 -0400 Subject: [PATCH 19/22] Adding Organization to errorlog.txt, Step 2 --- ckanext/datajson/plugin.py | 71 ++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index 00fa4f2a..75d3ad4c 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -370,24 +370,17 @@ def make_json(self): if datajson_entry: output.append(datajson_entry) else: - logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), - pkg.get('title', None)) + publisher = self.detect_publisher(extras) + logger.warn("Dataset id=[%s], title=[%s], organization=[%s] omitted\n", pkg.get('id', None), + pkg.get('title', None), publisher) except KeyError: - currentPackageOrg = None - - if 'publisher' in extras and extras['publisher']: - currentPackageOrg = JsonExportBuilder.strip_if_string(extras['publisher']) - - for i in range(1, 6): - key = 'publisher_' + str(i) - if key in extras and extras[key] and JsonExportBuilder.strip_if_string(extras[key]): - currentPackageOrg = JsonExportBuilder.strip_if_string(extras[key]) + publisher = self.detect_publisher(extras) logger.warn( "Dataset id=[%s], title=[%s], organization=[%s] missing required 'public_access_level' field", pkg.get('id', None), pkg.get('title', None), - currentPackageOrg) + publisher) errors = ['Missing Required Field', ['public_access_level']] @@ -395,7 +388,7 @@ def make_json(self): ('id', pkg.get('id')), ('name', pkg.get('name')), ('title', pkg.get('title')), - ('organization', currentPackageOrg), + ('organization', publisher), ('errors', errors), ])) pass @@ -427,17 +420,9 @@ def make_draft(self, owner_org): if datajson_entry and self.is_valid(datajson_entry): output.append(datajson_entry) else: - currentPackageOrg = None - - if 'publisher' in extras and extras['publisher']: - currentPackageOrg = JsonExportBuilder.strip_if_string(extras['publisher']) - - for i in range(1, 6): - key = 'publisher_' + str(i) - if key in extras and extras[key] and JsonExportBuilder.strip_if_string(extras[key]): - currentPackageOrg = JsonExportBuilder.strip_if_string(extras[key]) + publisher = self.detect_publisher(extras) logger.warn("Dataset id=[%s], title=[%s], organization=[%s] omitted\n", pkg.get('id', None), - pkg.get('title', None), currentPackageOrg) + pkg.get('title', None), publisher) # Get the error log eh.flush() @@ -449,6 +434,19 @@ def make_draft(self, owner_org): # return json.dumps(output) return self.write_zip(output, error, errors_json, zip_name='draft') + @staticmethod + def detect_publisher(extras): + publisher = None + + if 'publisher' in extras and extras['publisher']: + publisher = JsonExportBuilder.strip_if_string(extras['publisher']) + + for i in range(1, 6): + key = 'publisher_' + str(i) + if key in extras and extras[key] and JsonExportBuilder.strip_if_string(extras[key]): + publisher = JsonExportBuilder.strip_if_string(extras[key]) + return publisher + def make_edi(self, owner_org): # Error handler for creating error log stream = 
StringIO.StringIO() @@ -474,7 +472,9 @@ def make_edi(self, owner_org): if datajson_entry and self.is_valid(datajson_entry): output.append(datajson_entry) else: - logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), pkg.get('title', None)) + publisher = self.detect_publisher(extras) + logger.warn("Dataset id=[%s], title=[%s], organization=[%s] omitted\n", pkg.get('id', None), + pkg.get('title', None), publisher) # Get the error log eh.flush() @@ -515,27 +515,24 @@ def make_pdl(self, owner_org): if datajson_entry and self.is_valid(datajson_entry): output.append(datajson_entry) else: - logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), - pkg.get('title', None)) + publisher = self.detect_publisher(extras) + logger.warn("Dataset id=[%s], title=[%s], organization=[%s] omitted\n", pkg.get('id', None), + pkg.get('title', None), publisher) except KeyError: - logger.warn("Dataset id=[%s], title=['%s'] missing required 'public_access_level' field", - pkg.get('id', None), pkg.get('title', None)) - errors = ['Missing Required Field', ['public_access_level']] + publisher = self.detect_publisher(extras) - currentPackageOrg = None - if 'publisher' in extras and extras['publisher']: - currentPackageOrg = JsonExportBuilder.strip_if_string(extras['publisher']) - for i in range(1, 6): - key = 'publisher_' + str(i) - if key in extras and extras[key] and JsonExportBuilder.strip_if_string(extras[key]): - currentPackageOrg = JsonExportBuilder.strip_if_string(extras[key]) + logger.warn( + "Dataset id=[%s], title=['%s'], organization=['%s'] missing required 'public_access_level' field", + pkg.get('id', None), pkg.get('title', None), publisher) + + errors = ['Missing Required Field', ['public_access_level']] self._errors_json.append(OrderedDict([ ('id', pkg.get('id')), ('name', pkg.get('name')), ('title', pkg.get('title')), - ('organization', currentPackageOrg), + ('organization', publisher), ('errors', errors), ])) pass From a03925cdca2ad045b96f581a3bf47c7c2ce15ac3 Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Fri, 20 Mar 2015 17:09:05 -0400 Subject: [PATCH 20/22] Sub-orgs export fix --- ckanext/datajson/plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index 75d3ad4c..871b615a 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -557,7 +557,7 @@ def get_packages(self, owner_org): sub_agencies = sub_agency.extras.col.target['sub-agencies'].value sub_agencies_list = sub_agencies.split(",") for sub in sub_agencies_list: - sub_packages = self, self.get_all_group_packages(group_id=sub) + sub_packages = self.get_all_group_packages(group_id=sub) for sub_package in sub_packages: packages.append(sub_package) From 1ce18139056b1570d675ae83ddc709a0b437313e Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Fri, 20 Mar 2015 17:51:33 -0400 Subject: [PATCH 21/22] Refactored for better format --- ckanext/datajson/build_datajson.py | 67 +++++++++--------------------- ckanext/datajson/plugin.py | 59 ++++++++++++++------------ 2 files changed, 52 insertions(+), 74 deletions(-) diff --git a/ckanext/datajson/build_datajson.py b/ckanext/datajson/build_datajson.py index 8a78ada0..c65aa6ee 100644 --- a/ckanext/datajson/build_datajson.py +++ b/ckanext/datajson/build_datajson.py @@ -41,7 +41,7 @@ def make_datajson_entry(package): ("dataDictionary", extra(package, "Data Dictionary")), ("accessURL", get_primary_resource(package).get("url", None)), ("webService", 
get_api_resource(package).get("url", None)), - ("format", extension_to_mime_type(get_primary_resource(package).get("format", None)) ), + ("format", extension_to_mime_type(get_primary_resource(package).get("format", None))), ("license", extra(package, "License Agreement")), ("spatial", extra(package, "Geographic Scope")), ("temporal", build_temporal(package)), @@ -54,8 +54,9 @@ def make_datajson_entry(package): x is not None)), ("dataQuality", extra(package, "Data Quality Met", default="true") == "true"), ("theme", [s for s in ( - extra(package, "Subject Area 1"), extra(package, "Subject Area 2"), extra(package, "Subject Area 3")) if - s is not None]), + extra(package, "Subject Area 1"), extra(package, "Subject Area 2"), extra(package, "Subject Area 3") + ) if s is not None]), + ("references", [s for s in [extra(package, "Technical Documentation")] if s is not None]), ("landingPage", package["url"]), ("systemOfRecords", extra(package, "System Of Records")), @@ -74,9 +75,9 @@ def make_datajson_entry(package): def extra(package, key, default=None): # Retrieves the value of an extras field. - for extra in package["extras"]: - if extra["key"] == key: - return extra["value"] + for xtra in package["extras"]: + if xtra["key"] == key: + return xtra["value"] return default @@ -133,10 +134,16 @@ def extension_to_mime_type(file_ext): } return ext.get(file_ext.lower(), "application/unknown") + currentPackageOrg = None + class JsonExportBuilder: + def __init__(self): + global currentPackageOrg + currentPackageOrg = None + @staticmethod def make_datajson_export_catalog(datasets): catalog = OrderedDict([ @@ -193,7 +200,7 @@ def make_datajson_export_entry(package): # ("fn", "Jane Doe"), # ("hasEmail", "mailto:jane.doe@agency.gov") # ])), # required - ('contactPoint', JsonExportBuilder.get_contact_point(extras, package)), # required + ('contactPoint', JsonExportBuilder.get_contact_point(extras)), # required ("dataQuality", JsonExportBuilder.strip_if_string(extras.get('data_quality'))), # required-if-applicable @@ -277,38 +284,8 @@ def make_datajson_export_entry(package): return errors_dict - # # TODO this is a lazy hack to make sure we don't have redundant fields when the free form key/value pairs are added - # extras_to_filter_out = ['publisher', 'contact_name', 'contact_email', 'unique_id', 'public_access_level', - # 'data_dictionary', 'bureau_code', 'program_code', 'access_level_comment', 'license_title', - # 'spatial', 'temporal', 'release_date', 'accrual_periodicity', 'language', 'granularity', - # 'data_quality', 'size', 'homepage_url', 'rss_feed', 'category', 'related_documents', - # 'system_of_records', 'system_of_records_none_related_to_this_dataset', 'tags', - # 'extrasRollup', 'format', 'accessURL', 'notes', 'publisher_1', 'publisher_2', 'publisher_3', - # 'publisher_4', 'publisher_5'] - # - # # Append any free extras (key/value pairs) that aren't part of common core but have been associated with the dataset - # # TODO really hackey, short on time, had to hardcode a lot of the names to remove. 
there's much better ways, maybe - # # generate a list of keys to ignore by calling a specific function to get the extras - # retlist_keys = [x for x, y in retlist] - # extras_keys = set(extras.keys()) - set(extras_to_filter_out) - # - # for key in extras_keys: - # convertedKey = underscore_to_camelcase(key) - # if convertedKey not in retlist_keys: - # retlist.append((convertedKey, extras[key])) - # Remove entries where value is None, "", or empty list [] striped_retlist = [(x, y) for x, y in retlist if y is not None and y != "" and y != []] - striped_retlist_keys = [x for x, y in striped_retlist] - - - # If a required metadata field was removed, return empty string - # for required_field in ["accessLevel", "bureauCode", "contactPoint", "description", "identifier", "keyword", - # "modified", "programCode", "publisher", "title"]: - # if required_field not in striped_retlist_keys: - # log.warn("Missing required field detected for package with id=[%s], title=['%s']: '%s'", - # package.get('id'), package.get('title'), required_field) - # return # When saved from UI DataQuality value is stored as "on" instead of True. # Check if value is "on" and replace it with True. @@ -344,7 +321,6 @@ def make_datajson_export_entry(package): return striped_retlist_dict - # used by get_accrual_periodicity accrual_periodicity_dict = { 'completely irregular': 'irregular', @@ -436,18 +412,18 @@ def generate_distribution(package): return arr @staticmethod - def get_contact_point(extras, package): + def get_contact_point(extras): for required_field in ["contact_name", "contact_email"]: if required_field not in extras.keys(): raise KeyError(required_field) email = JsonExportBuilder.strip_if_string(extras['contact_email']) if email is None or '@' not in email: - raise KeyError(required_field) + raise KeyError('contact_email') fn = JsonExportBuilder.strip_if_string(extras['contact_name']) if fn is None: - raise KeyError(required_field) + raise KeyError('contact_name') contact_point = OrderedDict([ ('@type', 'vcard:Contact'), # optional @@ -459,9 +435,9 @@ def get_contact_point(extras, package): @staticmethod def extra(package, key, default=None): # Retrieves the value of an extras field. - for extra in package["extras"]: - if extra["key"] == key: - return extra["value"] + for xtra in package["extras"]: + if xtra["key"] == key: + return xtra["value"] return default @staticmethod @@ -532,19 +508,16 @@ def strip_if_string(val): val = None return val - @staticmethod def get_primary_resource(package): # Return info about a "primary" resource. Select a good one. return JsonExportBuilder.get_best_resource(package, ("csv", "xls", "xml", "text", "zip", "rdf")) - @staticmethod def get_api_resource(package): # Return info about an API resource. 
return JsonExportBuilder.get_best_resource(package, ("api", "query tool")) - @staticmethod def split_multiple_entries(retlist, extras, names): found_element = string.strip(extras.get(names[1], "")) diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py index 871b615a..7320d3d4 100644 --- a/ckanext/datajson/plugin.py +++ b/ckanext/datajson/plugin.py @@ -88,7 +88,7 @@ def organization_facets(self, facets_dict, organization_type, package_type): class DataJsonController(BaseController): - def generate_output(self, format): + def generate_output(self, fmt): # set content type (charset required or pylons throws an error) response.content_type = 'application/json; charset=UTF-8' @@ -99,7 +99,7 @@ def generate_output(self, format): # output data = self.make_json() - if format == 'json-ld': + if fmt == 'json-ld': # Convert this to JSON-LD. data = OrderedDict([ ("@context", OrderedDict([ @@ -108,8 +108,7 @@ def generate_output(self, format): ("dcat", "http://www.w3.org/ns/dcat#"), ("foaf", "http://xmlns.com/foaf/0.1/"), ("pod", "http://project-open-data.github.io/schema/2013-09-20_1.0#"), - ]) - ), + ])), ("@id", DataJsonPlugin.ld_id), ("@type", "dcat:Catalog"), ("dcterms:title", DataJsonPlugin.ld_title), @@ -137,7 +136,8 @@ def validator(self): c.source_url = request.POST["url"] c.errors = [] - import urllib, json + import urllib + import json from datajsonvalidator import do_validation body = None @@ -167,11 +167,12 @@ def show_html_rendition(self): # Shows an HTML rendition of the data.json file. Requests the file live # from http://localhost/data.json. - import urllib, json + import urllib + import json try: c.catalog_data = json.load(urllib.urlopen("http://localhost/data.json")) - except: + except Exception as e: c.catalog_data = [] c.catalog_data.sort(key=lambda x: x.get("modified"), reverse=True) @@ -212,8 +213,11 @@ def after_map(self, m): controller='ckanext.datajson.plugin:JsonExportController', action='generate_json') # TODO commenting out enterprise data inventory for right now - # m.connect('enterprisedatajson', JsonExportPlugin.route_edata_path, controller='ckanext.datajson.plugin:JsonExportController', action='generate_enterprise') - # m.connect('datajsonld', JsonExportPlugin.route_ld_path, controller='ckanext.datajson.plugin:JsonExportController', action='generate_jsonld') + # m.connect('enterprisedatajson', JsonExportPlugin.route_edata_path, + # controller='ckanext.datajson.plugin:JsonExportController', action='generate_enterprise') + + # m.connect('datajsonld', JsonExportPlugin.route_ld_path, + # controller='ckanext.datajson.plugin:JsonExportController', action='generate_jsonld') # TODO DWC update action # /data/{org}/data.json @@ -231,7 +235,8 @@ def after_map(self, m): controller='ckanext.datajson.plugin:JsonExportController', action='generate_draft') # /pod/validate - # m.connect('datajsonvalidator', "/pod/validate", controller='ckanext.datajson.plugin:JsonExportController', action='validator') + # m.connect('datajsonvalidator', "/pod/validate", + # controller='ckanext.datajson.plugin:JsonExportController', action='validator') return m @@ -239,7 +244,7 @@ def after_map(self, m): class JsonExportController(BaseController): _errors_json = [] - def generate_output(self, format): + def generate_output(self, fmt): # set content type (charset required or pylons throws an error) response.content_type = 'application/json; charset=UTF-8' @@ -251,7 +256,7 @@ def generate_output(self, format): # output data = self.make_json() - if format == 'json-ld': + if fmt == 'json-ld': # 
Convert this to JSON-LD. data = OrderedDict([ ("@context", OrderedDict([ @@ -259,8 +264,7 @@ def generate_output(self, format): ("dcterms", "http://purl.org/dc/terms/"), ("dcat", "http://www.w3.org/ns/dcat#"), ("foaf", "http://xmlns.com/foaf/0.1/"), - ]) - ), + ])), ("@id", JsonExportPlugin.ld_id), ("@type", "dcat:Catalog"), ("dcterms:title", JsonExportPlugin.ld_title), @@ -283,7 +287,8 @@ def validator(self): c.source_url = request.POST["url"] c.errors = [] - import urllib, json + import urllib + import json from datajsonvalidator import do_validation body = None @@ -310,7 +315,8 @@ def validator(self): return render('datajsonvalidator.html') def generate_pdl(self): - # DWC this is a hack, as I couldn't get to the request parameters. For whatever reason, the multidict was always empty + # DWC this is a hack, as I couldn't get to the request parameters. + # For whatever reason, the multidict was always empty match = re.match(r"/organization/([-a-z0-9]+)/data.json", request.path) # If user is not editor or admin of the organization then don't allow pdl download @@ -326,7 +332,8 @@ def generate_pdl(self): return "Invalid organization id" def generate_edi(self): - # DWC this is a hack, as I couldn't get to the request parameters. For whatever reason, the multidict was always empty + # DWC this is a hack, as I couldn't get to the request parameters. + # For whatever reason, the multidict was always empty match = re.match(r"/organization/([-a-z0-9]+)/edi.json", request.path) # If user is not editor or admin of the organization then don't allow edi download @@ -342,7 +349,8 @@ def generate_edi(self): return "Invalid organization id" def generate_draft(self): - # DWC this is a hack, as I couldn't get to the request parameters. For whatever reason, the multidict was always empty + # DWC this is a hack, as I couldn't get to the request parameters. + # For whatever reason, the multidict was always empty match = re.match(r"/organization/([-a-z0-9]+)/draft.json", request.path) # If user is not editor or admin of the organization then don't allow edi download @@ -552,8 +560,8 @@ def get_packages(self, owner_org): packages = self.get_all_group_packages(group_id=owner_org) # get packages for sub-agencies. sub_agency = model.Group.get(owner_org) - if 'sub-agencies' in sub_agency.extras.col.target and \ - sub_agency.extras.col.target['sub-agencies'].state == 'active': + if 'sub-agencies' in sub_agency.extras.col.target \ + and sub_agency.extras.col.target['sub-agencies'].state == 'active': sub_agencies = sub_agency.extras.col.target['sub-agencies'].value sub_agencies_list = sub_agencies.split(",") for sub in sub_agencies_list: @@ -575,7 +583,8 @@ def get_all_group_packages(self, group_id): def is_valid(self, instance): """ - Validates a data.json entry against the project open data's JSON schema. Log a warning message on validation error + Validates a data.json entry against the project open data's JSON schema. 
+ Log a warning message on validation error """ error = best_match(validator.iter_errors(instance)) if error: @@ -638,12 +647,8 @@ def get_validator(): from jsonschema import Draft4Validator, FormatChecker schema_path = os.path.join(os.path.dirname(__file__), 'pod_schema', 'federal-v1.1', 'dataset.json') - with open(schema_path, 'r') as file: - schema = json.loads(file.read()) + with open(schema_path, 'r') as schema: + schema = json.loads(schema.read()) return Draft4Validator(schema, format_checker=FormatChecker()) - logger.warn('Unable to create validator') - return None - - validator = get_validator() \ No newline at end of file From 3658c5dc205c626db3cf5e85bda27148578ad1a8 Mon Sep 17 00:00:00 2001 From: Alex Perfilov Date: Tue, 24 Mar 2015 10:08:34 -0400 Subject: [PATCH 22/22] allow [[REDACTED*] values to export --- ckanext/datajson/build_datajson.py | 19 +++-- ckanext/datajson/datajsonvalidator.py | 117 ++++++++++++++++---------- 2 files changed, 85 insertions(+), 51 deletions(-) diff --git a/ckanext/datajson/build_datajson.py b/ckanext/datajson/build_datajson.py index c65aa6ee..f8a704cf 100644 --- a/ckanext/datajson/build_datajson.py +++ b/ckanext/datajson/build_datajson.py @@ -139,7 +139,6 @@ def extension_to_mime_type(file_ext): class JsonExportBuilder: - def __init__(self): global currentPackageOrg currentPackageOrg = None @@ -357,6 +356,8 @@ def generate_distribution(package): if 'url' in rkeys: res_url = JsonExportBuilder.strip_if_string(r.get('url')) if res_url: + res_url = res_url.replace('http://[[REDACTED', '[[REDACTED') + res_url = res_url.replace('http://http', 'http') if 'api' == r.get('resource_type') or 'accessurl' == r.get('resource_type'): resource += [("accessURL", res_url)] else: @@ -417,18 +418,24 @@ def get_contact_point(extras): if required_field not in extras.keys(): raise KeyError(required_field) - email = JsonExportBuilder.strip_if_string(extras['contact_email']) - if email is None or '@' not in email: - raise KeyError('contact_email') - fn = JsonExportBuilder.strip_if_string(extras['contact_name']) if fn is None: raise KeyError('contact_name') + email = JsonExportBuilder.strip_if_string(extras['contact_email']) + if email is None: + raise KeyError('contact_email') + + if '[[REDACTED' not in email: + if '@' not in email: + raise KeyError('contact_email') + else: + email = 'mailto:' + email + contact_point = OrderedDict([ ('@type', 'vcard:Contact'), # optional ('fn', fn), # required - ('hasEmail', 'mailto:' + email), # required + ('hasEmail', email), # required ]) return contact_point diff --git a/ckanext/datajson/datajsonvalidator.py b/ckanext/datajson/datajsonvalidator.py index cd61d6a1..28739f9e 100644 --- a/ckanext/datajson/datajsonvalidator.py +++ b/ckanext/datajson/datajsonvalidator.py @@ -76,13 +76,19 @@ r'(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$' ) +REDACTED_REGEX = re.compile( + r'^(\[\[REDACTED).*?(\]\])$' +) + # load the OMB bureau codes on first load of this module -import urllib, csv +import urllib +import csv omb_burueau_codes = set() for row in csv.DictReader(urllib.urlopen("https://project-open-data.cio.gov/data/omb_bureau_codes.csv")): omb_burueau_codes.add(row["Agency Code"] + ":" + row["Bureau Code"]) + # main function for validation def do_validation(doc, errors_array): errs = {} @@ -136,14 +142,15 @@ def do_validation(doc, errors_array): # contactPoint - hasEmail # required if check_required_string_field(cp, "hasEmail", 9, dataset_name, errs): - import lepl.apps.rfc3696 + if not 
is_redacted(cp.get('hasEmail')): + import lepl.apps.rfc3696 - email_validator = lepl.apps.rfc3696.Email() - email = cp["hasEmail"].replace('mailto:', '') - if not email_validator(email): - add_error(errs, 5, "Invalid Required Field Value", - "The email address \"%s\" is not a valid email address." % email, - dataset_name) + email_validator = lepl.apps.rfc3696.Email() + email = cp["hasEmail"].replace('mailto:', '') + if not email_validator(email): + add_error(errs, 5, "Invalid Required Field Value", + "The email address \"%s\" is not a valid email address." % email, + dataset_name) # description # required check_required_string_field(item, "description", 1, dataset_name, errs) @@ -158,8 +165,9 @@ def do_validation(doc, errors_array): # keyword # required if isinstance(item.get("keyword"), (str, unicode)): - add_error(errs, 5, "Update Your File!", - "The keyword field used to be a string but now it must be an array.", dataset_name) + if not is_redacted(item.get("keyword")): + add_error(errs, 5, "Update Your File!", + "The keyword field used to be a string but now it must be an array.", dataset_name) elif check_required_field(item, "keyword", list, dataset_name, errs): for kw in item["keyword"]: if not isinstance(kw, (str, unicode)): @@ -171,7 +179,8 @@ def do_validation(doc, errors_array): # modified # required if check_required_string_field(item, "modified", 1, dataset_name, errs): - if not MODIFIED_REGEX_1.match(item['modified']) \ + if not is_redacted(item['modified']) \ + and not MODIFIED_REGEX_1.match(item['modified']) \ and not MODIFIED_REGEX_2.match(item['modified']) \ and not MODIFIED_REGEX_3.match(item['modified']): add_error(errs, 5, "Invalid Required Field Value", @@ -195,8 +204,8 @@ def do_validation(doc, errors_array): # Required-If-Applicable # dataQuality # Required-If-Applicable - if item.get("dataQuality") is None: - pass # not required + if item.get("dataQuality") is None or is_redacted(item.get("dataQuality")): + pass # not required or REDACTED elif not isinstance(item["dataQuality"], bool): add_error(errs, 50, "Invalid Field Value (Optional Fields)", "The field 'dataQuality' must be true or false, " @@ -207,35 +216,42 @@ def do_validation(doc, errors_array): if item.get("distribution") is None: pass # not required elif not isinstance(item["distribution"], list): - add_error(errs, 50, "Invalid Field Value (Optional Fields)", - "The field 'distribution' must be an array, if present.", dataset_name) + if isinstance(item["distribution"], (str, unicode)) and is_redacted(item.get("distribution")): + pass + else: + add_error(errs, 50, "Invalid Field Value (Optional Fields)", + "The field 'distribution' must be an array, if present.", dataset_name) else: for j, dt in enumerate(item["distribution"]): + if isinstance(dt, (str, unicode)): + if is_redacted(dt): + continue distribution_name = dataset_name + (" distribution %d" % (j + 1)) # distribution - downloadURL # Required-If-Applicable - check_url_field(False, dt, "downloadURL", distribution_name, errs) + check_url_field(False, dt, "downloadURL", distribution_name, errs, True) # distribution - mediaType # Required-If-Applicable if 'downloadURL' in dt: if check_required_string_field(dt, "mediaType", 1, distribution_name, errs): - if not IANA_MIME_REGEX.match(dt["mediaType"]): + if not IANA_MIME_REGEX.match(dt["mediaType"]) \ + and not is_redacted(dt["mediaType"]): add_error(errs, 5, "Invalid Field Value", "The distribution mediaType \"%s\" is invalid. " "It must be in IANA MIME format." 
% dt["mediaType"], distribution_name) # distribution - accessURL # optional - check_url_field(False, dt, "accessURL", distribution_name, errs) + check_url_field(False, dt, "accessURL", distribution_name, errs, True) # distribution - conformsTo # optional - check_url_field(False, dt, "conformsTo", distribution_name, errs) + check_url_field(False, dt, "conformsTo", distribution_name, errs, True) # distribution - describedBy # optional - check_url_field(False, dt, "describedBy", distribution_name, errs) + check_url_field(False, dt, "describedBy", distribution_name, errs, True) # distribution - describedByType # optional - if dt.get("describedByType") is None: - pass # not required + if dt.get("describedByType") is None or is_redacted(dt.get("describedByType")): + pass # not required or REDACTED elif not IANA_MIME_REGEX.match(dt["describedByType"]): add_error(errs, 5, "Invalid Field Value", "The describedByType \"%s\" is invalid. " @@ -255,7 +271,7 @@ def do_validation(doc, errors_array): check_required_string_field(dt, "title", 1, distribution_name, errs) # license # Required-If-Applicable - check_url_field(False, item, "license", dataset_name, errs) + check_url_field(False, item, "license", dataset_name, errs, True) # rights # Required-If-Applicable # TODO move to warnings @@ -269,8 +285,8 @@ def do_validation(doc, errors_array): "The field 'spatial' must be a string value if specified.", dataset_name) # temporal # Required-If-Applicable - if item.get("temporal") is None: - pass # not required + if item.get("temporal") is None or is_redacted(item.get("temporal")): + pass # not required or REDACTED elif not isinstance(item["temporal"], (str, unicode)): add_error(errs, 10, "Invalid Field Value (Optional Fields)", "The field 'temporal' must be a string value if specified.", dataset_name) @@ -286,19 +302,20 @@ def do_validation(doc, errors_array): # Expanded Fields # accrualPeriodicity # optional - if item.get("accrualPeriodicity") not in ACCRUAL_PERIODICITY_VALUES: + if item.get("accrualPeriodicity") not in ACCRUAL_PERIODICITY_VALUES \ + and not is_redacted(item.get("accrualPeriodicity")): add_error(errs, 50, "Invalid Field Value (Optional Fields)", "The field 'accrualPeriodicity' had an invalid value.", dataset_name) # conformsTo # optional - check_url_field(False, item, "conformsTo", dataset_name, errs) + check_url_field(False, item, "conformsTo", dataset_name, errs, True) # describedBy # optional - check_url_field(False, item, "describedBy", dataset_name, errs) + check_url_field(False, item, "describedBy", dataset_name, errs, True) # describedByType # optional - if item.get("describedByType") is None: - pass # not required + if item.get("describedByType") is None or is_redacted(item.get("describedByType")): + pass # not required or REDACTED elif not IANA_MIME_REGEX.match(item["describedByType"]): add_error(errs, 5, "Invalid Field Value", "The describedByType \"%s\" is invalid. 
" @@ -310,29 +327,29 @@ def do_validation(doc, errors_array): check_required_string_field(item, "isPartOf", 1, dataset_name, errs) # issued # optional - if item.get("issued") is not None: + if item.get("issued") is not None and not is_redacted(item.get("issued")): if not ISSUED_REGEX.match(item['issued']): add_error(errs, 50, "Invalid Field Value (Optional Fields)", "The field 'issued' is not in a valid format.", dataset_name) # landingPage # optional - check_url_field(False, item, "landingPage", dataset_name, errs) + check_url_field(False, item, "landingPage", dataset_name, errs, True) # language # optional - if item.get("language") is None: - pass # not required + if item.get("language") is None or is_redacted(item.get("language")): + pass # not required or REDACTED elif not isinstance(item["language"], list): add_error(errs, 50, "Invalid Field Value (Optional Fields)", "The field 'language' must be an array, if present.", dataset_name) else: for s in item["language"]: - if not LANGUAGE_REGEX.match(s): + if not LANGUAGE_REGEX.match(s) and not is_redacted(s): add_error(errs, 50, "Invalid Field Value (Optional Fields)", "The field 'language' had an invalid language: \"%s\"" % s, dataset_name) # PrimaryITInvestmentUII # optional - if item.get("PrimaryITInvestmentUII") is None: - pass # not required + if item.get("PrimaryITInvestmentUII") is None or is_redacted(item.get("PrimaryITInvestmentUII")): + pass # not required or REDACTED elif not PRIMARY_IT_INVESTMENT_UII_REGEX.match(item["PrimaryITInvestmentUII"]): add_error(errs, 50, "Invalid Field Value (Optional Fields)", "The field 'PrimaryITInvestmentUII' must be a string " @@ -340,13 +357,16 @@ def do_validation(doc, errors_array): # references # optional if item.get("references") is None: - pass # not required + pass # not required or REDACTED elif not isinstance(item["references"], list): - add_error(errs, 50, "Invalid Field Value (Optional Fields)", - "The field 'references' must be an array, if present.", dataset_name) + if isinstance(item["references"], (str, unicode)) and is_redacted(item.get("references")): + pass + else: + add_error(errs, 50, "Invalid Field Value (Optional Fields)", + "The field 'references' must be an array, if present.", dataset_name) else: for s in item["references"]: - if not URL_REGEX.match(s): + if not URL_REGEX.match(s) and not is_redacted(s): add_error(errs, 50, "Invalid Field Value (Optional Fields)", "The field 'references' had an invalid URL: \"%s\"" % s, dataset_name) @@ -354,8 +374,8 @@ def do_validation(doc, errors_array): check_url_field(False, item, "systemOfRecords", dataset_name, errs) # theme #optional - if item.get("theme") is None: - pass # not required + if item.get("theme") is None or is_redacted(item.get("theme")): + pass # not required or REDACTED elif not isinstance(item["theme"], list): add_error(errs, 50, "Invalid Field Value (Optional Fields)", "The field 'theme' must be an array.", dataset_name) @@ -374,7 +394,7 @@ def do_validation(doc, errors_array): err_type[1], # heading [err_item + (" (%d locations)" % len(errs[err_type][err_item]) if len(errs[err_type][err_item]) else "") for err_item in sorted(errs[err_type], key=lambda x: (-len(errs[err_type][x]), x)) - ])) + ])) def add_error(errs, severity, heading, description, context=None): @@ -426,11 +446,18 @@ def check_required_string_field(obj, field_name, min_length, dataset_name, errs) return True -def check_url_field(required, obj, field_name, dataset_name, errs): +def is_redacted(field): + if isinstance(field, (str, unicode)) and 
REDACTED_REGEX.match(field): + return True + return False + + +def check_url_field(required, obj, field_name, dataset_name, errs, allow_redacted=False): # checks that a required or optional field, if specified, looks like a URL if not required and (field_name not in obj or obj[field_name] is None): return True # not required, so OK if not check_required_field(obj, field_name, (str, unicode), dataset_name, errs): return False # just checking data type + if allow_redacted and is_redacted(obj[field_name]): return True if not URL_REGEX.match(obj[field_name]): add_error(errs, 5, "Invalid Required Field Value", "The '%s' field has an invalid URL: \"%s\"." % (field_name, obj[field_name]), dataset_name)
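
The publisher lookup that PATCH 18 copies into several error paths, and that PATCH 19 then consolidates into `detect_publisher`, encodes a precedence rule: start from the `publisher` extra, then let `publisher_1` through `publisher_5` overwrite in turn, so the highest-numbered non-empty entry (the most specific sub-organization) wins. A minimal sketch of that rule, with a simplified stand-in for `JsonExportBuilder.strip_if_string`:

```python
def strip_if_string(val):
    # Simplified stand-in: strip strings, collapse empty results to None.
    if isinstance(val, str):
        val = val.strip()
    return val or None


def detect_publisher(extras):
    publisher = None
    if extras.get('publisher'):
        publisher = strip_if_string(extras['publisher'])
    # Later keys overwrite earlier ones, so publisher_5 beats publisher_1.
    for i in range(1, 6):
        key = 'publisher_' + str(i)
        if extras.get(key) and strip_if_string(extras[key]):
            publisher = strip_if_string(extras[key])
    return publisher


assert detect_publisher({}) is None
assert detect_publisher({'publisher': 'GSA'}) == 'GSA'
assert detect_publisher({'publisher': 'GSA', 'publisher_1': ' OCSIT '}) == 'OCSIT'
```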
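
PATCH 20's one-line "Sub-orgs export fix" deserves spelling out: in Python, `sub_packages = self, self.get_all_group_packages(group_id=sub)` builds a two-element tuple whose first element is the controller itself, so the loop over `sub_packages` iterated over `(controller, [packages...])` rather than over the package list. A tiny repro of the difference:

```python
class Controller(object):
    def get_all_group_packages(self, group_id):
        return ['pkg-a', 'pkg-b']


c = Controller()

# Pre-patch: the stray "self," makes this a tuple, not a list of packages.
broken = c, c.get_all_group_packages(group_id='sub')
assert broken == (c, ['pkg-a', 'pkg-b'])

# Post-patch: just the package list.
fixed = c.get_all_group_packages(group_id='sub')
assert fixed == ['pkg-a', 'pkg-b']
```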
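
PATCH 21 moves `currentPackageOrg` to module level in build_datajson.py and has `JsonExportBuilder.__init__` reset it with a `global` statement. One editorial caveat, not from the patch itself: a module-level name rebound from a method is shared by every instance, so constructing any new builder clears the value for all of them:

```python
current_package_org = None  # module-level state, as in build_datajson.py


class Builder(object):
    def __init__(self):
        # Rebinds the module-level name, not an instance attribute.
        global current_package_org
        current_package_org = None


def remember(org):
    global current_package_org
    current_package_org = org


remember('GSA')
Builder()  # any new instance wipes the shared value
assert current_package_org is None
```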
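
The `get_contact_point` hunk in PATCH 21 fixes more than the dropped `package` parameter: the old `raise KeyError(required_field)` reused the loop variable left over from the presence check, which is always `'contact_email'` once the loop finishes, so a missing contact *name* was reported as a bad email. A sketch of the corrected behavior as of PATCH 21 (before PATCH 22 adds redaction handling), again with the simplified `strip_if_string`:

```python
from collections import OrderedDict


def strip_if_string(val):
    if isinstance(val, str):
        val = val.strip()
    return val or None


def get_contact_point(extras):
    for required_field in ('contact_name', 'contact_email'):
        if required_field not in extras:
            raise KeyError(required_field)

    fn = strip_if_string(extras['contact_name'])
    if fn is None:
        raise KeyError('contact_name')  # was: KeyError(required_field)

    email = strip_if_string(extras['contact_email'])
    if email is None or '@' not in email:
        raise KeyError('contact_email')

    return OrderedDict([
        ('@type', 'vcard:Contact'),
        ('fn', fn),
        ('hasEmail', 'mailto:' + email),
    ])


cp = get_contact_point({'contact_name': 'Jane Doe',
                        'contact_email': 'jane.doe@agency.gov'})
assert cp['hasEmail'] == 'mailto:jane.doe@agency.gov'
```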
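
PATCH 21 also renames the controller argument from `format` to `fmt` in both `generate_output` methods. The point of the rename: `format` is a Python builtin, and a parameter with that name shadows it for the whole function body, which confuses linters and anyone who later needs the builtin there:

```python
def render(format):
    # Inside this body, format is the string argument, not the builtin;
    # calling format(1234.5, '.1f') here would raise TypeError.
    return format


def render_fixed(fmt):
    return format(1234.5, fmt)  # the builtin is available again


assert render('.1f') == '.1f'
assert render_fixed('.1f') == '1234.5'
```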
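
The `get_validator` cleanup in PATCH 21 drops two statements that sat unreachably after the `return` and loads the schema inside a `with` block, though reusing the name `schema` for both the file handle and the parsed document is a questionable trade. For context, a minimal sketch of the jsonschema pattern the plugin relies on (`Draft4Validator`, `FormatChecker`, `best_match`), with an inline schema standing in for `pod_schema/federal-v1.1/dataset.json`:

```python
from jsonschema import Draft4Validator, FormatChecker
from jsonschema.exceptions import best_match

schema = {
    'type': 'object',
    'required': ['title', 'modified'],
}
validator = Draft4Validator(schema, format_checker=FormatChecker())

# best_match picks the most relevant of possibly many validation errors,
# which is the single error is_valid() logs a warning for.
error = best_match(validator.iter_errors({'title': 'Demo dataset'}))
assert error is not None and 'modified' in error.message
```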
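
PATCH 22 threads one idiom through the validator: a value that is absent or fully redacted is skipped, and anything else must still satisfy the field's normal rule. `REDACTED_REGEX` anchors `[[REDACTED` at the start and `]]` at the end, so partially redacted strings do not qualify. A condensed sketch of the idiom (the media-type regex below is a simplified stand-in, not the validator's `IANA_MIME_REGEX`):

```python
import re

REDACTED_REGEX = re.compile(r'^(\[\[REDACTED).*?(\]\])$')  # as in the patch
MIME_REGEX = re.compile(r'^[-\w.+]+/[-\w.+]+$')            # simplified stand-in


def is_redacted(field):
    return isinstance(field, str) and bool(REDACTED_REGEX.match(field))


def check_media_type(value, errs):
    if value is None or is_redacted(value):
        return  # not required, or redacted: skip the format rule
    if not MIME_REGEX.match(value):
        errs.append('invalid media type: %r' % value)


errs = []
check_media_type(None, errs)              # absent: fine
check_media_type('[[REDACTED]]', errs)    # redacted: fine
check_media_type('text/csv', errs)        # valid: fine
check_media_type('spreadsheet', errs)     # neither: flagged
assert errs == ["invalid media type: 'spreadsheet'"]
```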
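
Finally, the redaction support in `check_url_field` is opt-in: the series passes `allow_redacted=True` for fields like `license`, `landingPage`, `conformsTo`, `describedBy`, and the distribution URLs, while `systemOfRecords` keeps the strict check. A sketch of the resulting decision order, with a crude URL pattern standing in for the validator's `URL_REGEX`:

```python
import re

URL_REGEX = re.compile(r'^https?://\S+$')  # crude stand-in
REDACTED_REGEX = re.compile(r'^(\[\[REDACTED).*?(\]\])$')


def is_redacted(field):
    return isinstance(field, str) and bool(REDACTED_REGEX.match(field))


def check_url_field(required, obj, field_name, errs, allow_redacted=False):
    value = obj.get(field_name)
    if not required and value is None:
        return True  # optional and absent
    if not isinstance(value, str):
        errs.append('%s: wrong type' % field_name)
        return False
    if allow_redacted and is_redacted(value):
        return True  # redaction accepted before the URL rule runs
    if not URL_REGEX.match(value):
        errs.append('%s: invalid URL %r' % (field_name, value))
        return False
    return True


errs = []
assert check_url_field(False, {'license': '[[REDACTED]]'}, 'license',
                       errs, allow_redacted=True)
assert not check_url_field(False, {'systemOfRecords': '[[REDACTED]]'},
                           'systemOfRecords', errs)
assert errs == ["systemOfRecords: invalid URL '[[REDACTED]]'"]
```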