diff --git a/metadata_catalogue/datasets/libs/harvesters.py b/metadata_catalogue/datasets/libs/harvesters.py
index 6e86dca..6d7ad92 100644
--- a/metadata_catalogue/datasets/libs/harvesters.py
+++ b/metadata_catalogue/datasets/libs/harvesters.py
@@ -27,7 +27,6 @@ def handle_file_as_darwincore_zip(file: NamedTemporaryFile, dataset: Dataset):
 
 
 def harvest_dataset(dataset_id: int):
-    print(dataset_id)
     try:
         dataset = Dataset.objects.get(id=dataset_id)
         dataset.set_fetch_message("", success=False)
diff --git a/metadata_catalogue/datasets/libs/ipt.py b/metadata_catalogue/datasets/libs/ipt.py
index e803034..35f038e 100644
--- a/metadata_catalogue/datasets/libs/ipt.py
+++ b/metadata_catalogue/datasets/libs/ipt.py
@@ -18,7 +18,11 @@ def rss_to_datasets(rss_content):
         archive = item.find("ipt:dwca")
         if archive:
             d, _ = Dataset.objects.update_or_create(
-                defaults={"name": item.find("title").text, "fetch_type": Dataset.FetchType.DARWINCORE},
+                defaults={
+                    "name": item.find("title").text,
+                    "source": item.find("link").text,
+                    "fetch_type": Dataset.FetchType.DARWINCORE,
+                },
                 fetch_url=archive.text,
             )
             async_task("metadata_catalogue.datasets.libs.harvesters.harvest_dataset", d.id)
diff --git a/metadata_catalogue/datasets/migrations/0003_dataset_source.py b/metadata_catalogue/datasets/migrations/0003_dataset_source.py
new file mode 100644
index 0000000..0050d5b
--- /dev/null
+++ b/metadata_catalogue/datasets/migrations/0003_dataset_source.py
@@ -0,0 +1,17 @@
+# Generated by Django 4.2.6 on 2023-10-31 13:18
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("datasets", "0002_taxonomy_common_alter_taxonomy_name_and_more"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="dataset",
+            name="source",
+            field=models.URLField(blank=True, null=True),
+        ),
+    ]
diff --git a/metadata_catalogue/datasets/migrations/0004_dataset_last_fetch_at_dataset_uuid_and_more.py b/metadata_catalogue/datasets/migrations/0004_dataset_last_fetch_at_dataset_uuid_and_more.py
new file mode 100644
index 0000000..b290447
--- /dev/null
+++ b/metadata_catalogue/datasets/migrations/0004_dataset_last_fetch_at_dataset_uuid_and_more.py
@@ -0,0 +1,49 @@
+# Generated by Django 4.2.6 on 2023-11-01 07:29
+
+from django.db import migrations, models
+import uuid
+
+
+def create_uuid(apps, schema_editor):
+    """Give every existing Dataset row its own UUID.
+
+    Runs between adding the nullable ``uuid`` column and making it
+    non-null and unique, so each pre-existing row holds a distinct value
+    before the unique constraint is created.
+    """
+    # Look the model up through ``apps`` rather than importing it directly.
+    Dataset = apps.get_model('datasets', 'Dataset')
+    for d in Dataset.objects.all():
+        d.uuid = uuid.uuid4()
+        # Write only the new column; the other fields are left untouched.
+        d.save(update_fields=["uuid"])
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("datasets", "0003_dataset_source"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="dataset",
+            name="last_fetch_at",
+            field=models.DateTimeField(blank=True, null=True),
+        ),
+        migrations.AddField(
+            model_name="dataset",
+            name="uuid",
+            field=models.UUIDField(null=True),
+        ),
+        # A no-op reverse keeps the migration reversible; no data needs undoing.
+        migrations.RunPython(create_uuid, reverse_code=migrations.RunPython.noop),
+        migrations.AlterField(
+            model_name="dataset",
+            name="uuid",
+            field=models.UUIDField(default=uuid.uuid4),
+        ),
+        migrations.AddConstraint(
+            model_name="dataset",
+            constraint=models.UniqueConstraint(fields=("uuid",), name="unique_dataset_uuid"),
+        ),
+    ]
diff --git a/metadata_catalogue/datasets/models.py b/metadata_catalogue/datasets/models.py
index 69bf180..6078a6d 100644
--- a/metadata_catalogue/datasets/models.py
+++ b/metadata_catalogue/datasets/models.py
@@ -1,6 +1,9 @@
+import uuid
+
 from django.contrib.gis.db import models
 from django.db.models import Value
 from django.db.models.functions import Coalesce
+from django.utils.timezone import now
 from django.utils.translation import gettext_lazy as _
 
 from metadata_catalogue.core.fields import AutoOneToOneField
@@ -11,6 +14,8 @@ class FetchType(models.IntegerChoices):
         DARWINCORE = 1, "DarwinCORE"
 
     name = models.CharField(max_length=250, verbose_name=_("Internal name"))
+    uuid = models.UUIDField(default=uuid.uuid4)
+    source = models.URLField(null=True, blank=True)
     fetch_url = models.URLField(verbose_name=_("URL of the resource to fetch"), null=True, blank=True)
     fetch_type = models.IntegerField(choices=FetchType.choices, null=True, blank=True)
     created_at = models.DateTimeField(auto_now_add=True, verbose_name=_("Created at"))
@@ -34,6 +39,7 @@ class FetchType(models.IntegerChoices):
     )
     fetch_success = models.BooleanField(default=False)
     fetch_message = models.TextField(null=True, blank=True)
+    last_fetch_at = models.DateTimeField(null=True, blank=True)
 
     def __str__(self):
         return self.name
@@ -47,6 +53,7 @@ def set_fetch_message(self, message, *args, append=False, success=None, logger_f
 
         if success is not None:
             self.fetch_success = success
+            self.last_fetch_at = now()
 
         if logger_fn:
             logger_fn(message)
@@ -55,7 +62,10 @@ def set_fetch_message(self, message, *args, append=False, success=None, logger_f
 
     class Meta:
         verbose_name = _("Dataset")
-        constraints = [models.UniqueConstraint(name="unique_dataset_source", fields=["fetch_url"])]
+        constraints = [
+            models.UniqueConstraint(name="unique_dataset_source", fields=["fetch_url"]),
+            models.UniqueConstraint(name="unique_dataset_uuid", fields=["uuid"]),
+        ]
 
 
 class Keyword(models.Model):