Module fusion_platform.models.data_file

Data file model class file.

author: Matthew Casey

© Digital Content Analysis Technology Ltd

Classes

class DataFile (session)
Expand source code
class DataFile(Model):
    """
    DataFile model class providing attributes and methods to manipulate data file details.

    A data file can be downloaded in a background thread: start it with `download`, poll it with `download_complete` and `download_progress`. Only one
    download per object can be in progress at any time.
    """

    # Override the schema.
    _SCHEMA = DataFileSchema()

    # Override the base model class name.
    _BASE_MODEL_CLASS_NAME = 'Data'  # A string to prevent circular imports.

    # No standard model paths as data file objects cannot be retrieved directly from the API.

    # Add in the custom model paths.
    _PATH_DOWNLOAD_FILE = '/organisations/{organisation_id}/data/{data_id}/download_file/{file_id}'

    # Plus query parameters.
    _QUERY_PREVIEW = 'preview'

    # Geospatial file types.
    _GEOSPATIAL_RASTER = 'raster'
    _GEOSPATIAL_VECTOR = 'vector'

    # File type information - a tuple of typical extension and whether the file type is a geospatial vector, raster or not (None).
    # @formatter:off
    _FILE_TYPES = {
        fusion_platform.FILE_TYPE_GEOTIFF: ('tif', _GEOSPATIAL_RASTER),
        fusion_platform.FILE_TYPE_JPEG2000: ('jpeg', _GEOSPATIAL_RASTER),  # NOTE(review): JPEG 2000 files usually use 'jp2' - confirm 'jpeg' is intended.
        fusion_platform.FILE_TYPE_DEM: ('dem', _GEOSPATIAL_RASTER),
        fusion_platform.FILE_TYPE_GEOJSON: ('json', _GEOSPATIAL_VECTOR),
        fusion_platform.FILE_TYPE_KML: ('kml', _GEOSPATIAL_VECTOR),
        fusion_platform.FILE_TYPE_KMZ: ('kmz', _GEOSPATIAL_VECTOR),
        fusion_platform.FILE_TYPE_CSV: ('csv', None),
        fusion_platform.FILE_TYPE_ESRI_SHAPEFILE: ('zip', _GEOSPATIAL_VECTOR),
        fusion_platform.FILE_TYPE_JPEG: ('jpg', None),
        fusion_platform.FILE_TYPE_PNG: ('png', None),
        fusion_platform.FILE_TYPE_PDF: ('pdf', None),
        fusion_platform.FILE_TYPE_GZIP: ('gz', None),
        fusion_platform.FILE_TYPE_ZIP: ('zip', None),
        fusion_platform.FILE_TYPE_OTHER: ('*', None),
    }
    # @formatter:on

    # STAC file name constants. We do not use ".json" because this is used by services and specific file types. Similarly, the name starts with "." to hide it.
    _STAC_COLLECTION_FILE_NAME = '.collection.stac'
    _STAC_ITEM_FILE_NAME_FORMAT = '.{file_name}.stac'

    def __init__(self, session):
        """
        Initialises the object.

        Args:
            session: The linked session object for interfacing with the Fusion Platform<sup>&reg;</sup>.
        """
        super(DataFile, self).__init__(session)

        # Initialise the fields. Both are None whenever no download is in progress.
        self.__download_progress = None
        self.__download_thread = None

    def download(self, path, preview=False, wait=False):
        """
        Downloads the file to the specified path. Optionally waits for the download to complete.

        Args:
            path: The local path to download the file to.
            preview: Optionally specify that the preview (PNG) of the file should be downloaded instead of the main file.
            wait: Optionally wait for the download to complete? Default False.

        Raises:
            RequestError: if the download fails.
            ModelError: if a download is already in progress, or if no download URL could be obtained.
        """
        # Make sure no download is currently in progress.
        if self.__download_thread is not None:
            raise ModelError(i18n.t('models.data_file.download_already_in_progress'))

        # Obtain the download URL.
        url = self.download_url(preview=preview)

        # Start the download in a separate thread. The progress is seeded with zero bytes before the thread starts so that the callback only ever moves it forward.
        self.__download_progress = (url, path, 0)
        self.__download_thread = RaiseThread(target=self._session.download_file, args=(url, path, self.__download_callback))
        self.__download_thread.start()

        # Optionally wait for completion.
        self.download_complete(wait=wait)  # Ignore response.

    def __download_callback(self, url, destination, size):
        """
        Callback method used to receive progress from download. Updates the download progress.

        Args:
            url: The URL to download as a file.
            destination: The destination file path.
            size: The total size in bytes so far downloaded.
        """
        self.__download_progress = (url, destination, size)

    def download_complete(self, wait=False):
        """
        Checks whether the download has completed. If an error has occurred during the download, then this will raise a corresponding exception. Optionally waits
        for the download to complete.

        Once the download has finished (successfully or not), the download state is cleared so that a new download can be started.

        Args:
            wait: Optionally wait for the download to complete? Default False.

        Returns:
            True if the download is complete, False if it is still running.

        Raises:
            RequestError: if any request fails.
            ModelError: if no download is in progress.
        """
        # Make sure a download is in progress.
        if self.__download_thread is None:
            raise ModelError(i18n.t('models.data_file.no_download'))

        # Check the download thread. This will raise an exception if an error has occurred.
        finished = False

        try:
            # When not waiting, the zero timeout makes join return immediately; when waiting, join blocks until the thread ends. Either way, join will raise
            # an exception if something has gone wrong.
            self.__download_thread.join(timeout=None if wait else 0)

            # Check if the thread is still running after the join.
            finished = not self.__download_thread.is_alive()

        except BaseException:
            # Something went wrong (or we were interrupted). Make sure we mark the download as finished and re-raise the error unchanged.
            finished = True
            raise

        finally:
            # Make sure we clear the progress and thread if it has finished.
            if finished:
                self.__download_progress = None
                self.__download_thread = None

        return finished

    def download_progress(self):
        """
        Returns current download progress.

        Returns:
            A tuple of the URL, destination and total number of bytes downloaded so far.

        Raises:
            ModelError: if no download is in progress.
        """
        # Make sure a download is in progress.
        if self.__download_thread is None:
            raise ModelError(i18n.t('models.data_file.no_download'))

        return self.__download_progress

    def download_url(self, preview=False):
        """
        Obtains a URL which can be used to download the file. This URL is only valid for up to 1 hour.

        Args:
            preview: Optionally specify that the preview (PNG) URL for the file should be obtained instead of the URL for the main file.

        Returns:
            The URL from which the file can be downloaded.

        Raises:
            RequestError: if the request fails.
            ModelError: if the response does not contain a download URL.
        """
        response = self._session.request(path=self._get_path(self.__class__._PATH_DOWNLOAD_FILE, file_id=self.file_id, organisation_id=self.organisation_id),
                                         query_parameters={DataFile._QUERY_PREVIEW: preview})

        # Assume that the URL is held within the expected key within the resulting dictionary.
        url = dict_nested_get(response, [self.__class__._RESPONSE_KEY_EXTRAS, self.__class__._RESPONSE_KEY_URL])

        if url is None:
            raise ModelError(i18n.t('models.data_file.failed_download_url'))

        return url

    def get_stac_item(self, item_file_name_format=_STAC_ITEM_FILE_NAME_FORMAT, collection_file_name=_STAC_COLLECTION_FILE_NAME):
        """
        Obtains the STAC item for the data file from the fields held on the model.

        Args:
            item_file_name_format: The optional item file name format string, where the "file_name" placeholder is replaced with the item's file name. Not used
                by this implementation, which returns the values already held on the model.
            collection_file_name: The optional owning collection file name. Not used by this implementation.

        Returns:
            A tuple of the STAC item definition as a dictionary and the item file name used in the definition. Either element is None if the model does not
            have the corresponding field.
        """

        # Extract the STAC item from the model.
        stac_item = self.stac_item if hasattr(self, Model._FIELD_STAC_ITEM) else None
        stac_item_file = self.stac_item_file if hasattr(self, Model._FIELD_STAC_ITEM_FILE) else None

        return stac_item, stac_item_file

DataFile model class providing attributes and methods to manipulate data file details.

Initialises the object.

Args

session
The linked session object for interfacing with the Fusion Platform®.

Ancestors

  • Model
  • fusion_platform.base.Base

Instance variables

prop attributes

Inherited from: Model.attributes

Returns

The model attributes as a dictionary.

Methods

def delete(self)

Inherited from: Model.delete

Attempts to delete the model object. This assumes the model is deleted using a DELETE RESTful request …

def download(self, path, preview=False, wait=False)
Expand source code
def download(self, path, preview=False, wait=False):
    """
    Starts downloading the file to the given local path in a background thread, optionally blocking until it has finished.

    Args:
        path: The local path to download the file to.
        preview: Optionally specify that the preview (PNG) of the file should be downloaded instead of the main file.
        wait: Optionally wait for the download to complete? Default False.

    Raises:
        RequestError: if the download fails.
        ModelError: if a download is already in progress.
    """
    # Only one download may be active at a time.
    if self.__download_thread is not None:
        raise ModelError(i18n.t('models.data_file.download_already_in_progress'))

    # Work out where the file is to be fetched from.
    source_url = self.download_url(preview=preview)

    # Seed the progress with zero bytes, then hand the transfer over to a worker thread.
    self.__download_progress = (source_url, path, 0)
    worker = RaiseThread(target=self._session.download_file, args=(source_url, path, self.__download_callback))
    self.__download_thread = worker
    worker.start()

    # Check on the worker straight away, blocking until it ends if requested. The result is not needed here.
    self.download_complete(wait=wait)

Downloads the file to the specified path. Optionally waits for the download to complete.

Args

path
The local path to download the file to.
preview
Optionally specify that the preview (PNG) of the file should be downloaded instead of the main file.
wait
Optionally wait for the download to complete? Default False.

Raises

RequestError
if the download fails.
def download_complete(self, wait=False)
Expand source code
def download_complete(self, wait=False):
    """
    Checks whether the download has completed. If an error has occurred during the download, then this will raise a corresponding exception. Optionally waits
    for the download to complete.

    Once the download has finished (successfully or not), the download state is cleared so that a new download can be started.

    Args:
        wait: Optionally wait for the download to complete? Default False.

    Returns:
        True if the download is complete, False if it is still running.

    Raises:
        RequestError: if any request fails.
        ModelError: if no download is in progress.
    """
    # Make sure a download is in progress.
    if self.__download_thread is None:
        raise ModelError(i18n.t('models.data_file.no_download'))

    # Check the download thread. This will raise an exception if an error has occurred.
    finished = False

    try:
        # When not waiting, the zero timeout makes join return immediately; when waiting, join blocks until the thread ends. Either way, join will raise an
        # exception if something has gone wrong.
        self.__download_thread.join(timeout=None if wait else 0)

        # Check if the thread is still running after the join.
        finished = not self.__download_thread.is_alive()

    except BaseException:
        # Something went wrong (or we were interrupted). Make sure we mark the download as finished and re-raise the error unchanged.
        finished = True
        raise

    finally:
        # Make sure we clear the progress and thread if it has finished.
        if finished:
            self.__download_progress = None
            self.__download_thread = None

    return finished

Checks whether the download has completed. If an error has occurred during the download, then this will raise a corresponding exception. Optionally waits for the download to complete.

Args

wait
Optionally wait for the download to complete? Default False.

Returns

True if the download is complete.

Raises

RequestError
if any request fails.
ModelError
if no download is in progress.
def download_progress(self)
Expand source code
def download_progress(self):
    """
    Reports how far the current download has got.

    Returns:
        A tuple of the URL, destination and total number of bytes downloaded so far.

    Raises:
        ModelError: if no download is in progress.
    """
    # Without a download thread there is nothing to report on.
    downloading = self.__download_thread is not None

    if not downloading:
        raise ModelError(i18n.t('models.data_file.no_download'))

    progress = self.__download_progress
    return progress

Returns current download progress.

Returns

A tuple of the URL, destination and total number of bytes downloaded so far.

Raises

ModelError
if no download is in progress.
def download_url(self, preview=False)
Expand source code
def download_url(self, preview=False):
    """
    Requests a URL from which the file can be downloaded. This URL is only valid for up to 1 hour.

    Args:
        preview: Optionally specify that the preview (PNG) URL for the file should be obtained instead of the URL for the main file.

    Returns:
        The download URL.

    Raises:
        RequestError: if the request fails.
        ModelError: if the response does not contain a URL.
    """
    model_class = self.__class__

    # Ask for the download details of this file, flagging whether the preview is wanted.
    response = self._session.request(
        path=self._get_path(model_class._PATH_DOWNLOAD_FILE, file_id=self.file_id, organisation_id=self.organisation_id),
        query_parameters={DataFile._QUERY_PREVIEW: preview})

    # The URL is expected to be nested under the extras key of the response.
    url = dict_nested_get(response, [model_class._RESPONSE_KEY_EXTRAS, model_class._RESPONSE_KEY_URL])

    if url is not None:
        return url

    raise ModelError(i18n.t('models.data_file.failed_download_url'))

Obtains a URL which can be used to download the file. This URL is only valid for up to 1 hour.

Args

preview
Optionally specify that the preview (PNG) URL for the file should be obtained instead of the URL for the main file.

Raises

RequestError
if the request fails.
def get(self, **kwargs)

Inherited from: Model.get

Gets the model object by loading it from the Fusion Platform®. Uses the model's current id and base model id for the get unless …

def get_stac_item(self,
item_file_name_format='.{file_name}.stac',
collection_file_name='.collection.stac')
Expand source code
def get_stac_item(self, item_file_name_format=_STAC_ITEM_FILE_NAME_FORMAT, collection_file_name=_STAC_COLLECTION_FILE_NAME):
    """
    Obtains the STAC item for the data file from the fields held on the model.

    Args:
        item_file_name_format: The optional item file name format string, where the "file_name" placeholder is replaced with the item's file name. Not read
            by this implementation.
        collection_file_name: The optional owning collection file name. Not read by this implementation.

    Returns:
        A tuple of the STAC item definition as a dictionary and the item file name used in the definition. Either element is None if the model does not
        have the corresponding field.
    """
    # Default to None for anything which the model does not hold.
    item = None
    item_file = None

    if hasattr(self, Model._FIELD_STAC_ITEM):
        item = self.stac_item

    if hasattr(self, Model._FIELD_STAC_ITEM_FILE):
        item_file = self.stac_item_file

    return item, item_file

Converts the data file representation into a STAC item.

Args

item_file_name_format
The optional item file name format string, where the "file_name" placeholder is replaced with the item's file name.
collection_file_name
The optional owning collection file name.

Returns

A tuple of the STAC item definition as a dictionary and the item file name used in the definition.

def to_csv(self, exclude=None)

Inherited from: Model.to_csv

Converts the model attributes into a CSV string …

def update(self, **kwargs)

Inherited from: Model.update

Attempts to update the model object with the given values. For models which have not been persisted, the relevant fields are updated without …

class DataFileSchema (*,
only: types.StrSequenceOrSet | None = None,
exclude: types.StrSequenceOrSet = (),
many: bool | None = None,
load_only: types.StrSequenceOrSet = (),
dump_only: types.StrSequenceOrSet = (),
partial: bool | types.StrSequenceOrSet | None = None,
unknown: types.UnknownOption | None = None)
Expand source code
class DataFileSchema(Schema):
    """
    Schema class for data file model.

    Each data file model has the following fields (and nested fields):

    .. include::data_file.md
    """
    # Record identity and timestamps.
    id = fields.UUID(required=True, metadata={'read_only': True})  # Changed to prevent this being updated.

    created_at = fields.DateTime(required=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    updated_at = fields.DateTime(required=True, metadata={'read_only': True})  # Changed to prevent this being updated.

    # Owning organisation and data item.
    organisation_id = fields.UUID(allow_none=True, metadata={'read_only': True, 'title': i18n.t('models.data_file.organisation_id.title'), 'description': i18n.t(
        'models.data_file.organisation_id.description')})  # Added parameter so that it can be used.
    data_id = fields.UUID(required=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    # Removed organisation_id_file_type.

    # The file itself.
    file_id = fields.UUID(required=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    preview_file_id = fields.UUID(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    file_type = fields.String(required=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    file_name = fields.String(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.

    # Geospatial details of the file content.
    resolution = fields.Integer(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    crs = fields.String(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    bounds = fields.List(fields.Decimal(required=True), allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    area = fields.Decimal(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    length = fields.Decimal(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    points = fields.Integer(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    lines = fields.Integer(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    polygons = fields.Integer(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.

    size = fields.Integer(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    error = fields.Boolean(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    publishable = fields.Boolean(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.

    alternative = fields.UUID(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    source = fields.UUID(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.

    selectors = fields.List(fields.Nested(DataFileSelectorSchema()), allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.

    number_of_ingesters = fields.Integer(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    ingesters = fields.Dict(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.

    downloads = fields.Integer(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    geojson = fields.String(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.

    # Removed detail.

    title = fields.String(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.
    description = fields.String(allow_none=True, metadata={'read_only': True})  # Changed to prevent this being updated.

    # Unlike every field above, the STAC pseudo-parameters carry no read-only metadata.
    stac_item = fields.Dict(allow_none=True)  # Added pseudo-parameter.
    stac_item_file = fields.String(allow_none=True)  # Added pseudo-parameter.

    class Meta:
        """
        When loading an object, make sure we exclude any unknown fields, rather than raising an exception.
        """
        # NOTE(review): no ordering option is set here, so field order is whatever the Schema base class provides by default - confirm this is as intended.
        unknown = EXCLUDE

Schema class for data file model.

Each data file model has the following fields (and nested fields):

  • id: The unique identifier for the record.
  • created_at: When was the record created?
  • updated_at: When was the record last updated?
  • organisation_id: The owning organisation.
  • data_id: The data item linked to this file record.
  • file_id: The file associated with this data item.
  • preview_file_id: The preview file associated with this data item.
  • file_type: The type of file.
  • file_name: The name of the file.
  • resolution: For raster files, the resolution in metres.
  • crs: The optional coordinate reference system for the file.
  • bounds: The longitude and latitude bounds for the file (west, south, east, north).
  • area: The optional total area covered by the file content in metres squared.
  • length: The optional total length covered by the file content in metres.
  • points: The optional total number of points in the file.
  • lines: The optional total number of lines in the file.
  • polygons: The optional total number of polygons in the file.
  • size: The size of the file in bytes.
  • error: Was there an error encountered during analysis of the file?
  • publishable: Is the file suitable for publishing as it is without optimisation?
  • alternative: The alternative file to use if this file is not publishable.
  • source: If this file is an alternative created from a non-publishable file, then this specifies the source file.
  • selectors: The selectors for the file.
    • selector: The selector to be applied to the file, such as the required raster band or data field.
    • category: The category associated with the selector.
    • data_type: The data type associated with the selector.
    • unit: The optional unit associated with the selector.
    • validation: The optional validation for the selector. This must be supplied for date/time and constrained values.
    • area: The optional area for the selector in metres squared.
    • length: The optional length for the selector in metres.
    • points: The optional total number of points in the selector.
    • lines: The optional total number of lines in the selector.
    • polygons: The optional total number of polygons in the selector.
    • initial_values: The first initial values associated with the selector.
    • minimum: The minimum value associated with the selector values.
    • maximum: The maximum value associated with the selector values.
    • mean: The mean value associated with the selector values.
    • sd: The standard deviation associated with the selector values.
    • histogram_minimum: The histogram minimum value associated with the selector values.
    • histogram_maximum: The histogram maximum value associated with the selector values.
    • histogram: The histogram associated with the selector values.
  • number_of_ingesters: The expected number of ingesters that may be used to analyse an uploaded, publishable file.
  • ingesters: The ingester analysis.
  • downloads: How many times has the file been downloaded?
  • geojson: The content of the associated file if it is a GeoJSON file (of limited size).
  • title: The title for the file.
  • description: The description of the file.
  • stac_item: The STAC item associated with this file.
  • stac_item_file: The name of the STAC item file.

Ancestors

  • marshmallow.schema.Schema

Class variables

var OPTIONS_CLASS : type

Defines defaults for marshmallow.Schema.Meta.

var TYPE_MAPPING : dict[type, type[Field]]

The type of the None singleton.

var dict_class : type[dict]

dict() -> new empty dictionary dict(mapping) -> new dictionary initialized from a mapping object's (key, value) pairs dict(iterable) -> new dictionary initialized as if via: d = {} for k, v in iterable: d[k] = v dict(**kwargs) -> new dictionary initialized with the name=value pairs in the keyword argument list. For example: dict(one=1, two=2)

var error_messages : dict[str, str]

The type of the None singleton.