# Source code for invenio_files_rest.storage.base

# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2016-2019 CERN.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""File storage base module."""

import hashlib
from calendar import timegm
from functools import partial

from ..errors import FileSizeError, StorageError, UnexpectedFileSizeError
from ..helpers import chunk_size_or_default, compute_checksum, send_stream


def check_sizelimit(size_limit, bytes_written, total_size):
    """Validate the running byte count against the configured bounds.

    :param size_limit: Upper bound on the number of bytes that may be
        written. Either a plain integer or an object that can be compared
        with an integer and exposes a ``reason`` attribute. ``None``
        disables this check.
    :param bytes_written: The total number of bytes written so far.
    :param total_size: The announced total file size, or ``None`` when it
        is unknown (which disables the second check).
    :raises invenio_files_rest.errors.FileSizeError: If the bytes written
        are greater than the size limit.
    :raises invenio_files_rest.errors.UnexpectedFileSizeError: If the bytes
        written exceed the announced total size.
    """
    over_limit = size_limit is not None and bytes_written > size_limit
    if over_limit:
        # Plain integers carry no explanation of their own; any other limit
        # object supplies the message through its ``reason`` attribute.
        if isinstance(size_limit, int):
            message = "File size limit exceeded."
        else:
            message = size_limit.reason
        raise FileSizeError(description=message)

    # A stream must never deliver more data than it announced.
    if total_size is None:
        return
    if bytes_written > total_size:
        raise UnexpectedFileSizeError(description="File is bigger than expected.")


def check_size(bytes_written, total_size):
    """Verify that at least the expected number of bytes was written.

    The check is skipped when ``total_size`` is falsy (``None`` or ``0``).

    :param bytes_written: The total number of bytes written.
    :param total_size: The expected total file size.
    :raises invenio_files_rest.errors.UnexpectedFileSizeError: If fewer
        bytes than ``total_size`` were written.
    """
    if not total_size:
        # No expectation was announced, so there is nothing to verify.
        return
    if bytes_written < total_size:
        raise UnexpectedFileSizeError(description="File is smaller than expected.")


class FileStorage(object):
    """Base class for storage interface to a single file.

    Subclasses must implement :meth:`open`, :meth:`delete`,
    :meth:`initialize`, :meth:`save` and :meth:`update`. The remaining
    public methods (:meth:`send_file`, :meth:`checksum`, :meth:`copy`) have
    default implementations built on top of those primitives.
    """

    def __init__(self, size=None, modified=None):
        """Initialize storage object.

        :param size: Size of the file, if known. It is handed to
            ``send_stream`` and to the checksum helper.
        :param modified: Last-modification time as an object exposing
            ``timetuple()``; it is stored as an integer timestamp computed
            with :func:`calendar.timegm`. ``None`` (or any falsy value)
            leaves the timestamp unset.
        """
        self._size = size
        self._modified = timegm(modified.timetuple()) if modified else None

    def open(self, mode=None):
        """Open the file.

        The caller is responsible for closing the file.
        """
        raise NotImplementedError

    def delete(self):
        """Delete the file."""
        raise NotImplementedError

    def initialize(self, size=0):
        """Initialize the file on the storage + truncate to the given size."""
        raise NotImplementedError

    def save(
        self,
        incoming_stream,
        size_limit=None,
        size=None,
        chunk_size=None,
        progress_callback=None,
    ):
        """Save incoming stream to file storage."""
        raise NotImplementedError

    def update(
        self,
        incoming_stream,
        seek=0,
        size=None,
        chunk_size=None,
        progress_callback=None,
    ):
        """Update part of file with incoming stream."""
        raise NotImplementedError

    #
    # Default implementation
    #
    def send_file(
        self,
        filename,
        mimetype=None,
        restricted=True,
        checksum=None,
        trusted=False,
        chunk_size=None,
        as_attachment=False,
    ):
        """Send the file to the client.

        :param filename: Filename passed on to ``send_stream``.
        :param mimetype: Passed on to ``send_stream``.
        :param restricted: Passed on to ``send_stream``.
        :param checksum: Checksum string of the form ``"<algo>:<value>"``.
            It is used as the ETag; when the algorithm is ``md5`` the value
            is additionally passed as ``content_md5``.
        :param trusted: Passed on to ``send_stream``.
        :param chunk_size: Passed on to ``send_stream``.
        :param as_attachment: Passed on to ``send_stream``.
        :returns: Whatever ``send_stream`` returns.
        :raises invenio_files_rest.errors.StorageError: If the file cannot
            be opened or if building the response fails.
        """
        try:
            fp = self.open(mode="rb")
        except Exception as e:
            raise StorageError("Could not send file: {}".format(e))

        try:
            md5_checksum = None
            if checksum:
                algo, value = checksum.split(":")
                if algo == "md5":
                    md5_checksum = value

            # Send stream is responsible for closing the file.
            return send_stream(
                fp,
                filename,
                self._size,
                self._modified,
                mimetype=mimetype,
                restricted=restricted,
                etag=checksum,
                content_md5=md5_checksum,
                chunk_size=chunk_size,
                trusted=trusted,
                as_attachment=as_attachment,
            )
        except Exception as e:
            # send_stream did not take ownership, so close the file here.
            fp.close()
            raise StorageError("Could not send file: {}".format(e))

    def checksum(self, chunk_size=None, progress_callback=None, **kwargs):
        """Compute checksum of file.

        :param chunk_size: Chunk size to read from the file; forwarded to
            :meth:`_compute_checksum`.
        :param progress_callback: Optional callback forwarded to
            :meth:`_compute_checksum`.
        :param kwargs: Accepted for interface compatibility; not used by
            this default implementation.
        :returns: The value returned by :meth:`_compute_checksum`.
        :raises invenio_files_rest.errors.StorageError: If the checksum
            computation fails.
        """
        fp = self.open(mode="rb")
        try:
            # Forward the caller's chunk size instead of discarding it.
            return self._compute_checksum(
                fp,
                size=self._size,
                chunk_size=chunk_size,
                progress_callback=progress_callback,
            )
        finally:
            fp.close()

    def copy(self, src, chunk_size=None, progress_callback=None):
        """Copy data from another file instance.

        :param src: Source file instance; it is opened with ``mode="rb"``
            and closed again once the copy finishes or fails.
        :param chunk_size: Chunk size to read from source stream.
        :param progress_callback: Optional callback forwarded to
            :meth:`save`.
        :returns: Whatever :meth:`save` returns.
        """
        fp = src.open(mode="rb")
        try:
            return self.save(
                fp, chunk_size=chunk_size, progress_callback=progress_callback
            )
        finally:
            fp.close()

    #
    # Helpers
    #
    def _init_hash(self):
        """Initialize message digest object.

        Overwrite this method if you want to use different checksum
        algorithm for your storage backend.

        :returns: Tuple ``(algorithm_name, digest_object)``.
        """
        return "md5", hashlib.md5()

    def _compute_checksum(
        self, stream, size=None, chunk_size=None, progress_callback=None, **kwargs
    ):
        """Get helper method to compute checksum from a stream.

        Naive implementation that can be overwritten by subclasses in order
        to provide more efficient implementation.

        :param stream: Stream to read the data from.
        :param size: Total size; when both it and ``progress_callback`` are
            truthy, the callback is pre-bound with ``size`` as its first
            argument. Otherwise no progress is reported.
        :param chunk_size: Chunk size forwarded to ``compute_checksum``.
        :param progress_callback: Optional progress callback.
        :param kwargs: Extra keyword arguments forwarded to
            ``compute_checksum``.
        :returns: Whatever ``compute_checksum`` returns.
        :raises invenio_files_rest.errors.StorageError: If anything fails
            while computing the checksum.
        """
        if progress_callback and size:
            progress_callback = partial(progress_callback, size)
        else:
            progress_callback = None

        try:
            algo, m = self._init_hash()
            return compute_checksum(
                stream,
                algo,
                m,
                chunk_size=chunk_size,
                progress_callback=progress_callback,
                **kwargs
            )
        except Exception as e:
            raise StorageError("Could not compute checksum of file: {0}".format(e))

    def _write_stream(
        self,
        src,
        dst,
        size=None,
        size_limit=None,
        chunk_size=None,
        progress_callback=None,
    ):
        """Get helper to save stream from src to dest + compute checksum.

        :param src: Source stream.
        :param dst: Destination stream.
        :param size: Expected number of bytes. Writing more raises through
            ``check_sizelimit``; ending with fewer raises through
            ``check_size``.
        :param size_limit: ``FileSizeLimit`` instance to limit number of
            bytes to write.
        :param chunk_size: Chunk size to read from ``src``; normalised with
            ``chunk_size_or_default``.
        :param progress_callback: Optional callable. It is invoked as
            ``callback(None, bytes_written)`` after every chunk and as
            ``callback(bytes_written, bytes_written)`` once the source is
            exhausted.
        :returns: Tuple ``(bytes_written, checksum)`` where ``checksum`` is
            ``"<algo>:<hexdigest>"``, or ``None`` when :meth:`_init_hash`
            supplies a falsy digest object.
        """
        chunk_size = chunk_size_or_default(chunk_size)

        algo, m = self._init_hash()
        bytes_written = 0

        while True:
            # Check that size limits aren't bypassed
            check_sizelimit(size_limit, bytes_written, size)

            chunk = src.read(chunk_size)

            if not chunk:
                if progress_callback:
                    progress_callback(bytes_written, bytes_written)
                break

            dst.write(chunk)

            bytes_written += len(chunk)

            if m:
                m.update(chunk)

            if progress_callback:
                progress_callback(None, bytes_written)

        check_size(bytes_written, size)

        return (
            bytes_written,
            "{0}:{1}".format(algo, m.hexdigest()) if m else None,
        )