cirro.sdk.file

  1import gzip
  2import json
  3import pickle
  4from io import BytesIO, StringIO
  5from pathlib import Path
  6from typing import List
  7
  8from typing import TYPE_CHECKING
  9if TYPE_CHECKING:
 10    import anndata
 11    from pandas import DataFrame
 12
 13from cirro.cirro_client import CirroApi
 14from cirro.models.file import File, PathLike
 15from cirro.sdk.asset import DataPortalAssets, DataPortalAsset
 16from cirro.sdk.exceptions import DataPortalInputError
 17from cirro.utils import convert_size
 18
 19
 20class DataPortalFile(DataPortalAsset):
 21    """
 22    Datasets are made up of a collection of File objects in the Data Portal.
 23    """
 24
 25    def __init__(self, file: File, client: CirroApi):
 26        """
 27        Instantiate by listing files from a dataset.
 28
 29        ```python
 30        from cirro import DataPortal
 31        portal = DataPortal()
 32        dataset = portal.get_dataset(
 33            project="id-or-name-of-project",
 34            dataset="id-or-name-of-dataset"
 35        )
 36        files = dataset.list_files()
 37        ```
 38        """
 39        # Attach the file object
 40        self._file = file
 41        self._client = client
 42
 43    # Note that the 'name' and 'id' attributes are set to the relative path
 44    # The purpose of this is to support the DataPortalAssets class functions
 45    @property
 46    def id(self) -> str:
 47        """Relative path of file within the dataset"""
 48        return self._file.relative_path
 49
 50    @property
 51    def name(self) -> str:
 52        """Relative path of file within the dataset"""
 53        return self._file.relative_path
 54
 55    @property
 56    def file_name(self) -> str:
 57        """Name of file, excluding the full folder path within the dataset"""
 58        return self._file.name
 59
 60    @property
 61    def relative_path(self) -> str:
 62        """Relative path of file within the dataset"""
 63        return self._file.relative_path
 64
 65    @property
 66    def absolute_path(self) -> str:
  67        """Full URI to file object in AWS S3"""
 68        return self._file.absolute_path
 69
 70    @property
 71    def metadata(self) -> dict:
 72        """File metadata"""
 73        return self._file.metadata
 74
 75    @property
 76    def size_bytes(self) -> int:
 77        """File size (in bytes)"""
 78        return self._file.size
 79
 80    @property
 81    def size(self) -> str:
  82        """File size converted to human-readable form (e.g., 4.50 GB)"""
 83        return convert_size(self._file.size)
 84
 85    def __str__(self):
 86        return f"{self.relative_path} ({self.size})"
 87
 88    def _get(self) -> bytes:
 89        """Internal method to call client.file.get_file"""
 90
 91        return self._client.file.get_file(self._file)
 92
 93    def read_csv(self, compression='infer', encoding='utf-8', **kwargs) -> 'DataFrame':
 94        """
 95        Parse the file as a Pandas DataFrame.
 96
 97        The default field separator is a comma (for CSV), use sep='\\t' for TSV.
 98
 99        File compression is inferred from the extension, but can be set
100        explicitly with the compression= flag.
101
102        All other keyword arguments are passed to pandas.read_csv
103        https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
104        """
105        import pandas
106
107        if compression == 'infer':
108            # If the file appears to be compressed
109            if self.relative_path.endswith('.gz'):
110                compression = dict(method='gzip')
111            elif self.relative_path.endswith('.bz2'):
112                compression = dict(method='bz2')
113            elif self.relative_path.endswith('.xz'):
114                compression = dict(method='xz')
115            elif self.relative_path.endswith('.zst'):
116                compression = dict(method='zstd')
117            else:
118                compression = None
119
120        if compression is not None:
121            handle = BytesIO(self._get())
122        else:
123            handle = StringIO(self._get().decode(encoding))
124
125        df = pandas.read_csv(
126            handle,
127            compression=compression,
128            encoding=encoding,
129            **kwargs
130        )
131        handle.close()
132        return df
133
134    def read_h5ad(self) -> 'anndata.AnnData':
135        """Read an AnnData object from a file."""
136        # Import the anndata library, and raise an error if it is not available
137        try:
138            import anndata as ad # noqa
139        except ImportError:
140            raise ImportError("The anndata library is required to read AnnData files. "
141                              "Please install it using 'pip install anndata'.")
142
143        # Download the file to a temporary file handle and parse the contents
144        with BytesIO(self._get()) as handle:
145            return ad.read_h5ad(handle)
146
147    def read_json(self, **kwargs):
148        """Read the file contents as a parsed JSON object (dict, list, etc.)."""
149        return json.loads(self._get(), **kwargs)
150
151    def read_parquet(self, **kwargs) -> 'DataFrame':
152        """
153        Read a Parquet file as a Pandas DataFrame.
154
155        Requires ``pyarrow`` or ``fastparquet`` to be installed.
156        All keyword arguments are passed to :func:`pandas.read_parquet`.
157        """
158        import pandas
159        return pandas.read_parquet(BytesIO(self._get()), **kwargs)
160
161    def read_feather(self, **kwargs) -> 'DataFrame':
162        """
163        Read a Feather file as a Pandas DataFrame.
164
165        Requires ``pyarrow`` to be installed.
166        All keyword arguments are passed to :func:`pandas.read_feather`.
167        """
168        import pandas
169        return pandas.read_feather(BytesIO(self._get()), **kwargs)
170
171    def read_pickle(self, **kwargs):
172        """Read the file contents as a Python pickle object."""
173        return pickle.loads(self._get(), **kwargs)
174
175    def read_excel(self, **kwargs) -> 'DataFrame':
176        """
177        Read an Excel file (``.xlsx`` / ``.xls``) as a Pandas DataFrame.
178
179        Requires ``openpyxl`` (for ``.xlsx``) or ``xlrd`` (for ``.xls``).
180        All keyword arguments are passed to :func:`pandas.read_excel`.
181        """
182        import pandas
183        return pandas.read_excel(BytesIO(self._get()), **kwargs)
184
185    def readlines(self, encoding='utf-8', compression=None) -> List[str]:
186        """Read the file contents as a list of lines."""
187
188        return self.read(
189            encoding=encoding,
190            compression=compression
191        ).splitlines()
192
193    def read(self, encoding='utf-8', compression=None) -> str:
194        """Read the file contents as text."""
195
196        # Get the raw file contents
197        cont = self._get()
198
199        # If the file is uncompressed
200        if compression is None:
201            return cont.decode(encoding)
202        # If the file is compressed
203        else:
204
205            # Only gzip-compression is supported currently
206            if compression != "gzip":
207                raise DataPortalInputError("compression may be 'gzip' or None")
208
209            with gzip.open(
210                BytesIO(
211                    cont
212                ),
213                'rt',
214                encoding=encoding
215            ) as handle:
216                return handle.read()
217
218    def read_bytes(self) -> BytesIO:
219        """Get a generic BytesIO object representing the Data Portal File, to be passed into readers."""
220        return BytesIO(self._get())
221
222    def download(self, download_location: str = None) -> Path:
223        """
224        Download the file to a local directory.
225
226        Returns:
 227            Path to downloaded file
228        """
229
230        if download_location is None:
231            raise DataPortalInputError("Must provide download location")
232
233        return self._client.file.download_files(
234            self._file.access_context,
235            download_location,
236            [self.relative_path]
237        )[0]
238
239    def validate(self, local_path: PathLike):
240        """
241        Validate that the local file matches the remote file by comparing checksums.
242
243        Args:
244            local_path (PathLike): Path to the local file to validate
245        Raises:
246            ValueError: If checksums do not match
247            RuntimeWarning: If the remote checksum is not available or not supported
248        """
249        self._client.file.validate_file(self._file, local_path)
250
251    def is_valid(self, local_path: PathLike) -> bool:
252        """
253        Check if the local file matches the remote file by comparing checksums.
254
255        Args:
256            local_path (PathLike): Path to the local file to validate
257        Returns:
258            bool: True if the local file matches the remote file, False otherwise
259        Raises:
260            RuntimeWarning: If the remote checksum is not available or not supported
261        """
262        if not local_path:
263            raise DataPortalInputError("Must provide local path to validate file")
264
265        return self._client.file.is_valid_file(self._file, local_path)
266
267
268class DataPortalFiles(DataPortalAssets[DataPortalFile]):
269    """Collection of DataPortalFile objects."""
270
271    asset_name = "file"
272
273    def download(self, download_location: str = None) -> List[Path]:
274        """
275        Download the collection of files to a local directory.
276
277        Returns:
278            List of paths to downloaded files.
279        """
280
281        local_paths = []
282        for f in self:
283            local_paths.append(f.download(download_location))
284        return local_paths
class DataPortalFile(cirro.sdk.asset.DataPortalAsset):
 21class DataPortalFile(DataPortalAsset):
 22    """
 23    Datasets are made up of a collection of File objects in the Data Portal.
 24    """
 25
 26    def __init__(self, file: File, client: CirroApi):
 27        """
 28        Instantiate by listing files from a dataset.
 29
 30        ```python
 31        from cirro import DataPortal
 32        portal = DataPortal()
 33        dataset = portal.get_dataset(
 34            project="id-or-name-of-project",
 35            dataset="id-or-name-of-dataset"
 36        )
 37        files = dataset.list_files()
 38        ```
 39        """
 40        # Attach the file object
 41        self._file = file
 42        self._client = client
 43
 44    # Note that the 'name' and 'id' attributes are set to the relative path
 45    # The purpose of this is to support the DataPortalAssets class functions
 46    @property
 47    def id(self) -> str:
 48        """Relative path of file within the dataset"""
 49        return self._file.relative_path
 50
 51    @property
 52    def name(self) -> str:
 53        """Relative path of file within the dataset"""
 54        return self._file.relative_path
 55
 56    @property
 57    def file_name(self) -> str:
 58        """Name of file, excluding the full folder path within the dataset"""
 59        return self._file.name
 60
 61    @property
 62    def relative_path(self) -> str:
 63        """Relative path of file within the dataset"""
 64        return self._file.relative_path
 65
 66    @property
 67    def absolute_path(self) -> str:
  68        """Full URI to file object in AWS S3"""
 69        return self._file.absolute_path
 70
 71    @property
 72    def metadata(self) -> dict:
 73        """File metadata"""
 74        return self._file.metadata
 75
 76    @property
 77    def size_bytes(self) -> int:
 78        """File size (in bytes)"""
 79        return self._file.size
 80
 81    @property
 82    def size(self) -> str:
  83        """File size converted to human-readable form (e.g., 4.50 GB)"""
 84        return convert_size(self._file.size)
 85
 86    def __str__(self):
 87        return f"{self.relative_path} ({self.size})"
 88
 89    def _get(self) -> bytes:
 90        """Internal method to call client.file.get_file"""
 91
 92        return self._client.file.get_file(self._file)
 93
 94    def read_csv(self, compression='infer', encoding='utf-8', **kwargs) -> 'DataFrame':
 95        """
 96        Parse the file as a Pandas DataFrame.
 97
 98        The default field separator is a comma (for CSV), use sep='\\t' for TSV.
 99
100        File compression is inferred from the extension, but can be set
101        explicitly with the compression= flag.
102
103        All other keyword arguments are passed to pandas.read_csv
104        https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
105        """
106        import pandas
107
108        if compression == 'infer':
109            # If the file appears to be compressed
110            if self.relative_path.endswith('.gz'):
111                compression = dict(method='gzip')
112            elif self.relative_path.endswith('.bz2'):
113                compression = dict(method='bz2')
114            elif self.relative_path.endswith('.xz'):
115                compression = dict(method='xz')
116            elif self.relative_path.endswith('.zst'):
117                compression = dict(method='zstd')
118            else:
119                compression = None
120
121        if compression is not None:
122            handle = BytesIO(self._get())
123        else:
124            handle = StringIO(self._get().decode(encoding))
125
126        df = pandas.read_csv(
127            handle,
128            compression=compression,
129            encoding=encoding,
130            **kwargs
131        )
132        handle.close()
133        return df
134
135    def read_h5ad(self) -> 'anndata.AnnData':
136        """Read an AnnData object from a file."""
137        # Import the anndata library, and raise an error if it is not available
138        try:
139            import anndata as ad # noqa
140        except ImportError:
141            raise ImportError("The anndata library is required to read AnnData files. "
142                              "Please install it using 'pip install anndata'.")
143
144        # Download the file to a temporary file handle and parse the contents
145        with BytesIO(self._get()) as handle:
146            return ad.read_h5ad(handle)
147
148    def read_json(self, **kwargs):
149        """Read the file contents as a parsed JSON object (dict, list, etc.)."""
150        return json.loads(self._get(), **kwargs)
151
152    def read_parquet(self, **kwargs) -> 'DataFrame':
153        """
154        Read a Parquet file as a Pandas DataFrame.
155
156        Requires ``pyarrow`` or ``fastparquet`` to be installed.
157        All keyword arguments are passed to :func:`pandas.read_parquet`.
158        """
159        import pandas
160        return pandas.read_parquet(BytesIO(self._get()), **kwargs)
161
162    def read_feather(self, **kwargs) -> 'DataFrame':
163        """
164        Read a Feather file as a Pandas DataFrame.
165
166        Requires ``pyarrow`` to be installed.
167        All keyword arguments are passed to :func:`pandas.read_feather`.
168        """
169        import pandas
170        return pandas.read_feather(BytesIO(self._get()), **kwargs)
171
172    def read_pickle(self, **kwargs):
173        """Read the file contents as a Python pickle object."""
174        return pickle.loads(self._get(), **kwargs)
175
176    def read_excel(self, **kwargs) -> 'DataFrame':
177        """
178        Read an Excel file (``.xlsx`` / ``.xls``) as a Pandas DataFrame.
179
180        Requires ``openpyxl`` (for ``.xlsx``) or ``xlrd`` (for ``.xls``).
181        All keyword arguments are passed to :func:`pandas.read_excel`.
182        """
183        import pandas
184        return pandas.read_excel(BytesIO(self._get()), **kwargs)
185
186    def readlines(self, encoding='utf-8', compression=None) -> List[str]:
187        """Read the file contents as a list of lines."""
188
189        return self.read(
190            encoding=encoding,
191            compression=compression
192        ).splitlines()
193
194    def read(self, encoding='utf-8', compression=None) -> str:
195        """Read the file contents as text."""
196
197        # Get the raw file contents
198        cont = self._get()
199
200        # If the file is uncompressed
201        if compression is None:
202            return cont.decode(encoding)
203        # If the file is compressed
204        else:
205
206            # Only gzip-compression is supported currently
207            if compression != "gzip":
208                raise DataPortalInputError("compression may be 'gzip' or None")
209
210            with gzip.open(
211                BytesIO(
212                    cont
213                ),
214                'rt',
215                encoding=encoding
216            ) as handle:
217                return handle.read()
218
219    def read_bytes(self) -> BytesIO:
220        """Get a generic BytesIO object representing the Data Portal File, to be passed into readers."""
221        return BytesIO(self._get())
222
223    def download(self, download_location: str = None) -> Path:
224        """
225        Download the file to a local directory.
226
227        Returns:
 228            Path to downloaded file
229        """
230
231        if download_location is None:
232            raise DataPortalInputError("Must provide download location")
233
234        return self._client.file.download_files(
235            self._file.access_context,
236            download_location,
237            [self.relative_path]
238        )[0]
239
240    def validate(self, local_path: PathLike):
241        """
242        Validate that the local file matches the remote file by comparing checksums.
243
244        Args:
245            local_path (PathLike): Path to the local file to validate
246        Raises:
247            ValueError: If checksums do not match
248            RuntimeWarning: If the remote checksum is not available or not supported
249        """
250        self._client.file.validate_file(self._file, local_path)
251
252    def is_valid(self, local_path: PathLike) -> bool:
253        """
254        Check if the local file matches the remote file by comparing checksums.
255
256        Args:
257            local_path (PathLike): Path to the local file to validate
258        Returns:
259            bool: True if the local file matches the remote file, False otherwise
260        Raises:
261            RuntimeWarning: If the remote checksum is not available or not supported
262        """
263        if not local_path:
264            raise DataPortalInputError("Must provide local path to validate file")
265
266        return self._client.file.is_valid_file(self._file, local_path)

Datasets are made up of a collection of File objects in the Data Portal.

DataPortalFile(file: cirro.models.file.File, client: cirro.CirroApi)
26    def __init__(self, file: File, client: CirroApi):
27        """
28        Instantiate by listing files from a dataset.
29
30        ```python
31        from cirro import DataPortal
32        portal = DataPortal()
33        dataset = portal.get_dataset(
34            project="id-or-name-of-project",
35            dataset="id-or-name-of-dataset"
36        )
37        files = dataset.list_files()
38        ```
39        """
40        # Attach the file object
41        self._file = file
42        self._client = client

Instantiate by listing files from a dataset.

from cirro import DataPortal
portal = DataPortal()
dataset = portal.get_dataset(
    project="id-or-name-of-project",
    dataset="id-or-name-of-dataset"
)
files = dataset.list_files()
id: str
46    @property
47    def id(self) -> str:
48        """Relative path of file within the dataset"""
49        return self._file.relative_path

Relative path of file within the dataset

name: str
51    @property
52    def name(self) -> str:
53        """Relative path of file within the dataset"""
54        return self._file.relative_path

Relative path of file within the dataset

file_name: str
56    @property
57    def file_name(self) -> str:
58        """Name of file, excluding the full folder path within the dataset"""
59        return self._file.name

Name of file, excluding the full folder path within the dataset

relative_path: str
61    @property
62    def relative_path(self) -> str:
63        """Relative path of file within the dataset"""
64        return self._file.relative_path

Relative path of file within the dataset

absolute_path: str
66    @property
67    def absolute_path(self) -> str:
68        """Full URI to file object in AWS S3"""
69        return self._file.absolute_path

Full URI to file object in AWS S3

metadata: dict
71    @property
72    def metadata(self) -> dict:
73        """File metadata"""
74        return self._file.metadata

File metadata

size_bytes: int
76    @property
77    def size_bytes(self) -> int:
78        """File size (in bytes)"""
79        return self._file.size

File size (in bytes)

size: str
81    @property
82    def size(self) -> str:
84        """File size converted to human-readable form (e.g., 4.50 GB)"""
84        return convert_size(self._file.size)

File size converted to human-readable form (e.g., 4.50 GB)

def read_csv(self, compression='infer', encoding='utf-8', **kwargs) -> 'DataFrame':
 94    def read_csv(self, compression='infer', encoding='utf-8', **kwargs) -> 'DataFrame':
 95        """
 96        Parse the file as a Pandas DataFrame.
 97
 98        The default field separator is a comma (for CSV), use sep='\\t' for TSV.
 99
100        File compression is inferred from the extension, but can be set
101        explicitly with the compression= flag.
102
103        All other keyword arguments are passed to pandas.read_csv
104        https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
105        """
106        import pandas
107
108        if compression == 'infer':
109            # If the file appears to be compressed
110            if self.relative_path.endswith('.gz'):
111                compression = dict(method='gzip')
112            elif self.relative_path.endswith('.bz2'):
113                compression = dict(method='bz2')
114            elif self.relative_path.endswith('.xz'):
115                compression = dict(method='xz')
116            elif self.relative_path.endswith('.zst'):
117                compression = dict(method='zstd')
118            else:
119                compression = None
120
121        if compression is not None:
122            handle = BytesIO(self._get())
123        else:
124            handle = StringIO(self._get().decode(encoding))
125
126        df = pandas.read_csv(
127            handle,
128            compression=compression,
129            encoding=encoding,
130            **kwargs
131        )
132        handle.close()
133        return df

Parse the file as a Pandas DataFrame.

The default field separator is a comma (for CSV), use sep='\t' for TSV.

File compression is inferred from the extension, but can be set explicitly with the compression= flag.

All other keyword arguments are passed to pandas.read_csv https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html

def read_h5ad(self) -> 'anndata.AnnData':
135    def read_h5ad(self) -> 'anndata.AnnData':
136        """Read an AnnData object from a file."""
137        # Import the anndata library, and raise an error if it is not available
138        try:
139            import anndata as ad # noqa
140        except ImportError:
141            raise ImportError("The anndata library is required to read AnnData files. "
142                              "Please install it using 'pip install anndata'.")
143
144        # Download the file to a temporary file handle and parse the contents
145        with BytesIO(self._get()) as handle:
146            return ad.read_h5ad(handle)

Read an AnnData object from a file.

def read_json(self, **kwargs):
148    def read_json(self, **kwargs):
149        """Read the file contents as a parsed JSON object (dict, list, etc.)."""
150        return json.loads(self._get(), **kwargs)

Read the file contents as a parsed JSON object (dict, list, etc.).

def read_parquet(self, **kwargs) -> 'DataFrame':
152    def read_parquet(self, **kwargs) -> 'DataFrame':
153        """
154        Read a Parquet file as a Pandas DataFrame.
155
156        Requires ``pyarrow`` or ``fastparquet`` to be installed.
157        All keyword arguments are passed to :func:`pandas.read_parquet`.
158        """
159        import pandas
160        return pandas.read_parquet(BytesIO(self._get()), **kwargs)

Read a Parquet file as a Pandas DataFrame.

Requires pyarrow or fastparquet to be installed. All keyword arguments are passed to pandas.read_parquet().

def read_feather(self, **kwargs) -> 'DataFrame':
162    def read_feather(self, **kwargs) -> 'DataFrame':
163        """
164        Read a Feather file as a Pandas DataFrame.
165
166        Requires ``pyarrow`` to be installed.
167        All keyword arguments are passed to :func:`pandas.read_feather`.
168        """
169        import pandas
170        return pandas.read_feather(BytesIO(self._get()), **kwargs)

Read a Feather file as a Pandas DataFrame.

Requires pyarrow to be installed. All keyword arguments are passed to pandas.read_feather().

def read_pickle(self, **kwargs):
172    def read_pickle(self, **kwargs):
173        """Read the file contents as a Python pickle object."""
174        return pickle.loads(self._get(), **kwargs)

Read the file contents as a Python pickle object.

def read_excel(self, **kwargs) -> 'DataFrame':
176    def read_excel(self, **kwargs) -> 'DataFrame':
177        """
178        Read an Excel file (``.xlsx`` / ``.xls``) as a Pandas DataFrame.
179
180        Requires ``openpyxl`` (for ``.xlsx``) or ``xlrd`` (for ``.xls``).
181        All keyword arguments are passed to :func:`pandas.read_excel`.
182        """
183        import pandas
184        return pandas.read_excel(BytesIO(self._get()), **kwargs)

Read an Excel file (.xlsx / .xls) as a Pandas DataFrame.

Requires openpyxl (for .xlsx) or xlrd (for .xls). All keyword arguments are passed to pandas.read_excel().

def readlines(self, encoding='utf-8', compression=None) -> List[str]:
186    def readlines(self, encoding='utf-8', compression=None) -> List[str]:
187        """Read the file contents as a list of lines."""
188
189        return self.read(
190            encoding=encoding,
191            compression=compression
192        ).splitlines()

Read the file contents as a list of lines.

def read(self, encoding='utf-8', compression=None) -> str:
194    def read(self, encoding='utf-8', compression=None) -> str:
195        """Read the file contents as text."""
196
197        # Get the raw file contents
198        cont = self._get()
199
200        # If the file is uncompressed
201        if compression is None:
202            return cont.decode(encoding)
203        # If the file is compressed
204        else:
205
206            # Only gzip-compression is supported currently
207            if compression != "gzip":
208                raise DataPortalInputError("compression may be 'gzip' or None")
209
210            with gzip.open(
211                BytesIO(
212                    cont
213                ),
214                'rt',
215                encoding=encoding
216            ) as handle:
217                return handle.read()

Read the file contents as text.

def read_bytes(self) -> _io.BytesIO:
219    def read_bytes(self) -> BytesIO:
220        """Get a generic BytesIO object representing the Data Portal File, to be passed into readers."""
221        return BytesIO(self._get())

Get a generic BytesIO object representing the Data Portal File, to be passed into readers.

def download(self, download_location: str = None) -> pathlib.Path:
223    def download(self, download_location: str = None) -> Path:
224        """
225        Download the file to a local directory.
226
227        Returns:
228            Path to downloaded file
229        """
230
231        if download_location is None:
232            raise DataPortalInputError("Must provide download location")
233
234        return self._client.file.download_files(
235            self._file.access_context,
236            download_location,
237            [self.relative_path]
238        )[0]

Download the file to a local directory.

Returns:

Path to downloaded file

def validate(self, local_path: ~PathLike):
240    def validate(self, local_path: PathLike):
241        """
242        Validate that the local file matches the remote file by comparing checksums.
243
244        Args:
245            local_path (PathLike): Path to the local file to validate
246        Raises:
247            ValueError: If checksums do not match
248            RuntimeWarning: If the remote checksum is not available or not supported
249        """
250        self._client.file.validate_file(self._file, local_path)

Validate that the local file matches the remote file by comparing checksums.

Arguments:
  • local_path (PathLike): Path to the local file to validate
Raises:
  • ValueError: If checksums do not match
  • RuntimeWarning: If the remote checksum is not available or not supported
def is_valid(self, local_path: ~PathLike) -> bool:
252    def is_valid(self, local_path: PathLike) -> bool:
253        """
254        Check if the local file matches the remote file by comparing checksums.
255
256        Args:
257            local_path (PathLike): Path to the local file to validate
258        Returns:
259            bool: True if the local file matches the remote file, False otherwise
260        Raises:
261            RuntimeWarning: If the remote checksum is not available or not supported
262        """
263        if not local_path:
264            raise DataPortalInputError("Must provide local path to validate file")
265
266        return self._client.file.is_valid_file(self._file, local_path)

Check if the local file matches the remote file by comparing checksums.

Arguments:
  • local_path (PathLike): Path to the local file to validate
Returns:

bool: True if the local file matches the remote file, False otherwise

Raises:
  • RuntimeWarning: If the remote checksum is not available or not supported
269class DataPortalFiles(DataPortalAssets[DataPortalFile]):
270    """Collection of DataPortalFile objects."""
271
272    asset_name = "file"
273
274    def download(self, download_location: str = None) -> List[Path]:
275        """
276        Download the collection of files to a local directory.
277
278        Returns:
279            List of paths to downloaded files.
280        """
281
282        local_paths = []
283        for f in self:
284            local_paths.append(f.download(download_location))
285        return local_paths

Collection of DataPortalFile objects.

asset_name = 'file'
def download(self, download_location: str = None) -> List[pathlib.Path]:
274    def download(self, download_location: str = None) -> List[Path]:
275        """
276        Download the collection of files to a local directory.
277
278        Returns:
279            List of paths to downloaded files.
280        """
281
282        local_paths = []
283        for f in self:
284            local_paths.append(f.download(download_location))
285        return local_paths

Download the collection of files to a local directory.

Returns:

List of paths to downloaded files.