cirro.sdk.project

  1from functools import cache
  2from time import sleep
  3from typing import List, Union
  4
  5from cirro_api_client.v1.models import Project, UploadDatasetRequest, Dataset
  6
  7from cirro.cirro_client import CirroApi
  8from cirro.file_utils import get_files_in_directory
  9from cirro.sdk.asset import DataPortalAssets, DataPortalAsset
 10from cirro.sdk.dataset import DataPortalDataset, DataPortalDatasets
 11from cirro.sdk.exceptions import DataPortalAssetNotFound, DataPortalInputError
 12from cirro.sdk.helpers import parse_process_name_or_id
 13from cirro.sdk.process import DataPortalProcess
 14from cirro.sdk.reference import DataPortalReference, DataPortalReferences
 15from cirro.sdk.reference_type import DataPortalReferenceType, DataPortalReferenceTypes
 16
 17
 18class DataPortalProject(DataPortalAsset):
 19    """
 20    Projects in the Data Portal contain collections of Datasets.
 21    Users are granted permissions at the project-level, allowing them
 22    to view and/or modify all the datasets in that collection.
 23    """
 24    def __init__(self, proj: Project, client: CirroApi):
 25        """
 26        Instantiate with helper method
 27
 28        ```python
 29        from cirro import DataPortal()
 30        portal = DataPortal()
 31        project = portal.get_project_by_name("Project Name")
 32        ```
 33
 34        """
 35        self._data = proj
 36        self._client = client
 37
 38    @property
 39    def id(self) -> str:
 40        """
 41        Unique identifier
 42        """
 43        return self._data.id
 44
 45    @property
 46    def name(self) -> str:
 47        """
 48        Readable name
 49        """
 50        return self._data.name
 51
 52    @property
 53    def description(self) -> str:
 54        """
 55        Longer description of the project
 56        """
 57        return self._data.description
 58
 59    def __str__(self):
 60        """Control how the Project is rendered as a string."""
 61
 62        return '\n'.join([
 63            f"{i.title()}: {self.__getattribute__(i)}"
 64            for i in ['name', 'id', 'description']
 65        ])
 66
 67    @cache
 68    def _get_datasets(self) -> List[Dataset]:
 69        return self._client.datasets.list(self.id)
 70
 71    def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
 72        """List all the datasets available in the project."""
 73        if force_refresh:
 74            self._get_datasets.cache_clear()
 75
 76        return DataPortalDatasets(
 77            [
 78                DataPortalDataset(d, self._client)
 79                for d in self._get_datasets()
 80            ]
 81        )
 82
 83    def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
 84        """Return the dataset with the specified name."""
 85        if force_refresh:
 86            self._get_datasets.cache_clear()
 87
 88        dataset = next((d for d in self._get_datasets() if d.name == name), None)
 89        if dataset is None:
 90            raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
 91        return self.get_dataset_by_id(dataset.id)
 92
 93    def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
 94        """Return the dataset with the specified id."""
 95
 96        dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id)
 97        if dataset is None:
 98            raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
 99        return DataPortalDataset(dataset, self._client)
100
101    def list_references(self, reference_type: str = None) -> DataPortalReferences:
102        """
103        List the references available in a project.
104        Optionally filter to references of a particular type (identified by name)
105        """
106
107        # Get the complete list of references which are available
108        reference_types = DataPortalReferenceTypes(
109            [
110                DataPortalReferenceType(ref)
111                for ref in self._client.references.get_types()
112            ]
113        )
114
115        # If a particular name was specified
116        if reference_type is not None:
117            reference_types = reference_types.filter_by_pattern(reference_type)
118            if len(reference_types) == 0:
119                msg = f"Could not find any reference types with the name {reference_type}"
120                raise DataPortalAssetNotFound(msg)
121
122        return DataPortalReferences(
123            [
124                DataPortalReference(ref, project_id=self.id, client=self._client)
125                for ref in self._client.references.get_for_project(
126                    self.id
127                )
128                if reference_type is None or ref.type == reference_type
129            ]
130        )
131
132    def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
133        """Return the reference of a particular type with the specified name."""
134
135        if name is None:
136            raise DataPortalInputError("Must specify the reference name")
137
138        return self.list_references(ref_type).get_by_name(name)
139
140    def upload_dataset(
141        self,
142        name: str = None,
143        description='',
144        process: Union[DataPortalProcess, str] = None,
145        upload_folder: str = None,
146        files: list = None
147    ):
148        """
149        Upload a set of files to the Data Portal, creating a new dataset.
150
151        If the files parameter is not provided, it will upload all files in the upload folder
152
153        Args:
154            name (str): Name of newly created dataset
155            description (str): Description of newly created dataset
156            process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object
157            upload_folder (str): Folder containing files to upload
158            files (List[str]): Optional subset of files to upload from the folder
159        """
160
161        if name is None:
162            raise DataPortalInputError("Must provide name for new dataset")
163        if process is None:
164            raise DataPortalInputError("Must provide the process which is used for ingest")
165        if upload_folder is None:
166            raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")
167
168        # Parse the process provided by the user
169        process = parse_process_name_or_id(process, self._client)
170
171        # If no files were provided
172        if files is None:
173            # Get the list of files in the upload folder
174            files = get_files_in_directory(upload_folder)
175
176        if files is None or len(files) == 0:
177            raise RuntimeWarning("No files to upload, exiting")
178
179        # Make sure that the files match the expected pattern
180        self._client.processes.check_dataset_files(files, process.id, upload_folder)
181
182        # Create the ingest process request
183        dataset_create_request = UploadDatasetRequest(
184            process_id=process.id,
185            name=name,
186            description=description,
187            expected_files=files
188        )
189
190        # Get the response
191        create_response = self._client.datasets.create(project_id=self.id,
192                                                       upload_request=dataset_create_request)
193
194        # Upload the files
195        self._client.datasets.upload_files(
196            project_id=self.id,
197            dataset_id=create_response.id,
198            local_directory=upload_folder,
199            files=files
200        )
201
202        # Return the dataset which was created, which might take a second to update
203        max_attempts = 5
204        for attempt in range(max_attempts):
205            try:
206                return self.get_dataset_by_id(create_response.id)
207            except DataPortalAssetNotFound as e:
208                if attempt == max_attempts - 1:
209                    raise e
210                else:
211                    sleep(2)
212
213
class DataPortalProjects(DataPortalAssets[DataPortalProject]):
    """List-like container holding DataPortalProject objects."""

    # Label for this kind of asset; presumably read by the DataPortalAssets
    # base class -- verify against cirro.sdk.asset
    asset_name = "project"
class DataPortalProject(cirro.sdk.asset.DataPortalAsset):
 19class DataPortalProject(DataPortalAsset):
 20    """
 21    Projects in the Data Portal contain collections of Datasets.
 22    Users are granted permissions at the project-level, allowing them
 23    to view and/or modify all the datasets in that collection.
 24    """
 25    def __init__(self, proj: Project, client: CirroApi):
 26        """
 27        Instantiate with helper method
 28
 29        ```python
 30        from cirro import DataPortal()
 31        portal = DataPortal()
 32        project = portal.get_project_by_name("Project Name")
 33        ```
 34
 35        """
 36        self._data = proj
 37        self._client = client
 38
 39    @property
 40    def id(self) -> str:
 41        """
 42        Unique identifier
 43        """
 44        return self._data.id
 45
 46    @property
 47    def name(self) -> str:
 48        """
 49        Readable name
 50        """
 51        return self._data.name
 52
 53    @property
 54    def description(self) -> str:
 55        """
 56        Longer description of the project
 57        """
 58        return self._data.description
 59
 60    def __str__(self):
 61        """Control how the Project is rendered as a string."""
 62
 63        return '\n'.join([
 64            f"{i.title()}: {self.__getattribute__(i)}"
 65            for i in ['name', 'id', 'description']
 66        ])
 67
 68    @cache
 69    def _get_datasets(self) -> List[Dataset]:
 70        return self._client.datasets.list(self.id)
 71
 72    def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
 73        """List all the datasets available in the project."""
 74        if force_refresh:
 75            self._get_datasets.cache_clear()
 76
 77        return DataPortalDatasets(
 78            [
 79                DataPortalDataset(d, self._client)
 80                for d in self._get_datasets()
 81            ]
 82        )
 83
 84    def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
 85        """Return the dataset with the specified name."""
 86        if force_refresh:
 87            self._get_datasets.cache_clear()
 88
 89        dataset = next((d for d in self._get_datasets() if d.name == name), None)
 90        if dataset is None:
 91            raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
 92        return self.get_dataset_by_id(dataset.id)
 93
 94    def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
 95        """Return the dataset with the specified id."""
 96
 97        dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id)
 98        if dataset is None:
 99            raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
100        return DataPortalDataset(dataset, self._client)
101
102    def list_references(self, reference_type: str = None) -> DataPortalReferences:
103        """
104        List the references available in a project.
105        Optionally filter to references of a particular type (identified by name)
106        """
107
108        # Get the complete list of references which are available
109        reference_types = DataPortalReferenceTypes(
110            [
111                DataPortalReferenceType(ref)
112                for ref in self._client.references.get_types()
113            ]
114        )
115
116        # If a particular name was specified
117        if reference_type is not None:
118            reference_types = reference_types.filter_by_pattern(reference_type)
119            if len(reference_types) == 0:
120                msg = f"Could not find any reference types with the name {reference_type}"
121                raise DataPortalAssetNotFound(msg)
122
123        return DataPortalReferences(
124            [
125                DataPortalReference(ref, project_id=self.id, client=self._client)
126                for ref in self._client.references.get_for_project(
127                    self.id
128                )
129                if reference_type is None or ref.type == reference_type
130            ]
131        )
132
133    def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
134        """Return the reference of a particular type with the specified name."""
135
136        if name is None:
137            raise DataPortalInputError("Must specify the reference name")
138
139        return self.list_references(ref_type).get_by_name(name)
140
141    def upload_dataset(
142        self,
143        name: str = None,
144        description='',
145        process: Union[DataPortalProcess, str] = None,
146        upload_folder: str = None,
147        files: list = None
148    ):
149        """
150        Upload a set of files to the Data Portal, creating a new dataset.
151
152        If the files parameter is not provided, it will upload all files in the upload folder
153
154        Args:
155            name (str): Name of newly created dataset
156            description (str): Description of newly created dataset
157            process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object
158            upload_folder (str): Folder containing files to upload
159            files (List[str]): Optional subset of files to upload from the folder
160        """
161
162        if name is None:
163            raise DataPortalInputError("Must provide name for new dataset")
164        if process is None:
165            raise DataPortalInputError("Must provide the process which is used for ingest")
166        if upload_folder is None:
167            raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")
168
169        # Parse the process provided by the user
170        process = parse_process_name_or_id(process, self._client)
171
172        # If no files were provided
173        if files is None:
174            # Get the list of files in the upload folder
175            files = get_files_in_directory(upload_folder)
176
177        if files is None or len(files) == 0:
178            raise RuntimeWarning("No files to upload, exiting")
179
180        # Make sure that the files match the expected pattern
181        self._client.processes.check_dataset_files(files, process.id, upload_folder)
182
183        # Create the ingest process request
184        dataset_create_request = UploadDatasetRequest(
185            process_id=process.id,
186            name=name,
187            description=description,
188            expected_files=files
189        )
190
191        # Get the response
192        create_response = self._client.datasets.create(project_id=self.id,
193                                                       upload_request=dataset_create_request)
194
195        # Upload the files
196        self._client.datasets.upload_files(
197            project_id=self.id,
198            dataset_id=create_response.id,
199            local_directory=upload_folder,
200            files=files
201        )
202
203        # Return the dataset which was created, which might take a second to update
204        max_attempts = 5
205        for attempt in range(max_attempts):
206            try:
207                return self.get_dataset_by_id(create_response.id)
208            except DataPortalAssetNotFound as e:
209                if attempt == max_attempts - 1:
210                    raise e
211                else:
212                    sleep(2)

Projects in the Data Portal contain collections of Datasets. Users are granted permissions at the project-level, allowing them to view and/or modify all the datasets in that collection.

DataPortalProject( proj: cirro_api_client.v1.models.Project, client: cirro.CirroApi)
25    def __init__(self, proj: Project, client: CirroApi):
26        """
27        Instantiate with helper method
28
29        ```python
30        from cirro import DataPortal()
31        portal = DataPortal()
32        project = portal.get_project_by_name("Project Name")
33        ```
34
35        """
36        self._data = proj
37        self._client = client

Instantiate with helper method

from cirro import DataPortal
portal = DataPortal()
project = portal.get_project_by_name("Project Name")
id: str
39    @property
40    def id(self) -> str:
41        """
42        Unique identifier
43        """
44        return self._data.id

Unique identifier

name: str
46    @property
47    def name(self) -> str:
48        """
49        Readable name
50        """
51        return self._data.name

Readable name

description: str
53    @property
54    def description(self) -> str:
55        """
56        Longer description of the project
57        """
58        return self._data.description

Longer description of the project

def list_datasets(self, force_refresh=False) -> cirro.sdk.dataset.DataPortalDatasets:
72    def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
73        """List all the datasets available in the project."""
74        if force_refresh:
75            self._get_datasets.cache_clear()
76
77        return DataPortalDatasets(
78            [
79                DataPortalDataset(d, self._client)
80                for d in self._get_datasets()
81            ]
82        )

List all the datasets available in the project.

def get_dataset_by_name( self, name: str, force_refresh=False) -> cirro.sdk.dataset.DataPortalDataset:
84    def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
85        """Return the dataset with the specified name."""
86        if force_refresh:
87            self._get_datasets.cache_clear()
88
89        dataset = next((d for d in self._get_datasets() if d.name == name), None)
90        if dataset is None:
91            raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
92        return self.get_dataset_by_id(dataset.id)

Return the dataset with the specified name.

def get_dataset_by_id(self, _id: str = None) -> cirro.sdk.dataset.DataPortalDataset:
 94    def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
 95        """Return the dataset with the specified id."""
 96
 97        dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id)
 98        if dataset is None:
 99            raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
100        return DataPortalDataset(dataset, self._client)

Return the dataset with the specified id.

def list_references( self, reference_type: str = None) -> cirro.sdk.reference.DataPortalReferences:
102    def list_references(self, reference_type: str = None) -> DataPortalReferences:
103        """
104        List the references available in a project.
105        Optionally filter to references of a particular type (identified by name)
106        """
107
108        # Get the complete list of references which are available
109        reference_types = DataPortalReferenceTypes(
110            [
111                DataPortalReferenceType(ref)
112                for ref in self._client.references.get_types()
113            ]
114        )
115
116        # If a particular name was specified
117        if reference_type is not None:
118            reference_types = reference_types.filter_by_pattern(reference_type)
119            if len(reference_types) == 0:
120                msg = f"Could not find any reference types with the name {reference_type}"
121                raise DataPortalAssetNotFound(msg)
122
123        return DataPortalReferences(
124            [
125                DataPortalReference(ref, project_id=self.id, client=self._client)
126                for ref in self._client.references.get_for_project(
127                    self.id
128                )
129                if reference_type is None or ref.type == reference_type
130            ]
131        )

List the references available in a project. Optionally filter to references of a particular type (identified by name)

def get_reference_by_name( self, name: str = None, ref_type: str = None) -> cirro.sdk.reference.DataPortalReference:
133    def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
134        """Return the reference of a particular type with the specified name."""
135
136        if name is None:
137            raise DataPortalInputError("Must specify the reference name")
138
139        return self.list_references(ref_type).get_by_name(name)

Return the reference of a particular type with the specified name.

def upload_dataset( self, name: str = None, description='', process: Union[cirro.sdk.process.DataPortalProcess, str] = None, upload_folder: str = None, files: list = None):
141    def upload_dataset(
142        self,
143        name: str = None,
144        description='',
145        process: Union[DataPortalProcess, str] = None,
146        upload_folder: str = None,
147        files: list = None
148    ):
149        """
150        Upload a set of files to the Data Portal, creating a new dataset.
151
152        If the files parameter is not provided, it will upload all files in the upload folder
153
154        Args:
155            name (str): Name of newly created dataset
156            description (str): Description of newly created dataset
157            process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object
158            upload_folder (str): Folder containing files to upload
159            files (List[str]): Optional subset of files to upload from the folder
160        """
161
162        if name is None:
163            raise DataPortalInputError("Must provide name for new dataset")
164        if process is None:
165            raise DataPortalInputError("Must provide the process which is used for ingest")
166        if upload_folder is None:
167            raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")
168
169        # Parse the process provided by the user
170        process = parse_process_name_or_id(process, self._client)
171
172        # If no files were provided
173        if files is None:
174            # Get the list of files in the upload folder
175            files = get_files_in_directory(upload_folder)
176
177        if files is None or len(files) == 0:
178            raise RuntimeWarning("No files to upload, exiting")
179
180        # Make sure that the files match the expected pattern
181        self._client.processes.check_dataset_files(files, process.id, upload_folder)
182
183        # Create the ingest process request
184        dataset_create_request = UploadDatasetRequest(
185            process_id=process.id,
186            name=name,
187            description=description,
188            expected_files=files
189        )
190
191        # Get the response
192        create_response = self._client.datasets.create(project_id=self.id,
193                                                       upload_request=dataset_create_request)
194
195        # Upload the files
196        self._client.datasets.upload_files(
197            project_id=self.id,
198            dataset_id=create_response.id,
199            local_directory=upload_folder,
200            files=files
201        )
202
203        # Return the dataset which was created, which might take a second to update
204        max_attempts = 5
205        for attempt in range(max_attempts):
206            try:
207                return self.get_dataset_by_id(create_response.id)
208            except DataPortalAssetNotFound as e:
209                if attempt == max_attempts - 1:
210                    raise e
211                else:
212                    sleep(2)

Upload a set of files to the Data Portal, creating a new dataset.

If the files parameter is not provided, it will upload all files in the upload folder

Arguments:
  • name (str): Name of newly created dataset
  • description (str): Description of newly created dataset
  • process (str | DataPortalProcess): Process to run; may be referenced by name, ID, or object
  • upload_folder (str): Folder containing files to upload
  • files (List[str]): Optional subset of files to upload from the folder
class DataPortalProjects(DataPortalAssets[DataPortalProject]):
    """List-like container holding DataPortalProject objects."""

    # Label for this kind of asset; presumably read by the DataPortalAssets
    # base class -- verify against cirro.sdk.asset
    asset_name = "project"

Collection of DataPortalProject objects

asset_name = 'project'
Inherited Members
cirro.sdk.asset.DataPortalAssets
DataPortalAssets
description
get_by_name
get_by_id
filter_by_pattern
builtins.list
clear
copy
append
insert
extend
pop
remove
index
count
reverse
sort