cirro.sdk.project
from time import sleep
from typing import List, Optional, Union

from cirro_api_client.v1.models import Project, UploadDatasetRequest, Dataset

from cirro.cirro_client import CirroApi
from cirro.file_utils import get_files_in_directory
from cirro.sdk.asset import DataPortalAssets, DataPortalAsset
from cirro.sdk.dataset import DataPortalDataset, DataPortalDatasets
from cirro.sdk.exceptions import DataPortalAssetNotFound, DataPortalInputError
from cirro.sdk.helpers import parse_process_name_or_id
from cirro.sdk.process import DataPortalProcess
from cirro.sdk.reference import DataPortalReference, DataPortalReferences
from cirro.sdk.reference_type import DataPortalReferenceType, DataPortalReferenceTypes


class DataPortalProject(DataPortalAsset):
    """
    Projects in the Data Portal contain collections of Datasets.
    Users are granted permissions at the project-level, allowing them
    to view and/or modify all the datasets in that collection.
    """

    def __init__(self, proj: Project, client: CirroApi):
        """
        Instantiate with helper method

        ```python
        from cirro import DataPortal
        portal = DataPortal()
        project = portal.get_project_by_name("Project Name")
        ```
        """
        self._data = proj
        self._client = client
        # Per-instance memo of the project's datasets.
        # NOTE: replaces @functools.cache on the method, which keys on
        # `self` (keeping instances alive — ruff B019) and whose
        # cache_clear() would discard the cache for ALL instances.
        self._datasets_cache: Optional[List[Dataset]] = None

    @property
    def id(self) -> str:
        """Unique identifier"""
        return self._data.id

    @property
    def name(self) -> str:
        """Readable name"""
        return self._data.name

    @property
    def description(self) -> str:
        """Longer description of the project"""
        return self._data.description

    def __str__(self):
        """Control how the Project is rendered as a string."""
        return '\n'.join([
            f"{i.title()}: {getattr(self, i)}"
            for i in ['name', 'id', 'description']
        ])

    def _get_datasets(self) -> List[Dataset]:
        # Lazily fetch and memoize the dataset list for this project
        if self._datasets_cache is None:
            self._datasets_cache = self._client.datasets.list(self.id)
        return self._datasets_cache

    def _clear_dataset_cache(self):
        # Drop the memoized dataset list so the next access refetches
        self._datasets_cache = None

    def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
        """List all the datasets available in the project."""
        if force_refresh:
            self._clear_dataset_cache()

        return DataPortalDatasets([
            DataPortalDataset(d, self._client)
            for d in self._get_datasets()
        ])

    def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
        """
        Return the dataset with the specified name.

        Raises:
            DataPortalAssetNotFound: if no dataset has that name
        """
        if force_refresh:
            self._clear_dataset_cache()

        dataset = next((d for d in self._get_datasets() if d.name == name), None)
        if dataset is None:
            raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
        return self.get_dataset_by_id(dataset.id)

    def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
        """
        Return the dataset with the specified id.

        Raises:
            DataPortalAssetNotFound: if no dataset has that ID
        """
        dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id)
        if dataset is None:
            raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
        return DataPortalDataset(dataset, self._client)

    def list_references(self, reference_type: str = None) -> DataPortalReferences:
        """
        List the references available in a project.
        Optionally filter to references of a particular type (identified by name)
        """
        # Get the complete list of reference types which are available
        reference_types = DataPortalReferenceTypes([
            DataPortalReferenceType(ref)
            for ref in self._client.references.get_types()
        ])

        # If a particular name was specified, validate it against the known types
        if reference_type is not None:
            reference_types = reference_types.filter_by_pattern(reference_type)
            if len(reference_types) == 0:
                msg = f"Could not find any reference types with the name {reference_type}"
                raise DataPortalAssetNotFound(msg)

        return DataPortalReferences([
            DataPortalReference(ref, project_id=self.id, client=self._client)
            for ref in self._client.references.get_for_project(self.id)
            if reference_type is None or ref.type == reference_type
        ])

    def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
        """Return the reference of a particular type with the specified name."""
        if name is None:
            raise DataPortalInputError("Must specify the reference name")

        return self.list_references(ref_type).get_by_name(name)

    def upload_dataset(
            self,
            name: str = None,
            description='',
            process: Union[DataPortalProcess, str] = None,
            upload_folder: str = None,
            files: list = None
    ):
        """
        Upload a set of files to the Data Portal, creating a new dataset.

        If the files parameter is not provided, it will upload all files in the upload folder

        Args:
            name (str): Name of newly created dataset
            description (str): Description of newly created dataset
            process (str | DataPortalProcess): Process to run; may be referenced by name, ID, or object
            upload_folder (str): Folder containing files to upload
            files (List[str]): Optional subset of files to upload from the folder
        """
        # Validate the required inputs up front
        if name is None:
            raise DataPortalInputError("Must provide name for new dataset")
        if process is None:
            raise DataPortalInputError("Must provide the process which is used for ingest")
        if upload_folder is None:
            raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")

        # Resolve the process (name, ID, or object) to a concrete process record
        process = parse_process_name_or_id(process, self._client)

        # Default to every file found in the upload folder
        if files is None:
            files = get_files_in_directory(upload_folder)

        # NOTE(review): raising a Warning subclass as an exception is unusual,
        # but preserved because callers may catch RuntimeWarning here
        if files is None or len(files) == 0:
            raise RuntimeWarning("No files to upload, exiting")

        # Make sure that the files match the pattern expected by the ingest process
        self._client.processes.check_dataset_files(files, process.id, upload_folder)

        # Create the ingest process request
        dataset_create_request = UploadDatasetRequest(
            process_id=process.id,
            name=name,
            description=description,
            expected_files=files
        )

        # Register the new dataset
        create_response = self._client.datasets.create(project_id=self.id,
                                                       upload_request=dataset_create_request)

        # Upload the files
        self._client.datasets.upload_files(
            project_id=self.id,
            dataset_id=create_response.id,
            local_directory=upload_folder,
            files=files
        )

        # Return the dataset which was created, which might take a second to update
        max_attempts = 5
        for attempt in range(max_attempts):
            try:
                return self.get_dataset_by_id(create_response.id)
            except DataPortalAssetNotFound as e:
                if attempt == max_attempts - 1:
                    raise e
                else:
                    sleep(2)


class DataPortalProjects(DataPortalAssets[DataPortalProject]):
    """Collection of DataPortalProject objects"""
    asset_name = "project"
class DataPortalProject(DataPortalAsset):
    """
    Projects in the Data Portal contain collections of Datasets.
    Users are granted permissions at the project-level, allowing them
    to view and/or modify all the datasets in that collection.
    """

    def __init__(self, proj: Project, client: CirroApi):
        """
        Instantiate with helper method

        ```python
        from cirro import DataPortal
        portal = DataPortal()
        project = portal.get_project_by_name("Project Name")
        ```
        """
        self._data = proj
        self._client = client
        # Per-instance memo of the project's datasets.
        # NOTE: replaces @functools.cache on the method, which keys on
        # `self` (keeping instances alive — ruff B019) and whose
        # cache_clear() would discard the cache for ALL instances.
        self._datasets_cache = None

    @property
    def id(self) -> str:
        """Unique identifier"""
        return self._data.id

    @property
    def name(self) -> str:
        """Readable name"""
        return self._data.name

    @property
    def description(self) -> str:
        """Longer description of the project"""
        return self._data.description

    def __str__(self):
        """Control how the Project is rendered as a string."""
        return '\n'.join([
            f"{i.title()}: {getattr(self, i)}"
            for i in ['name', 'id', 'description']
        ])

    def _get_datasets(self) -> List[Dataset]:
        # Lazily fetch and memoize the dataset list for this project
        if self._datasets_cache is None:
            self._datasets_cache = self._client.datasets.list(self.id)
        return self._datasets_cache

    def _clear_dataset_cache(self):
        # Drop the memoized dataset list so the next access refetches
        self._datasets_cache = None

    def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
        """List all the datasets available in the project."""
        if force_refresh:
            self._clear_dataset_cache()

        return DataPortalDatasets([
            DataPortalDataset(d, self._client)
            for d in self._get_datasets()
        ])

    def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
        """
        Return the dataset with the specified name.

        Raises:
            DataPortalAssetNotFound: if no dataset has that name
        """
        if force_refresh:
            self._clear_dataset_cache()

        dataset = next((d for d in self._get_datasets() if d.name == name), None)
        if dataset is None:
            raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
        return self.get_dataset_by_id(dataset.id)

    def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
        """
        Return the dataset with the specified id.

        Raises:
            DataPortalAssetNotFound: if no dataset has that ID
        """
        dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id)
        if dataset is None:
            raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
        return DataPortalDataset(dataset, self._client)

    def list_references(self, reference_type: str = None) -> DataPortalReferences:
        """
        List the references available in a project.
        Optionally filter to references of a particular type (identified by name)
        """
        # Get the complete list of reference types which are available
        reference_types = DataPortalReferenceTypes([
            DataPortalReferenceType(ref)
            for ref in self._client.references.get_types()
        ])

        # If a particular name was specified, validate it against the known types
        if reference_type is not None:
            reference_types = reference_types.filter_by_pattern(reference_type)
            if len(reference_types) == 0:
                msg = f"Could not find any reference types with the name {reference_type}"
                raise DataPortalAssetNotFound(msg)

        return DataPortalReferences([
            DataPortalReference(ref, project_id=self.id, client=self._client)
            for ref in self._client.references.get_for_project(self.id)
            if reference_type is None or ref.type == reference_type
        ])

    def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
        """Return the reference of a particular type with the specified name."""
        if name is None:
            raise DataPortalInputError("Must specify the reference name")

        return self.list_references(ref_type).get_by_name(name)

    def upload_dataset(
            self,
            name: str = None,
            description='',
            process: Union[DataPortalProcess, str] = None,
            upload_folder: str = None,
            files: list = None
    ):
        """
        Upload a set of files to the Data Portal, creating a new dataset.

        If the files parameter is not provided, it will upload all files in the upload folder

        Args:
            name (str): Name of newly created dataset
            description (str): Description of newly created dataset
            process (str | DataPortalProcess): Process to run; may be referenced by name, ID, or object
            upload_folder (str): Folder containing files to upload
            files (List[str]): Optional subset of files to upload from the folder
        """
        # Validate the required inputs up front
        if name is None:
            raise DataPortalInputError("Must provide name for new dataset")
        if process is None:
            raise DataPortalInputError("Must provide the process which is used for ingest")
        if upload_folder is None:
            raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")

        # Resolve the process (name, ID, or object) to a concrete process record
        process = parse_process_name_or_id(process, self._client)

        # Default to every file found in the upload folder
        if files is None:
            files = get_files_in_directory(upload_folder)

        # NOTE(review): raising a Warning subclass as an exception is unusual,
        # but preserved because callers may catch RuntimeWarning here
        if files is None or len(files) == 0:
            raise RuntimeWarning("No files to upload, exiting")

        # Make sure that the files match the pattern expected by the ingest process
        self._client.processes.check_dataset_files(files, process.id, upload_folder)

        # Create the ingest process request
        dataset_create_request = UploadDatasetRequest(
            process_id=process.id,
            name=name,
            description=description,
            expected_files=files
        )

        # Register the new dataset
        create_response = self._client.datasets.create(project_id=self.id,
                                                       upload_request=dataset_create_request)

        # Upload the files
        self._client.datasets.upload_files(
            project_id=self.id,
            dataset_id=create_response.id,
            local_directory=upload_folder,
            files=files
        )

        # Return the dataset which was created, which might take a second to update
        max_attempts = 5
        for attempt in range(max_attempts):
            try:
                return self.get_dataset_by_id(create_response.id)
            except DataPortalAssetNotFound as e:
                if attempt == max_attempts - 1:
                    raise e
                else:
                    sleep(2)
Projects in the Data Portal contain collections of Datasets. Users are granted permissions at the project-level, allowing them to view and/or modify all the datasets in that collection.
25 def __init__(self, proj: Project, client: CirroApi): 26 """ 27 Instantiate with helper method 28 29 ```python 30 from cirro import DataPortal() 31 portal = DataPortal() 32 project = portal.get_project_by_name("Project Name") 33 ``` 34 35 """ 36 self._data = proj 37 self._client = client
Instantiate with helper method
from cirro import DataPortal
portal = DataPortal()
project = portal.get_project_by_name("Project Name")
53 @property 54 def description(self) -> str: 55 """ 56 Longer description of the project 57 """ 58 return self._data.description
Longer description of the project
72 def list_datasets(self, force_refresh=False) -> DataPortalDatasets: 73 """List all the datasets available in the project.""" 74 if force_refresh: 75 self._get_datasets.cache_clear() 76 77 return DataPortalDatasets( 78 [ 79 DataPortalDataset(d, self._client) 80 for d in self._get_datasets() 81 ] 82 )
List all the datasets available in the project.
84 def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset: 85 """Return the dataset with the specified name.""" 86 if force_refresh: 87 self._get_datasets.cache_clear() 88 89 dataset = next((d for d in self._get_datasets() if d.name == name), None) 90 if dataset is None: 91 raise DataPortalAssetNotFound(f'Dataset with name {name} not found') 92 return self.get_dataset_by_id(dataset.id)
Return the dataset with the specified name.
94 def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset: 95 """Return the dataset with the specified id.""" 96 97 dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id) 98 if dataset is None: 99 raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found') 100 return DataPortalDataset(dataset, self._client)
Return the dataset with the specified id.
102 def list_references(self, reference_type: str = None) -> DataPortalReferences: 103 """ 104 List the references available in a project. 105 Optionally filter to references of a particular type (identified by name) 106 """ 107 108 # Get the complete list of references which are available 109 reference_types = DataPortalReferenceTypes( 110 [ 111 DataPortalReferenceType(ref) 112 for ref in self._client.references.get_types() 113 ] 114 ) 115 116 # If a particular name was specified 117 if reference_type is not None: 118 reference_types = reference_types.filter_by_pattern(reference_type) 119 if len(reference_types) == 0: 120 msg = f"Could not find any reference types with the name {reference_type}" 121 raise DataPortalAssetNotFound(msg) 122 123 return DataPortalReferences( 124 [ 125 DataPortalReference(ref, project_id=self.id, client=self._client) 126 for ref in self._client.references.get_for_project( 127 self.id 128 ) 129 if reference_type is None or ref.type == reference_type 130 ] 131 )
List the references available in a project. Optionally filter to references of a particular type (identified by name)
133 def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference: 134 """Return the reference of a particular type with the specified name.""" 135 136 if name is None: 137 raise DataPortalInputError("Must specify the reference name") 138 139 return self.list_references(ref_type).get_by_name(name)
Return the reference of a particular type with the specified name.
141 def upload_dataset( 142 self, 143 name: str = None, 144 description='', 145 process: Union[DataPortalProcess, str] = None, 146 upload_folder: str = None, 147 files: list = None 148 ): 149 """ 150 Upload a set of files to the Data Portal, creating a new dataset. 151 152 If the files parameter is not provided, it will upload all files in the upload folder 153 154 Args: 155 name (str): Name of newly created dataset 156 description (str): Description of newly created dataset 157 process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object 158 upload_folder (str): Folder containing files to upload 159 files (List[str]): Optional subset of files to upload from the folder 160 """ 161 162 if name is None: 163 raise DataPortalInputError("Must provide name for new dataset") 164 if process is None: 165 raise DataPortalInputError("Must provide the process which is used for ingest") 166 if upload_folder is None: 167 raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload") 168 169 # Parse the process provided by the user 170 process = parse_process_name_or_id(process, self._client) 171 172 # If no files were provided 173 if files is None: 174 # Get the list of files in the upload folder 175 files = get_files_in_directory(upload_folder) 176 177 if files is None or len(files) == 0: 178 raise RuntimeWarning("No files to upload, exiting") 179 180 # Make sure that the files match the expected pattern 181 self._client.processes.check_dataset_files(files, process.id, upload_folder) 182 183 # Create the ingest process request 184 dataset_create_request = UploadDatasetRequest( 185 process_id=process.id, 186 name=name, 187 description=description, 188 expected_files=files 189 ) 190 191 # Get the response 192 create_response = self._client.datasets.create(project_id=self.id, 193 upload_request=dataset_create_request) 194 195 # Upload the files 196 self._client.datasets.upload_files( 197 project_id=self.id, 198 
dataset_id=create_response.id, 199 local_directory=upload_folder, 200 files=files 201 ) 202 203 # Return the dataset which was created, which might take a second to update 204 max_attempts = 5 205 for attempt in range(max_attempts): 206 try: 207 return self.get_dataset_by_id(create_response.id) 208 except DataPortalAssetNotFound as e: 209 if attempt == max_attempts - 1: 210 raise e 211 else: 212 sleep(2)
Upload a set of files to the Data Portal, creating a new dataset.
If the files parameter is not provided, it will upload all files in the upload folder
Arguments:
- name (str): Name of newly created dataset
- description (str): Description of newly created dataset
- process (str | DataPortalProcess): Process to run; may be referenced by name, ID, or object
- upload_folder (str): Folder containing files to upload
- files (List[str]): Optional subset of files to upload from the folder
215class DataPortalProjects(DataPortalAssets[DataPortalProject]): 216 """Collection of DataPortalProject objects""" 217 asset_name = "project"
Collection of DataPortalProject objects
Inherited Members
- cirro.sdk.asset.DataPortalAssets
- DataPortalAssets
- description
- get_by_name
- get_by_id
- filter_by_pattern
- builtins.list
- clear
- copy
- append
- insert
- extend
- pop
- remove
- index
- count
- reverse
- sort