cirro
```python
import cirro.file_utils  # noqa
from cirro.cirro_client import CirroApi
from cirro.sdk.dataset import DataPortalDataset
from cirro.sdk.login import DataPortalLogin
from cirro.sdk.portal import DataPortal
from cirro.sdk.process import DataPortalProcess
from cirro.sdk.project import DataPortalProject
from cirro.sdk.reference import DataPortalReference

__all__ = [
    'DataPortal',
    'DataPortalLogin',
    'DataPortalProject',
    'DataPortalProcess',
    'DataPortalDataset',
    'DataPortalReference',
    'CirroApi',
    'file_utils'
]
```
13class DataPortal: 14 """ 15 Helper functions for exploring the Projects, Datasets, Samples, and Files 16 available in the Data Portal. 17 """ 18 19 def __init__(self, base_url: str = None, client: CirroApi = None): 20 """ 21 Set up the DataPortal object, establishing an authenticated connection. 22 23 Args: 24 base_url (str): Optional base URL of the Cirro instance 25 (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file) 26 client (`cirro.cirro_client.CirroApi`): Optional pre-configured client 27 28 Example: 29 ```python 30 from cirro import DataPortal 31 32 Portal = DataPortal(base_url="app.cirro.bio") 33 portal.list_projects() 34 ``` 35 """ 36 37 if client is not None: 38 self._client = client 39 40 # Set up default client if not provided 41 else: 42 self._client = CirroApi(base_url=base_url) 43 44 def list_projects(self) -> DataPortalProjects: 45 """List all the projects available in the Data Portal.""" 46 47 return DataPortalProjects( 48 [ 49 DataPortalProject(proj, self._client) 50 for proj in self._client.projects.list() 51 ] 52 ) 53 54 def get_project_by_name(self, name: str = None) -> DataPortalProject: 55 """Return the project with the specified name.""" 56 57 return self.list_projects().get_by_name(name) 58 59 def get_project_by_id(self, _id: str = None) -> DataPortalProject: 60 """Return the project with the specified id.""" 61 62 return self.list_projects().get_by_id(_id) 63 64 def get_project(self, project: str = None) -> DataPortalProject: 65 """ 66 Return a project identified by ID or name. 67 68 Args: 69 project (str): ID or name of project 70 71 Returns: 72 `from cirro.sdk.project import DataPortalProject` 73 """ 74 try: 75 return self.get_project_by_id(project) 76 except DataPortalAssetNotFound: 77 return self.get_project_by_name(project) 78 79 def get_dataset(self, project: str = None, dataset: str = None) -> DataPortalDataset: 80 """ 81 Return a dataset identified by ID or name. 82 83 Args: 84 project (str): ID or name of project 85 dataset (str): ID or name of dataset 86 87 Returns: 88 `cirro.sdk.dataset.DataPortalDataset` 89 90 ```python 91 from cirro import DataPortal() 92 portal = DataPortal() 93 dataset = portal.get_dataset( 94 project="id-or-name-of-project", 95 dataset="id-or-name-of-dataset" 96 ) 97 ``` 98 """ 99 try: 100 project: DataPortalProject = self.get_project_by_id(project) 101 except DataPortalAssetNotFound: 102 project: DataPortalProject = self.get_project_by_name(project) 103 104 try: 105 return project.get_dataset_by_id(dataset) 106 except DataPortalAssetNotFound: 107 return project.get_dataset_by_name(dataset) 108 109 def list_processes(self, ingest=False) -> DataPortalProcesses: 110 """ 111 List all the processes available in the Data Portal. 112 By default, only list non-ingest processes (those which can be run on existing datasets). 113 To list the processes which can be used to upload datasets, use `ingest = True`. 114 115 Args: 116 ingest (bool): If True, only list those processes which can be used to ingest datasets directly 117 """ 118 119 return DataPortalProcesses( 120 [ 121 DataPortalProcess(p, self._client) 122 for p in self._client.processes.list() 123 if not ingest or p.executor == Executor.INGEST 124 ] 125 ) 126 127 def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess: 128 """ 129 Return the process with the specified name. 
130 131 Args: 132 name (str): Name of process 133 """ 134 135 return self.list_processes(ingest=ingest).get_by_name(name) 136 137 def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess: 138 """ 139 Return the process with the specified id 140 141 Args: 142 id (str): ID of process 143 """ 144 145 return self.list_processes(ingest=ingest).get_by_id(id) 146 147 def list_reference_types(self) -> DataPortalReferenceTypes: 148 """ 149 Return the list of all available reference types 150 """ 151 152 return DataPortalReferenceTypes( 153 [ 154 DataPortalReferenceType(ref) 155 for ref in self._client.references.get_types() 156 ] 157 ) 158 159 @property 160 def developer_helper(self) -> DeveloperHelper: 161 return DeveloperHelper(self._client)
Helper functions for exploring the Projects, Datasets, Samples, and Files available in the Data Portal.
````python
def __init__(self, base_url: str = None, client: CirroApi = None):
    """
    Set up the DataPortal object, establishing an authenticated connection.

    Args:
        base_url (str): Optional base URL of the Cirro instance
         (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
        client (`cirro.cirro_client.CirroApi`): Optional pre-configured client

    Example:
        ```python
        from cirro import DataPortal

        portal = DataPortal(base_url="app.cirro.bio")
        portal.list_projects()
        ```
    """

    if client is not None:
        self._client = client

    # Set up default client if not provided
    else:
        self._client = CirroApi(base_url=base_url)
````
Set up the DataPortal object, establishing an authenticated connection.
Arguments:
- base_url (str): Optional base URL of the Cirro instance (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
- client (`cirro.cirro_client.CirroApi`): Optional pre-configured client
Example:
```python
from cirro import DataPortal

portal = DataPortal(base_url="app.cirro.bio")
portal.list_projects()
```
```python
def list_projects(self) -> DataPortalProjects:
    """List all the projects available in the Data Portal."""

    return DataPortalProjects(
        [
            DataPortalProject(proj, self._client)
            for proj in self._client.projects.list()
        ]
    )
```
List all the projects available in the Data Portal.
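A minimal sketch of browsing projects, assuming the returned `DataPortalProjects` collection can be iterated like a list (it is built from one) and that credentials are already configured:

```python
from cirro import DataPortal

portal = DataPortal()

# Print the name and unique ID of every project visible to this user
for project in portal.list_projects():
    print(f"{project.name} ({project.id})")
```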
```python
def get_project_by_name(self, name: str = None) -> DataPortalProject:
    """Return the project with the specified name."""

    return self.list_projects().get_by_name(name)
```
Return the project with the specified name.
```python
def get_project_by_id(self, _id: str = None) -> DataPortalProject:
    """Return the project with the specified id."""

    return self.list_projects().get_by_id(_id)
```
Return the project with the specified id.
```python
def get_project(self, project: str = None) -> DataPortalProject:
    """
    Return a project identified by ID or name.

    Args:
        project (str): ID or name of project

    Returns:
        `cirro.sdk.project.DataPortalProject`
    """
    try:
        return self.get_project_by_id(project)
    except DataPortalAssetNotFound:
        return self.get_project_by_name(project)
```
Return a project identified by ID or name.
Arguments:
- project (str): ID or name of project
Returns:
cirro.sdk.project.DataPortalProject
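Because `get_project` falls back from an ID lookup to a name lookup, the same call accepts either identifier. A short sketch (the project name below is a placeholder):

```python
from cirro import DataPortal

portal = DataPortal()

# Works with either the project ID or the project name
project = portal.get_project("Example Project")
print(project)  # __str__ prints the name, ID, and description
```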
````python
def get_dataset(self, project: str = None, dataset: str = None) -> DataPortalDataset:
    """
    Return a dataset identified by ID or name.

    Args:
        project (str): ID or name of project
        dataset (str): ID or name of dataset

    Returns:
        `cirro.sdk.dataset.DataPortalDataset`

    ```python
    from cirro import DataPortal
    portal = DataPortal()
    dataset = portal.get_dataset(
        project="id-or-name-of-project",
        dataset="id-or-name-of-dataset"
    )
    ```
    """
    try:
        project: DataPortalProject = self.get_project_by_id(project)
    except DataPortalAssetNotFound:
        project: DataPortalProject = self.get_project_by_name(project)

    try:
        return project.get_dataset_by_id(dataset)
    except DataPortalAssetNotFound:
        return project.get_dataset_by_name(dataset)
````
Return a dataset identified by ID or name.
Arguments:
- project (str): ID or name of project
- dataset (str): ID or name of dataset
Returns:
cirro.sdk.dataset.DataPortalDataset
```python
from cirro import DataPortal

portal = DataPortal()
dataset = portal.get_dataset(
    project="id-or-name-of-project",
    dataset="id-or-name-of-dataset"
)
```
```python
def list_processes(self, ingest=False) -> DataPortalProcesses:
    """
    List all the processes available in the Data Portal.
    By default, only list non-ingest processes (those which can be run on existing datasets).
    To list the processes which can be used to upload datasets, use `ingest = True`.

    Args:
        ingest (bool): If True, only list those processes which can be used to ingest datasets directly
    """

    return DataPortalProcesses(
        [
            DataPortalProcess(p, self._client)
            for p in self._client.processes.list()
            if not ingest or p.executor == Executor.INGEST
        ]
    )
```
List all the processes available in the Data Portal.
By default, only list non-ingest processes (those which can be run on existing datasets).
To list the processes which can be used to upload datasets, use `ingest = True`.
Arguments:
- ingest (bool): If True, only list those processes which can be used to ingest datasets directly
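A short sketch contrasting the two listings; the names printed depend on the processes registered in your Cirro instance, and the collection is assumed to be iterable like a list:

```python
from cirro import DataPortal

portal = DataPortal()

# Analysis processes (those which can be run on existing datasets)
for process in portal.list_processes():
    print(process.name)

# Ingest processes (those which can be used to upload new datasets)
for process in portal.list_processes(ingest=True):
    print(process.name)
```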
```python
def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess:
    """
    Return the process with the specified name.

    Args:
        name (str): Name of process
    """

    return self.list_processes(ingest=ingest).get_by_name(name)
```
Return the process with the specified name.
Arguments:
- name (str): Name of process
```python
def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess:
    """
    Return the process with the specified id

    Args:
        id (str): ID of process
    """

    return self.list_processes(ingest=ingest).get_by_id(id)
```
Return the process with the specified id
Arguments:
- id (str): ID of process
```python
def list_reference_types(self) -> DataPortalReferenceTypes:
    """
    Return the list of all available reference types
    """

    return DataPortalReferenceTypes(
        [
            DataPortalReferenceType(ref)
            for ref in self._client.references.get_types()
        ]
    )
```
Return the list of all available reference types
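A sketch of inspecting the available reference types, assuming the returned collection is iterable:

```python
from cirro import DataPortal

portal = DataPortal()

# Print each reference type supported by the instance
for ref_type in portal.list_reference_types():
    print(ref_type)
```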
````python
class DataPortalLogin:
    """
    Start the login process, obtaining the authorization message from Cirro
    needed to confirm the user identity.

    Useful when you need to authenticate a user in a non-blocking way.

    Usage:

    ```python
    # Replace app.cirro.bio as appropriate
    login = DataPortalLogin(base_url="app.cirro.bio")

    # Present the user with the authorization message
    print(login.auth_message)

    # Generate the authenticated DataPortal object,
    # blocking until the user completes the login process in their browser
    portal = login.await_completion()
    ```
    """
    base_url: str
    auth_info: DeviceCodeAuth

    def __init__(self, base_url: str = None, enable_cache=False):
        app_config = AppConfig(base_url=base_url)

        self.base_url = base_url

        self.auth_info = DeviceCodeAuth(
            region=app_config.region,
            client_id=app_config.client_id,
            auth_endpoint=app_config.auth_endpoint,
            enable_cache=enable_cache,
            await_completion=False
        )

    @property
    def auth_message(self) -> str:
        """Authorization message provided by Cirro."""
        return self.auth_info.auth_message

    @property
    def auth_message_markdown(self) -> str:
        """Authorization message provided by Cirro (Markdown format)."""
        return self.auth_info.auth_message_markdown

    def await_completion(self) -> DataPortal:
        """Complete the login process and return an authenticated client"""

        # Block until the user completes the login flow
        self.auth_info.await_completion()

        # Set up the client object
        cirro_client = CirroApi(
            auth_info=self.auth_info,
            base_url=self.base_url
        )

        # Return the Data Portal object
        return DataPortal(client=cirro_client)
````
Start the login process, obtaining the authorization message from Cirro needed to confirm the user identity.
Useful when you need to authenticate a user in a non-blocking way.
Usage:
```python
# Replace app.cirro.bio as appropriate
login = DataPortalLogin(base_url="app.cirro.bio")

# Present the user with the authorization message
print(login.auth_message)

# Generate the authenticated DataPortal object,
# blocking until the user completes the login process in their browser
portal = login.await_completion()
```
```python
def __init__(self, base_url: str = None, enable_cache=False):
    app_config = AppConfig(base_url=base_url)

    self.base_url = base_url

    self.auth_info = DeviceCodeAuth(
        region=app_config.region,
        client_id=app_config.client_id,
        auth_endpoint=app_config.auth_endpoint,
        enable_cache=enable_cache,
        await_completion=False
    )
```
```python
@property
def auth_message(self) -> str:
    """Authorization message provided by Cirro."""
    return self.auth_info.auth_message
```
Authorization message provided by Cirro.
```python
@property
def auth_message_markdown(self) -> str:
    """Authorization message provided by Cirro (Markdown format)."""
    return self.auth_info.auth_message_markdown
```
Authorization message provided by Cirro (Markdown format).
```python
def await_completion(self) -> DataPortal:
    """Complete the login process and return an authenticated client"""

    # Block until the user completes the login flow
    self.auth_info.await_completion()

    # Set up the client object
    cirro_client = CirroApi(
        auth_info=self.auth_info,
        base_url=self.base_url
    )

    # Return the Data Portal object
    return DataPortal(client=cirro_client)
```
Complete the login process and return an authenticated client
20class DataPortalProject(DataPortalAsset): 21 """ 22 Projects in the Data Portal contain collections of Datasets. 23 Users are granted permissions at the project-level, allowing them 24 to view and/or modify all the datasets in that collection. 25 """ 26 def __init__(self, proj: Project, client: CirroApi): 27 """ 28 Instantiate with helper method 29 30 ```python 31 from cirro import DataPortal() 32 portal = DataPortal() 33 project = portal.get_project_by_name("Project Name") 34 ``` 35 36 """ 37 self._data = proj 38 self._client = client 39 40 @property 41 def id(self) -> str: 42 """ 43 Unique identifier 44 """ 45 return self._data.id 46 47 @property 48 def name(self) -> str: 49 """ 50 Readable name 51 """ 52 return self._data.name 53 54 @property 55 def description(self) -> str: 56 """ 57 Longer description of the project 58 """ 59 return self._data.description 60 61 @property 62 def status(self) -> Status: 63 """ 64 Status of the project 65 """ 66 return self._data.status 67 68 def __str__(self): 69 """Control how the Project is rendered as a string.""" 70 71 return '\n'.join([ 72 f"{i.title()}: {self.__getattribute__(i)}" 73 for i in ['name', 'id', 'description'] 74 ]) 75 76 @cache 77 def _get_datasets(self) -> List[Dataset]: 78 return list_all_datasets(project_id=self.id, 79 client=self._client) 80 81 def list_datasets(self, force_refresh=False) -> DataPortalDatasets: 82 """List all the datasets available in the project.""" 83 if force_refresh: 84 self._get_datasets.cache_clear() 85 86 return DataPortalDatasets( 87 [ 88 DataPortalDataset(d, self._client) 89 for d in self._get_datasets() 90 ] 91 ) 92 93 def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset: 94 """Return the dataset with the specified name.""" 95 if force_refresh: 96 self._get_datasets.cache_clear() 97 98 dataset = next((d for d in self._get_datasets() if d.name == name), None) 99 if dataset is None: 100 raise DataPortalAssetNotFound(f'Dataset with name {name} not found') 101 return self.get_dataset_by_id(dataset.id) 102 103 def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset: 104 """Return the dataset with the specified id.""" 105 106 dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id) 107 if dataset is None: 108 raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found') 109 return DataPortalDataset(dataset, self._client) 110 111 def list_references(self, reference_type: str = None) -> DataPortalReferences: 112 """ 113 List the references available in a project. 
114 Optionally filter to references of a particular type (identified by name) 115 """ 116 117 # Get the complete list of references which are available 118 reference_types = DataPortalReferenceTypes( 119 [ 120 DataPortalReferenceType(ref) 121 for ref in self._client.references.get_types() 122 ] 123 ) 124 125 # If a particular name was specified 126 if reference_type is not None: 127 reference_types = reference_types.filter_by_pattern(reference_type) 128 if len(reference_types) == 0: 129 msg = f"Could not find any reference types with the name {reference_type}" 130 raise DataPortalAssetNotFound(msg) 131 132 return DataPortalReferences( 133 [ 134 DataPortalReference(ref, project_id=self.id, client=self._client) 135 for ref in self._client.references.get_for_project( 136 self.id 137 ) 138 if reference_type is None or ref.type == reference_type 139 ] 140 ) 141 142 def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference: 143 """Return the reference of a particular type with the specified name.""" 144 145 if name is None: 146 raise DataPortalInputError("Must specify the reference name") 147 148 return self.list_references(ref_type).get_by_name(name) 149 150 def upload_dataset( 151 self, 152 name: str = None, 153 description='', 154 process: Union[DataPortalProcess, str] = None, 155 upload_folder: str = None, 156 files: List[str] = None, 157 tags: List[str] = None, 158 ): 159 """ 160 Upload a set of files to the Data Portal, creating a new dataset. 161 162 If the files parameter is not provided, it will upload all files in the upload folder 163 164 Args: 165 name (str): Name of newly created dataset 166 description (str): Description of newly created dataset 167 process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object 168 upload_folder (str): Folder containing files to upload 169 files (List[str]): Optional subset of files to upload from the folder 170 tags (List[str]): Optional list of tags to apply to the dataset 171 """ 172 173 if name is None: 174 raise DataPortalInputError("Must provide name for new dataset") 175 if process is None: 176 raise DataPortalInputError("Must provide the process which is used for ingest") 177 if upload_folder is None: 178 raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload") 179 180 # Parse the process provided by the user 181 process = parse_process_name_or_id(process, self._client) 182 183 # If no files were provided 184 if files is None: 185 # Get the list of files in the upload folder 186 files = get_files_in_directory(upload_folder) 187 188 if files is None or len(files) == 0: 189 raise RuntimeWarning("No files to upload, exiting") 190 191 # Normalize into Tag object 192 if tags is not None: 193 tags = [Tag(value=value) for value in tags] 194 195 # Make sure that the files match the expected pattern 196 self._client.processes.check_dataset_files(files, process.id, upload_folder) 197 198 # Create the ingest process request 199 dataset_create_request = UploadDatasetRequest( 200 process_id=process.id, 201 name=name, 202 description=description, 203 expected_files=files, 204 tags=tags, 205 ) 206 207 # Get the response 208 create_response = self._client.datasets.create(project_id=self.id, 209 upload_request=dataset_create_request) 210 211 # Upload the files 212 self._client.datasets.upload_files( 213 project_id=self.id, 214 dataset_id=create_response.id, 215 directory=upload_folder, 216 files=files 217 ) 218 219 # Return the dataset which was created, 
which might take a second to update 220 max_attempts = 5 221 for attempt in range(max_attempts): 222 try: 223 return self.get_dataset_by_id(create_response.id) 224 except DataPortalAssetNotFound as e: 225 if attempt == max_attempts - 1: 226 raise e 227 else: 228 sleep(2) 229 230 def samples(self, max_items: int = 10000) -> List[Sample]: 231 """ 232 Retrieves a list of samples associated with a project along with their metadata 233 234 Args: 235 max_items (int): Maximum number of records to get (default 10,000) 236 """ 237 return self._client.metadata.get_project_samples(self.id, max_items)
Projects in the Data Portal contain collections of Datasets. Users are granted permissions at the project-level, allowing them to view and/or modify all the datasets in that collection.
````python
def __init__(self, proj: Project, client: CirroApi):
    """
    Instantiate with helper method

    ```python
    from cirro import DataPortal
    portal = DataPortal()
    project = portal.get_project_by_name("Project Name")
    ```

    """
    self._data = proj
    self._client = client
````
Instantiate using a helper method, for example:

```python
from cirro import DataPortal

portal = DataPortal()
project = portal.get_project_by_name("Project Name")
```
```python
@property
def description(self) -> str:
    """
    Longer description of the project
    """
    return self._data.description
```
Longer description of the project
```python
@property
def status(self) -> Status:
    """
    Status of the project
    """
    return self._data.status
```
Status of the project
```python
def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
    """List all the datasets available in the project."""
    if force_refresh:
        self._get_datasets.cache_clear()

    return DataPortalDatasets(
        [
            DataPortalDataset(d, self._client)
            for d in self._get_datasets()
        ]
    )
```
List all the datasets available in the project.
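A sketch of listing the datasets in a project; the project name is a placeholder:

```python
from cirro import DataPortal

portal = DataPortal()
project = portal.get_project_by_name("Example Project")

# force_refresh clears the cached listing before fetching
for dataset in project.list_datasets(force_refresh=True):
    print(f"{dataset.name}\t{dataset.status}\t{dataset.created_at}")
```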
```python
def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
    """Return the dataset with the specified name."""
    if force_refresh:
        self._get_datasets.cache_clear()

    dataset = next((d for d in self._get_datasets() if d.name == name), None)
    if dataset is None:
        raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
    return self.get_dataset_by_id(dataset.id)
```
Return the dataset with the specified name.
103 def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset: 104 """Return the dataset with the specified id.""" 105 106 dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id) 107 if dataset is None: 108 raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found') 109 return DataPortalDataset(dataset, self._client)
Return the dataset with the specified id.
```python
def list_references(self, reference_type: str = None) -> DataPortalReferences:
    """
    List the references available in a project.
    Optionally filter to references of a particular type (identified by name)
    """

    # Get the complete list of references which are available
    reference_types = DataPortalReferenceTypes(
        [
            DataPortalReferenceType(ref)
            for ref in self._client.references.get_types()
        ]
    )

    # If a particular name was specified
    if reference_type is not None:
        reference_types = reference_types.filter_by_pattern(reference_type)
        if len(reference_types) == 0:
            msg = f"Could not find any reference types with the name {reference_type}"
            raise DataPortalAssetNotFound(msg)

    return DataPortalReferences(
        [
            DataPortalReference(ref, project_id=self.id, client=self._client)
            for ref in self._client.references.get_for_project(
                self.id
            )
            if reference_type is None or ref.type == reference_type
        ]
    )
```
List the references available in a project. Optionally filter to references of a particular type (identified by name)
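A sketch of listing references, with and without a type filter; the project name is a placeholder and `genome_fasta` is the example type name used in the reference documentation further below:

```python
from cirro import DataPortal

portal = DataPortal()
project = portal.get_project_by_name("Example Project")

# All references attached to the project
for ref in project.list_references():
    print(ref.name, ref.type)

# Only references of a single type
genomes = project.list_references("genome_fasta")
```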
```python
def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
    """Return the reference of a particular type with the specified name."""

    if name is None:
        raise DataPortalInputError("Must specify the reference name")

    return self.list_references(ref_type).get_by_name(name)
```
Return the reference of a particular type with the specified name.
```python
def upload_dataset(
    self,
    name: str = None,
    description='',
    process: Union[DataPortalProcess, str] = None,
    upload_folder: str = None,
    files: List[str] = None,
    tags: List[str] = None,
):
    """
    Upload a set of files to the Data Portal, creating a new dataset.

    If the files parameter is not provided, it will upload all files in the upload folder

    Args:
        name (str): Name of newly created dataset
        description (str): Description of newly created dataset
        process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object
        upload_folder (str): Folder containing files to upload
        files (List[str]): Optional subset of files to upload from the folder
        tags (List[str]): Optional list of tags to apply to the dataset
    """

    if name is None:
        raise DataPortalInputError("Must provide name for new dataset")
    if process is None:
        raise DataPortalInputError("Must provide the process which is used for ingest")
    if upload_folder is None:
        raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")

    # Parse the process provided by the user
    process = parse_process_name_or_id(process, self._client)

    # If no files were provided
    if files is None:
        # Get the list of files in the upload folder
        files = get_files_in_directory(upload_folder)

    if files is None or len(files) == 0:
        raise RuntimeWarning("No files to upload, exiting")

    # Normalize into Tag object
    if tags is not None:
        tags = [Tag(value=value) for value in tags]

    # Make sure that the files match the expected pattern
    self._client.processes.check_dataset_files(files, process.id, upload_folder)

    # Create the ingest process request
    dataset_create_request = UploadDatasetRequest(
        process_id=process.id,
        name=name,
        description=description,
        expected_files=files,
        tags=tags,
    )

    # Get the response
    create_response = self._client.datasets.create(project_id=self.id,
                                                   upload_request=dataset_create_request)

    # Upload the files
    self._client.datasets.upload_files(
        project_id=self.id,
        dataset_id=create_response.id,
        directory=upload_folder,
        files=files
    )

    # Return the dataset which was created, which might take a second to update
    max_attempts = 5
    for attempt in range(max_attempts):
        try:
            return self.get_dataset_by_id(create_response.id)
        except DataPortalAssetNotFound as e:
            if attempt == max_attempts - 1:
                raise e
            else:
                sleep(2)
```
Upload a set of files to the Data Portal, creating a new dataset.
If the files parameter is not provided, it will upload all files in the upload folder
Arguments:
- name (str): Name of newly created dataset
- description (str): Description of newly created dataset
- process (str | DataPortalProcess): Process to run; may be referenced by name, ID, or object
- upload_folder (str): Folder containing files to upload
- files (List[str]): Optional subset of files to upload from the folder
- tags (List[str]): Optional list of tags to apply to the dataset
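A sketch of creating a dataset by uploading a folder of files; the project name, ingest process name, and local path are placeholders:

```python
from cirro import DataPortal

portal = DataPortal()
project = portal.get_project_by_name("Example Project")

dataset = project.upload_dataset(
    name="My sequencing run",
    description="FASTQ files from the May batch",
    process="Paired FASTQ",           # hypothetical ingest process name
    upload_folder="/path/to/fastq",   # every file in the folder is uploaded
    tags=["batch-may"],
)
print(dataset.id)  # the newly created dataset
```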
```python
def samples(self, max_items: int = 10000) -> List[Sample]:
    """
    Retrieves a list of samples associated with a project along with their metadata

    Args:
        max_items (int): Maximum number of records to get (default 10,000)
    """
    return self._client.metadata.get_project_samples(self.id, max_items)
```
Retrieves a list of samples associated with a project along with their metadata
Arguments:
- max_items (int): Maximum number of records to get (default 10,000)
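A sketch of pulling sample-level metadata for a project; the `Sample` objects come from the underlying API client, so the fields available on them depend on that model:

```python
from cirro import DataPortal

portal = DataPortal()
project = portal.get_project_by_name("Example Project")  # placeholder name

for sample in project.samples(max_items=100):
    print(sample)
```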
11class DataPortalProcess(DataPortalAsset): 12 """Helper functions for interacting with analysis processes.""" 13 14 def __init__(self, process: Union[Process, ProcessDetail], client: CirroApi): 15 """ 16 Instantiate with helper method 17 18 ```python 19 from cirro import DataPortal() 20 portal = DataPortal() 21 process = portal.get_process_by_name("Process Name") 22 ``` 23 """ 24 self._data = process 25 self._client = client 26 27 @property 28 def id(self) -> str: 29 """Unique identifier""" 30 return self._data.id 31 32 @property 33 def name(self) -> str: 34 """Readable name""" 35 return self._data.name 36 37 @property 38 def description(self) -> str: 39 """Longer description of process""" 40 return self._data.description 41 42 @property 43 def child_process_ids(self) -> List[str]: 44 """List of processes which can be run on the output of this process""" 45 return self._data.child_process_ids 46 47 @property 48 def executor(self) -> Executor: 49 """INGEST, CROMWELL, or NEXTFLOW""" 50 return self._data.executor 51 52 @property 53 def category(self) -> str: 54 """Category of process""" 55 return self._data.category 56 57 @property 58 def pipeline_type(self) -> str: 59 """Pipeline type""" 60 return self._data.pipeline_type 61 62 @property 63 def documentation_url(self) -> str: 64 """Documentation URL""" 65 return self._data.documentation_url 66 67 @property 68 def file_requirements_message(self) -> str: 69 """Description of files required for INGEST processes""" 70 return self._data.file_requirements_message 71 72 @property 73 def code(self) -> PipelineCode: 74 """Pipeline code configuration""" 75 return self._get_detail().pipeline_code 76 77 @property 78 def custom_settings(self) -> CustomPipelineSettings: 79 """Custom settings for the process""" 80 return self._get_detail().custom_settings 81 82 def _get_detail(self) -> ProcessDetail: 83 if not isinstance(self._data, ProcessDetail): 84 self._data = self._client.processes.get(self.id) 85 return self._data 86 87 def __str__(self): 88 return '\n'.join([ 89 f"{i.title()}: {self.__getattribute__(i)}" 90 for i in ['name', 'id', 'description'] 91 ]) 92 93 def get_parameter_spec(self) -> ParameterSpecification: 94 """ 95 Gets a specification used to describe the parameters used in the process. 96 """ 97 return self._client.processes.get_parameter_spec(self.id)
Helper functions for interacting with analysis processes.
````python
def __init__(self, process: Union[Process, ProcessDetail], client: CirroApi):
    """
    Instantiate with helper method

    ```python
    from cirro import DataPortal
    portal = DataPortal()
    process = portal.get_process_by_name("Process Name")
    ```
    """
    self._data = process
    self._client = client
````
Instantiate using a helper method, for example:

```python
from cirro import DataPortal

portal = DataPortal()
process = portal.get_process_by_name("Process Name")
```
```python
@property
def description(self) -> str:
    """Longer description of process"""
    return self._data.description
```
Longer description of process
```python
@property
def child_process_ids(self) -> List[str]:
    """List of processes which can be run on the output of this process"""
    return self._data.child_process_ids
```
List of processes which can be run on the output of this process
```python
@property
def executor(self) -> Executor:
    """INGEST, CROMWELL, or NEXTFLOW"""
    return self._data.executor
```
INGEST, CROMWELL, or NEXTFLOW
```python
@property
def category(self) -> str:
    """Category of process"""
    return self._data.category
```
Category of process
```python
@property
def pipeline_type(self) -> str:
    """Pipeline type"""
    return self._data.pipeline_type
```
Pipeline type
```python
@property
def documentation_url(self) -> str:
    """Documentation URL"""
    return self._data.documentation_url
```
Documentation URL
```python
@property
def file_requirements_message(self) -> str:
    """Description of files required for INGEST processes"""
    return self._data.file_requirements_message
```
Description of files required for INGEST processes
```python
@property
def code(self) -> PipelineCode:
    """Pipeline code configuration"""
    return self._get_detail().pipeline_code
```
Pipeline code configuration
```python
@property
def custom_settings(self) -> CustomPipelineSettings:
    """Custom settings for the process"""
    return self._get_detail().custom_settings
```
Custom settings for the process
18class DataPortalDataset(DataPortalAsset): 19 """ 20 Datasets in the Data Portal are collections of files which have 21 either been uploaded directly, or which have been output by 22 an analysis pipeline or notebook. 23 """ 24 25 def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi): 26 """ 27 Instantiate a dataset object 28 29 Should be invoked from a top-level constructor, for example: 30 31 ```python 32 from cirro import DataPortal() 33 portal = DataPortal() 34 dataset = portal.get_dataset( 35 project="id-or-name-of-project", 36 dataset="id-or-name-of-dataset" 37 ) 38 ``` 39 40 """ 41 assert dataset.project_id is not None, "Must provide dataset with project_id attribute" 42 self._data = dataset 43 self._assets: Optional[DatasetAssets] = None 44 self._client = client 45 46 @property 47 def id(self) -> str: 48 """Unique identifier for the dataset""" 49 return self._data.id 50 51 @property 52 def name(self) -> str: 53 """Editable name for the dataset""" 54 return self._data.name 55 56 @property 57 def description(self) -> str: 58 """Longer name for the dataset""" 59 return self._data.description 60 61 @property 62 def process_id(self) -> str: 63 """Unique ID of process used to create the dataset""" 64 return self._data.process_id 65 66 @property 67 def process(self) -> ProcessDetail: 68 """ 69 Object representing the process used to create the dataset 70 """ 71 return self._client.processes.get(self.process_id) 72 73 @property 74 def project_id(self) -> str: 75 """ID of the project containing the dataset""" 76 return self._data.project_id 77 78 @property 79 def status(self) -> Status: 80 """ 81 Status of the dataset 82 """ 83 return self._data.status 84 85 @property 86 def source_dataset_ids(self) -> List[str]: 87 """IDs of the datasets used as sources for this dataset (if any)""" 88 return self._data.source_dataset_ids 89 90 @property 91 def source_datasets(self) -> List['DataPortalDataset']: 92 """ 93 Objects representing the datasets used as sources for this dataset (if any) 94 """ 95 return [ 96 DataPortalDataset( 97 dataset=self._client.datasets.get(project_id=self.project_id, dataset_id=dataset_id), 98 client=self._client 99 ) 100 for dataset_id in self.source_dataset_ids 101 ] 102 103 @property 104 def params(self) -> dict: 105 """ 106 Parameters used to generate the dataset 107 """ 108 return self._get_detail().params.to_dict() 109 110 @property 111 def info(self) -> dict: 112 """ 113 Extra information about the dataset 114 """ 115 return self._get_detail().info.to_dict() 116 117 @property 118 def tags(self) -> List[Tag]: 119 """ 120 Tags applied to the dataset 121 """ 122 return self._data.tags 123 124 @property 125 def share(self) -> Optional[NamedItem]: 126 """ 127 Share associated with the dataset, if any. 
128 """ 129 return self._get_detail().share 130 131 @property 132 def created_by(self) -> str: 133 """User who created the dataset""" 134 return self._data.created_by 135 136 @property 137 def created_at(self) -> datetime.datetime: 138 """Timestamp of dataset creation""" 139 return self._data.created_at 140 141 def _get_detail(self): 142 if not isinstance(self._data, DatasetDetail): 143 self._data = self._client.datasets.get(project_id=self.project_id, dataset_id=self.id) 144 return self._data 145 146 def _get_assets(self): 147 if not self._assets: 148 self._assets = self._client.datasets.get_assets_listing( 149 project_id=self.project_id, 150 dataset_id=self.id 151 ) 152 return self._assets 153 154 def __str__(self): 155 return '\n'.join([ 156 f"{i.title()}: {self.__getattribute__(i)}" 157 for i in ['name', 'id', 'description', 'status'] 158 ]) 159 160 def get_file(self, relative_path: str) -> DataPortalFile: 161 """ 162 Get a file from the dataset using its relative path. 163 164 Args: 165 relative_path (str): Relative path of file within the dataset 166 167 Returns: 168 `from cirro.sdk.file import DataPortalFile` 169 """ 170 171 # Get the list of files in this dataset 172 files = self.list_files() 173 174 # Try getting the file using the relative path provided by the user 175 try: 176 return files.get_by_id(relative_path) 177 except DataPortalAssetNotFound: 178 # Try getting the file with the 'data/' prefix prepended 179 try: 180 return files.get_by_id("data/" + relative_path) 181 except DataPortalAssetNotFound: 182 # If not found, raise the exception using the string provided 183 # by the user, not the data/ prepended version (which may be 184 # confusing to the user) 185 msg = '\n'.join([f"No file found with path '{relative_path}'."]) 186 raise DataPortalAssetNotFound(msg) 187 188 def list_files(self) -> DataPortalFiles: 189 """ 190 Return the list of files which make up the dataset. 191 """ 192 files = self._get_assets().files 193 return DataPortalFiles( 194 [ 195 DataPortalFile(file=file, client=self._client) 196 for file in files 197 ] 198 ) 199 200 def get_artifact(self, artifact_type: ArtifactType) -> DataPortalFile: 201 """ 202 Get the artifact of a particular type from the dataset 203 """ 204 artifacts = self._get_assets().artifacts 205 artifact = next((a for a in artifacts if a.artifact_type == artifact_type), None) 206 if artifact is None: 207 raise DataPortalAssetNotFound(f"No artifact found with type '{artifact_type}'") 208 return DataPortalFile(file=artifact.file, client=self._client) 209 210 def list_artifacts(self) -> List[DataPortalFile]: 211 """ 212 Return the list of artifacts associated with the dataset 213 214 An artifact may be something generated as part of the analysis or other process. 215 See `cirro_api_client.v1.models.ArtifactType` for the list of possible artifact types. 216 217 """ 218 artifacts = self._get_assets().artifacts 219 return DataPortalFiles( 220 [ 221 DataPortalFile(file=artifact.file, client=self._client) 222 for artifact in artifacts 223 ] 224 ) 225 226 def download_files(self, download_location: str = None) -> None: 227 """ 228 Download all the files from the dataset to a local directory. 
229 230 Args: 231 download_location (str): Path to local directory 232 """ 233 234 # Alias for internal method 235 self.list_files().download(download_location) 236 237 def run_analysis( 238 self, 239 name: str = None, 240 description: str = "", 241 process: Union[DataPortalProcess, str] = None, 242 params=None, 243 notifications_emails: List[str] = None, 244 compute_environment: str = None, 245 resume_dataset_id: str = None 246 ) -> str: 247 """ 248 Runs an analysis on a dataset, returns the ID of the newly created dataset. 249 250 The process can be provided as either a DataPortalProcess object, 251 or a string which corresponds to the name or ID of the process. 252 253 Args: 254 name (str): Name of newly created dataset 255 description (str): Description of newly created dataset 256 process (DataPortalProcess or str): Process to run 257 params (dict): Analysis parameters 258 notifications_emails (List[str]): Notification email address(es) 259 compute_environment (str): Name or ID of compute environment to use, 260 if blank it will run in AWS 261 resume_dataset_id (str): ID of dataset to resume from, used for caching task execution. 262 It will attempt to re-use the previous output to minimize duplicate work 263 264 Returns: 265 dataset_id (str): ID of newly created dataset 266 """ 267 if name is None: 268 raise DataPortalInputError("Must specify 'name' for run_analysis") 269 if process is None: 270 raise DataPortalInputError("Must specify 'process' for run_analysis") 271 if notifications_emails is None: 272 notifications_emails = [] 273 if params is None: 274 params = {} 275 276 # If the process is a string, try to parse it as a process name or ID 277 process = parse_process_name_or_id(process, self._client) 278 279 if compute_environment: 280 compute_environments = self._client.compute_environments.list_environments_for_project( 281 project_id=self.project_id 282 ) 283 compute_environment = next( 284 (env for env in compute_environments 285 if env.name == compute_environment or env.id == compute_environment), 286 None 287 ) 288 if compute_environment is None: 289 raise DataPortalInputError(f"Compute environment '{compute_environment}' not found") 290 291 resp = self._client.execution.run_analysis( 292 project_id=self.project_id, 293 request=RunAnalysisRequest( 294 name=name, 295 description=description, 296 process_id=process.id, 297 source_dataset_ids=[self.id], 298 params=RunAnalysisRequestParams.from_dict(params), 299 notification_emails=notifications_emails, 300 resume_dataset_id=resume_dataset_id, 301 compute_environment_id=compute_environment.id if compute_environment else None 302 ) 303 ) 304 return resp.id
Datasets in the Data Portal are collections of files which have either been uploaded directly, or which have been output by an analysis pipeline or notebook.
````python
def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi):
    """
    Instantiate a dataset object

    Should be invoked from a top-level constructor, for example:

    ```python
    from cirro import DataPortal
    portal = DataPortal()
    dataset = portal.get_dataset(
        project="id-or-name-of-project",
        dataset="id-or-name-of-dataset"
    )
    ```

    """
    assert dataset.project_id is not None, "Must provide dataset with project_id attribute"
    self._data = dataset
    self._assets: Optional[DatasetAssets] = None
    self._client = client
````
Instantiate a dataset object.

Should be invoked from a top-level constructor, for example:

```python
from cirro import DataPortal

portal = DataPortal()
dataset = portal.get_dataset(
    project="id-or-name-of-project",
    dataset="id-or-name-of-dataset"
)
```
```python
@property
def id(self) -> str:
    """Unique identifier for the dataset"""
    return self._data.id
```
Unique identifier for the dataset
```python
@property
def name(self) -> str:
    """Editable name for the dataset"""
    return self._data.name
```
Editable name for the dataset
```python
@property
def description(self) -> str:
    """Longer description of the dataset"""
    return self._data.description
```
Longer description of the dataset
```python
@property
def process_id(self) -> str:
    """Unique ID of process used to create the dataset"""
    return self._data.process_id
```
Unique ID of process used to create the dataset
```python
@property
def process(self) -> ProcessDetail:
    """
    Object representing the process used to create the dataset
    """
    return self._client.processes.get(self.process_id)
```
Object representing the process used to create the dataset
```python
@property
def project_id(self) -> str:
    """ID of the project containing the dataset"""
    return self._data.project_id
```
ID of the project containing the dataset
```python
@property
def status(self) -> Status:
    """
    Status of the dataset
    """
    return self._data.status
```
Status of the dataset
```python
@property
def source_dataset_ids(self) -> List[str]:
    """IDs of the datasets used as sources for this dataset (if any)"""
    return self._data.source_dataset_ids
```
IDs of the datasets used as sources for this dataset (if any)
```python
@property
def source_datasets(self) -> List['DataPortalDataset']:
    """
    Objects representing the datasets used as sources for this dataset (if any)
    """
    return [
        DataPortalDataset(
            dataset=self._client.datasets.get(project_id=self.project_id, dataset_id=dataset_id),
            client=self._client
        )
        for dataset_id in self.source_dataset_ids
    ]
```
Objects representing the datasets used as sources for this dataset (if any)
```python
@property
def params(self) -> dict:
    """
    Parameters used to generate the dataset
    """
    return self._get_detail().params.to_dict()
```
Parameters used to generate the dataset
```python
@property
def info(self) -> dict:
    """
    Extra information about the dataset
    """
    return self._get_detail().info.to_dict()
```
Extra information about the dataset
```python
@property
def created_by(self) -> str:
    """User who created the dataset"""
    return self._data.created_by
```
User who created the dataset
```python
@property
def created_at(self) -> datetime.datetime:
    """Timestamp of dataset creation"""
    return self._data.created_at
```
Timestamp of dataset creation
```python
def get_file(self, relative_path: str) -> DataPortalFile:
    """
    Get a file from the dataset using its relative path.

    Args:
        relative_path (str): Relative path of file within the dataset

    Returns:
        `cirro.sdk.file.DataPortalFile`
    """

    # Get the list of files in this dataset
    files = self.list_files()

    # Try getting the file using the relative path provided by the user
    try:
        return files.get_by_id(relative_path)
    except DataPortalAssetNotFound:
        # Try getting the file with the 'data/' prefix prepended
        try:
            return files.get_by_id("data/" + relative_path)
        except DataPortalAssetNotFound:
            # If not found, raise the exception using the string provided
            # by the user, not the data/ prepended version (which may be
            # confusing to the user)
            msg = '\n'.join([f"No file found with path '{relative_path}'."])
            raise DataPortalAssetNotFound(msg)
```
Get a file from the dataset using its relative path.
Arguments:
- relative_path (str): Relative path of file within the dataset
Returns:
cirro.sdk.file.DataPortalFile
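A sketch of fetching a single file by its path within the dataset; the identifiers and path are placeholders, and the `data/` prefix may be omitted because the lookup retries with it prepended:

```python
from cirro import DataPortal

portal = DataPortal()
dataset = portal.get_dataset(
    project="id-or-name-of-project",
    dataset="id-or-name-of-dataset"
)

# Resolves to the same file as "data/results/summary.csv"
file = dataset.get_file("results/summary.csv")  # hypothetical path
```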
```python
def list_files(self) -> DataPortalFiles:
    """
    Return the list of files which make up the dataset.
    """
    files = self._get_assets().files
    return DataPortalFiles(
        [
            DataPortalFile(file=file, client=self._client)
            for file in files
        ]
    )
```
Return the list of files which make up the dataset.
```python
def get_artifact(self, artifact_type: ArtifactType) -> DataPortalFile:
    """
    Get the artifact of a particular type from the dataset
    """
    artifacts = self._get_assets().artifacts
    artifact = next((a for a in artifacts if a.artifact_type == artifact_type), None)
    if artifact is None:
        raise DataPortalAssetNotFound(f"No artifact found with type '{artifact_type}'")
    return DataPortalFile(file=artifact.file, client=self._client)
```
Get the artifact of a particular type from the dataset
```python
def list_artifacts(self) -> List[DataPortalFile]:
    """
    Return the list of artifacts associated with the dataset

    An artifact may be something generated as part of the analysis or other process.
    See `cirro_api_client.v1.models.ArtifactType` for the list of possible artifact types.

    """
    artifacts = self._get_assets().artifacts
    return DataPortalFiles(
        [
            DataPortalFile(file=artifact.file, client=self._client)
            for artifact in artifacts
        ]
    )
```
Return the list of artifacts associated with the dataset
An artifact may be something generated as part of the analysis or other process.
See `cirro_api_client.v1.models.ArtifactType` for the list of possible artifact types.
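A sketch of inspecting artifacts; the dataset identifiers are placeholders:

```python
from cirro import DataPortal

portal = DataPortal()
dataset = portal.get_dataset(
    project="id-or-name-of-project",
    dataset="id-or-name-of-dataset"
)

# List every artifact attached to the dataset
for artifact in dataset.list_artifacts():
    print(artifact)

# To fetch one artifact directly, pass a member of
# cirro_api_client.v1.models.ArtifactType to dataset.get_artifact(...)
```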
```python
def download_files(self, download_location: str = None) -> None:
    """
    Download all the files from the dataset to a local directory.

    Args:
        download_location (str): Path to local directory
    """

    # Alias for internal method
    self.list_files().download(download_location)
```
Download all the files from the dataset to a local directory.
Arguments:
- download_location (str): Path to local directory
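A sketch of downloading the full contents of a dataset to a local directory (the identifiers and path are placeholders):

```python
from cirro import DataPortal

portal = DataPortal()
dataset = portal.get_dataset(
    project="id-or-name-of-project",
    dataset="id-or-name-of-dataset"
)

# Every file in the dataset is written under this directory
dataset.download_files(download_location="/tmp/my-dataset")
```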
```python
def run_analysis(
    self,
    name: str = None,
    description: str = "",
    process: Union[DataPortalProcess, str] = None,
    params=None,
    notifications_emails: List[str] = None,
    compute_environment: str = None,
    resume_dataset_id: str = None
) -> str:
    """
    Runs an analysis on a dataset, returns the ID of the newly created dataset.

    The process can be provided as either a DataPortalProcess object,
    or a string which corresponds to the name or ID of the process.

    Args:
        name (str): Name of newly created dataset
        description (str): Description of newly created dataset
        process (DataPortalProcess or str): Process to run
        params (dict): Analysis parameters
        notifications_emails (List[str]): Notification email address(es)
        compute_environment (str): Name or ID of compute environment to use,
         if blank it will run in AWS
        resume_dataset_id (str): ID of dataset to resume from, used for caching task execution.
         It will attempt to re-use the previous output to minimize duplicate work

    Returns:
        dataset_id (str): ID of newly created dataset
    """
    if name is None:
        raise DataPortalInputError("Must specify 'name' for run_analysis")
    if process is None:
        raise DataPortalInputError("Must specify 'process' for run_analysis")
    if notifications_emails is None:
        notifications_emails = []
    if params is None:
        params = {}

    # If the process is a string, try to parse it as a process name or ID
    process = parse_process_name_or_id(process, self._client)

    if compute_environment:
        compute_environments = self._client.compute_environments.list_environments_for_project(
            project_id=self.project_id
        )
        compute_environment = next(
            (env for env in compute_environments
             if env.name == compute_environment or env.id == compute_environment),
            None
        )
        if compute_environment is None:
            raise DataPortalInputError(f"Compute environment '{compute_environment}' not found")

    resp = self._client.execution.run_analysis(
        project_id=self.project_id,
        request=RunAnalysisRequest(
            name=name,
            description=description,
            process_id=process.id,
            source_dataset_ids=[self.id],
            params=RunAnalysisRequestParams.from_dict(params),
            notification_emails=notifications_emails,
            resume_dataset_id=resume_dataset_id,
            compute_environment_id=compute_environment.id if compute_environment else None
        )
    )
    return resp.id
```
Runs an analysis on a dataset, returns the ID of the newly created dataset.
The process can be provided as either a DataPortalProcess object, or a string which corresponds to the name or ID of the process.
Arguments:
- name (str): Name of newly created dataset
- description (str): Description of newly created dataset
- process (DataPortalProcess or str): Process to run
- params (dict): Analysis parameters
- notifications_emails (List[str]): Notification email address(es)
- compute_environment (str): Name or ID of compute environment to use, if blank it will run in AWS
- resume_dataset_id (str): ID of dataset to resume from, used for caching task execution. It will attempt to re-use the previous output to minimize duplicate work
Returns:
dataset_id (str): ID of newly created dataset
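A sketch of launching an analysis from an existing dataset; the process name, parameters, and email address are placeholders that depend on the pipeline being run:

```python
from cirro import DataPortal

portal = DataPortal()
dataset = portal.get_dataset(
    project="id-or-name-of-project",
    dataset="id-or-name-of-dataset"
)

new_dataset_id = dataset.run_analysis(
    name="Re-run alignment",
    description="Triggered from the SDK",
    process="Example Analysis Process",   # name or ID of the process
    params={},                             # parameters for the chosen pipeline
    notifications_emails=["user@example.com"],
)
print(new_dataset_id)
```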
````python
class DataPortalReference(DataPortalAsset):
    """
    Reference data object containing files which can be used for analysis in a particular project.
    """
    def __init__(self, ref: Reference, project_id: str, client: CirroApi):
        """
        Instantiate by listing the references which have been added to a particular project
        ```python
        from cirro import DataPortal
        portal = DataPortal()
        project = portal.get_project_by_name("Project Name")
        references = project.list_references()
        ```
        """
        self._data = ref
        self._files = [
            DataPortalFile(File.from_file_entry(f, project_id), client) for f in ref.files
        ]

    @property
    def files(self) -> List[DataPortalFile]:
        """File(s) contained in the reference"""
        return self._files

    @property
    def name(self) -> str:
        """Reference name"""
        return self._data.name

    @property
    def type(self) -> str:
        """Type of reference data (e.g. genome_fasta)"""
        return self._data.type

    @property
    def absolute_path(self):
        if len(self._files) == 0:
            return None
        return self._files[0].absolute_path

    def __str__(self):
        return self.name
````
Reference data object containing files which can be used for analysis in a particular project.
````python
def __init__(self, ref: Reference, project_id: str, client: CirroApi):
    """
    Instantiate by listing the references which have been added to a particular project
    ```python
    from cirro import DataPortal
    portal = DataPortal()
    project = portal.get_project_by_name("Project Name")
    references = project.list_references()
    ```
    """
    self._data = ref
    self._files = [
        DataPortalFile(File.from_file_entry(f, project_id), client) for f in ref.files
    ]
````
Instantiate by listing the references which have been added to a particular project, for example:

```python
from cirro import DataPortal

portal = DataPortal()
project = portal.get_project_by_name("Project Name")
references = project.list_references()
```
```python
@property
def files(self) -> List[DataPortalFile]:
    """File(s) contained in the reference"""
    return self._files
```
File(s) contained in the reference
12class CirroApi: 13 """ 14 Client for interacting directly with the Cirro API 15 """ 16 def __init__(self, auth_info: AuthInfo = None, base_url: str = None): 17 """ 18 Instantiates the Cirro API object 19 20 Args: 21 auth_info (cirro.auth.base.AuthInfo): 22 base_url (str): Optional base URL of the Cirro instance 23 (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file) 24 25 Returns: 26 Authenticated Cirro API object, which can be used to call endpoint functions. 27 28 Example: 29 ```python 30 from cirro.cirro_client import CirroApi 31 32 cirro = CirroApi(base_url="app.cirro.bio") 33 print(cirro.projects.list()) 34 ``` 35 """ 36 37 self._configuration = AppConfig(base_url=base_url) 38 if not auth_info: 39 auth_info = get_auth_info_from_config(self._configuration, auth_io=None) 40 41 self._api_client = CirroApiClient( 42 base_url=self._configuration.rest_endpoint, 43 auth_method=auth_info.get_auth_method(), 44 client_name='Cirro SDK', 45 package_name='cirro' 46 ) 47 48 # Init services 49 self._file_service = FileService(self._api_client, 50 checksum_method=self._configuration.checksum_method, 51 transfer_retries=self._configuration.transfer_max_retries) 52 self._dataset_service = DatasetService(self._api_client, file_service=self._file_service) 53 self._project_service = ProjectService(self._api_client) 54 self._process_service = ProcessService(self._api_client) 55 self._execution_service = ExecutionService(self._api_client) 56 self._compute_environment_service = ComputeEnvironmentService(self._api_client) 57 self._metrics_service = MetricsService(self._api_client) 58 self._metadata_service = MetadataService(self._api_client) 59 self._billing_service = BillingService(self._api_client) 60 self._references_service = ReferenceService(self._api_client, file_service=self._file_service) 61 self._shares_service = ShareService(self._api_client) 62 self._users_service = UserService(self._api_client) 63 64 @property 65 def datasets(self) -> DatasetService: 66 """ 67 Create, list, delete, and modify Datasets 68 """ 69 return self._dataset_service 70 71 @property 72 def projects(self) -> ProjectService: 73 """ 74 Create, list, delete, and modify Projects 75 """ 76 return self._project_service 77 78 @property 79 def processes(self) -> ProcessService: 80 """ 81 List and retrieve detailed information about Processes 82 """ 83 return self._process_service 84 85 @property 86 def execution(self) -> ExecutionService: 87 """ 88 List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets) 89 """ 90 return self._execution_service 91 92 @property 93 def compute_environments(self) -> ComputeEnvironmentService: 94 """ 95 List and update compute environments 96 """ 97 return self._compute_environment_service 98 99 @property 100 def metrics(self) -> MetricsService: 101 """ 102 Project-level summary metrics 103 """ 104 return self._metrics_service 105 106 @property 107 def metadata(self) -> MetadataService: 108 """ 109 List and modify Sample metadata or metadata schemas 110 """ 111 return self._metadata_service 112 113 @property 114 def billing(self) -> BillingService: 115 """ 116 List and update billing accounts 117 """ 118 return self._billing_service 119 120 @property 121 def references(self) -> ReferenceService: 122 """ 123 List References and Reference types 124 """ 125 return self._references_service 126 127 @property 128 def shares(self) -> ShareService: 129 """ 130 List, create, update, delete, and subscribe to shares 131 """ 132 return 
self._shares_service 133 134 @property 135 def users(self) -> UserService: 136 """ 137 List and update user information 138 """ 139 return self._users_service 140 141 @property 142 def file(self) -> FileService: 143 """ 144 Read, download, and create file objects 145 """ 146 return self._file_service 147 148 @property 149 def api_client(self) -> CirroApiClient: 150 """ 151 Gets the underlying API client 152 """ 153 return self._api_client 154 155 @property 156 def configuration(self) -> AppConfig: 157 """ 158 Gets the configuration of the instance 159 """ 160 return self._configuration
Client for interacting directly with the Cirro API
````python
def __init__(self, auth_info: AuthInfo = None, base_url: str = None):
    """
    Instantiates the Cirro API object

    Args:
        auth_info (cirro.auth.base.AuthInfo):
        base_url (str): Optional base URL of the Cirro instance
         (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)

    Returns:
        Authenticated Cirro API object, which can be used to call endpoint functions.

    Example:
        ```python
        from cirro.cirro_client import CirroApi

        cirro = CirroApi(base_url="app.cirro.bio")
        print(cirro.projects.list())
        ```
    """

    self._configuration = AppConfig(base_url=base_url)
    if not auth_info:
        auth_info = get_auth_info_from_config(self._configuration, auth_io=None)

    self._api_client = CirroApiClient(
        base_url=self._configuration.rest_endpoint,
        auth_method=auth_info.get_auth_method(),
        client_name='Cirro SDK',
        package_name='cirro'
    )

    # Init services
    self._file_service = FileService(self._api_client,
                                     checksum_method=self._configuration.checksum_method,
                                     transfer_retries=self._configuration.transfer_max_retries)
    self._dataset_service = DatasetService(self._api_client, file_service=self._file_service)
    self._project_service = ProjectService(self._api_client)
    self._process_service = ProcessService(self._api_client)
    self._execution_service = ExecutionService(self._api_client)
    self._compute_environment_service = ComputeEnvironmentService(self._api_client)
    self._metrics_service = MetricsService(self._api_client)
    self._metadata_service = MetadataService(self._api_client)
    self._billing_service = BillingService(self._api_client)
    self._references_service = ReferenceService(self._api_client, file_service=self._file_service)
    self._shares_service = ShareService(self._api_client)
    self._users_service = UserService(self._api_client)
````
Instantiates the Cirro API object
Arguments:
- auth_info (`cirro.auth.base.AuthInfo`): Optional authentication information (if not provided, it is read from the local Cirro configuration)
- base_url (str): Optional base URL of the Cirro instance (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
Returns:
Authenticated Cirro API object, which can be used to call endpoint functions.
Example:
```python
from cirro.cirro_client import CirroApi

cirro = CirroApi(base_url="app.cirro.bio")
print(cirro.projects.list())
```
```python
@property
def datasets(self) -> DatasetService:
    """
    Create, list, delete, and modify Datasets
    """
    return self._dataset_service
```
Create, list, delete, and modify Datasets
```python
@property
def projects(self) -> ProjectService:
    """
    Create, list, delete, and modify Projects
    """
    return self._project_service
```
Create, list, delete, and modify Projects
```python
@property
def processes(self) -> ProcessService:
    """
    List and retrieve detailed information about Processes
    """
    return self._process_service
```
List and retrieve detailed information about Processes
```python
@property
def execution(self) -> ExecutionService:
    """
    List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)
    """
    return self._execution_service
```
List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)
```python
@property
def compute_environments(self) -> ComputeEnvironmentService:
    """
    List and update compute environments
    """
    return self._compute_environment_service
```
List and update compute environments
```python
@property
def metrics(self) -> MetricsService:
    """
    Project-level summary metrics
    """
    return self._metrics_service
```
Project-level summary metrics
```python
@property
def metadata(self) -> MetadataService:
    """
    List and modify Sample metadata or metadata schemas
    """
    return self._metadata_service
```
List and modify Sample metadata or metadata schemas
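The `metadata` service is what `DataPortalProject.samples()` calls under the hood; a sketch of using it directly (the project ID is a placeholder):

```python
from cirro.cirro_client import CirroApi

cirro = CirroApi(base_url="app.cirro.bio")

# Same call used by DataPortalProject.samples()
samples = cirro.metadata.get_project_samples("project-id", 100)
print(len(samples))
```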
```python
@property
def billing(self) -> BillingService:
    """
    List and update billing accounts
    """
    return self._billing_service
```
List and update billing accounts
```python
@property
def references(self) -> ReferenceService:
    """
    List References and Reference types
    """
    return self._references_service
```
List References and Reference types
```python
@property
def users(self) -> UserService:
    """
    List and update user information
    """
    return self._users_service
```
List and update user information
```python
@property
def file(self) -> FileService:
    """
    Read, download, and create file objects
    """
    return self._file_service
```
Read, download, and create file objects