cirro
1import cirro.file_utils # noqa 2from cirro.cirro_client import CirroApi 3from cirro.sdk.dataset import DataPortalDataset 4from cirro.sdk.login import DataPortalLogin 5from cirro.sdk.portal import DataPortal 6from cirro.sdk.process import DataPortalProcess 7from cirro.sdk.project import DataPortalProject 8from cirro.sdk.reference import DataPortalReference 9 10__all__ = [ 11 'DataPortal', 12 'DataPortalLogin', 13 'DataPortalProject', 14 'DataPortalProcess', 15 'DataPortalDataset', 16 'DataPortalReference', 17 'CirroApi', 18 'file_utils' 19]
class DataPortal:
    """
    Helper functions for exploring the Projects, Datasets, Samples, and Files
    available in the Data Portal.
    """

    def __init__(self, base_url: str = None, client: CirroApi = None):
        """
        Set up the DataPortal object, establishing an authenticated connection.

        Args:
            base_url (str): Optional base URL of the Cirro instance
             (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
            client (`cirro.cirro_client.CirroApi`): Optional pre-configured client

        Example:
            ```python
            from cirro import DataPortal

            portal = DataPortal(base_url="app.cirro.bio")
            portal.list_projects()
            ```
        """

        if client is not None:
            self._client = client

        # Set up default client if not provided
        else:
            self._client = CirroApi(base_url=base_url)

    def list_projects(self) -> DataPortalProjects:
        """List all the projects available in the Data Portal."""

        return DataPortalProjects(
            [
                DataPortalProject(proj, self._client)
                for proj in self._client.projects.list()
            ]
        )

    def get_project_by_name(self, name: str = None) -> DataPortalProject:
        """Return the project with the specified name."""

        return self.list_projects().get_by_name(name)

    def get_project_by_id(self, _id: str = None) -> DataPortalProject:
        """Return the project with the specified id."""

        return self.list_projects().get_by_id(_id)

    def get_project(self, project: str = None) -> DataPortalProject:
        """
        Return a project identified by ID or name.

        Args:
            project (str): ID or name of project

        Returns:
            `cirro.sdk.project.DataPortalProject`
        """
        # Try to resolve as an ID first, falling back to a name lookup
        try:
            return self.get_project_by_id(project)
        except DataPortalAssetNotFound:
            return self.get_project_by_name(project)

    def get_dataset(self, project: str = None, dataset: str = None) -> DataPortalDataset:
        """
        Return a dataset identified by ID or name.

        Args:
            project (str): ID or name of project
            dataset (str): ID or name of dataset

        Returns:
            `cirro.sdk.dataset.DataPortalDataset`

        ```python
        from cirro import DataPortal
        portal = DataPortal()
        dataset = portal.get_dataset(
            project="id-or-name-of-project",
            dataset="id-or-name-of-dataset"
        )
        ```
        """
        # Resolve the project by ID or name (delegates to get_project,
        # which implements the id-then-name fallback)
        parent = self.get_project(project)

        # Resolve the dataset by ID first, falling back to a name lookup
        try:
            return parent.get_dataset_by_id(dataset)
        except DataPortalAssetNotFound:
            return parent.get_dataset_by_name(dataset)

    def list_processes(self, ingest=False) -> DataPortalProcesses:
        """
        List all the processes available in the Data Portal.
        By default, only list non-ingest processes (those which can be run on existing datasets).
        To list the processes which can be used to upload datasets, use `ingest = True`.

        Args:
            ingest (bool): If True, only list those processes which can be used to ingest datasets directly
        """

        return DataPortalProcesses(
            [
                DataPortalProcess(p, self._client)
                for p in self._client.processes.list()
                if not ingest or p.executor == Executor.INGEST
            ]
        )

    def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess:
        """
        Return the process with the specified name.

        Args:
            name (str): Name of process
        """

        return self.list_processes(ingest=ingest).get_by_name(name)

    def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess:
        """
        Return the process with the specified id

        Args:
            id (str): ID of process
        """

        return self.list_processes(ingest=ingest).get_by_id(id)

    def list_reference_types(self) -> DataPortalReferenceTypes:
        """
        Return the list of all available reference types
        """

        return DataPortalReferenceTypes(
            [
                DataPortalReferenceType(ref)
                for ref in self._client.references.get_types()
            ]
        )
Helper functions for exploring the Projects, Datasets, Samples, and Files available in the Data Portal.
18 def __init__(self, base_url: str = None, client: CirroApi = None): 19 """ 20 Set up the DataPortal object, establishing an authenticated connection. 21 22 Args: 23 base_url (str): Optional base URL of the Cirro instance 24 (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file) 25 client (`cirro.cirro_client.CirroApi`): Optional pre-configured client 26 27 Example: 28 ```python 29 from cirro import DataPortal 30 31 Portal = DataPortal(base_url="app.cirro.bio") 32 portal.list_projects() 33 ``` 34 """ 35 36 if client is not None: 37 self._client = client 38 39 # Set up default client if not provided 40 else: 41 self._client = CirroApi(base_url=base_url)
Set up the DataPortal object, establishing an authenticated connection.
Arguments:
- base_url (str): Optional base URL of the Cirro instance
(if not provided, it uses the
CIRRO_BASE_URL
environment variable, or the config file) - client (
cirro.cirro_client.CirroApi
): Optional pre-configured client
Example:
from cirro import DataPortal
portal = DataPortal(base_url="app.cirro.bio")
portal.list_projects()
43 def list_projects(self) -> DataPortalProjects: 44 """List all the projects available in the Data Portal.""" 45 46 return DataPortalProjects( 47 [ 48 DataPortalProject(proj, self._client) 49 for proj in self._client.projects.list() 50 ] 51 )
List all the projects available in the Data Portal.
53 def get_project_by_name(self, name: str = None) -> DataPortalProject: 54 """Return the project with the specified name.""" 55 56 return self.list_projects().get_by_name(name)
Return the project with the specified name.
58 def get_project_by_id(self, _id: str = None) -> DataPortalProject: 59 """Return the project with the specified id.""" 60 61 return self.list_projects().get_by_id(_id)
Return the project with the specified id.
63 def get_project(self, project: str = None) -> DataPortalProject: 64 """ 65 Return a project identified by ID or name. 66 67 Args: 68 project (str): ID or name of project 69 70 Returns: 71 `from cirro.sdk.project import DataPortalProject` 72 """ 73 try: 74 return self.get_project_by_id(project) 75 except DataPortalAssetNotFound: 76 return self.get_project_by_name(project)
Return a project identified by ID or name.
Arguments:
- project (str): ID or name of project
Returns:
cirro.sdk.project.DataPortalProject
78 def get_dataset(self, project: str = None, dataset: str = None) -> DataPortalDataset: 79 """ 80 Return a dataset identified by ID or name. 81 82 Args: 83 project (str): ID or name of project 84 dataset (str): ID or name of dataset 85 86 Returns: 87 `cirro.sdk.dataset.DataPortalDataset` 88 89 ```python 90 from cirro import DataPortal() 91 portal = DataPortal() 92 dataset = portal.get_dataset( 93 project="id-or-name-of-project", 94 dataset="id-or-name-of-dataset" 95 ) 96 ``` 97 """ 98 try: 99 project: DataPortalProject = self.get_project_by_id(project) 100 except DataPortalAssetNotFound: 101 project: DataPortalProject = self.get_project_by_name(project) 102 103 try: 104 return project.get_dataset_by_id(dataset) 105 except DataPortalAssetNotFound: 106 return project.get_dataset_by_name(dataset)
Return a dataset identified by ID or name.
Arguments:
- project (str): ID or name of project
- dataset (str): ID or name of dataset
Returns:
cirro.sdk.dataset.DataPortalDataset
from cirro import DataPortal portal = DataPortal() dataset = portal.get_dataset( project="id-or-name-of-project", dataset="id-or-name-of-dataset" )
108 def list_processes(self, ingest=False) -> DataPortalProcesses: 109 """ 110 List all the processes available in the Data Portal. 111 By default, only list non-ingest processes (those which can be run on existing datasets). 112 To list the processes which can be used to upload datasets, use `ingest = True`. 113 114 Args: 115 ingest (bool): If True, only list those processes which can be used to ingest datasets directly 116 """ 117 118 return DataPortalProcesses( 119 [ 120 DataPortalProcess(p, self._client) 121 for p in self._client.processes.list() 122 if not ingest or p.executor == Executor.INGEST 123 ] 124 )
List all the processes available in the Data Portal.
By default, only list non-ingest processes (those which can be run on existing datasets).
To list the processes which can be used to upload datasets, use ingest = True.
Arguments:
- ingest (bool): If True, only list those processes which can be used to ingest datasets directly
126 def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess: 127 """ 128 Return the process with the specified name. 129 130 Args: 131 name (str): Name of process 132 """ 133 134 return self.list_processes(ingest=ingest).get_by_name(name)
Return the process with the specified name.
Arguments:
- name (str): Name of process
136 def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess: 137 """ 138 Return the process with the specified id 139 140 Args: 141 id (str): ID of process 142 """ 143 144 return self.list_processes(ingest=ingest).get_by_id(id)
Return the process with the specified id
Arguments:
- id (str): ID of process
146 def list_reference_types(self) -> DataPortalReferenceTypes: 147 """ 148 Return the list of all available reference types 149 """ 150 151 return DataPortalReferenceTypes( 152 [ 153 DataPortalReferenceType(ref) 154 for ref in self._client.references.get_types() 155 ] 156 )
Return the list of all available reference types
8class DataPortalLogin: 9 """ 10 Start the login process, obtaining the authorization message from Cirro 11 needed to confirm the user identity. 12 13 Useful when you need to authenticate a user in a non-blocking way. 14 15 Usage: 16 17 ```python 18 # Replace app.cirro.bio as appropriate 19 login = DataPortalLogin(base_url="app.cirro.bio") 20 21 # Present the user with the authorization message 22 print(login.auth_message) 23 24 # Generate the authenticated DataPortal object, 25 # blocking until the user completes the login process in their browser 26 portal = login.await_completion() 27 ``` 28 """ 29 base_url: str 30 auth_info: DeviceCodeAuth 31 32 def __init__(self, base_url: str = None, enable_cache=False): 33 app_config = AppConfig(base_url=base_url) 34 35 self.base_url = base_url 36 37 self.auth_info = DeviceCodeAuth( 38 region=app_config.region, 39 client_id=app_config.client_id, 40 auth_endpoint=app_config.auth_endpoint, 41 enable_cache=enable_cache, 42 await_completion=False 43 ) 44 45 @property 46 def auth_message(self) -> str: 47 """Authorization message provided by Cirro.""" 48 return self.auth_info.auth_message 49 50 @property 51 def auth_message_markdown(self) -> str: 52 """Authorization message provided by Cirro (Markdown format).""" 53 return self.auth_info.auth_message_markdown 54 55 def await_completion(self) -> DataPortal: 56 """Complete the login process and return an authenticated client""" 57 58 # Block until the user completes the login flow 59 self.auth_info.await_completion() 60 61 # Set up the client object 62 cirro_client = CirroApi( 63 auth_info=self.auth_info, 64 base_url=self.base_url 65 ) 66 67 # Return the Data Portal object 68 return DataPortal(client=cirro_client)
Start the login process, obtaining the authorization message from Cirro needed to confirm the user identity.
Useful when you need to authenticate a user in a non-blocking way.
Usage:
# Replace app.cirro.bio as appropriate
login = DataPortalLogin(base_url="app.cirro.bio")
# Present the user with the authorization message
print(login.auth_message)
# Generate the authenticated DataPortal object,
# blocking until the user completes the login process in their browser
portal = login.await_completion()
32 def __init__(self, base_url: str = None, enable_cache=False): 33 app_config = AppConfig(base_url=base_url) 34 35 self.base_url = base_url 36 37 self.auth_info = DeviceCodeAuth( 38 region=app_config.region, 39 client_id=app_config.client_id, 40 auth_endpoint=app_config.auth_endpoint, 41 enable_cache=enable_cache, 42 await_completion=False 43 )
45 @property 46 def auth_message(self) -> str: 47 """Authorization message provided by Cirro.""" 48 return self.auth_info.auth_message
Authorization message provided by Cirro.
50 @property 51 def auth_message_markdown(self) -> str: 52 """Authorization message provided by Cirro (Markdown format).""" 53 return self.auth_info.auth_message_markdown
Authorization message provided by Cirro (Markdown format).
55 def await_completion(self) -> DataPortal: 56 """Complete the login process and return an authenticated client""" 57 58 # Block until the user completes the login flow 59 self.auth_info.await_completion() 60 61 # Set up the client object 62 cirro_client = CirroApi( 63 auth_info=self.auth_info, 64 base_url=self.base_url 65 ) 66 67 # Return the Data Portal object 68 return DataPortal(client=cirro_client)
Complete the login process and return an authenticated client
class DataPortalProject(DataPortalAsset):
    """
    Projects in the Data Portal contain collections of Datasets.
    Users are granted permissions at the project-level, allowing them
    to view and/or modify all the datasets in that collection.
    """
    def __init__(self, proj: Project, client: CirroApi):
        """
        Instantiate with helper method

        ```python
        from cirro import DataPortal
        portal = DataPortal()
        project = portal.get_project_by_name("Project Name")
        ```

        """
        self._data = proj
        self._client = client
        # Per-instance memo of the project's datasets.
        # NOTE: a functools.cache-decorated method would key on `self` and
        # keep every instance alive for the cache's lifetime (bugbear B019),
        # so the cache is held on the instance instead.
        self._datasets_cache = None

    @property
    def id(self) -> str:
        """
        Unique identifier
        """
        return self._data.id

    @property
    def name(self) -> str:
        """
        Readable name
        """
        return self._data.name

    @property
    def description(self) -> str:
        """
        Longer description of the project
        """
        return self._data.description

    def __str__(self):
        """Control how the Project is rendered as a string."""

        return '\n'.join([
            f"{i.title()}: {self.__getattribute__(i)}"
            for i in ['name', 'id', 'description']
        ])

    def _get_datasets(self) -> List[Dataset]:
        # Lazily fetch and memoize the complete dataset listing
        if self._datasets_cache is None:
            self._datasets_cache = list_all_datasets(project_id=self.id,
                                                     client=self._client)
        return self._datasets_cache

    def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
        """List all the datasets available in the project."""
        if force_refresh:
            self._datasets_cache = None

        return DataPortalDatasets(
            [
                DataPortalDataset(d, self._client)
                for d in self._get_datasets()
            ]
        )

    def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
        """Return the dataset with the specified name."""
        if force_refresh:
            self._datasets_cache = None

        dataset = next((d for d in self._get_datasets() if d.name == name), None)
        if dataset is None:
            raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
        # Re-fetch by ID so the caller gets the full dataset detail
        return self.get_dataset_by_id(dataset.id)

    def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
        """Return the dataset with the specified id."""

        dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id)
        if dataset is None:
            raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
        return DataPortalDataset(dataset, self._client)

    def list_references(self, reference_type: str = None) -> DataPortalReferences:
        """
        List the references available in a project.
        Optionally filter to references of a particular type (identified by name)
        """

        # Get the complete list of references which are available
        reference_types = DataPortalReferenceTypes(
            [
                DataPortalReferenceType(ref)
                for ref in self._client.references.get_types()
            ]
        )

        # If a particular name was specified
        if reference_type is not None:
            reference_types = reference_types.filter_by_pattern(reference_type)
            if len(reference_types) == 0:
                msg = f"Could not find any reference types with the name {reference_type}"
                raise DataPortalAssetNotFound(msg)

        return DataPortalReferences(
            [
                DataPortalReference(ref, project_id=self.id, client=self._client)
                for ref in self._client.references.get_for_project(
                    self.id
                )
                if reference_type is None or ref.type == reference_type
            ]
        )

    def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
        """Return the reference of a particular type with the specified name."""

        if name is None:
            raise DataPortalInputError("Must specify the reference name")

        return self.list_references(ref_type).get_by_name(name)

    def upload_dataset(
        self,
        name: str = None,
        description='',
        process: Union[DataPortalProcess, str] = None,
        upload_folder: str = None,
        files: List[str] = None,
        tags: List[str] = None,
    ):
        """
        Upload a set of files to the Data Portal, creating a new dataset.

        If the files parameter is not provided, it will upload all files in the upload folder

        Args:
            name (str): Name of newly created dataset
            description (str): Description of newly created dataset
            process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object
            upload_folder (str): Folder containing files to upload
            files (List[str]): Optional subset of files to upload from the folder
            tags (List[str]): Optional list of tags to apply to the dataset
        """

        if name is None:
            raise DataPortalInputError("Must provide name for new dataset")
        if process is None:
            raise DataPortalInputError("Must provide the process which is used for ingest")
        if upload_folder is None:
            raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")

        # Parse the process provided by the user
        process = parse_process_name_or_id(process, self._client)

        # If no files were provided
        if files is None:
            # Get the list of files in the upload folder
            files = get_files_in_directory(upload_folder)

        if files is None or len(files) == 0:
            raise RuntimeWarning("No files to upload, exiting")

        # Normalize into Tag object
        if tags is not None:
            tags = [Tag(value=value) for value in tags]

        # Make sure that the files match the expected pattern
        self._client.processes.check_dataset_files(files, process.id, upload_folder)

        # Create the ingest process request
        dataset_create_request = UploadDatasetRequest(
            process_id=process.id,
            name=name,
            description=description,
            expected_files=files,
            tags=tags,
        )

        # Get the response
        create_response = self._client.datasets.create(project_id=self.id,
                                                       upload_request=dataset_create_request)

        # Upload the files
        self._client.datasets.upload_files(
            project_id=self.id,
            dataset_id=create_response.id,
            directory=upload_folder,
            files=files
        )

        # Return the dataset which was created, which might take a second to update
        max_attempts = 5
        for attempt in range(max_attempts):
            try:
                return self.get_dataset_by_id(create_response.id)
            except DataPortalAssetNotFound as e:
                if attempt == max_attempts - 1:
                    raise e
                else:
                    sleep(2)

    def samples(self, max_items: int = 10000) -> List[Sample]:
        """
        Retrieves a list of samples associated with a project along with their metadata

        Args:
            max_items (int): Maximum number of records to get (default 10,000)
        """
        return self._client.metadata.get_project_samples(self.id, max_items)
Projects in the Data Portal contain collections of Datasets. Users are granted permissions at the project-level, allowing them to view and/or modify all the datasets in that collection.
26 def __init__(self, proj: Project, client: CirroApi): 27 """ 28 Instantiate with helper method 29 30 ```python 31 from cirro import DataPortal() 32 portal = DataPortal() 33 project = portal.get_project_by_name("Project Name") 34 ``` 35 36 """ 37 self._data = proj 38 self._client = client
Instantiate with helper method
from cirro import DataPortal
portal = DataPortal()
project = portal.get_project_by_name("Project Name")
54 @property 55 def description(self) -> str: 56 """ 57 Longer description of the project 58 """ 59 return self._data.description
Longer description of the project
74 def list_datasets(self, force_refresh=False) -> DataPortalDatasets: 75 """List all the datasets available in the project.""" 76 if force_refresh: 77 self._get_datasets.cache_clear() 78 79 return DataPortalDatasets( 80 [ 81 DataPortalDataset(d, self._client) 82 for d in self._get_datasets() 83 ] 84 )
List all the datasets available in the project.
86 def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset: 87 """Return the dataset with the specified name.""" 88 if force_refresh: 89 self._get_datasets.cache_clear() 90 91 dataset = next((d for d in self._get_datasets() if d.name == name), None) 92 if dataset is None: 93 raise DataPortalAssetNotFound(f'Dataset with name {name} not found') 94 return self.get_dataset_by_id(dataset.id)
Return the dataset with the specified name.
96 def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset: 97 """Return the dataset with the specified id.""" 98 99 dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id) 100 if dataset is None: 101 raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found') 102 return DataPortalDataset(dataset, self._client)
Return the dataset with the specified id.
104 def list_references(self, reference_type: str = None) -> DataPortalReferences: 105 """ 106 List the references available in a project. 107 Optionally filter to references of a particular type (identified by name) 108 """ 109 110 # Get the complete list of references which are available 111 reference_types = DataPortalReferenceTypes( 112 [ 113 DataPortalReferenceType(ref) 114 for ref in self._client.references.get_types() 115 ] 116 ) 117 118 # If a particular name was specified 119 if reference_type is not None: 120 reference_types = reference_types.filter_by_pattern(reference_type) 121 if len(reference_types) == 0: 122 msg = f"Could not find any reference types with the name {reference_type}" 123 raise DataPortalAssetNotFound(msg) 124 125 return DataPortalReferences( 126 [ 127 DataPortalReference(ref, project_id=self.id, client=self._client) 128 for ref in self._client.references.get_for_project( 129 self.id 130 ) 131 if reference_type is None or ref.type == reference_type 132 ] 133 )
List the references available in a project. Optionally filter to references of a particular type (identified by name)
135 def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference: 136 """Return the reference of a particular type with the specified name.""" 137 138 if name is None: 139 raise DataPortalInputError("Must specify the reference name") 140 141 return self.list_references(ref_type).get_by_name(name)
Return the reference of a particular type with the specified name.
143 def upload_dataset( 144 self, 145 name: str = None, 146 description='', 147 process: Union[DataPortalProcess, str] = None, 148 upload_folder: str = None, 149 files: List[str] = None, 150 tags: List[str] = None, 151 ): 152 """ 153 Upload a set of files to the Data Portal, creating a new dataset. 154 155 If the files parameter is not provided, it will upload all files in the upload folder 156 157 Args: 158 name (str): Name of newly created dataset 159 description (str): Description of newly created dataset 160 process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object 161 upload_folder (str): Folder containing files to upload 162 files (List[str]): Optional subset of files to upload from the folder 163 tags (List[str]): Optional list of tags to apply to the dataset 164 """ 165 166 if name is None: 167 raise DataPortalInputError("Must provide name for new dataset") 168 if process is None: 169 raise DataPortalInputError("Must provide the process which is used for ingest") 170 if upload_folder is None: 171 raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload") 172 173 # Parse the process provided by the user 174 process = parse_process_name_or_id(process, self._client) 175 176 # If no files were provided 177 if files is None: 178 # Get the list of files in the upload folder 179 files = get_files_in_directory(upload_folder) 180 181 if files is None or len(files) == 0: 182 raise RuntimeWarning("No files to upload, exiting") 183 184 # Normalize into Tag object 185 if tags is not None: 186 tags = [Tag(value=value) for value in tags] 187 188 # Make sure that the files match the expected pattern 189 self._client.processes.check_dataset_files(files, process.id, upload_folder) 190 191 # Create the ingest process request 192 dataset_create_request = UploadDatasetRequest( 193 process_id=process.id, 194 name=name, 195 description=description, 196 expected_files=files, 197 tags=tags, 198 ) 199 200 # Get the 
response 201 create_response = self._client.datasets.create(project_id=self.id, 202 upload_request=dataset_create_request) 203 204 # Upload the files 205 self._client.datasets.upload_files( 206 project_id=self.id, 207 dataset_id=create_response.id, 208 directory=upload_folder, 209 files=files 210 ) 211 212 # Return the dataset which was created, which might take a second to update 213 max_attempts = 5 214 for attempt in range(max_attempts): 215 try: 216 return self.get_dataset_by_id(create_response.id) 217 except DataPortalAssetNotFound as e: 218 if attempt == max_attempts - 1: 219 raise e 220 else: 221 sleep(2)
Upload a set of files to the Data Portal, creating a new dataset.
If the files parameter is not provided, it will upload all files in the upload folder
Arguments:
- name (str): Name of newly created dataset
- description (str): Description of newly created dataset
- process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object
- upload_folder (str): Folder containing files to upload
- files (List[str]): Optional subset of files to upload from the folder
- tags (List[str]): Optional list of tags to apply to the dataset
223 def samples(self, max_items: int = 10000) -> List[Sample]: 224 """ 225 Retrieves a list of samples associated with a project along with their metadata 226 227 Args: 228 max_items (int): Maximum number of records to get (default 10,000) 229 """ 230 return self._client.metadata.get_project_samples(self.id, max_items)
Retrieves a list of samples associated with a project along with their metadata
Arguments:
- max_items (int): Maximum number of records to get (default 10,000)
class DataPortalProcess(DataPortalAsset):
    """Helper functions for interacting with analysis processes."""

    def __init__(self, process: Union[Process, ProcessDetail], client: CirroApi):
        """
        Instantiate with helper method

        ```python
        from cirro import DataPortal()
        portal = DataPortal()
        process = portal.get_process_by_name("Process Name")
        ```
        """
        self._data = process
        self._client = client

    @property
    def id(self) -> str:
        """Unique identifier"""
        return self._data.id

    @property
    def name(self) -> str:
        """Readable name"""
        return self._data.name

    @property
    def description(self) -> str:
        """Longer description of process"""
        return self._data.description

    @property
    def child_process_ids(self) -> List[str]:
        """List of processes which can be run on the output of this process"""
        return self._data.child_process_ids

    @property
    def executor(self) -> Executor:
        """INGEST, CROMWELL, or NEXTFLOW"""
        return self._data.executor

    @property
    def category(self) -> str:
        """Category of process"""
        return self._data.category

    @property
    def pipeline_type(self) -> str:
        """Pipeline type"""
        return self._data.pipeline_type

    @property
    def documentation_url(self) -> str:
        """Documentation URL"""
        return self._data.documentation_url

    @property
    def file_requirements_message(self) -> str:
        """Description of files required for INGEST processes"""
        return self._data.file_requirements_message

    @property
    def code(self) -> PipelineCode:
        """Pipeline code configuration"""
        return self._get_detail().pipeline_code

    @property
    def custom_settings(self) -> CustomPipelineSettings:
        """Custom settings for the process"""
        return self._get_detail().custom_settings

    def _get_detail(self) -> ProcessDetail:
        # Upgrade the summary record to the full detail record on first use
        if isinstance(self._data, ProcessDetail):
            return self._data
        self._data = self._client.processes.get(self.id)
        return self._data

    def __str__(self):
        # Render the key attributes, one per line
        display_fields = ('name', 'id', 'description')
        return '\n'.join(
            f"{field.title()}: {getattr(self, field)}"
            for field in display_fields
        )

    def get_parameter_spec(self) -> ParameterSpecification:
        """
        Gets a specification used to describe the parameters used in the process.
        """
        return self._client.processes.get_parameter_spec(self.id)
Helper functions for interacting with analysis processes.
14 def __init__(self, process: Union[Process, ProcessDetail], client: CirroApi): 15 """ 16 Instantiate with helper method 17 18 ```python 19 from cirro import DataPortal() 20 portal = DataPortal() 21 process = portal.get_process_by_name("Process Name") 22 ``` 23 """ 24 self._data = process 25 self._client = client
Instantiate with helper method
from cirro import DataPortal
portal = DataPortal()
process = portal.get_process_by_name("Process Name")
37 @property 38 def description(self) -> str: 39 """Longer description of process""" 40 return self._data.description
Longer description of process
42 @property 43 def child_process_ids(self) -> List[str]: 44 """List of processes which can be run on the output of this process""" 45 return self._data.child_process_ids
List of processes which can be run on the output of this process
47 @property 48 def executor(self) -> Executor: 49 """INGEST, CROMWELL, or NEXTFLOW""" 50 return self._data.executor
INGEST, CROMWELL, or NEXTFLOW
52 @property 53 def category(self) -> str: 54 """Category of process""" 55 return self._data.category
Category of process
57 @property 58 def pipeline_type(self) -> str: 59 """Pipeline type""" 60 return self._data.pipeline_type
Pipeline type
62 @property 63 def documentation_url(self) -> str: 64 """Documentation URL""" 65 return self._data.documentation_url
Documentation URL
67 @property 68 def file_requirements_message(self) -> str: 69 """Description of files required for INGEST processes""" 70 return self._data.file_requirements_message
Description of files required for INGEST processes
72 @property 73 def code(self) -> PipelineCode: 74 """Pipeline code configuration""" 75 return self._get_detail().pipeline_code
Pipeline code configuration
77 @property 78 def custom_settings(self) -> CustomPipelineSettings: 79 """Custom settings for the process""" 80 return self._get_detail().custom_settings
Custom settings for the process
class DataPortalDataset(DataPortalAsset):
    """
    Datasets in the Data Portal are collections of files which have
    either been uploaded directly, or which have been output by
    an analysis pipeline or notebook.
    """

    def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi):
        """
        Instantiate a dataset object

        Should be invoked from a top-level constructor, for example:

        ```python
        from cirro import DataPortal
        portal = DataPortal()
        dataset = portal.get_dataset(
            project="id-or-name-of-project",
            dataset="id-or-name-of-dataset"
        )
        ```

        """
        assert dataset.project_id is not None, "Must provide dataset with project_id attribute"
        self._data = dataset
        # The asset listing (files/artifacts) is fetched lazily -- see _get_assets()
        self._assets: Optional[DatasetAssets] = None
        self._client = client

    @property
    def id(self) -> str:
        """Unique identifier for the dataset"""
        return self._data.id

    @property
    def name(self) -> str:
        """Editable name for the dataset"""
        return self._data.name

    @property
    def description(self) -> str:
        """Longer description of the dataset"""
        return self._data.description

    @property
    def process_id(self) -> str:
        """Unique ID of process used to create the dataset"""
        return self._data.process_id

    @property
    def process(self) -> ProcessDetail:
        """
        Object representing the process used to create the dataset
        """
        return self._client.processes.get(self.process_id)

    @property
    def project_id(self) -> str:
        """ID of the project containing the dataset"""
        return self._data.project_id

    @property
    def status(self) -> Status:
        """
        Status of the dataset
        """
        return self._data.status

    @property
    def source_dataset_ids(self) -> List[str]:
        """IDs of the datasets used as sources for this dataset (if any)"""
        return self._data.source_dataset_ids

    @property
    def source_datasets(self) -> List['DataPortalDataset']:
        """
        Objects representing the datasets used as sources for this dataset (if any)
        """
        # Each source ID is resolved with a separate API call
        return [
            DataPortalDataset(
                dataset=self._client.datasets.get(project_id=self.project_id, dataset_id=dataset_id),
                client=self._client
            )
            for dataset_id in self.source_dataset_ids
        ]

    @property
    def params(self) -> DatasetDetailParams:
        """
        Parameters used to generate the dataset
        """
        return self._get_detail().params

    @property
    def info(self) -> DatasetDetailInfo:
        """
        Detailed information about the dataset
        """
        return self._get_detail().info

    @property
    def tags(self) -> List[Tag]:
        """
        Tags applied to the dataset
        """
        return self._data.tags

    @property
    def created_by(self) -> str:
        """User who created the dataset"""
        return self._data.created_by

    @property
    def created_at(self) -> datetime.datetime:
        """Timestamp of dataset creation"""
        return self._data.created_at

    def _get_detail(self) -> DatasetDetail:
        # Lazily upgrade the cached record to a full DatasetDetail, which
        # carries params/info that the list-level Dataset record does not
        if not isinstance(self._data, DatasetDetail):
            self._data = self._client.datasets.get(project_id=self.project_id, dataset_id=self.id)
        return self._data

    def _get_assets(self) -> DatasetAssets:
        # Fetch and cache the file/artifact listing on first access
        if not self._assets:
            self._assets = self._client.datasets.get_assets_listing(
                project_id=self.project_id,
                dataset_id=self.id
            )
        return self._assets

    def __str__(self):
        # Human-readable summary of the key attributes, one per line
        return '\n'.join([
            f"{i.title()}: {self.__getattribute__(i)}"
            for i in ['name', 'id', 'description', 'status']
        ])

    def get_file(self, relative_path: str) -> DataPortalFile:
        """
        Get a file from the dataset using its relative path.

        Args:
            relative_path (str): Relative path of file within the dataset

        Returns:
            `from cirro.sdk.file import DataPortalFile`

        Raises:
            DataPortalAssetNotFound: If no file matches the path
                (with or without the 'data/' prefix)
        """

        # Get the list of files in this dataset
        files = self.list_files()

        # Try getting the file using the relative path provided by the user
        try:
            return files.get_by_id(relative_path)
        except DataPortalAssetNotFound:
            # Try getting the file with the 'data/' prefix prepended
            try:
                return files.get_by_id("data/" + relative_path)
            except DataPortalAssetNotFound:
                # If not found, raise the exception using the string provided
                # by the user, not the data/ prepended version (which may be
                # confusing to the user); suppress chaining of the internal
                # lookup errors for the same reason
                raise DataPortalAssetNotFound(
                    f"No file found with path '{relative_path}'."
                ) from None

    def list_files(self) -> DataPortalFiles:
        """
        Return the list of files which make up the dataset.
        """
        files = self._get_assets().files
        return DataPortalFiles(
            [
                DataPortalFile(file=file, client=self._client)
                for file in files
            ]
        )

    def get_artifact(self, artifact_type: ArtifactType) -> DataPortalFile:
        """
        Get the artifact of a particular type from the dataset

        Raises:
            DataPortalAssetNotFound: If no artifact of that type exists
        """
        artifacts = self._get_assets().artifacts
        artifact = next((a for a in artifacts if a.artifact_type == artifact_type), None)
        if artifact is None:
            raise DataPortalAssetNotFound(f"No artifact found with type '{artifact_type}'")
        return DataPortalFile(file=artifact.file, client=self._client)

    def list_artifacts(self) -> List[DataPortalFile]:
        """
        Return the list of artifacts associated with the dataset

        An artifact may be something generated as part of the analysis or other process.
        See `cirro_api_client.v1.models.ArtifactType` for the list of possible artifact types.

        """
        artifacts = self._get_assets().artifacts
        return DataPortalFiles(
            [
                DataPortalFile(file=artifact.file, client=self._client)
                for artifact in artifacts
            ]
        )

    def download_files(self, download_location: str = None) -> None:
        """
        Download all the files from the dataset to a local directory.

        Args:
            download_location (str): Path to local directory
        """

        # Alias for internal method
        self.list_files().download(download_location)

    def run_analysis(
            self,
            name: str = None,
            description: str = "",
            process: Union[DataPortalProcess, str] = None,
            params=None,
            notifications_emails: List[str] = None,
            compute_environment: str = None,
            resume_dataset_id: str = None
    ) -> str:
        """
        Runs an analysis on a dataset, returns the ID of the newly created dataset.

        The process can be provided as either a DataPortalProcess object,
        or a string which corresponds to the name or ID of the process.

        Args:
            name (str): Name of newly created dataset
            description (str): Description of newly created dataset
            process (DataPortalProcess or str): Process to run
            params (dict): Analysis parameters
            notifications_emails (List[str]): Notification email address(es)
            compute_environment (str): Name or ID of compute environment to use,
                if blank it will run in AWS
            resume_dataset_id (str): ID of dataset to resume from, used for caching task execution.
                It will attempt to re-use the previous output to minimize duplicate work

        Returns:
            dataset_id (str): ID of newly created dataset

        Raises:
            DataPortalInputError: If 'name' or 'process' is missing, or the
                requested compute environment cannot be found
        """
        if name is None:
            raise DataPortalInputError("Must specify 'name' for run_analysis")
        if process is None:
            raise DataPortalInputError("Must specify 'process' for run_analysis")
        if notifications_emails is None:
            notifications_emails = []
        if params is None:
            params = {}

        # If the process is a string, try to parse it as a process name or ID
        process = parse_process_name_or_id(process, self._client)

        if compute_environment:
            available_environments = self._client.compute_environments.list_environments_for_project(
                project_id=self.project_id
            )
            # Match on either the environment name or its ID
            matched_environment = next(
                (env for env in available_environments
                 if env.name == compute_environment or env.id == compute_environment),
                None
            )
            if matched_environment is None:
                # Report the name/ID the caller supplied (previously this message
                # printed 'None' because the variable had already been overwritten
                # by the failed lookup result)
                raise DataPortalInputError(f"Compute environment '{compute_environment}' not found")
            compute_environment = matched_environment

        resp = self._client.execution.run_analysis(
            project_id=self.project_id,
            request=RunAnalysisRequest(
                name=name,
                description=description,
                process_id=process.id,
                source_dataset_ids=[self.id],
                params=RunAnalysisRequestParams.from_dict(params),
                notification_emails=notifications_emails,
                resume_dataset_id=resume_dataset_id,
                compute_environment_id=compute_environment.id if compute_environment else None
            )
        )
        return resp.id
Datasets in the Data Portal are collections of files which have either been uploaded directly, or which have been output by an analysis pipeline or notebook.
26 def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi): 27 """ 28 Instantiate a dataset object 29 30 Should be invoked from a top-level constructor, for example: 31 32 ```python 33 from cirro import DataPortal() 34 portal = DataPortal() 35 dataset = portal.get_dataset( 36 project="id-or-name-of-project", 37 dataset="id-or-name-of-dataset" 38 ) 39 ``` 40 41 """ 42 assert dataset.project_id is not None, "Must provide dataset with project_id attribute" 43 self._data = dataset 44 self._assets: Optional[DatasetAssets] = None 45 self._client = client
Instantiate a dataset object
Should be invoked from a top-level constructor, for example:
from cirro import DataPortal
portal = DataPortal()
dataset = portal.get_dataset(
project="id-or-name-of-project",
dataset="id-or-name-of-dataset"
)
47 @property 48 def id(self) -> str: 49 """Unique identifier for the dataset""" 50 return self._data.id
Unique identifier for the dataset
52 @property 53 def name(self) -> str: 54 """Editible name for the dataset""" 55 return self._data.name
Editable name for the dataset
57 @property 58 def description(self) -> str: 59 """Longer name for the dataset""" 60 return self._data.description
Longer description of the dataset
62 @property 63 def process_id(self) -> str: 64 """Unique ID of process used to create the dataset""" 65 return self._data.process_id
Unique ID of process used to create the dataset
67 @property 68 def process(self) -> ProcessDetail: 69 """ 70 Object representing the process used to create the dataset 71 """ 72 return self._client.processes.get(self.process_id)
Object representing the process used to create the dataset
74 @property 75 def project_id(self) -> str: 76 """ID of the project containing the dataset""" 77 return self._data.project_id
ID of the project containing the dataset
79 @property 80 def status(self) -> Status: 81 """ 82 Status of the dataset 83 """ 84 return self._data.status
Status of the dataset
86 @property 87 def source_dataset_ids(self) -> List[str]: 88 """IDs of the datasets used as sources for this dataset (if any)""" 89 return self._data.source_dataset_ids
IDs of the datasets used as sources for this dataset (if any)
91 @property 92 def source_datasets(self) -> List['DataPortalDataset']: 93 """ 94 Objects representing the datasets used as sources for this dataset (if any) 95 """ 96 return [ 97 DataPortalDataset( 98 dataset=self._client.datasets.get(project_id=self.project_id, dataset_id=dataset_id), 99 client=self._client 100 ) 101 for dataset_id in self.source_dataset_ids 102 ]
Objects representing the datasets used as sources for this dataset (if any)
104 @property 105 def params(self) -> DatasetDetailParams: 106 """ 107 Parameters used to generate the dataset 108 """ 109 return self._get_detail().params
Parameters used to generate the dataset
111 @property 112 def info(self) -> DatasetDetailInfo: 113 """ 114 Detailed information about the dataset 115 """ 116 return self._get_detail().info
Detailed information about the dataset
125 @property 126 def created_by(self) -> str: 127 """User who created the dataset""" 128 return self._data.created_by
User who created the dataset
130 @property 131 def created_at(self) -> datetime.datetime: 132 """Timestamp of dataset creation""" 133 return self._data.created_at
Timestamp of dataset creation
154 def get_file(self, relative_path: str) -> DataPortalFile: 155 """ 156 Get a file from the dataset using its relative path. 157 158 Args: 159 relative_path (str): Relative path of file within the dataset 160 161 Returns: 162 `from cirro.sdk.file import DataPortalFile` 163 """ 164 165 # Get the list of files in this dataset 166 files = self.list_files() 167 168 # Try getting the file using the relative path provided by the user 169 try: 170 return files.get_by_id(relative_path) 171 except DataPortalAssetNotFound: 172 # Try getting the file with the 'data/' prefix prepended 173 try: 174 return files.get_by_id("data/" + relative_path) 175 except DataPortalAssetNotFound: 176 # If not found, raise the exception using the string provided 177 # by the user, not the data/ prepended version (which may be 178 # confusing to the user) 179 msg = '\n'.join([f"No file found with path '{relative_path}'."]) 180 raise DataPortalAssetNotFound(msg)
Get a file from the dataset using its relative path.
Arguments:
- relative_path (str): Relative path of file within the dataset
Returns:
`DataPortalFile` (importable from `cirro.sdk.file`)
182 def list_files(self) -> DataPortalFiles: 183 """ 184 Return the list of files which make up the dataset. 185 """ 186 files = self._get_assets().files 187 return DataPortalFiles( 188 [ 189 DataPortalFile(file=file, client=self._client) 190 for file in files 191 ] 192 )
Return the list of files which make up the dataset.
194 def get_artifact(self, artifact_type: ArtifactType) -> DataPortalFile: 195 """ 196 Get the artifact of a particular type from the dataset 197 """ 198 artifacts = self._get_assets().artifacts 199 artifact = next((a for a in artifacts if a.artifact_type == artifact_type), None) 200 if artifact is None: 201 raise DataPortalAssetNotFound(f"No artifact found with type '{artifact_type}'") 202 return DataPortalFile(file=artifact.file, client=self._client)
Get the artifact of a particular type from the dataset
204 def list_artifacts(self) -> List[DataPortalFile]: 205 """ 206 Return the list of artifacts associated with the dataset 207 208 An artifact may be something generated as part of the analysis or other process. 209 See `cirro_api_client.v1.models.ArtifactType` for the list of possible artifact types. 210 211 """ 212 artifacts = self._get_assets().artifacts 213 return DataPortalFiles( 214 [ 215 DataPortalFile(file=artifact.file, client=self._client) 216 for artifact in artifacts 217 ] 218 )
Return the list of artifacts associated with the dataset
An artifact may be something generated as part of the analysis or other process.
See cirro_api_client.v1.models.ArtifactType
for the list of possible artifact types.
220 def download_files(self, download_location: str = None) -> None: 221 """ 222 Download all the files from the dataset to a local directory. 223 224 Args: 225 download_location (str): Path to local directory 226 """ 227 228 # Alias for internal method 229 self.list_files().download(download_location)
Download all the files from the dataset to a local directory.
Arguments:
- download_location (str): Path to local directory
231 def run_analysis( 232 self, 233 name: str = None, 234 description: str = "", 235 process: Union[DataPortalProcess, str] = None, 236 params=None, 237 notifications_emails: List[str] = None, 238 compute_environment: str = None, 239 resume_dataset_id: str = None 240 ) -> str: 241 """ 242 Runs an analysis on a dataset, returns the ID of the newly created dataset. 243 244 The process can be provided as either a DataPortalProcess object, 245 or a string which corresponds to the name or ID of the process. 246 247 Args: 248 name (str): Name of newly created dataset 249 description (str): Description of newly created dataset 250 process (DataPortalProcess or str): Process to run 251 params (dict): Analysis parameters 252 notifications_emails (List[str]): Notification email address(es) 253 compute_environment (str): Name or ID of compute environment to use, 254 if blank it will run in AWS 255 resume_dataset_id (str): ID of dataset to resume from, used for caching task execution. 256 It will attempt to re-use the previous output to minimize duplicate work 257 258 Returns: 259 dataset_id (str): ID of newly created dataset 260 """ 261 if name is None: 262 raise DataPortalInputError("Must specify 'name' for run_analysis") 263 if process is None: 264 raise DataPortalInputError("Must specify 'process' for run_analysis") 265 if notifications_emails is None: 266 notifications_emails = [] 267 if params is None: 268 params = {} 269 270 # If the process is a string, try to parse it as a process name or ID 271 process = parse_process_name_or_id(process, self._client) 272 273 if compute_environment: 274 compute_environments = self._client.compute_environments.list_environments_for_project( 275 project_id=self.project_id 276 ) 277 compute_environment = next( 278 (env for env in compute_environments 279 if env.name == compute_environment or env.id == compute_environment), 280 None 281 ) 282 if compute_environment is None: 283 raise DataPortalInputError(f"Compute environment 
'{compute_environment}' not found") 284 285 resp = self._client.execution.run_analysis( 286 project_id=self.project_id, 287 request=RunAnalysisRequest( 288 name=name, 289 description=description, 290 process_id=process.id, 291 source_dataset_ids=[self.id], 292 params=RunAnalysisRequestParams.from_dict(params), 293 notification_emails=notifications_emails, 294 resume_dataset_id=resume_dataset_id, 295 compute_environment_id=compute_environment.id if compute_environment else None 296 ) 297 ) 298 return resp.id
Runs an analysis on a dataset, returns the ID of the newly created dataset.
The process can be provided as either a DataPortalProcess object, or a string which corresponds to the name or ID of the process.
Arguments:
- name (str): Name of newly created dataset
- description (str): Description of newly created dataset
- process (DataPortalProcess or str): Process to run
- params (dict): Analysis parameters
- notifications_emails (List[str]): Notification email address(es)
- compute_environment (str): Name or ID of compute environment to use, if blank it will run in AWS
- resume_dataset_id (str): ID of dataset to resume from, used for caching task execution. It will attempt to re-use the previous output to minimize duplicate work
Returns:
dataset_id (str): ID of newly created dataset
class DataPortalReference(DataPortalAsset):
    """
    Reference data object containing files which can be used for analysis in a particular project.
    """
    def __init__(self, ref: Reference, project_id: str, client: CirroApi):
        """
        Instantiate by listing the references which have been added to a particular project

        ```python
        from cirro import DataPortal
        portal = DataPortal()
        project = portal.get_project_by_name("Project Name")
        references = project.list_references()
        ```
        """
        self._data = ref
        # Wrap each raw file entry so it can be inspected/downloaded
        # like any other portal file
        self._files = [
            DataPortalFile(File.from_file_entry(f, project_id), client) for f in ref.files
        ]

    @property
    def files(self) -> List[DataPortalFile]:
        """File(s) contained in the reference"""
        return self._files

    @property
    def name(self) -> str:
        """Reference name"""
        return self._data.name

    @property
    def type(self) -> str:
        """Type of reference data (e.g. genome_fasta)"""
        return self._data.type

    @property
    def absolute_path(self):
        """Absolute path of the first file in the reference (None if the reference has no files)"""
        if not self._files:
            return None
        return self._files[0].absolute_path

    def __str__(self):
        return self.name
Reference data object containing files which can be used for analysis in a particular project.
16 def __init__(self, ref: Reference, project_id: str, client: CirroApi): 17 """ 18 Instantiate by listing the references which have been added to a particular project 19 ```python 20 from cirro import DataPortal() 21 portal = DataPortal() 22 project = portal.get_project_by_name("Project Name") 23 references = project.list_references() 24 ``` 25 """ 26 self._data = ref 27 self._files = [ 28 DataPortalFile(File.from_file_entry(f, project_id), client) for f in ref.files 29 ]
Instantiate by listing the references which have been added to a particular project
from cirro import DataPortal
portal = DataPortal()
project = portal.get_project_by_name("Project Name")
references = project.list_references()
31 @property 32 def files(self) -> List[DataPortalFile]: 33 """File(s) contained in the reference""" 34 return self._files
File(s) contained in the reference
class CirroApi:
    """
    Client for interacting directly with the Cirro API
    """
    def __init__(self, auth_info: AuthInfo = None, base_url: str = None):
        """
        Instantiates the Cirro API object

        Args:
            auth_info (cirro.auth.base.AuthInfo): Optional authentication method;
                if not provided, it is resolved from the local Cirro configuration
            base_url (str): Optional base URL of the Cirro instance
                (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)

        Returns:
            Authenticated Cirro API object, which can be used to call endpoint functions.

        Example:
        ```python
        from cirro.cirro_client import CirroApi

        cirro = CirroApi(base_url="app.cirro.bio")
        print(cirro.projects.list())
        ```
        """

        self._configuration = AppConfig(base_url=base_url)
        if not auth_info:
            # Fall back to the auth settings stored in the local config
            auth_info = get_auth_info_from_config(self._configuration, auth_io=None)

        self._api_client = CirroApiClient(
            base_url=self._configuration.rest_endpoint,
            auth_method=auth_info.get_auth_method(),
            client_name='Cirro SDK',
            package_name='cirro'
        )

        # Init services
        self._file_service = FileService(self._api_client,
                                         enable_additional_checksum=self._configuration.enable_additional_checksum,
                                         transfer_retries=self._configuration.transfer_max_retries)
        self._dataset_service = DatasetService(self._api_client, file_service=self._file_service)
        self._project_service = ProjectService(self._api_client)
        self._process_service = ProcessService(self._api_client)
        self._execution_service = ExecutionService(self._api_client)
        self._compute_environment_service = ComputeEnvironmentService(self._api_client)
        self._metrics_service = MetricsService(self._api_client)
        self._metadata_service = MetadataService(self._api_client)
        self._billing_service = BillingService(self._api_client)
        self._references_service = ReferenceService(self._api_client)
        self._shares_service = ShareService(self._api_client)
        self._users_service = UserService(self._api_client)

    @property
    def datasets(self) -> DatasetService:
        """
        Create, list, delete, and modify Datasets
        """
        return self._dataset_service

    @property
    def projects(self) -> ProjectService:
        """
        Create, list, delete, and modify Projects
        """
        return self._project_service

    @property
    def processes(self) -> ProcessService:
        """
        List and retrieve detailed information about Processes
        """
        return self._process_service

    @property
    def execution(self) -> ExecutionService:
        """
        List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)
        """
        return self._execution_service

    @property
    def compute_environments(self) -> ComputeEnvironmentService:
        """
        List and update compute environments
        """
        return self._compute_environment_service

    @property
    def metrics(self) -> MetricsService:
        """
        Project-level summary metrics
        """
        return self._metrics_service

    @property
    def metadata(self) -> MetadataService:
        """
        List and modify Sample metadata or metadata schemas
        """
        return self._metadata_service

    @property
    def billing(self) -> BillingService:
        """
        List and update billing accounts
        """
        return self._billing_service

    @property
    def references(self) -> ReferenceService:
        """
        List References and Reference types
        """
        return self._references_service

    @property
    def shares(self) -> ShareService:
        """
        List, create, update, delete, and subscribe to shares
        """
        return self._shares_service

    @property
    def users(self) -> UserService:
        """
        List and update user information
        """
        return self._users_service

    @property
    def file(self) -> FileService:
        """
        Read, download, and create file objects
        """
        return self._file_service

    @property
    def api_client(self) -> CirroApiClient:
        """
        Gets the underlying API client
        """
        return self._api_client

    @property
    def configuration(self) -> AppConfig:
        """
        Gets the configuration of the instance
        """
        return self._configuration
Client for interacting directly with the Cirro API
16 def __init__(self, auth_info: AuthInfo = None, base_url: str = None): 17 """ 18 Instantiates the Cirro API object 19 20 Args: 21 auth_info (cirro.auth.base.AuthInfo): 22 base_url (str): Optional base URL of the Cirro instance 23 (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file) 24 25 Returns: 26 Authenticated Cirro API object, which can be used to call endpoint functions. 27 28 Example: 29 ```python 30 from cirro.cirro_client import CirroApi 31 32 cirro = CirroApi(base_url="app.cirro.bio") 33 print(cirro.projects.list()) 34 ``` 35 """ 36 37 self._configuration = AppConfig(base_url=base_url) 38 if not auth_info: 39 auth_info = get_auth_info_from_config(self._configuration, auth_io=None) 40 41 self._api_client = CirroApiClient( 42 base_url=self._configuration.rest_endpoint, 43 auth_method=auth_info.get_auth_method(), 44 client_name='Cirro SDK', 45 package_name='cirro' 46 ) 47 48 # Init services 49 self._file_service = FileService(self._api_client, 50 enable_additional_checksum=self._configuration.enable_additional_checksum, 51 transfer_retries=self._configuration.transfer_max_retries) 52 self._dataset_service = DatasetService(self._api_client, file_service=self._file_service) 53 self._project_service = ProjectService(self._api_client) 54 self._process_service = ProcessService(self._api_client) 55 self._execution_service = ExecutionService(self._api_client) 56 self._compute_environment_service = ComputeEnvironmentService(self._api_client) 57 self._metrics_service = MetricsService(self._api_client) 58 self._metadata_service = MetadataService(self._api_client) 59 self._billing_service = BillingService(self._api_client) 60 self._references_service = ReferenceService(self._api_client) 61 self._shares_service = ShareService(self._api_client) 62 self._users_service = UserService(self._api_client)
Instantiates the Cirro API object
Arguments:
- auth_info (cirro.auth.base.AuthInfo): Optional authentication method; if not provided, it is resolved from the local Cirro configuration
- base_url (str): Optional base URL of the Cirro instance
(if not provided, it uses the
CIRRO_BASE_URL
environment variable, or the config file)
Returns:
Authenticated Cirro API object, which can be used to call endpoint functions.
Example:
from cirro.cirro_client import CirroApi
cirro = CirroApi(base_url="app.cirro.bio")
print(cirro.projects.list())
64 @property 65 def datasets(self) -> DatasetService: 66 """ 67 Create, list, delete, and modify Datasets 68 """ 69 return self._dataset_service
Create, list, delete, and modify Datasets
71 @property 72 def projects(self) -> ProjectService: 73 """ 74 Create, list, delete, and modify Projects 75 """ 76 return self._project_service
Create, list, delete, and modify Projects
78 @property 79 def processes(self) -> ProcessService: 80 """ 81 List and retrieve detailed information about Processes 82 """ 83 return self._process_service
List and retrieve detailed information about Processes
85 @property 86 def execution(self) -> ExecutionService: 87 """ 88 List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets) 89 """ 90 return self._execution_service
List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)
92 @property 93 def compute_environments(self) -> ComputeEnvironmentService: 94 """ 95 List and update compute environments 96 """ 97 return self._compute_environment_service
List and update compute environments
99 @property 100 def metrics(self) -> MetricsService: 101 """ 102 Project-level summary metrics 103 """ 104 return self._metrics_service
Project-level summary metrics
106 @property 107 def metadata(self) -> MetadataService: 108 """ 109 List and modify Sample metadata or metadata schemas 110 """ 111 return self._metadata_service
List and modify Sample metadata or metadata schemas
113 @property 114 def billing(self) -> BillingService: 115 """ 116 List and update billing accounts 117 """ 118 return self._billing_service
List and update billing accounts
120 @property 121 def references(self) -> ReferenceService: 122 """ 123 List References and Reference types 124 """ 125 return self._references_service
List References and Reference types
134 @property 135 def users(self) -> UserService: 136 """ 137 List and update user information 138 """ 139 return self._users_service
List and update user information
141 @property 142 def file(self) -> FileService: 143 """ 144 Read, download, and create file objects 145 """ 146 return self._file_service
Read, download, and create file objects