cirro
1import cirro.file_utils # noqa 2from cirro.cirro_client import CirroApi 3from cirro.sdk.dataset import DataPortalDataset 4from cirro.sdk.portal import DataPortal 5from cirro.sdk.process import DataPortalProcess 6from cirro.sdk.project import DataPortalProject 7from cirro.sdk.reference import DataPortalReference 8 9__all__ = [ 10 'DataPortal', 11 'DataPortalProject', 12 'DataPortalProcess', 13 'DataPortalDataset', 14 'DataPortalReference', 15 'CirroApi', 16 'file_utils' 17]
class DataPortal:
    """
    Helper functions for exploring the Projects, Datasets, Samples, and Files
    available in the Data Portal.
    """

    def __init__(self, base_url: str = None, client: CirroApi = None):
        """
        Set up the DataPortal object, establishing an authenticated connection.

        Args:
            base_url (str): Optional base URL of the Cirro instance
             (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
            client (`cirro.cirro_client.CirroApi`): Optional pre-configured client

        Example:
        ```python
        from cirro import DataPortal

        portal = DataPortal(base_url="app.cirro.bio")
        portal.list_projects()
        ```
        """

        if client is not None:
            self._client = client

        # Set up default client if not provided
        else:
            self._client = CirroApi(base_url=base_url)

    def list_projects(self) -> DataPortalProjects:
        """List all the projects available in the Data Portal."""

        return DataPortalProjects(
            [
                DataPortalProject(proj, self._client)
                for proj in self._client.projects.list()
            ]
        )

    def get_project_by_name(self, name: str = None) -> DataPortalProject:
        """Return the project with the specified name."""

        return self.list_projects().get_by_name(name)

    def get_project_by_id(self, _id: str = None) -> DataPortalProject:
        """Return the project with the specified id."""

        return self.list_projects().get_by_id(_id)

    def get_dataset(self, project: str = None, dataset: str = None) -> DataPortalDataset:
        """
        Return a dataset identified by ID or name.

        Both lookups try the value as an ID first and fall back to treating
        it as a name when `DataPortalAssetNotFound` is raised.

        Args:
            project (str): ID or name of project
            dataset (str): ID or name of dataset

        Returns:
            `cirro.sdk.dataset.DataPortalDataset`

        ```python
        from cirro import DataPortal
        portal = DataPortal()
        dataset = portal.get_dataset(
            project="id-or-name-of-project",
            dataset="id-or-name-of-dataset"
        )
        ```
        """
        # Use a separate local so the `str` parameter is not rebound
        # to an object of a different type
        try:
            portal_project = self.get_project_by_id(project)
        except DataPortalAssetNotFound:
            portal_project = self.get_project_by_name(project)

        try:
            return portal_project.get_dataset_by_id(dataset)
        except DataPortalAssetNotFound:
            return portal_project.get_dataset_by_name(dataset)

    def list_processes(self, ingest=False) -> DataPortalProcesses:
        """
        List the processes available in the Data Portal.
        By default, every process is listed regardless of its executor.
        To list only the processes which can be used to upload datasets, use `ingest = True`.

        Args:
            ingest (bool): If True, only list those processes which can be used to ingest datasets directly
        """

        return DataPortalProcesses(
            [
                DataPortalProcess(p, self._client)
                for p in self._client.processes.list()
                # With ingest=False the filter passes everything through
                if not ingest or p.executor == Executor.INGEST
            ]
        )

    def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess:
        """
        Return the process with the specified name.

        Args:
            name (str): Name of process
            ingest (bool): If True, only search among ingest processes
        """

        return self.list_processes(ingest=ingest).get_by_name(name)

    def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess:
        """
        Return the process with the specified id

        Args:
            id (str): ID of process
            ingest (bool): If True, only search among ingest processes
        """

        return self.list_processes(ingest=ingest).get_by_id(id)

    def list_reference_types(self) -> DataPortalReferenceTypes:
        """
        Return the list of all available reference types
        """

        return DataPortalReferenceTypes(
            [
                DataPortalReferenceType(ref)
                for ref in self._client.references.get_types()
            ]
        )
Helper functions for exploring the Projects, Datasets, Samples, and Files available in the Data Portal.
def __init__(self, base_url: str = None, client: CirroApi = None):
    """
    Set up the DataPortal object, establishing an authenticated connection.

    Args:
        base_url (str): Optional base URL of the Cirro instance
         (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
        client (`cirro.cirro_client.CirroApi`): Optional pre-configured client

    Example:
    ```python
    from cirro import DataPortal

    portal = DataPortal(base_url="app.cirro.bio")
    portal.list_projects()
    ```
    """

    if client is not None:
        self._client = client

    # Set up default client if not provided
    else:
        self._client = CirroApi(base_url=base_url)
Set up the DataPortal object, establishing an authenticated connection.
Arguments:
- base_url (str): Optional base URL of the Cirro instance (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
- client (`CirroApi`): Optional pre-configured client
Example:
from cirro import DataPortal
portal = DataPortal(base_url="app.cirro.bio")
portal.list_projects()
def list_projects(self) -> DataPortalProjects:
    """Return every project from the client's listing, wrapped as `DataPortalProject` objects."""
    wrapped_projects = []
    for project_record in self._client.projects.list():
        wrapped_projects.append(DataPortalProject(project_record, self._client))
    return DataPortalProjects(wrapped_projects)
List all the projects available in the Data Portal.
def get_project_by_name(self, name: str = None) -> DataPortalProject:
    """Look up a single project by name among the listed projects."""
    all_projects = self.list_projects()
    return all_projects.get_by_name(name)
Return the project with the specified name.
def get_project_by_id(self, _id: str = None) -> DataPortalProject:
    """Look up a single project by ID among the listed projects."""
    all_projects = self.list_projects()
    return all_projects.get_by_id(_id)
Return the project with the specified id.
def get_dataset(self, project: str = None, dataset: str = None) -> DataPortalDataset:
    """
    Return a dataset identified by ID or name.

    Both lookups try the value as an ID first and fall back to treating
    it as a name when `DataPortalAssetNotFound` is raised.

    Args:
        project (str): ID or name of project
        dataset (str): ID or name of dataset

    Returns:
        `cirro.sdk.dataset.DataPortalDataset`

    ```python
    from cirro import DataPortal
    portal = DataPortal()
    dataset = portal.get_dataset(
        project="id-or-name-of-project",
        dataset="id-or-name-of-dataset"
    )
    ```
    """
    # Use a separate local so the `str` parameter is not rebound
    # to an object of a different type
    try:
        portal_project = self.get_project_by_id(project)
    except DataPortalAssetNotFound:
        portal_project = self.get_project_by_name(project)

    try:
        return portal_project.get_dataset_by_id(dataset)
    except DataPortalAssetNotFound:
        return portal_project.get_dataset_by_name(dataset)
Return a dataset identified by ID or name.
Arguments:
- project (str): ID or name of project
- dataset (str): ID or name of dataset
Returns:
from cirro import DataPortal
portal = DataPortal()
dataset = portal.get_dataset(
    project="id-or-name-of-project",
    dataset="id-or-name-of-dataset"
)
def list_processes(self, ingest=False) -> DataPortalProcesses:
    """
    List the processes available in the Data Portal.
    By default, every process is listed regardless of its executor.
    To list only the processes which can be used to upload datasets, use `ingest = True`.

    Args:
        ingest (bool): If True, only list those processes which can be used to ingest datasets directly
    """

    return DataPortalProcesses(
        [
            DataPortalProcess(p, self._client)
            for p in self._client.processes.list()
            # With ingest=False the filter passes everything through
            if not ingest or p.executor == Executor.INGEST
        ]
    )
List all the processes available in the Data Portal.
By default, all processes are listed, regardless of executor.
To list only the processes which can be used to upload datasets, use `ingest = True`.
Arguments:
- ingest (bool): If True, only list those processes which can be used to ingest datasets directly
def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess:
    """
    Look up a single process by name.

    Args:
        name (str): Name of process
        ingest (bool): Forwarded to `list_processes` to choose which processes are searched
    """
    candidates = self.list_processes(ingest=ingest)
    return candidates.get_by_name(name)
Return the process with the specified name.
Arguments:
- name (str): Name of process
def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess:
    """
    Look up a single process by ID.

    Args:
        id (str): ID of process
        ingest (bool): Forwarded to `list_processes` to choose which processes are searched
    """
    candidates = self.list_processes(ingest=ingest)
    return candidates.get_by_id(id)
Return the process with the specified id
Arguments:
- id (str): ID of process
def list_reference_types(self) -> DataPortalReferenceTypes:
    """
    Return all reference types reported by the client, wrapped as `DataPortalReferenceType` objects
    """
    wrapped_types = []
    for type_record in self._client.references.get_types():
        wrapped_types.append(DataPortalReferenceType(type_record))
    return DataPortalReferenceTypes(wrapped_types)
Return the list of all available reference types
class DataPortalProject(DataPortalAsset):
    """
    Projects in the Data Portal contain collections of Datasets.
    Users are granted permissions at the project-level, allowing them
    to view and/or modify all the datasets in that collection.
    """
    def __init__(self, proj: Project, client: CirroApi):
        """
        Instantiate with helper method

        ```python
        from cirro import DataPortal
        portal = DataPortal()
        project = portal.get_project_by_name("Project Name")
        ```

        Args:
            proj (Project): Project record wrapped by this object
            client (CirroApi): Client used for subsequent API calls
        """
        self._data = proj
        self._client = client

    @property
    def id(self) -> str:
        """
        Unique identifier
        """
        return self._data.id

    @property
    def name(self) -> str:
        """
        Readable name
        """
        return self._data.name

    @property
    def description(self) -> str:
        """
        Longer description of the project
        """
        return self._data.description

    def __str__(self):
        """Control how the Project is rendered as a string."""

        return '\n'.join([
            f"{i.title()}: {getattr(self, i)}"
            for i in ['name', 'id', 'description']
        ])

    # NOTE(review): if `cache` is `functools.cache`, entries are keyed on `self`
    # and stored on the function, so `cache_clear()` below would drop the cached
    # listings of every project, not just this one -- confirm before relying on it.
    @cache
    def _get_datasets(self) -> List[Dataset]:
        """Fetch (and memoize) the raw dataset records for this project."""
        return self._client.datasets.list(self.id)

    def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
        """
        List all the datasets available in the project.

        Args:
            force_refresh (bool): If True, clear the cached listing before reading it
        """
        if force_refresh:
            self._get_datasets.cache_clear()

        return DataPortalDatasets(
            [
                DataPortalDataset(d, self._client)
                for d in self._get_datasets()
            ]
        )

    def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
        """
        Return the dataset with the specified name.

        Args:
            name (str): Name of dataset
            force_refresh (bool): If True, clear the cached listing before searching it

        Raises:
            DataPortalAssetNotFound: If no dataset in the listing has that name
        """
        if force_refresh:
            self._get_datasets.cache_clear()

        dataset = next((d for d in self._get_datasets() if d.name == name), None)
        if dataset is None:
            raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
        # Re-fetch by ID rather than wrapping the listing record directly
        return self.get_dataset_by_id(dataset.id)

    def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
        """
        Return the dataset with the specified id.

        Raises:
            DataPortalAssetNotFound: If the client returns None for that ID
        """

        dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id)
        if dataset is None:
            raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
        return DataPortalDataset(dataset, self._client)

    def list_references(self, reference_type: str = None) -> DataPortalReferences:
        """
        List the references available in a project.
        Optionally filter to references of a particular type (identified by name)

        Raises:
            DataPortalAssetNotFound: If `reference_type` is given and matches no reference type
        """

        # Get the complete list of reference types which are available
        reference_types = DataPortalReferenceTypes(
            [
                DataPortalReferenceType(ref)
                for ref in self._client.references.get_types()
            ]
        )

        # If a particular name was specified
        if reference_type is not None:
            reference_types = reference_types.filter_by_pattern(reference_type)
            if len(reference_types) == 0:
                msg = f"Could not find any reference types with the name {reference_type}"
                raise DataPortalAssetNotFound(msg)

        return DataPortalReferences(
            [
                DataPortalReference(ref, project_id=self.id, client=self._client)
                for ref in self._client.references.get_for_project(
                    self.id
                )
                if reference_type is None or ref.type == reference_type
            ]
        )

    def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
        """
        Return the reference of a particular type with the specified name.

        Raises:
            DataPortalInputError: If `name` is None
        """

        if name is None:
            raise DataPortalInputError("Must specify the reference name")

        return self.list_references(ref_type).get_by_name(name)

    def upload_dataset(
        self,
        name: str = None,
        description='',
        process: Union[DataPortalProcess, str] = None,
        upload_folder: str = None,
        files: list = None
    ) -> DataPortalDataset:
        """
        Upload a set of files to the Data Portal, creating a new dataset.

        If the files parameter is not provided, it will upload all files in the upload folder

        Args:
            name (str): Name of newly created dataset
            description (str): Description of newly created dataset
            process (str | DataPortalProcess): Process to run; may be referenced by name, ID, or object
            upload_folder (str): Folder containing files to upload
            files (List[str]): Optional subset of files to upload from the folder

        Returns:
            The newly created dataset

        Raises:
            DataPortalInputError: If `name`, `process` or `upload_folder` is None
            RuntimeWarning: If there are no files to upload
            DataPortalAssetNotFound: If the new dataset cannot be retrieved after 5 attempts
        """

        if name is None:
            raise DataPortalInputError("Must provide name for new dataset")
        if process is None:
            raise DataPortalInputError("Must provide the process which is used for ingest")
        if upload_folder is None:
            raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")

        # Parse the process provided by the user
        process = parse_process_name_or_id(process, self._client)

        # If no files were provided
        if files is None:
            # Get the list of files in the upload folder
            files = get_files_in_directory(upload_folder)

        if files is None or len(files) == 0:
            raise RuntimeWarning("No files to upload, exiting")

        # Make sure that the files match the expected pattern
        self._client.processes.check_dataset_files(files, process.id, upload_folder)

        # Create the ingest process request
        dataset_create_request = UploadDatasetRequest(
            process_id=process.id,
            name=name,
            description=description,
            expected_files=files
        )

        # Get the response
        create_response = self._client.datasets.create(project_id=self.id,
                                                       upload_request=dataset_create_request)

        # Upload the files
        self._client.datasets.upload_files(
            project_id=self.id,
            dataset_id=create_response.id,
            directory=upload_folder,
            files=files
        )

        # Return the dataset which was created, which might take a second to update
        max_attempts = 5
        for attempt in range(max_attempts):
            try:
                return self.get_dataset_by_id(create_response.id)
            except DataPortalAssetNotFound:
                if attempt == max_attempts - 1:
                    # Out of retries: propagate the original exception unchanged
                    raise
                sleep(2)

    def samples(self, max_items: int = 10000):
        """
        Retrieves a list of samples associated with a project along with their metadata

        Args:
            max_items (int): Maximum number of records to get (default 10,000)
        """
        return self._client.metadata.get_project_samples(self.id, max_items)
Projects in the Data Portal contain collections of Datasets. Users are granted permissions at the project-level, allowing them to view and/or modify all the datasets in that collection.
def __init__(self, proj: Project, client: CirroApi):
    """
    Instantiate with helper method

    ```python
    from cirro import DataPortal
    portal = DataPortal()
    project = portal.get_project_by_name("Project Name")
    ```

    Args:
        proj (Project): Project record wrapped by this object
        client (CirroApi): Client used for subsequent API calls
    """
    self._data = proj
    self._client = client
Instantiate with helper method
from cirro import DataPortal
portal = DataPortal()
project = portal.get_project_by_name("Project Name")
@property
def description(self) -> str:
    """Longer description of the project, read from the wrapped record."""
    project_record = self._data
    return project_record.description
Longer description of the project
def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
    """
    Return every dataset in the project, wrapped as `DataPortalDataset` objects.

    Args:
        force_refresh (bool): If True, clear the cached listing before reading it
    """
    if force_refresh:
        self._get_datasets.cache_clear()

    wrapped = []
    for record in self._get_datasets():
        wrapped.append(DataPortalDataset(record, self._client))
    return DataPortalDatasets(wrapped)
List all the datasets available in the project.
def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
    """
    Find the first listed dataset with the given name and fetch it by ID.

    Args:
        name (str): Name of dataset
        force_refresh (bool): If True, clear the cached listing before searching it

    Raises:
        DataPortalAssetNotFound: If no dataset in the listing has that name
    """
    if force_refresh:
        self._get_datasets.cache_clear()

    for candidate in self._get_datasets():
        if candidate.name == name:
            # Re-fetch by ID rather than wrapping the listing record directly
            return self.get_dataset_by_id(candidate.id)

    raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
Return the dataset with the specified name.
def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
    """
    Fetch a dataset of this project by ID and wrap it.

    Raises:
        DataPortalAssetNotFound: If the client returns None for that ID
    """
    record = self._client.datasets.get(project_id=self.id, dataset_id=_id)
    if record is not None:
        return DataPortalDataset(record, self._client)
    raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
Return the dataset with the specified id.
def list_references(self, reference_type: str = None) -> DataPortalReferences:
    """
    Return the project's references, optionally restricted to one reference type.

    When `reference_type` is given, it must match at least one known reference
    type; the returned references are those whose `type` equals it.

    Raises:
        DataPortalAssetNotFound: If `reference_type` matches no reference type
    """
    # Wrap every reference type known to the portal
    known_types = DataPortalReferenceTypes(
        [DataPortalReferenceType(t) for t in self._client.references.get_types()]
    )

    # Validate the requested type name before listing anything
    if reference_type is not None:
        known_types = known_types.filter_by_pattern(reference_type)
        if len(known_types) == 0:
            msg = f"Could not find any reference types with the name {reference_type}"
            raise DataPortalAssetNotFound(msg)

    selected = []
    for ref in self._client.references.get_for_project(self.id):
        if reference_type is not None and ref.type != reference_type:
            continue
        selected.append(DataPortalReference(ref, project_id=self.id, client=self._client))
    return DataPortalReferences(selected)
List the references available in a project. Optionally filter to references of a particular type (identified by name)
def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
    """
    Look up a reference by name, optionally restricted to one reference type.

    Raises:
        DataPortalInputError: If `name` is None
    """
    if name is None:
        raise DataPortalInputError("Must specify the reference name")

    matching_refs = self.list_references(ref_type)
    return matching_refs.get_by_name(name)
Return the reference of a particular type with the specified name.
def upload_dataset(
    self,
    name: str = None,
    description='',
    process: Union[DataPortalProcess, str] = None,
    upload_folder: str = None,
    files: list = None
) -> DataPortalDataset:
    """
    Upload a set of files to the Data Portal, creating a new dataset.

    If the files parameter is not provided, it will upload all files in the upload folder

    Args:
        name (str): Name of newly created dataset
        description (str): Description of newly created dataset
        process (str | DataPortalProcess): Process to run; may be referenced by name, ID, or object
        upload_folder (str): Folder containing files to upload
        files (List[str]): Optional subset of files to upload from the folder

    Returns:
        The newly created dataset

    Raises:
        DataPortalInputError: If `name`, `process` or `upload_folder` is None
        RuntimeWarning: If there are no files to upload
        DataPortalAssetNotFound: If the new dataset cannot be retrieved after 5 attempts
    """

    if name is None:
        raise DataPortalInputError("Must provide name for new dataset")
    if process is None:
        raise DataPortalInputError("Must provide the process which is used for ingest")
    if upload_folder is None:
        raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")

    # Parse the process provided by the user
    process = parse_process_name_or_id(process, self._client)

    # If no files were provided
    if files is None:
        # Get the list of files in the upload folder
        files = get_files_in_directory(upload_folder)

    if files is None or len(files) == 0:
        raise RuntimeWarning("No files to upload, exiting")

    # Make sure that the files match the expected pattern
    self._client.processes.check_dataset_files(files, process.id, upload_folder)

    # Create the ingest process request
    dataset_create_request = UploadDatasetRequest(
        process_id=process.id,
        name=name,
        description=description,
        expected_files=files
    )

    # Get the response
    create_response = self._client.datasets.create(project_id=self.id,
                                                   upload_request=dataset_create_request)

    # Upload the files
    self._client.datasets.upload_files(
        project_id=self.id,
        dataset_id=create_response.id,
        directory=upload_folder,
        files=files
    )

    # Return the dataset which was created, which might take a second to update
    max_attempts = 5
    for attempt in range(max_attempts):
        try:
            return self.get_dataset_by_id(create_response.id)
        except DataPortalAssetNotFound:
            if attempt == max_attempts - 1:
                # Out of retries: propagate the original exception unchanged
                raise
            sleep(2)
Upload a set of files to the Data Portal, creating a new dataset.
If the files parameter is not provided, it will upload all files in the upload folder
Arguments:
- name (str): Name of newly created dataset
- description (str): Description of newly created dataset
- process (str | DataPortalProcess): Process to run; may be referenced by name, ID, or object
- upload_folder (str): Folder containing files to upload
- files (List[str]): Optional subset of files to upload from the folder
def samples(self, max_items: int = 10000):
    """
    Fetch the samples associated with this project, along with their metadata.

    Args:
        max_items (int): Maximum number of records to get (default 10,000)
    """
    metadata_service = self._client.metadata
    return metadata_service.get_project_samples(self.id, max_items)
Retrieves a list of samples associated with a project along with their metadata
Arguments:
- max_items (int): Maximum number of records to get (default 10,000)
class DataPortalProcess(DataPortalAsset):
    """Helper functions for interacting with analysis processes."""
    _data: Process

    def __init__(self, process: Process, client: CirroApi):
        """
        Instantiate with helper method

        ```python
        from cirro import DataPortal
        portal = DataPortal()
        process = portal.get_process_by_name("Process Name")
        ```

        Args:
            process (Process): Process record wrapped by this object
            client (CirroApi): Client used for subsequent API calls
        """
        self._data = process
        self._client = client

    @property
    def id(self) -> str:
        """Unique identifier"""
        return self._data.id

    @property
    def name(self) -> str:
        """Readable name"""
        return self._data.name

    @property
    def description(self) -> str:
        """Longer description of process"""
        return self._data.description

    @property
    def child_process_ids(self) -> List[str]:
        """List of processes which can be run on the output of this process"""
        return self._data.child_process_ids

    @property
    def executor(self) -> Executor:
        """INGEST, CROMWELL, or NEXTFLOW"""
        return self._data.executor

    @property
    def documentation_url(self) -> str:
        """Documentation URL"""
        return self._data.documentation_url

    @property
    def file_requirements_message(self) -> str:
        """Description of files required for INGEST processes"""
        return self._data.file_requirements_message

    def __str__(self):
        """Render the process as `Name`, `Id` and `Description` lines."""
        return '\n'.join([
            f"{i.title()}: {getattr(self, i)}"
            for i in ['name', 'id', 'description']
        ])

    def get_parameter_spec(self) -> ParameterSpecification:
        """
        Gets a specification used to describe the parameters used in the process.
        """
        return self._client.processes.get_parameter_spec(self.id)
Helper functions for interacting with analysis processes.
def __init__(self, process: Process, client: CirroApi):
    """
    Instantiate with helper method

    ```python
    from cirro import DataPortal
    portal = DataPortal()
    process = portal.get_process_by_name("Process Name")
    ```

    Args:
        process (Process): Process record wrapped by this object
        client (CirroApi): Client used for subsequent API calls
    """
    self._data = process
    self._client = client
Instantiate with helper method
from cirro import DataPortal
portal = DataPortal()
process = portal.get_process_by_name("Process Name")
@property
def description(self) -> str:
    """Longer description of the process, read from the wrapped record."""
    process_record = self._data
    return process_record.description
Longer description of process
@property
def child_process_ids(self) -> List[str]:
    """IDs of the processes which can be run on the output of this process."""
    process_record = self._data
    return process_record.child_process_ids
List of processes which can be run on the output of this process
@property
def executor(self) -> Executor:
    """Executor of the process: INGEST, CROMWELL, or NEXTFLOW."""
    process_record = self._data
    return process_record.executor
INGEST, CROMWELL, or NEXTFLOW
@property
def documentation_url(self) -> str:
    """URL of the process documentation, read from the wrapped record."""
    process_record = self._data
    return process_record.documentation_url
Documentation URL
@property
def file_requirements_message(self) -> str:
    """Description of the files required for INGEST processes."""
    process_record = self._data
    return process_record.file_requirements_message
Description of files required for INGEST processes
class DataPortalDataset(DataPortalAsset):
    """
    Datasets in the Data Portal are collections of files which have
    either been uploaded directly, or which have been output by
    an analysis pipeline or notebook.
    """

    def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi):
        """
        Instantiate a dataset object

        Should be invoked from a top-level constructor, for example:

        ```python
        from cirro import DataPortal
        portal = DataPortal()
        dataset = portal.get_dataset(
            project="id-or-name-of-project",
            dataset="id-or-name-of-dataset"
        )
        ```

        Args:
            dataset (Dataset | DatasetDetail): Dataset record; must have a non-None `project_id`
            client (CirroApi): Client used for subsequent API calls
        """
        assert dataset.project_id is not None, "Must provide dataset with project_id attribute"
        self._data = dataset
        # Populated on first call to list_files()
        self._files: Optional[DataPortalFiles] = None
        self._client = client

    @property
    def id(self) -> str:
        """Unique identifier for the dataset"""
        return self._data.id

    @property
    def name(self) -> str:
        """Editable name for the dataset"""
        return self._data.name

    @property
    def description(self) -> str:
        """Longer description of the dataset"""
        return self._data.description

    @property
    def process_id(self) -> str:
        """Unique ID of process used to create the dataset"""
        return self._data.process_id

    @property
    def process(self) -> ProcessDetail:
        """
        Object representing the process used to create the dataset
        (fetched from the client on each access)
        """
        return self._client.processes.get(self.process_id)

    @property
    def project_id(self) -> str:
        """ID of the project containing the dataset"""
        return self._data.project_id

    @property
    def status(self) -> Status:
        """
        Status of the dataset
        """
        return self._data.status

    @property
    def source_dataset_ids(self) -> List[str]:
        """IDs of the datasets used as sources for this dataset (if any)"""
        return self._data.source_dataset_ids

    @property
    def source_datasets(self) -> List['DataPortalDataset']:
        """
        Objects representing the datasets used as sources for this dataset (if any)
        """
        return [
            DataPortalDataset(
                dataset=self._client.datasets.get(project_id=self.project_id, dataset_id=dataset_id),
                client=self._client
            )
            for dataset_id in self.source_dataset_ids
        ]

    @property
    def params(self) -> DatasetDetailParams:
        """
        Parameters used to generate the dataset
        """
        return self._get_detail().params

    @property
    def info(self) -> DatasetDetailInfo:
        """
        Detailed information about the dataset
        """
        return self._get_detail().info

    @property
    def tags(self) -> List[Tag]:
        """
        Tags applied to the dataset
        """
        return self._data.tags

    @property
    def created_by(self) -> str:
        """User who created the dataset"""
        return self._data.created_by

    @property
    def created_at(self) -> datetime.datetime:
        """Timestamp of dataset creation"""
        return self._data.created_at

    def _get_detail(self):
        """
        Return the wrapped record as a `DatasetDetail`, fetching it from the
        client (and replacing the wrapped record) if it is not one already.
        """
        if not isinstance(self._data, DatasetDetail):
            self._data = self._client.datasets.get(project_id=self.project_id, dataset_id=self.id)
        return self._data

    def __str__(self):
        """Render the dataset as `Name`, `Id`, `Description` and `Status` lines."""
        return '\n'.join([
            f"{i.title()}: {getattr(self, i)}"
            for i in ['name', 'id', 'description', 'status']
        ])

    def list_files(self) -> DataPortalFiles:
        """
        Return the list of files which make up the dataset.

        The listing is stored on the object after the first call; it is
        requested again whenever the stored value is falsy.
        """
        if not self._files:
            self._files = DataPortalFiles(
                [
                    DataPortalFile(file=file, client=self._client)
                    for file in self._client.datasets.get_file_listing(
                        project_id=self.project_id,
                        dataset_id=self.id
                    )
                ]
            )
        return self._files

    def download_files(self, download_location: str = None) -> None:
        """
        Download all the files from the dataset to a local directory.

        Args:
            download_location (str): Path to local directory
        """

        # Alias for internal method
        self.list_files().download(download_location)

    def run_analysis(
            self,
            name: str = None,
            description: str = "",
            process: Union[DataPortalProcess, str] = None,
            params=None,
            notifications_emails=None
    ) -> str:
        """
        Runs an analysis on a dataset, returns the ID of the newly created dataset.

        The process can be provided as either a DataPortalProcess object,
        or a string which corresponds to the name or ID of the process.

        Args:
            name (str): Name of newly created dataset
            description (str): Description of newly created dataset
            process (DataPortalProcess or str): Process to run
            params (dict): Analysis parameters
            notifications_emails (List[str]): Notification email address(es)

        Returns:
            dataset_id (str): ID of newly created dataset

        Raises:
            DataPortalInputError: If `name` or `process` is None
        """
        if name is None:
            raise DataPortalInputError("Must specify 'name' for run_analysis")
        if process is None:
            raise DataPortalInputError("Must specify 'process' for run_analysis")
        if notifications_emails is None:
            notifications_emails = []
        if params is None:
            params = {}

        # If the process is a string, try to parse it as a process name or ID
        process = parse_process_name_or_id(process, self._client)

        resp = self._client.execution.run_analysis(
            project_id=self.project_id,
            request=RunAnalysisRequest(
                name=name,
                description=description,
                process_id=process.id,
                source_dataset_ids=[self.id],
                params=RunAnalysisRequestParams.from_dict(params),
                notification_emails=notifications_emails
            )
        )
        return resp.id
Datasets in the Data Portal are collections of files which have either been uploaded directly, or which have been output by an analysis pipeline or notebook.
def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi):
    """
    Instantiate a dataset object

    Should be invoked from a top-level constructor, for example:

    ```python
    from cirro import DataPortal
    portal = DataPortal()
    dataset = portal.get_dataset(
        project="id-or-name-of-project",
        dataset="id-or-name-of-dataset"
    )
    ```

    Args:
        dataset (Dataset | DatasetDetail): Dataset record; must have a non-None `project_id`
        client (CirroApi): Client used for subsequent API calls
    """
    assert dataset.project_id is not None, "Must provide dataset with project_id attribute"
    self._data = dataset
    # Populated on first call to list_files()
    self._files: Optional[DataPortalFiles] = None
    self._client = client
Instantiate a dataset object
Should be invoked from a top-level constructor, for example:
from cirro import DataPortal
portal = DataPortal()
dataset = portal.get_dataset(
project="id-or-name-of-project",
dataset="id-or-name-of-dataset"
)
@property
def id(self) -> str:
    """
    Identifier which uniquely distinguishes this dataset
    """
    record = self._data
    return record.id
Unique identifier for the dataset
@property
def name(self) -> str:
    """
    Editable name for the dataset
    """
    record = self._data
    return record.name
Editable name for the dataset
@property
def description(self) -> str:
    """
    Description (longer name) of the dataset
    """
    record = self._data
    return record.description
Longer name for the dataset
@property
def process_id(self) -> str:
    """
    Unique ID of the process which was used to create the dataset
    """
    record = self._data
    return record.process_id
Unique ID of process used to create the dataset
@property
def process(self) -> ProcessDetail:
    """
    Object representing the process used to create the dataset,
    retrieved through the client using `process_id`
    """
    process_service = self._client.processes
    return process_service.get(self.process_id)
Object representing the process used to create the dataset
@property
def project_id(self) -> str:
    """
    ID of the project which contains the dataset
    """
    record = self._data
    return record.project_id
ID of the project containing the dataset
@property
def status(self) -> Status:
    """Status of the dataset"""
    record = self._data
    return record.status
Status of the dataset
@property
def source_dataset_ids(self) -> List[str]:
    """
    IDs of the datasets used as sources for this dataset (if any)
    """
    record = self._data
    return record.source_dataset_ids
IDs of the datasets used as sources for this dataset (if any)
@property
def source_datasets(self) -> List['DataPortalDataset']:
    """
    Objects representing the datasets used as sources for this dataset (if any).

    Each ID in `source_dataset_ids` is fetched from this dataset's project
    through the client and wrapped in a `DataPortalDataset`.
    """
    sources = []
    for source_id in self.source_dataset_ids:
        detail = self._client.datasets.get(project_id=self.project_id, dataset_id=source_id)
        sources.append(DataPortalDataset(dataset=detail, client=self._client))
    return sources
Objects representing the datasets used as sources for this dataset (if any)
@property
def params(self) -> DatasetDetailParams:
    """Parameters used to generate the dataset"""
    detail = self._get_detail()
    return detail.params
Parameters used to generate the dataset
@property
def info(self) -> DatasetDetailInfo:
    """Detailed information about the dataset"""
    detail = self._get_detail()
    return detail.info
Detailed information about the dataset
@property
def created_by(self) -> str:
    """
    User who created the dataset
    """
    record = self._data
    return record.created_by
User who created the dataset
@property
def created_at(self) -> datetime.datetime:
    """
    Timestamp of dataset creation
    """
    record = self._data
    return record.created_at
Timestamp of dataset creation
def list_files(self) -> DataPortalFiles:
    """
    Return the list of files which make up the dataset.

    A previously stored, non-empty listing is returned as-is; otherwise the
    listing is requested through the client, wrapped, and stored on the instance.
    """
    # Reuse the stored listing when there is one (truthiness check, so an
    # empty or missing listing triggers a new request)
    if self._files:
        return self._files

    listing = self._client.datasets.get_file_listing(
        project_id=self.project_id,
        dataset_id=self.id
    )
    self._files = DataPortalFiles(
        [DataPortalFile(file=entry, client=self._client) for entry in listing]
    )
    return self._files
Return the list of files which make up the dataset.
def download_files(self, download_location: str = None) -> None:
    """
    Download all the files from the dataset to a local directory.

    Args:
        download_location (str): Path to local directory
    """
    # Delegates to the download method of the dataset's file listing
    files = self.list_files()
    files.download(download_location)
Download all the files from the dataset to a local directory.
Arguments:
- download_location (str): Path to local directory
def run_analysis(
        self,
        name: str = None,
        description: str = "",
        process: Union[DataPortalProcess, str] = None,
        params=None,
        notifications_emails=None
) -> str:
    """
    Runs an analysis on a dataset, returns the ID of the newly created dataset.

    The process can be provided as either a DataPortalProcess object,
    or a string which corresponds to the name or ID of the process.

    Args:
        name (str): Name of newly created dataset
        description (str): Description of newly created dataset
        process (DataPortalProcess or str): Process to run
        params (dict): Analysis parameters (defaults to an empty dict)
        notifications_emails (List[str]): Notification email address(es)
            (defaults to an empty list)

    Returns:
        dataset_id (str): ID of newly created dataset

    Raises:
        DataPortalInputError: If `name` or `process` is not specified
    """
    # Both the name and the process are required
    if name is None:
        raise DataPortalInputError("Must specify 'name' for run_analysis")
    if process is None:
        raise DataPortalInputError("Must specify 'process' for run_analysis")

    # Fill in empty defaults for the optional inputs
    emails = [] if notifications_emails is None else notifications_emails
    analysis_params = {} if params is None else params

    # Resolve the process, which may have been given as a name or ID string
    resolved_process = parse_process_name_or_id(process, self._client)

    # This dataset is the only source for the new analysis
    request = RunAnalysisRequest(
        name=name,
        description=description,
        process_id=resolved_process.id,
        source_dataset_ids=[self.id],
        params=RunAnalysisRequestParams.from_dict(analysis_params),
        notification_emails=emails
    )
    response = self._client.execution.run_analysis(
        project_id=self.project_id,
        request=request
    )
    return response.id
Runs an analysis on a dataset, returns the ID of the newly created dataset.
The process can be provided as either a DataPortalProcess object, or a string which corresponds to the name or ID of the process.
Arguments:
- name (str): Name of newly created dataset
- description (str): Description of newly created dataset
- process (DataPortalProcess or str): Process to run
- params (dict): Analysis parameters
- notifications_emails (List[str]): Notification email address(es)
Returns:
dataset_id (str): ID of newly created dataset
class DataPortalReference(DataPortalAsset):
    """
    Reference data object containing files which can be used for analysis in a particular project.
    """

    def __init__(self, ref: Reference, project_id: str, client: CirroApi):
        """
        Instantiate by listing the references which have been added to a particular project

        ```python
        from cirro import DataPortal
        portal = DataPortal()
        project = portal.get_project_by_name("Project Name")
        references = project.list_references()
        ```

        Args:
            ref (Reference): Reference record, including its `files`
            project_id (str): ID of the project, passed to `File.from_file_entry`
            client (CirroApi): Client passed to each `DataPortalFile`
        """
        self._data = ref
        # Wrap every file entry of the reference in a DataPortalFile
        wrapped_files = []
        for entry in ref.files:
            wrapped_files.append(
                DataPortalFile(File.from_file_entry(entry, project_id), client)
            )
        self._files = wrapped_files

    @property
    def files(self) -> List[DataPortalFile]:
        """File(s) contained in the reference"""
        return self._files

    @property
    def name(self) -> str:
        """Reference name"""
        return self._data.name

    @property
    def type(self) -> str:
        """Type of reference data (e.g. genome_fasta)"""
        return self._data.type

    @property
    def absolute_path(self):
        """Absolute path of the first file in the reference, or None if it has no files"""
        if self._files:
            return self._files[0].absolute_path
        return None

    def __str__(self):
        """Use the reference name as the string representation"""
        return self.name
Reference data object containing files which can be used for analysis in a particular project.
def __init__(self, ref: Reference, project_id: str, client: CirroApi):
    """
    Instantiate by listing the references which have been added to a particular project

    ```python
    from cirro import DataPortal
    portal = DataPortal()
    project = portal.get_project_by_name("Project Name")
    references = project.list_references()
    ```

    Args:
        ref (Reference): Reference record, including its `files`
        project_id (str): ID of the project, passed to `File.from_file_entry`
        client (CirroApi): Client passed to each `DataPortalFile`
    """
    self._data = ref
    # Wrap every file entry of the reference in a DataPortalFile
    wrapped_files = []
    for entry in ref.files:
        wrapped_files.append(
            DataPortalFile(File.from_file_entry(entry, project_id), client)
        )
    self._files = wrapped_files
Instantiate by listing the references which have been added to a particular project
from cirro import DataPortal
portal = DataPortal()
project = portal.get_project_by_name("Project Name")
references = project.list_references()
@property
def files(self) -> List[DataPortalFile]:
    """
    File(s) contained in the reference
    """
    wrapped_files = self._files
    return wrapped_files
File(s) contained in the reference
class CirroApi:
    """
    Client for interacting directly with the Cirro API
    """

    def __init__(self, auth_info: AuthInfo = None, base_url: str = None):
        """
        Instantiates the Cirro API object

        Args:
            auth_info (cirro.auth.base.AuthInfo): Optional authentication information
                (if not provided, it is loaded from the configuration)
            base_url (str): Optional base URL of the Cirro instance
                (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)

        Returns:
            Authenticated Cirro API object, which can be used to call endpoint functions.

        Example:
        ```python
        from cirro.cirro_client import CirroApi

        cirro = CirroApi(base_url="app.cirro.bio")
        print(cirro.projects.list())
        ```
        """
        config = AppConfig(base_url=base_url)
        self._configuration = config

        # Fall back to the configured authentication when none was supplied
        auth_info = auth_info or get_auth_info_from_config(config, auth_io=None)

        client = CirroApiClient(
            base_url=config.rest_endpoint,
            auth_method=auth_info.get_auth_method(),
            client_name='Cirro SDK',
            package_name='cirro'
        )
        self._api_client = client

        # Init services (the dataset service reuses the file service)
        self._file_service = FileService(
            client,
            enable_additional_checksum=config.enable_additional_checksum,
            transfer_retries=config.transfer_max_retries
        )
        self._dataset_service = DatasetService(client, file_service=self._file_service)
        self._project_service = ProjectService(client)
        self._process_service = ProcessService(client)
        self._execution_service = ExecutionService(client)
        self._metrics_service = MetricsService(client)
        self._metadata_service = MetadataService(client)
        self._billing_service = BillingService(client)
        self._references_service = ReferenceService(client)
        self._users_service = UserService(client)

    @property
    def datasets(self) -> DatasetService:
        """Create, list, delete, and modify Datasets"""
        return self._dataset_service

    @property
    def projects(self) -> ProjectService:
        """Create, list, delete, and modify Projects"""
        return self._project_service

    @property
    def processes(self) -> ProcessService:
        """List and retrieve detailed information about Processes"""
        return self._process_service

    @property
    def execution(self) -> ExecutionService:
        """List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)"""
        return self._execution_service

    @property
    def metrics(self) -> MetricsService:
        """Project-level summary metrics"""
        return self._metrics_service

    @property
    def metadata(self) -> MetadataService:
        """List and modify Sample metadata or metadata schemas"""
        return self._metadata_service

    @property
    def billing(self) -> BillingService:
        """List and update billing accounts"""
        return self._billing_service

    @property
    def references(self) -> ReferenceService:
        """List References and Reference types"""
        return self._references_service

    @property
    def users(self) -> UserService:
        """List and update user information"""
        return self._users_service

    @property
    def file(self) -> FileService:
        """Read, download, and create file objects"""
        return self._file_service

    @property
    def api_client(self) -> CirroApiClient:
        """Gets the underlying API client"""
        return self._api_client

    @property
    def configuration(self) -> AppConfig:
        """Gets the configuration of the instance"""
        return self._configuration
Client for interacting directly with the Cirro API
def __init__(self, auth_info: AuthInfo = None, base_url: str = None):
    """
    Instantiates the Cirro API object

    Args:
        auth_info (cirro.auth.base.AuthInfo): Optional authentication information
            (if not provided, it is loaded from the configuration)
        base_url (str): Optional base URL of the Cirro instance
            (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)

    Returns:
        Authenticated Cirro API object, which can be used to call endpoint functions.

    Example:
    ```python
    from cirro.cirro_client import CirroApi

    cirro = CirroApi(base_url="app.cirro.bio")
    print(cirro.projects.list())
    ```
    """
    config = AppConfig(base_url=base_url)
    self._configuration = config

    # Fall back to the configured authentication when none was supplied
    auth_info = auth_info or get_auth_info_from_config(config, auth_io=None)

    client = CirroApiClient(
        base_url=config.rest_endpoint,
        auth_method=auth_info.get_auth_method(),
        client_name='Cirro SDK',
        package_name='cirro'
    )
    self._api_client = client

    # Init services (the dataset service reuses the file service)
    self._file_service = FileService(
        client,
        enable_additional_checksum=config.enable_additional_checksum,
        transfer_retries=config.transfer_max_retries
    )
    self._dataset_service = DatasetService(client, file_service=self._file_service)
    self._project_service = ProjectService(client)
    self._process_service = ProcessService(client)
    self._execution_service = ExecutionService(client)
    self._metrics_service = MetricsService(client)
    self._metadata_service = MetadataService(client)
    self._billing_service = BillingService(client)
    self._references_service = ReferenceService(client)
    self._users_service = UserService(client)
Instantiates the Cirro API object
Arguments:
- auth_info (cirro.auth.base.AuthInfo): Optional authentication information (if not provided, it is loaded from the configuration)
- base_url (str): Optional base URL of the Cirro instance (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
Returns:
Authenticated Cirro API object, which can be used to call endpoint functions.
Example:
from cirro.cirro_client import CirroApi
cirro = CirroApi(base_url="app.cirro.bio")
print(cirro.projects.list())
@property
def datasets(self) -> DatasetService:
    """Create, list, delete, and modify Datasets"""
    service = self._dataset_service
    return service
Create, list, delete, and modify Datasets
@property
def projects(self) -> ProjectService:
    """Create, list, delete, and modify Projects"""
    service = self._project_service
    return service
Create, list, delete, and modify Projects
@property
def processes(self) -> ProcessService:
    """List and retrieve detailed information about Processes"""
    service = self._process_service
    return service
List and retrieve detailed information about Processes
@property
def execution(self) -> ExecutionService:
    """List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)"""
    service = self._execution_service
    return service
List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)
@property
def metrics(self) -> MetricsService:
    """Project-level summary metrics"""
    service = self._metrics_service
    return service
Project-level summary metrics
@property
def metadata(self) -> MetadataService:
    """List and modify Sample metadata or metadata schemas"""
    service = self._metadata_service
    return service
List and modify Sample metadata or metadata schemas
@property
def billing(self) -> BillingService:
    """List and update billing accounts"""
    service = self._billing_service
    return service
List and update billing accounts
@property
def references(self) -> ReferenceService:
    """List References and Reference types"""
    service = self._references_service
    return service
List References and Reference types
@property
def users(self) -> UserService:
    """List and update user information"""
    service = self._users_service
    return service
List and update user information
@property
def file(self) -> FileService:
    """Read, download, and create file objects"""
    service = self._file_service
    return service
Read, download, and create file objects