cirro

 1import cirro.file_utils  # noqa
 2from cirro.cirro_client import CirroApi
 3from cirro.sdk.dataset import DataPortalDataset
 4from cirro.sdk.login import DataPortalLogin
 5from cirro.sdk.portal import DataPortal
 6from cirro.sdk.process import DataPortalProcess
 7from cirro.sdk.project import DataPortalProject
 8from cirro.sdk.reference import DataPortalReference
 9
10__all__ = [
11    'DataPortal',
12    'DataPortalLogin',
13    'DataPortalProject',
14    'DataPortalProcess',
15    'DataPortalDataset',
16    'DataPortalReference',
17    'CirroApi',
18    'file_utils'
19]
class DataPortal:
 13class DataPortal:
 14    """
 15    Helper functions for exploring the Projects, Datasets, Samples, and Files
 16    available in the Data Portal.
 17    """
 18
 19    def __init__(self, base_url: str = None, client: CirroApi = None):
 20        """
 21        Set up the DataPortal object, establishing an authenticated connection.
 22
 23        Args:
 24            base_url (str): Optional base URL of the Cirro instance
 25             (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
 26            client (`cirro.cirro_client.CirroApi`): Optional pre-configured client
 27
 28        Example:
 29        ```python
 30        from cirro import DataPortal
 31
 32        portal = DataPortal(base_url="app.cirro.bio")
 33        portal.list_projects()
 34        ```
 35        """
 36
 37        if client is not None:
 38            self._client = client
 39
 40        # Set up default client if not provided
 41        else:
 42            self._client = CirroApi(base_url=base_url)
 43
 44    def list_projects(self) -> DataPortalProjects:
 45        """List all the projects available in the Data Portal."""
 46
 47        return DataPortalProjects(
 48            [
 49                DataPortalProject(proj, self._client)
 50                for proj in self._client.projects.list()
 51            ]
 52        )
 53
 54    def get_project_by_name(self, name: str = None) -> DataPortalProject:
 55        """Return the project with the specified name."""
 56
 57        return self.list_projects().get_by_name(name)
 58
 59    def get_project_by_id(self, _id: str = None) -> DataPortalProject:
 60        """Return the project with the specified id."""
 61
 62        return self.list_projects().get_by_id(_id)
 63
 64    def get_project(self, project: str = None) -> DataPortalProject:
 65        """
 66        Return a project identified by ID or name.
 67
 68        Args:
 69            project (str): ID or name of project
 70
 71        Returns:
 72            `cirro.sdk.project.DataPortalProject`
 73        """
 74        try:
 75            return self.get_project_by_id(project)
 76        except DataPortalAssetNotFound:
 77            return self.get_project_by_name(project)
 78
 79    def get_dataset(self, project: str = None, dataset: str = None) -> DataPortalDataset:
 80        """
 81        Return a dataset identified by ID or name.
 82
 83        Args:
 84            project (str): ID or name of project
 85            dataset (str): ID or name of dataset
 86
 87        Returns:
 88            `cirro.sdk.dataset.DataPortalDataset`
 89
 90            ```python
 91            from cirro import DataPortal
 92            portal = DataPortal()
 93            dataset = portal.get_dataset(
 94                project="id-or-name-of-project",
 95                dataset="id-or-name-of-dataset"
 96            )
 97            ```
 98        """
 99        try:
100            project: DataPortalProject = self.get_project_by_id(project)
101        except DataPortalAssetNotFound:
102            project: DataPortalProject = self.get_project_by_name(project)
103
104        try:
105            return project.get_dataset_by_id(dataset)
106        except DataPortalAssetNotFound:
107            return project.get_dataset_by_name(dataset)
108
109    def list_processes(self, ingest=False) -> DataPortalProcesses:
110        """
111        List all the processes available in the Data Portal.
112        By default, every process is listed, both ingest and non-ingest.
113        To list only the processes which can be used to upload datasets, use `ingest = True`.
114
115        Args:
116            ingest (bool): If True, only list those processes which can be used to ingest datasets directly
117        """
118
119        return DataPortalProcesses(
120            [
121                DataPortalProcess(p, self._client)
122                for p in self._client.processes.list()
123                if not ingest or p.executor == Executor.INGEST
124            ]
125        )
126
127    def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess:
128        """
129        Return the process with the specified name.
130
131        Args:
132            name (str): Name of process
133        """
134
135        return self.list_processes(ingest=ingest).get_by_name(name)
136
137    def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess:
138        """
139        Return the process with the specified id
140
141        Args:
142            id (str): ID of process
143        """
144
145        return self.list_processes(ingest=ingest).get_by_id(id)
146
147    def list_reference_types(self) -> DataPortalReferenceTypes:
148        """
149        Return the list of all available reference types
150        """
151
152        return DataPortalReferenceTypes(
153            [
154                DataPortalReferenceType(ref)
155                for ref in self._client.references.get_types()
156            ]
157        )
158
159    @property
160    def developer_helper(self) -> DeveloperHelper:
161        return DeveloperHelper(self._client)

Helper functions for exploring the Projects, Datasets, Samples, and Files available in the Data Portal.

DataPortal(base_url: str = None, client: CirroApi = None)
19    def __init__(self, base_url: str = None, client: CirroApi = None):
20        """
21        Set up the DataPortal object, establishing an authenticated connection.
22
23        Args:
24            base_url (str): Optional base URL of the Cirro instance
25             (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
26            client (`cirro.cirro_client.CirroApi`): Optional pre-configured client
27
28        Example:
29        ```python
30        from cirro import DataPortal
31
32        portal = DataPortal(base_url="app.cirro.bio")
33        portal.list_projects()
34        ```
35        """
36
37        if client is not None:
38            self._client = client
39
40        # Set up default client if not provided
41        else:
42            self._client = CirroApi(base_url=base_url)

Set up the DataPortal object, establishing an authenticated connection.

Arguments:
  • base_url (str): Optional base URL of the Cirro instance (if not provided, it uses the CIRRO_BASE_URL environment variable, or the config file)
  • client (cirro.cirro_client.CirroApi): Optional pre-configured client

Example:

from cirro import DataPortal

portal = DataPortal(base_url="app.cirro.bio")
portal.list_projects()
def list_projects(self) -> cirro.sdk.project.DataPortalProjects:
44    def list_projects(self) -> DataPortalProjects:
45        """List all the projects available in the Data Portal."""
46
47        return DataPortalProjects(
48            [
49                DataPortalProject(proj, self._client)
50                for proj in self._client.projects.list()
51            ]
52        )

List all the projects available in the Data Portal.

def get_project_by_name(self, name: str = None) -> DataPortalProject:
54    def get_project_by_name(self, name: str = None) -> DataPortalProject:
55        """Return the project with the specified name."""
56
57        return self.list_projects().get_by_name(name)

Return the project with the specified name.

def get_project_by_id(self, _id: str = None) -> DataPortalProject:
59    def get_project_by_id(self, _id: str = None) -> DataPortalProject:
60        """Return the project with the specified id."""
61
62        return self.list_projects().get_by_id(_id)

Return the project with the specified id.

def get_project(self, project: str = None) -> DataPortalProject:
64    def get_project(self, project: str = None) -> DataPortalProject:
65        """
66        Return a project identified by ID or name.
67
68        Args:
69            project (str): ID or name of project
70
71        Returns:
72            `cirro.sdk.project.DataPortalProject`
73        """
74        try:
75            return self.get_project_by_id(project)
76        except DataPortalAssetNotFound:
77            return self.get_project_by_name(project)

Return a project identified by ID or name.

Arguments:
  • project (str): ID or name of project
Returns:

cirro.sdk.project.DataPortalProject

def get_dataset( self, project: str = None, dataset: str = None) -> DataPortalDataset:
 79    def get_dataset(self, project: str = None, dataset: str = None) -> DataPortalDataset:
 80        """
 81        Return a dataset identified by ID or name.
 82
 83        Args:
 84            project (str): ID or name of project
 85            dataset (str): ID or name of dataset
 86
 87        Returns:
 88            `cirro.sdk.dataset.DataPortalDataset`
 89
 90            ```python
 91            from cirro import DataPortal
 92            portal = DataPortal()
 93            dataset = portal.get_dataset(
 94                project="id-or-name-of-project",
 95                dataset="id-or-name-of-dataset"
 96            )
 97            ```
 98        """
 99        try:
100            project: DataPortalProject = self.get_project_by_id(project)
101        except DataPortalAssetNotFound:
102            project: DataPortalProject = self.get_project_by_name(project)
103
104        try:
105            return project.get_dataset_by_id(dataset)
106        except DataPortalAssetNotFound:
107            return project.get_dataset_by_name(dataset)

Return a dataset identified by ID or name.

Arguments:
  • project (str): ID or name of project
  • dataset (str): ID or name of dataset
Returns:

cirro.sdk.dataset.DataPortalDataset

from cirro import DataPortal
portal = DataPortal()
dataset = portal.get_dataset(
    project="id-or-name-of-project",
    dataset="id-or-name-of-dataset"
)
def list_processes(self, ingest=False) -> cirro.sdk.process.DataPortalProcesses:
109    def list_processes(self, ingest=False) -> DataPortalProcesses:
110        """
111        List all the processes available in the Data Portal.
112        By default, every process is listed, both ingest and non-ingest.
113        To list only the processes which can be used to upload datasets, use `ingest = True`.
114
115        Args:
116            ingest (bool): If True, only list those processes which can be used to ingest datasets directly
117        """
118
119        return DataPortalProcesses(
120            [
121                DataPortalProcess(p, self._client)
122                for p in self._client.processes.list()
123                if not ingest or p.executor == Executor.INGEST
124            ]
125        )

List all the processes available in the Data Portal. By default, every process is listed, both ingest and non-ingest. To list only the processes which can be used to upload datasets, use ingest = True.

Arguments:
  • ingest (bool): If True, only list those processes which can be used to ingest datasets directly
def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess:
127    def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess:
128        """
129        Return the process with the specified name.
130
131        Args:
132            name (str): Name of process
133        """
134
135        return self.list_processes(ingest=ingest).get_by_name(name)

Return the process with the specified name.

Arguments:
  • name (str): Name of process
def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess:
137    def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess:
138        """
139        Return the process with the specified id
140
141        Args:
142            id (str): ID of process
143        """
144
145        return self.list_processes(ingest=ingest).get_by_id(id)

Return the process with the specified id

Arguments:
  • id (str): ID of process
def list_reference_types(self) -> cirro.sdk.reference_type.DataPortalReferenceTypes:
147    def list_reference_types(self) -> DataPortalReferenceTypes:
148        """
149        Return the list of all available reference types
150        """
151
152        return DataPortalReferenceTypes(
153            [
154                DataPortalReferenceType(ref)
155                for ref in self._client.references.get_types()
156            ]
157        )

Return the list of all available reference types

developer_helper: cirro.sdk.developer.DeveloperHelper
159    @property
160    def developer_helper(self) -> DeveloperHelper:
161        return DeveloperHelper(self._client)
class DataPortalLogin:
 8class DataPortalLogin:
 9    """
10    Start the login process, obtaining the authorization message from Cirro
11    needed to confirm the user identity.
12
13    Useful when you need to authenticate a user in a non-blocking way.
14
15    Usage:
16
17    ```python
18    # Replace app.cirro.bio as appropriate
19    login = DataPortalLogin(base_url="app.cirro.bio")
20
21    # Present the user with the authorization message
22    print(login.auth_message)
23
24    # Generate the authenticated DataPortal object,
25    # blocking until the user completes the login process in their browser
26    portal = login.await_completion()
27    ```
28    """
29    base_url: str
30    auth_info: DeviceCodeAuth
31
32    def __init__(self, base_url: str = None, enable_cache=False):
33        app_config = AppConfig(base_url=base_url)
34
35        self.base_url = base_url
36
37        self.auth_info = DeviceCodeAuth(
38            region=app_config.region,
39            client_id=app_config.client_id,
40            auth_endpoint=app_config.auth_endpoint,
41            enable_cache=enable_cache,
42            await_completion=False
43        )
44
45    @property
46    def auth_message(self) -> str:
47        """Authorization message provided by Cirro."""
48        return self.auth_info.auth_message
49
50    @property
51    def auth_message_markdown(self) -> str:
52        """Authorization message provided by Cirro (Markdown format)."""
53        return self.auth_info.auth_message_markdown
54
55    def await_completion(self) -> DataPortal:
56        """Complete the login process and return an authenticated client"""
57
58        # Block until the user completes the login flow
59        self.auth_info.await_completion()
60
61        # Set up the client object
62        cirro_client = CirroApi(
63            auth_info=self.auth_info,
64            base_url=self.base_url
65        )
66
67        # Return the Data Portal object
68        return DataPortal(client=cirro_client)

Start the login process, obtaining the authorization message from Cirro needed to confirm the user identity.

Useful when you need to authenticate a user in a non-blocking way.

Usage:

# Replace app.cirro.bio as appropriate
login = DataPortalLogin(base_url="app.cirro.bio")

# Present the user with the authorization message
print(login.auth_message)

# Generate the authenticated DataPortal object,
# blocking until the user completes the login process in their browser
portal = login.await_completion()
DataPortalLogin(base_url: str = None, enable_cache=False)
32    def __init__(self, base_url: str = None, enable_cache=False):
33        app_config = AppConfig(base_url=base_url)
34
35        self.base_url = base_url
36
37        self.auth_info = DeviceCodeAuth(
38            region=app_config.region,
39            client_id=app_config.client_id,
40            auth_endpoint=app_config.auth_endpoint,
41            enable_cache=enable_cache,
42            await_completion=False
43        )
base_url: str
auth_message: str
45    @property
46    def auth_message(self) -> str:
47        """Authorization message provided by Cirro."""
48        return self.auth_info.auth_message

Authorization message provided by Cirro.

auth_message_markdown: str
50    @property
51    def auth_message_markdown(self) -> str:
52        """Authorization message provided by Cirro (Markdown format)."""
53        return self.auth_info.auth_message_markdown

Authorization message provided by Cirro (Markdown format).

def await_completion(self) -> DataPortal:
55    def await_completion(self) -> DataPortal:
56        """Complete the login process and return an authenticated client"""
57
58        # Block until the user completes the login flow
59        self.auth_info.await_completion()
60
61        # Set up the client object
62        cirro_client = CirroApi(
63            auth_info=self.auth_info,
64            base_url=self.base_url
65        )
66
67        # Return the Data Portal object
68        return DataPortal(client=cirro_client)

Complete the login process and return an authenticated client

class DataPortalProject(cirro.sdk.asset.DataPortalAsset):
 20class DataPortalProject(DataPortalAsset):
 21    """
 22    Projects in the Data Portal contain collections of Datasets.
 23    Users are granted permissions at the project-level, allowing them
 24    to view and/or modify all the datasets in that collection.
 25    """
 26    def __init__(self, proj: Project, client: CirroApi):
 27        """
 28        Instantiate with helper method
 29
 30        ```python
 31        from cirro import DataPortal
 32        portal = DataPortal()
 33        project = portal.get_project_by_name("Project Name")
 34        ```
 35
 36        """
 37        self._data = proj
 38        self._client = client
 39
 40    @property
 41    def id(self) -> str:
 42        """
 43        Unique identifier
 44        """
 45        return self._data.id
 46
 47    @property
 48    def name(self) -> str:
 49        """
 50        Readable name
 51        """
 52        return self._data.name
 53
 54    @property
 55    def description(self) -> str:
 56        """
 57        Longer description of the project
 58        """
 59        return self._data.description
 60
 61    @property
 62    def status(self) -> Status:
 63        """
 64        Status of the project
 65        """
 66        return self._data.status
 67
 68    def __str__(self):
 69        """Control how the Project is rendered as a string."""
 70
 71        return '\n'.join([
 72            f"{i.title()}: {self.__getattribute__(i)}"
 73            for i in ['name', 'id', 'description']
 74        ])
 75
 76    @cache
 77    def _get_datasets(self) -> List[Dataset]:
 78        return list_all_datasets(project_id=self.id,
 79                                 client=self._client)
 80
 81    def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
 82        """List all the datasets available in the project."""
 83        if force_refresh:
 84            self._get_datasets.cache_clear()
 85
 86        return DataPortalDatasets(
 87            [
 88                DataPortalDataset(d, self._client)
 89                for d in self._get_datasets()
 90            ]
 91        )
 92
 93    def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
 94        """Return the dataset with the specified name."""
 95        if force_refresh:
 96            self._get_datasets.cache_clear()
 97
 98        dataset = next((d for d in self._get_datasets() if d.name == name), None)
 99        if dataset is None:
100            raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
101        return self.get_dataset_by_id(dataset.id)
102
103    def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
104        """Return the dataset with the specified id."""
105
106        dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id)
107        if dataset is None:
108            raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
109        return DataPortalDataset(dataset, self._client)
110
111    def list_references(self, reference_type: str = None) -> DataPortalReferences:
112        """
113        List the references available in a project.
114        Optionally filter to references of a particular type (identified by name)
115        """
116
117        # Get the complete list of references which are available
118        reference_types = DataPortalReferenceTypes(
119            [
120                DataPortalReferenceType(ref)
121                for ref in self._client.references.get_types()
122            ]
123        )
124
125        # If a particular name was specified
126        if reference_type is not None:
127            reference_types = reference_types.filter_by_pattern(reference_type)
128            if len(reference_types) == 0:
129                msg = f"Could not find any reference types with the name {reference_type}"
130                raise DataPortalAssetNotFound(msg)
131
132        return DataPortalReferences(
133            [
134                DataPortalReference(ref, project_id=self.id, client=self._client)
135                for ref in self._client.references.get_for_project(
136                    self.id
137                )
138                if reference_type is None or ref.type == reference_type
139            ]
140        )
141
142    def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
143        """Return the reference of a particular type with the specified name."""
144
145        if name is None:
146            raise DataPortalInputError("Must specify the reference name")
147
148        return self.list_references(ref_type).get_by_name(name)
149
150    def upload_dataset(
151        self,
152        name: str = None,
153        description='',
154        process: Union[DataPortalProcess, str] = None,
155        upload_folder: str = None,
156        files: List[str] = None,
157        tags: List[str] = None,
158    ):
159        """
160        Upload a set of files to the Data Portal, creating a new dataset.
161
162        If the files parameter is not provided, it will upload all files in the upload folder
163
164        Args:
165            name (str): Name of newly created dataset
166            description (str): Description of newly created dataset
167            process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object
168            upload_folder (str): Folder containing files to upload
169            files (List[str]): Optional subset of files to upload from the folder
170            tags (List[str]): Optional list of tags to apply to the dataset
171        """
172
173        if name is None:
174            raise DataPortalInputError("Must provide name for new dataset")
175        if process is None:
176            raise DataPortalInputError("Must provide the process which is used for ingest")
177        if upload_folder is None:
178            raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")
179
180        # Parse the process provided by the user
181        process = parse_process_name_or_id(process, self._client)
182
183        # If no files were provided
184        if files is None:
185            # Get the list of files in the upload folder
186            files = get_files_in_directory(upload_folder)
187
188        if files is None or len(files) == 0:
189            raise RuntimeWarning("No files to upload, exiting")
190
191        # Normalize into Tag object
192        if tags is not None:
193            tags = [Tag(value=value) for value in tags]
194
195        # Make sure that the files match the expected pattern
196        self._client.processes.check_dataset_files(files, process.id, upload_folder)
197
198        # Create the ingest process request
199        dataset_create_request = UploadDatasetRequest(
200            process_id=process.id,
201            name=name,
202            description=description,
203            expected_files=files,
204            tags=tags,
205        )
206
207        # Get the response
208        create_response = self._client.datasets.create(project_id=self.id,
209                                                       upload_request=dataset_create_request)
210
211        # Upload the files
212        self._client.datasets.upload_files(
213            project_id=self.id,
214            dataset_id=create_response.id,
215            directory=upload_folder,
216            files=files
217        )
218
219        # Return the dataset which was created, which might take a second to update
220        max_attempts = 5
221        for attempt in range(max_attempts):
222            try:
223                return self.get_dataset_by_id(create_response.id)
224            except DataPortalAssetNotFound as e:
225                if attempt == max_attempts - 1:
226                    raise e
227                else:
228                    sleep(2)
229
230    def samples(self, max_items: int = 10000) -> List[Sample]:
231        """
232        Retrieves a list of samples associated with a project along with their metadata
233
234        Args:
235            max_items (int): Maximum number of records to get (default 10,000)
236        """
237        return self._client.metadata.get_project_samples(self.id, max_items)

Projects in the Data Portal contain collections of Datasets. Users are granted permissions at the project-level, allowing them to view and/or modify all the datasets in that collection.

DataPortalProject( proj: cirro_api_client.v1.models.Project, client: CirroApi)
26    def __init__(self, proj: Project, client: CirroApi):
27        """
28        Instantiate with helper method
29
30        ```python
31        from cirro import DataPortal
32        portal = DataPortal()
33        project = portal.get_project_by_name("Project Name")
34        ```
35
36        """
37        self._data = proj
38        self._client = client

Instantiate with helper method

from cirro import DataPortal
portal = DataPortal()
project = portal.get_project_by_name("Project Name")
id: str
40    @property
41    def id(self) -> str:
42        """
43        Unique identifier
44        """
45        return self._data.id

Unique identifier

name: str
47    @property
48    def name(self) -> str:
49        """
50        Readable name
51        """
52        return self._data.name

Readable name

description: str
54    @property
55    def description(self) -> str:
56        """
57        Longer description of the project
58        """
59        return self._data.description

Longer description of the project

status: cirro_api_client.v1.models.Status
61    @property
62    def status(self) -> Status:
63        """
64        Status of the project
65        """
66        return self._data.status

Status of the project

def list_datasets(self, force_refresh=False) -> cirro.sdk.dataset.DataPortalDatasets:
81    def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
82        """List all the datasets available in the project."""
83        if force_refresh:
84            self._get_datasets.cache_clear()
85
86        return DataPortalDatasets(
87            [
88                DataPortalDataset(d, self._client)
89                for d in self._get_datasets()
90            ]
91        )

List all the datasets available in the project.

def get_dataset_by_name( self, name: str, force_refresh=False) -> DataPortalDataset:
 93    def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
 94        """Return the dataset with the specified name."""
 95        if force_refresh:
 96            self._get_datasets.cache_clear()
 97
 98        dataset = next((d for d in self._get_datasets() if d.name == name), None)
 99        if dataset is None:
100            raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
101        return self.get_dataset_by_id(dataset.id)

Return the dataset with the specified name.

def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
103    def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
104        """Return the dataset with the specified id."""
105
106        dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id)
107        if dataset is None:
108            raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
109        return DataPortalDataset(dataset, self._client)

Return the dataset with the specified id.

def list_references( self, reference_type: str = None) -> cirro.sdk.reference.DataPortalReferences:
111    def list_references(self, reference_type: str = None) -> DataPortalReferences:
112        """
113        List the references available in a project.
114        Optionally filter to references of a particular type (identified by name)
115        """
116
117        # Get the complete list of references which are available
118        reference_types = DataPortalReferenceTypes(
119            [
120                DataPortalReferenceType(ref)
121                for ref in self._client.references.get_types()
122            ]
123        )
124
125        # If a particular name was specified
126        if reference_type is not None:
127            reference_types = reference_types.filter_by_pattern(reference_type)
128            if len(reference_types) == 0:
129                msg = f"Could not find any reference types with the name {reference_type}"
130                raise DataPortalAssetNotFound(msg)
131
132        return DataPortalReferences(
133            [
134                DataPortalReference(ref, project_id=self.id, client=self._client)
135                for ref in self._client.references.get_for_project(
136                    self.id
137                )
138                if reference_type is None or ref.type == reference_type
139            ]
140        )

List the references available in a project. Optionally filter to references of a particular type (identified by name)

def get_reference_by_name( self, name: str = None, ref_type: str = None) -> DataPortalReference:
142    def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
143        """Return the reference of a particular type with the specified name."""
144
145        if name is None:
146            raise DataPortalInputError("Must specify the reference name")
147
148        return self.list_references(ref_type).get_by_name(name)

Return the reference of a particular type with the specified name.

def upload_dataset( self, name: str = None, description='', process: Union[DataPortalProcess, str] = None, upload_folder: str = None, files: List[str] = None, tags: List[str] = None):
150    def upload_dataset(
151        self,
152        name: str = None,
153        description='',
154        process: Union[DataPortalProcess, str] = None,
155        upload_folder: str = None,
156        files: List[str] = None,
157        tags: List[str] = None,
158    ):
159        """
160        Upload a set of files to the Data Portal, creating a new dataset.
161
162        If the files parameter is not provided, it will upload all files in the upload folder
163
164        Args:
165            name (str): Name of newly created dataset
166            description (str): Description of newly created dataset
167            process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object
168            upload_folder (str): Folder containing files to upload
169            files (List[str]): Optional subset of files to upload from the folder
170            tags (List[str]): Optional list of tags to apply to the dataset
171        """
172
173        if name is None:
174            raise DataPortalInputError("Must provide name for new dataset")
175        if process is None:
176            raise DataPortalInputError("Must provide the process which is used for ingest")
177        if upload_folder is None:
178            raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")
179
180        # Parse the process provided by the user
181        process = parse_process_name_or_id(process, self._client)
182
183        # If no files were provided
184        if files is None:
185            # Get the list of files in the upload folder
186            files = get_files_in_directory(upload_folder)
187
188        if files is None or len(files) == 0:
189            raise RuntimeWarning("No files to upload, exiting")
190
191        # Normalize into Tag object
192        if tags is not None:
193            tags = [Tag(value=value) for value in tags]
194
195        # Make sure that the files match the expected pattern
196        self._client.processes.check_dataset_files(files, process.id, upload_folder)
197
198        # Create the ingest process request
199        dataset_create_request = UploadDatasetRequest(
200            process_id=process.id,
201            name=name,
202            description=description,
203            expected_files=files,
204            tags=tags,
205        )
206
207        # Get the response
208        create_response = self._client.datasets.create(project_id=self.id,
209                                                       upload_request=dataset_create_request)
210
211        # Upload the files
212        self._client.datasets.upload_files(
213            project_id=self.id,
214            dataset_id=create_response.id,
215            directory=upload_folder,
216            files=files
217        )
218
219        # Return the dataset which was created, which might take a second to update
220        max_attempts = 5
221        for attempt in range(max_attempts):
222            try:
223                return self.get_dataset_by_id(create_response.id)
224            except DataPortalAssetNotFound as e:
225                if attempt == max_attempts - 1:
226                    raise e
227                else:
228                    sleep(2)

Upload a set of files to the Data Portal, creating a new dataset.

If the files parameter is not provided, it will upload all files in the upload folder

Arguments:
  • name (str): Name of newly created dataset
  • description (str): Description of newly created dataset
  • process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object
  • upload_folder (str): Folder containing files to upload
  • files (List[str]): Optional subset of files to upload from the folder
  • tags (List[str]): Optional list of tags to apply to the dataset
def samples( self, max_items: int = 10000) -> List[cirro_api_client.v1.models.Sample]:
230    def samples(self, max_items: int = 10000) -> List[Sample]:
231        """
232        Retrieves a list of samples associated with a project along with their metadata
233
234        Args:
235            max_items (int): Maximum number of records to get (default 10,000)
236        """
237        return self._client.metadata.get_project_samples(self.id, max_items)

Retrieves a list of samples associated with a project along with their metadata

Arguments:
  • max_items (int): Maximum number of records to get (default 10,000)
class DataPortalProcess(cirro.sdk.asset.DataPortalAsset):
class DataPortalProcess(DataPortalAsset):
    """Helper functions for interacting with analysis processes."""

    def __init__(self, process: Union[Process, ProcessDetail], client: CirroApi):
        """
        Instantiate with helper method

        ```python
        from cirro import DataPortal
        portal = DataPortal()
        process = portal.get_process_by_name("Process Name")
        ```

        Args:
            process (Process | ProcessDetail): Process record to wrap
            client (CirroApi): Client used for any follow-up requests
        """
        self._data = process
        self._client = client

    @property
    def id(self) -> str:
        """Unique identifier"""
        return self._data.id

    @property
    def name(self) -> str:
        """Readable name"""
        return self._data.name

    @property
    def description(self) -> str:
        """Longer description of process"""
        return self._data.description

    @property
    def child_process_ids(self) -> List[str]:
        """List of processes which can be run on the output of this process"""
        return self._data.child_process_ids

    @property
    def executor(self) -> Executor:
        """INGEST, CROMWELL, or NEXTFLOW"""
        return self._data.executor

    @property
    def category(self) -> str:
        """Category of process"""
        return self._data.category

    @property
    def pipeline_type(self) -> str:
        """Pipeline type"""
        return self._data.pipeline_type

    @property
    def documentation_url(self) -> str:
        """Documentation URL"""
        return self._data.documentation_url

    @property
    def file_requirements_message(self) -> str:
        """Description of files required for INGEST processes"""
        return self._data.file_requirements_message

    @property
    def code(self) -> PipelineCode:
        """Pipeline code configuration (read from the detailed process record)"""
        return self._get_detail().pipeline_code

    @property
    def custom_settings(self) -> CustomPipelineSettings:
        """Custom settings for the process (read from the detailed process record)"""
        return self._get_detail().custom_settings

    def _get_detail(self) -> ProcessDetail:
        """
        Return the detailed process record.

        If the wrapped record is not already a `ProcessDetail`, it is fetched
        through the client and stored in place of the original record.
        """
        if not isinstance(self._data, ProcessDetail):
            self._data = self._client.processes.get(self.id)
        return self._data

    def __str__(self):
        """Render the name, ID, and description, one per line."""
        return '\n'.join([
            f"{i.title()}: {getattr(self, i)}"
            for i in ['name', 'id', 'description']
        ])

    def get_parameter_spec(self) -> ParameterSpecification:
        """
        Gets a specification used to describe the parameters used in the process.
        """
        return self._client.processes.get_parameter_spec(self.id)

Helper functions for interacting with analysis processes.

DataPortalProcess( process: Union[cirro_api_client.v1.models.Process, cirro_api_client.v1.models.ProcessDetail], client: CirroApi)
14    def __init__(self, process: Union[Process, ProcessDetail], client: CirroApi):
15        """
16        Instantiate with helper method
17
18        ```python
19        from cirro import DataPortal()
20        portal = DataPortal()
21        process = portal.get_process_by_name("Process Name")
22        ```
23        """
24        self._data = process
25        self._client = client

Instantiate with helper method

from cirro import DataPortal
portal = DataPortal()
process = portal.get_process_by_name("Process Name")
id: str
27    @property
28    def id(self) -> str:
29        """Unique identifier"""
30        return self._data.id

Unique identifier

name: str
32    @property
33    def name(self) -> str:
34        """Readable name"""
35        return self._data.name

Readable name

description: str
37    @property
38    def description(self) -> str:
39        """Longer description of process"""
40        return self._data.description

Longer description of process

child_process_ids: List[str]
42    @property
43    def child_process_ids(self) -> List[str]:
44        """List of processes which can be run on the output of this process"""
45        return self._data.child_process_ids

List of processes which can be run on the output of this process

executor: cirro_api_client.v1.models.Executor
47    @property
48    def executor(self) -> Executor:
49        """INGEST, CROMWELL, or NEXTFLOW"""
50        return self._data.executor

INGEST, CROMWELL, or NEXTFLOW

category: str
52    @property
53    def category(self) -> str:
54        """Category of process"""
55        return self._data.category

Category of process

pipeline_type: str
57    @property
58    def pipeline_type(self) -> str:
59        """Pipeline type"""
60        return self._data.pipeline_type

Pipeline type

documentation_url: str
62    @property
63    def documentation_url(self) -> str:
64        """Documentation URL"""
65        return self._data.documentation_url

Documentation URL

file_requirements_message: str
67    @property
68    def file_requirements_message(self) -> str:
69        """Description of files required for INGEST processes"""
70        return self._data.file_requirements_message

Description of files required for INGEST processes

72    @property
73    def code(self) -> PipelineCode:
74        """Pipeline code configuration"""
75        return self._get_detail().pipeline_code

Pipeline code configuration

77    @property
78    def custom_settings(self) -> CustomPipelineSettings:
79        """Custom settings for the process"""
80        return self._get_detail().custom_settings

Custom settings for the process

def get_parameter_spec(self) -> cirro.models.form_specification.ParameterSpecification:
93    def get_parameter_spec(self) -> ParameterSpecification:
94        """
95        Gets a specification used to describe the parameters used in the process.
96        """
97        return self._client.processes.get_parameter_spec(self.id)

Gets a specification used to describe the parameters used in the process.

class DataPortalDataset(cirro.sdk.asset.DataPortalAsset):
 18class DataPortalDataset(DataPortalAsset):
 19    """
 20    Datasets in the Data Portal are collections of files which have
 21    either been uploaded directly, or which have been output by
 22    an analysis pipeline or notebook.
 23    """
 24
 25    def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi):
 26        """
 27        Instantiate a dataset object
 28
 29        Should be invoked from a top-level constructor, for example:
 30
 31        ```python
 32        from cirro import DataPortal()
 33        portal = DataPortal()
 34        dataset = portal.get_dataset(
 35            project="id-or-name-of-project",
 36            dataset="id-or-name-of-dataset"
 37        )
 38        ```
 39
 40        """
 41        assert dataset.project_id is not None, "Must provide dataset with project_id attribute"
 42        self._data = dataset
 43        self._assets: Optional[DatasetAssets] = None
 44        self._client = client
 45
 46    @property
 47    def id(self) -> str:
 48        """Unique identifier for the dataset"""
 49        return self._data.id
 50
 51    @property
 52    def name(self) -> str:
 53        """Editable name for the dataset"""
 54        return self._data.name
 55
 56    @property
 57    def description(self) -> str:
 58        """Longer name for the dataset"""
 59        return self._data.description
 60
 61    @property
 62    def process_id(self) -> str:
 63        """Unique ID of process used to create the dataset"""
 64        return self._data.process_id
 65
 66    @property
 67    def process(self) -> ProcessDetail:
 68        """
 69        Object representing the process used to create the dataset
 70        """
 71        return self._client.processes.get(self.process_id)
 72
 73    @property
 74    def project_id(self) -> str:
 75        """ID of the project containing the dataset"""
 76        return self._data.project_id
 77
 78    @property
 79    def status(self) -> Status:
 80        """
 81        Status of the dataset
 82        """
 83        return self._data.status
 84
 85    @property
 86    def source_dataset_ids(self) -> List[str]:
 87        """IDs of the datasets used as sources for this dataset (if any)"""
 88        return self._data.source_dataset_ids
 89
 90    @property
 91    def source_datasets(self) -> List['DataPortalDataset']:
 92        """
 93        Objects representing the datasets used as sources for this dataset (if any)
 94        """
 95        return [
 96            DataPortalDataset(
 97                dataset=self._client.datasets.get(project_id=self.project_id, dataset_id=dataset_id),
 98                client=self._client
 99            )
100            for dataset_id in self.source_dataset_ids
101        ]
102
103    @property
104    def params(self) -> dict:
105        """
106        Parameters used to generate the dataset
107        """
108        return self._get_detail().params.to_dict()
109
110    @property
111    def info(self) -> dict:
112        """
113        Extra information about the dataset
114        """
115        return self._get_detail().info.to_dict()
116
117    @property
118    def tags(self) -> List[Tag]:
119        """
120        Tags applied to the dataset
121        """
122        return self._data.tags
123
124    @property
125    def share(self) -> Optional[NamedItem]:
126        """
127        Share associated with the dataset, if any.
128        """
129        return self._get_detail().share
130
131    @property
132    def created_by(self) -> str:
133        """User who created the dataset"""
134        return self._data.created_by
135
136    @property
137    def created_at(self) -> datetime.datetime:
138        """Timestamp of dataset creation"""
139        return self._data.created_at
140
141    def _get_detail(self):
142        if not isinstance(self._data, DatasetDetail):
143            self._data = self._client.datasets.get(project_id=self.project_id, dataset_id=self.id)
144        return self._data
145
146    def _get_assets(self):
147        if not self._assets:
148            self._assets = self._client.datasets.get_assets_listing(
149                project_id=self.project_id,
150                dataset_id=self.id
151            )
152        return self._assets
153
154    def __str__(self):
155        return '\n'.join([
156            f"{i.title()}: {self.__getattribute__(i)}"
157            for i in ['name', 'id', 'description', 'status']
158        ])
159
160    def get_file(self, relative_path: str) -> DataPortalFile:
161        """
162        Get a file from the dataset using its relative path.
163
164        Args:
165            relative_path (str): Relative path of file within the dataset
166
167        Returns:
168            `from cirro.sdk.file import DataPortalFile`
169        """
170
171        # Get the list of files in this dataset
172        files = self.list_files()
173
174        # Try getting the file using the relative path provided by the user
175        try:
176            return files.get_by_id(relative_path)
177        except DataPortalAssetNotFound:
178            # Try getting the file with the 'data/' prefix prepended
179            try:
180                return files.get_by_id("data/" + relative_path)
181            except DataPortalAssetNotFound:
182                # If not found, raise the exception using the string provided
183                # by the user, not the data/ prepended version (which may be
184                # confusing to the user)
185                msg = '\n'.join([f"No file found with path '{relative_path}'."])
186                raise DataPortalAssetNotFound(msg)
187
188    def list_files(self) -> DataPortalFiles:
189        """
190        Return the list of files which make up the dataset.
191        """
192        files = self._get_assets().files
193        return DataPortalFiles(
194            [
195                DataPortalFile(file=file, client=self._client)
196                for file in files
197            ]
198        )
199
200    def get_artifact(self, artifact_type: ArtifactType) -> DataPortalFile:
201        """
202        Get the artifact of a particular type from the dataset
203        """
204        artifacts = self._get_assets().artifacts
205        artifact = next((a for a in artifacts if a.artifact_type == artifact_type), None)
206        if artifact is None:
207            raise DataPortalAssetNotFound(f"No artifact found with type '{artifact_type}'")
208        return DataPortalFile(file=artifact.file, client=self._client)
209
210    def list_artifacts(self) -> List[DataPortalFile]:
211        """
212        Return the list of artifacts associated with the dataset
213
214        An artifact may be something generated as part of the analysis or other process.
215        See `cirro_api_client.v1.models.ArtifactType` for the list of possible artifact types.
216
217        """
218        artifacts = self._get_assets().artifacts
219        return DataPortalFiles(
220            [
221                DataPortalFile(file=artifact.file, client=self._client)
222                for artifact in artifacts
223            ]
224        )
225
226    def download_files(self, download_location: str = None) -> None:
227        """
228        Download all the files from the dataset to a local directory.
229
230        Args:
231            download_location (str): Path to local directory
232        """
233
234        # Alias for internal method
235        self.list_files().download(download_location)
236
237    def run_analysis(
238            self,
239            name: str = None,
240            description: str = "",
241            process: Union[DataPortalProcess, str] = None,
242            params=None,
243            notifications_emails: List[str] = None,
244            compute_environment: str = None,
245            resume_dataset_id: str = None
246    ) -> str:
247        """
248        Runs an analysis on a dataset, returns the ID of the newly created dataset.
249
250        The process can be provided as either a DataPortalProcess object,
251        or a string which corresponds to the name or ID of the process.
252
253        Args:
254            name (str): Name of newly created dataset
255            description (str): Description of newly created dataset
256            process (DataPortalProcess or str): Process to run
257            params (dict): Analysis parameters
258            notifications_emails (List[str]): Notification email address(es)
259            compute_environment (str): Name or ID of compute environment to use,
260             if blank it will run in AWS
261            resume_dataset_id (str): ID of dataset to resume from, used for caching task execution.
262             It will attempt to re-use the previous output to minimize duplicate work
263
264        Returns:
265            dataset_id (str): ID of newly created dataset
266        """
267        if name is None:
268            raise DataPortalInputError("Must specify 'name' for run_analysis")
269        if process is None:
270            raise DataPortalInputError("Must specify 'process' for run_analysis")
271        if notifications_emails is None:
272            notifications_emails = []
273        if params is None:
274            params = {}
275
276        # If the process is a string, try to parse it as a process name or ID
277        process = parse_process_name_or_id(process, self._client)
278
279        if compute_environment:
280            compute_environments = self._client.compute_environments.list_environments_for_project(
281                project_id=self.project_id
282            )
283            compute_environment = next(
284                (env for env in compute_environments
285                 if env.name == compute_environment or env.id == compute_environment),
286                None
287            )
288            if compute_environment is None:
289                raise DataPortalInputError(f"Compute environment '{compute_environment}' not found")
290
291        resp = self._client.execution.run_analysis(
292            project_id=self.project_id,
293            request=RunAnalysisRequest(
294                name=name,
295                description=description,
296                process_id=process.id,
297                source_dataset_ids=[self.id],
298                params=RunAnalysisRequestParams.from_dict(params),
299                notification_emails=notifications_emails,
300                resume_dataset_id=resume_dataset_id,
301                compute_environment_id=compute_environment.id if compute_environment else None
302            )
303        )
304        return resp.id

Datasets in the Data Portal are collections of files which have either been uploaded directly, or which have been output by an analysis pipeline or notebook.

DataPortalDataset( dataset: Union[cirro_api_client.v1.models.Dataset, cirro_api_client.v1.models.DatasetDetail], client: CirroApi)
25    def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi):
26        """
27        Instantiate a dataset object
28
29        Should be invoked from a top-level constructor, for example:
30
31        ```python
32        from cirro import DataPortal()
33        portal = DataPortal()
34        dataset = portal.get_dataset(
35            project="id-or-name-of-project",
36            dataset="id-or-name-of-dataset"
37        )
38        ```
39
40        """
41        assert dataset.project_id is not None, "Must provide dataset with project_id attribute"
42        self._data = dataset
43        self._assets: Optional[DatasetAssets] = None
44        self._client = client

Instantiate a dataset object

Should be invoked from a top-level constructor, for example:

from cirro import DataPortal
portal = DataPortal()
dataset = portal.get_dataset(
    project="id-or-name-of-project",
    dataset="id-or-name-of-dataset"
)
id: str
46    @property
47    def id(self) -> str:
48        """Unique identifier for the dataset"""
49        return self._data.id

Unique identifier for the dataset

name: str
51    @property
52    def name(self) -> str:
53        """Editable name for the dataset"""
54        return self._data.name

Editable name for the dataset

description: str
56    @property
57    def description(self) -> str:
58        """Longer name for the dataset"""
59        return self._data.description

Longer description of the dataset

process_id: str
61    @property
62    def process_id(self) -> str:
63        """Unique ID of process used to create the dataset"""
64        return self._data.process_id

Unique ID of process used to create the dataset

66    @property
67    def process(self) -> ProcessDetail:
68        """
69        Object representing the process used to create the dataset
70        """
71        return self._client.processes.get(self.process_id)

Object representing the process used to create the dataset

project_id: str
73    @property
74    def project_id(self) -> str:
75        """ID of the project containing the dataset"""
76        return self._data.project_id

ID of the project containing the dataset

status: cirro_api_client.v1.models.Status
78    @property
79    def status(self) -> Status:
80        """
81        Status of the dataset
82        """
83        return self._data.status

Status of the dataset

source_dataset_ids: List[str]
85    @property
86    def source_dataset_ids(self) -> List[str]:
87        """IDs of the datasets used as sources for this dataset (if any)"""
88        return self._data.source_dataset_ids

IDs of the datasets used as sources for this dataset (if any)

source_datasets: List[DataPortalDataset]
 90    @property
 91    def source_datasets(self) -> List['DataPortalDataset']:
 92        """
 93        Objects representing the datasets used as sources for this dataset (if any)
 94        """
 95        return [
 96            DataPortalDataset(
 97                dataset=self._client.datasets.get(project_id=self.project_id, dataset_id=dataset_id),
 98                client=self._client
 99            )
100            for dataset_id in self.source_dataset_ids
101        ]

Objects representing the datasets used as sources for this dataset (if any)

params: dict
103    @property
104    def params(self) -> dict:
105        """
106        Parameters used to generate the dataset
107        """
108        return self._get_detail().params.to_dict()

Parameters used to generate the dataset

info: dict
110    @property
111    def info(self) -> dict:
112        """
113        Extra information about the dataset
114        """
115        return self._get_detail().info.to_dict()

Extra information about the dataset

tags: List[cirro_api_client.v1.models.Tag]
117    @property
118    def tags(self) -> List[Tag]:
119        """
120        Tags applied to the dataset
121        """
122        return self._data.tags

Tags applied to the dataset

share: Optional[cirro_api_client.v1.models.NamedItem]
124    @property
125    def share(self) -> Optional[NamedItem]:
126        """
127        Share associated with the dataset, if any.
128        """
129        return self._get_detail().share

Share associated with the dataset, if any.

created_by: str
131    @property
132    def created_by(self) -> str:
133        """User who created the dataset"""
134        return self._data.created_by

User who created the dataset

created_at: datetime.datetime
136    @property
137    def created_at(self) -> datetime.datetime:
138        """Timestamp of dataset creation"""
139        return self._data.created_at

Timestamp of dataset creation

def get_file(self, relative_path: str) -> cirro.sdk.file.DataPortalFile:
160    def get_file(self, relative_path: str) -> DataPortalFile:
161        """
162        Get a file from the dataset using its relative path.
163
164        Args:
165            relative_path (str): Relative path of file within the dataset
166
167        Returns:
168            `from cirro.sdk.file import DataPortalFile`
169        """
170
171        # Get the list of files in this dataset
172        files = self.list_files()
173
174        # Try getting the file using the relative path provided by the user
175        try:
176            return files.get_by_id(relative_path)
177        except DataPortalAssetNotFound:
178            # Try getting the file with the 'data/' prefix prepended
179            try:
180                return files.get_by_id("data/" + relative_path)
181            except DataPortalAssetNotFound:
182                # If not found, raise the exception using the string provided
183                # by the user, not the data/ prepended version (which may be
184                # confusing to the user)
185                msg = '\n'.join([f"No file found with path '{relative_path}'."])
186                raise DataPortalAssetNotFound(msg)

Get a file from the dataset using its relative path.

Arguments:
  • relative_path (str): Relative path of file within the dataset
Returns:

from cirro.sdk.file import DataPortalFile

def list_files(self) -> cirro.sdk.file.DataPortalFiles:
188    def list_files(self) -> DataPortalFiles:
189        """
190        Return the list of files which make up the dataset.
191        """
192        files = self._get_assets().files
193        return DataPortalFiles(
194            [
195                DataPortalFile(file=file, client=self._client)
196                for file in files
197            ]
198        )

Return the list of files which make up the dataset.

def get_artifact( self, artifact_type: cirro_api_client.v1.models.ArtifactType) -> cirro.sdk.file.DataPortalFile:
200    def get_artifact(self, artifact_type: ArtifactType) -> DataPortalFile:
201        """
202        Get the artifact of a particular type from the dataset
203        """
204        artifacts = self._get_assets().artifacts
205        artifact = next((a for a in artifacts if a.artifact_type == artifact_type), None)
206        if artifact is None:
207            raise DataPortalAssetNotFound(f"No artifact found with type '{artifact_type}'")
208        return DataPortalFile(file=artifact.file, client=self._client)

Get the artifact of a particular type from the dataset

def list_artifacts(self) -> List[cirro.sdk.file.DataPortalFile]:
210    def list_artifacts(self) -> List[DataPortalFile]:
211        """
212        Return the list of artifacts associated with the dataset
213
214        An artifact may be something generated as part of the analysis or other process.
215        See `cirro_api_client.v1.models.ArtifactType` for the list of possible artifact types.
216
217        """
218        artifacts = self._get_assets().artifacts
219        return DataPortalFiles(
220            [
221                DataPortalFile(file=artifact.file, client=self._client)
222                for artifact in artifacts
223            ]
224        )

Return the list of artifacts associated with the dataset

An artifact may be something generated as part of the analysis or other process. See cirro_api_client.v1.models.ArtifactType for the list of possible artifact types.

def download_files(self, download_location: str = None) -> None:
226    def download_files(self, download_location: str = None) -> None:
227        """
228        Download all the files from the dataset to a local directory.
229
230        Args:
231            download_location (str): Path to local directory
232        """
233
234        # Alias for internal method
235        self.list_files().download(download_location)

Download all the files from the dataset to a local directory.

Arguments:
  • download_location (str): Path to local directory
def run_analysis( self, name: str = None, description: str = '', process: Union[DataPortalProcess, str] = None, params=None, notifications_emails: List[str] = None, compute_environment: str = None, resume_dataset_id: str = None) -> str:
237    def run_analysis(
238            self,
239            name: str = None,
240            description: str = "",
241            process: Union[DataPortalProcess, str] = None,
242            params=None,
243            notifications_emails: List[str] = None,
244            compute_environment: str = None,
245            resume_dataset_id: str = None
246    ) -> str:
247        """
248        Runs an analysis on a dataset, returns the ID of the newly created dataset.
249
250        The process can be provided as either a DataPortalProcess object,
251        or a string which corresponds to the name or ID of the process.
252
253        Args:
254            name (str): Name of newly created dataset
255            description (str): Description of newly created dataset
256            process (DataPortalProcess or str): Process to run
257            params (dict): Analysis parameters
258            notifications_emails (List[str]): Notification email address(es)
259            compute_environment (str): Name or ID of compute environment to use,
260             if blank it will run in AWS
261            resume_dataset_id (str): ID of dataset to resume from, used for caching task execution.
262             It will attempt to re-use the previous output to minimize duplicate work
263
264        Returns:
265            dataset_id (str): ID of newly created dataset
266        """
267        if name is None:
268            raise DataPortalInputError("Must specify 'name' for run_analysis")
269        if process is None:
270            raise DataPortalInputError("Must specify 'process' for run_analysis")
271        if notifications_emails is None:
272            notifications_emails = []
273        if params is None:
274            params = {}
275
276        # If the process is a string, try to parse it as a process name or ID
277        process = parse_process_name_or_id(process, self._client)
278
279        if compute_environment:
280            compute_environments = self._client.compute_environments.list_environments_for_project(
281                project_id=self.project_id
282            )
283            compute_environment = next(
284                (env for env in compute_environments
285                 if env.name == compute_environment or env.id == compute_environment),
286                None
287            )
288            if compute_environment is None:
289                raise DataPortalInputError(f"Compute environment '{compute_environment}' not found")
290
291        resp = self._client.execution.run_analysis(
292            project_id=self.project_id,
293            request=RunAnalysisRequest(
294                name=name,
295                description=description,
296                process_id=process.id,
297                source_dataset_ids=[self.id],
298                params=RunAnalysisRequestParams.from_dict(params),
299                notification_emails=notifications_emails,
300                resume_dataset_id=resume_dataset_id,
301                compute_environment_id=compute_environment.id if compute_environment else None
302            )
303        )
304        return resp.id

Runs an analysis on a dataset, returns the ID of the newly created dataset.

The process can be provided as either a DataPortalProcess object, or a string which corresponds to the name or ID of the process.

Arguments:
  • name (str): Name of newly created dataset
  • description (str): Description of newly created dataset
  • process (DataPortalProcess or str): Process to run
  • params (dict): Analysis parameters
  • notifications_emails (List[str]): Notification email address(es)
  • compute_environment (str): Name or ID of compute environment to use, if blank it will run in AWS
  • resume_dataset_id (str): ID of dataset to resume from, used for caching task execution. It will attempt to re-use the previous output to minimize duplicate work
Returns:

dataset_id (str): ID of newly created dataset

class DataPortalReference(cirro.sdk.asset.DataPortalAsset):
class DataPortalReference(DataPortalAsset):
    """
    Reference data object containing files which can be used for analysis in a particular project.
    """
    def __init__(self, ref: Reference, project_id: str, client: CirroApi):
        """
        Instantiate by listing the references which have been added to a particular project
        ```python
        from cirro import DataPortal
        portal = DataPortal()
        project = portal.get_project_by_name("Project Name")
        references = project.list_references()
        ```

        Args:
            ref (Reference): Reference record whose `files` entries are wrapped as file objects
            project_id (str): ID of the project, used when building each file from its entry
            client (CirroApi): Client handed to every `DataPortalFile` that is created
        """
        self._data = ref
        self._files = [
            DataPortalFile(File.from_file_entry(f, project_id), client) for f in ref.files
        ]

    @property
    def files(self) -> List[DataPortalFile]:
        """File(s) contained in the reference"""
        return self._files

    @property
    def name(self) -> str:
        """Reference name"""
        return self._data.name

    @property
    def type(self) -> str:
        """Type of reference data (e.g. genome_fasta)"""
        return self._data.type

    @property
    def absolute_path(self):
        """Absolute path of the first file in the reference, or None if it has no files"""
        if not self._files:
            return None
        return self._files[0].absolute_path

    def __str__(self):
        return self.name

Reference data object containing files which can be used for analysis in a particular project.

DataPortalReference( ref: cirro_api_client.v1.models.Reference, project_id: str, client: CirroApi)
16    def __init__(self, ref: Reference, project_id: str, client: CirroApi):
17        """
18        Instantiate by listing the references which have been added to a particular project
19        ```python
20        from cirro import DataPortal()
21        portal = DataPortal()
22        project = portal.get_project_by_name("Project Name")
23        references = project.list_references()
24        ```
25        """
26        self._data = ref
27        self._files = [
28            DataPortalFile(File.from_file_entry(f, project_id), client) for f in ref.files
29        ]

Instantiate by listing the references which have been added to a particular project

from cirro import DataPortal
portal = DataPortal()
project = portal.get_project_by_name("Project Name")
references = project.list_references()
files: List[cirro.sdk.file.DataPortalFile]
31    @property
32    def files(self) -> List[DataPortalFile]:
33        """File(s) contained in the reference"""
34        return self._files

File(s) contained in the reference

name: str
36    @property
37    def name(self) -> str:
38        """Reference name"""
39        return self._data.name

Reference name

type: str
41    @property
42    def type(self) -> str:
43        """Type of reference data (e.g. genome_fasta)"""
44        return self._data.type

Type of reference data (e.g. genome_fasta)

absolute_path
46    @property
47    def absolute_path(self):
48        if len(self._files) == 0:
49            return None
50        return self._files[0].absolute_path
class CirroApi:
 12class CirroApi:
 13    """
 14    Client for interacting directly with the Cirro API
 15    """
 16    def __init__(self, auth_info: AuthInfo = None, base_url: str = None):
 17        """
 18        Instantiates the Cirro API object
 19
 20        Args:
 21            auth_info (cirro.auth.base.AuthInfo):
 22            base_url (str): Optional base URL of the Cirro instance
 23             (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
 24
 25        Returns:
 26            Authenticated Cirro API object, which can be used to call endpoint functions.
 27
 28        Example:
 29        ```python
 30        from cirro.cirro_client import CirroApi
 31
 32        cirro = CirroApi(base_url="app.cirro.bio")
 33        print(cirro.projects.list())
 34        ```
 35        """
 36
 37        self._configuration = AppConfig(base_url=base_url)
 38        if not auth_info:
 39            auth_info = get_auth_info_from_config(self._configuration, auth_io=None)
 40
 41        self._api_client = CirroApiClient(
 42            base_url=self._configuration.rest_endpoint,
 43            auth_method=auth_info.get_auth_method(),
 44            client_name='Cirro SDK',
 45            package_name='cirro'
 46        )
 47
 48        # Init services
 49        self._file_service = FileService(self._api_client,
 50                                         checksum_method=self._configuration.checksum_method,
 51                                         transfer_retries=self._configuration.transfer_max_retries)
 52        self._dataset_service = DatasetService(self._api_client, file_service=self._file_service)
 53        self._project_service = ProjectService(self._api_client)
 54        self._process_service = ProcessService(self._api_client)
 55        self._execution_service = ExecutionService(self._api_client)
 56        self._compute_environment_service = ComputeEnvironmentService(self._api_client)
 57        self._metrics_service = MetricsService(self._api_client)
 58        self._metadata_service = MetadataService(self._api_client)
 59        self._billing_service = BillingService(self._api_client)
 60        self._references_service = ReferenceService(self._api_client, file_service=self._file_service)
 61        self._shares_service = ShareService(self._api_client)
 62        self._users_service = UserService(self._api_client)
 63
 64    @property
 65    def datasets(self) -> DatasetService:
 66        """
 67        Create, list, delete, and modify Datasets
 68        """
 69        return self._dataset_service
 70
 71    @property
 72    def projects(self) -> ProjectService:
 73        """
 74        Create, list, delete, and modify Projects
 75        """
 76        return self._project_service
 77
 78    @property
 79    def processes(self) -> ProcessService:
 80        """
 81        List and retrieve detailed information about Processes
 82        """
 83        return self._process_service
 84
 85    @property
 86    def execution(self) -> ExecutionService:
 87        """
 88        List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)
 89        """
 90        return self._execution_service
 91
 92    @property
 93    def compute_environments(self) -> ComputeEnvironmentService:
 94        """
 95        List and update compute environments
 96        """
 97        return self._compute_environment_service
 98
 99    @property
100    def metrics(self) -> MetricsService:
101        """
102        Project-level summary metrics
103        """
104        return self._metrics_service
105
106    @property
107    def metadata(self) -> MetadataService:
108        """
109        List and modify Sample metadata or metadata schemas
110        """
111        return self._metadata_service
112
113    @property
114    def billing(self) -> BillingService:
115        """
116        List and update billing accounts
117        """
118        return self._billing_service
119
120    @property
121    def references(self) -> ReferenceService:
122        """
123        List References and Reference types
124        """
125        return self._references_service
126
127    @property
128    def shares(self) -> ShareService:
129        """
130        List, create, update, delete, and subscribe to shares
131        """
132        return self._shares_service
133
134    @property
135    def users(self) -> UserService:
136        """
137        List and update user information
138        """
139        return self._users_service
140
141    @property
142    def file(self) -> FileService:
143        """
144        Read, download, and create file objects
145        """
146        return self._file_service
147
148    @property
149    def api_client(self) -> CirroApiClient:
150        """
151        Gets the underlying API client
152        """
153        return self._api_client
154
155    @property
156    def configuration(self) -> AppConfig:
157        """
158        Gets the configuration of the instance
159        """
160        return self._configuration

Client for interacting directly with the Cirro API

CirroApi(auth_info: cirro.auth.base.AuthInfo = None, base_url: str = None)
16    def __init__(self, auth_info: AuthInfo = None, base_url: str = None):
17        """
18        Instantiates the Cirro API object
19
20        Args:
21            auth_info (cirro.auth.base.AuthInfo):
22            base_url (str): Optional base URL of the Cirro instance
23             (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
24
25        Returns:
26            Authenticated Cirro API object, which can be used to call endpoint functions.
27
28        Example:
29        ```python
30        from cirro.cirro_client import CirroApi
31
32        cirro = CirroApi(base_url="app.cirro.bio")
33        print(cirro.projects.list())
34        ```
35        """
36
37        self._configuration = AppConfig(base_url=base_url)
38        if not auth_info:
39            auth_info = get_auth_info_from_config(self._configuration, auth_io=None)
40
41        self._api_client = CirroApiClient(
42            base_url=self._configuration.rest_endpoint,
43            auth_method=auth_info.get_auth_method(),
44            client_name='Cirro SDK',
45            package_name='cirro'
46        )
47
48        # Init services
49        self._file_service = FileService(self._api_client,
50                                         checksum_method=self._configuration.checksum_method,
51                                         transfer_retries=self._configuration.transfer_max_retries)
52        self._dataset_service = DatasetService(self._api_client, file_service=self._file_service)
53        self._project_service = ProjectService(self._api_client)
54        self._process_service = ProcessService(self._api_client)
55        self._execution_service = ExecutionService(self._api_client)
56        self._compute_environment_service = ComputeEnvironmentService(self._api_client)
57        self._metrics_service = MetricsService(self._api_client)
58        self._metadata_service = MetadataService(self._api_client)
59        self._billing_service = BillingService(self._api_client)
60        self._references_service = ReferenceService(self._api_client, file_service=self._file_service)
61        self._shares_service = ShareService(self._api_client)
62        self._users_service = UserService(self._api_client)

Instantiates the Cirro API object

Arguments:
  • auth_info (cirro.auth.base.AuthInfo): Optional authentication information (if not provided, it is loaded from the configuration)
  • base_url (str): Optional base URL of the Cirro instance (if not provided, it uses the CIRRO_BASE_URL environment variable, or the config file)
Returns:

Authenticated Cirro API object, which can be used to call endpoint functions.

Example:

from cirro.cirro_client import CirroApi

cirro = CirroApi(base_url="app.cirro.bio")
print(cirro.projects.list())
datasets: cirro.services.DatasetService
64    @property
65    def datasets(self) -> DatasetService:
66        """
67        Create, list, delete, and modify Datasets
68        """
69        return self._dataset_service

Create, list, delete, and modify Datasets

projects: cirro.services.ProjectService
71    @property
72    def projects(self) -> ProjectService:
73        """
74        Create, list, delete, and modify Projects
75        """
76        return self._project_service

Create, list, delete, and modify Projects

processes: cirro.services.ProcessService
78    @property
79    def processes(self) -> ProcessService:
80        """
81        List and retrieve detailed information about Processes
82        """
83        return self._process_service

List and retrieve detailed information about Processes

execution: cirro.services.ExecutionService
85    @property
86    def execution(self) -> ExecutionService:
87        """
88        List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)
89        """
90        return self._execution_service

List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)

compute_environments: cirro.services.ComputeEnvironmentService
92    @property
93    def compute_environments(self) -> ComputeEnvironmentService:
94        """
95        List and update compute environments
96        """
97        return self._compute_environment_service

List and update compute environments

metrics: cirro.services.MetricsService
 99    @property
100    def metrics(self) -> MetricsService:
101        """
102        Project-level summary metrics
103        """
104        return self._metrics_service

Project-level summary metrics

metadata: cirro.services.MetadataService
106    @property
107    def metadata(self) -> MetadataService:
108        """
109        List and modify Sample metadata or metadata schemas
110        """
111        return self._metadata_service

List and modify Sample metadata or metadata schemas

billing: cirro.services.BillingService
113    @property
114    def billing(self) -> BillingService:
115        """
116        List and update billing accounts
117        """
118        return self._billing_service

List and update billing accounts

references: cirro.services.ReferenceService
120    @property
121    def references(self) -> ReferenceService:
122        """
123        List References and Reference types
124        """
125        return self._references_service

List References and Reference types

shares: cirro.services.ShareService
127    @property
128    def shares(self) -> ShareService:
129        """
130        List, create, update, delete, and subscribe to shares
131        """
132        return self._shares_service

List, create, update, delete, and subscribe to shares

users: cirro.services.UserService
134    @property
135    def users(self) -> UserService:
136        """
137        List and update user information
138        """
139        return self._users_service

List and update user information

file: cirro.services.FileService
141    @property
142    def file(self) -> FileService:
143        """
144        Read, download, and create file objects
145        """
146        return self._file_service

Read, download, and create file objects

api_client: cirro_api_client.CirroApiClient
148    @property
149    def api_client(self) -> CirroApiClient:
150        """
151        Gets the underlying API client
152        """
153        return self._api_client

Gets the underlying API client

configuration: cirro.config.AppConfig
155    @property
156    def configuration(self) -> AppConfig:
157        """
158        Gets the configuration of the instance
159        """
160        return self._configuration

Gets the configuration of the instance