cirro

 1import cirro.file_utils  # noqa
 2from cirro.cirro_client import CirroApi
 3from cirro.sdk.dataset import DataPortalDataset
 4from cirro.sdk.login import DataPortalLogin
 5from cirro.sdk.portal import DataPortal
 6from cirro.sdk.process import DataPortalProcess
 7from cirro.sdk.project import DataPortalProject
 8from cirro.sdk.reference import DataPortalReference
 9
10__all__ = [
11    'DataPortal',
12    'DataPortalLogin',
13    'DataPortalProject',
14    'DataPortalProcess',
15    'DataPortalDataset',
16    'DataPortalReference',
17    'CirroApi',
18    'file_utils'
19]
class DataPortal:
 12class DataPortal:
 13    """
 14    Helper functions for exploring the Projects, Datasets, Samples, and Files
 15    available in the Data Portal.
 16    """
 17
 18    def __init__(self, base_url: str = None, client: CirroApi = None):
 19        """
 20        Set up the DataPortal object, establishing an authenticated connection.
 21
 22        Args:
 23            base_url (str): Optional base URL of the Cirro instance
 24             (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
 25            client (`cirro.cirro_client.CirroApi`): Optional pre-configured client
 26
 27        Example:
 28        ```python
 29        from cirro import DataPortal
 30
 31        portal = DataPortal(base_url="app.cirro.bio")
 32        portal.list_projects()
 33        ```
 34        """
 35
 36        if client is not None:
 37            self._client = client
 38
 39        # Set up default client if not provided
 40        else:
 41            self._client = CirroApi(base_url=base_url)
 42
 43    def list_projects(self) -> DataPortalProjects:
 44        """List all the projects available in the Data Portal."""
 45
 46        return DataPortalProjects(
 47            [
 48                DataPortalProject(proj, self._client)
 49                for proj in self._client.projects.list()
 50            ]
 51        )
 52
 53    def get_project_by_name(self, name: str = None) -> DataPortalProject:
 54        """Return the project with the specified name."""
 55
 56        return self.list_projects().get_by_name(name)
 57
 58    def get_project_by_id(self, _id: str = None) -> DataPortalProject:
 59        """Return the project with the specified id."""
 60
 61        return self.list_projects().get_by_id(_id)
 62
 63    def get_project(self, project: str = None) -> DataPortalProject:
 64        """
 65        Return a project identified by ID or name.
 66
 67        Args:
 68            project (str): ID or name of project
 69
 70        Returns:
 71            `cirro.sdk.project.DataPortalProject`
 72        """
 73        try:
 74            return self.get_project_by_id(project)
 75        except DataPortalAssetNotFound:
 76            return self.get_project_by_name(project)
 77
 78    def get_dataset(self, project: str = None, dataset: str = None) -> DataPortalDataset:
 79        """
 80        Return a dataset identified by ID or name.
 81
 82        Args:
 83            project (str): ID or name of project
 84            dataset (str): ID or name of dataset
 85
 86        Returns:
 87            `cirro.sdk.dataset.DataPortalDataset`
 88
 89            ```python
 90            from cirro import DataPortal
 91            portal = DataPortal()
 92            dataset = portal.get_dataset(
 93                project="id-or-name-of-project",
 94                dataset="id-or-name-of-dataset"
 95            )
 96            ```
 97        """
 98        try:
 99            project: DataPortalProject = self.get_project_by_id(project)
100        except DataPortalAssetNotFound:
101            project: DataPortalProject = self.get_project_by_name(project)
102
103        try:
104            return project.get_dataset_by_id(dataset)
105        except DataPortalAssetNotFound:
106            return project.get_dataset_by_name(dataset)
107
108    def list_processes(self, ingest=False) -> DataPortalProcesses:
109        """
110        List all the processes available in the Data Portal.
111        By default, all processes are listed, both ingest and non-ingest.
112        To list only the processes which can be used to upload datasets, use `ingest = True`.
113
114        Args:
115            ingest (bool): If True, only list those processes which can be used to ingest datasets directly
116        """
117
118        return DataPortalProcesses(
119            [
120                DataPortalProcess(p, self._client)
121                for p in self._client.processes.list()
122                if not ingest or p.executor == Executor.INGEST
123            ]
124        )
125
126    def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess:
127        """
128        Return the process with the specified name.
129
130        Args:
131            name (str): Name of process
132        """
133
134        return self.list_processes(ingest=ingest).get_by_name(name)
135
136    def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess:
137        """
138        Return the process with the specified id
139
140        Args:
141            id (str): ID of process
142        """
143
144        return self.list_processes(ingest=ingest).get_by_id(id)
145
146    def list_reference_types(self) -> DataPortalReferenceTypes:
147        """
148        Return the list of all available reference types
149        """
150
151        return DataPortalReferenceTypes(
152            [
153                DataPortalReferenceType(ref)
154                for ref in self._client.references.get_types()
155            ]
156        )

Helper functions for exploring the Projects, Datasets, Samples, and Files available in the Data Portal.

DataPortal(base_url: str = None, client: CirroApi = None)
18    def __init__(self, base_url: str = None, client: CirroApi = None):
19        """
20        Set up the DataPortal object, establishing an authenticated connection.
21
22        Args:
23            base_url (str): Optional base URL of the Cirro instance
24             (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
25            client (`cirro.cirro_client.CirroApi`): Optional pre-configured client
26
27        Example:
28        ```python
29        from cirro import DataPortal
30
31        portal = DataPortal(base_url="app.cirro.bio")
32        portal.list_projects()
33        ```
34        """
35
36        if client is not None:
37            self._client = client
38
39        # Set up default client if not provided
40        else:
41            self._client = CirroApi(base_url=base_url)

Set up the DataPortal object, establishing an authenticated connection.

Arguments:
  • base_url (str): Optional base URL of the Cirro instance (if not provided, it uses the CIRRO_BASE_URL environment variable, or the config file)
  • client (cirro.cirro_client.CirroApi): Optional pre-configured client

Example:

from cirro import DataPortal

portal = DataPortal(base_url="app.cirro.bio")
portal.list_projects()
def list_projects(self) -> cirro.sdk.project.DataPortalProjects:
43    def list_projects(self) -> DataPortalProjects:
44        """List all the projects available in the Data Portal."""
45
46        return DataPortalProjects(
47            [
48                DataPortalProject(proj, self._client)
49                for proj in self._client.projects.list()
50            ]
51        )

List all the projects available in the Data Portal.

def get_project_by_name(self, name: str = None) -> DataPortalProject:
53    def get_project_by_name(self, name: str = None) -> DataPortalProject:
54        """Return the project with the specified name."""
55
56        return self.list_projects().get_by_name(name)

Return the project with the specified name.

def get_project_by_id(self, _id: str = None) -> DataPortalProject:
58    def get_project_by_id(self, _id: str = None) -> DataPortalProject:
59        """Return the project with the specified id."""
60
61        return self.list_projects().get_by_id(_id)

Return the project with the specified id.

def get_project(self, project: str = None) -> DataPortalProject:
63    def get_project(self, project: str = None) -> DataPortalProject:
64        """
65        Return a project identified by ID or name.
66
67        Args:
68            project (str): ID or name of project
69
70        Returns:
71            `cirro.sdk.project.DataPortalProject`
72        """
73        try:
74            return self.get_project_by_id(project)
75        except DataPortalAssetNotFound:
76            return self.get_project_by_name(project)

Return a project identified by ID or name.

Arguments:
  • project (str): ID or name of project
Returns:

cirro.sdk.project.DataPortalProject

def get_dataset( self, project: str = None, dataset: str = None) -> DataPortalDataset:
 78    def get_dataset(self, project: str = None, dataset: str = None) -> DataPortalDataset:
 79        """
 80        Return a dataset identified by ID or name.
 81
 82        Args:
 83            project (str): ID or name of project
 84            dataset (str): ID or name of dataset
 85
 86        Returns:
 87            `cirro.sdk.dataset.DataPortalDataset`
 88
 89            ```python
 90            from cirro import DataPortal
 91            portal = DataPortal()
 92            dataset = portal.get_dataset(
 93                project="id-or-name-of-project",
 94                dataset="id-or-name-of-dataset"
 95            )
 96            ```
 97        """
 98        try:
 99            project: DataPortalProject = self.get_project_by_id(project)
100        except DataPortalAssetNotFound:
101            project: DataPortalProject = self.get_project_by_name(project)
102
103        try:
104            return project.get_dataset_by_id(dataset)
105        except DataPortalAssetNotFound:
106            return project.get_dataset_by_name(dataset)

Return a dataset identified by ID or name.

Arguments:
  • project (str): ID or name of project
  • dataset (str): ID or name of dataset
Returns:

cirro.sdk.dataset.DataPortalDataset

from cirro import DataPortal
portal = DataPortal()
dataset = portal.get_dataset(
    project="id-or-name-of-project",
    dataset="id-or-name-of-dataset"
)
def list_processes(self, ingest=False) -> cirro.sdk.process.DataPortalProcesses:
108    def list_processes(self, ingest=False) -> DataPortalProcesses:
109        """
110        List all the processes available in the Data Portal.
111        By default, all processes are listed, both ingest and non-ingest.
112        To list only the processes which can be used to upload datasets, use `ingest = True`.
113
114        Args:
115            ingest (bool): If True, only list those processes which can be used to ingest datasets directly
116        """
117
118        return DataPortalProcesses(
119            [
120                DataPortalProcess(p, self._client)
121                for p in self._client.processes.list()
122                if not ingest or p.executor == Executor.INGEST
123            ]
124        )

List all the processes available in the Data Portal. By default, all processes are listed, both ingest and non-ingest. To list only the processes which can be used to upload datasets, use ingest = True.

Arguments:
  • ingest (bool): If True, only list those processes which can be used to ingest datasets directly
def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess:
126    def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess:
127        """
128        Return the process with the specified name.
129
130        Args:
131            name (str): Name of process
132        """
133
134        return self.list_processes(ingest=ingest).get_by_name(name)

Return the process with the specified name.

Arguments:
  • name (str): Name of process
def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess:
136    def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess:
137        """
138        Return the process with the specified id
139
140        Args:
141            id (str): ID of process
142        """
143
144        return self.list_processes(ingest=ingest).get_by_id(id)

Return the process with the specified id

Arguments:
  • id (str): ID of process
def list_reference_types(self) -> cirro.sdk.reference_type.DataPortalReferenceTypes:
146    def list_reference_types(self) -> DataPortalReferenceTypes:
147        """
148        Return the list of all available reference types
149        """
150
151        return DataPortalReferenceTypes(
152            [
153                DataPortalReferenceType(ref)
154                for ref in self._client.references.get_types()
155            ]
156        )

Return the list of all available reference types

class DataPortalLogin:
 8class DataPortalLogin:
 9    """
10    Start the login process, obtaining the authorization message from Cirro
11    needed to confirm the user identity.
12
13    Useful when you need to authenticate a user in a non-blocking way.
14
15    Usage:
16
17    ```python
18    # Replace app.cirro.bio as appropriate
19    login = DataPortalLogin(base_url="app.cirro.bio")
20
21    # Present the user with the authorization message
22    print(login.auth_message)
23
24    # Generate the authenticated DataPortal object,
25    # blocking until the user completes the login process in their browser
26    portal = login.await_completion()
27    ```
28    """
29    base_url: str
30    auth_info: DeviceCodeAuth
31
32    def __init__(self, base_url: str = None, enable_cache=False):
33        app_config = AppConfig(base_url=base_url)
34
35        self.base_url = base_url
36
37        self.auth_info = DeviceCodeAuth(
38            region=app_config.region,
39            client_id=app_config.client_id,
40            auth_endpoint=app_config.auth_endpoint,
41            enable_cache=enable_cache,
42            await_completion=False
43        )
44
45    @property
46    def auth_message(self) -> str:
47        """Authorization message provided by Cirro."""
48        return self.auth_info.auth_message
49
50    @property
51    def auth_message_markdown(self) -> str:
52        """Authorization message provided by Cirro (Markdown format)."""
53        return self.auth_info.auth_message_markdown
54
55    def await_completion(self) -> DataPortal:
56        """Complete the login process and return an authenticated client"""
57
58        # Block until the user completes the login flow
59        self.auth_info.await_completion()
60
61        # Set up the client object
62        cirro_client = CirroApi(
63            auth_info=self.auth_info,
64            base_url=self.base_url
65        )
66
67        # Return the Data Portal object
68        return DataPortal(client=cirro_client)

Start the login process, obtaining the authorization message from Cirro needed to confirm the user identity.

Useful when you need to authenticate a user in a non-blocking way.

Usage:

# Replace app.cirro.bio as appropriate
login = DataPortalLogin(base_url="app.cirro.bio")

# Present the user with the authorization message
print(login.auth_message)

# Generate the authenticated DataPortal object,
# blocking until the user completes the login process in their browser
portal = login.await_completion()
DataPortalLogin(base_url: str = None, enable_cache=False)
32    def __init__(self, base_url: str = None, enable_cache=False):
33        app_config = AppConfig(base_url=base_url)
34
35        self.base_url = base_url
36
37        self.auth_info = DeviceCodeAuth(
38            region=app_config.region,
39            client_id=app_config.client_id,
40            auth_endpoint=app_config.auth_endpoint,
41            enable_cache=enable_cache,
42            await_completion=False
43        )
base_url: str
auth_message: str
45    @property
46    def auth_message(self) -> str:
47        """Authorization message provided by Cirro."""
48        return self.auth_info.auth_message

Authorization message provided by Cirro.

auth_message_markdown: str
50    @property
51    def auth_message_markdown(self) -> str:
52        """Authorization message provided by Cirro (Markdown format)."""
53        return self.auth_info.auth_message_markdown

Authorization message provided by Cirro (Markdown format).

def await_completion(self) -> DataPortal:
55    def await_completion(self) -> DataPortal:
56        """Complete the login process and return an authenticated client"""
57
58        # Block until the user completes the login flow
59        self.auth_info.await_completion()
60
61        # Set up the client object
62        cirro_client = CirroApi(
63            auth_info=self.auth_info,
64            base_url=self.base_url
65        )
66
67        # Return the Data Portal object
68        return DataPortal(client=cirro_client)

Complete the login process and return an authenticated client

class DataPortalProject(cirro.sdk.asset.DataPortalAsset):
 20class DataPortalProject(DataPortalAsset):
 21    """
 22    Projects in the Data Portal contain collections of Datasets.
 23    Users are granted permissions at the project-level, allowing them
 24    to view and/or modify all the datasets in that collection.
 25    """
 26    def __init__(self, proj: Project, client: CirroApi):
 27        """
 28        Instantiate with helper method
 29
 30        ```python
 31        from cirro import DataPortal
 32        portal = DataPortal()
 33        project = portal.get_project_by_name("Project Name")
 34        ```
 35
 36        """
 37        self._data = proj
 38        self._client = client
 39
 40    @property
 41    def id(self) -> str:
 42        """
 43        Unique identifier
 44        """
 45        return self._data.id
 46
 47    @property
 48    def name(self) -> str:
 49        """
 50        Readable name
 51        """
 52        return self._data.name
 53
 54    @property
 55    def description(self) -> str:
 56        """
 57        Longer description of the project
 58        """
 59        return self._data.description
 60
 61    def __str__(self):
 62        """Control how the Project is rendered as a string."""
 63
 64        return '\n'.join([
 65            f"{i.title()}: {self.__getattribute__(i)}"
 66            for i in ['name', 'id', 'description']
 67        ])
 68
 69    @cache
 70    def _get_datasets(self) -> List[Dataset]:
 71        return list_all_datasets(project_id=self.id,
 72                                 client=self._client)
 73
 74    def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
 75        """List all the datasets available in the project."""
 76        if force_refresh:
 77            self._get_datasets.cache_clear()
 78
 79        return DataPortalDatasets(
 80            [
 81                DataPortalDataset(d, self._client)
 82                for d in self._get_datasets()
 83            ]
 84        )
 85
 86    def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
 87        """Return the dataset with the specified name."""
 88        if force_refresh:
 89            self._get_datasets.cache_clear()
 90
 91        dataset = next((d for d in self._get_datasets() if d.name == name), None)
 92        if dataset is None:
 93            raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
 94        return self.get_dataset_by_id(dataset.id)
 95
 96    def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
 97        """Return the dataset with the specified id."""
 98
 99        dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id)
100        if dataset is None:
101            raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
102        return DataPortalDataset(dataset, self._client)
103
104    def list_references(self, reference_type: str = None) -> DataPortalReferences:
105        """
106        List the references available in a project.
107        Optionally filter to references of a particular type (identified by name)
108        """
109
110        # Get the complete list of references which are available
111        reference_types = DataPortalReferenceTypes(
112            [
113                DataPortalReferenceType(ref)
114                for ref in self._client.references.get_types()
115            ]
116        )
117
118        # If a particular name was specified
119        if reference_type is not None:
120            reference_types = reference_types.filter_by_pattern(reference_type)
121            if len(reference_types) == 0:
122                msg = f"Could not find any reference types with the name {reference_type}"
123                raise DataPortalAssetNotFound(msg)
124
125        return DataPortalReferences(
126            [
127                DataPortalReference(ref, project_id=self.id, client=self._client)
128                for ref in self._client.references.get_for_project(
129                    self.id
130                )
131                if reference_type is None or ref.type == reference_type
132            ]
133        )
134
135    def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
136        """Return the reference of a particular type with the specified name."""
137
138        if name is None:
139            raise DataPortalInputError("Must specify the reference name")
140
141        return self.list_references(ref_type).get_by_name(name)
142
143    def upload_dataset(
144        self,
145        name: str = None,
146        description='',
147        process: Union[DataPortalProcess, str] = None,
148        upload_folder: str = None,
149        files: List[str] = None,
150        tags: List[str] = None,
151    ):
152        """
153        Upload a set of files to the Data Portal, creating a new dataset.
154
155        If the files parameter is not provided, it will upload all files in the upload folder
156
157        Args:
158            name (str): Name of newly created dataset
159            description (str): Description of newly created dataset
160            process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object
161            upload_folder (str): Folder containing files to upload
162            files (List[str]): Optional subset of files to upload from the folder
163            tags (List[str]): Optional list of tags to apply to the dataset
164        """
165
166        if name is None:
167            raise DataPortalInputError("Must provide name for new dataset")
168        if process is None:
169            raise DataPortalInputError("Must provide the process which is used for ingest")
170        if upload_folder is None:
171            raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")
172
173        # Parse the process provided by the user
174        process = parse_process_name_or_id(process, self._client)
175
176        # If no files were provided
177        if files is None:
178            # Get the list of files in the upload folder
179            files = get_files_in_directory(upload_folder)
180
181        if files is None or len(files) == 0:
182            raise RuntimeWarning("No files to upload, exiting")
183
184        # Normalize into Tag object
185        if tags is not None:
186            tags = [Tag(value=value) for value in tags]
187
188        # Make sure that the files match the expected pattern
189        self._client.processes.check_dataset_files(files, process.id, upload_folder)
190
191        # Create the ingest process request
192        dataset_create_request = UploadDatasetRequest(
193            process_id=process.id,
194            name=name,
195            description=description,
196            expected_files=files,
197            tags=tags,
198        )
199
200        # Get the response
201        create_response = self._client.datasets.create(project_id=self.id,
202                                                       upload_request=dataset_create_request)
203
204        # Upload the files
205        self._client.datasets.upload_files(
206            project_id=self.id,
207            dataset_id=create_response.id,
208            directory=upload_folder,
209            files=files
210        )
211
212        # Return the dataset which was created, which might take a second to update
213        max_attempts = 5
214        for attempt in range(max_attempts):
215            try:
216                return self.get_dataset_by_id(create_response.id)
217            except DataPortalAssetNotFound as e:
218                if attempt == max_attempts - 1:
219                    raise e
220                else:
221                    sleep(2)
222
223    def samples(self, max_items: int = 10000) -> List[Sample]:
224        """
225        Retrieves a list of samples associated with a project along with their metadata
226
227        Args:
228            max_items (int): Maximum number of records to get (default 10,000)
229        """
230        return self._client.metadata.get_project_samples(self.id, max_items)

Projects in the Data Portal contain collections of Datasets. Users are granted permissions at the project-level, allowing them to view and/or modify all the datasets in that collection.

DataPortalProject( proj: cirro_api_client.v1.models.Project, client: CirroApi)
26    def __init__(self, proj: Project, client: CirroApi):
27        """
28        Instantiate with helper method
29
30        ```python
31        from cirro import DataPortal
32        portal = DataPortal()
33        project = portal.get_project_by_name("Project Name")
34        ```
35
36        """
37        self._data = proj
38        self._client = client

Instantiate with helper method

from cirro import DataPortal
portal = DataPortal()
project = portal.get_project_by_name("Project Name")
id: str
40    @property
41    def id(self) -> str:
42        """
43        Unique identifier
44        """
45        return self._data.id

Unique identifier

name: str
47    @property
48    def name(self) -> str:
49        """
50        Readable name
51        """
52        return self._data.name

Readable name

description: str
54    @property
55    def description(self) -> str:
56        """
57        Longer description of the project
58        """
59        return self._data.description

Longer description of the project

def list_datasets(self, force_refresh=False) -> cirro.sdk.dataset.DataPortalDatasets:
74    def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
75        """List all the datasets available in the project."""
76        if force_refresh:
77            self._get_datasets.cache_clear()
78
79        return DataPortalDatasets(
80            [
81                DataPortalDataset(d, self._client)
82                for d in self._get_datasets()
83            ]
84        )

List all the datasets available in the project.

def get_dataset_by_name( self, name: str, force_refresh=False) -> DataPortalDataset:
86    def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
87        """Return the dataset with the specified name."""
88        if force_refresh:
89            self._get_datasets.cache_clear()
90
91        dataset = next((d for d in self._get_datasets() if d.name == name), None)
92        if dataset is None:
93            raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
94        return self.get_dataset_by_id(dataset.id)

Return the dataset with the specified name.

def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
 96    def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
 97        """Return the dataset with the specified id."""
 98
 99        dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id)
100        if dataset is None:
101            raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
102        return DataPortalDataset(dataset, self._client)

Return the dataset with the specified id.

def list_references( self, reference_type: str = None) -> cirro.sdk.reference.DataPortalReferences:
104    def list_references(self, reference_type: str = None) -> DataPortalReferences:
105        """
106        List the references available in a project.
107        Optionally filter to references of a particular type (identified by name)
108        """
109
110        # Get the complete list of references which are available
111        reference_types = DataPortalReferenceTypes(
112            [
113                DataPortalReferenceType(ref)
114                for ref in self._client.references.get_types()
115            ]
116        )
117
118        # If a particular name was specified
119        if reference_type is not None:
120            reference_types = reference_types.filter_by_pattern(reference_type)
121            if len(reference_types) == 0:
122                msg = f"Could not find any reference types with the name {reference_type}"
123                raise DataPortalAssetNotFound(msg)
124
125        return DataPortalReferences(
126            [
127                DataPortalReference(ref, project_id=self.id, client=self._client)
128                for ref in self._client.references.get_for_project(
129                    self.id
130                )
131                if reference_type is None or ref.type == reference_type
132            ]
133        )

List the references available in a project. Optionally filter to references of a particular type (identified by name)

def get_reference_by_name( self, name: str = None, ref_type: str = None) -> DataPortalReference:
135    def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
136        """Return the reference of a particular type with the specified name."""
137
138        if name is None:
139            raise DataPortalInputError("Must specify the reference name")
140
141        return self.list_references(ref_type).get_by_name(name)

Return the reference of a particular type with the specified name.

def upload_dataset( self, name: str = None, description='', process: Union[DataPortalProcess, str] = None, upload_folder: str = None, files: List[str] = None, tags: List[str] = None):
143    def upload_dataset(
144        self,
145        name: str = None,
146        description='',
147        process: Union[DataPortalProcess, str] = None,
148        upload_folder: str = None,
149        files: List[str] = None,
150        tags: List[str] = None,
151    ):
152        """
153        Upload a set of files to the Data Portal, creating a new dataset.
154
155        If the files parameter is not provided, it will upload all files in the upload folder
156
157        Args:
158            name (str): Name of newly created dataset
159            description (str): Description of newly created dataset
160            process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object
161            upload_folder (str): Folder containing files to upload
162            files (List[str]): Optional subset of files to upload from the folder
163            tags (List[str]): Optional list of tags to apply to the dataset
164        """
165
166        if name is None:
167            raise DataPortalInputError("Must provide name for new dataset")
168        if process is None:
169            raise DataPortalInputError("Must provide the process which is used for ingest")
170        if upload_folder is None:
171            raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")
172
173        # Parse the process provided by the user
174        process = parse_process_name_or_id(process, self._client)
175
176        # If no files were provided
177        if files is None:
178            # Get the list of files in the upload folder
179            files = get_files_in_directory(upload_folder)
180
181        if files is None or len(files) == 0:
182            raise RuntimeWarning("No files to upload, exiting")
183
184        # Normalize into Tag object
185        if tags is not None:
186            tags = [Tag(value=value) for value in tags]
187
188        # Make sure that the files match the expected pattern
189        self._client.processes.check_dataset_files(files, process.id, upload_folder)
190
191        # Create the ingest process request
192        dataset_create_request = UploadDatasetRequest(
193            process_id=process.id,
194            name=name,
195            description=description,
196            expected_files=files,
197            tags=tags,
198        )
199
200        # Get the response
201        create_response = self._client.datasets.create(project_id=self.id,
202                                                       upload_request=dataset_create_request)
203
204        # Upload the files
205        self._client.datasets.upload_files(
206            project_id=self.id,
207            dataset_id=create_response.id,
208            directory=upload_folder,
209            files=files
210        )
211
212        # Return the dataset which was created, which might take a second to update
213        max_attempts = 5
214        for attempt in range(max_attempts):
215            try:
216                return self.get_dataset_by_id(create_response.id)
217            except DataPortalAssetNotFound as e:
218                if attempt == max_attempts - 1:
219                    raise e
220                else:
221                    sleep(2)

Upload a set of files to the Data Portal, creating a new dataset.

If the files parameter is not provided, it will upload all files in the upload folder

Arguments:
  • name (str): Name of newly created dataset
  • description (str): Description of newly created dataset
  • process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object
  • upload_folder (str): Folder containing files to upload
  • files (List[str]): Optional subset of files to upload from the folder
  • tags (List[str]): Optional list of tags to apply to the dataset
def samples( self, max_items: int = 10000) -> List[cirro_api_client.v1.models.Sample]:
223    def samples(self, max_items: int = 10000) -> List[Sample]:
224        """
225        Retrieves a list of samples associated with a project along with their metadata
226
227        Args:
228            max_items (int): Maximum number of records to get (default 10,000)
229        """
230        return self._client.metadata.get_project_samples(self.id, max_items)

Retrieves a list of samples associated with a project along with their metadata

Arguments:
  • max_items (int): Maximum number of records to get (default 10,000)
class DataPortalProcess(cirro.sdk.asset.DataPortalAsset):
class DataPortalProcess(DataPortalAsset):
    """Helper functions for interacting with analysis processes."""

    def __init__(self, process: Union[Process, ProcessDetail], client: CirroApi):
        """
        Instantiate with helper method

        ```python
        from cirro import DataPortal
        portal = DataPortal()
        process = portal.get_process_by_name("Process Name")
        ```

        Args:
            process (Process | ProcessDetail): Process record to wrap
            client (CirroApi): Client used for any follow-up requests
        """
        self._data = process
        self._client = client

    @property
    def id(self) -> str:
        """Unique identifier"""
        return self._data.id

    @property
    def name(self) -> str:
        """Readable name"""
        return self._data.name

    @property
    def description(self) -> str:
        """Longer description of process"""
        return self._data.description

    @property
    def child_process_ids(self) -> List[str]:
        """List of processes which can be run on the output of this process"""
        return self._data.child_process_ids

    @property
    def executor(self) -> Executor:
        """INGEST, CROMWELL, or NEXTFLOW"""
        return self._data.executor

    @property
    def category(self) -> str:
        """Category of process"""
        return self._data.category

    @property
    def pipeline_type(self) -> str:
        """Pipeline type"""
        return self._data.pipeline_type

    @property
    def documentation_url(self) -> str:
        """Documentation URL"""
        return self._data.documentation_url

    @property
    def file_requirements_message(self) -> str:
        """Description of files required for INGEST processes"""
        return self._data.file_requirements_message

    @property
    def code(self) -> PipelineCode:
        """Pipeline code configuration (read from the detailed process record)"""
        return self._get_detail().pipeline_code

    @property
    def custom_settings(self) -> CustomPipelineSettings:
        """Custom settings for the process (read from the detailed process record)"""
        return self._get_detail().custom_settings

    def _get_detail(self) -> ProcessDetail:
        """
        Return the detailed record for this process.

        If the wrapped record is not already a `ProcessDetail`, it is fetched
        through the client and kept in place of the original record, so the
        fetch happens at most once per object.
        """
        if not isinstance(self._data, ProcessDetail):
            self._data = self._client.processes.get(self.id)
        return self._data

    def __str__(self):
        """Return the name, ID, and description, one `Label: value` pair per line."""
        return '\n'.join([
            f"{i.title()}: {getattr(self, i)}"
            for i in ['name', 'id', 'description']
        ])

    def get_parameter_spec(self) -> ParameterSpecification:
        """
        Gets a specification used to describe the parameters used in the process.
        """
        return self._client.processes.get_parameter_spec(self.id)

Helper functions for interacting with analysis processes.

DataPortalProcess( process: Union[cirro_api_client.v1.models.Process, cirro_api_client.v1.models.ProcessDetail], client: CirroApi)
14    def __init__(self, process: Union[Process, ProcessDetail], client: CirroApi):
15        """
16        Instantiate with helper method
17
18        ```python
19        from cirro import DataPortal()
20        portal = DataPortal()
21        process = portal.get_process_by_name("Process Name")
22        ```
23        """
24        self._data = process
25        self._client = client

Instantiate with helper method

from cirro import DataPortal
portal = DataPortal()
process = portal.get_process_by_name("Process Name")
id: str
27    @property
28    def id(self) -> str:
29        """Unique identifier"""
30        return self._data.id

Unique identifier

name: str
32    @property
33    def name(self) -> str:
34        """Readable name"""
35        return self._data.name

Readable name

description: str
37    @property
38    def description(self) -> str:
39        """Longer description of process"""
40        return self._data.description

Longer description of process

child_process_ids: List[str]
42    @property
43    def child_process_ids(self) -> List[str]:
44        """List of processes which can be run on the output of this process"""
45        return self._data.child_process_ids

List of processes which can be run on the output of this process

executor: cirro_api_client.v1.models.Executor
47    @property
48    def executor(self) -> Executor:
49        """INGEST, CROMWELL, or NEXTFLOW"""
50        return self._data.executor

INGEST, CROMWELL, or NEXTFLOW

category: str
52    @property
53    def category(self) -> str:
54        """Category of process"""
55        return self._data.category

Category of process

pipeline_type: str
57    @property
58    def pipeline_type(self) -> str:
59        """Pipeline type"""
60        return self._data.pipeline_type

Pipeline type

documentation_url: str
62    @property
63    def documentation_url(self) -> str:
64        """Documentation URL"""
65        return self._data.documentation_url

Documentation URL

file_requirements_message: str
67    @property
68    def file_requirements_message(self) -> str:
69        """Description of files required for INGEST processes"""
70        return self._data.file_requirements_message

Description of files required for INGEST processes

72    @property
73    def code(self) -> PipelineCode:
74        """Pipeline code configuration"""
75        return self._get_detail().pipeline_code

Pipeline code configuration

77    @property
78    def custom_settings(self) -> CustomPipelineSettings:
79        """Custom settings for the process"""
80        return self._get_detail().custom_settings

Custom settings for the process

def get_parameter_spec(self) -> cirro.models.form_specification.ParameterSpecification:
93    def get_parameter_spec(self) -> ParameterSpecification:
94        """
95        Gets a specification used to describe the parameters used in the process.
96        """
97        return self._client.processes.get_parameter_spec(self.id)

Gets a specification used to describe the parameters used in the process.

class DataPortalDataset(cirro.sdk.asset.DataPortalAsset):
 19class DataPortalDataset(DataPortalAsset):
 20    """
 21    Datasets in the Data Portal are collections of files which have
 22    either been uploaded directly, or which have been output by
 23    an analysis pipeline or notebook.
 24    """
 25
 26    def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi):
 27        """
 28        Instantiate a dataset object
 29
 30        Should be invoked from a top-level constructor, for example:
 31
 32        ```python
 33        from cirro import DataPortal()
 34        portal = DataPortal()
 35        dataset = portal.get_dataset(
 36            project="id-or-name-of-project",
 37            dataset="id-or-name-of-dataset"
 38        )
 39        ```
 40
 41        """
 42        assert dataset.project_id is not None, "Must provide dataset with project_id attribute"
 43        self._data = dataset
 44        self._assets: Optional[DatasetAssets] = None
 45        self._client = client
 46
 47    @property
 48    def id(self) -> str:
 49        """Unique identifier for the dataset"""
 50        return self._data.id
 51
 52    @property
 53    def name(self) -> str:
 54        """Editible name for the dataset"""
 55        return self._data.name
 56
 57    @property
 58    def description(self) -> str:
 59        """Longer name for the dataset"""
 60        return self._data.description
 61
 62    @property
 63    def process_id(self) -> str:
 64        """Unique ID of process used to create the dataset"""
 65        return self._data.process_id
 66
 67    @property
 68    def process(self) -> ProcessDetail:
 69        """
 70        Object representing the process used to create the dataset
 71        """
 72        return self._client.processes.get(self.process_id)
 73
 74    @property
 75    def project_id(self) -> str:
 76        """ID of the project containing the dataset"""
 77        return self._data.project_id
 78
 79    @property
 80    def status(self) -> Status:
 81        """
 82        Status of the dataset
 83        """
 84        return self._data.status
 85
 86    @property
 87    def source_dataset_ids(self) -> List[str]:
 88        """IDs of the datasets used as sources for this dataset (if any)"""
 89        return self._data.source_dataset_ids
 90
 91    @property
 92    def source_datasets(self) -> List['DataPortalDataset']:
 93        """
 94        Objects representing the datasets used as sources for this dataset (if any)
 95        """
 96        return [
 97            DataPortalDataset(
 98                dataset=self._client.datasets.get(project_id=self.project_id, dataset_id=dataset_id),
 99                client=self._client
100            )
101            for dataset_id in self.source_dataset_ids
102        ]
103
104    @property
105    def params(self) -> DatasetDetailParams:
106        """
107        Parameters used to generate the dataset
108        """
109        return self._get_detail().params
110
111    @property
112    def info(self) -> DatasetDetailInfo:
113        """
114        Detailed information about the dataset
115        """
116        return self._get_detail().info
117
118    @property
119    def tags(self) -> List[Tag]:
120        """
121        Tags applied to the dataset
122        """
123        return self._data.tags
124
125    @property
126    def created_by(self) -> str:
127        """User who created the dataset"""
128        return self._data.created_by
129
130    @property
131    def created_at(self) -> datetime.datetime:
132        """Timestamp of dataset creation"""
133        return self._data.created_at
134
135    def _get_detail(self):
136        if not isinstance(self._data, DatasetDetail):
137            self._data = self._client.datasets.get(project_id=self.project_id, dataset_id=self.id)
138        return self._data
139
140    def _get_assets(self):
141        if not self._assets:
142            self._assets = self._client.datasets.get_assets_listing(
143                project_id=self.project_id,
144                dataset_id=self.id
145            )
146        return self._assets
147
148    def __str__(self):
149        return '\n'.join([
150            f"{i.title()}: {self.__getattribute__(i)}"
151            for i in ['name', 'id', 'description', 'status']
152        ])
153
154    def get_file(self, relative_path: str) -> DataPortalFile:
155        """
156        Get a file from the dataset using its relative path.
157
158        Args:
159            relative_path (str): Relative path of file within the dataset
160
161        Returns:
162            `from cirro.sdk.file import DataPortalFile`
163        """
164
165        # Get the list of files in this dataset
166        files = self.list_files()
167
168        # Try getting the file using the relative path provided by the user
169        try:
170            return files.get_by_id(relative_path)
171        except DataPortalAssetNotFound:
172            # Try getting the file with the 'data/' prefix prepended
173            try:
174                return files.get_by_id("data/" + relative_path)
175            except DataPortalAssetNotFound:
176                # If not found, raise the exception using the string provided
177                # by the user, not the data/ prepended version (which may be
178                # confusing to the user)
179                msg = '\n'.join([f"No file found with path '{relative_path}'."])
180                raise DataPortalAssetNotFound(msg)
181
182    def list_files(self) -> DataPortalFiles:
183        """
184        Return the list of files which make up the dataset.
185        """
186        files = self._get_assets().files
187        return DataPortalFiles(
188            [
189                DataPortalFile(file=file, client=self._client)
190                for file in files
191            ]
192        )
193
194    def get_artifact(self, artifact_type: ArtifactType) -> DataPortalFile:
195        """
196        Get the artifact of a particular type from the dataset
197        """
198        artifacts = self._get_assets().artifacts
199        artifact = next((a for a in artifacts if a.artifact_type == artifact_type), None)
200        if artifact is None:
201            raise DataPortalAssetNotFound(f"No artifact found with type '{artifact_type}'")
202        return DataPortalFile(file=artifact.file, client=self._client)
203
204    def list_artifacts(self) -> List[DataPortalFile]:
205        """
206        Return the list of artifacts associated with the dataset
207
208        An artifact may be something generated as part of the analysis or other process.
209        See `cirro_api_client.v1.models.ArtifactType` for the list of possible artifact types.
210
211        """
212        artifacts = self._get_assets().artifacts
213        return DataPortalFiles(
214            [
215                DataPortalFile(file=artifact.file, client=self._client)
216                for artifact in artifacts
217            ]
218        )
219
220    def download_files(self, download_location: str = None) -> None:
221        """
222        Download all the files from the dataset to a local directory.
223
224        Args:
225            download_location (str): Path to local directory
226        """
227
228        # Alias for internal method
229        self.list_files().download(download_location)
230
231    def run_analysis(
232            self,
233            name: str = None,
234            description: str = "",
235            process: Union[DataPortalProcess, str] = None,
236            params=None,
237            notifications_emails: List[str] = None,
238            compute_environment: str = None,
239            resume_dataset_id: str = None
240    ) -> str:
241        """
242        Runs an analysis on a dataset, returns the ID of the newly created dataset.
243
244        The process can be provided as either a DataPortalProcess object,
245        or a string which corresponds to the name or ID of the process.
246
247        Args:
248            name (str): Name of newly created dataset
249            description (str): Description of newly created dataset
250            process (DataPortalProcess or str): Process to run
251            params (dict): Analysis parameters
252            notifications_emails (List[str]): Notification email address(es)
253            compute_environment (str): Name or ID of compute environment to use,
254             if blank it will run in AWS
255            resume_dataset_id (str): ID of dataset to resume from, used for caching task execution.
256             It will attempt to re-use the previous output to minimize duplicate work
257
258        Returns:
259            dataset_id (str): ID of newly created dataset
260        """
261        if name is None:
262            raise DataPortalInputError("Must specify 'name' for run_analysis")
263        if process is None:
264            raise DataPortalInputError("Must specify 'process' for run_analysis")
265        if notifications_emails is None:
266            notifications_emails = []
267        if params is None:
268            params = {}
269
270        # If the process is a string, try to parse it as a process name or ID
271        process = parse_process_name_or_id(process, self._client)
272
273        if compute_environment:
274            compute_environments = self._client.compute_environments.list_environments_for_project(
275                project_id=self.project_id
276            )
277            compute_environment = next(
278                (env for env in compute_environments
279                 if env.name == compute_environment or env.id == compute_environment),
280                None
281            )
282            if compute_environment is None:
283                raise DataPortalInputError(f"Compute environment '{compute_environment}' not found")
284
285        resp = self._client.execution.run_analysis(
286            project_id=self.project_id,
287            request=RunAnalysisRequest(
288                name=name,
289                description=description,
290                process_id=process.id,
291                source_dataset_ids=[self.id],
292                params=RunAnalysisRequestParams.from_dict(params),
293                notification_emails=notifications_emails,
294                resume_dataset_id=resume_dataset_id,
295                compute_environment_id=compute_environment.id if compute_environment else None
296            )
297        )
298        return resp.id

Datasets in the Data Portal are collections of files which have either been uploaded directly, or which have been output by an analysis pipeline or notebook.

DataPortalDataset( dataset: Union[cirro_api_client.v1.models.Dataset, cirro_api_client.v1.models.DatasetDetail], client: CirroApi)
26    def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi):
27        """
28        Instantiate a dataset object
29
30        Should be invoked from a top-level constructor, for example:
31
32        ```python
33        from cirro import DataPortal()
34        portal = DataPortal()
35        dataset = portal.get_dataset(
36            project="id-or-name-of-project",
37            dataset="id-or-name-of-dataset"
38        )
39        ```
40
41        """
42        assert dataset.project_id is not None, "Must provide dataset with project_id attribute"
43        self._data = dataset
44        self._assets: Optional[DatasetAssets] = None
45        self._client = client

Instantiate a dataset object

Should be invoked from a top-level constructor, for example:

from cirro import DataPortal
portal = DataPortal()
dataset = portal.get_dataset(
    project="id-or-name-of-project",
    dataset="id-or-name-of-dataset"
)
id: str
47    @property
48    def id(self) -> str:
49        """Unique identifier for the dataset"""
50        return self._data.id

Unique identifier for the dataset

name: str
52    @property
53    def name(self) -> str:
54        """Editible name for the dataset"""
55        return self._data.name

Editable name for the dataset

description: str
57    @property
58    def description(self) -> str:
59        """Longer name for the dataset"""
60        return self._data.description

Longer description of the dataset

process_id: str
62    @property
63    def process_id(self) -> str:
64        """Unique ID of process used to create the dataset"""
65        return self._data.process_id

Unique ID of process used to create the dataset

67    @property
68    def process(self) -> ProcessDetail:
69        """
70        Object representing the process used to create the dataset
71        """
72        return self._client.processes.get(self.process_id)

Object representing the process used to create the dataset

project_id: str
74    @property
75    def project_id(self) -> str:
76        """ID of the project containing the dataset"""
77        return self._data.project_id

ID of the project containing the dataset

status: cirro_api_client.v1.models.Status
79    @property
80    def status(self) -> Status:
81        """
82        Status of the dataset
83        """
84        return self._data.status

Status of the dataset

source_dataset_ids: List[str]
86    @property
87    def source_dataset_ids(self) -> List[str]:
88        """IDs of the datasets used as sources for this dataset (if any)"""
89        return self._data.source_dataset_ids

IDs of the datasets used as sources for this dataset (if any)

source_datasets: List[DataPortalDataset]
 91    @property
 92    def source_datasets(self) -> List['DataPortalDataset']:
 93        """
 94        Objects representing the datasets used as sources for this dataset (if any)
 95        """
 96        return [
 97            DataPortalDataset(
 98                dataset=self._client.datasets.get(project_id=self.project_id, dataset_id=dataset_id),
 99                client=self._client
100            )
101            for dataset_id in self.source_dataset_ids
102        ]

Objects representing the datasets used as sources for this dataset (if any)

104    @property
105    def params(self) -> DatasetDetailParams:
106        """
107        Parameters used to generate the dataset
108        """
109        return self._get_detail().params

Parameters used to generate the dataset

111    @property
112    def info(self) -> DatasetDetailInfo:
113        """
114        Detailed information about the dataset
115        """
116        return self._get_detail().info

Detailed information about the dataset

tags: List[cirro_api_client.v1.models.Tag]
118    @property
119    def tags(self) -> List[Tag]:
120        """
121        Tags applied to the dataset
122        """
123        return self._data.tags

Tags applied to the dataset

created_by: str
125    @property
126    def created_by(self) -> str:
127        """User who created the dataset"""
128        return self._data.created_by

User who created the dataset

created_at: datetime.datetime
130    @property
131    def created_at(self) -> datetime.datetime:
132        """Timestamp of dataset creation"""
133        return self._data.created_at

Timestamp of dataset creation

def get_file(self, relative_path: str) -> cirro.sdk.file.DataPortalFile:
154    def get_file(self, relative_path: str) -> DataPortalFile:
155        """
156        Get a file from the dataset using its relative path.
157
158        Args:
159            relative_path (str): Relative path of file within the dataset
160
161        Returns:
162            `from cirro.sdk.file import DataPortalFile`
163        """
164
165        # Get the list of files in this dataset
166        files = self.list_files()
167
168        # Try getting the file using the relative path provided by the user
169        try:
170            return files.get_by_id(relative_path)
171        except DataPortalAssetNotFound:
172            # Try getting the file with the 'data/' prefix prepended
173            try:
174                return files.get_by_id("data/" + relative_path)
175            except DataPortalAssetNotFound:
176                # If not found, raise the exception using the string provided
177                # by the user, not the data/ prepended version (which may be
178                # confusing to the user)
179                msg = '\n'.join([f"No file found with path '{relative_path}'."])
180                raise DataPortalAssetNotFound(msg)

Get a file from the dataset using its relative path.

Arguments:
  • relative_path (str): Relative path of file within the dataset
Returns:

cirro.sdk.file.DataPortalFile

def list_files(self) -> cirro.sdk.file.DataPortalFiles:
182    def list_files(self) -> DataPortalFiles:
183        """
184        Return the list of files which make up the dataset.
185        """
186        files = self._get_assets().files
187        return DataPortalFiles(
188            [
189                DataPortalFile(file=file, client=self._client)
190                for file in files
191            ]
192        )

Return the list of files which make up the dataset.

def get_artifact( self, artifact_type: cirro_api_client.v1.models.ArtifactType) -> cirro.sdk.file.DataPortalFile:
194    def get_artifact(self, artifact_type: ArtifactType) -> DataPortalFile:
195        """
196        Get the artifact of a particular type from the dataset
197        """
198        artifacts = self._get_assets().artifacts
199        artifact = next((a for a in artifacts if a.artifact_type == artifact_type), None)
200        if artifact is None:
201            raise DataPortalAssetNotFound(f"No artifact found with type '{artifact_type}'")
202        return DataPortalFile(file=artifact.file, client=self._client)

Get the artifact of a particular type from the dataset

def list_artifacts(self) -> List[cirro.sdk.file.DataPortalFile]:
204    def list_artifacts(self) -> List[DataPortalFile]:
205        """
206        Return the list of artifacts associated with the dataset
207
208        An artifact may be something generated as part of the analysis or other process.
209        See `cirro_api_client.v1.models.ArtifactType` for the list of possible artifact types.
210
211        """
212        artifacts = self._get_assets().artifacts
213        return DataPortalFiles(
214            [
215                DataPortalFile(file=artifact.file, client=self._client)
216                for artifact in artifacts
217            ]
218        )

Return the list of artifacts associated with the dataset

An artifact may be something generated as part of the analysis or other process. See cirro_api_client.v1.models.ArtifactType for the list of possible artifact types.

def download_files(self, download_location: str = None) -> None:
220    def download_files(self, download_location: str = None) -> None:
221        """
222        Download all the files from the dataset to a local directory.
223
224        Args:
225            download_location (str): Path to local directory
226        """
227
228        # Alias for internal method
229        self.list_files().download(download_location)

Download all the files from the dataset to a local directory.

Arguments:
  • download_location (str): Path to local directory
def run_analysis( self, name: str = None, description: str = '', process: Union[DataPortalProcess, str] = None, params=None, notifications_emails: List[str] = None, compute_environment: str = None, resume_dataset_id: str = None) -> str:
231    def run_analysis(
232            self,
233            name: str = None,
234            description: str = "",
235            process: Union[DataPortalProcess, str] = None,
236            params=None,
237            notifications_emails: List[str] = None,
238            compute_environment: str = None,
239            resume_dataset_id: str = None
240    ) -> str:
241        """
242        Runs an analysis on a dataset, returns the ID of the newly created dataset.
243
244        The process can be provided as either a DataPortalProcess object,
245        or a string which corresponds to the name or ID of the process.
246
247        Args:
248            name (str): Name of newly created dataset
249            description (str): Description of newly created dataset
250            process (DataPortalProcess or str): Process to run
251            params (dict): Analysis parameters
252            notifications_emails (List[str]): Notification email address(es)
253            compute_environment (str): Name or ID of compute environment to use,
254             if blank it will run in AWS
255            resume_dataset_id (str): ID of dataset to resume from, used for caching task execution.
256             It will attempt to re-use the previous output to minimize duplicate work
257
258        Returns:
259            dataset_id (str): ID of newly created dataset
260        """
261        if name is None:
262            raise DataPortalInputError("Must specify 'name' for run_analysis")
263        if process is None:
264            raise DataPortalInputError("Must specify 'process' for run_analysis")
265        if notifications_emails is None:
266            notifications_emails = []
267        if params is None:
268            params = {}
269
270        # If the process is a string, try to parse it as a process name or ID
271        process = parse_process_name_or_id(process, self._client)
272
273        if compute_environment:
274            compute_environments = self._client.compute_environments.list_environments_for_project(
275                project_id=self.project_id
276            )
277            compute_environment = next(
278                (env for env in compute_environments
279                 if env.name == compute_environment or env.id == compute_environment),
280                None
281            )
282            if compute_environment is None:
283                raise DataPortalInputError(f"Compute environment '{compute_environment}' not found")
284
285        resp = self._client.execution.run_analysis(
286            project_id=self.project_id,
287            request=RunAnalysisRequest(
288                name=name,
289                description=description,
290                process_id=process.id,
291                source_dataset_ids=[self.id],
292                params=RunAnalysisRequestParams.from_dict(params),
293                notification_emails=notifications_emails,
294                resume_dataset_id=resume_dataset_id,
295                compute_environment_id=compute_environment.id if compute_environment else None
296            )
297        )
298        return resp.id

Runs an analysis on a dataset, returns the ID of the newly created dataset.

The process can be provided as either a DataPortalProcess object, or a string which corresponds to the name or ID of the process.

Arguments:
  • name (str): Name of newly created dataset
  • description (str): Description of newly created dataset
  • process (DataPortalProcess or str): Process to run
  • params (dict): Analysis parameters
  • notifications_emails (List[str]): Notification email address(es)
  • compute_environment (str): Name or ID of compute environment to use, if blank it will run in AWS
  • resume_dataset_id (str): ID of dataset to resume from, used for caching task execution. It will attempt to re-use the previous output to minimize duplicate work
Returns:

dataset_id (str): ID of newly created dataset

class DataPortalReference(cirro.sdk.asset.DataPortalAsset):
12class DataPortalReference(DataPortalAsset):
13    """
14    Reference data object containing files which can be used for analysis in a particular project.
15    """
16    def __init__(self, ref: Reference, project_id: str, client: CirroApi):
17        """
18        Instantiate by listing the references which have been added to a particular project
19        ```python
20        from cirro import DataPortal()
21        portal = DataPortal()
22        project = portal.get_project_by_name("Project Name")
23        references = project.list_references()
24        ```
25        """
26        self._data = ref
27        self._files = [
28            DataPortalFile(File.from_file_entry(f, project_id), client) for f in ref.files
29        ]
30
31    @property
32    def files(self) -> List[DataPortalFile]:
33        """File(s) contained in the reference"""
34        return self._files
35
36    @property
37    def name(self) -> str:
38        """Reference name"""
39        return self._data.name
40
41    @property
42    def type(self) -> str:
43        """Type of reference data (e.g. genome_fasta)"""
44        return self._data.type
45
46    @property
47    def absolute_path(self):
48        if len(self._files) == 0:
49            return None
50        return self._files[0].absolute_path
51
52    def __str__(self):
53        return self.name

Reference data object containing files which can be used for analysis in a particular project.

DataPortalReference( ref: cirro_api_client.v1.models.Reference, project_id: str, client: CirroApi)
16    def __init__(self, ref: Reference, project_id: str, client: CirroApi):
17        """
18        Instantiate by listing the references which have been added to a particular project
19        ```python
20        from cirro import DataPortal()
21        portal = DataPortal()
22        project = portal.get_project_by_name("Project Name")
23        references = project.list_references()
24        ```
25        """
26        self._data = ref
27        self._files = [
28            DataPortalFile(File.from_file_entry(f, project_id), client) for f in ref.files
29        ]

Instantiate by listing the references which have been added to a particular project

from cirro import DataPortal
portal = DataPortal()
project = portal.get_project_by_name("Project Name")
references = project.list_references()
files: List[cirro.sdk.file.DataPortalFile]
31    @property
32    def files(self) -> List[DataPortalFile]:
33        """File(s) contained in the reference"""
34        return self._files

File(s) contained in the reference

name: str
36    @property
37    def name(self) -> str:
38        """Reference name"""
39        return self._data.name

Reference name

type: str
41    @property
42    def type(self) -> str:
43        """Type of reference data (e.g. genome_fasta)"""
44        return self._data.type

Type of reference data (e.g. genome_fasta)

absolute_path
46    @property
47    def absolute_path(self):
48        if len(self._files) == 0:
49            return None
50        return self._files[0].absolute_path
class CirroApi:
 12class CirroApi:
 13    """
 14    Client for interacting directly with the Cirro API
 15    """
 16    def __init__(self, auth_info: AuthInfo = None, base_url: str = None):
 17        """
 18        Instantiates the Cirro API object
 19
 20        Args:
 21            auth_info (cirro.auth.base.AuthInfo):
 22            base_url (str): Optional base URL of the Cirro instance
 23             (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
 24
 25        Returns:
 26            Authenticated Cirro API object, which can be used to call endpoint functions.
 27
 28        Example:
 29        ```python
 30        from cirro.cirro_client import CirroApi
 31
 32        cirro = CirroApi(base_url="app.cirro.bio")
 33        print(cirro.projects.list())
 34        ```
 35        """
 36
 37        self._configuration = AppConfig(base_url=base_url)
 38        if not auth_info:
 39            auth_info = get_auth_info_from_config(self._configuration, auth_io=None)
 40
 41        self._api_client = CirroApiClient(
 42            base_url=self._configuration.rest_endpoint,
 43            auth_method=auth_info.get_auth_method(),
 44            client_name='Cirro SDK',
 45            package_name='cirro'
 46        )
 47
 48        # Init services
 49        self._file_service = FileService(self._api_client,
 50                                         enable_additional_checksum=self._configuration.enable_additional_checksum,
 51                                         transfer_retries=self._configuration.transfer_max_retries)
 52        self._dataset_service = DatasetService(self._api_client, file_service=self._file_service)
 53        self._project_service = ProjectService(self._api_client)
 54        self._process_service = ProcessService(self._api_client)
 55        self._execution_service = ExecutionService(self._api_client)
 56        self._compute_environment_service = ComputeEnvironmentService(self._api_client)
 57        self._metrics_service = MetricsService(self._api_client)
 58        self._metadata_service = MetadataService(self._api_client)
 59        self._billing_service = BillingService(self._api_client)
 60        self._references_service = ReferenceService(self._api_client)
 61        self._shares_service = ShareService(self._api_client)
 62        self._users_service = UserService(self._api_client)
 63
 64    @property
 65    def datasets(self) -> DatasetService:
 66        """
 67        Create, list, delete, and modify Datasets
 68        """
 69        return self._dataset_service
 70
 71    @property
 72    def projects(self) -> ProjectService:
 73        """
 74        Create, list, delete, and modify Projects
 75        """
 76        return self._project_service
 77
 78    @property
 79    def processes(self) -> ProcessService:
 80        """
 81        List and retrieve detailed information about Processes
 82        """
 83        return self._process_service
 84
 85    @property
 86    def execution(self) -> ExecutionService:
 87        """
 88        List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)
 89        """
 90        return self._execution_service
 91
 92    @property
 93    def compute_environments(self) -> ComputeEnvironmentService:
 94        """
 95        List and update compute environments
 96        """
 97        return self._compute_environment_service
 98
 99    @property
100    def metrics(self) -> MetricsService:
101        """
102        Project-level summary metrics
103        """
104        return self._metrics_service
105
106    @property
107    def metadata(self) -> MetadataService:
108        """
109        List and modify Sample metadata or metadata schemas
110        """
111        return self._metadata_service
112
113    @property
114    def billing(self) -> BillingService:
115        """
116        List and update billing accounts
117        """
118        return self._billing_service
119
120    @property
121    def references(self) -> ReferenceService:
122        """
123        List References and Reference types
124        """
125        return self._references_service
126
127    @property
128    def shares(self) -> ShareService:
129        """
130        List, create, update, delete, and subscribe to shares
131        """
132        return self._shares_service
133
134    @property
135    def users(self) -> UserService:
136        """
137        List and update user information
138        """
139        return self._users_service
140
141    @property
142    def file(self) -> FileService:
143        """
144        Read, download, and create file objects
145        """
146        return self._file_service
147
148    @property
149    def api_client(self) -> CirroApiClient:
150        """
151        Gets the underlying API client
152        """
153        return self._api_client
154
155    @property
156    def configuration(self) -> AppConfig:
157        """
158        Gets the configuration of the instance
159        """
160        return self._configuration

Client for interacting directly with the Cirro API

CirroApi(auth_info: cirro.auth.base.AuthInfo = None, base_url: str = None)
16    def __init__(self, auth_info: AuthInfo = None, base_url: str = None):
17        """
18        Instantiates the Cirro API object
19
20        Args:
21            auth_info (cirro.auth.base.AuthInfo):
22            base_url (str): Optional base URL of the Cirro instance
23             (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
24
25        Returns:
26            Authenticated Cirro API object, which can be used to call endpoint functions.
27
28        Example:
29        ```python
30        from cirro.cirro_client import CirroApi
31
32        cirro = CirroApi(base_url="app.cirro.bio")
33        print(cirro.projects.list())
34        ```
35        """
36
37        self._configuration = AppConfig(base_url=base_url)
38        if not auth_info:
39            auth_info = get_auth_info_from_config(self._configuration, auth_io=None)
40
41        self._api_client = CirroApiClient(
42            base_url=self._configuration.rest_endpoint,
43            auth_method=auth_info.get_auth_method(),
44            client_name='Cirro SDK',
45            package_name='cirro'
46        )
47
48        # Init services
49        self._file_service = FileService(self._api_client,
50                                         enable_additional_checksum=self._configuration.enable_additional_checksum,
51                                         transfer_retries=self._configuration.transfer_max_retries)
52        self._dataset_service = DatasetService(self._api_client, file_service=self._file_service)
53        self._project_service = ProjectService(self._api_client)
54        self._process_service = ProcessService(self._api_client)
55        self._execution_service = ExecutionService(self._api_client)
56        self._compute_environment_service = ComputeEnvironmentService(self._api_client)
57        self._metrics_service = MetricsService(self._api_client)
58        self._metadata_service = MetadataService(self._api_client)
59        self._billing_service = BillingService(self._api_client)
60        self._references_service = ReferenceService(self._api_client)
61        self._shares_service = ShareService(self._api_client)
62        self._users_service = UserService(self._api_client)

Instantiates the Cirro API object

Arguments:
  • auth_info (cirro.auth.base.AuthInfo): Optional authentication information (if not provided, it is obtained from the configuration)
  • base_url (str): Optional base URL of the Cirro instance (if not provided, it uses the CIRRO_BASE_URL environment variable, or the config file)
Returns:

Authenticated Cirro API object, which can be used to call endpoint functions.

Example:

from cirro.cirro_client import CirroApi

cirro = CirroApi(base_url="app.cirro.bio")
print(cirro.projects.list())
datasets: cirro.services.DatasetService
64    @property
65    def datasets(self) -> DatasetService:
66        """
67        Create, list, delete, and modify Datasets
68        """
69        return self._dataset_service

Create, list, delete, and modify Datasets

projects: cirro.services.ProjectService
71    @property
72    def projects(self) -> ProjectService:
73        """
74        Create, list, delete, and modify Projects
75        """
76        return self._project_service

Create, list, delete, and modify Projects

processes: cirro.services.ProcessService
78    @property
79    def processes(self) -> ProcessService:
80        """
81        List and retrieve detailed information about Processes
82        """
83        return self._process_service

List and retrieve detailed information about Processes

execution: cirro.services.ExecutionService
85    @property
86    def execution(self) -> ExecutionService:
87        """
88        List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)
89        """
90        return self._execution_service

List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)

compute_environments: cirro.services.ComputeEnvironmentService
92    @property
93    def compute_environments(self) -> ComputeEnvironmentService:
94        """
95        List and update compute environments
96        """
97        return self._compute_environment_service

List and update compute environments

metrics: cirro.services.MetricsService
 99    @property
100    def metrics(self) -> MetricsService:
101        """
102        Project-level summary metrics
103        """
104        return self._metrics_service

Project-level summary metrics

metadata: cirro.services.MetadataService
106    @property
107    def metadata(self) -> MetadataService:
108        """
109        List and modify Sample metadata or metadata schemas
110        """
111        return self._metadata_service

List and modify Sample metadata or metadata schemas

billing: cirro.services.BillingService
113    @property
114    def billing(self) -> BillingService:
115        """
116        List and update billing accounts
117        """
118        return self._billing_service

List and update billing accounts

references: cirro.services.ReferenceService
120    @property
121    def references(self) -> ReferenceService:
122        """
123        List References and Reference types
124        """
125        return self._references_service

List References and Reference types

shares: cirro.services.ShareService
127    @property
128    def shares(self) -> ShareService:
129        """
130        List, create, update, delete, and subscribe to shares
131        """
132        return self._shares_service

List, create, update, delete, and subscribe to shares

users: cirro.services.UserService
134    @property
135    def users(self) -> UserService:
136        """
137        List and update user information
138        """
139        return self._users_service

List and update user information

file: cirro.services.FileService
141    @property
142    def file(self) -> FileService:
143        """
144        Read, download, and create file objects
145        """
146        return self._file_service

Read, download, and create file objects

api_client: cirro_api_client.CirroApiClient
148    @property
149    def api_client(self) -> CirroApiClient:
150        """
151        Gets the underlying API client
152        """
153        return self._api_client

Gets the underlying API client

configuration: cirro.config.AppConfig
155    @property
156    def configuration(self) -> AppConfig:
157        """
158        Gets the configuration of the instance
159        """
160        return self._configuration

Gets the configuration of the instance