cirro

 1import cirro.file_utils  # noqa
 2from cirro.cirro_client import CirroApi
 3from cirro.sdk.dataset import DataPortalDataset
 4from cirro.sdk.portal import DataPortal
 5from cirro.sdk.process import DataPortalProcess
 6from cirro.sdk.project import DataPortalProject
 7from cirro.sdk.reference import DataPortalReference
 8
# Names exported as the package's public interface.
# 'file_utils' is the submodule bound by `import cirro.file_utils` above.
__all__ = [
    'DataPortal',
    'DataPortalProject',
    'DataPortalProcess',
    'DataPortalDataset',
    'DataPortalReference',
    'CirroApi',
    'file_utils'
]
class DataPortal:
 12class DataPortal:
 13    """
 14    Helper functions for exploring the Projects, Datasets, Samples, and Files
 15    available in the Data Portal.
 16    """
 17
 18    def __init__(self, client: CirroApi = None):
 19        """Set up the DataPortal object, establishing an authenticated connection."""
 20
 21        if client is not None:
 22            self._client = client
 23
 24        # Set up default client if not provided
 25        else:
 26            self._client = CirroApi()
 27
 28    def list_projects(self) -> DataPortalProjects:
 29        """List all the projects available in the Data Portal."""
 30
 31        return DataPortalProjects(
 32            [
 33                DataPortalProject(proj, self._client)
 34                for proj in self._client.projects.list()
 35            ]
 36        )
 37
 38    def get_project_by_name(self, name: str = None) -> DataPortalProject:
 39        """Return the project with the specified name."""
 40
 41        return self.list_projects().get_by_name(name)
 42
 43    def get_project_by_id(self, _id: str = None) -> DataPortalProject:
 44        """Return the project with the specified id."""
 45
 46        return self.list_projects().get_by_id(_id)
 47
 48    def get_dataset(self, project: str = None, dataset: str = None) -> DataPortalDataset:
 49        """
 50        Return a dataset identified by ID or name.
 51
 52        Args:
 53            project (str): ID or name of project
 54            dataset (str): ID or name of dataset
 55
 56        Returns:
 57            `cirro.sdk.dataset.DataPortalDataset`
 58
 59            ```python
 60            from cirro import DataPortal()
 61            portal = DataPortal()
 62            dataset = portal.get_dataset(
 63                project="id-or-name-of-project",
 64                dataset="id-or-name-of-dataset"
 65            )
 66            ```
 67        """
 68        try:
 69            project: DataPortalProject = self.get_project_by_id(project)
 70        except DataPortalAssetNotFound:
 71            project: DataPortalProject = self.get_project_by_name(project)
 72
 73        try:
 74            return project.get_dataset_by_id(dataset)
 75        except DataPortalAssetNotFound:
 76            return project.get_dataset_by_name(dataset)
 77
 78    def list_processes(self, ingest=False) -> DataPortalProcesses:
 79        """
 80        List all the processes available in the Data Portal.
 81        By default, only list non-ingest processes (those which can be run on existing datasets).
 82        To list the processes which can be used to upload datasets, use `ingest = True`.
 83
 84        Args:
 85            ingest (bool): If True, only list those processes which can be used to ingest datasets directly
 86        """
 87
 88        return DataPortalProcesses(
 89            [
 90                DataPortalProcess(p, self._client)
 91                for p in self._client.processes.list()
 92                if not ingest or p.executor == Executor.INGEST
 93            ]
 94        )
 95
 96    def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess:
 97        """
 98        Return the process with the specified name.
 99
100        Args:
101            name (str): Name of process
102        """
103
104        return self.list_processes(ingest=ingest).get_by_name(name)
105
106    def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess:
107        """
108        Return the process with the specified id
109
110        Args:
111            id (str): ID of process
112        """
113
114        return self.list_processes(ingest=ingest).get_by_id(id)
115
116    def list_reference_types(self) -> DataPortalReferenceTypes:
117        """
118        Return the list of all available reference types
119        """
120
121        return DataPortalReferenceTypes(
122            [
123                DataPortalReferenceType(ref)
124                for ref in self._client.references.get_types()
125            ]
126        )

Helper functions for exploring the Projects, Datasets, Samples, and Files available in the Data Portal.

DataPortal(client: CirroApi = None)
18    def __init__(self, client: CirroApi = None):
19        """Set up the DataPortal object, establishing an authenticated connection."""
20
21        if client is not None:
22            self._client = client
23
24        # Set up default client if not provided
25        else:
26            self._client = CirroApi()

Set up the DataPortal object, establishing an authenticated connection.

def list_projects(self) -> cirro.sdk.project.DataPortalProjects:
28    def list_projects(self) -> DataPortalProjects:
29        """List all the projects available in the Data Portal."""
30
31        return DataPortalProjects(
32            [
33                DataPortalProject(proj, self._client)
34                for proj in self._client.projects.list()
35            ]
36        )

List all the projects available in the Data Portal.

def get_project_by_name(self, name: str = None) -> DataPortalProject:
38    def get_project_by_name(self, name: str = None) -> DataPortalProject:
39        """Return the project with the specified name."""
40
41        return self.list_projects().get_by_name(name)

Return the project with the specified name.

def get_project_by_id(self, _id: str = None) -> DataPortalProject:
43    def get_project_by_id(self, _id: str = None) -> DataPortalProject:
44        """Return the project with the specified id."""
45
46        return self.list_projects().get_by_id(_id)

Return the project with the specified id.

def get_dataset( self, project: str = None, dataset: str = None) -> DataPortalDataset:
48    def get_dataset(self, project: str = None, dataset: str = None) -> DataPortalDataset:
49        """
50        Return a dataset identified by ID or name.
51
52        Args:
53            project (str): ID or name of project
54            dataset (str): ID or name of dataset
55
56        Returns:
57            `cirro.sdk.dataset.DataPortalDataset`
58
59            ```python
60            from cirro import DataPortal()
61            portal = DataPortal()
62            dataset = portal.get_dataset(
63                project="id-or-name-of-project",
64                dataset="id-or-name-of-dataset"
65            )
66            ```
67        """
68        try:
69            project: DataPortalProject = self.get_project_by_id(project)
70        except DataPortalAssetNotFound:
71            project: DataPortalProject = self.get_project_by_name(project)
72
73        try:
74            return project.get_dataset_by_id(dataset)
75        except DataPortalAssetNotFound:
76            return project.get_dataset_by_name(dataset)

Return a dataset identified by ID or name.

Arguments:
  • project (str): ID or name of project
  • dataset (str): ID or name of dataset
Returns:

DataPortalDataset

from cirro import DataPortal
portal = DataPortal()
dataset = portal.get_dataset(
    project="id-or-name-of-project",
    dataset="id-or-name-of-dataset"
)
def list_processes(self, ingest=False) -> cirro.sdk.process.DataPortalProcesses:
78    def list_processes(self, ingest=False) -> DataPortalProcesses:
79        """
80        List all the processes available in the Data Portal.
81        By default, only list non-ingest processes (those which can be run on existing datasets).
82        To list the processes which can be used to upload datasets, use `ingest = True`.
83
84        Args:
85            ingest (bool): If True, only list those processes which can be used to ingest datasets directly
86        """
87
88        return DataPortalProcesses(
89            [
90                DataPortalProcess(p, self._client)
91                for p in self._client.processes.list()
92                if not ingest or p.executor == Executor.INGEST
93            ]
94        )

List the processes available in the Data Portal. By default (ingest = False), every process is listed regardless of executor. To list only the processes which can be used to upload datasets, use ingest = True.

Arguments:
  • ingest (bool): If True, only list those processes which can be used to ingest datasets directly
def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess:
 96    def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess:
 97        """
 98        Return the process with the specified name.
 99
100        Args:
101            name (str): Name of process
102        """
103
104        return self.list_processes(ingest=ingest).get_by_name(name)

Return the process with the specified name.

Arguments:
  • name (str): Name of process
def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess:
106    def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess:
107        """
108        Return the process with the specified id
109
110        Args:
111            id (str): ID of process
112        """
113
114        return self.list_processes(ingest=ingest).get_by_id(id)

Return the process with the specified id

Arguments:
  • id (str): ID of process
def list_reference_types(self) -> cirro.sdk.reference_type.DataPortalReferenceTypes:
116    def list_reference_types(self) -> DataPortalReferenceTypes:
117        """
118        Return the list of all available reference types
119        """
120
121        return DataPortalReferenceTypes(
122            [
123                DataPortalReferenceType(ref)
124                for ref in self._client.references.get_types()
125            ]
126        )

Return the list of all available reference types

class DataPortalProject(cirro.sdk.asset.DataPortalAsset):
 19class DataPortalProject(DataPortalAsset):
 20    """
 21    Projects in the Data Portal contain collections of Datasets.
 22    Users are granted permissions at the project-level, allowing them
 23    to view and/or modify all the datasets in that collection.
 24    """
 25    def __init__(self, proj: Project, client: CirroApi):
 26        """
 27        Instantiate with helper method
 28
 29        ```python
 30        from cirro import DataPortal()
 31        portal = DataPortal()
 32        project = portal.get_project_by_name("Project Name")
 33        ```
 34
 35        """
 36        self._data = proj
 37        self._client = client
 38
 39    @property
 40    def id(self) -> str:
 41        """
 42        Unique identifier
 43        """
 44        return self._data.id
 45
 46    @property
 47    def name(self) -> str:
 48        """
 49        Readable name
 50        """
 51        return self._data.name
 52
 53    @property
 54    def description(self) -> str:
 55        """
 56        Longer description of the project
 57        """
 58        return self._data.description
 59
 60    def __str__(self):
 61        """Control how the Project is rendered as a string."""
 62
 63        return '\n'.join([
 64            f"{i.title()}: {self.__getattribute__(i)}"
 65            for i in ['name', 'id', 'description']
 66        ])
 67
 68    @cache
 69    def _get_datasets(self) -> List[Dataset]:
 70        return self._client.datasets.list(self.id)
 71
 72    def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
 73        """List all the datasets available in the project."""
 74        if force_refresh:
 75            self._get_datasets.cache_clear()
 76
 77        return DataPortalDatasets(
 78            [
 79                DataPortalDataset(d, self._client)
 80                for d in self._get_datasets()
 81            ]
 82        )
 83
 84    def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
 85        """Return the dataset with the specified name."""
 86        if force_refresh:
 87            self._get_datasets.cache_clear()
 88
 89        dataset = next((d for d in self._get_datasets() if d.name == name), None)
 90        if dataset is None:
 91            raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
 92        return self.get_dataset_by_id(dataset.id)
 93
 94    def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
 95        """Return the dataset with the specified id."""
 96
 97        dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id)
 98        if dataset is None:
 99            raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
100        return DataPortalDataset(dataset, self._client)
101
102    def list_references(self, reference_type: str = None) -> DataPortalReferences:
103        """
104        List the references available in a project.
105        Optionally filter to references of a particular type (identified by name)
106        """
107
108        # Get the complete list of references which are available
109        reference_types = DataPortalReferenceTypes(
110            [
111                DataPortalReferenceType(ref)
112                for ref in self._client.references.get_types()
113            ]
114        )
115
116        # If a particular name was specified
117        if reference_type is not None:
118            reference_types = reference_types.filter_by_pattern(reference_type)
119            if len(reference_types) == 0:
120                msg = f"Could not find any reference types with the name {reference_type}"
121                raise DataPortalAssetNotFound(msg)
122
123        return DataPortalReferences(
124            [
125                DataPortalReference(ref, project_id=self.id, client=self._client)
126                for ref in self._client.references.get_for_project(
127                    self.id
128                )
129                if reference_type is None or ref.type == reference_type
130            ]
131        )
132
133    def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
134        """Return the reference of a particular type with the specified name."""
135
136        if name is None:
137            raise DataPortalInputError("Must specify the reference name")
138
139        return self.list_references(ref_type).get_by_name(name)
140
141    def upload_dataset(
142        self,
143        name: str = None,
144        description='',
145        process: Union[DataPortalProcess, str] = None,
146        upload_folder: str = None,
147        files: list = None
148    ):
149        """
150        Upload a set of files to the Data Portal, creating a new dataset.
151
152        If the files parameter is not provided, it will upload all files in the upload folder
153
154        Args:
155            name (str): Name of newly created dataset
156            description (str): Description of newly created dataset
157            process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object
158            upload_folder (str): Folder containing files to upload
159            files (List[str]): Optional subset of files to upload from the folder
160        """
161
162        if name is None:
163            raise DataPortalInputError("Must provide name for new dataset")
164        if process is None:
165            raise DataPortalInputError("Must provide the process which is used for ingest")
166        if upload_folder is None:
167            raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")
168
169        # Parse the process provided by the user
170        process = parse_process_name_or_id(process, self._client)
171
172        # If no files were provided
173        if files is None:
174            # Get the list of files in the upload folder
175            files = get_files_in_directory(upload_folder)
176
177        if files is None or len(files) == 0:
178            raise RuntimeWarning("No files to upload, exiting")
179
180        # Make sure that the files match the expected pattern
181        self._client.processes.check_dataset_files(files, process.id, upload_folder)
182
183        # Create the ingest process request
184        dataset_create_request = UploadDatasetRequest(
185            process_id=process.id,
186            name=name,
187            description=description,
188            expected_files=files
189        )
190
191        # Get the response
192        create_response = self._client.datasets.create(project_id=self.id,
193                                                       upload_request=dataset_create_request)
194
195        # Upload the files
196        self._client.datasets.upload_files(
197            project_id=self.id,
198            dataset_id=create_response.id,
199            local_directory=upload_folder,
200            files=files
201        )
202
203        # Return the dataset which was created, which might take a second to update
204        max_attempts = 5
205        for attempt in range(max_attempts):
206            try:
207                return self.get_dataset_by_id(create_response.id)
208            except DataPortalAssetNotFound as e:
209                if attempt == max_attempts - 1:
210                    raise e
211                else:
212                    sleep(2)

Projects in the Data Portal contain collections of Datasets. Users are granted permissions at the project-level, allowing them to view and/or modify all the datasets in that collection.

DataPortalProject( proj: cirro_api_client.v1.models.Project, client: CirroApi)
25    def __init__(self, proj: Project, client: CirroApi):
26        """
27        Instantiate with helper method
28
29        ```python
30        from cirro import DataPortal()
31        portal = DataPortal()
32        project = portal.get_project_by_name("Project Name")
33        ```
34
35        """
36        self._data = proj
37        self._client = client

Instantiate with helper method

from cirro import DataPortal
portal = DataPortal()
project = portal.get_project_by_name("Project Name")
id: str
39    @property
40    def id(self) -> str:
41        """
42        Unique identifier
43        """
44        return self._data.id

Unique identifier

name: str
46    @property
47    def name(self) -> str:
48        """
49        Readable name
50        """
51        return self._data.name

Readable name

description: str
53    @property
54    def description(self) -> str:
55        """
56        Longer description of the project
57        """
58        return self._data.description

Longer description of the project

def list_datasets(self, force_refresh=False) -> cirro.sdk.dataset.DataPortalDatasets:
72    def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
73        """List all the datasets available in the project."""
74        if force_refresh:
75            self._get_datasets.cache_clear()
76
77        return DataPortalDatasets(
78            [
79                DataPortalDataset(d, self._client)
80                for d in self._get_datasets()
81            ]
82        )

List all the datasets available in the project.

def get_dataset_by_name( self, name: str, force_refresh=False) -> DataPortalDataset:
84    def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
85        """Return the dataset with the specified name."""
86        if force_refresh:
87            self._get_datasets.cache_clear()
88
89        dataset = next((d for d in self._get_datasets() if d.name == name), None)
90        if dataset is None:
91            raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
92        return self.get_dataset_by_id(dataset.id)

Return the dataset with the specified name.

def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
 94    def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
 95        """Return the dataset with the specified id."""
 96
 97        dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id)
 98        if dataset is None:
 99            raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
100        return DataPortalDataset(dataset, self._client)

Return the dataset with the specified id.

def list_references( self, reference_type: str = None) -> cirro.sdk.reference.DataPortalReferences:
102    def list_references(self, reference_type: str = None) -> DataPortalReferences:
103        """
104        List the references available in a project.
105        Optionally filter to references of a particular type (identified by name)
106        """
107
108        # Get the complete list of references which are available
109        reference_types = DataPortalReferenceTypes(
110            [
111                DataPortalReferenceType(ref)
112                for ref in self._client.references.get_types()
113            ]
114        )
115
116        # If a particular name was specified
117        if reference_type is not None:
118            reference_types = reference_types.filter_by_pattern(reference_type)
119            if len(reference_types) == 0:
120                msg = f"Could not find any reference types with the name {reference_type}"
121                raise DataPortalAssetNotFound(msg)
122
123        return DataPortalReferences(
124            [
125                DataPortalReference(ref, project_id=self.id, client=self._client)
126                for ref in self._client.references.get_for_project(
127                    self.id
128                )
129                if reference_type is None or ref.type == reference_type
130            ]
131        )

List the references available in a project. Optionally filter to references of a particular type (identified by name)

def get_reference_by_name( self, name: str = None, ref_type: str = None) -> DataPortalReference:
133    def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
134        """Return the reference of a particular type with the specified name."""
135
136        if name is None:
137            raise DataPortalInputError("Must specify the reference name")
138
139        return self.list_references(ref_type).get_by_name(name)

Return the reference of a particular type with the specified name.

def upload_dataset( self, name: str = None, description='', process: Union[DataPortalProcess, str] = None, upload_folder: str = None, files: list = None):
141    def upload_dataset(
142        self,
143        name: str = None,
144        description='',
145        process: Union[DataPortalProcess, str] = None,
146        upload_folder: str = None,
147        files: list = None
148    ):
149        """
150        Upload a set of files to the Data Portal, creating a new dataset.
151
152        If the files parameter is not provided, it will upload all files in the upload folder
153
154        Args:
155            name (str): Name of newly created dataset
156            description (str): Description of newly created dataset
157            process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object
158            upload_folder (str): Folder containing files to upload
159            files (List[str]): Optional subset of files to upload from the folder
160        """
161
162        if name is None:
163            raise DataPortalInputError("Must provide name for new dataset")
164        if process is None:
165            raise DataPortalInputError("Must provide the process which is used for ingest")
166        if upload_folder is None:
167            raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")
168
169        # Parse the process provided by the user
170        process = parse_process_name_or_id(process, self._client)
171
172        # If no files were provided
173        if files is None:
174            # Get the list of files in the upload folder
175            files = get_files_in_directory(upload_folder)
176
177        if files is None or len(files) == 0:
178            raise RuntimeWarning("No files to upload, exiting")
179
180        # Make sure that the files match the expected pattern
181        self._client.processes.check_dataset_files(files, process.id, upload_folder)
182
183        # Create the ingest process request
184        dataset_create_request = UploadDatasetRequest(
185            process_id=process.id,
186            name=name,
187            description=description,
188            expected_files=files
189        )
190
191        # Get the response
192        create_response = self._client.datasets.create(project_id=self.id,
193                                                       upload_request=dataset_create_request)
194
195        # Upload the files
196        self._client.datasets.upload_files(
197            project_id=self.id,
198            dataset_id=create_response.id,
199            local_directory=upload_folder,
200            files=files
201        )
202
203        # Return the dataset which was created, which might take a second to update
204        max_attempts = 5
205        for attempt in range(max_attempts):
206            try:
207                return self.get_dataset_by_id(create_response.id)
208            except DataPortalAssetNotFound as e:
209                if attempt == max_attempts - 1:
210                    raise e
211                else:
212                    sleep(2)

Upload a set of files to the Data Portal, creating a new dataset.

If the files parameter is not provided, it will upload all files in the upload folder

Arguments:
  • name (str): Name of newly created dataset
  • description (str): Description of newly created dataset
  • process (str | DataPortalProcess): Process to run may be referenced by name, ID, or object
  • upload_folder (str): Folder containing files to upload
  • files (List[str]): Optional subset of files to upload from the folder
class DataPortalProcess(cirro.sdk.asset.DataPortalAsset):
class DataPortalProcess(DataPortalAsset):
    """Helper functions for interacting with analysis processes."""
    # Underlying process record that the properties below read from
    _data: Process

    def __init__(self, process: Process, client: CirroApi):
        """
        Instantiate with helper method

        ```python
        from cirro import DataPortal
        portal = DataPortal()
        process = portal.get_process_by_name("Process Name")
        ```
        """
        self._data, self._client = process, client

    @property
    def id(self) -> str:
        """Unique identifier"""
        record = self._data
        return record.id

    @property
    def name(self) -> str:
        """Readable name"""
        record = self._data
        return record.name

    @property
    def description(self) -> str:
        """Longer description of process"""
        record = self._data
        return record.description

    @property
    def child_process_ids(self) -> List[str]:
        """List of processes which can be run on the output of this process"""
        record = self._data
        return record.child_process_ids

    @property
    def executor(self) -> Executor:
        """INGEST, CROMWELL, or NEXTFLOW"""
        record = self._data
        return record.executor

    @property
    def documentation_url(self) -> str:
        """Documentation URL"""
        record = self._data
        return record.documentation_url

    @property
    def file_requirements_message(self) -> str:
        """Description of files required for INGEST processes"""
        record = self._data
        return record.file_requirements_message

    def __str__(self):
        """Render the process as one "Title: value" line per displayed attribute."""
        lines = []
        for field in ('name', 'id', 'description'):
            lines.append(f"{field.title()}: {self.__getattribute__(field)}")
        return '\n'.join(lines)

    def get_parameter_spec(self) -> ParameterSpecification:
        """
        Gets a specification used to describe the parameters used in the process.
        """
        process_id = self.id
        return self._client.processes.get_parameter_spec(process_id)

Helper functions for interacting with analysis processes.

DataPortalProcess( process: cirro_api_client.v1.models.Process, client: CirroApi)
15    def __init__(self, process: Process, client: CirroApi):
16        """
17        Instantiate with helper method
18
19        ```python
20        from cirro import DataPortal()
21        portal = DataPortal()
22        process = portal.get_process_by_name("Process Name")
23        ```
24        """
25        self._data = process
26        self._client = client

Instantiate with helper method

from cirro import DataPortal
portal = DataPortal()
process = portal.get_process_by_name("Process Name")
id: str
28    @property
29    def id(self) -> str:
30        """Unique identifier"""
31        return self._data.id

Unique identifier

name: str
33    @property
34    def name(self) -> str:
35        """Readable name"""
36        return self._data.name

Readable name

description: str
38    @property
39    def description(self) -> str:
40        """Longer description of process"""
41        return self._data.description

Longer description of process

child_process_ids: List[str]
43    @property
44    def child_process_ids(self) -> List[str]:
45        """List of processes which can be run on the output of this process"""
46        return self._data.child_process_ids

List of processes which can be run on the output of this process

executor: cirro_api_client.v1.models.Executor
48    @property
49    def executor(self) -> Executor:
50        """INGEST, CROMWELL, or NEXTFLOW"""
51        return self._data.executor

INGEST, CROMWELL, or NEXTFLOW

documentation_url: str
53    @property
54    def documentation_url(self) -> str:
55        """Documentation URL"""
56        return self._data.documentation_url

Documentation URL

file_requirements_message: str
58    @property
59    def file_requirements_message(self) -> str:
60        """Description of files required for INGEST processes"""
61        return self._data.file_requirements_message

Description of files required for INGEST processes

def get_parameter_spec(self) -> cirro.models.form_specification.ParameterSpecification:
69    def get_parameter_spec(self) -> ParameterSpecification:
70        """
71        Gets a specification used to describe the parameters used in the process.
72        """
73        return self._client.processes.get_parameter_spec(self.id)

Gets a specification used to describe the parameters used in the process.

class DataPortalDataset(cirro.sdk.asset.DataPortalAsset):
 17class DataPortalDataset(DataPortalAsset):
 18    """
 19    Datasets in the Data Portal are collections of files which have
 20    either been uploaded directly, or which have been output by
 21    an analysis pipeline or notebook.
 22    """
 23
 24    def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi):
 25        """
 26        Instantiate a dataset object
 27
 28        Should be invoked from a top-level constructor, for example:
 29
 30        ```python
 31        from cirro import DataPortal
 32        portal = DataPortal()
 33        dataset = portal.get_dataset(
 34            project="id-or-name-of-project",
 35            dataset="id-or-name-of-dataset"
 36        )
 37        ```
 38
 39        """
 40        assert dataset.project_id is not None, "Must provide dataset with project_id attribute"
 41        self._data = dataset
 42        self._files: Optional[List[FileEntry]] = None
 43        self._client = client
 44
 45    @property
 46    def id(self) -> str:
 47        """Unique identifier for the dataset"""
 48        return self._data.id
 49
 50    @property
 51    def name(self) -> str:
 52        """Editable name for the dataset"""
 53        return self._data.name
 54
 55    @property
 56    def description(self) -> str:
 57        """Longer description of the dataset"""
 58        return self._data.description
 59
 60    @property
 61    def process_id(self) -> str:
 62        """Unique ID of process used to create the dataset"""
 63        return self._data.process_id
 64
 65    @property
 66    def process(self) -> ProcessDetail:
 67        """
 68        Object representing the process used to create the dataset
 69        """
 70        return self._client.processes.get(self.process_id)
 71
 72    @property
 73    def project_id(self) -> str:
 74        """ID of the project containing the dataset"""
 75        return self._data.project_id
 76
 77    @property
 78    def status(self) -> Status:
 79        """
 80        Status of the dataset
 81        """
 82        return self._data.status
 83
 84    @property
 85    def source_dataset_ids(self) -> List[str]:
 86        """IDs of the datasets used as sources for this dataset (if any)"""
 87        return self._data.source_dataset_ids
 88
 89    @property
 90    def source_datasets(self) -> List['DataPortalDataset']:
 91        """
 92        Objects representing the datasets used as sources for this dataset (if any)
 93        """
 94        return [
 95            DataPortalDataset(
 96                dataset=self._client.datasets.get(project_id=self.project_id, dataset_id=dataset_id),
 97                client=self._client
 98            )
 99            for dataset_id in self.source_dataset_ids
100        ]
101
102    @property
103    def params(self) -> DatasetDetailParams:
104        """
105        Parameters used to generate the dataset
106        """
107        return self._get_detail().params
108
109    @property
110    def info(self) -> DatasetDetailInfo:
111        """
112        Detailed information about the dataset
113        """
114        return self._get_detail().info
115
116    @property
117    def tags(self) -> List[Tag]:
118        """
119        Tags applied to the dataset
120        """
121        return self._data.tags
122
123    @property
124    def created_by(self) -> str:
125        """User who created the dataset"""
126        return self._data.created_by
127
128    @property
129    def created_at(self) -> datetime.datetime:
130        """Timestamp of dataset creation"""
131        return self._data.created_at
132
133    def _get_detail(self):
134        if not isinstance(self._data, DatasetDetail):
135            self._data = self._client.datasets.get(project_id=self.project_id, dataset_id=self.id)
136        return self._data
137
138    def __str__(self):
139        return '\n'.join([
140            f"{i.title()}: {self.__getattribute__(i)}"
141            for i in ['name', 'id', 'description', 'status']
142        ])
143
144    def list_files(self) -> DataPortalFiles:
145        """
146        Return the list of files which make up the dataset.
147        """
148        if not self._files:
149            self._files = DataPortalFiles(
150                [
151                    DataPortalFile(file=file, client=self._client)
152                    for file in self._client.datasets.get_file_listing(
153                        project_id=self.project_id,
154                        dataset_id=self.id
155                    )
156                ]
157            )
158        return self._files
159
160    def download_files(self, download_location: str = None) -> None:
161        """
162        Download all the files from the dataset to a local directory.
163
164        Args:
165            download_location (str): Path to local directory
166        """
167
168        # Alias for internal method
169        self.list_files().download(download_location)
170
171    def run_analysis(
172            self,
173            name: str = None,
174            description: str = "",
175            process: Union[DataPortalProcess, str] = None,
176            params=None,
177            notifications_emails=None
178    ) -> str:
179        """
180        Runs an analysis on a dataset, returns the ID of the newly created dataset.
181
182        The process can be provided as either a DataPortalProcess object,
183        or a string which corresponds to the name or ID of the process.
184
185        Args:
186            name (str): Name of newly created dataset
187            description (str): Description of newly created dataset
188            process (DataPortalProcess or str): Process to run
189            params (dict): Analysis parameters
190            notifications_emails (List[str]): Notification email address(es)
191
192        Returns:
193            dataset_id (str): ID of newly created dataset
194        """
195        if name is None:
196            raise DataPortalInputError("Must specify 'name' for run_analysis")
197        if process is None:
198            raise DataPortalInputError("Must specify 'process' for run_analysis")
199        if notifications_emails is None:
200            notifications_emails = []
201        if params is None:
202            params = {}
203
204        # If the process is a string, try to parse it as a process name or ID
205        process = parse_process_name_or_id(process, self._client)
206
207        resp = self._client.execution.run_analysis(
208            project_id=self.project_id,
209            request=RunAnalysisRequest(
210                name=name,
211                description=description,
212                process_id=process.id,
213                source_dataset_ids=[self.id],
214                params=RunAnalysisRequestParams.from_dict(params),
215                notification_emails=notifications_emails
216            )
217        )
218        return resp.id

Datasets in the Data Portal are collections of files which have either been uploaded directly, or which have been output by an analysis pipeline or notebook.

DataPortalDataset( dataset: Union[cirro_api_client.v1.models.Dataset, cirro_api_client.v1.models.DatasetDetail], client: CirroApi)
24    def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi):
25        """
26        Instantiate a dataset object
27
28        Should be invoked from a top-level constructor, for example:
29
30        ```python
31        from cirro import DataPortal
32        portal = DataPortal()
33        dataset = portal.get_dataset(
34            project="id-or-name-of-project",
35            dataset="id-or-name-of-dataset"
36        )
37        ```
38
39        """
40        assert dataset.project_id is not None, "Must provide dataset with project_id attribute"
41        self._data = dataset
42        self._files: Optional[List[FileEntry]] = None
43        self._client = client

Instantiate a dataset object

Should be invoked from a top-level constructor, for example:

from cirro import DataPortal
portal = DataPortal()
dataset = portal.get_dataset(
    project="id-or-name-of-project",
    dataset="id-or-name-of-dataset"
)
id: str
45    @property
46    def id(self) -> str:
47        """Unique identifier for the dataset"""
48        return self._data.id

Unique identifier for the dataset

name: str
50    @property
51    def name(self) -> str:
52        """Editable name for the dataset"""
53        return self._data.name

Editable name for the dataset

description: str
55    @property
56    def description(self) -> str:
57        """Longer description of the dataset"""
58        return self._data.description

Longer description of the dataset

process_id: str
60    @property
61    def process_id(self) -> str:
62        """Unique ID of process used to create the dataset"""
63        return self._data.process_id

Unique ID of process used to create the dataset

65    @property
66    def process(self) -> ProcessDetail:
67        """
68        Object representing the process used to create the dataset
69        """
70        return self._client.processes.get(self.process_id)

Object representing the process used to create the dataset

project_id: str
72    @property
73    def project_id(self) -> str:
74        """ID of the project containing the dataset"""
75        return self._data.project_id

ID of the project containing the dataset

status: cirro_api_client.v1.models.Status
77    @property
78    def status(self) -> Status:
79        """
80        Status of the dataset
81        """
82        return self._data.status

Status of the dataset

source_dataset_ids: List[str]
84    @property
85    def source_dataset_ids(self) -> List[str]:
86        """IDs of the datasets used as sources for this dataset (if any)"""
87        return self._data.source_dataset_ids

IDs of the datasets used as sources for this dataset (if any)

source_datasets: List[DataPortalDataset]
 89    @property
 90    def source_datasets(self) -> List['DataPortalDataset']:
 91        """
 92        Objects representing the datasets used as sources for this dataset (if any)
 93        """
 94        return [
 95            DataPortalDataset(
 96                dataset=self._client.datasets.get(project_id=self.project_id, dataset_id=dataset_id),
 97                client=self._client
 98            )
 99            for dataset_id in self.source_dataset_ids
100        ]

Objects representing the datasets used as sources for this dataset (if any)

102    @property
103    def params(self) -> DatasetDetailParams:
104        """
105        Parameters used to generate the dataset
106        """
107        return self._get_detail().params

Parameters used to generate the dataset

109    @property
110    def info(self) -> DatasetDetailInfo:
111        """
112        Detailed information about the dataset
113        """
114        return self._get_detail().info

Detailed information about the dataset

tags: List[cirro_api_client.v1.models.Tag]
116    @property
117    def tags(self) -> List[Tag]:
118        """
119        Tags applied to the dataset
120        """
121        return self._data.tags

Tags applied to the dataset

created_by: str
123    @property
124    def created_by(self) -> str:
125        """User who created the dataset"""
126        return self._data.created_by

User who created the dataset

created_at: datetime.datetime
128    @property
129    def created_at(self) -> datetime.datetime:
130        """Timestamp of dataset creation"""
131        return self._data.created_at

Timestamp of dataset creation

def list_files(self) -> cirro.sdk.file.DataPortalFiles:
144    def list_files(self) -> DataPortalFiles:
145        """
146        Return the list of files which make up the dataset.
147        """
148        if not self._files:
149            self._files = DataPortalFiles(
150                [
151                    DataPortalFile(file=file, client=self._client)
152                    for file in self._client.datasets.get_file_listing(
153                        project_id=self.project_id,
154                        dataset_id=self.id
155                    )
156                ]
157            )
158        return self._files

Return the list of files which make up the dataset.

def download_files(self, download_location: str = None) -> None:
160    def download_files(self, download_location: str = None) -> None:
161        """
162        Download all the files from the dataset to a local directory.
163
164        Args:
165            download_location (str): Path to local directory
166        """
167
168        # Alias for internal method
169        self.list_files().download(download_location)

Download all the files from the dataset to a local directory.

Arguments:
  • download_location (str): Path to local directory
def run_analysis( self, name: str = None, description: str = '', process: Union[DataPortalProcess, str] = None, params=None, notifications_emails=None) -> str:
171    def run_analysis(
172            self,
173            name: str = None,
174            description: str = "",
175            process: Union[DataPortalProcess, str] = None,
176            params=None,
177            notifications_emails=None
178    ) -> str:
179        """
180        Runs an analysis on a dataset, returns the ID of the newly created dataset.
181
182        The process can be provided as either a DataPortalProcess object,
183        or a string which corresponds to the name or ID of the process.
184
185        Args:
186            name (str): Name of newly created dataset
187            description (str): Description of newly created dataset
188            process (DataPortalProcess or str): Process to run
189            params (dict): Analysis parameters
190            notifications_emails (List[str]): Notification email address(es)
191
192        Returns:
193            dataset_id (str): ID of newly created dataset
194        """
195        if name is None:
196            raise DataPortalInputError("Must specify 'name' for run_analysis")
197        if process is None:
198            raise DataPortalInputError("Must specify 'process' for run_analysis")
199        if notifications_emails is None:
200            notifications_emails = []
201        if params is None:
202            params = {}
203
204        # If the process is a string, try to parse it as a process name or ID
205        process = parse_process_name_or_id(process, self._client)
206
207        resp = self._client.execution.run_analysis(
208            project_id=self.project_id,
209            request=RunAnalysisRequest(
210                name=name,
211                description=description,
212                process_id=process.id,
213                source_dataset_ids=[self.id],
214                params=RunAnalysisRequestParams.from_dict(params),
215                notification_emails=notifications_emails
216            )
217        )
218        return resp.id

Runs an analysis on a dataset, returns the ID of the newly created dataset.

The process can be provided as either a DataPortalProcess object, or a string which corresponds to the name or ID of the process.

Arguments:
  • name (str): Name of newly created dataset
  • description (str): Description of newly created dataset
  • process (DataPortalProcess or str): Process to run
  • params (dict): Analysis parameters
  • notifications_emails (List[str]): Notification email address(es)
Returns:

dataset_id (str): ID of newly created dataset

class DataPortalReference(cirro.sdk.asset.DataPortalAsset):
12class DataPortalReference(DataPortalAsset):
13    """
14    Reference data object containing files which can be used for analysis in a particular project.
15    """
16    def __init__(self, ref: Reference, project_id: str, client: CirroApi):
17        """
18        Instantiate by listing the references which have been added to a particular project
19        ```python
20        from cirro import DataPortal
21        portal = DataPortal()
22        project = portal.get_project_by_name("Project Name")
23        references = project.list_references()
24        ```
25        """
26        self._data = ref
27        self._files = [
28            DataPortalFile(File.from_file_entry(f, project_id), client) for f in ref.files
29        ]
30
31    @property
32    def files(self) -> List[DataPortalFile]:
33        """File(s) contained in the reference"""
34        return self._files
35
36    @property
37    def name(self) -> str:
38        """Reference name"""
39        return self._data.name
40
41    @property
42    def type(self) -> str:
43        """Type of reference data (e.g. genome_fasta)"""
44        return self._data.type
45
46    @property
47    def absolute_path(self):
48        if len(self._files) == 0:
49            return None
50        return self._files[0].absolute_path
51
52    def __str__(self):
53        return self.name

Reference data object containing files which can be used for analysis in a particular project.

DataPortalReference( ref: cirro_api_client.v1.models.Reference, project_id: str, client: CirroApi)
16    def __init__(self, ref: Reference, project_id: str, client: CirroApi):
17        """
18        Instantiate by listing the references which have been added to a particular project
19        ```python
20        from cirro import DataPortal
21        portal = DataPortal()
22        project = portal.get_project_by_name("Project Name")
23        references = project.list_references()
24        ```
25        """
26        self._data = ref
27        self._files = [
28            DataPortalFile(File.from_file_entry(f, project_id), client) for f in ref.files
29        ]

Instantiate by listing the references which have been added to a particular project

from cirro import DataPortal
portal = DataPortal()
project = portal.get_project_by_name("Project Name")
references = project.list_references()
files: List[cirro.sdk.file.DataPortalFile]
31    @property
32    def files(self) -> List[DataPortalFile]:
33        """File(s) contained in the reference"""
34        return self._files

File(s) contained in the reference

name: str
36    @property
37    def name(self) -> str:
38        """Reference name"""
39        return self._data.name

Reference name

type: str
41    @property
42    def type(self) -> str:
43        """Type of reference data (e.g. genome_fasta)"""
44        return self._data.type

Type of reference data (e.g. genome_fasta)

absolute_path
46    @property
47    def absolute_path(self):
48        if len(self._files) == 0:
49            return None
50        return self._files[0].absolute_path
class CirroApi:
 11class CirroApi:
 12    """
 13    Client for interacting directly with the Cirro API
 14    """
 15    def __init__(self, auth_info: AuthInfo = None, base_url: str = None):
 16        """
 17        Instantiates the Cirro API object
 18
 19        Args:
 20            auth_info (cirro.auth.base.AuthInfo): Optional authentication information; when omitted, it is loaded from the configuration
 21            base_url (str): Optional base URL for connection (default: `CIRRO_HOME` or 'cirro.bio')
 22
 23        Returns:
 24            Authenticated Cirro API object which can be used to call endpoint functions.
 25            For example:
 26
 27        ```python
 28        from cirro.cirro_client import CirroApi
 29        cirro = CirroApi()
 30        print(cirro.projects.list())
 31        ```
 32        """
 33
 34        self._configuration = AppConfig(base_url=base_url)
 35        if not auth_info:
 36            auth_info = get_auth_info_from_config(self._configuration, auth_io=None)
 37
 38        self._api_client = CirroApiClient(
 39            base_url=self._configuration.rest_endpoint,
 40            auth_method=auth_info.get_auth_method(),
 41            client_name='Cirro SDK',
 42            package_name='cirro'
 43        )
 44
 45        # Init services
 46        self._file_service = FileService(self._api_client,
 47                                         enable_additional_checksum=self._configuration.enable_additional_checksum,
 48                                         transfer_retries=self._configuration.transfer_max_retries)
 49        self._dataset_service = DatasetService(self._api_client, file_service=self._file_service)
 50        self._project_service = ProjectService(self._api_client)
 51        self._process_service = ProcessService(self._api_client)
 52        self._execution_service = ExecutionService(self._api_client)
 53        self._metrics_service = MetricsService(self._api_client)
 54        self._metadata_service = MetadataService(self._api_client)
 55        self._billing_service = BillingService(self._api_client)
 56        self._references_service = ReferenceService(self._api_client)
 57
 58    @property
 59    def datasets(self) -> DatasetService:
 60        """
 61        Create, list, delete, and modify Datasets
 62        """
 63        return self._dataset_service
 64
 65    @property
 66    def projects(self) -> ProjectService:
 67        """
 68        Create, list, delete, and modify Projects
 69        """
 70        return self._project_service
 71
 72    @property
 73    def processes(self) -> ProcessService:
 74        """
 75        List and retrieve detailed information about Processes
 76        """
 77        return self._process_service
 78
 79    @property
 80    def execution(self) -> ExecutionService:
 81        """
 82        List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)
 83        """
 84        return self._execution_service
 85
 86    @property
 87    def metrics(self) -> MetricsService:
 88        """
 89        Project-level summary metrics
 90        """
 91        return self._metrics_service
 92
 93    @property
 94    def metadata(self) -> MetadataService:
 95        """
 96        List and modify Sample metadata or metadata schemas
 97        """
 98        return self._metadata_service
 99
100    @property
101    def billing(self) -> BillingService:
102        """
103        List and update billing accounts
104        """
105        return self._billing_service
106
107    @property
108    def references(self) -> ReferenceService:
109        """
110        List References and Reference types
111        """
112        return self._references_service
113
114    @property
115    def file(self) -> FileService:
116        """
117        Read, download, and create file objects
118        """
119        return self._file_service
120
121    @property
122    def api_client(self) -> CirroApiClient:
123        """
124        Gets the underlying API client
125        """
126        return self._api_client
127
128    @property
129    def configuration(self) -> AppConfig:
130        """
131        Gets the configuration of the instance
132        """
133        return self._configuration

Client for interacting directly with the Cirro API

CirroApi(auth_info: cirro.auth.base.AuthInfo = None, base_url: str = None)
15    def __init__(self, auth_info: AuthInfo = None, base_url: str = None):
16        """
17        Instantiates the Cirro API object
18
19        Args:
20            auth_info (cirro.auth.base.AuthInfo): Optional authentication information; when omitted, it is loaded from the configuration
21            base_url (str): Optional base URL for connection (default: `CIRRO_HOME` or 'cirro.bio')
22
23        Returns:
24            Authenticated Cirro API object which can be used to call endpoint functions.
25            For example:
26
27        ```python
28        from cirro.cirro_client import CirroApi
29        cirro = CirroApi()
30        print(cirro.projects.list())
31        ```
32        """
33
34        self._configuration = AppConfig(base_url=base_url)
35        if not auth_info:
36            auth_info = get_auth_info_from_config(self._configuration, auth_io=None)
37
38        self._api_client = CirroApiClient(
39            base_url=self._configuration.rest_endpoint,
40            auth_method=auth_info.get_auth_method(),
41            client_name='Cirro SDK',
42            package_name='cirro'
43        )
44
45        # Init services
46        self._file_service = FileService(self._api_client,
47                                         enable_additional_checksum=self._configuration.enable_additional_checksum,
48                                         transfer_retries=self._configuration.transfer_max_retries)
49        self._dataset_service = DatasetService(self._api_client, file_service=self._file_service)
50        self._project_service = ProjectService(self._api_client)
51        self._process_service = ProcessService(self._api_client)
52        self._execution_service = ExecutionService(self._api_client)
53        self._metrics_service = MetricsService(self._api_client)
54        self._metadata_service = MetadataService(self._api_client)
55        self._billing_service = BillingService(self._api_client)
56        self._references_service = ReferenceService(self._api_client)

Instantiates the Cirro API object

Arguments:
  • auth_info (cirro.auth.base.AuthInfo): Optional authentication information; when omitted, it is loaded from the configuration
  • base_url (str): Optional base URL for connection (default: CIRRO_HOME or 'cirro.bio')
Returns:

Authenticated Cirro API object which can be used to call endpoint functions. For example:

from cirro.cirro_client import CirroApi
cirro = CirroApi()
print(cirro.projects.list())
datasets: cirro.services.DatasetService
58    @property
59    def datasets(self) -> DatasetService:
60        """
61        Create, list, delete, and modify Datasets
62        """
63        return self._dataset_service

Create, list, delete, and modify Datasets

projects: cirro.services.ProjectService
65    @property
66    def projects(self) -> ProjectService:
67        """
68        Create, list, delete, and modify Projects
69        """
70        return self._project_service

Create, list, delete, and modify Projects

processes: cirro.services.ProcessService
72    @property
73    def processes(self) -> ProcessService:
74        """
75        List and retrieve detailed information about Processes
76        """
77        return self._process_service

List and retrieve detailed information about Processes

execution: cirro.services.ExecutionService
79    @property
80    def execution(self) -> ExecutionService:
81        """
82        List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)
83        """
84        return self._execution_service

List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)

metrics: cirro.services.MetricsService
86    @property
87    def metrics(self) -> MetricsService:
88        """
89        Project-level summary metrics
90        """
91        return self._metrics_service

Project-level summary metrics

metadata: cirro.services.MetadataService
93    @property
94    def metadata(self) -> MetadataService:
95        """
96        List and modify Sample metadata or metadata schemas
97        """
98        return self._metadata_service

List and modify Sample metadata or metadata schemas

billing: cirro.services.BillingService
100    @property
101    def billing(self) -> BillingService:
102        """
103        List and update billing accounts
104        """
105        return self._billing_service

List and update billing accounts

references: cirro.services.ReferenceService
107    @property
108    def references(self) -> ReferenceService:
109        """
110        List References and Reference types
111        """
112        return self._references_service

List References and Reference types

file: cirro.services.FileService
114    @property
115    def file(self) -> FileService:
116        """
117        Read, download, and create file objects
118        """
119        return self._file_service

Read, download, and create file objects

121    @property
122    def api_client(self) -> CirroApiClient:
123        """
124        Gets the underlying API client
125        """
126        return self._api_client

Gets the underlying API client

configuration: cirro.config.AppConfig
128    @property
129    def configuration(self) -> AppConfig:
130        """
131        Gets the configuration of the instance
132        """
133        return self._configuration

Gets the configuration of the instance