cirro

 1import cirro.file_utils  # noqa
 2from cirro.cirro_client import CirroApi
 3from cirro.sdk.dataset import DataPortalDataset
 4from cirro.sdk.login import DataPortalLogin
 5from cirro.sdk.portal import DataPortal
 6from cirro.sdk.process import DataPortalProcess
 7from cirro.sdk.project import DataPortalProject
 8from cirro.sdk.reference import DataPortalReference
 9
10__all__ = [
11    'DataPortal',
12    'DataPortalLogin',
13    'DataPortalProject',
14    'DataPortalProcess',
15    'DataPortalDataset',
16    'DataPortalReference',
17    'CirroApi',
18    'file_utils'
19]
class DataPortal:
 13class DataPortal:
 14    """
 15    Helper functions for exploring the Projects, Datasets, Samples, and Files
 16    available in the Data Portal.
 17    """
 18
 19    def __init__(self, base_url: str = None, client: CirroApi = None):
 20        """
 21        Set up the DataPortal object, establishing an authenticated connection.
 22
 23        Args:
 24            base_url (str): Optional base URL of the Cirro instance
 25             (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
 26            client (`cirro.cirro_client.CirroApi`): Optional pre-configured client
 27
 28        Example:
 29        ```python
 30        from cirro import DataPortal
 31
 32        portal = DataPortal(base_url="app.cirro.bio")
 33        portal.list_projects()
 34        ```
 35        """
 36
 37        if client is not None:
 38            self._client = client
 39
 40        # Set up default client if not provided
 41        else:
 42            self._client = CirroApi(base_url=base_url)
 43
 44    def list_projects(self) -> DataPortalProjects:
 45        """List all the projects available in the Data Portal."""
 46
 47        return DataPortalProjects(
 48            [
 49                DataPortalProject(proj, self._client)
 50                for proj in self._client.projects.list()
 51            ]
 52        )
 53
 54    def get_project_by_name(self, name: str = None) -> DataPortalProject:
 55        """Return the project with the specified name."""
 56
 57        return self.list_projects().get_by_name(name)
 58
 59    def get_project_by_id(self, _id: str = None) -> DataPortalProject:
 60        """Return the project with the specified id."""
 61
 62        return self.list_projects().get_by_id(_id)
 63
 64    def get_project(self, project: str = None) -> DataPortalProject:
 65        """
 66        Return a project identified by ID or name.
 67
 68        Args:
 69            project (str): ID or name of project
 70
 71        Returns:
 72            `cirro.sdk.project.DataPortalProject`
 73        """
 74        try:
 75            return self.get_project_by_id(project)
 76        except DataPortalAssetNotFound:
 77            return self.get_project_by_name(project)
 78
 79    def get_dataset(self, project: str = None, dataset: str = None) -> DataPortalDataset:
 80        """
 81        Return a dataset identified by ID or name.
 82
 83        Args:
 84            project (str): ID or name of project
 85            dataset (str): ID or name of dataset
 86
 87        Returns:
 88            `cirro.sdk.dataset.DataPortalDataset`
 89
 90            ```python
 91            from cirro import DataPortal
 92            portal = DataPortal()
 93            dataset = portal.get_dataset(
 94                project="id-or-name-of-project",
 95                dataset="id-or-name-of-dataset"
 96            )
 97            ```
 98        """
 99        try:
100            project: DataPortalProject = self.get_project_by_id(project)
101        except DataPortalAssetNotFound:
102            project: DataPortalProject = self.get_project_by_name(project)
103
104        return project.get_dataset(dataset)
105
106    def read_files(
107            self,
108            project: str,
109            dataset: str,
110            glob: str = None,
111            pattern: str = None,
112            filetype: str = None,
113            **kwargs
114    ):
115        """
116        Read the contents of files from a dataset.
117
118        The project and dataset can each be identified by name or ID.
119        Exactly one of ``glob`` or ``pattern`` must be provided.
120
121        **glob** — standard wildcard matching; yields the file content for each
122        matching file:
123
124        - ``*`` matches any characters within a single path segment
125        - ``**`` matches zero or more path segments
126        - Matching is suffix-anchored (``*.csv`` matches at any depth)
127
128        **pattern** — like ``glob`` but ``{name}`` placeholders capture portions
129        of the path automatically; yields ``(content, meta)`` pairs where
130        *meta* is a ``dict`` of extracted values:
131
132        - ``{name}`` captures one path segment (no ``/``)
133        - ``*`` and ``**`` wildcards work as in ``glob``
134
135        Args:
136            project (str): ID or name of the project.
137            dataset (str): ID or name of the dataset.
138            glob (str): Wildcard expression to match files
139                (e.g., ``'*.csv'``, ``'data/**/*.tsv.gz'``).
140                Yields one item per matching file: the parsed content.
141            pattern (str): Wildcard expression with ``{name}`` capture
142                placeholders (e.g., ``'{sample}.csv'``,
143                ``'{condition}/{sample}.csv'``).
144                Yields ``(content, meta)`` per matching file.
145            filetype (str): File format used to parse each file. Supported values:
146
147                - ``'csv'``: parse with :func:`pandas.read_csv`, returns a ``DataFrame``
148                - ``'h5ad'``: parse as AnnData (requires ``anndata`` package)
149                - ``'json'``: parse with :func:`json.loads`, returns a Python object
150                - ``'parquet'``: parse with :func:`pandas.read_parquet`, returns a ``DataFrame``
151                  (requires ``pyarrow`` or ``fastparquet``)
152                - ``'feather'``: parse with :func:`pandas.read_feather`, returns a ``DataFrame``
153                  (requires ``pyarrow``)
154                - ``'pickle'``: deserialize with :mod:`pickle`, returns a Python object
155                - ``'excel'``: parse with :func:`pandas.read_excel`, returns a ``DataFrame``
156                  (requires ``openpyxl`` for ``.xlsx`` or ``xlrd`` for ``.xls``)
157                - ``'text'``: read as plain text, returns a ``str``
158                - ``'bytes'``: read as raw bytes, returns ``bytes``
159                - ``None`` (default): infer from file extension
160                  (``.csv``/``.tsv`` → ``'csv'``, ``.h5ad`` → ``'h5ad'``,
161                  ``.json`` → ``'json'``, ``.parquet`` → ``'parquet'``,
162                  ``.feather`` → ``'feather'``, ``.pkl``/``.pickle`` → ``'pickle'``,
163                  ``.xlsx``/``.xls`` → ``'excel'``, otherwise ``'text'``)
164            **kwargs: Additional keyword arguments forwarded to the file-parsing
165                function (e.g., ``sep='\\t'`` for CSV/TSV files).
166
167        Yields:
168            - When using ``glob``: *content* for each matching file
169            - When using ``pattern``: ``(content, meta)`` for each matching file,
170              where *meta* is a ``dict`` of values extracted from ``{name}``
171              placeholders
172
173        Raises:
174            DataPortalInputError: if both ``glob`` and ``pattern`` are provided,
175                or if neither is provided.
176
177        Example:
178            ```python
179            # Read all CSV files — just the content
180            for df in portal.read_files('My Project', 'My Dataset', glob='*.csv'):
181                print(df.shape)
182
183            # Extract sample names from filenames automatically
184            for df, meta in portal.read_files('My Project', 'My Dataset', pattern='{sample}.csv'):
185                print(meta['sample'], df.shape)
186
187            # Multi-level capture: condition directory + sample filename
188            for df, meta in portal.read_files('My Project', 'My Dataset', pattern='{condition}/{sample}.csv'):
189                print(meta['condition'], meta['sample'], df.shape)
190
191            # Read gzip-compressed TSV files with explicit separator
192            for df in portal.read_files('My Project', 'My Dataset', glob='**/*.tsv.gz', filetype='csv', sep='\\t'):
193                print(df.shape)
194            ```
195        """
196        ds = self.get_dataset(project=project, dataset=dataset)
197        yield from ds.read_files(glob=glob, pattern=pattern, filetype=filetype, **kwargs)
198
199    def read_file(
200            self,
201            project: str,
202            dataset: str,
203            path: str = None,
204            glob: str = None,
205            filetype: str = None,
206            **kwargs
207    ):
208        """
209        Read the contents of a single file from a dataset.
210
211        The project and dataset can each be identified by name or ID.
212        Provide either ``path`` (exact relative path) or ``glob`` (wildcard
213        expression). If ``glob`` is used it must match exactly one file.
214
215        Args:
216            project (str): ID or name of the project.
217            dataset (str): ID or name of the dataset.
218            path (str): Exact relative path of the file within the dataset.
219            glob (str): Wildcard expression matching exactly one file.
220            filetype (str): File format used to parse the file. Supported values
221                are the same as :meth:`read_files`.
222            **kwargs: Additional keyword arguments forwarded to the
223                file-parsing function.
224
225        Returns:
226            Parsed file content.
227
228        Raises:
229            DataPortalInputError: if both or neither of ``path``/``glob`` are
230                provided, or if ``glob`` matches zero or more than one file.
231        """
232        ds = self.get_dataset(project=project, dataset=dataset)
233        return ds.read_file(path=path, glob=glob, filetype=filetype, **kwargs)
234
235    def list_processes(self, ingest=False) -> DataPortalProcesses:
236        """
237        List all the processes available in the Data Portal.
238        By default, only list non-ingest processes (those which can be run on existing datasets).
239        To list the processes which can be used to upload datasets, use `ingest = True`.
240
241        Args:
242            ingest (bool): If True, only list those processes which can be used to ingest datasets directly
243        """
244
245        return DataPortalProcesses(
246            [
247                DataPortalProcess(p, self._client)
248                for p in self._client.processes.list()
249                if not ingest or p.executor == Executor.INGEST
250            ]
251        )
252
253    def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess:
254        """
255        Return the process with the specified name.
256
257        Args:
258            name (str): Name of process
259        """
260
261        return self.list_processes(ingest=ingest).get_by_name(name)
262
263    def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess:
264        """
265        Return the process with the specified id
266
267        Args:
268            id (str): ID of process
269        """
270
271        return self.list_processes(ingest=ingest).get_by_id(id)
272
273    def list_reference_types(self) -> DataPortalReferenceTypes:
274        """
275        Return the list of all available reference types
276        """
277
278        return DataPortalReferenceTypes(
279            [
280                DataPortalReferenceType(ref)
281                for ref in self._client.references.get_types()
282            ]
283        )
284
285    @property
286    def developer_helper(self) -> DeveloperHelper:
287        return DeveloperHelper(self._client)

Helper functions for exploring the Projects, Datasets, Samples, and Files available in the Data Portal.

DataPortal(base_url: str = None, client: CirroApi = None)
19    def __init__(self, base_url: str = None, client: CirroApi = None):
20        """
21        Set up the DataPortal object, establishing an authenticated connection.
22
23        Args:
24            base_url (str): Optional base URL of the Cirro instance
25             (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
26            client (`cirro.cirro_client.CirroApi`): Optional pre-configured client
27
28        Example:
29        ```python
30        from cirro import DataPortal
31
32        portal = DataPortal(base_url="app.cirro.bio")
33        portal.list_projects()
34        ```
35        """
36
37        if client is not None:
38            self._client = client
39
40        # Set up default client if not provided
41        else:
42            self._client = CirroApi(base_url=base_url)

Set up the DataPortal object, establishing an authenticated connection.

Arguments:
  • base_url (str): Optional base URL of the Cirro instance (if not provided, it uses the CIRRO_BASE_URL environment variable, or the config file)
  • client (cirro.cirro_client.CirroApi): Optional pre-configured client

Example:

from cirro import DataPortal

portal = DataPortal(base_url="app.cirro.bio")
portal.list_projects()
def list_projects(self) -> cirro.sdk.project.DataPortalProjects:
44    def list_projects(self) -> DataPortalProjects:
45        """List all the projects available in the Data Portal."""
46
47        return DataPortalProjects(
48            [
49                DataPortalProject(proj, self._client)
50                for proj in self._client.projects.list()
51            ]
52        )

List all the projects available in the Data Portal.

def get_project_by_name(self, name: str = None) -> DataPortalProject:
54    def get_project_by_name(self, name: str = None) -> DataPortalProject:
55        """Return the project with the specified name."""
56
57        return self.list_projects().get_by_name(name)

Return the project with the specified name.

def get_project_by_id(self, _id: str = None) -> DataPortalProject:
59    def get_project_by_id(self, _id: str = None) -> DataPortalProject:
60        """Return the project with the specified id."""
61
62        return self.list_projects().get_by_id(_id)

Return the project with the specified id.

def get_project(self, project: str = None) -> DataPortalProject:
64    def get_project(self, project: str = None) -> DataPortalProject:
65        """
66        Return a project identified by ID or name.
67
68        Args:
69            project (str): ID or name of project
70
71        Returns:
72            `cirro.sdk.project.DataPortalProject`
73        """
74        try:
75            return self.get_project_by_id(project)
76        except DataPortalAssetNotFound:
77            return self.get_project_by_name(project)

Return a project identified by ID or name.

Arguments:
  • project (str): ID or name of project
Returns:

cirro.sdk.project.DataPortalProject

def get_dataset( self, project: str = None, dataset: str = None) -> DataPortalDataset:
 79    def get_dataset(self, project: str = None, dataset: str = None) -> DataPortalDataset:
 80        """
 81        Return a dataset identified by ID or name.
 82
 83        Args:
 84            project (str): ID or name of project
 85            dataset (str): ID or name of dataset
 86
 87        Returns:
 88            `cirro.sdk.dataset.DataPortalDataset`
 89
 90            ```python
 91            from cirro import DataPortal
 92            portal = DataPortal()
 93            dataset = portal.get_dataset(
 94                project="id-or-name-of-project",
 95                dataset="id-or-name-of-dataset"
 96            )
 97            ```
 98        """
 99        try:
100            project: DataPortalProject = self.get_project_by_id(project)
101        except DataPortalAssetNotFound:
102            project: DataPortalProject = self.get_project_by_name(project)
103
104        return project.get_dataset(dataset)

Return a dataset identified by ID or name.

Arguments:
  • project (str): ID or name of project
  • dataset (str): ID or name of dataset
Returns:

cirro.sdk.dataset.DataPortalDataset

from cirro import DataPortal
portal = DataPortal()
dataset = portal.get_dataset(
    project="id-or-name-of-project",
    dataset="id-or-name-of-dataset"
)
def read_files( self, project: str, dataset: str, glob: str = None, pattern: str = None, filetype: str = None, **kwargs):
106    def read_files(
107            self,
108            project: str,
109            dataset: str,
110            glob: str = None,
111            pattern: str = None,
112            filetype: str = None,
113            **kwargs
114    ):
115        """
116        Read the contents of files from a dataset.
117
118        The project and dataset can each be identified by name or ID.
119        Exactly one of ``glob`` or ``pattern`` must be provided.
120
121        **glob** — standard wildcard matching; yields the file content for each
122        matching file:
123
124        - ``*`` matches any characters within a single path segment
125        - ``**`` matches zero or more path segments
126        - Matching is suffix-anchored (``*.csv`` matches at any depth)
127
128        **pattern** — like ``glob`` but ``{name}`` placeholders capture portions
129        of the path automatically; yields ``(content, meta)`` pairs where
130        *meta* is a ``dict`` of extracted values:
131
132        - ``{name}`` captures one path segment (no ``/``)
133        - ``*`` and ``**`` wildcards work as in ``glob``
134
135        Args:
136            project (str): ID or name of the project.
137            dataset (str): ID or name of the dataset.
138            glob (str): Wildcard expression to match files
139                (e.g., ``'*.csv'``, ``'data/**/*.tsv.gz'``).
140                Yields one item per matching file: the parsed content.
141            pattern (str): Wildcard expression with ``{name}`` capture
142                placeholders (e.g., ``'{sample}.csv'``,
143                ``'{condition}/{sample}.csv'``).
144                Yields ``(content, meta)`` per matching file.
145            filetype (str): File format used to parse each file. Supported values:
146
147                - ``'csv'``: parse with :func:`pandas.read_csv`, returns a ``DataFrame``
148                - ``'h5ad'``: parse as AnnData (requires ``anndata`` package)
149                - ``'json'``: parse with :func:`json.loads`, returns a Python object
150                - ``'parquet'``: parse with :func:`pandas.read_parquet`, returns a ``DataFrame``
151                  (requires ``pyarrow`` or ``fastparquet``)
152                - ``'feather'``: parse with :func:`pandas.read_feather`, returns a ``DataFrame``
153                  (requires ``pyarrow``)
154                - ``'pickle'``: deserialize with :mod:`pickle`, returns a Python object
155                - ``'excel'``: parse with :func:`pandas.read_excel`, returns a ``DataFrame``
156                  (requires ``openpyxl`` for ``.xlsx`` or ``xlrd`` for ``.xls``)
157                - ``'text'``: read as plain text, returns a ``str``
158                - ``'bytes'``: read as raw bytes, returns ``bytes``
159                - ``None`` (default): infer from file extension
160                  (``.csv``/``.tsv`` → ``'csv'``, ``.h5ad`` → ``'h5ad'``,
161                  ``.json`` → ``'json'``, ``.parquet`` → ``'parquet'``,
162                  ``.feather`` → ``'feather'``, ``.pkl``/``.pickle`` → ``'pickle'``,
163                  ``.xlsx``/``.xls`` → ``'excel'``, otherwise ``'text'``)
164            **kwargs: Additional keyword arguments forwarded to the file-parsing
165                function (e.g., ``sep='\\t'`` for CSV/TSV files).
166
167        Yields:
168            - When using ``glob``: *content* for each matching file
169            - When using ``pattern``: ``(content, meta)`` for each matching file,
170              where *meta* is a ``dict`` of values extracted from ``{name}``
171              placeholders
172
173        Raises:
174            DataPortalInputError: if both ``glob`` and ``pattern`` are provided,
175                or if neither is provided.
176
177        Example:
178            ```python
179            # Read all CSV files — just the content
180            for df in portal.read_files('My Project', 'My Dataset', glob='*.csv'):
181                print(df.shape)
182
183            # Extract sample names from filenames automatically
184            for df, meta in portal.read_files('My Project', 'My Dataset', pattern='{sample}.csv'):
185                print(meta['sample'], df.shape)
186
187            # Multi-level capture: condition directory + sample filename
188            for df, meta in portal.read_files('My Project', 'My Dataset', pattern='{condition}/{sample}.csv'):
189                print(meta['condition'], meta['sample'], df.shape)
190
191            # Read gzip-compressed TSV files with explicit separator
192            for df in portal.read_files('My Project', 'My Dataset', glob='**/*.tsv.gz', filetype='csv', sep='\\t'):
193                print(df.shape)
194            ```
195        """
196        ds = self.get_dataset(project=project, dataset=dataset)
197        yield from ds.read_files(glob=glob, pattern=pattern, filetype=filetype, **kwargs)

Read the contents of files from a dataset.

The project and dataset can each be identified by name or ID. Exactly one of glob or pattern must be provided.

glob — standard wildcard matching; yields the file content for each matching file:

  • * matches any characters within a single path segment
  • ** matches zero or more path segments
  • Matching is suffix-anchored (*.csv matches at any depth)

pattern — like glob but {name} placeholders capture portions of the path automatically; yields (content, meta) pairs where meta is a dict of extracted values:

  • {name} captures one path segment (no /)
  • * and ** wildcards work as in glob
Arguments:
  • project (str): ID or name of the project.
  • dataset (str): ID or name of the dataset.
  • glob (str): Wildcard expression to match files (e.g., '*.csv', 'data/**/*.tsv.gz'). Yields one item per matching file: the parsed content.
  • pattern (str): Wildcard expression with {name} capture placeholders (e.g., '{sample}.csv', '{condition}/{sample}.csv'). Yields (content, meta) per matching file.
  • filetype (str): File format used to parse each file. Supported values:

    • 'csv': parse with pandas.read_csv(), returns a DataFrame
    • 'h5ad': parse as AnnData (requires anndata package)
    • 'json': parse with json.loads(), returns a Python object
    • 'parquet': parse with pandas.read_parquet(), returns a DataFrame (requires pyarrow or fastparquet)
    • 'feather': parse with pandas.read_feather(), returns a DataFrame (requires pyarrow)
    • 'pickle': deserialize with pickle, returns a Python object
    • 'excel': parse with pandas.read_excel(), returns a DataFrame (requires openpyxl for .xlsx or xlrd for .xls)
    • 'text': read as plain text, returns a str
    • 'bytes': read as raw bytes, returns bytes
    • None (default): infer from file extension (.csv/.tsv → 'csv', .h5ad → 'h5ad', .json → 'json', .parquet → 'parquet', .feather → 'feather', .pkl/.pickle → 'pickle', .xlsx/.xls → 'excel', otherwise 'text')
  • **kwargs: Additional keyword arguments forwarded to the file-parsing function (e.g., sep='\t' for CSV/TSV files).
Yields:
  • When using glob: content for each matching file
  • When using pattern: (content, meta) for each matching file, where meta is a dict of values extracted from {name} placeholders
Raises:
  • DataPortalInputError: if both glob and pattern are provided, or if neither is provided.
Example:
# Read all CSV files — just the content
for df in portal.read_files('My Project', 'My Dataset', glob='*.csv'):
    print(df.shape)

# Extract sample names from filenames automatically
for df, meta in portal.read_files('My Project', 'My Dataset', pattern='{sample}.csv'):
    print(meta['sample'], df.shape)

# Multi-level capture: condition directory + sample filename
for df, meta in portal.read_files('My Project', 'My Dataset', pattern='{condition}/{sample}.csv'):
    print(meta['condition'], meta['sample'], df.shape)

# Read gzip-compressed TSV files with explicit separator
for df in portal.read_files('My Project', 'My Dataset', glob='**/*.tsv.gz', filetype='csv', sep='\t'):
    print(df.shape)
def read_file( self, project: str, dataset: str, path: str = None, glob: str = None, filetype: str = None, **kwargs):
199    def read_file(
200            self,
201            project: str,
202            dataset: str,
203            path: str = None,
204            glob: str = None,
205            filetype: str = None,
206            **kwargs
207    ):
208        """
209        Read the contents of a single file from a dataset.
210
211        The project and dataset can each be identified by name or ID.
212        Provide either ``path`` (exact relative path) or ``glob`` (wildcard
213        expression). If ``glob`` is used it must match exactly one file.
214
215        Args:
216            project (str): ID or name of the project.
217            dataset (str): ID or name of the dataset.
218            path (str): Exact relative path of the file within the dataset.
219            glob (str): Wildcard expression matching exactly one file.
220            filetype (str): File format used to parse the file. Supported values
221                are the same as :meth:`read_files`.
222            **kwargs: Additional keyword arguments forwarded to the
223                file-parsing function.
224
225        Returns:
226            Parsed file content.
227
228        Raises:
229            DataPortalInputError: if both or neither of ``path``/``glob`` are
230                provided, or if ``glob`` matches zero or more than one file.
231        """
232        ds = self.get_dataset(project=project, dataset=dataset)
233        return ds.read_file(path=path, glob=glob, filetype=filetype, **kwargs)

Read the contents of a single file from a dataset.

The project and dataset can each be identified by name or ID. Provide either path (exact relative path) or glob (wildcard expression). If glob is used it must match exactly one file.

Arguments:
  • project (str): ID or name of the project.
  • dataset (str): ID or name of the dataset.
  • path (str): Exact relative path of the file within the dataset.
  • glob (str): Wildcard expression matching exactly one file.
  • filetype (str): File format used to parse the file. Supported values are the same as read_files().
  • **kwargs: Additional keyword arguments forwarded to the file-parsing function.
Returns:

Parsed file content.

Raises:
  • DataPortalInputError: if both or neither of path/glob are provided, or if glob matches zero or more than one file.
def list_processes(self, ingest=False) -> cirro.sdk.process.DataPortalProcesses:
235    def list_processes(self, ingest=False) -> DataPortalProcesses:
236        """
237        List all the processes available in the Data Portal.
238        By default, only list non-ingest processes (those which can be run on existing datasets).
239        To list the processes which can be used to upload datasets, use `ingest = True`.
240
241        Args:
242            ingest (bool): If True, only list those processes which can be used to ingest datasets directly
243        """
244
245        return DataPortalProcesses(
246            [
247                DataPortalProcess(p, self._client)
248                for p in self._client.processes.list()
249                if not ingest or p.executor == Executor.INGEST
250            ]
251        )

List all the processes available in the Data Portal. By default, only list non-ingest processes (those which can be run on existing datasets). To list the processes which can be used to upload datasets, use ingest = True.

Arguments:
  • ingest (bool): If True, only list those processes which can be used to ingest datasets directly
def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess:
253    def get_process_by_name(self, name: str, ingest=False) -> DataPortalProcess:
254        """
255        Return the process with the specified name.
256
257        Args:
258            name (str): Name of process
259        """
260
261        return self.list_processes(ingest=ingest).get_by_name(name)

Return the process with the specified name.

Arguments:
  • name (str): Name of process
def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess:
263    def get_process_by_id(self, id: str, ingest=False) -> DataPortalProcess:
264        """
265        Return the process with the specified id
266
267        Args:
268            id (str): ID of process
269        """
270
271        return self.list_processes(ingest=ingest).get_by_id(id)

Return the process with the specified id.

Arguments:
  • id (str): ID of process
def list_reference_types(self) -> cirro.sdk.reference_type.DataPortalReferenceTypes:
273    def list_reference_types(self) -> DataPortalReferenceTypes:
274        """
275        Return the list of all available reference types
276        """
277
278        return DataPortalReferenceTypes(
279            [
280                DataPortalReferenceType(ref)
281                for ref in self._client.references.get_types()
282            ]
283        )

Return the list of all available reference types

developer_helper: cirro.sdk.developer.DeveloperHelper
285    @property
286    def developer_helper(self) -> DeveloperHelper:
287        return DeveloperHelper(self._client)
class DataPortalLogin:
 8class DataPortalLogin:
 9    """
10    Start the login process, obtaining the authorization message from Cirro
11    needed to confirm the user identity.
12
13    Useful when you need to authenticate a user in a non-blocking way.
14
15    Usage:
16
17    ```python
18    # Replace app.cirro.bio as appropriate
19    login = DataPortalLogin(base_url="app.cirro.bio")
20
21    # Present the user with the authorization message
22    print(login.auth_message)
23
24    # Generate the authenticated DataPortal object,
25    # blocking until the user completes the login process in their browser
26    portal = login.await_completion()
27    ```
28    """
29    base_url: str
30    auth_info: DeviceCodeAuth
31
32    def __init__(self, base_url: str = None, enable_cache=False):
33        app_config = AppConfig(base_url=base_url)
34
35        self.base_url = base_url
36
37        self.auth_info = DeviceCodeAuth(
38            region=app_config.region,
39            client_id=app_config.client_id,
40            auth_endpoint=app_config.auth_endpoint,
41            enable_cache=enable_cache,
42            await_completion=False
43        )
44
45    @property
46    def auth_message(self) -> str:
47        """Authorization message provided by Cirro."""
48        return self.auth_info.auth_message
49
50    @property
51    def auth_message_markdown(self) -> str:
52        """Authorization message provided by Cirro (Markdown format)."""
53        return self.auth_info.auth_message_markdown
54
55    def await_completion(self) -> DataPortal:
56        """Complete the login process and return an authenticated client"""
57
58        # Block until the user completes the login flow
59        self.auth_info.await_completion()
60
61        # Set up the client object
62        cirro_client = CirroApi(
63            auth_info=self.auth_info,
64            base_url=self.base_url
65        )
66
67        # Return the Data Portal object
68        return DataPortal(client=cirro_client)

Start the login process, obtaining the authorization message from Cirro needed to confirm the user identity.

Useful when you need to authenticate a user in a non-blocking way.

Usage:

# Replace app.cirro.bio as appropriate
login = DataPortalLogin(base_url="app.cirro.bio")

# Present the user with the authorization message
print(login.auth_message)

# Generate the authenticated DataPortal object,
# blocking until the user completes the login process in their browser
portal = login.await_completion()
DataPortalLogin(base_url: str = None, enable_cache=False)
32    def __init__(self, base_url: str = None, enable_cache=False):
33        app_config = AppConfig(base_url=base_url)
34
35        self.base_url = base_url
36
37        self.auth_info = DeviceCodeAuth(
38            region=app_config.region,
39            client_id=app_config.client_id,
40            auth_endpoint=app_config.auth_endpoint,
41            enable_cache=enable_cache,
42            await_completion=False
43        )
base_url: str
auth_message: str
45    @property
46    def auth_message(self) -> str:
47        """Authorization message provided by Cirro."""
48        return self.auth_info.auth_message

Authorization message provided by Cirro.

auth_message_markdown: str
50    @property
51    def auth_message_markdown(self) -> str:
52        """Authorization message provided by Cirro (Markdown format)."""
53        return self.auth_info.auth_message_markdown

Authorization message provided by Cirro (Markdown format).

def await_completion(self) -> DataPortal:
55    def await_completion(self) -> DataPortal:
56        """Complete the login process and return an authenticated client"""
57
58        # Block until the user completes the login flow
59        self.auth_info.await_completion()
60
61        # Set up the client object
62        cirro_client = CirroApi(
63            auth_info=self.auth_info,
64            base_url=self.base_url
65        )
66
67        # Return the Data Portal object
68        return DataPortal(client=cirro_client)

Complete the login process and return an authenticated client

class DataPortalProject(cirro.sdk.asset.DataPortalAsset):
 20class DataPortalProject(DataPortalAsset):
 21    """
 22    Projects in the Data Portal contain collections of Datasets.
 23    Users are granted permissions at the project-level, allowing them
 24    to view and/or modify all the datasets in that collection.
 25    """
 26    def __init__(self, proj: Project, client: CirroApi):
 27        """
 28        Instantiate with helper method
 29
 30        ```python
 31        from cirro import DataPortal
 32        portal = DataPortal()
 33        project = portal.get_project_by_name("Project Name")
 34        ```
 35
 36        """
 37        self._data = proj
 38        self._client = client
 39
 40    @property
 41    def id(self) -> str:
 42        """
 43        Unique identifier
 44        """
 45        return self._data.id
 46
 47    @property
 48    def name(self) -> str:
 49        """
 50        Readable name
 51        """
 52        return self._data.name
 53
 54    @property
 55    def description(self) -> str:
 56        """
 57        Longer description of the project
 58        """
 59        return self._data.description
 60
 61    @property
 62    def status(self) -> Status:
 63        """
 64        Status of the project
 65        """
 66        return self._data.status
 67
 68    def __str__(self):
 69        """Control how the Project is rendered as a string."""
 70
 71        return '\n'.join([
 72            f"{i.title()}: {self.__getattribute__(i)}"
 73            for i in ['name', 'id', 'description']
 74        ])
 75
 76    @cache
 77    def _get_datasets(self) -> List[Dataset]:
 78        return list_all_datasets(project_id=self.id,
 79                                 client=self._client)
 80
 81    def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
 82        """List all the datasets available in the project."""
 83        if force_refresh:
 84            self._get_datasets.cache_clear()
 85
 86        return DataPortalDatasets(
 87            [
 88                DataPortalDataset(d, self._client)
 89                for d in self._get_datasets()
 90            ]
 91        )
 92
 93    def get_dataset(self, name_or_id: str, force_refresh=False) -> DataPortalDataset:
 94        """Return the dataset matching the given ID or name.
 95
 96        Tries to match by ID first, then by name.
 97        Raises an error if the name matches multiple datasets.
 98        """
 99        if force_refresh:
100            self._get_datasets.cache_clear()
101
102        # Try by ID first
103        try:
104            return self.get_dataset_by_id(name_or_id)
105        except Exception:
106            pass
107
108        # Fall back to name matching
109        matches = [d for d in self._get_datasets() if d.name == name_or_id]
110        if len(matches) == 0:
111            raise DataPortalAssetNotFound(f'Dataset with name or ID "{name_or_id}" not found')
112        if len(matches) > 1:
113            raise DataPortalInputError(
114                f'Multiple datasets found with the name "{name_or_id}" — use get_dataset_by_id instead'
115            )
116        return self.get_dataset_by_id(matches[0].id)
117
118    def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
119        """Return the dataset with the specified name."""
120        if force_refresh:
121            self._get_datasets.cache_clear()
122
123        dataset = next((d for d in self._get_datasets() if d.name == name), None)
124        if dataset is None:
125            raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
126        return self.get_dataset_by_id(dataset.id)
127
128    def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
129        """Return the dataset with the specified id."""
130
131        dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id)
132        if dataset is None:
133            raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
134        return DataPortalDataset(dataset, self._client)
135
136    def list_references(self, reference_type: str = None) -> DataPortalReferences:
137        """
138        List the references available in a project.
139        Optionally filter to references of a particular type (identified by name)
140        """
141
142        # Get the complete list of references which are available
143        reference_types = DataPortalReferenceTypes(
144            [
145                DataPortalReferenceType(ref)
146                for ref in self._client.references.get_types()
147            ]
148        )
149
150        # If a particular name was specified
151        if reference_type is not None:
152            reference_types = reference_types.filter_by_pattern(reference_type)
153            if len(reference_types) == 0:
154                msg = f"Could not find any reference types with the name {reference_type}"
155                raise DataPortalAssetNotFound(msg)
156
157        return DataPortalReferences(
158            [
159                DataPortalReference(ref, project_id=self.id, client=self._client)
160                for ref in self._client.references.get_for_project(
161                    self.id
162                )
163                if reference_type is None or ref.type_ == reference_type
164            ]
165        )
166
167    def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
168        """Return the reference of a particular type with the specified name."""
169
170        if name is None:
171            raise DataPortalInputError("Must specify the reference name")
172
173        return self.list_references(ref_type).get_by_name(name)
174
175    def upload_dataset(
176        self,
177        name: str = None,
178        description='',
179        process: Union[DataPortalProcess, str] = None,
180        upload_folder: str = None,
181        files: List[str] = None,
182        tags: List[str] = None,
183    ):
184        """
185        Upload a set of files to the Data Portal, creating a new dataset.
186
187        If the files parameter is not provided, it will upload all files in the upload folder
188
189        Args:
190            name (str): Name of newly created dataset
191            description (str): Description of newly created dataset
192            process (str | DataPortalProcess): Process to run; may be referenced by name, ID, or object
193            upload_folder (str): Folder containing files to upload
194            files (List[str]): Optional subset of files to upload from the folder
195            tags (List[str]): Optional list of tags to apply to the dataset
196        """
197
198        if name is None:
199            raise DataPortalInputError("Must provide name for new dataset")
200        if process is None:
201            raise DataPortalInputError("Must provide the process which is used for ingest")
202        if upload_folder is None:
203            raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")
204
205        # Parse the process provided by the user
206        process = parse_process_name_or_id(process, self._client)
207
208        # If no files were provided
209        if files is None:
210            # Get the list of files in the upload folder
211            files = get_files_in_directory(upload_folder)
212
213        if files is None or len(files) == 0:
214            raise RuntimeWarning("No files to upload, exiting")
215
216        # Normalize into Tag object
217        if tags is not None:
218            tags = [Tag(value=value) for value in tags]
219
220        # Make sure that the files match the expected pattern
221        self._client.processes.check_dataset_files(files, process.id, upload_folder)
222
223        # Create the ingest process request
224        dataset_create_request = UploadDatasetRequest(
225            process_id=process.id,
226            name=name,
227            description=description,
228            expected_files=files,
229            tags=tags,
230        )
231
232        # Get the response
233        create_response = self._client.datasets.create(project_id=self.id,
234                                                       upload_request=dataset_create_request)
235
236        # Upload the files
237        self._client.datasets.upload_files(
238            project_id=self.id,
239            dataset_id=create_response.id,
240            directory=upload_folder,
241            files=files
242        )
243
244        # Return the dataset which was created, which might take a second to update
245        max_attempts = 5
246        for attempt in range(max_attempts):
247            try:
248                return self.get_dataset_by_id(create_response.id)
249            except DataPortalAssetNotFound as e:
250                if attempt == max_attempts - 1:
251                    raise e
252                else:
253                    sleep(2)
254
255    def samples(self, max_items: int = 10000) -> List[Sample]:
256        """
257        Retrieves a list of samples associated with a project along with their metadata
258
259        Args:
260            max_items (int): Maximum number of records to get (default 10,000)
261        """
262        return self._client.metadata.get_project_samples(self.id, max_items)

Projects in the Data Portal contain collections of Datasets. Users are granted permissions at the project-level, allowing them to view and/or modify all the datasets in that collection.

DataPortalProject( proj: cirro_api_client.v1.models.Project, client: CirroApi)
26    def __init__(self, proj: Project, client: CirroApi):
27        """
28        Instantiate with helper method
29
30        ```python
31        from cirro import DataPortal
32        portal = DataPortal()
33        project = portal.get_project_by_name("Project Name")
34        ```
35
36        """
37        self._data = proj
38        self._client = client

Instantiate with helper method

from cirro import DataPortal
portal = DataPortal()
project = portal.get_project_by_name("Project Name")
id: str
40    @property
41    def id(self) -> str:
42        """
43        Unique identifier
44        """
45        return self._data.id

Unique identifier

name: str
47    @property
48    def name(self) -> str:
49        """
50        Readable name
51        """
52        return self._data.name

Readable name

description: str
54    @property
55    def description(self) -> str:
56        """
57        Longer description of the project
58        """
59        return self._data.description

Longer description of the project

status: cirro_api_client.v1.models.Status
61    @property
62    def status(self) -> Status:
63        """
64        Status of the project
65        """
66        return self._data.status

Status of the project

def list_datasets(self, force_refresh=False) -> cirro.sdk.dataset.DataPortalDatasets:
81    def list_datasets(self, force_refresh=False) -> DataPortalDatasets:
82        """List all the datasets available in the project."""
83        if force_refresh:
84            self._get_datasets.cache_clear()
85
86        return DataPortalDatasets(
87            [
88                DataPortalDataset(d, self._client)
89                for d in self._get_datasets()
90            ]
91        )

List all the datasets available in the project.

def get_dataset( self, name_or_id: str, force_refresh=False) -> DataPortalDataset:
 93    def get_dataset(self, name_or_id: str, force_refresh=False) -> DataPortalDataset:
 94        """Return the dataset matching the given ID or name.
 95
 96        Tries to match by ID first, then by name.
 97        Raises an error if the name matches multiple datasets.
 98        """
 99        if force_refresh:
100            self._get_datasets.cache_clear()
101
102        # Try by ID first
103        try:
104            return self.get_dataset_by_id(name_or_id)
105        except Exception:
106            pass
107
108        # Fall back to name matching
109        matches = [d for d in self._get_datasets() if d.name == name_or_id]
110        if len(matches) == 0:
111            raise DataPortalAssetNotFound(f'Dataset with name or ID "{name_or_id}" not found')
112        if len(matches) > 1:
113            raise DataPortalInputError(
114                f'Multiple datasets found with the name "{name_or_id}" — use get_dataset_by_id instead'
115            )
116        return self.get_dataset_by_id(matches[0].id)

Return the dataset matching the given ID or name.

Tries to match by ID first, then by name. Raises an error if the name matches multiple datasets.

def get_dataset_by_name( self, name: str, force_refresh=False) -> DataPortalDataset:
118    def get_dataset_by_name(self, name: str, force_refresh=False) -> DataPortalDataset:
119        """Return the dataset with the specified name."""
120        if force_refresh:
121            self._get_datasets.cache_clear()
122
123        dataset = next((d for d in self._get_datasets() if d.name == name), None)
124        if dataset is None:
125            raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
126        return self.get_dataset_by_id(dataset.id)

Return the dataset with the specified name.

def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
128    def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
129        """Return the dataset with the specified id."""
130
131        dataset = self._client.datasets.get(project_id=self.id, dataset_id=_id)
132        if dataset is None:
133            raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
134        return DataPortalDataset(dataset, self._client)

Return the dataset with the specified id.

def list_references( self, reference_type: str = None) -> cirro.sdk.reference.DataPortalReferences:
136    def list_references(self, reference_type: str = None) -> DataPortalReferences:
137        """
138        List the references available in a project.
139        Optionally filter to references of a particular type (identified by name)
140        """
141
142        # Get the complete list of references which are available
143        reference_types = DataPortalReferenceTypes(
144            [
145                DataPortalReferenceType(ref)
146                for ref in self._client.references.get_types()
147            ]
148        )
149
150        # If a particular name was specified
151        if reference_type is not None:
152            reference_types = reference_types.filter_by_pattern(reference_type)
153            if len(reference_types) == 0:
154                msg = f"Could not find any reference types with the name {reference_type}"
155                raise DataPortalAssetNotFound(msg)
156
157        return DataPortalReferences(
158            [
159                DataPortalReference(ref, project_id=self.id, client=self._client)
160                for ref in self._client.references.get_for_project(
161                    self.id
162                )
163                if reference_type is None or ref.type_ == reference_type
164            ]
165        )

List the references available in a project. Optionally filter to references of a particular type (identified by name)

def get_reference_by_name( self, name: str = None, ref_type: str = None) -> DataPortalReference:
167    def get_reference_by_name(self, name: str = None, ref_type: str = None) -> DataPortalReference:
168        """Return the reference of a particular type with the specified name."""
169
170        if name is None:
171            raise DataPortalInputError("Must specify the reference name")
172
173        return self.list_references(ref_type).get_by_name(name)

Return the reference of a particular type with the specified name.

def upload_dataset( self, name: str = None, description='', process: Union[DataPortalProcess, str] = None, upload_folder: str = None, files: List[str] = None, tags: List[str] = None):
175    def upload_dataset(
176        self,
177        name: str = None,
178        description='',
179        process: Union[DataPortalProcess, str] = None,
180        upload_folder: str = None,
181        files: List[str] = None,
182        tags: List[str] = None,
183    ):
184        """
185        Upload a set of files to the Data Portal, creating a new dataset.
186
187        If the files parameter is not provided, it will upload all files in the upload folder
188
189        Args:
190            name (str): Name of newly created dataset
191            description (str): Description of newly created dataset
192            process (str | DataPortalProcess): Process to run; may be referenced by name, ID, or object
193            upload_folder (str): Folder containing files to upload
194            files (List[str]): Optional subset of files to upload from the folder
195            tags (List[str]): Optional list of tags to apply to the dataset
196        """
197
198        if name is None:
199            raise DataPortalInputError("Must provide name for new dataset")
200        if process is None:
201            raise DataPortalInputError("Must provide the process which is used for ingest")
202        if upload_folder is None:
203            raise DataPortalInputError("Must provide upload_folder -- folder containing files to upload")
204
205        # Parse the process provided by the user
206        process = parse_process_name_or_id(process, self._client)
207
208        # If no files were provided
209        if files is None:
210            # Get the list of files in the upload folder
211            files = get_files_in_directory(upload_folder)
212
213        if files is None or len(files) == 0:
214            raise RuntimeWarning("No files to upload, exiting")
215
216        # Normalize into Tag object
217        if tags is not None:
218            tags = [Tag(value=value) for value in tags]
219
220        # Make sure that the files match the expected pattern
221        self._client.processes.check_dataset_files(files, process.id, upload_folder)
222
223        # Create the ingest process request
224        dataset_create_request = UploadDatasetRequest(
225            process_id=process.id,
226            name=name,
227            description=description,
228            expected_files=files,
229            tags=tags,
230        )
231
232        # Get the response
233        create_response = self._client.datasets.create(project_id=self.id,
234                                                       upload_request=dataset_create_request)
235
236        # Upload the files
237        self._client.datasets.upload_files(
238            project_id=self.id,
239            dataset_id=create_response.id,
240            directory=upload_folder,
241            files=files
242        )
243
244        # Return the dataset which was created, which might take a second to update
245        max_attempts = 5
246        for attempt in range(max_attempts):
247            try:
248                return self.get_dataset_by_id(create_response.id)
249            except DataPortalAssetNotFound as e:
250                if attempt == max_attempts - 1:
251                    raise e
252                else:
253                    sleep(2)

Upload a set of files to the Data Portal, creating a new dataset.

If the files parameter is not provided, it will upload all files in the upload folder

Arguments:
  • name (str): Name of newly created dataset
  • description (str): Description of newly created dataset
  • process (str | DataPortalProcess): Process to run; may be referenced by name, ID, or object
  • upload_folder (str): Folder containing files to upload
  • files (List[str]): Optional subset of files to upload from the folder
  • tags (List[str]): Optional list of tags to apply to the dataset
def samples( self, max_items: int = 10000) -> List[cirro_api_client.v1.models.Sample]:
255    def samples(self, max_items: int = 10000) -> List[Sample]:
256        """
257        Retrieves a list of samples associated with a project along with their metadata
258
259        Args:
260            max_items (int): Maximum number of records to get (default 10,000)
261        """
262        return self._client.metadata.get_project_samples(self.id, max_items)

Retrieves a list of samples associated with a project along with their metadata

Arguments:
  • max_items (int): Maximum number of records to get (default 10,000)
class DataPortalProcess(cirro.sdk.asset.DataPortalAsset):
 13class DataPortalProcess(DataPortalAsset):
 14    """Helper functions for interacting with analysis processes."""
 15
 16    def __init__(self, process: Union[Process, ProcessDetail], client: CirroApi):
 17        """
 18        Instantiate with helper method
 19
 20        ```python
 21        from cirro import DataPortal
 22        portal = DataPortal()
 23        process = portal.get_process_by_name("Process Name")
 24        ```
 25        """
 26        self._data = process
 27        self._client = client
 28
 29    @property
 30    def id(self) -> str:
 31        """Unique identifier"""
 32        return self._data.id
 33
 34    @property
 35    def name(self) -> str:
 36        """Readable name"""
 37        return self._data.name
 38
 39    @property
 40    def description(self) -> str:
 41        """Longer description of process"""
 42        return self._data.description
 43
 44    @property
 45    def child_process_ids(self) -> List[str]:
 46        """List of processes which can be run on the output of this process"""
 47        return self._data.child_process_ids
 48
 49    @property
 50    def executor(self) -> Executor:
 51        """INGEST, CROMWELL, or NEXTFLOW"""
 52        return self._data.executor
 53
 54    @property
 55    def category(self) -> str:
 56        """Category of process"""
 57        return self._data.category
 58
 59    @property
 60    def pipeline_type(self) -> str:
 61        """Pipeline type"""
 62        return self._data.pipeline_type
 63
 64    @property
 65    def documentation_url(self) -> str:
 66        """Documentation URL"""
 67        return self._data.documentation_url
 68
 69    @property
 70    def file_requirements_message(self) -> str:
 71        """Description of files required for INGEST processes"""
 72        return self._data.file_requirements_message
 73
 74    @property
 75    def code(self) -> PipelineCode:
 76        """Pipeline code configuration"""
 77        return self._get_detail().pipeline_code
 78
 79    @property
 80    def custom_settings(self) -> CustomPipelineSettings:
 81        """Custom settings for the process"""
 82        return self._get_detail().custom_settings
 83
 84    def _get_detail(self) -> ProcessDetail:
 85        if not isinstance(self._data, ProcessDetail):
 86            self._data = self._client.processes.get(self.id)
 87        return self._data
 88
 89    def __str__(self):
 90        return '\n'.join([
 91            f"{i.title()}: {self.__getattribute__(i)}"
 92            for i in ['name', 'id', 'description']
 93        ])
 94
 95    def get_parameter_spec(self) -> ParameterSpecification:
 96        """
 97        Gets a specification used to describe the parameters used in the process.
 98        """
 99        return self._client.processes.get_parameter_spec(self.id)
100
101    def run_analysis(
102            self,
103            name: str = None,
104            project_id: str = None,
105            datasets: list = None,
106            description: str = "",
107            params=None,
108            notifications_emails: List[str] = None,
109            compute_environment: str = None,
110            resume_dataset_id: str = None,
111            source_sample_ids: List[str] = None
112    ) -> str:
113        """
114        Runs this process on one or more input datasets and returns the ID of the newly created dataset.
115
116        Args:
117            name (str): Name of newly created dataset
118            project_id (str): ID of the project to run the analysis in
119            datasets (List[DataPortalDataset or str]): One or more input datasets
120             (as DataPortalDataset objects or dataset ID strings)
121            description (str): Description of newly created dataset
122            params (dict): Analysis parameters
123            notifications_emails (List[str]): Notification email address(es)
124            compute_environment (str): Name or ID of compute environment to use,
125             if blank it will run in AWS
126            resume_dataset_id (str): ID of dataset to resume from, used for caching task execution.
127             It will attempt to re-use the previous output to minimize duplicate work.
128            Note that Nextflow does not require this parameter, as it will automatically resume
129             from any previous attempts using a global cache.
130            source_sample_ids (List[str]): List of sample IDs to use as input for the analysis.
131
132        Returns:
133            dataset_id (str): ID of newly created dataset
134        """
135        if name is None:
136            raise DataPortalInputError("Must specify 'name' for run_analysis")
137        if project_id is None:
138            raise DataPortalInputError("Must specify 'project_id' for run_analysis")
139        if not datasets:
140            raise DataPortalInputError("Must specify 'datasets' for run_analysis")
141        if notifications_emails is None:
142            notifications_emails = []
143        if params is None:
144            params = {}
145
146        # Accept DataPortalDataset objects or raw ID strings
147        source_dataset_ids = [
148            ds if isinstance(ds, str) else ds.id
149            for ds in datasets
150        ]
151
152        if compute_environment:
153            compute_environment_name = compute_environment
154            compute_environments = self._client.compute_environments.list_environments_for_project(
155                project_id=project_id
156            )
157            compute_environment = next(
158                (env for env in compute_environments
159                 if env.name == compute_environment or env.id == compute_environment),
160                None
161            )
162            if compute_environment is None:
163                raise DataPortalInputError(f"Compute environment '{compute_environment_name}' not found")
164
165        resp = self._client.execution.run_analysis(
166            project_id=project_id,
167            request=RunAnalysisRequest(
168                name=name,
169                description=description,
170                process_id=self.id,
171                source_dataset_ids=source_dataset_ids,
172                params=RunAnalysisRequestParams.from_dict(params),
173                notification_emails=notifications_emails,
174                resume_dataset_id=resume_dataset_id,
175                source_sample_ids=source_sample_ids,
176                compute_environment_id=compute_environment.id if compute_environment else None
177            )
178        )
179        return resp.id

Helper functions for interacting with analysis processes.

DataPortalProcess( process: Union[cirro_api_client.v1.models.Process, cirro_api_client.v1.models.ProcessDetail], client: CirroApi)
16    def __init__(self, process: Union[Process, ProcessDetail], client: CirroApi):
17        """
18        Instantiate with helper method
19
20        ```python
21        from cirro import DataPortal
22        portal = DataPortal()
23        process = portal.get_process_by_name("Process Name")
24        ```
25        """
26        self._data = process
27        self._client = client

Instantiate with helper method

from cirro import DataPortal
portal = DataPortal()
process = portal.get_process_by_name("Process Name")
id: str
29    @property
30    def id(self) -> str:
31        """Unique identifier"""
32        return self._data.id

Unique identifier

name: str
34    @property
35    def name(self) -> str:
36        """Readable name"""
37        return self._data.name

Readable name

description: str
39    @property
40    def description(self) -> str:
41        """Longer description of process"""
42        return self._data.description

Longer description of process

child_process_ids: List[str]
44    @property
45    def child_process_ids(self) -> List[str]:
46        """List of processes which can be run on the output of this process"""
47        return self._data.child_process_ids

List of processes which can be run on the output of this process

executor: cirro_api_client.v1.models.Executor
49    @property
50    def executor(self) -> Executor:
51        """INGEST, CROMWELL, or NEXTFLOW"""
52        return self._data.executor

INGEST, CROMWELL, or NEXTFLOW

category: str
54    @property
55    def category(self) -> str:
56        """Category of process"""
57        return self._data.category

Category of process

pipeline_type: str
59    @property
60    def pipeline_type(self) -> str:
61        """Pipeline type"""
62        return self._data.pipeline_type

Pipeline type

documentation_url: str
64    @property
65    def documentation_url(self) -> str:
66        """Documentation URL"""
67        return self._data.documentation_url

Documentation URL

file_requirements_message: str
69    @property
70    def file_requirements_message(self) -> str:
71        """Description of files required for INGEST processes"""
72        return self._data.file_requirements_message

Description of files required for INGEST processes

74    @property
75    def code(self) -> PipelineCode:
76        """Pipeline code configuration"""
77        return self._get_detail().pipeline_code

Pipeline code configuration

79    @property
80    def custom_settings(self) -> CustomPipelineSettings:
81        """Custom settings for the process"""
82        return self._get_detail().custom_settings

Custom settings for the process

def get_parameter_spec(self) -> cirro.models.form_specification.ParameterSpecification:
95    def get_parameter_spec(self) -> ParameterSpecification:
96        """
97        Gets a specification used to describe the parameters used in the process.
98        """
99        return self._client.processes.get_parameter_spec(self.id)

Gets a specification used to describe the parameters used in the process.

def run_analysis( self, name: str = None, project_id: str = None, datasets: list = None, description: str = '', params=None, notifications_emails: List[str] = None, compute_environment: str = None, resume_dataset_id: str = None, source_sample_ids: List[str] = None) -> str:
101    def run_analysis(
102            self,
103            name: str = None,
104            project_id: str = None,
105            datasets: list = None,
106            description: str = "",
107            params=None,
108            notifications_emails: List[str] = None,
109            compute_environment: str = None,
110            resume_dataset_id: str = None,
111            source_sample_ids: List[str] = None
112    ) -> str:
113        """
114        Runs this process on one or more input datasets, returns the ID of the newly created dataset.
115
116        Args:
117            name (str): Name of newly created dataset
118            project_id (str): ID of the project to run the analysis in
119            datasets (List[DataPortalDataset or str]): One or more input datasets
120             (as DataPortalDataset objects or dataset ID strings)
121            description (str): Description of newly created dataset
122            params (dict): Analysis parameters
123            notifications_emails (List[str]): Notification email address(es)
124            compute_environment (str): Name or ID of compute environment to use,
125             if blank it will run in AWS
126            resume_dataset_id (str): ID of dataset to resume from, used for caching task execution.
127             It will attempt to re-use the previous output to minimize duplicate work.
128            Note that Nextflow does not require this parameter, as it will automatically resume
129             from any previous attempts using a global cache.
130            source_sample_ids (List[str]): List of sample IDs to use as input for the analysis.
131
132        Returns:
133            dataset_id (str): ID of newly created dataset
134        """
135        if name is None:
136            raise DataPortalInputError("Must specify 'name' for run_analysis")
137        if project_id is None:
138            raise DataPortalInputError("Must specify 'project_id' for run_analysis")
139        if not datasets:
140            raise DataPortalInputError("Must specify 'datasets' for run_analysis")
141        if notifications_emails is None:
142            notifications_emails = []
143        if params is None:
144            params = {}
145
146        # Accept DataPortalDataset objects or raw ID strings
147        source_dataset_ids = [
148            ds if isinstance(ds, str) else ds.id
149            for ds in datasets
150        ]
151
152        if compute_environment:
153            compute_environment_name = compute_environment
154            compute_environments = self._client.compute_environments.list_environments_for_project(
155                project_id=project_id
156            )
157            compute_environment = next(
158                (env for env in compute_environments
159                 if env.name == compute_environment or env.id == compute_environment),
160                None
161            )
162            if compute_environment is None:
163                raise DataPortalInputError(f"Compute environment '{compute_environment_name}' not found")
164
165        resp = self._client.execution.run_analysis(
166            project_id=project_id,
167            request=RunAnalysisRequest(
168                name=name,
169                description=description,
170                process_id=self.id,
171                source_dataset_ids=source_dataset_ids,
172                params=RunAnalysisRequestParams.from_dict(params),
173                notification_emails=notifications_emails,
174                resume_dataset_id=resume_dataset_id,
175                source_sample_ids=source_sample_ids,
176                compute_environment_id=compute_environment.id if compute_environment else None
177            )
178        )
179        return resp.id

Runs this process on one or more input datasets, returns the ID of the newly created dataset.

Arguments:
  • name (str): Name of newly created dataset
  • project_id (str): ID of the project to run the analysis in
  • datasets (List[DataPortalDataset or str]): One or more input datasets (as DataPortalDataset objects or dataset ID strings)
  • description (str): Description of newly created dataset
  • params (dict): Analysis parameters
  • notifications_emails (List[str]): Notification email address(es)
  • compute_environment (str): Name or ID of compute environment to use, if blank it will run in AWS
  • resume_dataset_id (str): ID of dataset to resume from, used for caching task execution. It will attempt to re-use the previous output to minimize duplicate work. Note that Nextflow does not require this parameter, as it will automatically resume from any previous attempts using a global cache.
  • source_sample_ids (List[str]): List of sample IDs to use as input for the analysis.
Returns:

dataset_id (str): ID of newly created dataset

class DataPortalDataset(cirro.sdk.asset.DataPortalAsset):
class DataPortalDataset(DataPortalAsset):
    """
    Datasets in the Data Portal are collections of files which have
    either been uploaded directly, or which have been output by
    an analysis pipeline or notebook.
    """

    def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi):
        """
        Instantiate a dataset object

        Should be invoked from a top-level constructor, for example:

        ```python
        from cirro import DataPortal
        portal = DataPortal()
        dataset = portal.get_dataset(
            project="id-or-name-of-project",
            dataset="id-or-name-of-dataset"
        )
        ```

        """
        assert dataset.project_id is not None, "Must provide dataset with project_id attribute"
        self._data = dataset
        # Cached asset listing, populated lazily by _get_assets()
        self._assets: Optional[DatasetAssets] = None
        self._client = client

    @property
    def id(self) -> str:
        """Unique identifier for the dataset"""
        return self._data.id

    @property
    def name(self) -> str:
        """Editable name for the dataset"""
        return self._data.name

    @property
    def description(self) -> str:
        """Longer description of the dataset"""
        return self._data.description

    @property
    def process_id(self) -> str:
        """Unique ID of process used to create the dataset"""
        return self._data.process_id

    @property
    def process(self) -> ProcessDetail:
        """
        Object representing the process used to create the dataset
        """
        return self._client.processes.get(self.process_id)

    @property
    def project_id(self) -> str:
        """ID of the project containing the dataset"""
        return self._data.project_id

    @property
    def status(self) -> Status:
        """
        Status of the dataset
        """
        return self._data.status

    @property
    def source_dataset_ids(self) -> List[str]:
        """IDs of the datasets used as sources for this dataset (if any)"""
        return self._data.source_dataset_ids

    @property
    def source_datasets(self) -> List['DataPortalDataset']:
        """
        Objects representing the datasets used as sources for this dataset (if any)
        """
        return [
            DataPortalDataset(
                dataset=self._client.datasets.get(project_id=self.project_id, dataset_id=dataset_id),
                client=self._client
            )
            for dataset_id in self.source_dataset_ids
        ]

    @property
    def params(self) -> dict:
        """
        Parameters used to generate the dataset
        """
        return self._get_detail().params.to_dict()

    @property
    def info(self) -> dict:
        """
        Extra information about the dataset
        """
        return self._get_detail().info.to_dict()

    @property
    def tags(self) -> List[Tag]:
        """
        Tags applied to the dataset
        """
        return self._data.tags

    @property
    def share(self) -> Optional[NamedItem]:
        """
        Share associated with the dataset, if any.
        """
        return self._get_detail().share

    @property
    def created_by(self) -> str:
        """User who created the dataset"""
        return self._data.created_by

    @property
    def created_at(self) -> datetime.datetime:
        """Timestamp of dataset creation"""
        return self._data.created_at

    def _get_detail(self):
        # Lazily upgrade the cached record to a full DatasetDetail the first
        # time detail-only fields (params, info, share) are requested
        if not isinstance(self._data, DatasetDetail):
            self._data = self._client.datasets.get(project_id=self.project_id, dataset_id=self.id)
        return self._data

    def _get_assets(self):
        # Fetch the asset listing once and cache it on the instance
        if not self._assets:
            self._assets = self._client.datasets.get_assets_listing(
                project_id=self.project_id,
                dataset_id=self.id
            )
        return self._assets

    def __str__(self):
        return '\n'.join([
            f"{i.title()}: {getattr(self, i)}"
            for i in ['name', 'id', 'description', 'status']
        ])

    def get_file(self, relative_path: str) -> DataPortalFile:
        """
        Get a file from the dataset using its relative path.

        Args:
            relative_path (str): Relative path of file within the dataset

        Returns:
            `cirro.sdk.file.DataPortalFile`
        """

        # Get the list of files in this dataset
        files = self.list_files()

        # Try getting the file using the relative path provided by the user
        try:
            return files.get_by_id(relative_path)
        except DataPortalAssetNotFound:
            # Try getting the file with the 'data/' prefix prepended
            try:
                return files.get_by_id("data/" + relative_path)
            except DataPortalAssetNotFound:
                # If not found, raise the exception using the string provided
                # by the user, not the data/ prepended version (which may be
                # confusing to the user)
                msg = f"No file found with path '{relative_path}'."
                raise DataPortalAssetNotFound(msg)

    def list_files(self, file_limit: int = 100000) -> DataPortalFiles:
        """
        Return the list of files which make up the dataset.

        Args:
            file_limit (int): Maximum number of files to return (default 100,000)
        """
        assets = self._client.datasets.get_assets_listing(
            project_id=self.project_id,
            dataset_id=self.id,
            file_limit=file_limit
        )
        files = assets.files

        return DataPortalFiles(
            [
                DataPortalFile(file=file, client=self._client)
                for file in files
            ]
        )

    def read_files(
            self,
            glob: str = None,
            pattern: str = None,
            filetype: str = None,
            **kwargs
    ):
        """
        Read the contents of files in the dataset.

        See :meth:`~cirro.sdk.portal.DataPortal.read_files` for full details
        on ``glob``/``pattern`` matching and filetype options.

        Args:
            glob (str): Wildcard expression to match files.
                Yields one item per matching file: the parsed content.
            pattern (str): Wildcard expression with ``{name}`` capture
                placeholders. Yields ``(content, meta)`` per matching file.
            filetype (str): File format used to parse each file
                (or ``None`` to infer from extension).
            **kwargs: Additional keyword arguments forwarded to the
                file-parsing function.

        Yields:
            - When using ``glob``: *content* for each matching file
            - When using ``pattern``: ``(content, meta)`` for each matching file
        """
        if glob is not None and pattern is not None:
            raise DataPortalInputError("Cannot specify both 'glob' and 'pattern' — use one or the other")
        if glob is None and pattern is None:
            raise DataPortalInputError("Must specify either 'glob' or 'pattern'")

        if glob is not None:
            for file in filter_files_by_pattern(list(self.list_files()), glob):
                yield _read_file_with_format(file, filetype, **kwargs)
        else:
            compiled_regex, _ = _pattern_to_captures_regex(pattern)
            for file in self.list_files():
                m = compiled_regex.match(file.relative_path)
                if m is not None:
                    yield _read_file_with_format(file, filetype, **kwargs), m.groupdict()

    def read_file(
            self,
            path: str = None,
            glob: str = None,
            filetype: str = None,
            **kwargs
    ) -> Any:
        """
        Read the contents of a single file from the dataset.

        See :meth:`~cirro.sdk.portal.DataPortal.read_file` for full details.

        Args:
            path (str): Exact relative path of the file within the dataset.
            glob (str): Wildcard expression matching exactly one file.
            filetype (str): File format used to parse the file. Supported values
                are the same as :meth:`~cirro.sdk.portal.DataPortal.read_files`.
            **kwargs: Additional keyword arguments forwarded to the file-parsing
                function.

        Returns:
            Parsed file content.
        """
        if path is not None and glob is not None:
            raise DataPortalInputError("Cannot specify both 'path' and 'glob' — use one or the other")
        if path is None and glob is None:
            raise DataPortalInputError("Must specify either 'path' or 'glob'")

        if path is not None:
            file = self.get_file(path)
        else:
            matches = list(filter_files_by_pattern(list(self.list_files()), glob))
            if len(matches) == 0:
                raise DataPortalAssetNotFound(f"No files matched glob '{glob}'")
            if len(matches) > 1:
                raise DataPortalInputError(
                    f"glob '{glob}' matched {len(matches)} files — use read_files() to read multiple files"
                )
            file = matches[0]

        return _read_file_with_format(file, filetype, **kwargs)

    def get_trace(self) -> Any:
        """
        Read the Nextflow workflow trace file for this dataset as a DataFrame.

        Returns:
            `pandas.DataFrame`
        """
        return self.get_artifact(ArtifactType.WORKFLOW_TRACE).read_csv(sep='\t')

    def get_logs(self) -> str:
        """
        Read the Nextflow workflow logs for this dataset as a string.

        Returns:
            str
        """
        return self.get_artifact(ArtifactType.WORKFLOW_LOGS).read()

    def get_artifact(self, artifact_type: ArtifactType) -> DataPortalFile:
        """
        Get the artifact of a particular type from the dataset
        """
        artifacts = self._get_assets().artifacts
        artifact = next((a for a in artifacts if a.artifact_type == artifact_type), None)
        if artifact is None:
            raise DataPortalAssetNotFound(f"No artifact found with type '{artifact_type}'")
        return DataPortalFile(file=artifact.file, client=self._client)

    def list_artifacts(self) -> List[DataPortalFile]:
        """
        Return the list of artifacts associated with the dataset

        An artifact may be something generated as part of the analysis or other process.
        See `cirro_api_client.v1.models.ArtifactType` for the list of possible artifact types.

        """
        artifacts = self._get_assets().artifacts
        return DataPortalFiles(
            [
                DataPortalFile(file=artifact.file, client=self._client)
                for artifact in artifacts
            ]
        )

    def download_files(self, download_location: str = None, glob: str = None) -> None:
        """
        Download all the files from the dataset to a local directory.

        Args:
            download_location (str): Path to local directory
            glob (str): Optional wildcard expression to filter which files are downloaded
                (e.g., ``'*.csv'``, ``'data/**/*.tsv.gz'``).
                If omitted, all files are downloaded.
        """

        files = self.list_files()
        if glob is not None:
            files = DataPortalFiles(filter_files_by_pattern(list(files), glob))
        files.download(download_location)

    def run_analysis(
            self,
            name: str = None,
            description: str = "",
            process: Union[DataPortalProcess, str] = None,
            params=None,
            notifications_emails: List[str] = None,
            compute_environment: str = None,
            resume_dataset_id: str = None,
            source_sample_ids: List[str] = None
    ) -> str:
        """
        Runs an analysis on a dataset, returns the ID of the newly created dataset.

        The process can be provided as either a DataPortalProcess object,
        or a string which corresponds to the name or ID of the process.

        Args:
            name (str): Name of newly created dataset
            description (str): Description of newly created dataset
            process (DataPortalProcess or str): Process to run
            params (dict): Analysis parameters
            notifications_emails (List[str]): Notification email address(es)
            compute_environment (str): Name or ID of compute environment to use,
             if blank it will run in AWS
            resume_dataset_id (str): ID of dataset to resume from, used for caching task execution.
             It will attempt to re-use the previous output to minimize duplicate work
            source_sample_ids (List[str]): List of sample IDs to use as input for the analysis.

        Returns:
            dataset_id (str): ID of newly created dataset
        """
        if name is None:
            raise DataPortalInputError("Must specify 'name' for run_analysis")
        if process is None:
            raise DataPortalInputError("Must specify 'process' for run_analysis")
        if notifications_emails is None:
            notifications_emails = []
        if params is None:
            params = {}

        # If the process is a string, try to parse it as a process name or ID
        process = parse_process_name_or_id(process, self._client)

        if compute_environment:
            compute_environment_name = compute_environment
            compute_environments = self._client.compute_environments.list_environments_for_project(
                project_id=self.project_id
            )
            compute_environment = next(
                (env for env in compute_environments
                 if env.name == compute_environment or env.id == compute_environment),
                None
            )
            if compute_environment is None:
                raise DataPortalInputError(f"Compute environment '{compute_environment_name}' not found")

        resp = self._client.execution.run_analysis(
            project_id=self.project_id,
            request=RunAnalysisRequest(
                name=name,
                description=description,
                process_id=process.id,
                source_dataset_ids=[self.id],
                params=RunAnalysisRequestParams.from_dict(params),
                notification_emails=notifications_emails,
                resume_dataset_id=resume_dataset_id,
                source_sample_ids=source_sample_ids,
                compute_environment_id=compute_environment.id if compute_environment else None
            )
        )
        return resp.id

    def update_samplesheet(self,
                           contents: str = None,
                           file_path: PathLike = None):
        """
        Updates the samplesheet metadata of a dataset.
        Provide either the contents (as a string) or a file path.
        Both must be in the format of a CSV.

        Args:
            contents (str): Samplesheet contents to update (should be a CSV string)
            file_path (PathLike): Path of file to update (should be a CSV file)

        Example:
        ```python
        dataset.update_samplesheet(
            file_path=Path('~/samplesheet.csv')
        )
        ```
        """

        if contents is None and file_path is None:
            raise DataPortalInputError("Must specify either 'contents' or 'file_path' when updating samplesheet")

        samplesheet_contents = contents
        if file_path is not None:
            samplesheet_contents = Path(file_path).expanduser().read_text()

        # Validate the samplesheet against the process file requirements
        # before committing the update
        file_names = [f.file_name for f in self.list_files()]
        request = ValidateFileRequirementsRequest(
            file_names=file_names,
            sample_sheet=samplesheet_contents,
        )
        requirements = validate_file_requirements.sync(process_id=self.process_id,
                                                       body=request,
                                                       client=self._client.api_client)
        if error_msg := requirements.error_msg:
            raise DataPortalInputError(error_msg)

        # Update the samplesheet if everything looks ok
        self._client.datasets.update_samplesheet(
            project_id=self.project_id,
            dataset_id=self.id,
            samplesheet=samplesheet_contents
        )

Datasets in the Data Portal are collections of files which have either been uploaded directly, or which have been output by an analysis pipeline or notebook.

DataPortalDataset( dataset: Union[cirro_api_client.v1.models.Dataset, cirro_api_client.v1.models.DatasetDetail], client: CirroApi)
117    def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi):
118        """
119        Instantiate a dataset object
120
121        Should be invoked from a top-level constructor, for example:
122
123        ```python
124        from cirro import DataPortal
125        portal = DataPortal()
126        dataset = portal.get_dataset(
127            project="id-or-name-of-project",
128            dataset="id-or-name-of-dataset"
129        )
130        ```
131
132        """
133        assert dataset.project_id is not None, "Must provide dataset with project_id attribute"
134        self._data = dataset
135        self._assets: Optional[DatasetAssets] = None
136        self._client = client

Instantiate a dataset object

Should be invoked from a top-level constructor, for example:

from cirro import DataPortal
portal = DataPortal()
dataset = portal.get_dataset(
    project="id-or-name-of-project",
    dataset="id-or-name-of-dataset"
)
id: str
138    @property
139    def id(self) -> str:
140        """Unique identifier for the dataset"""
141        return self._data.id

Unique identifier for the dataset

name: str
143    @property
144    def name(self) -> str:
145        """Editable name for the dataset"""
146        return self._data.name

Editable name for the dataset

description: str
148    @property
149    def description(self) -> str:
150        """Longer name for the dataset"""
151        return self._data.description

Longer description of the dataset

process_id: str
153    @property
154    def process_id(self) -> str:
155        """Unique ID of process used to create the dataset"""
156        return self._data.process_id

Unique ID of process used to create the dataset

158    @property
159    def process(self) -> ProcessDetail:
160        """
161        Object representing the process used to create the dataset
162        """
163        return self._client.processes.get(self.process_id)

Object representing the process used to create the dataset

project_id: str
165    @property
166    def project_id(self) -> str:
167        """ID of the project containing the dataset"""
168        return self._data.project_id

ID of the project containing the dataset

status: cirro_api_client.v1.models.Status
170    @property
171    def status(self) -> Status:
172        """
173        Status of the dataset
174        """
175        return self._data.status

Status of the dataset

source_dataset_ids: List[str]
177    @property
178    def source_dataset_ids(self) -> List[str]:
179        """IDs of the datasets used as sources for this dataset (if any)"""
180        return self._data.source_dataset_ids

IDs of the datasets used as sources for this dataset (if any)

source_datasets: List[DataPortalDataset]
182    @property
183    def source_datasets(self) -> List['DataPortalDataset']:
184        """
185        Objects representing the datasets used as sources for this dataset (if any)
186        """
187        return [
188            DataPortalDataset(
189                dataset=self._client.datasets.get(project_id=self.project_id, dataset_id=dataset_id),
190                client=self._client
191            )
192            for dataset_id in self.source_dataset_ids
193        ]

Objects representing the datasets used as sources for this dataset (if any)

params: dict
195    @property
196    def params(self) -> dict:
197        """
198        Parameters used to generate the dataset
199        """
200        return self._get_detail().params.to_dict()

Parameters used to generate the dataset

info: dict
202    @property
203    def info(self) -> dict:
204        """
205        Extra information about the dataset
206        """
207        return self._get_detail().info.to_dict()

Extra information about the dataset

tags: List[cirro_api_client.v1.models.Tag]
209    @property
210    def tags(self) -> List[Tag]:
211        """
212        Tags applied to the dataset
213        """
214        return self._data.tags

Tags applied to the dataset

share: Optional[cirro_api_client.v1.models.NamedItem]
216    @property
217    def share(self) -> Optional[NamedItem]:
218        """
219        Share associated with the dataset, if any.
220        """
221        return self._get_detail().share

Share associated with the dataset, if any.

created_by: str
223    @property
224    def created_by(self) -> str:
225        """User who created the dataset"""
226        return self._data.created_by

User who created the dataset

created_at: datetime.datetime
228    @property
229    def created_at(self) -> datetime.datetime:
230        """Timestamp of dataset creation"""
231        return self._data.created_at

Timestamp of dataset creation

def get_file(self, relative_path: str) -> cirro.sdk.file.DataPortalFile:
252    def get_file(self, relative_path: str) -> DataPortalFile:
253        """
254        Get a file from the dataset using its relative path.
255
256        Args:
257            relative_path (str): Relative path of file within the dataset
258
259        Returns:
260            `from cirro.sdk.file import DataPortalFile`
261        """
262
263        # Get the list of files in this dataset
264        files = self.list_files()
265
266        # Try getting the file using the relative path provided by the user
267        try:
268            return files.get_by_id(relative_path)
269        except DataPortalAssetNotFound:
270            # Try getting the file with the 'data/' prefix prepended
271            try:
272                return files.get_by_id("data/" + relative_path)
273            except DataPortalAssetNotFound:
274                # If not found, raise the exception using the string provided
275                # by the user, not the data/ prepended version (which may be
276                # confusing to the user)
277                msg = '\n'.join([f"No file found with path '{relative_path}'."])
278                raise DataPortalAssetNotFound(msg)

Get a file from the dataset using its relative path.

Arguments:
  • relative_path (str): Relative path of file within the dataset
Returns:

`DataPortalFile` (see `cirro.sdk.file.DataPortalFile`)

def list_files(self, file_limit: int = 100000) -> cirro.sdk.file.DataPortalFiles:
280    def list_files(self, file_limit: int = 100000) -> DataPortalFiles:
281        """
282        Return the list of files which make up the dataset.
283
284        Args:
285            file_limit (int): Maximum number of files to return (default 100,000)
286        """
287        assets = self._client.datasets.get_assets_listing(
288            project_id=self.project_id,
289            dataset_id=self.id,
290            file_limit=file_limit
291        )
292        files = assets.files
293
294        return DataPortalFiles(
295            [
296                DataPortalFile(file=file, client=self._client)
297                for file in files
298            ]
299        )

Return the list of files which make up the dataset.

Arguments:
  • file_limit (int): Maximum number of files to return (default 100,000)
def read_files( self, glob: str = None, pattern: str = None, filetype: str = None, **kwargs):
301    def read_files(
302            self,
303            glob: str = None,
304            pattern: str = None,
305            filetype: str = None,
306            **kwargs
307    ):
308        """
309        Read the contents of files in the dataset.
310
311        See :meth:`~cirro.sdk.portal.DataPortal.read_files` for full details
312        on ``glob``/``pattern`` matching and filetype options.
313
314        Args:
315            glob (str): Wildcard expression to match files.
316                Yields one item per matching file: the parsed content.
317            pattern (str): Wildcard expression with ``{name}`` capture
318                placeholders. Yields ``(content, meta)`` per matching file.
319            filetype (str): File format used to parse each file
320                (or ``None`` to infer from extension).
321            **kwargs: Additional keyword arguments forwarded to the
322                file-parsing function.
323
324        Yields:
325            - When using ``glob``: *content* for each matching file
326            - When using ``pattern``: ``(content, meta)`` for each matching file
327        """
328        if glob is not None and pattern is not None:
329            raise DataPortalInputError("Cannot specify both 'glob' and 'pattern' — use one or the other")
330        if glob is None and pattern is None:
331            raise DataPortalInputError("Must specify either 'glob' or 'pattern'")
332
333        if glob is not None:
334            for file in filter_files_by_pattern(list(self.list_files()), glob):
335                yield _read_file_with_format(file, filetype, **kwargs)
336        else:
337            compiled_regex, _ = _pattern_to_captures_regex(pattern)
338            for file in self.list_files():
339                m = compiled_regex.match(file.relative_path)
340                if m is not None:
341                    yield _read_file_with_format(file, filetype, **kwargs), m.groupdict()

Read the contents of files in the dataset.

See DataPortal.read_files() for full details on glob/pattern matching and filetype options.

Arguments:
  • glob (str): Wildcard expression to match files. Yields one item per matching file: the parsed content.
  • pattern (str): Wildcard expression with {name} capture placeholders. Yields (content, meta) per matching file.
  • filetype (str): File format used to parse each file (or None to infer from extension).
  • **kwargs: Additional keyword arguments forwarded to the file-parsing function.
Yields:
  • When using glob: content for each matching file
  • When using pattern: (content, meta) for each matching file
def read_file( self, path: str = None, glob: str = None, filetype: str = None, **kwargs) -> Any:
343    def read_file(
344            self,
345            path: str = None,
346            glob: str = None,
347            filetype: str = None,
348            **kwargs
349    ) -> Any:
350        """
351        Read the contents of a single file from the dataset.
352
353        See :meth:`~cirro.sdk.portal.DataPortal.read_file` for full details.
354
355        Args:
356            path (str): Exact relative path of the file within the dataset.
357            glob (str): Wildcard expression matching exactly one file.
358            filetype (str): File format used to parse the file. Supported values
359                are the same as :meth:`~cirro.sdk.portal.DataPortal.read_files`.
360            **kwargs: Additional keyword arguments forwarded to the file-parsing
361                function.
362
363        Returns:
364            Parsed file content.
365        """
366        if path is not None and glob is not None:
367            raise DataPortalInputError("Cannot specify both 'path' and 'glob' — use one or the other")
368        if path is None and glob is None:
369            raise DataPortalInputError("Must specify either 'path' or 'glob'")
370
371        if path is not None:
372            file = self.get_file(path)
373        else:
374            matches = list(filter_files_by_pattern(list(self.list_files()), glob))
375            if len(matches) == 0:
376                raise DataPortalAssetNotFound(f"No files matched glob '{glob}'")
377            if len(matches) > 1:
378                raise DataPortalInputError(
379                    f"glob '{glob}' matched {len(matches)} files — use read_files() to read multiple files"
380                )
381            file = matches[0]
382
383        return _read_file_with_format(file, filetype, **kwargs)

Read the contents of a single file from the dataset.

See ~cirro.sdk.portal.DataPortal.read_file() for full details.

Arguments:
  • path (str): Exact relative path of the file within the dataset.
  • glob (str): Wildcard expression matching exactly one file.
  • filetype (str): File format used to parse the file. Supported values are the same as ~cirro.sdk.portal.DataPortal.read_files().
  • **kwargs: Additional keyword arguments forwarded to the file-parsing function.
Returns:

Parsed file content.

def get_trace(self) -> Any:
385    def get_trace(self) -> Any:
386        """
387        Read the Nextflow workflow trace file for this dataset as a DataFrame.
388
389        Returns:
390            `pandas.DataFrame`
391        """
392        return self.get_artifact(ArtifactType.WORKFLOW_TRACE).read_csv(sep='\t')

Read the Nextflow workflow trace file for this dataset as a DataFrame.

Returns:

pandas.DataFrame

def get_logs(self) -> str:
394    def get_logs(self) -> str:
395        """
396        Read the Nextflow workflow logs for this dataset as a string.
397
398        Returns:
399            str
400        """
401        return self.get_artifact(ArtifactType.WORKFLOW_LOGS).read()

Read the Nextflow workflow logs for this dataset as a string.

Returns:

str

def get_artifact( self, artifact_type: cirro_api_client.v1.models.ArtifactType) -> cirro.sdk.file.DataPortalFile:
403    def get_artifact(self, artifact_type: ArtifactType) -> DataPortalFile:
404        """
405        Get the artifact of a particular type from the dataset
406        """
407        artifacts = self._get_assets().artifacts
408        artifact = next((a for a in artifacts if a.artifact_type == artifact_type), None)
409        if artifact is None:
410            raise DataPortalAssetNotFound(f"No artifact found with type '{artifact_type}'")
411        return DataPortalFile(file=artifact.file, client=self._client)

Get the artifact of a particular type from the dataset

def list_artifacts(self) -> List[cirro.sdk.file.DataPortalFile]:
413    def list_artifacts(self) -> List[DataPortalFile]:
414        """
415        Return the list of artifacts associated with the dataset
416
417        An artifact may be something generated as part of the analysis or other process.
418        See `cirro_api_client.v1.models.ArtifactType` for the list of possible artifact types.
419
420        """
421        artifacts = self._get_assets().artifacts
422        return DataPortalFiles(
423            [
424                DataPortalFile(file=artifact.file, client=self._client)
425                for artifact in artifacts
426            ]
427        )

Return the list of artifacts associated with the dataset

An artifact may be something generated as part of the analysis or other process. See cirro_api_client.v1.models.ArtifactType for the list of possible artifact types.

def download_files(self, download_location: str = None, glob: str = None) -> None:
429    def download_files(self, download_location: str = None, glob: str = None) -> None:
430        """
431        Download all the files from the dataset to a local directory.
432
433        Args:
434            download_location (str): Path to local directory
435            glob (str): Optional wildcard expression to filter which files are downloaded
436                (e.g., ``'*.csv'``, ``'data/**/*.tsv.gz'``).
437                If omitted, all files are downloaded.
438        """
439
440        files = self.list_files()
441        if glob is not None:
442            files = DataPortalFiles(filter_files_by_pattern(list(files), glob))
443        files.download(download_location)

Download all the files from the dataset to a local directory.

Arguments:
  • download_location (str): Path to local directory
  • glob (str): Optional wildcard expression to filter which files are downloaded (e.g., '*.csv', 'data/**/*.tsv.gz'). If omitted, all files are downloaded.
def run_analysis( self, name: str = None, description: str = '', process: Union[DataPortalProcess, str] = None, params=None, notifications_emails: List[str] = None, compute_environment: str = None, resume_dataset_id: str = None, source_sample_ids: List[str] = None) -> str:
445    def run_analysis(
446            self,
447            name: str = None,
448            description: str = "",
449            process: Union[DataPortalProcess, str] = None,
450            params=None,
451            notifications_emails: List[str] = None,
452            compute_environment: str = None,
453            resume_dataset_id: str = None,
454            source_sample_ids: List[str] = None
455    ) -> str:
456        """
457        Runs an analysis on a dataset, returns the ID of the newly created dataset.
458
459        The process can be provided as either a DataPortalProcess object,
460        or a string which corresponds to the name or ID of the process.
461
462        Args:
463            name (str): Name of newly created dataset
464            description (str): Description of newly created dataset
465            process (DataPortalProcess or str): Process to run
466            params (dict): Analysis parameters
467            notifications_emails (List[str]): Notification email address(es)
468            compute_environment (str): Name or ID of compute environment to use,
469             if blank it will run in AWS
470            resume_dataset_id (str): ID of dataset to resume from, used for caching task execution.
471             It will attempt to re-use the previous output to minimize duplicate work
472            source_sample_ids (List[str]): List of sample IDs to use as input for the analysis.
473
474        Returns:
475            dataset_id (str): ID of newly created dataset
476        """
477        if name is None:
478            raise DataPortalInputError("Must specify 'name' for run_analysis")
479        if process is None:
480            raise DataPortalInputError("Must specify 'process' for run_analysis")
481        if notifications_emails is None:
482            notifications_emails = []
483        if params is None:
484            params = {}
485
486        # If the process is a string, try to parse it as a process name or ID
487        process = parse_process_name_or_id(process, self._client)
488
489        if compute_environment:
490            compute_environment_name = compute_environment
491            compute_environments = self._client.compute_environments.list_environments_for_project(
492                project_id=self.project_id
493            )
494            compute_environment = next(
495                (env for env in compute_environments
496                 if env.name == compute_environment or env.id == compute_environment),
497                None
498            )
499            if compute_environment is None:
500                raise DataPortalInputError(f"Compute environment '{compute_environment_name}' not found")
501
502        resp = self._client.execution.run_analysis(
503            project_id=self.project_id,
504            request=RunAnalysisRequest(
505                name=name,
506                description=description,
507                process_id=process.id,
508                source_dataset_ids=[self.id],
509                params=RunAnalysisRequestParams.from_dict(params),
510                notification_emails=notifications_emails,
511                resume_dataset_id=resume_dataset_id,
512                source_sample_ids=source_sample_ids,
513                compute_environment_id=compute_environment.id if compute_environment else None
514            )
515        )
516        return resp.id

Runs an analysis on a dataset, returns the ID of the newly created dataset.

The process can be provided as either a DataPortalProcess object, or a string which corresponds to the name or ID of the process.

Arguments:
  • name (str): Name of newly created dataset
  • description (str): Description of newly created dataset
  • process (DataPortalProcess or str): Process to run
  • params (dict): Analysis parameters
  • notifications_emails (List[str]): Notification email address(es)
  • compute_environment (str): Name or ID of compute environment to use, if blank it will run in AWS
  • resume_dataset_id (str): ID of dataset to resume from, used for caching task execution. It will attempt to re-use the previous output to minimize duplicate work
  • source_sample_ids (List[str]): List of sample IDs to use as input for the analysis.
Returns:

dataset_id (str): ID of newly created dataset

def update_samplesheet(self, contents: str = None, file_path: ~PathLike = None):
518    def update_samplesheet(self,
519                           contents: str = None,
520                           file_path: PathLike = None):
521        """
522        Updates the samplesheet metadata of a dataset.
523        Provide either the contents (as a string) or a file path.
524        Both must be in the format of a CSV.
525
526        Args:
527            contents (str): Samplesheet contents to update (should be a CSV string)
528            file_path (PathLike): Path of file to update (should be a CSV file)
529
530        Example:
531        ```python
532        dataset.update_samplesheet(
533            file_path=Path('~/samplesheet.csv')
534        )
535        ```
536        """
537
538        if contents is None and file_path is None:
539            raise DataPortalInputError("Must specify either 'contents' or 'file_path' when updating samplesheet")
540
541        samplesheet_contents = contents
542        if file_path is not None:
543            samplesheet_contents = Path(file_path).expanduser().read_text()
544
545        # Validate samplesheet
546        file_names = [f.file_name for f in self.list_files()]
547        request = ValidateFileRequirementsRequest(
548            file_names=file_names,
549            sample_sheet=samplesheet_contents,
550        )
551        requirements = validate_file_requirements.sync(process_id=self.process_id,
552                                                       body=request,
553                                                       client=self._client.api_client)
554        if error_msg := requirements.error_msg:
555            raise DataPortalInputError(error_msg)
556
557        # Update the samplesheet if everything looks ok
558        self._client.datasets.update_samplesheet(
559            project_id=self.project_id,
560            dataset_id=self.id,
561            samplesheet=samplesheet_contents
562        )

Updates the samplesheet metadata of a dataset. Provide either the contents (as a string) or a file path. Both must be in the format of a CSV.

Arguments:
  • contents (str): Samplesheet contents to update (should be a CSV string)
  • file_path (PathLike): Path of file to update (should be a CSV file)

Example:

dataset.update_samplesheet(
    file_path=Path('~/samplesheet.csv')
)
class DataPortalReference(cirro.sdk.asset.DataPortalAsset):
class DataPortalReference(DataPortalAsset):
    """
    Reference data object containing files which can be used for analysis in a particular project.
    """
    def __init__(self, ref: Reference, project_id: str, client: CirroApi):
        """
        Instantiate by listing the references which have been added to a particular project
        ```python
        from cirro import DataPortal
        portal = DataPortal()
        project = portal.get_project_by_name("Project Name")
        references = project.list_references()
        ```
        """
        self._data = ref
        self._files = [
            DataPortalFile(File.from_file_entry(entry, project_id), client)
            for entry in ref.files
        ]

    @property
    def files(self) -> List[DataPortalFile]:
        """File(s) contained in the reference"""
        return self._files

    @property
    def name(self) -> str:
        """Reference name"""
        return self._data.name

    @property
    def type(self) -> str:
        """Type of reference data (e.g. genome_fasta)"""
        return self._data.type_

    @property
    def absolute_path(self):
        """Absolute path of the first file in the reference (None when empty)."""
        if not self._files:
            return None
        return self._files[0].absolute_path

    def __str__(self):
        return self.name

Reference data object containing files which can be used for analysis in a particular project.

DataPortalReference( ref: cirro_api_client.v1.models.Reference, project_id: str, client: CirroApi)
16    def __init__(self, ref: Reference, project_id: str, client: CirroApi):
17        """
18        Instantiate by listing the references which have been added to a particular project
19        ```python
 20        from cirro import DataPortal
21        portal = DataPortal()
22        project = portal.get_project_by_name("Project Name")
23        references = project.list_references()
24        ```
25        """
26        self._data = ref
27        self._files = [
28            DataPortalFile(File.from_file_entry(f, project_id), client) for f in ref.files
29        ]

Instantiate by listing the references which have been added to a particular project

from cirro import DataPortal
portal = DataPortal()
project = portal.get_project_by_name("Project Name")
references = project.list_references()
files: List[cirro.sdk.file.DataPortalFile]
31    @property
32    def files(self) -> List[DataPortalFile]:
33        """File(s) contained in the reference"""
34        return self._files

File(s) contained in the reference

name: str
36    @property
37    def name(self) -> str:
38        """Reference name"""
39        return self._data.name

Reference name

type: str
41    @property
42    def type(self) -> str:
43        """Type of reference data (e.g. genome_fasta)"""
44        return self._data.type_

Type of reference data (e.g. genome_fasta)

absolute_path
46    @property
47    def absolute_path(self):
48        if len(self._files) == 0:
49            return None
50        return self._files[0].absolute_path
class CirroApi:
 12class CirroApi:
 13    """
 14    Client for interacting directly with the Cirro API
 15    """
 16    def __init__(self, auth_info: AuthInfo = None, base_url: str = None, user_agent: str = 'Cirro SDK'):
 17        """
 18        Instantiates the Cirro API object
 19
 20        Args:
 21            auth_info (cirro.auth.base.AuthInfo):
 22            base_url (str): Optional base URL of the Cirro instance
 23             (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
 24
 25        Returns:
 26            Authenticated Cirro API object, which can be used to call endpoint functions.
 27
 28        Example:
 29        ```python
 30        from cirro.cirro_client import CirroApi
 31
 32        cirro = CirroApi(base_url="app.cirro.bio")
 33        print(cirro.projects.list())
 34        ```
 35        """
 36
 37        self._configuration = AppConfig(base_url=base_url)
 38        if not auth_info:
 39            auth_info = get_auth_info_from_config(self._configuration, auth_io=None)
 40
 41        self._api_client = CirroApiClient(
 42            base_url=self._configuration.rest_endpoint,
 43            auth_method=auth_info.get_auth_method(),
 44            client_name=user_agent,
 45            package_name='cirro'
 46        )
 47
 48        # Init services
 49        self._file_service = FileService(self._api_client,
 50                                         checksum_method=self._configuration.checksum_method,
 51                                         transfer_retries=self._configuration.transfer_max_retries)
 52        self._dataset_service = DatasetService(self._api_client, file_service=self._file_service)
 53        self._project_service = ProjectService(self._api_client)
 54        self._process_service = ProcessService(self._api_client)
 55        self._execution_service = ExecutionService(self._api_client)
 56        self._compute_environment_service = ComputeEnvironmentService(self._api_client)
 57        self._metrics_service = MetricsService(self._api_client)
 58        self._metadata_service = MetadataService(self._api_client)
 59        self._billing_service = BillingService(self._api_client)
 60        self._references_service = ReferenceService(self._api_client, file_service=self._file_service)
 61        self._shares_service = ShareService(self._api_client)
 62        self._users_service = UserService(self._api_client)
 63        self._workspace_service = WorkspaceService(self._api_client)
 64
 65    @property
 66    def datasets(self) -> DatasetService:
 67        """
 68        Create, list, delete, and modify Datasets
 69        """
 70        return self._dataset_service
 71
 72    @property
 73    def projects(self) -> ProjectService:
 74        """
 75        Create, list, delete, and modify Projects
 76        """
 77        return self._project_service
 78
 79    @property
 80    def processes(self) -> ProcessService:
 81        """
 82        List and retrieve detailed information about Processes
 83        """
 84        return self._process_service
 85
 86    @property
 87    def execution(self) -> ExecutionService:
 88        """
 89        List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)
 90        """
 91        return self._execution_service
 92
 93    @property
 94    def compute_environments(self) -> ComputeEnvironmentService:
 95        """
 96        List and update compute environments
 97        """
 98        return self._compute_environment_service
 99
100    @property
101    def metrics(self) -> MetricsService:
102        """
103        Project-level summary metrics
104        """
105        return self._metrics_service
106
107    @property
108    def metadata(self) -> MetadataService:
109        """
110        List and modify Sample metadata or metadata schemas
111        """
112        return self._metadata_service
113
114    @property
115    def billing(self) -> BillingService:
116        """
117        List and update billing accounts
118        """
119        return self._billing_service
120
121    @property
122    def references(self) -> ReferenceService:
123        """
124        List References and Reference types
125        """
126        return self._references_service
127
128    @property
129    def shares(self) -> ShareService:
130        """
131        List, create, update, delete, and subscribe to shares
132        """
133        return self._shares_service
134
135    @property
136    def users(self) -> UserService:
137        """
138        List and update user information
139        """
140        return self._users_service
141
142    @property
143    def workspaces(self) -> WorkspaceService:
144        """
145        Manage workspaces
146        """
147        return self._workspace_service
148
149    @property
150    def file(self) -> FileService:
151        """
152        Read, download, and create file objects
153        """
154        return self._file_service
155
156    @property
157    def api_client(self) -> CirroApiClient:
158        """
159        Gets the underlying API client
160        """
161        return self._api_client
162
163    @property
164    def configuration(self) -> AppConfig:
165        """
166        Gets the configuration of the instance
167        """
168        return self._configuration

Client for interacting directly with the Cirro API

CirroApi( auth_info: cirro.auth.AuthInfo = None, base_url: str = None, user_agent: str = 'Cirro SDK')
16    def __init__(self, auth_info: AuthInfo = None, base_url: str = None, user_agent: str = 'Cirro SDK'):
17        """
18        Instantiates the Cirro API object
19
20        Args:
21            auth_info (cirro.auth.base.AuthInfo):
22            base_url (str): Optional base URL of the Cirro instance
23             (if not provided, it uses the `CIRRO_BASE_URL` environment variable, or the config file)
24
25        Returns:
26            Authenticated Cirro API object, which can be used to call endpoint functions.
27
28        Example:
29        ```python
30        from cirro.cirro_client import CirroApi
31
32        cirro = CirroApi(base_url="app.cirro.bio")
33        print(cirro.projects.list())
34        ```
35        """
36
37        self._configuration = AppConfig(base_url=base_url)
38        if not auth_info:
39            auth_info = get_auth_info_from_config(self._configuration, auth_io=None)
40
41        self._api_client = CirroApiClient(
42            base_url=self._configuration.rest_endpoint,
43            auth_method=auth_info.get_auth_method(),
44            client_name=user_agent,
45            package_name='cirro'
46        )
47
48        # Init services
49        self._file_service = FileService(self._api_client,
50                                         checksum_method=self._configuration.checksum_method,
51                                         transfer_retries=self._configuration.transfer_max_retries)
52        self._dataset_service = DatasetService(self._api_client, file_service=self._file_service)
53        self._project_service = ProjectService(self._api_client)
54        self._process_service = ProcessService(self._api_client)
55        self._execution_service = ExecutionService(self._api_client)
56        self._compute_environment_service = ComputeEnvironmentService(self._api_client)
57        self._metrics_service = MetricsService(self._api_client)
58        self._metadata_service = MetadataService(self._api_client)
59        self._billing_service = BillingService(self._api_client)
60        self._references_service = ReferenceService(self._api_client, file_service=self._file_service)
61        self._shares_service = ShareService(self._api_client)
62        self._users_service = UserService(self._api_client)
63        self._workspace_service = WorkspaceService(self._api_client)

Instantiates the Cirro API object

Arguments:
  • auth_info (cirro.auth.base.AuthInfo): Optional authentication details (resolved from the local configuration when not provided)
  • base_url (str): Optional base URL of the Cirro instance (if not provided, it uses the CIRRO_BASE_URL environment variable, or the config file)
Returns:

Authenticated Cirro API object, which can be used to call endpoint functions.

Example:

from cirro.cirro_client import CirroApi

cirro = CirroApi(base_url="app.cirro.bio")
print(cirro.projects.list())
datasets: cirro.services.DatasetService
65    @property
66    def datasets(self) -> DatasetService:
67        """
68        Create, list, delete, and modify Datasets
69        """
70        return self._dataset_service

Create, list, delete, and modify Datasets

projects: cirro.services.ProjectService
72    @property
73    def projects(self) -> ProjectService:
74        """
75        Create, list, delete, and modify Projects
76        """
77        return self._project_service

Create, list, delete, and modify Projects

processes: cirro.services.ProcessService
79    @property
80    def processes(self) -> ProcessService:
81        """
82        List and retrieve detailed information about Processes
83        """
84        return self._process_service

List and retrieve detailed information about Processes

execution: cirro.services.ExecutionService
86    @property
87    def execution(self) -> ExecutionService:
88        """
89        List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)
90        """
91        return self._execution_service

List, run, stop, and describe the analysis jobs (executing Processes to create new Datasets)

compute_environments: cirro.services.ComputeEnvironmentService
93    @property
94    def compute_environments(self) -> ComputeEnvironmentService:
95        """
96        List and update compute environments
97        """
98        return self._compute_environment_service

List and update compute environments

metrics: cirro.services.MetricsService
100    @property
101    def metrics(self) -> MetricsService:
102        """
103        Project-level summary metrics
104        """
105        return self._metrics_service

Project-level summary metrics

metadata: cirro.services.MetadataService
107    @property
108    def metadata(self) -> MetadataService:
109        """
110        List and modify Sample metadata or metadata schemas
111        """
112        return self._metadata_service

List and modify Sample metadata or metadata schemas

billing: cirro.services.BillingService
114    @property
115    def billing(self) -> BillingService:
116        """
117        List and update billing accounts
118        """
119        return self._billing_service

List and update billing accounts

references: cirro.services.ReferenceService
121    @property
122    def references(self) -> ReferenceService:
123        """
124        List References and Reference types
125        """
126        return self._references_service

List References and Reference types

shares: cirro.services.ShareService
128    @property
129    def shares(self) -> ShareService:
130        """
131        List, create, update, delete, and subscribe to shares
132        """
133        return self._shares_service

List, create, update, delete, and subscribe to shares

users: cirro.services.UserService
135    @property
136    def users(self) -> UserService:
137        """
138        List and update user information
139        """
140        return self._users_service

List and update user information

workspaces: cirro.services.WorkspaceService
142    @property
143    def workspaces(self) -> WorkspaceService:
144        """
145        Manage workspaces
146        """
147        return self._workspace_service

Manage workspaces

file: cirro.services.FileService
149    @property
150    def file(self) -> FileService:
151        """
152        Read, download, and create file objects
153        """
154        return self._file_service

Read, download, and create file objects

api_client: cirro_api_client.CirroApiClient
156    @property
157    def api_client(self) -> CirroApiClient:
158        """
159        Gets the underlying API client
160        """
161        return self._api_client

Gets the underlying API client

configuration: cirro.config.AppConfig
163    @property
164    def configuration(self) -> AppConfig:
165        """
166        Gets the configuration of the instance
167        """
168        return self._configuration

Gets the configuration of the instance