cirro.sdk.dataset
import datetime
import re
from pathlib import Path
from typing import Union, List, Optional, Any

from cirro_api_client.v1.api.processes import validate_file_requirements
from cirro_api_client.v1.models import Dataset, DatasetDetail, RunAnalysisRequest, ProcessDetail, Status, \
    RunAnalysisRequestParams, Tag, ArtifactType, NamedItem, ValidateFileRequirementsRequest

from cirro.cirro_client import CirroApi
from cirro.file_utils import filter_files_by_pattern
from cirro.models.assets import DatasetAssets
from cirro.models.file import PathLike
from cirro.sdk.asset import DataPortalAssets, DataPortalAsset
from cirro.sdk.exceptions import DataPortalAssetNotFound
from cirro.sdk.exceptions import DataPortalInputError
from cirro.sdk.file import DataPortalFile, DataPortalFiles
from cirro.sdk.helpers import parse_process_name_or_id
from cirro.sdk.process import DataPortalProcess


def _pattern_to_captures_regex(pattern: str):
    """
    Convert a glob pattern that may contain ``{name}`` capture placeholders into
    a compiled regex and return ``(compiled_regex, capture_names)``.

    Conversion rules:
    - ``{name}`` → named group matching a single path segment (no ``/``)
    - ``*`` → matches any characters within a single path segment
    - ``**`` → matches any characters including ``/`` (multiple segments)
    - All other characters are regex-escaped.

    The resulting regex is suffix-anchored (like ``pathlib.PurePath.match``):
    a pattern without a leading ``/`` will match at any depth in the path.
    """
    capture_names = re.findall(r'\{(\w+)\}', pattern)
    # re.split with a capturing group keeps the delimiters ('**', '*', '{name}')
    # as their own tokens, so each can be translated independently below.
    tokens = re.split(r'(\*\*|\*|\{\w+\})', pattern)
    parts = []
    for token in tokens:
        if token == '**':
            parts.append('.*')
        elif token == '*':
            parts.append('[^/]*')
        elif re.match(r'^\{\w+\}$', token):
            # Strip the surrounding braces to get the capture name
            name = token[1:-1]
            parts.append(f'(?P<{name}>[^/]+)')
        else:
            parts.append(re.escape(token))
    regex_str = ''.join(parts)
    if not pattern.startswith('/'):
        # Suffix-anchor: allow any leading directory components
        regex_str = r'(?:.+/)?' + regex_str
    return re.compile('^' + regex_str + '$'), capture_names


def _infer_file_format(path: str) -> str:
    """Infer the file format from the file extension."""
    path_lower = path.lower()
    # Peel off a single compression suffix before inspecting the extension
    for ext in ('.gz', '.bz2', '.xz', '.zst'):
        if path_lower.endswith(ext):
            path_lower = path_lower[:-len(ext)]
            break
    if path_lower.endswith('.csv') or path_lower.endswith('.tsv'):
        return 'csv'
    elif path_lower.endswith('.h5ad'):
        return 'h5ad'
    elif path_lower.endswith('.json'):
        return 'json'
    elif path_lower.endswith('.parquet'):
        return 'parquet'
    elif path_lower.endswith('.feather'):
        return 'feather'
    elif path_lower.endswith('.pkl') or path_lower.endswith('.pickle'):
        return 'pickle'
    elif path_lower.endswith('.xlsx') or path_lower.endswith('.xls'):
        return 'excel'
    else:
        return 'text'


def _read_file_with_format(file: DataPortalFile, file_format: Optional[str], **kwargs) -> Any:
    """
    Read a file using the specified format, or auto-detect from extension.

    Raises:
        DataPortalInputError: if ``file_format`` is not a supported value.
    """
    if file_format is None:
        file_format = _infer_file_format(file.relative_path)
    if file_format == 'csv':
        return file.read_csv(**kwargs)
    elif file_format == 'h5ad':
        # read_h5ad takes no extra options; kwargs intentionally not forwarded
        return file.read_h5ad()
    elif file_format == 'json':
        return file.read_json(**kwargs)
    elif file_format == 'parquet':
        return file.read_parquet(**kwargs)
    elif file_format == 'feather':
        return file.read_feather(**kwargs)
    elif file_format == 'pickle':
        return file.read_pickle(**kwargs)
    elif file_format == 'excel':
        return file.read_excel(**kwargs)
    elif file_format == 'text':
        return file.read(**kwargs)
    elif file_format == 'bytes':
        # Raw bytes: use the low-level getter (no decoding)
        return file._get()
    else:
        raise DataPortalInputError(
            f"Unsupported file_format: '{file_format}'. "
            f"Supported values: 'csv', 'h5ad', 'json', 'parquet', 'feather', 'pickle', 'excel', 'text', 'bytes'"
        )


class DataPortalDataset(DataPortalAsset):
    """
    Datasets in the Data Portal are collections of files which have
    either been uploaded directly, or which have been output by
    an analysis pipeline or notebook.
    """

    def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi):
        """
        Instantiate a dataset object

        Should be invoked from a top-level constructor, for example:

        ```python
        from cirro import DataPortal
        portal = DataPortal()
        dataset = portal.get_dataset(
            project="id-or-name-of-project",
            dataset="id-or-name-of-dataset"
        )
        ```

        """
        assert dataset.project_id is not None, "Must provide dataset with project_id attribute"
        self._data = dataset
        self._assets: Optional[DatasetAssets] = None
        self._client = client

    @property
    def id(self) -> str:
        """Unique identifier for the dataset"""
        return self._data.id

    @property
    def name(self) -> str:
        """Editable name for the dataset"""
        return self._data.name

    @property
    def description(self) -> str:
        """Longer name for the dataset"""
        return self._data.description

    @property
    def process_id(self) -> str:
        """Unique ID of process used to create the dataset"""
        return self._data.process_id

    @property
    def process(self) -> ProcessDetail:
        """
        Object representing the process used to create the dataset
        """
        return self._client.processes.get(self.process_id)

    @property
    def project_id(self) -> str:
        """ID of the project containing the dataset"""
        return self._data.project_id

    @property
    def status(self) -> Status:
        """
        Status of the dataset
        """
        return self._data.status

    @property
    def source_dataset_ids(self) -> List[str]:
        """IDs of the datasets used as sources for this dataset (if any)"""
        return self._data.source_dataset_ids

    @property
    def source_datasets(self) -> List['DataPortalDataset']:
        """
        Objects representing the datasets used as sources for this dataset (if any)
        """
        return [
            DataPortalDataset(
                dataset=self._client.datasets.get(project_id=self.project_id, dataset_id=dataset_id),
                client=self._client
            )
            for dataset_id in self.source_dataset_ids
        ]

    @property
    def params(self) -> dict:
        """
        Parameters used to generate the dataset
        """
        return self._get_detail().params.to_dict()

    @property
    def info(self) -> dict:
        """
        Extra information about the dataset
        """
        return self._get_detail().info.to_dict()

    @property
    def tags(self) -> List[Tag]:
        """
        Tags applied to the dataset
        """
        return self._data.tags

    @property
    def share(self) -> Optional[NamedItem]:
        """
        Share associated with the dataset, if any.
        """
        return self._get_detail().share

    @property
    def created_by(self) -> str:
        """User who created the dataset"""
        return self._data.created_by

    @property
    def created_at(self) -> datetime.datetime:
        """Timestamp of dataset creation"""
        return self._data.created_at

    def _get_detail(self) -> DatasetDetail:
        # Lazily upgrade the summary record to a full DatasetDetail on first use
        if not isinstance(self._data, DatasetDetail):
            self._data = self._client.datasets.get(project_id=self.project_id, dataset_id=self.id)
        return self._data

    def _get_assets(self) -> DatasetAssets:
        # Cache the assets listing so repeated artifact lookups avoid extra API calls
        if not self._assets:
            self._assets = self._client.datasets.get_assets_listing(
                project_id=self.project_id,
                dataset_id=self.id
            )
        return self._assets

    def __str__(self):
        return '\n'.join([
            f"{i.title()}: {self.__getattribute__(i)}"
            for i in ['name', 'id', 'description', 'status']
        ])

    def get_file(self, relative_path: str) -> DataPortalFile:
        """
        Get a file from the dataset using its relative path.

        Args:
            relative_path (str): Relative path of file within the dataset

        Returns:
            `from cirro.sdk.file import DataPortalFile`

        Raises:
            DataPortalAssetNotFound: if no file matches the path
        """

        # Get the list of files in this dataset
        files = self.list_files()

        # Try getting the file using the relative path provided by the user
        try:
            return files.get_by_id(relative_path)
        except DataPortalAssetNotFound:
            # Try getting the file with the 'data/' prefix prepended
            try:
                return files.get_by_id("data/" + relative_path)
            except DataPortalAssetNotFound:
                # If not found, raise the exception using the string provided
                # by the user, not the data/ prepended version (which may be
                # confusing to the user)
                raise DataPortalAssetNotFound(f"No file found with path '{relative_path}'.")

    def list_files(self, file_limit: int = 100000) -> DataPortalFiles:
        """
        Return the list of files which make up the dataset.

        Args:
            file_limit (int): Maximum number of files to return (default 100,000)
        """
        assets = self._client.datasets.get_assets_listing(
            project_id=self.project_id,
            dataset_id=self.id,
            file_limit=file_limit
        )
        files = assets.files

        return DataPortalFiles(
            [
                DataPortalFile(file=file, client=self._client)
                for file in files
            ]
        )

    def read_files(
        self,
        glob: Optional[str] = None,
        pattern: Optional[str] = None,
        filetype: Optional[str] = None,
        **kwargs
    ):
        """
        Read the contents of files in the dataset.

        See :meth:`~cirro.sdk.portal.DataPortal.read_files` for full details
        on ``glob``/``pattern`` matching and filetype options.

        Args:
            glob (str): Wildcard expression to match files.
                Yields one item per matching file: the parsed content.
            pattern (str): Wildcard expression with ``{name}`` capture
                placeholders. Yields ``(content, meta)`` per matching file.
            filetype (str): File format used to parse each file
                (or ``None`` to infer from extension).
            **kwargs: Additional keyword arguments forwarded to the
                file-parsing function.

        Yields:
            - When using ``glob``: *content* for each matching file
            - When using ``pattern``: ``(content, meta)`` for each matching file

        Raises:
            DataPortalInputError: if both or neither of ``glob``/``pattern`` are given
        """
        if glob is not None and pattern is not None:
            raise DataPortalInputError("Cannot specify both 'glob' and 'pattern' — use one or the other")
        if glob is None and pattern is None:
            raise DataPortalInputError("Must specify either 'glob' or 'pattern'")

        if glob is not None:
            for file in filter_files_by_pattern(list(self.list_files()), glob):
                yield _read_file_with_format(file, filetype, **kwargs)
        else:
            compiled_regex, _ = _pattern_to_captures_regex(pattern)
            for file in self.list_files():
                m = compiled_regex.match(file.relative_path)
                if m is not None:
                    # meta dict maps each {name} placeholder to its captured value
                    yield _read_file_with_format(file, filetype, **kwargs), m.groupdict()

    def read_file(
        self,
        path: Optional[str] = None,
        glob: Optional[str] = None,
        filetype: Optional[str] = None,
        **kwargs
    ) -> Any:
        """
        Read the contents of a single file from the dataset.

        See :meth:`~cirro.sdk.portal.DataPortal.read_file` for full details.

        Args:
            path (str): Exact relative path of the file within the dataset.
            glob (str): Wildcard expression matching exactly one file.
            filetype (str): File format used to parse the file. Supported values
                are the same as :meth:`~cirro.sdk.portal.DataPortal.read_files`.
            **kwargs: Additional keyword arguments forwarded to the file-parsing
                function.

        Returns:
            Parsed file content.

        Raises:
            DataPortalInputError: if both or neither of ``path``/``glob`` are
                given, or the glob matches more than one file
            DataPortalAssetNotFound: if no file matches
        """
        if path is not None and glob is not None:
            raise DataPortalInputError("Cannot specify both 'path' and 'glob' — use one or the other")
        if path is None and glob is None:
            raise DataPortalInputError("Must specify either 'path' or 'glob'")

        if path is not None:
            file = self.get_file(path)
        else:
            matches = list(filter_files_by_pattern(list(self.list_files()), glob))
            if len(matches) == 0:
                raise DataPortalAssetNotFound(f"No files matched glob '{glob}'")
            if len(matches) > 1:
                raise DataPortalInputError(
                    f"glob '{glob}' matched {len(matches)} files — use read_files() to read multiple files"
                )
            file = matches[0]

        return _read_file_with_format(file, filetype, **kwargs)

    def get_trace(self) -> Any:
        """
        Read the Nextflow workflow trace file for this dataset as a DataFrame.

        Returns:
            `pandas.DataFrame`
        """
        return self.get_artifact(ArtifactType.WORKFLOW_TRACE).read_csv(sep='\t')

    def get_logs(self) -> str:
        """
        Read the Nextflow workflow logs for this dataset as a string.

        Returns:
            str
        """
        return self.get_artifact(ArtifactType.WORKFLOW_LOGS).read()

    def get_artifact(self, artifact_type: ArtifactType) -> DataPortalFile:
        """
        Get the artifact of a particular type from the dataset

        Raises:
            DataPortalAssetNotFound: if the dataset has no artifact of that type
        """
        artifacts = self._get_assets().artifacts
        artifact = next((a for a in artifacts if a.artifact_type == artifact_type), None)
        if artifact is None:
            raise DataPortalAssetNotFound(f"No artifact found with type '{artifact_type}'")
        return DataPortalFile(file=artifact.file, client=self._client)

    def list_artifacts(self) -> DataPortalFiles:
        """
        Return the list of artifacts associated with the dataset

        An artifact may be something generated as part of the analysis or other process.
        See `cirro_api_client.v1.models.ArtifactType` for the list of possible artifact types.

        """
        artifacts = self._get_assets().artifacts
        return DataPortalFiles(
            [
                DataPortalFile(file=artifact.file, client=self._client)
                for artifact in artifacts
            ]
        )

    def download_files(self, download_location: Optional[str] = None, glob: Optional[str] = None) -> None:
        """
        Download all the files from the dataset to a local directory.

        Args:
            download_location (str): Path to local directory
            glob (str): Optional wildcard expression to filter which files are downloaded
                (e.g., ``'*.csv'``, ``'data/**/*.tsv.gz'``).
                If omitted, all files are downloaded.
        """

        files = self.list_files()
        if glob is not None:
            files = DataPortalFiles(filter_files_by_pattern(list(files), glob))
        files.download(download_location)

    def run_analysis(
        self,
        name: Optional[str] = None,
        description: str = "",
        process: Union[DataPortalProcess, str] = None,
        params=None,
        notifications_emails: Optional[List[str]] = None,
        compute_environment: Optional[str] = None,
        resume_dataset_id: Optional[str] = None,
        source_sample_ids: Optional[List[str]] = None
    ) -> str:
        """
        Runs an analysis on a dataset, returns the ID of the newly created dataset.

        The process can be provided as either a DataPortalProcess object,
        or a string which corresponds to the name or ID of the process.

        Args:
            name (str): Name of newly created dataset
            description (str): Description of newly created dataset
            process (DataPortalProcess or str): Process to run
            params (dict): Analysis parameters
            notifications_emails (List[str]): Notification email address(es)
            compute_environment (str): Name or ID of compute environment to use,
                if blank it will run in AWS
            resume_dataset_id (str): ID of dataset to resume from, used for caching task execution.
                It will attempt to re-use the previous output to minimize duplicate work
            source_sample_ids (List[str]): List of sample IDs to use as input for the analysis.

        Returns:
            dataset_id (str): ID of newly created dataset
        """
        if name is None:
            raise DataPortalInputError("Must specify 'name' for run_analysis")
        if process is None:
            raise DataPortalInputError("Must specify 'process' for run_analysis")
        if notifications_emails is None:
            notifications_emails = []
        if params is None:
            params = {}

        # If the process is a string, try to parse it as a process name or ID
        process = parse_process_name_or_id(process, self._client)

        if compute_environment:
            # Resolve the name-or-ID string to a compute environment record.
            # Compare against the saved name, not the variable being reassigned,
            # so the lookup does not depend on generator evaluation order.
            compute_environment_name = compute_environment
            compute_environments = self._client.compute_environments.list_environments_for_project(
                project_id=self.project_id
            )
            compute_environment = next(
                (env for env in compute_environments
                 if env.name == compute_environment_name or env.id == compute_environment_name),
                None
            )
            if compute_environment is None:
                raise DataPortalInputError(f"Compute environment '{compute_environment_name}' not found")

        resp = self._client.execution.run_analysis(
            project_id=self.project_id,
            request=RunAnalysisRequest(
                name=name,
                description=description,
                process_id=process.id,
                source_dataset_ids=[self.id],
                params=RunAnalysisRequestParams.from_dict(params),
                notification_emails=notifications_emails,
                resume_dataset_id=resume_dataset_id,
                source_sample_ids=source_sample_ids,
                compute_environment_id=compute_environment.id if compute_environment else None
            )
        )
        return resp.id

    def update_samplesheet(self,
                           contents: Optional[str] = None,
                           file_path: Optional[PathLike] = None):
        """
        Updates the samplesheet metadata of a dataset.
        Provide either the contents (as a string) or a file path.
        Both must be in the format of a CSV.

        Args:
            contents (str): Samplesheet contents to update (should be a CSV string)
            file_path (PathLike): Path of file to update (should be a CSV file)

        Example:
            ```python
            dataset.update_samplesheet(
                file_path=Path('~/samplesheet.csv')
            )
            ```

        Raises:
            DataPortalInputError: if neither argument is given, or the
                samplesheet fails server-side validation
        """

        if contents is None and file_path is None:
            raise DataPortalInputError("Must specify either 'contents' or 'file_path' when updating samplesheet")

        samplesheet_contents = contents
        if file_path is not None:
            # file_path takes precedence over contents when both are provided
            samplesheet_contents = Path(file_path).expanduser().read_text()

        # Validate samplesheet
        file_names = [f.file_name for f in self.list_files()]
        request = ValidateFileRequirementsRequest(
            file_names=file_names,
            sample_sheet=samplesheet_contents,
        )
        requirements = validate_file_requirements.sync(process_id=self.process_id,
                                                       body=request,
                                                       client=self._client.api_client)
        if error_msg := requirements.error_msg:
            raise DataPortalInputError(error_msg)

        # Update the samplesheet if everything looks ok
        self._client.datasets.update_samplesheet(
            project_id=self.project_id,
            dataset_id=self.id,
            samplesheet=samplesheet_contents
        )


class DataPortalDatasets(DataPortalAssets[DataPortalDataset]):
    """Collection of multiple DataPortalDataset objects."""
    asset_name = "dataset"
110class DataPortalDataset(DataPortalAsset): 111 """ 112 Datasets in the Data Portal are collections of files which have 113 either been uploaded directly, or which have been output by 114 an analysis pipeline or notebook. 115 """ 116 117 def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi): 118 """ 119 Instantiate a dataset object 120 121 Should be invoked from a top-level constructor, for example: 122 123 ```python 124 from cirro import DataPortal 125 portal = DataPortal() 126 dataset = portal.get_dataset( 127 project="id-or-name-of-project", 128 dataset="id-or-name-of-dataset" 129 ) 130 ``` 131 132 """ 133 assert dataset.project_id is not None, "Must provide dataset with project_id attribute" 134 self._data = dataset 135 self._assets: Optional[DatasetAssets] = None 136 self._client = client 137 138 @property 139 def id(self) -> str: 140 """Unique identifier for the dataset""" 141 return self._data.id 142 143 @property 144 def name(self) -> str: 145 """Editable name for the dataset""" 146 return self._data.name 147 148 @property 149 def description(self) -> str: 150 """Longer name for the dataset""" 151 return self._data.description 152 153 @property 154 def process_id(self) -> str: 155 """Unique ID of process used to create the dataset""" 156 return self._data.process_id 157 158 @property 159 def process(self) -> ProcessDetail: 160 """ 161 Object representing the process used to create the dataset 162 """ 163 return self._client.processes.get(self.process_id) 164 165 @property 166 def project_id(self) -> str: 167 """ID of the project containing the dataset""" 168 return self._data.project_id 169 170 @property 171 def status(self) -> Status: 172 """ 173 Status of the dataset 174 """ 175 return self._data.status 176 177 @property 178 def source_dataset_ids(self) -> List[str]: 179 """IDs of the datasets used as sources for this dataset (if any)""" 180 return self._data.source_dataset_ids 181 182 @property 183 def source_datasets(self) -> 
List['DataPortalDataset']: 184 """ 185 Objects representing the datasets used as sources for this dataset (if any) 186 """ 187 return [ 188 DataPortalDataset( 189 dataset=self._client.datasets.get(project_id=self.project_id, dataset_id=dataset_id), 190 client=self._client 191 ) 192 for dataset_id in self.source_dataset_ids 193 ] 194 195 @property 196 def params(self) -> dict: 197 """ 198 Parameters used to generate the dataset 199 """ 200 return self._get_detail().params.to_dict() 201 202 @property 203 def info(self) -> dict: 204 """ 205 Extra information about the dataset 206 """ 207 return self._get_detail().info.to_dict() 208 209 @property 210 def tags(self) -> List[Tag]: 211 """ 212 Tags applied to the dataset 213 """ 214 return self._data.tags 215 216 @property 217 def share(self) -> Optional[NamedItem]: 218 """ 219 Share associated with the dataset, if any. 220 """ 221 return self._get_detail().share 222 223 @property 224 def created_by(self) -> str: 225 """User who created the dataset""" 226 return self._data.created_by 227 228 @property 229 def created_at(self) -> datetime.datetime: 230 """Timestamp of dataset creation""" 231 return self._data.created_at 232 233 def _get_detail(self): 234 if not isinstance(self._data, DatasetDetail): 235 self._data = self._client.datasets.get(project_id=self.project_id, dataset_id=self.id) 236 return self._data 237 238 def _get_assets(self): 239 if not self._assets: 240 self._assets = self._client.datasets.get_assets_listing( 241 project_id=self.project_id, 242 dataset_id=self.id 243 ) 244 return self._assets 245 246 def __str__(self): 247 return '\n'.join([ 248 f"{i.title()}: {self.__getattribute__(i)}" 249 for i in ['name', 'id', 'description', 'status'] 250 ]) 251 252 def get_file(self, relative_path: str) -> DataPortalFile: 253 """ 254 Get a file from the dataset using its relative path. 
255 256 Args: 257 relative_path (str): Relative path of file within the dataset 258 259 Returns: 260 `from cirro.sdk.file import DataPortalFile` 261 """ 262 263 # Get the list of files in this dataset 264 files = self.list_files() 265 266 # Try getting the file using the relative path provided by the user 267 try: 268 return files.get_by_id(relative_path) 269 except DataPortalAssetNotFound: 270 # Try getting the file with the 'data/' prefix prepended 271 try: 272 return files.get_by_id("data/" + relative_path) 273 except DataPortalAssetNotFound: 274 # If not found, raise the exception using the string provided 275 # by the user, not the data/ prepended version (which may be 276 # confusing to the user) 277 msg = '\n'.join([f"No file found with path '{relative_path}'."]) 278 raise DataPortalAssetNotFound(msg) 279 280 def list_files(self, file_limit: int = 100000) -> DataPortalFiles: 281 """ 282 Return the list of files which make up the dataset. 283 284 Args: 285 file_limit (int): Maximum number of files to return (default 100,000) 286 """ 287 assets = self._client.datasets.get_assets_listing( 288 project_id=self.project_id, 289 dataset_id=self.id, 290 file_limit=file_limit 291 ) 292 files = assets.files 293 294 return DataPortalFiles( 295 [ 296 DataPortalFile(file=file, client=self._client) 297 for file in files 298 ] 299 ) 300 301 def read_files( 302 self, 303 glob: str = None, 304 pattern: str = None, 305 filetype: str = None, 306 **kwargs 307 ): 308 """ 309 Read the contents of files in the dataset. 310 311 See :meth:`~cirro.sdk.portal.DataPortal.read_files` for full details 312 on ``glob``/``pattern`` matching and filetype options. 313 314 Args: 315 glob (str): Wildcard expression to match files. 316 Yields one item per matching file: the parsed content. 317 pattern (str): Wildcard expression with ``{name}`` capture 318 placeholders. Yields ``(content, meta)`` per matching file. 
319 filetype (str): File format used to parse each file 320 (or ``None`` to infer from extension). 321 **kwargs: Additional keyword arguments forwarded to the 322 file-parsing function. 323 324 Yields: 325 - When using ``glob``: *content* for each matching file 326 - When using ``pattern``: ``(content, meta)`` for each matching file 327 """ 328 if glob is not None and pattern is not None: 329 raise DataPortalInputError("Cannot specify both 'glob' and 'pattern' — use one or the other") 330 if glob is None and pattern is None: 331 raise DataPortalInputError("Must specify either 'glob' or 'pattern'") 332 333 if glob is not None: 334 for file in filter_files_by_pattern(list(self.list_files()), glob): 335 yield _read_file_with_format(file, filetype, **kwargs) 336 else: 337 compiled_regex, _ = _pattern_to_captures_regex(pattern) 338 for file in self.list_files(): 339 m = compiled_regex.match(file.relative_path) 340 if m is not None: 341 yield _read_file_with_format(file, filetype, **kwargs), m.groupdict() 342 343 def read_file( 344 self, 345 path: str = None, 346 glob: str = None, 347 filetype: str = None, 348 **kwargs 349 ) -> Any: 350 """ 351 Read the contents of a single file from the dataset. 352 353 See :meth:`~cirro.sdk.portal.DataPortal.read_file` for full details. 354 355 Args: 356 path (str): Exact relative path of the file within the dataset. 357 glob (str): Wildcard expression matching exactly one file. 358 filetype (str): File format used to parse the file. Supported values 359 are the same as :meth:`~cirro.sdk.portal.DataPortal.read_files`. 360 **kwargs: Additional keyword arguments forwarded to the file-parsing 361 function. 362 363 Returns: 364 Parsed file content. 
365 """ 366 if path is not None and glob is not None: 367 raise DataPortalInputError("Cannot specify both 'path' and 'glob' — use one or the other") 368 if path is None and glob is None: 369 raise DataPortalInputError("Must specify either 'path' or 'glob'") 370 371 if path is not None: 372 file = self.get_file(path) 373 else: 374 matches = list(filter_files_by_pattern(list(self.list_files()), glob)) 375 if len(matches) == 0: 376 raise DataPortalAssetNotFound(f"No files matched glob '{glob}'") 377 if len(matches) > 1: 378 raise DataPortalInputError( 379 f"glob '{glob}' matched {len(matches)} files — use read_files() to read multiple files" 380 ) 381 file = matches[0] 382 383 return _read_file_with_format(file, filetype, **kwargs) 384 385 def get_trace(self) -> Any: 386 """ 387 Read the Nextflow workflow trace file for this dataset as a DataFrame. 388 389 Returns: 390 `pandas.DataFrame` 391 """ 392 return self.get_artifact(ArtifactType.WORKFLOW_TRACE).read_csv(sep='\t') 393 394 def get_logs(self) -> str: 395 """ 396 Read the Nextflow workflow logs for this dataset as a string. 397 398 Returns: 399 str 400 """ 401 return self.get_artifact(ArtifactType.WORKFLOW_LOGS).read() 402 403 def get_artifact(self, artifact_type: ArtifactType) -> DataPortalFile: 404 """ 405 Get the artifact of a particular type from the dataset 406 """ 407 artifacts = self._get_assets().artifacts 408 artifact = next((a for a in artifacts if a.artifact_type == artifact_type), None) 409 if artifact is None: 410 raise DataPortalAssetNotFound(f"No artifact found with type '{artifact_type}'") 411 return DataPortalFile(file=artifact.file, client=self._client) 412 413 def list_artifacts(self) -> List[DataPortalFile]: 414 """ 415 Return the list of artifacts associated with the dataset 416 417 An artifact may be something generated as part of the analysis or other process. 418 See `cirro_api_client.v1.models.ArtifactType` for the list of possible artifact types. 
419 420 """ 421 artifacts = self._get_assets().artifacts 422 return DataPortalFiles( 423 [ 424 DataPortalFile(file=artifact.file, client=self._client) 425 for artifact in artifacts 426 ] 427 ) 428 429 def download_files(self, download_location: str = None, glob: str = None) -> None: 430 """ 431 Download all the files from the dataset to a local directory. 432 433 Args: 434 download_location (str): Path to local directory 435 glob (str): Optional wildcard expression to filter which files are downloaded 436 (e.g., ``'*.csv'``, ``'data/**/*.tsv.gz'``). 437 If omitted, all files are downloaded. 438 """ 439 440 files = self.list_files() 441 if glob is not None: 442 files = DataPortalFiles(filter_files_by_pattern(list(files), glob)) 443 files.download(download_location) 444 445 def run_analysis( 446 self, 447 name: str = None, 448 description: str = "", 449 process: Union[DataPortalProcess, str] = None, 450 params=None, 451 notifications_emails: List[str] = None, 452 compute_environment: str = None, 453 resume_dataset_id: str = None, 454 source_sample_ids: List[str] = None 455 ) -> str: 456 """ 457 Runs an analysis on a dataset, returns the ID of the newly created dataset. 458 459 The process can be provided as either a DataPortalProcess object, 460 or a string which corresponds to the name or ID of the process. 461 462 Args: 463 name (str): Name of newly created dataset 464 description (str): Description of newly created dataset 465 process (DataPortalProcess or str): Process to run 466 params (dict): Analysis parameters 467 notifications_emails (List[str]): Notification email address(es) 468 compute_environment (str): Name or ID of compute environment to use, 469 if blank it will run in AWS 470 resume_dataset_id (str): ID of dataset to resume from, used for caching task execution. 471 It will attempt to re-use the previous output to minimize duplicate work 472 source_sample_ids (List[str]): List of sample IDs to use as input for the analysis. 
473 474 Returns: 475 dataset_id (str): ID of newly created dataset 476 """ 477 if name is None: 478 raise DataPortalInputError("Must specify 'name' for run_analysis") 479 if process is None: 480 raise DataPortalInputError("Must specify 'process' for run_analysis") 481 if notifications_emails is None: 482 notifications_emails = [] 483 if params is None: 484 params = {} 485 486 # If the process is a string, try to parse it as a process name or ID 487 process = parse_process_name_or_id(process, self._client) 488 489 if compute_environment: 490 compute_environment_name = compute_environment 491 compute_environments = self._client.compute_environments.list_environments_for_project( 492 project_id=self.project_id 493 ) 494 compute_environment = next( 495 (env for env in compute_environments 496 if env.name == compute_environment or env.id == compute_environment), 497 None 498 ) 499 if compute_environment is None: 500 raise DataPortalInputError(f"Compute environment '{compute_environment_name}' not found") 501 502 resp = self._client.execution.run_analysis( 503 project_id=self.project_id, 504 request=RunAnalysisRequest( 505 name=name, 506 description=description, 507 process_id=process.id, 508 source_dataset_ids=[self.id], 509 params=RunAnalysisRequestParams.from_dict(params), 510 notification_emails=notifications_emails, 511 resume_dataset_id=resume_dataset_id, 512 source_sample_ids=source_sample_ids, 513 compute_environment_id=compute_environment.id if compute_environment else None 514 ) 515 ) 516 return resp.id 517 518 def update_samplesheet(self, 519 contents: str = None, 520 file_path: PathLike = None): 521 """ 522 Updates the samplesheet metadata of a dataset. 523 Provide either the contents (as a string) or a file path. 524 Both must be in the format of a CSV. 
525 526 Args: 527 contents (str): Samplesheet contents to update (should be a CSV string) 528 file_path (PathLike): Path of file to update (should be a CSV file) 529 530 Example: 531 ```python 532 dataset.update_samplesheet( 533 file_path=Path('~/samplesheet.csv') 534 ) 535 ``` 536 """ 537 538 if contents is None and file_path is None: 539 raise DataPortalInputError("Must specify either 'contents' or 'file_path' when updating samplesheet") 540 541 samplesheet_contents = contents 542 if file_path is not None: 543 samplesheet_contents = Path(file_path).expanduser().read_text() 544 545 # Validate samplesheet 546 file_names = [f.file_name for f in self.list_files()] 547 request = ValidateFileRequirementsRequest( 548 file_names=file_names, 549 sample_sheet=samplesheet_contents, 550 ) 551 requirements = validate_file_requirements.sync(process_id=self.process_id, 552 body=request, 553 client=self._client.api_client) 554 if error_msg := requirements.error_msg: 555 raise DataPortalInputError(error_msg) 556 557 # Update the samplesheet if everything looks ok 558 self._client.datasets.update_samplesheet( 559 project_id=self.project_id, 560 dataset_id=self.id, 561 samplesheet=samplesheet_contents 562 )
Datasets in the Data Portal are collections of files which have either been uploaded directly, or which have been output by an analysis pipeline or notebook.
117 def __init__(self, dataset: Union[Dataset, DatasetDetail], client: CirroApi): 118 """ 119 Instantiate a dataset object 120 121 Should be invoked from a top-level constructor, for example: 122 123 ```python 124 from cirro import DataPortal 125 portal = DataPortal() 126 dataset = portal.get_dataset( 127 project="id-or-name-of-project", 128 dataset="id-or-name-of-dataset" 129 ) 130 ``` 131 132 """ 133 assert dataset.project_id is not None, "Must provide dataset with project_id attribute" 134 self._data = dataset 135 self._assets: Optional[DatasetAssets] = None 136 self._client = client
Instantiate a dataset object
Should be invoked from a top-level constructor, for example:
from cirro import DataPortal
portal = DataPortal()
dataset = portal.get_dataset(
project="id-or-name-of-project",
dataset="id-or-name-of-dataset"
)
138 @property 139 def id(self) -> str: 140 """Unique identifier for the dataset""" 141 return self._data.id
Unique identifier for the dataset
143 @property 144 def name(self) -> str: 145 """Editable name for the dataset""" 146 return self._data.name
Editable name for the dataset
148 @property 149 def description(self) -> str: 150 """Longer name for the dataset""" 151 return self._data.description
Longer name for the dataset
153 @property 154 def process_id(self) -> str: 155 """Unique ID of process used to create the dataset""" 156 return self._data.process_id
Unique ID of process used to create the dataset
158 @property 159 def process(self) -> ProcessDetail: 160 """ 161 Object representing the process used to create the dataset 162 """ 163 return self._client.processes.get(self.process_id)
Object representing the process used to create the dataset
165 @property 166 def project_id(self) -> str: 167 """ID of the project containing the dataset""" 168 return self._data.project_id
ID of the project containing the dataset
170 @property 171 def status(self) -> Status: 172 """ 173 Status of the dataset 174 """ 175 return self._data.status
Status of the dataset
177 @property 178 def source_dataset_ids(self) -> List[str]: 179 """IDs of the datasets used as sources for this dataset (if any)""" 180 return self._data.source_dataset_ids
IDs of the datasets used as sources for this dataset (if any)
182 @property 183 def source_datasets(self) -> List['DataPortalDataset']: 184 """ 185 Objects representing the datasets used as sources for this dataset (if any) 186 """ 187 return [ 188 DataPortalDataset( 189 dataset=self._client.datasets.get(project_id=self.project_id, dataset_id=dataset_id), 190 client=self._client 191 ) 192 for dataset_id in self.source_dataset_ids 193 ]
Objects representing the datasets used as sources for this dataset (if any)
195 @property 196 def params(self) -> dict: 197 """ 198 Parameters used to generate the dataset 199 """ 200 return self._get_detail().params.to_dict()
Parameters used to generate the dataset
202 @property 203 def info(self) -> dict: 204 """ 205 Extra information about the dataset 206 """ 207 return self._get_detail().info.to_dict()
Extra information about the dataset
223 @property 224 def created_by(self) -> str: 225 """User who created the dataset""" 226 return self._data.created_by
User who created the dataset
228 @property 229 def created_at(self) -> datetime.datetime: 230 """Timestamp of dataset creation""" 231 return self._data.created_at
Timestamp of dataset creation
252 def get_file(self, relative_path: str) -> DataPortalFile: 253 """ 254 Get a file from the dataset using its relative path. 255 256 Args: 257 relative_path (str): Relative path of file within the dataset 258 259 Returns: 260 `from cirro.sdk.file import DataPortalFile` 261 """ 262 263 # Get the list of files in this dataset 264 files = self.list_files() 265 266 # Try getting the file using the relative path provided by the user 267 try: 268 return files.get_by_id(relative_path) 269 except DataPortalAssetNotFound: 270 # Try getting the file with the 'data/' prefix prepended 271 try: 272 return files.get_by_id("data/" + relative_path) 273 except DataPortalAssetNotFound: 274 # If not found, raise the exception using the string provided 275 # by the user, not the data/ prepended version (which may be 276 # confusing to the user) 277 msg = '\n'.join([f"No file found with path '{relative_path}'."]) 278 raise DataPortalAssetNotFound(msg)
Get a file from the dataset using its relative path.
Arguments:
- relative_path (str): Relative path of file within the dataset
Returns:
from cirro.sdk.file import DataPortalFile
280 def list_files(self, file_limit: int = 100000) -> DataPortalFiles: 281 """ 282 Return the list of files which make up the dataset. 283 284 Args: 285 file_limit (int): Maximum number of files to return (default 100,000) 286 """ 287 assets = self._client.datasets.get_assets_listing( 288 project_id=self.project_id, 289 dataset_id=self.id, 290 file_limit=file_limit 291 ) 292 files = assets.files 293 294 return DataPortalFiles( 295 [ 296 DataPortalFile(file=file, client=self._client) 297 for file in files 298 ] 299 )
Return the list of files which make up the dataset.
Arguments:
- file_limit (int): Maximum number of files to return (default 100,000)
301 def read_files( 302 self, 303 glob: str = None, 304 pattern: str = None, 305 filetype: str = None, 306 **kwargs 307 ): 308 """ 309 Read the contents of files in the dataset. 310 311 See :meth:`~cirro.sdk.portal.DataPortal.read_files` for full details 312 on ``glob``/``pattern`` matching and filetype options. 313 314 Args: 315 glob (str): Wildcard expression to match files. 316 Yields one item per matching file: the parsed content. 317 pattern (str): Wildcard expression with ``{name}`` capture 318 placeholders. Yields ``(content, meta)`` per matching file. 319 filetype (str): File format used to parse each file 320 (or ``None`` to infer from extension). 321 **kwargs: Additional keyword arguments forwarded to the 322 file-parsing function. 323 324 Yields: 325 - When using ``glob``: *content* for each matching file 326 - When using ``pattern``: ``(content, meta)`` for each matching file 327 """ 328 if glob is not None and pattern is not None: 329 raise DataPortalInputError("Cannot specify both 'glob' and 'pattern' — use one or the other") 330 if glob is None and pattern is None: 331 raise DataPortalInputError("Must specify either 'glob' or 'pattern'") 332 333 if glob is not None: 334 for file in filter_files_by_pattern(list(self.list_files()), glob): 335 yield _read_file_with_format(file, filetype, **kwargs) 336 else: 337 compiled_regex, _ = _pattern_to_captures_regex(pattern) 338 for file in self.list_files(): 339 m = compiled_regex.match(file.relative_path) 340 if m is not None: 341 yield _read_file_with_format(file, filetype, **kwargs), m.groupdict()
Read the contents of files in the dataset.
See ~cirro.sdk.portal.DataPortal.read_files() for full details
on glob/pattern matching and filetype options.
Arguments:
- glob (str): Wildcard expression to match files. Yields one item per matching file: the parsed content.
- pattern (str): Wildcard expression with
{name}capture placeholders. Yields(content, meta)per matching file. - filetype (str): File format used to parse each file
(or
Noneto infer from extension). - **kwargs: Additional keyword arguments forwarded to the file-parsing function.
Yields:
- When using
glob: content for each matching file- When using
pattern:(content, meta)for each matching file
343 def read_file( 344 self, 345 path: str = None, 346 glob: str = None, 347 filetype: str = None, 348 **kwargs 349 ) -> Any: 350 """ 351 Read the contents of a single file from the dataset. 352 353 See :meth:`~cirro.sdk.portal.DataPortal.read_file` for full details. 354 355 Args: 356 path (str): Exact relative path of the file within the dataset. 357 glob (str): Wildcard expression matching exactly one file. 358 filetype (str): File format used to parse the file. Supported values 359 are the same as :meth:`~cirro.sdk.portal.DataPortal.read_files`. 360 **kwargs: Additional keyword arguments forwarded to the file-parsing 361 function. 362 363 Returns: 364 Parsed file content. 365 """ 366 if path is not None and glob is not None: 367 raise DataPortalInputError("Cannot specify both 'path' and 'glob' — use one or the other") 368 if path is None and glob is None: 369 raise DataPortalInputError("Must specify either 'path' or 'glob'") 370 371 if path is not None: 372 file = self.get_file(path) 373 else: 374 matches = list(filter_files_by_pattern(list(self.list_files()), glob)) 375 if len(matches) == 0: 376 raise DataPortalAssetNotFound(f"No files matched glob '{glob}'") 377 if len(matches) > 1: 378 raise DataPortalInputError( 379 f"glob '{glob}' matched {len(matches)} files — use read_files() to read multiple files" 380 ) 381 file = matches[0] 382 383 return _read_file_with_format(file, filetype, **kwargs)
Read the contents of a single file from the dataset.
See ~cirro.sdk.portal.DataPortal.read_file() for full details.
Arguments:
- path (str): Exact relative path of the file within the dataset.
- glob (str): Wildcard expression matching exactly one file.
- filetype (str): File format used to parse the file. Supported values
are the same as
~cirro.sdk.portal.DataPortal.read_files(). - **kwargs: Additional keyword arguments forwarded to the file-parsing function.
Returns:
Parsed file content.
385 def get_trace(self) -> Any: 386 """ 387 Read the Nextflow workflow trace file for this dataset as a DataFrame. 388 389 Returns: 390 `pandas.DataFrame` 391 """ 392 return self.get_artifact(ArtifactType.WORKFLOW_TRACE).read_csv(sep='\t')
Read the Nextflow workflow trace file for this dataset as a DataFrame.
Returns:
pandas.DataFrame
394 def get_logs(self) -> str: 395 """ 396 Read the Nextflow workflow logs for this dataset as a string. 397 398 Returns: 399 str 400 """ 401 return self.get_artifact(ArtifactType.WORKFLOW_LOGS).read()
Read the Nextflow workflow logs for this dataset as a string.
Returns:
str
403 def get_artifact(self, artifact_type: ArtifactType) -> DataPortalFile: 404 """ 405 Get the artifact of a particular type from the dataset 406 """ 407 artifacts = self._get_assets().artifacts 408 artifact = next((a for a in artifacts if a.artifact_type == artifact_type), None) 409 if artifact is None: 410 raise DataPortalAssetNotFound(f"No artifact found with type '{artifact_type}'") 411 return DataPortalFile(file=artifact.file, client=self._client)
Get the artifact of a particular type from the dataset
413 def list_artifacts(self) -> List[DataPortalFile]: 414 """ 415 Return the list of artifacts associated with the dataset 416 417 An artifact may be something generated as part of the analysis or other process. 418 See `cirro_api_client.v1.models.ArtifactType` for the list of possible artifact types. 419 420 """ 421 artifacts = self._get_assets().artifacts 422 return DataPortalFiles( 423 [ 424 DataPortalFile(file=artifact.file, client=self._client) 425 for artifact in artifacts 426 ] 427 )
Return the list of artifacts associated with the dataset
An artifact may be something generated as part of the analysis or other process.
See cirro_api_client.v1.models.ArtifactType for the list of possible artifact types.
429 def download_files(self, download_location: str = None, glob: str = None) -> None: 430 """ 431 Download all the files from the dataset to a local directory. 432 433 Args: 434 download_location (str): Path to local directory 435 glob (str): Optional wildcard expression to filter which files are downloaded 436 (e.g., ``'*.csv'``, ``'data/**/*.tsv.gz'``). 437 If omitted, all files are downloaded. 438 """ 439 440 files = self.list_files() 441 if glob is not None: 442 files = DataPortalFiles(filter_files_by_pattern(list(files), glob)) 443 files.download(download_location)
Download all the files from the dataset to a local directory.
Arguments:
- download_location (str): Path to local directory
- glob (str): Optional wildcard expression to filter which files are downloaded
(e.g.,
'*.csv','data/**/*.tsv.gz'). If omitted, all files are downloaded.
445 def run_analysis( 446 self, 447 name: str = None, 448 description: str = "", 449 process: Union[DataPortalProcess, str] = None, 450 params=None, 451 notifications_emails: List[str] = None, 452 compute_environment: str = None, 453 resume_dataset_id: str = None, 454 source_sample_ids: List[str] = None 455 ) -> str: 456 """ 457 Runs an analysis on a dataset, returns the ID of the newly created dataset. 458 459 The process can be provided as either a DataPortalProcess object, 460 or a string which corresponds to the name or ID of the process. 461 462 Args: 463 name (str): Name of newly created dataset 464 description (str): Description of newly created dataset 465 process (DataPortalProcess or str): Process to run 466 params (dict): Analysis parameters 467 notifications_emails (List[str]): Notification email address(es) 468 compute_environment (str): Name or ID of compute environment to use, 469 if blank it will run in AWS 470 resume_dataset_id (str): ID of dataset to resume from, used for caching task execution. 471 It will attempt to re-use the previous output to minimize duplicate work 472 source_sample_ids (List[str]): List of sample IDs to use as input for the analysis. 
473 474 Returns: 475 dataset_id (str): ID of newly created dataset 476 """ 477 if name is None: 478 raise DataPortalInputError("Must specify 'name' for run_analysis") 479 if process is None: 480 raise DataPortalInputError("Must specify 'process' for run_analysis") 481 if notifications_emails is None: 482 notifications_emails = [] 483 if params is None: 484 params = {} 485 486 # If the process is a string, try to parse it as a process name or ID 487 process = parse_process_name_or_id(process, self._client) 488 489 if compute_environment: 490 compute_environment_name = compute_environment 491 compute_environments = self._client.compute_environments.list_environments_for_project( 492 project_id=self.project_id 493 ) 494 compute_environment = next( 495 (env for env in compute_environments 496 if env.name == compute_environment or env.id == compute_environment), 497 None 498 ) 499 if compute_environment is None: 500 raise DataPortalInputError(f"Compute environment '{compute_environment_name}' not found") 501 502 resp = self._client.execution.run_analysis( 503 project_id=self.project_id, 504 request=RunAnalysisRequest( 505 name=name, 506 description=description, 507 process_id=process.id, 508 source_dataset_ids=[self.id], 509 params=RunAnalysisRequestParams.from_dict(params), 510 notification_emails=notifications_emails, 511 resume_dataset_id=resume_dataset_id, 512 source_sample_ids=source_sample_ids, 513 compute_environment_id=compute_environment.id if compute_environment else None 514 ) 515 ) 516 return resp.id
Runs an analysis on a dataset, returns the ID of the newly created dataset.
The process can be provided as either a DataPortalProcess object, or a string which corresponds to the name or ID of the process.
Arguments:
- name (str): Name of newly created dataset
- description (str): Description of newly created dataset
- process (DataPortalProcess or str): Process to run
- params (dict): Analysis parameters
- notifications_emails (List[str]): Notification email address(es)
- compute_environment (str): Name or ID of compute environment to use, if blank it will run in AWS
- resume_dataset_id (str): ID of dataset to resume from, used for caching task execution. It will attempt to re-use the previous output to minimize duplicate work
- source_sample_ids (List[str]): List of sample IDs to use as input for the analysis.
Returns:
dataset_id (str): ID of newly created dataset
518 def update_samplesheet(self, 519 contents: str = None, 520 file_path: PathLike = None): 521 """ 522 Updates the samplesheet metadata of a dataset. 523 Provide either the contents (as a string) or a file path. 524 Both must be in the format of a CSV. 525 526 Args: 527 contents (str): Samplesheet contents to update (should be a CSV string) 528 file_path (PathLike): Path of file to update (should be a CSV file) 529 530 Example: 531 ```python 532 dataset.update_samplesheet( 533 file_path=Path('~/samplesheet.csv') 534 ) 535 ``` 536 """ 537 538 if contents is None and file_path is None: 539 raise DataPortalInputError("Must specify either 'contents' or 'file_path' when updating samplesheet") 540 541 samplesheet_contents = contents 542 if file_path is not None: 543 samplesheet_contents = Path(file_path).expanduser().read_text() 544 545 # Validate samplesheet 546 file_names = [f.file_name for f in self.list_files()] 547 request = ValidateFileRequirementsRequest( 548 file_names=file_names, 549 sample_sheet=samplesheet_contents, 550 ) 551 requirements = validate_file_requirements.sync(process_id=self.process_id, 552 body=request, 553 client=self._client.api_client) 554 if error_msg := requirements.error_msg: 555 raise DataPortalInputError(error_msg) 556 557 # Update the samplesheet if everything looks ok 558 self._client.datasets.update_samplesheet( 559 project_id=self.project_id, 560 dataset_id=self.id, 561 samplesheet=samplesheet_contents 562 )
Updates the samplesheet metadata of a dataset. Provide either the contents (as a string) or a file path. Both must be in the format of a CSV.
Arguments:
- contents (str): Samplesheet contents to update (should be a CSV string)
- file_path (PathLike): Path of file to update (should be a CSV file)
Example:
dataset.update_samplesheet(
file_path=Path('~/samplesheet.csv')
)
class DataPortalDatasets(DataPortalAssets[DataPortalDataset]):
    """A collection holding multiple DataPortalDataset objects."""
    # Singular asset label used by the base collection class
    asset_name = "dataset"