cirro.sdk.developer
from io import StringIO

from cirro_api_client.v1.api.datasets import get_sample_sheets, ingest_samples
from cirro_api_client.v1.api.processes import validate_file_name_patterns
from cirro_api_client.v1.models import SampleSheets, ValidateFileNamePatternsRequest, FileNameMatch

from cirro.cirro_client import CirroApi
from cirro.helpers import PreprocessDataset


class Matches(list[FileNameMatch]):
    """
    List of file name validation matches with a helper to print them.
    """

    def print(self):
        """
        Prints the file name validation matches in a readable format.

        Writes the number of matches, then one blank-line-separated entry per match
        (file name, sample name and matched regex) to standard output.
        """
        print(f'Matches: {len(self)}')
        print()
        for match in self:
            print(f'{match.file_name}')
            print(f'Sample name: {match.sample_name}')
            print(f'Matched regex: {match.regex_pattern_match}')
            print()


class DeveloperHelper:
    """
    Helper class for developer-related tasks,
    such as adding samplesheet preprocessing for a pipeline
    or testing file name validation and sample autopopulation.
    """

    def __init__(self, client: CirroApi):
        # All API calls made by this helper go through this client
        self.client = client

    def generate_preprocess_for_input_datasets(self,
                                               project_id: str,
                                               input_dataset_ids: list[str],
                                               params=None) -> PreprocessDataset:
        """
        Generates a PreprocessDataset object for the given datasets

        With optional parameters to pass into the preprocess script.
        `metadata` is not available in this context, so it is mocked.

        Raises ValueError when `input_dataset_ids` is empty.
        """
        samplesheets = self._generate_samplesheets_for_datasets(project_id, input_dataset_ids)
        return PreprocessDataset(
            samplesheet=samplesheets.samples,
            files=samplesheets.files,
            # A missing (or empty) params value is passed on as an empty dict
            params=params or {},
            # Mock metadata
            metadata={
                'dataset': {},
                'project': {},
                'inputs': [],
                'process': {}
            }
        )

    def test_file_name_validation_for_dataset(self,
                                              project_id: str,
                                              dataset_id: str,
                                              file_name_patterns: list[str]) -> Matches:
        """
        Tests the file name validation for a given dataset against specified regex patterns.

        The relative paths of the files in the dataset's asset listing are used as the file names.

        Used when configuring Cirro's sample autopopulation feature.
        More info: https://docs.cirro.bio/features/samples/#using-auto-population
        """
        dataset_files = self.client.datasets.get_assets_listing(project_id=project_id, dataset_id=dataset_id).files
        file_names = [file.relative_path for file in dataset_files]
        return self.test_file_name_validation(file_names, file_name_patterns)

    def test_file_name_validation(self,
                                  file_names: list[str],
                                  file_name_patterns: list[str]) -> Matches:
        """
        Tests the file name validation for a list of file names against specified regex patterns.

        Returns the matches reported by the API, or an empty Matches list
        when the API call yields no result.
        """
        request_body = ValidateFileNamePatternsRequest(
            file_names=file_names,
            file_name_patterns=file_name_patterns
        )

        matches = validate_file_name_patterns.sync(
            process_id="test",
            body=request_body,
            client=self.client.api_client
        )
        # NOTE(review): the API helper may return None instead of a list (to be confirmed
        # against the client library); list(None) would raise TypeError, so fall back to empty.
        return Matches(matches or [])

    def generate_samplesheets_for_dataset(self, project_id: str, dataset_id: str) -> SampleSheets:
        """
        Generates Cirro samplesheets for a given dataset
        """
        return get_sample_sheets.sync(
            project_id=project_id,
            dataset_id=dataset_id,
            client=self.client.api_client
        )

    def rerun_sample_ingest_for_dataset(self, project_id: str, dataset_id: str):
        """
        Reruns the sample ingest process for a given dataset.
        You'll want to do this if you have updated the file name patterns in your pipeline (or data type)

        The response of the API call is not returned.
        """
        ingest_samples.sync_detailed(
            project_id=project_id,
            dataset_id=dataset_id,
            client=self.client.api_client
        )

    def _generate_samplesheets_for_datasets(self, project_id: str, dataset_ids: list[str]) -> SampleSheets:
        """
        Generates Cirro samplesheets for multiple datasets in a project.

        The per-dataset `samples` and `files` CSV strings are concatenated row-wise
        and returned as a single SampleSheets object.

        Raises ValueError when `dataset_ids` is empty.
        """
        # Fail fast with a clear message: pandas.concat raises an opaque
        # "No objects to concatenate" ValueError when given an empty list.
        if not dataset_ids:
            raise ValueError('At least one dataset ID is required to generate samplesheets')

        # Concatenate samplesheets using pandas
        # (imported locally, so pandas is only needed when this method is called)
        import pandas
        samplesheets_dfs = []
        files_dfs = []
        for dataset_id in dataset_ids:
            samplesheet = self.generate_samplesheets_for_dataset(project_id, dataset_id)
            samplesheets_dfs.append(pandas.read_csv(StringIO(samplesheet.samples)))
            files_dfs.append(pandas.read_csv(StringIO(samplesheet.files)))

        samplesheets_df = pandas.concat(samplesheets_dfs, ignore_index=True)
        files_df = pandas.concat(files_dfs, ignore_index=True)
        return SampleSheets(
            samples=samplesheets_df.to_csv(index=False),
            files=files_df.to_csv(index=False)
        )
class Matches(list[FileNameMatch]):
    """
    List of file name validation matches that knows how to print itself.
    """

    def print(self):
        """
        Prints the file name validation matches in a readable format.

        The match count comes first, followed by one entry per match
        (file name, sample name, matched regex), each followed by a blank line.
        """
        total = len(self)
        print(f'Matches: {total}')
        print()
        for entry in self:
            # One three-line record per match, then a separator line
            print(f'{entry.file_name}')
            print(f'Sample name: {entry.sample_name}')
            print(f'Matched regex: {entry.regex_pattern_match}')
            print()
Built-in mutable sequence.
If no argument is given, the constructor creates a new empty list. The argument must be an iterable if specified.
def print(self):
    """
    Prints the file name validation matches in a readable format.

    The match count comes first, followed by one entry per match
    (file name, sample name, matched regex), each followed by a blank line.
    """
    total = len(self)
    print(f'Matches: {total}')
    print()
    for entry in self:
        # One three-line record per match, then a separator line
        print(f'{entry.file_name}')
        print(f'Sample name: {entry.sample_name}')
        print(f'Matched regex: {entry.regex_pattern_match}')
        print()
Prints the file name validation matches in a readable format.
class DeveloperHelper:
    """
    Helper class for developer-related tasks,
    such as adding samplesheet preprocessing for a pipeline
    or testing file name validation and sample autopopulation.
    """

    def __init__(self, client: CirroApi):
        # All API calls made by this helper go through this client
        self.client = client

    def generate_preprocess_for_input_datasets(self,
                                               project_id: str,
                                               input_dataset_ids: list[str],
                                               params=None) -> PreprocessDataset:
        """
        Generates a PreprocessDataset object for the given datasets

        With optional parameters to pass into the preprocess script.
        `metadata` is not available in this context, so it is mocked.

        Raises ValueError when `input_dataset_ids` is empty.
        """
        samplesheets = self._generate_samplesheets_for_datasets(project_id, input_dataset_ids)
        return PreprocessDataset(
            samplesheet=samplesheets.samples,
            files=samplesheets.files,
            # A missing (or empty) params value is passed on as an empty dict
            params=params or {},
            # Mock metadata
            metadata={
                'dataset': {},
                'project': {},
                'inputs': [],
                'process': {}
            }
        )

    def test_file_name_validation_for_dataset(self,
                                              project_id: str,
                                              dataset_id: str,
                                              file_name_patterns: list[str]) -> Matches:
        """
        Tests the file name validation for a given dataset against specified regex patterns.

        The relative paths of the files in the dataset's asset listing are used as the file names.

        Used when configuring Cirro's sample autopopulation feature.
        More info: https://docs.cirro.bio/features/samples/#using-auto-population
        """
        dataset_files = self.client.datasets.get_assets_listing(project_id=project_id, dataset_id=dataset_id).files
        file_names = [file.relative_path for file in dataset_files]
        return self.test_file_name_validation(file_names, file_name_patterns)

    def test_file_name_validation(self,
                                  file_names: list[str],
                                  file_name_patterns: list[str]) -> Matches:
        """
        Tests the file name validation for a list of file names against specified regex patterns.

        Returns the matches reported by the API, or an empty Matches list
        when the API call yields no result.
        """
        request_body = ValidateFileNamePatternsRequest(
            file_names=file_names,
            file_name_patterns=file_name_patterns
        )

        matches = validate_file_name_patterns.sync(
            process_id="test",
            body=request_body,
            client=self.client.api_client
        )
        # NOTE(review): the API helper may return None instead of a list (to be confirmed
        # against the client library); list(None) would raise TypeError, so fall back to empty.
        return Matches(matches or [])

    def generate_samplesheets_for_dataset(self, project_id: str, dataset_id: str) -> SampleSheets:
        """
        Generates Cirro samplesheets for a given dataset
        """
        return get_sample_sheets.sync(
            project_id=project_id,
            dataset_id=dataset_id,
            client=self.client.api_client
        )

    def rerun_sample_ingest_for_dataset(self, project_id: str, dataset_id: str):
        """
        Reruns the sample ingest process for a given dataset.
        You'll want to do this if you have updated the file name patterns in your pipeline (or data type)

        The response of the API call is not returned.
        """
        ingest_samples.sync_detailed(
            project_id=project_id,
            dataset_id=dataset_id,
            client=self.client.api_client
        )

    def _generate_samplesheets_for_datasets(self, project_id: str, dataset_ids: list[str]) -> SampleSheets:
        """
        Generates Cirro samplesheets for multiple datasets in a project.

        The per-dataset `samples` and `files` CSV strings are concatenated row-wise
        and returned as a single SampleSheets object.

        Raises ValueError when `dataset_ids` is empty.
        """
        # Fail fast with a clear message: pandas.concat raises an opaque
        # "No objects to concatenate" ValueError when given an empty list.
        if not dataset_ids:
            raise ValueError('At least one dataset ID is required to generate samplesheets')

        # Concatenate samplesheets using pandas
        # (imported locally, so pandas is only needed when this method is called)
        import pandas
        samplesheets_dfs = []
        files_dfs = []
        for dataset_id in dataset_ids:
            samplesheet = self.generate_samplesheets_for_dataset(project_id, dataset_id)
            samplesheets_dfs.append(pandas.read_csv(StringIO(samplesheet.samples)))
            files_dfs.append(pandas.read_csv(StringIO(samplesheet.files)))

        samplesheets_df = pandas.concat(samplesheets_dfs, ignore_index=True)
        files_df = pandas.concat(files_dfs, ignore_index=True)
        return SampleSheets(
            samples=samplesheets_df.to_csv(index=False),
            files=files_df.to_csv(index=False)
        )
Helper class for developer-related tasks, such as adding samplesheet preprocessing for a pipeline or testing file name validation and sample autopopulation.
def generate_preprocess_for_input_datasets(self,
                                           project_id: str,
                                           input_dataset_ids: list[str],
                                           params=None) -> PreprocessDataset:
    """
    Builds a PreprocessDataset from the combined samplesheets of the given datasets.

    `params` is optional and is passed through to the preprocess script.
    `metadata` is not available in this context, so a mocked value is supplied.
    """
    combined = self._generate_samplesheets_for_datasets(project_id, input_dataset_ids)

    # A missing (or empty) params value is passed on as an empty dict
    script_params = params or {}

    # Mock metadata
    mock_metadata = {
        'dataset': {},
        'project': {},
        'inputs': [],
        'process': {}
    }

    return PreprocessDataset(
        samplesheet=combined.samples,
        files=combined.files,
        params=script_params,
        metadata=mock_metadata
    )
Generates a PreprocessDataset object for the given datasets
With optional parameters to pass into the preprocess script.
`metadata` is not available in this context, so it is mocked.
def test_file_name_validation_for_dataset(self,
                                          project_id: str,
                                          dataset_id: str,
                                          file_name_patterns: list[str]) -> Matches:
    """
    Checks the files of a dataset against the given file name regex patterns.

    The relative paths from the dataset's asset listing are used as the file names
    and handed to `test_file_name_validation`.

    Useful when configuring Cirro's sample autopopulation feature.
    More info: https://docs.cirro.bio/features/samples/#using-auto-population
    """
    listing = self.client.datasets.get_assets_listing(project_id=project_id, dataset_id=dataset_id)
    relative_paths = []
    for entry in listing.files:
        relative_paths.append(entry.relative_path)
    return self.test_file_name_validation(relative_paths, file_name_patterns)
Tests the file name validation for a given dataset against specified regex patterns.
Used when configuring Cirro's sample autopopulation feature. More info: https://docs.cirro.bio/features/samples/#using-auto-population
def test_file_name_validation(self,
                              file_names: list[str],
                              file_name_patterns: list[str]) -> Matches:
    """
    Tests the file name validation for a list of file names against specified regex patterns.

    Returns the matches reported by the API, or an empty Matches list
    when the API call yields no result.
    """
    request_body = ValidateFileNamePatternsRequest(
        file_names=file_names,
        file_name_patterns=file_name_patterns
    )

    matches = validate_file_name_patterns.sync(
        process_id="test",
        body=request_body,
        client=self.client.api_client
    )
    # NOTE(review): the API helper may return None instead of a list (to be confirmed
    # against the client library); list(None) would raise TypeError, so fall back to empty.
    return Matches(matches or [])
Tests the file name validation for a list of file names against specified regex patterns.
def generate_samplesheets_for_dataset(self, project_id: str, dataset_id: str) -> SampleSheets:
    """
    Fetches the Cirro samplesheets for a single dataset through the API client.
    """
    request_args = {
        'project_id': project_id,
        'dataset_id': dataset_id,
        'client': self.client.api_client
    }
    sheets = get_sample_sheets.sync(**request_args)
    return sheets
Generates Cirro samplesheets for a given dataset
def rerun_sample_ingest_for_dataset(self, project_id: str, dataset_id: str):
    """
    Triggers the sample ingest process again for the given dataset.
    Do this after updating the file name patterns in your pipeline (or data type).

    The response of the API call is not returned.
    """
    request_args = {
        'project_id': project_id,
        'dataset_id': dataset_id,
        'client': self.client.api_client
    }
    ingest_samples.sync_detailed(**request_args)
Reruns the sample ingest process for a given dataset. You'll want to do this if you have updated the file name patterns in your pipeline (or data type)