cirro.models.file
from dataclasses import dataclass
from pathlib import PurePath, Path
from typing import Dict, Optional, TypeVar, NamedTuple

from cirro_api_client.v1.models import ProjectFileAccessRequest, ProjectAccessType, FileEntry, DatasetDetail

from cirro.models.s3_path import S3Path

PathLike = TypeVar('PathLike', str, Path)


class DirectoryStatistics(NamedTuple):
    """Size and file-count statistics for a directory."""
    size: float
    " Size in bytes"
    size_friendly: str
    " Size in user friendly format (e.g. 1.2 KB)"
    number_of_files: int
    " Number of files"


class FileAccessContext:
    """
    Context holder for accessing various files in Cirro and abstracting out their location.
    Prefer to use the class methods to instantiate.
    """
    def __init__(self,
                 file_access_request: ProjectFileAccessRequest,
                 project_id: str,
                 base_url: str):
        self.file_access_request = file_access_request
        self.base_url = base_url
        self.project_id = project_id
        # Parsed once here; `bucket` and `prefix` read from this object
        self._s3_path = S3Path(base_url)

    @classmethod
    def download(cls, project_id: str, base_url: str,
                 token_lifetime_override: Optional[int] = None) -> 'FileAccessContext':
        """
        Create a context that requests PROJECT_DOWNLOAD access.

        `token_lifetime_override` is forwarded as `token_lifetime_hours`
        on the access request.
        """
        return cls(
            file_access_request=ProjectFileAccessRequest(
                access_type=ProjectAccessType.PROJECT_DOWNLOAD,
                token_lifetime_hours=token_lifetime_override
            ),
            base_url=base_url,
            project_id=project_id
        )

    @classmethod
    def download_shared_dataset(cls, project_id: str, dataset_id: str,
                                base_url: str,
                                token_lifetime_override: Optional[int] = None) -> 'FileAccessContext':
        """
        Create a context that requests SHARED_DATASET_DOWNLOAD access for `dataset_id`.

        `token_lifetime_override` is forwarded as `token_lifetime_hours`
        on the access request.
        """
        return cls(
            file_access_request=ProjectFileAccessRequest(
                access_type=ProjectAccessType.SHARED_DATASET_DOWNLOAD,
                dataset_id=dataset_id,
                token_lifetime_hours=token_lifetime_override
            ),
            base_url=base_url,
            project_id=project_id
        )

    @classmethod
    def upload_dataset(cls, project_id: str, dataset_id: str, base_url: str,
                       token_lifetime_override: Optional[int] = None) -> 'FileAccessContext':
        """
        Create a context that requests DATASET_UPLOAD access for `dataset_id`.

        The resulting context's base URL is `base_url` with `/data` appended.
        `token_lifetime_override` is forwarded as `token_lifetime_hours`
        on the access request.
        """
        return cls(
            file_access_request=ProjectFileAccessRequest(
                access_type=ProjectAccessType.DATASET_UPLOAD,
                dataset_id=dataset_id,
                token_lifetime_hours=token_lifetime_override
            ),
            base_url=f'{base_url}/data',
            project_id=project_id
        )

    @classmethod
    def upload_reference(cls, project_id: str, base_url: str) -> 'FileAccessContext':
        """Create a context that requests REFERENCE_UPLOAD access."""
        return cls(
            file_access_request=ProjectFileAccessRequest(
                access_type=ProjectAccessType.REFERENCE_UPLOAD
            ),
            base_url=base_url,
            project_id=project_id
        )

    @classmethod
    def upload_sample_sheet(cls, project_id: str, dataset_id: str, base_url: str) -> 'FileAccessContext':
        """Create a context that requests SAMPLESHEET_UPLOAD access for `dataset_id`."""
        return cls(
            file_access_request=ProjectFileAccessRequest(
                access_type=ProjectAccessType.SAMPLESHEET_UPLOAD,
                dataset_id=dataset_id
            ),
            base_url=base_url,
            project_id=project_id
        )

    @property
    def bucket(self) -> str:
        """ S3 Bucket """
        return self._s3_path.bucket

    @property
    def prefix(self) -> str:
        """ S3 Prefix """
        return self._s3_path.key

    def __repr__(self):
        return f'{self.__class__.__name__}({self.file_access_request.access_type}@base_url={self.base_url})'


@dataclass(frozen=True)
class File:
    """A file entry addressed by a path relative to its access context's base URL."""
    relative_path: str
    size: int
    access_context: FileAccessContext
    metadata: Optional[Dict] = None

    @classmethod
    def from_file_entry(cls, file: FileEntry, project_id: str,
                        dataset: Optional[DatasetDetail] = None,
                        domain: Optional[str] = None) -> 'File':
        """
        Build a File from an API file entry.

        When `file.path` contains `s3://` it is parsed as an S3 path: its base
        replaces `domain` and its key becomes the relative path. Otherwise
        `file.path` is used as the relative path and `domain` as the base URL.

        A shared-dataset download context is used when `dataset` is given and
        its `share` is truthy; otherwise a project download context is used.
        """
        # Path is absolute rather than relative
        if 's3://' in file.path:
            parts = S3Path(file.path)
            domain = parts.base
            path = parts.key
        else:
            # NOTE(review): `domain` may still be None here and is passed on
            # as base_url - confirm callers always supply it for relative paths.
            path = file.path

        if dataset and dataset.share:
            access_context = FileAccessContext.download_shared_dataset(
                project_id=project_id,
                dataset_id=dataset.id,
                base_url=domain
            )
        else:
            access_context = FileAccessContext.download(
                project_id=project_id,
                base_url=domain
            )

        return cls(
            relative_path=path,
            metadata=file.metadata.additional_properties if file.metadata else {},
            size=file.size,
            access_context=access_context
        )

    @property
    def absolute_path(self) -> str:
        """Base URL of the access context joined with the relative path (surrounding slashes stripped)."""
        return f'{self.access_context.base_url}/{self.relative_path.strip("/")}'

    @property
    def name(self) -> str:
        """Final path component of `absolute_path`."""
        return PurePath(self.absolute_path).name

    def __repr__(self):
        return f'{self.__class__.__name__}(path={self.relative_path})'
class
DirectoryStatistics(typing.NamedTuple):
class DirectoryStatistics(NamedTuple):
    """Size and file-count statistics for a directory."""
    size: float
    " Size in bytes"
    size_friendly: str
    " Size in user friendly format (e.g. 1.2 KB)"
    number_of_files: int
    " Number of files"
DirectoryStatistics(size, size_friendly, number_of_files)
class
FileAccessContext:
class FileAccessContext:
    """
    Context holder for accessing various files in Cirro and abstracting out their location.
    Prefer to use the class methods to instantiate.
    """
    def __init__(self,
                 file_access_request: ProjectFileAccessRequest,
                 project_id: str,
                 base_url: str):
        self.file_access_request = file_access_request
        self.base_url = base_url
        self.project_id = project_id
        # Parsed once here; `bucket` and `prefix` read from this object
        self._s3_path = S3Path(base_url)

    @classmethod
    def download(cls, project_id: str, base_url: str,
                 token_lifetime_override: Optional[int] = None) -> 'FileAccessContext':
        """
        Create a context that requests PROJECT_DOWNLOAD access.

        `token_lifetime_override` is forwarded as `token_lifetime_hours`
        on the access request.
        """
        return cls(
            file_access_request=ProjectFileAccessRequest(
                access_type=ProjectAccessType.PROJECT_DOWNLOAD,
                token_lifetime_hours=token_lifetime_override
            ),
            base_url=base_url,
            project_id=project_id
        )

    @classmethod
    def download_shared_dataset(cls, project_id: str, dataset_id: str,
                                base_url: str,
                                token_lifetime_override: Optional[int] = None) -> 'FileAccessContext':
        """
        Create a context that requests SHARED_DATASET_DOWNLOAD access for `dataset_id`.

        `token_lifetime_override` is forwarded as `token_lifetime_hours`
        on the access request.
        """
        return cls(
            file_access_request=ProjectFileAccessRequest(
                access_type=ProjectAccessType.SHARED_DATASET_DOWNLOAD,
                dataset_id=dataset_id,
                token_lifetime_hours=token_lifetime_override
            ),
            base_url=base_url,
            project_id=project_id
        )

    @classmethod
    def upload_dataset(cls, project_id: str, dataset_id: str, base_url: str,
                       token_lifetime_override: Optional[int] = None) -> 'FileAccessContext':
        """
        Create a context that requests DATASET_UPLOAD access for `dataset_id`.

        The resulting context's base URL is `base_url` with `/data` appended.
        `token_lifetime_override` is forwarded as `token_lifetime_hours`
        on the access request.
        """
        return cls(
            file_access_request=ProjectFileAccessRequest(
                access_type=ProjectAccessType.DATASET_UPLOAD,
                dataset_id=dataset_id,
                token_lifetime_hours=token_lifetime_override
            ),
            base_url=f'{base_url}/data',
            project_id=project_id
        )

    @classmethod
    def upload_reference(cls, project_id: str, base_url: str) -> 'FileAccessContext':
        """Create a context that requests REFERENCE_UPLOAD access."""
        return cls(
            file_access_request=ProjectFileAccessRequest(
                access_type=ProjectAccessType.REFERENCE_UPLOAD
            ),
            base_url=base_url,
            project_id=project_id
        )

    @classmethod
    def upload_sample_sheet(cls, project_id: str, dataset_id: str, base_url: str) -> 'FileAccessContext':
        """Create a context that requests SAMPLESHEET_UPLOAD access for `dataset_id`."""
        return cls(
            file_access_request=ProjectFileAccessRequest(
                access_type=ProjectAccessType.SAMPLESHEET_UPLOAD,
                dataset_id=dataset_id
            ),
            base_url=base_url,
            project_id=project_id
        )

    @property
    def bucket(self) -> str:
        """ S3 Bucket """
        return self._s3_path.bucket

    @property
    def prefix(self) -> str:
        """ S3 Prefix """
        return self._s3_path.key

    def __repr__(self):
        return f'{self.__class__.__name__}({self.file_access_request.access_type}@base_url={self.base_url})'
Context holder for accessing various files in Cirro and abstracting out their location. Prefer to use the class methods to instantiate.
FileAccessContext( file_access_request: cirro_api_client.v1.models.ProjectFileAccessRequest, project_id: str, base_url: str)
@classmethod
def
download( cls, project_id: str, base_url: str, token_lifetime_override: int = None):
@classmethod
def download(cls, project_id: str, base_url: str,
             token_lifetime_override: Optional[int] = None) -> 'FileAccessContext':
    """
    Create a context that requests PROJECT_DOWNLOAD access.

    `token_lifetime_override` is forwarded as `token_lifetime_hours`
    on the access request.
    """
    return cls(
        file_access_request=ProjectFileAccessRequest(
            access_type=ProjectAccessType.PROJECT_DOWNLOAD,
            token_lifetime_hours=token_lifetime_override
        ),
        base_url=base_url,
        project_id=project_id
    )
@classmethod
def
upload_dataset( cls, project_id: str, dataset_id: str, base_url: str, token_lifetime_override: int = None):
@classmethod
def upload_dataset(cls, project_id: str, dataset_id: str, base_url: str,
                   token_lifetime_override: Optional[int] = None) -> 'FileAccessContext':
    """
    Create a context that requests DATASET_UPLOAD access for `dataset_id`.

    The resulting context's base URL is `base_url` with `/data` appended.
    `token_lifetime_override` is forwarded as `token_lifetime_hours`
    on the access request.
    """
    return cls(
        file_access_request=ProjectFileAccessRequest(
            access_type=ProjectAccessType.DATASET_UPLOAD,
            dataset_id=dataset_id,
            token_lifetime_hours=token_lifetime_override
        ),
        base_url=f'{base_url}/data',
        project_id=project_id
    )
@dataclass(frozen=True)
class
File:
@dataclass(frozen=True)
class File:
    """A file entry addressed by a path relative to its access context's base URL."""
    relative_path: str
    size: int
    access_context: FileAccessContext
    metadata: Optional[Dict] = None

    @classmethod
    def from_file_entry(cls, file: FileEntry, project_id: str,
                        dataset: Optional[DatasetDetail] = None,
                        domain: Optional[str] = None) -> 'File':
        """
        Build a File from an API file entry.

        When `file.path` contains `s3://` it is parsed as an S3 path: its base
        replaces `domain` and its key becomes the relative path. Otherwise
        `file.path` is used as the relative path and `domain` as the base URL.

        A shared-dataset download context is used when `dataset` is given and
        its `share` is truthy; otherwise a project download context is used.
        """
        # Path is absolute rather than relative
        if 's3://' in file.path:
            parts = S3Path(file.path)
            domain = parts.base
            path = parts.key
        else:
            # NOTE(review): `domain` may still be None here and is passed on
            # as base_url - confirm callers always supply it for relative paths.
            path = file.path

        if dataset and dataset.share:
            access_context = FileAccessContext.download_shared_dataset(
                project_id=project_id,
                dataset_id=dataset.id,
                base_url=domain
            )
        else:
            access_context = FileAccessContext.download(
                project_id=project_id,
                base_url=domain
            )

        return cls(
            relative_path=path,
            metadata=file.metadata.additional_properties if file.metadata else {},
            size=file.size,
            access_context=access_context
        )

    @property
    def absolute_path(self) -> str:
        """Base URL of the access context joined with the relative path (surrounding slashes stripped)."""
        return f'{self.access_context.base_url}/{self.relative_path.strip("/")}'

    @property
    def name(self) -> str:
        """Final path component of `absolute_path`."""
        return PurePath(self.absolute_path).name

    def __repr__(self):
        return f'{self.__class__.__name__}(path={self.relative_path})'
File( relative_path: str, size: int, access_context: FileAccessContext, metadata: Optional[Dict] = None)
access_context: FileAccessContext
@classmethod
def
from_file_entry( cls, file: cirro_api_client.v1.models.FileEntry, project_id: str, dataset: cirro_api_client.v1.models.DatasetDetail = None, domain: str = None):
@classmethod
def from_file_entry(cls, file: FileEntry, project_id: str,
                    dataset: Optional[DatasetDetail] = None,
                    domain: Optional[str] = None) -> 'File':
    """
    Build a File from an API file entry.

    When `file.path` contains `s3://` it is parsed as an S3 path: its base
    replaces `domain` and its key becomes the relative path. Otherwise
    `file.path` is used as the relative path and `domain` as the base URL.

    A shared-dataset download context is used when `dataset` is given and
    its `share` is truthy; otherwise a project download context is used.
    """
    # Path is absolute rather than relative
    if 's3://' in file.path:
        parts = S3Path(file.path)
        domain = parts.base
        path = parts.key
    else:
        # NOTE(review): `domain` may still be None here and is passed on
        # as base_url - confirm callers always supply it for relative paths.
        path = file.path

    if dataset and dataset.share:
        access_context = FileAccessContext.download_shared_dataset(
            project_id=project_id,
            dataset_id=dataset.id,
            base_url=domain
        )
    else:
        access_context = FileAccessContext.download(
            project_id=project_id,
            base_url=domain
        )

    return cls(
        relative_path=path,
        metadata=file.metadata.additional_properties if file.metadata else {},
        size=file.size,
        access_context=access_context
    )