cirro.models.file

  1from dataclasses import dataclass
  2from pathlib import PurePath, Path
  3from typing import Dict, Optional, TypeVar, NamedTuple
  4
  5from cirro_api_client.v1.models import ProjectFileAccessRequest, ProjectAccessType, FileEntry, DatasetDetail
  6
  7from cirro.models.s3_path import S3Path
  8
  9PathLike = TypeVar('PathLike', str, Path)
 10
 11
class DirectoryStatistics(NamedTuple):
    """
    Immutable summary of a directory: its total size (raw and formatted) and its file count.
    """
    size: float
    " Size in bytes"
    size_friendly: str
    " Size in user friendly format (e.g. 1.2 KB)"
    number_of_files: int
    " Number of files"
 19
 20
 21class FileAccessContext:
 22    """
 23    Context holder for accessing various files in Cirro and abstracting out their location.
 24    Prefer to use the class methods to instantiate.
 25    """
 26    def __init__(self,
 27                 file_access_request: ProjectFileAccessRequest,
 28                 project_id: str,
 29                 base_url: str):
 30        self.file_access_request = file_access_request
 31        self.base_url = base_url
 32        self.project_id = project_id
 33        self._s3_path = S3Path(base_url)
 34
 35    @classmethod
 36    def download(cls, project_id: str, base_url: str, token_lifetime_override: int = None):
 37        return cls(
 38            file_access_request=ProjectFileAccessRequest(
 39                access_type=ProjectAccessType.PROJECT_DOWNLOAD,
 40                token_lifetime_hours=token_lifetime_override
 41            ),
 42            base_url=base_url,
 43            project_id=project_id
 44        )
 45
 46    @classmethod
 47    def download_shared_dataset(cls, project_id: str, dataset_id: str,
 48                                base_url: str, token_lifetime_override: int = None):
 49        return cls(
 50            file_access_request=ProjectFileAccessRequest(
 51                access_type=ProjectAccessType.SHARED_DATASET_DOWNLOAD,
 52                dataset_id=dataset_id,
 53                token_lifetime_hours=token_lifetime_override
 54            ),
 55            base_url=base_url,
 56            project_id=project_id
 57        )
 58
 59    @classmethod
 60    def upload_dataset(cls, project_id: str, dataset_id: str, base_url: str, token_lifetime_override: int = None):
 61        return cls(
 62            file_access_request=ProjectFileAccessRequest(
 63                access_type=ProjectAccessType.DATASET_UPLOAD,
 64                dataset_id=dataset_id,
 65                token_lifetime_hours=token_lifetime_override
 66            ),
 67            base_url=f'{base_url}/data',
 68            project_id=project_id
 69        )
 70
 71    @classmethod
 72    def upload_reference(cls, project_id: str, base_url: str):
 73        return cls(
 74            file_access_request=ProjectFileAccessRequest(
 75                access_type=ProjectAccessType.REFERENCE_UPLOAD
 76            ),
 77            base_url=base_url,
 78            project_id=project_id
 79        )
 80
 81    @classmethod
 82    def upload_sample_sheet(cls, project_id: str, dataset_id: str, base_url: str):
 83        return cls(
 84            file_access_request=ProjectFileAccessRequest(
 85                access_type=ProjectAccessType.SAMPLESHEET_UPLOAD,
 86                dataset_id=dataset_id
 87            ),
 88            base_url=base_url,
 89            project_id=project_id
 90        )
 91
 92    @property
 93    def bucket(self) -> str:
 94        """ S3 Bucket """
 95        return self._s3_path.bucket
 96
 97    @property
 98    def prefix(self) -> str:
 99        """ S3 Prefix """
100        return self._s3_path.key
101
102    def __repr__(self):
103        return f'{self.__class__.__name__}({self.file_access_request.access_type}@base_url={self.base_url})'
104
105
@dataclass(frozen=True)
class File:
    """
    A file addressed by a path relative to the base URL of its access context.
    Prefer `from_file_entry` to build instances from API file entries.
    """
    # Path of the file relative to `access_context.base_url`
    relative_path: str
    # Size as reported by the file entry (presumably bytes)
    size: int
    # Access context used to reach the file; supplies the base URL
    access_context: FileAccessContext
    # Additional properties attached to the file, if any
    metadata: Optional[Dict] = None

    @classmethod
    def from_file_entry(cls,
                        file: FileEntry,
                        project_id: str,
                        dataset: Optional[DatasetDetail] = None,
                        domain: Optional[str] = None) -> 'File':
        """
        Build a File from an API file entry.

        Args:
            file: File entry; its path may be relative or a full `s3://` URI.
            project_id: ID of the project used for the access context.
            dataset: Optional dataset; when it is shared, a shared-dataset download
                context is used instead of a project download context.
            domain: Base URL for relative paths. Ignored when the entry's path is
                a full `s3://` URI, which carries its own base.
        """
        # Path is absolute rather than relative.
        # Anchored to the start of the string so a relative path that merely
        # contains 's3://' is not misparsed as a full URI and does not override `domain`.
        if file.path.startswith('s3://'):
            parts = S3Path(file.path)
            domain = parts.base
            path = parts.key
        else:
            path = file.path

        if dataset and dataset.share:
            access_context = FileAccessContext.download_shared_dataset(
                project_id=project_id,
                dataset_id=dataset.id,
                base_url=domain
            )
        else:
            access_context = FileAccessContext.download(
                project_id=project_id,
                base_url=domain
            )

        return cls(
            relative_path=path,
            metadata=file.metadata.additional_properties if file.metadata else {},
            size=file.size,
            access_context=access_context
        )

    @property
    def absolute_path(self) -> str:
        """ Base URL of the access context joined with the relative path (outer slashes stripped) """
        return f'{self.access_context.base_url}/{self.relative_path.strip("/")}'

    @property
    def name(self) -> str:
        """ Final path component of the absolute path """
        return PurePath(self.absolute_path).name

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}(path={self.relative_path})'
class DirectoryStatistics(typing.NamedTuple):
13class DirectoryStatistics(NamedTuple):
14    size: float
15    " Size in bytes"
16    size_friendly: str
17    " Size in user friendly format (e.g. 1.2 KB)"
18    number_of_files: int
19    " Number of files"

DirectoryStatistics(size, size_friendly, number_of_files)

DirectoryStatistics(size: float, size_friendly: str, number_of_files: int)

Create new instance of DirectoryStatistics(size, size_friendly, number_of_files)

size: float

Size in bytes

size_friendly: str

Size in user friendly format (e.g. 1.2 KB)

number_of_files: int

Number of files

class FileAccessContext:
 22class FileAccessContext:
 23    """
 24    Context holder for accessing various files in Cirro and abstracting out their location.
 25    Prefer to use the class methods to instantiate.
 26    """
 27    def __init__(self,
 28                 file_access_request: ProjectFileAccessRequest,
 29                 project_id: str,
 30                 base_url: str):
 31        self.file_access_request = file_access_request
 32        self.base_url = base_url
 33        self.project_id = project_id
 34        self._s3_path = S3Path(base_url)
 35
 36    @classmethod
 37    def download(cls, project_id: str, base_url: str, token_lifetime_override: int = None):
 38        return cls(
 39            file_access_request=ProjectFileAccessRequest(
 40                access_type=ProjectAccessType.PROJECT_DOWNLOAD,
 41                token_lifetime_hours=token_lifetime_override
 42            ),
 43            base_url=base_url,
 44            project_id=project_id
 45        )
 46
 47    @classmethod
 48    def download_shared_dataset(cls, project_id: str, dataset_id: str,
 49                                base_url: str, token_lifetime_override: int = None):
 50        return cls(
 51            file_access_request=ProjectFileAccessRequest(
 52                access_type=ProjectAccessType.SHARED_DATASET_DOWNLOAD,
 53                dataset_id=dataset_id,
 54                token_lifetime_hours=token_lifetime_override
 55            ),
 56            base_url=base_url,
 57            project_id=project_id
 58        )
 59
 60    @classmethod
 61    def upload_dataset(cls, project_id: str, dataset_id: str, base_url: str, token_lifetime_override: int = None):
 62        return cls(
 63            file_access_request=ProjectFileAccessRequest(
 64                access_type=ProjectAccessType.DATASET_UPLOAD,
 65                dataset_id=dataset_id,
 66                token_lifetime_hours=token_lifetime_override
 67            ),
 68            base_url=f'{base_url}/data',
 69            project_id=project_id
 70        )
 71
 72    @classmethod
 73    def upload_reference(cls, project_id: str, base_url: str):
 74        return cls(
 75            file_access_request=ProjectFileAccessRequest(
 76                access_type=ProjectAccessType.REFERENCE_UPLOAD
 77            ),
 78            base_url=base_url,
 79            project_id=project_id
 80        )
 81
 82    @classmethod
 83    def upload_sample_sheet(cls, project_id: str, dataset_id: str, base_url: str):
 84        return cls(
 85            file_access_request=ProjectFileAccessRequest(
 86                access_type=ProjectAccessType.SAMPLESHEET_UPLOAD,
 87                dataset_id=dataset_id
 88            ),
 89            base_url=base_url,
 90            project_id=project_id
 91        )
 92
 93    @property
 94    def bucket(self) -> str:
 95        """ S3 Bucket """
 96        return self._s3_path.bucket
 97
 98    @property
 99    def prefix(self) -> str:
100        """ S3 Prefix """
101        return self._s3_path.key
102
103    def __repr__(self):
104        return f'{self.__class__.__name__}({self.file_access_request.access_type}@base_url={self.base_url})'

Context holder for accessing various files in Cirro and abstracting out their location. Prefer to use the class methods to instantiate.

FileAccessContext( file_access_request: cirro_api_client.v1.models.ProjectFileAccessRequest, project_id: str, base_url: str)
27    def __init__(self,
28                 file_access_request: ProjectFileAccessRequest,
29                 project_id: str,
30                 base_url: str):
31        self.file_access_request = file_access_request
32        self.base_url = base_url
33        self.project_id = project_id
34        self._s3_path = S3Path(base_url)
file_access_request
base_url
project_id
@classmethod
def download( cls, project_id: str, base_url: str, token_lifetime_override: int = None):
36    @classmethod
37    def download(cls, project_id: str, base_url: str, token_lifetime_override: int = None):
38        return cls(
39            file_access_request=ProjectFileAccessRequest(
40                access_type=ProjectAccessType.PROJECT_DOWNLOAD,
41                token_lifetime_hours=token_lifetime_override
42            ),
43            base_url=base_url,
44            project_id=project_id
45        )
@classmethod
def download_shared_dataset( cls, project_id: str, dataset_id: str, base_url: str, token_lifetime_override: int = None):
47    @classmethod
48    def download_shared_dataset(cls, project_id: str, dataset_id: str,
49                                base_url: str, token_lifetime_override: int = None):
50        return cls(
51            file_access_request=ProjectFileAccessRequest(
52                access_type=ProjectAccessType.SHARED_DATASET_DOWNLOAD,
53                dataset_id=dataset_id,
54                token_lifetime_hours=token_lifetime_override
55            ),
56            base_url=base_url,
57            project_id=project_id
58        )
@classmethod
def upload_dataset( cls, project_id: str, dataset_id: str, base_url: str, token_lifetime_override: int = None):
60    @classmethod
61    def upload_dataset(cls, project_id: str, dataset_id: str, base_url: str, token_lifetime_override: int = None):
62        return cls(
63            file_access_request=ProjectFileAccessRequest(
64                access_type=ProjectAccessType.DATASET_UPLOAD,
65                dataset_id=dataset_id,
66                token_lifetime_hours=token_lifetime_override
67            ),
68            base_url=f'{base_url}/data',
69            project_id=project_id
70        )
@classmethod
def upload_reference(cls, project_id: str, base_url: str):
72    @classmethod
73    def upload_reference(cls, project_id: str, base_url: str):
74        return cls(
75            file_access_request=ProjectFileAccessRequest(
76                access_type=ProjectAccessType.REFERENCE_UPLOAD
77            ),
78            base_url=base_url,
79            project_id=project_id
80        )
@classmethod
def upload_sample_sheet(cls, project_id: str, dataset_id: str, base_url: str):
82    @classmethod
83    def upload_sample_sheet(cls, project_id: str, dataset_id: str, base_url: str):
84        return cls(
85            file_access_request=ProjectFileAccessRequest(
86                access_type=ProjectAccessType.SAMPLESHEET_UPLOAD,
87                dataset_id=dataset_id
88            ),
89            base_url=base_url,
90            project_id=project_id
91        )
bucket: str
93    @property
94    def bucket(self) -> str:
95        """ S3 Bucket """
96        return self._s3_path.bucket

S3 Bucket

prefix: str
 98    @property
 99    def prefix(self) -> str:
100        """ S3 Prefix """
101        return self._s3_path.key

S3 Prefix

@dataclass(frozen=True)
class File:
107@dataclass(frozen=True)
108class File:
109    relative_path: str
110    size: int
111    access_context: FileAccessContext
112    metadata: Optional[Dict] = None
113
114    @classmethod
115    def from_file_entry(cls, file: FileEntry, project_id: str, dataset: DatasetDetail = None, domain: str = None):
116        # Path is absolute rather than relative
117        if 's3://' in file.path:
118            parts = S3Path(file.path)
119            domain = parts.base
120            path = parts.key
121        else:
122            path = file.path
123
124        if dataset and dataset.share:
125            access_context = FileAccessContext.download_shared_dataset(
126                project_id=project_id,
127                dataset_id=dataset.id,
128                base_url=domain
129            )
130        else:
131            access_context = FileAccessContext.download(
132                project_id=project_id,
133                base_url=domain
134            )
135
136        return cls(
137            relative_path=path,
138            metadata=file.metadata.additional_properties if file.metadata else {},
139            size=file.size,
140            access_context=access_context
141        )
142
143    @property
144    def absolute_path(self):
145        return f'{self.access_context.base_url}/{self.relative_path.strip("/")}'
146
147    @property
148    def name(self):
149        return PurePath(self.absolute_path).name
150
151    def __repr__(self):
152        return f'{self.__class__.__name__}(path={self.relative_path})'
File( relative_path: str, size: int, access_context: FileAccessContext, metadata: Optional[Dict] = None)
relative_path: str
size: int
access_context: FileAccessContext
metadata: Optional[Dict] = None
@classmethod
def from_file_entry( cls, file: cirro_api_client.v1.models.FileEntry, project_id: str, dataset: cirro_api_client.v1.models.DatasetDetail = None, domain: str = None):
114    @classmethod
115    def from_file_entry(cls, file: FileEntry, project_id: str, dataset: DatasetDetail = None, domain: str = None):
116        # Path is absolute rather than relative
117        if 's3://' in file.path:
118            parts = S3Path(file.path)
119            domain = parts.base
120            path = parts.key
121        else:
122            path = file.path
123
124        if dataset and dataset.share:
125            access_context = FileAccessContext.download_shared_dataset(
126                project_id=project_id,
127                dataset_id=dataset.id,
128                base_url=domain
129            )
130        else:
131            access_context = FileAccessContext.download(
132                project_id=project_id,
133                base_url=domain
134            )
135
136        return cls(
137            relative_path=path,
138            metadata=file.metadata.additional_properties if file.metadata else {},
139            size=file.size,
140            access_context=access_context
141        )
absolute_path
143    @property
144    def absolute_path(self):
145        return f'{self.access_context.base_url}/{self.relative_path.strip("/")}'
name
147    @property
148    def name(self):
149        return PurePath(self.absolute_path).name