from datetime import datetime, timedelta
import mimetypes
import os
from pathlib import Path, PurePosixPath
from typing import Any, Callable, Dict, Iterable, Optional, TYPE_CHECKING, Tuple, Union
import warnings
from ..client import Client, register_client_class
from ..cloudpath import implementation_registry
from ..enums import FileCacheMode
from .gspath import GSPath
try:
if TYPE_CHECKING:
from google.auth.credentials import Credentials
from google.api_core.retry import Retry
from google.auth import default as google_default_auth
from google.auth.exceptions import DefaultCredentialsError
from google.cloud.storage import Client as StorageClient
except ModuleNotFoundError:
implementation_registry["gs"].dependencies_loaded = False
try:
from google.cloud.storage import transfer_manager
except ImportError:
transfer_manager = None
@register_client_class("gs")
class GSClient(Client):
"""Client class for Google Cloud Storage which handles authentication with GCP for
[`GSPath`](../gspath/) instances. See documentation for the
[`__init__` method][cloudpathlib.gs.gsclient.GSClient.__init__] for detailed authentication
options.
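
    A minimal usage sketch (the bucket and blob names below are hypothetical):

    ```python
    from cloudpathlib import GSClient, GSPath

    client = GSClient()  # falls back to anonymous access if no credentials are found
    path = GSPath("gs://my-bucket/data/file.txt", client=client)
    ```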
"""
def __init__(
self,
application_credentials: Optional[Union[str, os.PathLike]] = None,
credentials: Optional["Credentials"] = None,
project: Optional[str] = None,
storage_client: Optional["StorageClient"] = None,
file_cache_mode: Optional[Union[str, FileCacheMode]] = None,
local_cache_dir: Optional[Union[str, os.PathLike]] = None,
content_type_method: Optional[Callable] = mimetypes.guess_type,
download_chunks_concurrently_kwargs: Optional[Dict[str, Any]] = None,
timeout: Optional[float] = None,
retry: Optional["Retry"] = None,
):
"""Class constructor. Sets up a [`Storage
Client`](https://googleapis.dev/python/storage/latest/client.html).
        Supports the following authentication methods of `Storage Client`, in this order:

        - Instantiated and already authenticated `Storage Client`.
        - OAuth2 `Credentials` object and a project name.
        - File path to a JSON credentials file for a Google service account.
        - Google Cloud SDK default credentials. See [How Application Default Credentials works](https://cloud.google.com/docs/authentication/application-default-credentials).

        If none of these authentication methods are provided, the client is instantiated
        as anonymous and will only have access to public buckets.
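
        For example, a minimal sketch of authenticating with a service account file
        (the path below is hypothetical):

        ```python
        client = GSClient(application_credentials="path/to/service-account.json")
        ```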
Args:
application_credentials (Optional[Union[str, os.PathLike]]): Path to Google service
account credentials file.
credentials (Optional[Credentials]): The OAuth2 Credentials to use for this client.
See documentation for [`StorageClient`](
https://googleapis.dev/python/storage/latest/client.html).
project (Optional[str]): The project which the client acts on behalf of. See
documentation for [`StorageClient`](
https://googleapis.dev/python/storage/latest/client.html).
storage_client (Optional[StorageClient]): Instantiated [`StorageClient`](
https://googleapis.dev/python/storage/latest/client.html).
            file_cache_mode (Optional[Union[str, FileCacheMode]]): How often to clear the file cache; see
                [the caching docs](https://cloudpathlib.drivendata.org/stable/caching/) for more information
                about the options in `cloudpathlib.enums.FileCacheMode`.
local_cache_dir (Optional[Union[str, os.PathLike]]): Path to directory to use as cache
for downloaded files. If None, will use a temporary directory. Default can be set with
the `CLOUDPATHLIB_LOCAL_CACHE_DIR` environment variable.
content_type_method (Optional[Callable]): Function to call to guess media type (mimetype) when
writing a file to the cloud. Defaults to `mimetypes.guess_type`. Must return a tuple (content type, content encoding).
            download_chunks_concurrently_kwargs (Optional[Dict[str, Any]]): Keyword arguments to pass to
                [`download_chunks_concurrently`](https://cloud.google.com/python/docs/reference/storage/latest/google.cloud.storage.transfer_manager#google_cloud_storage_transfer_manager_download_chunks_concurrently)
                for sliced parallel downloads; only available in `google-cloud-storage` version 2.7.0 or later,
                otherwise ignored with a warning. See the example at the end of this docstring.
            timeout (Optional[float]): Cloud Storage [timeout value](https://cloud.google.com/python/docs/reference/storage/1.39.0/retry_timeout).
            retry (Optional[google.api_core.retry.Retry]): Cloud Storage [retry configuration](https://cloud.google.com/python/docs/reference/storage/1.39.0/retry_timeout#configuring-retries).
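
        A sketch of enabling sliced parallel downloads; the chunk size and worker count
        below are illustrative assumptions, not required values:

        ```python
        client = GSClient(
            download_chunks_concurrently_kwargs={
                "chunk_size": 32 * 1024 * 1024,  # assumed 32 MiB slices
                "max_workers": 8,
            }
        )
        ```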
"""
# don't check `GOOGLE_APPLICATION_CREDENTIALS` since `google_default_auth` already does that
# use explicit client
if storage_client is not None:
self.client = storage_client
# use explicit credentials
elif credentials is not None:
self.client = StorageClient(credentials=credentials, project=project)
# use explicit credential file
elif application_credentials is not None:
self.client = StorageClient.from_service_account_json(application_credentials)
# use default credentials based on SDK precedence
else:
try:
# use `google_default_auth` instead of `StorageClient()` since it
# handles precedence of creds in different locations properly
credentials, default_project = google_default_auth()
project = project or default_project # use explicit project if present
self.client = StorageClient(credentials=credentials, project=project)
except DefaultCredentialsError:
self.client = StorageClient.create_anonymous_client()
self.download_chunks_concurrently_kwargs = download_chunks_concurrently_kwargs
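        # kwargs threaded into the per-blob download, upload, and copy calls below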
        self.blob_kwargs: Dict[str, Any] = {}
if timeout is not None:
self.timeout: float = timeout
self.blob_kwargs["timeout"] = self.timeout
if retry is not None:
self.retry: Retry = retry
self.blob_kwargs["retry"] = self.retry
super().__init__(
local_cache_dir=local_cache_dir,
content_type_method=content_type_method,
file_cache_mode=file_cache_mode,
)
def _get_metadata(self, cloud_path: GSPath) -> Optional[Dict[str, Any]]:
bucket = self.client.bucket(cloud_path.bucket)
blob = bucket.get_blob(cloud_path.blob)
if blob is None:
return None
else:
return {
"etag": blob.etag,
"size": blob.size,
"updated": blob.updated,
"content_type": blob.content_type,
"md5_hash": blob.md5_hash,
}
def _download_file(self, cloud_path: GSPath, local_path: Union[str, os.PathLike]) -> Path:
bucket = self.client.bucket(cloud_path.bucket)
blob = bucket.get_blob(cloud_path.blob)
local_path = Path(local_path)
if transfer_manager is not None and self.download_chunks_concurrently_kwargs is not None:
transfer_manager.download_chunks_concurrently(
blob, local_path, **self.download_chunks_concurrently_kwargs
)
else:
if transfer_manager is None and self.download_chunks_concurrently_kwargs is not None:
                warnings.warn(
                    "Ignoring `download_chunks_concurrently_kwargs`; installed google-cloud-storage does not support sliced parallel downloads (requires >=2.7.0)."
                )
blob.download_to_filename(local_path, **self.blob_kwargs)
return local_path
def _is_file_or_dir(self, cloud_path: GSPath) -> Optional[str]:
# short-circuit the root-level bucket
if not cloud_path.blob:
return "dir"
bucket = self.client.bucket(cloud_path.bucket)
blob = bucket.get_blob(cloud_path.blob)
if blob is not None:
return "file"
else:
prefix = cloud_path.blob
if prefix and not prefix.endswith("/"):
prefix += "/"
# not a file, see if it is a directory
f = bucket.list_blobs(max_results=1, prefix=prefix)
# at least one key with the prefix of the directory
if bool(list(f)):
return "dir"
else:
return None
def _exists(self, cloud_path: GSPath) -> bool:
# short-circuit the root-level bucket
if not cloud_path.blob:
return self.client.bucket(cloud_path.bucket).exists()
return self._is_file_or_dir(cloud_path) in ["file", "dir"]
def _list_dir(self, cloud_path: GSPath, recursive=False) -> Iterable[Tuple[GSPath, bool]]:
# shortcut if listing all available buckets
if not cloud_path.bucket:
if recursive:
raise NotImplementedError(
"Cannot recursively list all buckets and contents; you can get all the buckets then recursively list each separately."
)
yield from (
                (self.CloudPath(f"{cloud_path.cloud_prefix}{b.name}"), True)
for b in self.client.list_buckets()
)
return
bucket = self.client.bucket(cloud_path.bucket)
prefix = cloud_path.blob
if prefix and not prefix.endswith("/"):
prefix += "/"
if recursive:
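            # GCS has no true directories, so intermediate "directories" are synthesized
            # from blob names and yielded at most once each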
yielded_dirs = set()
for o in bucket.list_blobs(prefix=prefix):
# get directory from this path
for parent in PurePosixPath(o.name[len(prefix) :]).parents:
# if we haven't surfaced this directory already
if parent not in yielded_dirs and str(parent) != ".":
yield (
self.CloudPath(
f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{prefix}{parent}"
),
True, # is a directory
)
yielded_dirs.add(parent)
yield (
self.CloudPath(f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{o.name}"),
False,
) # is a file
else:
iterator = bucket.list_blobs(delimiter="/", prefix=prefix)
# files must be iterated first for `.prefixes` to be populated:
# see: https://github.com/googleapis/python-storage/issues/863
for file in iterator:
yield (
self.CloudPath(f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{file.name}"),
False, # is a file
)
for directory in iterator.prefixes:
yield (
self.CloudPath(f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{directory}"),
True, # is a directory
)
def _move_file(self, src: GSPath, dst: GSPath, remove_src: bool = True) -> GSPath:
        # same source and destination is just a touch, so update ("REPLACE") the metadata
if src == dst:
bucket = self.client.bucket(src.bucket)
blob = bucket.get_blob(src.blob)
# See https://github.com/googleapis/google-cloud-python/issues/1185#issuecomment-431537214
if blob.metadata is None:
blob.metadata = {"updated": datetime.utcnow()}
else:
blob.metadata["updated"] = datetime.utcnow()
blob.patch()
else:
src_bucket = self.client.bucket(src.bucket)
dst_bucket = self.client.bucket(dst.bucket)
src_blob = src_bucket.get_blob(src.blob)
src_bucket.copy_blob(src_blob, dst_bucket, dst.blob, **self.blob_kwargs)
if remove_src:
src_blob.delete()
return dst
def _remove(self, cloud_path: GSPath, missing_ok: bool = True) -> None:
file_or_dir = self._is_file_or_dir(cloud_path)
if file_or_dir == "dir":
blobs = [
b.blob for b, is_dir in self._list_dir(cloud_path, recursive=True) if not is_dir
]
bucket = self.client.bucket(cloud_path.bucket)
for blob in blobs:
bucket.get_blob(blob).delete()
elif file_or_dir == "file":
bucket = self.client.bucket(cloud_path.bucket)
bucket.get_blob(cloud_path.blob).delete()
else:
# Does not exist
if not missing_ok:
raise FileNotFoundError(f"File does not exist: {cloud_path}")
def _upload_file(self, local_path: Union[str, os.PathLike], cloud_path: GSPath) -> GSPath:
bucket = self.client.bucket(cloud_path.bucket)
blob = bucket.blob(cloud_path.blob)
extra_args = {}
if self.content_type_method is not None:
content_type, _ = self.content_type_method(str(local_path))
extra_args["content_type"] = content_type
blob.upload_from_filename(str(local_path), **extra_args, **self.blob_kwargs)
return cloud_path
def _get_public_url(self, cloud_path: GSPath) -> str:
bucket = self.client.get_bucket(cloud_path.bucket)
blob = bucket.blob(cloud_path.blob)
return blob.public_url
def _generate_presigned_url(self, cloud_path: GSPath, expire_seconds: int = 60 * 60) -> str:
bucket = self.client.get_bucket(cloud_path.bucket)
blob = bucket.blob(cloud_path.blob)
url = blob.generate_signed_url(
version="v4", expiration=timedelta(seconds=expire_seconds), method="GET"
)
return url
GSClient.GSPath = GSClient.CloudPath # type: ignore
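
# A usage sketch for signed URLs via the public API (assumes the `CloudPath.as_url`
# interface available in recent cloudpathlib; bucket and blob names are hypothetical):
#
#     GSPath("gs://my-bucket/data/file.txt").as_url(presign=True, expire_seconds=3600)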