2025-10-10 09:46:41 +02:00

165 lines
5.4 KiB
Python

import datetime
import http
import os
from pathlib import Path, PurePosixPath
from tempfile import TemporaryDirectory
from typing import Any, Tuple, TYPE_CHECKING, Union, Optional
import urllib
from ..cloudpath import CloudPath, NoStatError, register_path_class
if TYPE_CHECKING:
from .httpclient import HttpClient, HttpsClient
@register_path_class("http")
class HttpPath(CloudPath):
cloud_prefix = "http://"
client: "HttpClient"
def __init__(
self,
cloud_path: Union[str, "HttpPath"],
*parts: str,
client: Optional["HttpClient"] = None,
) -> None:
super().__init__(cloud_path, *parts, client=client)
self._path = (
PurePosixPath(self._url.path)
if self._url.path.startswith("/")
else PurePosixPath(f"/{self._url.path}")
)
@property
def _local(self) -> Path:
"""Cached local version of the file."""
# remove params, query, fragment to get local path
return self.client._local_cache_dir / self._url.path.lstrip("/")
def _dispatch_to_path(self, func: str, *args, **kwargs) -> Any:
sup = super()._dispatch_to_path(func, *args, **kwargs)
# some dispatch methods like "__truediv__" strip trailing slashes;
# for http paths, we need to keep them to indicate directories
if func == "__truediv__" and str(args[0]).endswith("/"):
return self._new_cloudpath(str(sup) + "/")
else:
return sup
@property
def parsed_url(self) -> urllib.parse.ParseResult:
return self._url
@property
def drive(self) -> str:
# For HTTP paths, no drive; use .anchor for scheme + netloc
return self._url.netloc
@property
def anchor(self) -> str:
return f"{self._url.scheme}://{self._url.netloc}/"
@property
def _no_prefix_no_drive(self) -> str:
# netloc appears in anchor and drive for httppath; so don't double count
return self._str[len(self.anchor) - 1 :]
def is_dir(self, follow_symlinks: bool = True) -> bool:
if not self.exists():
return False
# Use client default to identify directories
return self.client.dir_matcher(str(self))
def is_file(self, follow_symlinks: bool = True) -> bool:
if not self.exists():
return False
return not self.client.dir_matcher(str(self))
def mkdir(self, parents: bool = False, exist_ok: bool = False) -> None:
pass # no-op for HTTP Paths
def touch(self, exist_ok: bool = True) -> None:
if self.exists():
if not exist_ok:
raise FileExistsError(f"File already exists: {self}")
raise NotImplementedError(
"Touch not implemented for existing HTTP files since we can't update the modified time; "
"use `put()` or write to the file instead."
)
else:
empty_file = Path(TemporaryDirectory().name) / "empty_file.txt"
empty_file.parent.mkdir(parents=True, exist_ok=True)
empty_file.write_text("")
self.client._upload_file(empty_file, self)
def stat(self, follow_symlinks: bool = True) -> os.stat_result:
try:
meta = self.client._get_metadata(self)
except: # noqa E722
raise NoStatError(f"Could not get metadata for {self}")
return os.stat_result(
( # type: ignore
None, # mode
None, # ino
self.cloud_prefix, # dev,
None, # nlink,
None, # uid,
None, # gid,
meta.get("size", 0), # size,
None, # atime,
meta.get(
"last_modified", datetime.datetime.fromtimestamp(0)
).timestamp(), # mtime,
None, # ctime,
)
)
def as_url(self, presign: bool = False, expire_seconds: int = 60 * 60) -> str:
if presign:
raise NotImplementedError("Presigning not supported for HTTP paths")
return (
self._url.geturl()
) # recreate from what was initialized so we have the same query params, etc.
@property
def name(self) -> str:
return self._path.name
@property
def parents(self) -> Tuple["HttpPath", ...]:
return super().parents + (self._new_cloudpath(""),)
def get(self, **kwargs) -> Tuple[http.client.HTTPResponse, bytes]:
"""Issue a get request with `urllib.request.Request`"""
return self.client.request(self, "GET", **kwargs)
def put(self, **kwargs) -> Tuple[http.client.HTTPResponse, bytes]:
"""Issue a put request with `urllib.request.Request`"""
return self.client.request(self, "PUT", **kwargs)
def post(self, **kwargs) -> Tuple[http.client.HTTPResponse, bytes]:
"""Issue a post request with `urllib.request.Request`"""
return self.client.request(self, "POST", **kwargs)
def delete(self, **kwargs) -> Tuple[http.client.HTTPResponse, bytes]:
"""Issue a delete request with `urllib.request.Request`"""
return self.client.request(self, "DELETE", **kwargs)
def head(self, **kwargs) -> Tuple[http.client.HTTPResponse, bytes]:
"""Issue a head request with `urllib.request.Request`"""
return self.client.request(self, "HEAD", **kwargs)
@register_path_class("https")
class HttpsPath(HttpPath):
cloud_prefix: str = "https://"
client: "HttpsClient"