A curated set of examples showing how to use 'fsspec' in Python. Each snippet illustrates filesystem abstraction in practice — resolving protocols, reading from and writing to remote object stores, and handling files and buffers through a uniform interface — so you can apply the library's patterns with confidence.
("file:///tmp/test.json", LocalFileSystem),
("/tmp/test.json", LocalFileSystem),
("gcs://bucket/file.json", GCSFileSystem),
],
)
def test_protocol_usage(self, filepath, instance_type):
    """Check that the protocol prefix selects the right filesystem class
    and that the stored path is the protocol-stripped POSIX form."""
    dataset = JSONDataSet(filepath=filepath)

    # Filesystem implementation must match the protocol of the input path.
    assert isinstance(dataset._fs, instance_type)

    # Internally stored path equals the input with its protocol removed.
    stripped = dataset._fs._strip_protocol(filepath)
    assert str(dataset._filepath) == stripped
    assert isinstance(dataset._filepath, PurePosixPath)
def test_path_or_store_write_to_bucket(self):
    """Opening a bucket URL in write mode should yield an fsspec FSMap store."""
    url = 'http://obs.eu-de.otc.t-systems.com/fake_bucket/fake_cube.zarr'
    credentials = {
        'aws_access_key_id': 'some_fake_id',
        'aws_secret_access_key': 'some_fake_key',
    }
    path, _, _ = _get_path_or_store(url,
                                    mode='write',
                                    client_kwargs=credentials)
    self.assertIsInstance(path, fsspec.mapping.FSMap)
def test_path_or_store_read_from_bucket(self):
    """Opening a bucket URL in read mode should yield an fsspec FSMap store."""
    url = 'http://obs.eu-de.otc.t-systems.com/dcs4cop-obs-02/OLCI-SNS-RAW-CUBE-2.zarr'
    path, _, _ = _get_path_or_store(url, mode='read')
    self.assertIsInstance(path, fsspec.mapping.FSMap)
def _load(self):
# initial: find cat files
# if flattening, need to get all entries from each.
self._entries.clear()
options = self.storage_options or {}
if isinstance(self.path, (list, tuple)):
files = sum([open_files(p, mode='rb', **options)
for p in self.path], [])
self.name = self.name or "%i files" % len(files)
self.description = self.description or f'Catalog generated from {len(files)} files'
self.path = [make_path_posix(p) for p in self.path]
else:
if isinstance(self.path, str) and '*' not in self.path:
self.path = self.path + '/*'
files = open_files(self.path, mode='rb', **options)
self.path = make_path_posix(self.path)
self.name = self.name or self.path
self.description = self.description or f'Catalog generated from all files found in {self.path}'
if not set(f.path for f in files) == set(
f.path for f in self._cat_files):
# glob changed, reload all
self._cat_files = files
self._cats.clear()
for f in files:
name = os.path.split(f.path)[-1].replace(
'.yaml', '').replace('.yml', '')
kwargs = self.kwargs.copy()
kwargs['path'] = f.path
d = make_path_posix(os.path.dirname(f.path))
if f.path not in self._cats:
entry = LocalCatalogEntry(name, "YAML file: %s" % name,
if not keywords:
raise NotThisMethod("no keywords at all, weird")
date = keywords.get("date")
if date is not None:
# git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
# datestamp. However we prefer "%ci" (which expands to an "ISO-8601
# -like" string, which we must then edit to make compliant), because
# it's been around since git-1.5.3, and it's too difficult to
# discover which version we're using, or to work around using an
# older one.
date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
refnames = keywords["refnames"].strip()
if refnames.startswith("$Format"):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
refs = set([r.strip() for r in refnames.strip("()").split(",")])
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)])
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
# expansion behaves like git log --decorate=short and strips out the
# refs/heads/ and refs/tags/ prefixes that would let us distinguish
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
tags = set([r for r in refs if re.search(r"\d", r)])
if verbose:
print("discarding '%s', no digits" % ",".join(refs - tags))
def git_versions_from_keywords(keywords, tag_prefix, verbose):
"""Get version information from git keywords."""
if not keywords:
raise NotThisMethod("no keywords at all, weird")
date = keywords.get("date")
if date is not None:
# git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
# datestamp. However we prefer "%ci" (which expands to an "ISO-8601
# -like" string, which we must then edit to make compliant), because
# it's been around since git-1.5.3, and it's too difficult to
# discover which version we're using, or to work around using an
# older one.
date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
refnames = keywords["refnames"].strip()
if refnames.startswith("$Format"):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
refs = set([r.strip() for r in refnames.strip("()").split(",")])
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
head["Accept-Encoding"] = "identity"
session = session or await get_client()
if size_policy == "head":
r = await session.head(url, allow_redirects=ar, **kwargs)
elif size_policy == "get":
r = await session.get(url, allow_redirects=ar, **kwargs)
else:
raise TypeError('size_policy must be "head" or "get", got %s' "" % size_policy)
async with r:
if "Content-Length" in r.headers:
return int(r.headers["Content-Length"])
elif "Content-Range" in r.headers:
return int(r.headers["Content-Range"].split("/")[1])
file_size = sync_wrapper(_file_size)
return out
x = re.compile(".*[^a-z]+.*") # test for non protocol-like single word
bits = (
[p if "://" in p or x.match(p) else p + "://" for p in path.split("::")]
if "::" in path
else [path]
)
if len(bits) < 2:
return []
# [[url, protocol, kwargs], ...]
out = []
previous_bit = None
previous_protocol = None
for bit in reversed(bits):
protocol = split_protocol(bit)[0] or "file"
cls = get_filesystem_class(protocol)
extra_kwargs = cls._get_kwargs_from_urls(bit)
kws = kwargs.get(split_protocol(bit)[0] or "file", {})
kw = dict(**extra_kwargs, **kws)
if (
protocol in {"blockcache", "filecache", "simplecache"}
and "target_protocol" not in kw
):
bit = previous_bit.replace(previous_protocol, protocol)
out.append((bit, protocol, kw))
previous_bit = bit
previous_protocol = protocol
out = list(reversed(out))
# We should only do the url rewrite if the cache is in the middle of the chain
if out[0][1] in {"blockcache", "filecache", "simplecache"}:
out[0] = (f"{out[0][1]}://", out[0][1], out[0][2])
return out
OpenFile(fs, path, compression=compression),
o,
l,
delimiter,
dask_key_name=key,
)
for o, key, l in zip(offset, keys, length)
]
out.append(values)
if sample:
if sample is True:
sample = "10 kiB" # backwards compatibility
if isinstance(sample, str):
sample = parse_bytes(sample)
with OpenFile(fs, paths[0], compression=compression) as f:
# read block without seek (because we start at zero)
if delimiter is None:
sample = f.read(sample)
else:
sample_buff = f.read(sample)
while True:
new = f.read(sample)
if not new:
break
if delimiter in new:
sample_buff = (
sample_buff + new.split(delimiter, 1)[0] + delimiter
)
break
sample_buff = sample_buff + new
sample = sample_buff
identifier
mode: str
normally "rb", "wb" or "ab"
"""
if mode in ["rb", "ab", "rb+"]:
if path in self.store:
f = self.store[path]
if mode == "rb":
f.seek(0)
else:
f.seek(0, 2)
return f
else:
raise FileNotFoundError(path)
if mode == "wb":
m = MemoryFile(self, path)
if not self._intrans:
m.commit()
return m