Dive into secure and efficient coding practices with our curated list of the top 10 examples showcasing 'dask' in Python. Our advanced machine learning engine meticulously scans each line of code, cross-referencing millions of open source libraries to ensure your implementation is not just functional, but also robust and secure. Elevate your Python data pipelines by mastering parallel arrays, dataframes, and delayed computation with confidence and precision.
def test_pca_inverse():
    # Test that the projection of data can be inverted
    rng = np.random.RandomState(0)
    n, p = 50, 3
    X = rng.randn(n, p)  # spherical data
    X[:, 1] *= 0.00001  # make middle component relatively small
    X += [5, 4, 3]  # make a large mean
    dX = da.from_array(X, chunks=(n // 2, p))

    # same check that we can find the original data from the transformed
    # signal (since the data is almost of rank n_components)
    pca = dd.PCA(n_components=2, svd_solver="full").fit(dX)
    Y = pca.transform(dX)
    Y_inverse = pca.inverse_transform(Y)
    assert_almost_equal(X, Y_inverse, decimal=3)

    # same as above with whitening (approximate reconstruction)
    for solver in solver_list:
        pca = dd.PCA(n_components=2, whiten=True, svd_solver=solver)
        pca.fit(dX)
        Y = pca.transform(dX)
        Y_inverse = pca.inverse_transform(Y)
        assert_eq(dX, Y_inverse, atol=1e-3)
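This test leans on several module aliases and helpers it never shows. A minimal setup that makes it runnable might look like the following; the alias targets and the contents of solver_list are assumptions, not part of the original:

# Assumed setup for test_pca_inverse (aliases and solver_list inferred)
import numpy as np
import dask.array as da
import dask_ml.decomposition as dd  # so dd.PCA is dask-ml's PCA
from numpy.testing import assert_almost_equal
from dask.array.utils import assert_eq

solver_list = ["full", "randomized", "auto"]  # assumed set of SVD solvers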
if form == 'dataframe':
    # Delayed dataframe assembled from partitioned chunks of events
    dfParts = []
    chunkSize = min(self.CHUNK_SIZE, self.nEvents / self.ncores)
    nPartitions = int(self.nEvents // chunkSize) + 1
    # Determine the column names
    gNames = kwds.pop('groupnames', self.getGroupNames(wexpr='Stream'))
    colNames = self.name2alias(gNames)

    for p in range(nPartitions):  # Generate partitioned dataframe
        # Calculate the starting and ending index of every chunk of events
        eventIDStart = int(p * chunkSize)
        eventIDEnd = int(min(eventIDStart + chunkSize, self.nEvents))
        dfParts.append(d.delayed(self._assembleGroups)(gNames, amin=eventIDStart, amax=eventIDEnd, **kwds))

    # Construct eda (event dask array) and edf (event dask dataframe)
    eda = da.from_array(np.concatenate(d.compute(*dfParts), axis=1).T, chunks=self.CHUNK_SIZE)
    self.edf = ddf.from_dask_array(eda, columns=colNames)

    if ret:
        return self.edf

# Delayed array for loading an HDF5 file of reasonable size (e.g. < 1GB)
elif form == 'darray':
    gNames = kwds.pop('groupnames', self.getGroupNames(wexpr='Stream'))
    darray = d.delayed(self._assembleGroups)(gNames, amin=None, amax=None, timeStamps=timeStamps, ret='array', **kwds)

    if ret:
        return darray
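The fragment above is a method body that builds an event dataframe out of delayed chunk loads. Stripped of its class context, the pattern is: create one dask.delayed task per event range, compute them, and stack the results. A standalone sketch, where load_chunk is a hypothetical stand-in for self._assembleGroups:

import dask
import dask.array as da
import dask.dataframe as ddf
import numpy as np

def load_chunk(start, end):
    # Hypothetical loader: returns a (n_columns, n_events) block
    return np.random.rand(3, end - start)

n_events, chunk_size = 1000, 250
parts = [dask.delayed(load_chunk)(i, min(i + chunk_size, n_events))
         for i in range(0, n_events, chunk_size)]
# Concatenate along the event axis, transpose to rows=events, wrap as dask
eda = da.from_array(np.concatenate(dask.compute(*parts), axis=1).T,
                    chunks=chunk_size)
edf = ddf.from_dask_array(eda, columns=['a', 'b', 'c'])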
def _check_inputs(self, X, accept_sparse_negative=False, copy=False):
    kwargs = {}
    if SK_022:
        kwargs["copy"] = copy
    if isinstance(X, (pd.DataFrame, dd.DataFrame)):
        X = X.values
    if isinstance(X, np.ndarray):
        C = len(X) // min(multiprocessing.cpu_count(), 2)
        X = da.from_array(X, chunks=C)

    rng = check_random_state(self.random_state)
    # TODO: non-float dtypes?
    # TODO: sparse arrays?
    # TODO: mix of sparse, dense?
    sample = rng.uniform(size=(5, X.shape[1])).astype(X.dtype)
    super(QuantileTransformer, self)._check_inputs(
        sample, accept_sparse_negative=accept_sparse_negative, **kwargs
    )
    return X
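In practice this hook means dask-ml's QuantileTransformer silently promotes NumPy and pandas inputs to chunked dask collections before fitting. A quick usage sketch, assuming dask_ml is installed:

import numpy as np
from dask_ml.preprocessing import QuantileTransformer

qt = QuantileTransformer(n_quantiles=100)
X = np.random.uniform(size=(500, 3))
Xt = qt.fit_transform(X)  # X is chunked into a dask array internally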
"""
Optimizes chunk size in different orientations to facilitate rapid
screening of algorithm output

Returns
-------
darray : Dask Array
chunks_init : tuple (len 3), chunk size before ghosting. Used in select cases
"""
# Compute chunk size and convert if not a Dask Array
if not isinstance(darray, da.core.Array):
    chunk_size = util.compute_chunk_size(darray.shape,
                                         darray.dtype.itemsize,
                                         kernel=kernel,
                                         preview=preview)
    darray = da.from_array(darray, chunks=chunk_size)

chunks_init = darray.chunks

# Ghost Dask Array if operation specifies a kernel
if kernel is not None:
    hw = tuple(np.array(kernel) // 2)
    darray = da.ghost.ghost(darray, depth=hw, boundary='reflect')

return darray, chunks_init
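One portability note: da.ghost.ghost is the old dask name for adding halos around chunk boundaries; in current dask releases the module was renamed to overlap. Against a recent dask, the equivalent call would presumably be:

import numpy as np
import dask.array as da

x = da.from_array(np.arange(64).reshape(8, 8), chunks=(4, 4))
# da.overlap.overlap replaces da.ghost.ghost in modern dask
g = da.overlap.overlap(x, depth=1, boundary='reflect')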
def test_incremental_basic(scheduler, dataframes):
    # Create observations that we know linear models can recover
    n, d = 100, 3
    rng = da.random.RandomState(42)
    X = rng.normal(size=(n, d), chunks=30)
    coef_star = rng.uniform(size=d, chunks=d)
    y = da.sign(X.dot(coef_star))
    y = (y + 1) / 2
    if dataframes:
        X = dd.from_array(X)
        y = dd.from_array(y)

    with scheduler() as (s, [_, _]):
        est1 = SGDClassifier(random_state=0, tol=1e-3, average=True)
        est2 = clone(est1)
        clf = Incremental(est1, random_state=0)
        result = clf.fit(X, y, classes=[0, 1])
        assert result is clf
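Incremental here is dask-ml's wrapper that feeds each block of a dask collection into the underlying estimator's partial_fit. A minimal standalone sketch without the test's scheduler fixture, assuming dask_ml and scikit-learn are installed:

import dask.array as da
from sklearn.linear_model import SGDClassifier
from dask_ml.wrappers import Incremental

X = da.random.random((100, 3), chunks=30)
y = (da.random.random(100, chunks=30) > 0.5).astype(int)

clf = Incremental(SGDClassifier(tol=1e-3), random_state=0)
clf.fit(X, y, classes=[0, 1])  # streams each block through partial_fit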
def test_basic(self, output_distribution):
    rs = da.random.RandomState(0)
    a = dpp.QuantileTransformer(output_distribution=output_distribution)
    b = spp.QuantileTransformer(output_distribution=output_distribution)
    X = rs.uniform(size=(1000, 3), chunks=50)
    a.fit(X)
    b.fit(X)
    assert_estimator_equal(a, b, atol=0.02)

    # set the quantiles, so that from here out, we're exact
    a.quantiles_ = b.quantiles_
    assert_eq_ar(a.transform(X), b.transform(X), atol=1e-7)
    assert_eq_ar(X, a.inverse_transform(a.transform(X)))
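The aliases in this test are unstated; presumably dpp is dask_ml.preprocessing, spp is sklearn.preprocessing, and assert_eq_ar is a local array-comparison helper. A plausible setup:

# Assumed aliases for the snippet above
import dask.array as da
import dask_ml.preprocessing as dpp
import sklearn.preprocessing as spp
from dask_ml.utils import assert_estimator_equal
from dask.array.utils import assert_eq as assert_eq_ar  # assumed helper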
def test_fit_shuffle_blocks():
    N = 10
    X = da.from_array(1 + np.arange(N).reshape(-1, 1), chunks=1)
    y = da.from_array(np.ones(N), chunks=1)
    classes = [0, 1]
    sgd = SGDClassifier(
        max_iter=5, random_state=0, fit_intercept=False, shuffle=False, tol=1e-3
    )

    sgd1 = fit(clone(sgd), X, y, random_state=0, classes=classes)
    sgd2 = fit(clone(sgd), X, y, random_state=42, classes=classes)
    assert len(sgd1.coef_) == len(sgd2.coef_) == 1
    assert not np.allclose(sgd1.coef_, sgd2.coef_)

    X, y = make_classification(random_state=0, chunks=20)
    sgd_a = fit(clone(sgd), X, y, random_state=0, classes=classes, shuffle_blocks=False)
    sgd_b = fit(
        clone(sgd), X, y, random_state=42, classes=classes, shuffle_blocks=False
    )
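Neither fit nor make_classification is defined in the snippet; in dask-ml's test suite they would come from the block-wise partial-fit helpers. An assumed import block:

import numpy as np
import dask.array as da
from sklearn.base import clone
from sklearn.linear_model import SGDClassifier
from dask_ml.datasets import make_classification
from dask_ml._partial import fit  # presumed source of the block-wise fit helper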
def huge_2d_array():
    array = np.vstack(1000 * [np.arange(0, 1000)])
    return da.from_array(array, chunks=(500, 500))
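Note that np.vstack(1000 * [...]) materializes the full 1000x1000 array in memory before dask ever sees it. If the goal is a lazy fixture, the same values can be built without the NumPy intermediate; a sketch:

import dask.array as da

# Same repeated-row array, built lazily instead of via np.vstack
lazy = da.broadcast_to(da.arange(1000), (1000, 1000)).rechunk((500, 500))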
def test_dask_dataframe(self):
    with LocalCUDACluster() as cluster:
        with Client(cluster) as client:
            X, y = generate_array()

            X = dd.from_dask_array(X)
            y = dd.from_dask_array(y)

            X = X.map_partitions(cudf.from_pandas)
            y = y.map_partitions(cudf.from_pandas)

            dtrain = dxgb.DaskDMatrix(client, X, y)
            out = dxgb.train(client, {'tree_method': 'gpu_hist'},
                             dtrain=dtrain,
                             evals=[(dtrain, 'X')],
                             num_boost_round=2)

            assert isinstance(out['booster'], dxgb.Booster)
            assert len(out['history']['X']['rmse']) == 2

            predictions = dxgb.predict(client, out, dtrain).compute()
            assert isinstance(predictions, np.ndarray)
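This test needs a CUDA cluster and cudf. The same xgboost.dask flow runs on plain CPU workers if you swap 'gpu_hist' for 'hist' and skip the cudf conversion; a minimal sketch, assuming an xgboost build with dask support:

import dask.array as da
import xgboost as xgb
from dask.distributed import Client, LocalCluster

if __name__ == '__main__':
    with LocalCluster(n_workers=2) as cluster, Client(cluster) as client:
        X = da.random.random((1000, 10), chunks=100)
        y = da.random.random(1000, chunks=100)
        dtrain = xgb.dask.DaskDMatrix(client, X, y)
        out = xgb.dask.train(client, {'tree_method': 'hist'},
                             dtrain, num_boost_round=2,
                             evals=[(dtrain, 'train')])
        preds = xgb.dask.predict(client, out, dtrain)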
def test_example(query, expected, model):
    if model == 'dask':
        sc = {k: dd.from_pandas(df, npartitions=3) for k, df in scope.items()}
        actual = fq.execute(query, scope=sc, model=model)
        actual = actual.compute()
    else:
        actual = fq.execute(query, scope=scope, model=model)

    expected = expected()
    # set empty columns in expected to the ones in actual
    expected.columns = [e or a for a, e in zip(actual.columns, expected.columns)]

    actual = actual.reset_index(drop=True)
    expected = expected.reset_index(drop=True)
    pdt.assert_frame_equal(actual, expected, check_dtype=False)
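fq here is whatever query-execution library the suite exercises; the reusable part is the comparison idiom at the end: compute the dask result, normalize both indexes, then defer to pandas' own testing helpers. The idiom in isolation, with fq removed:

import pandas as pd
import pandas.testing as pdt
import dask.dataframe as dd

df = pd.DataFrame({'x': [3, 1, 2]})
actual = dd.from_pandas(df, npartitions=2).compute()

actual = actual.reset_index(drop=True)
expected = df.reset_index(drop=True)
pdt.assert_frame_equal(actual, expected, check_dtype=False)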