Dive into secure and efficient coding practices with our curated list of examples showcasing 'statsmodels' in Python. The snippets below cover common statistical workflows: cross-validated regression, stepwise model selection, ANOVA-based configuration screening, Granger causality testing, multiple-testing correction, negative binomial GLMs, robust regression, and empirical CDF diagnostics.
import statsmodels.formula.api as smf


def crossvalidate_formula(formula, data, response_column, k):
    """k-fold cross-validation of an OLS formula; returns the Q2 statistic
    (1 - PRESS / total sum of squares of the response)."""
    PRESS = 0
    for i in range(k):
        # Hold out the i-th contiguous fold; the last fold absorbs the remainder.
        start = i * (len(data) // k)
        end = (i + 1) * (len(data) // k) if i < k - 1 else len(data)
        to_drop = data.index[start:end]
        train = data.drop(to_drop)
        test = data.loc[to_drop]
        model = smf.ols(formula, train).fit()
        pred = model.predict(test)
        residuals = test[response_column] - pred
        PRESS += (residuals ** 2).sum()
    response = data[response_column]
    Q2 = 1 - PRESS / ((response - response.mean()) ** 2).sum()
    return Q2
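A minimal usage sketch of the helper above on synthetic data (the DataFrame and column names are illustrative, not from the original source):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame({"x": rng.normal(size=100)})
df["y"] = 2.0 * df["x"] + rng.normal(scale=0.5, size=100)

# Q2 close to 1 indicates good out-of-sample predictive ability.
print(crossvalidate_formula("y ~ x", df, response_column="y", k=5))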
        # Tail of a forward-selection step: best_candidate has just been chosen.
        # If it is an interaction term ("a:b"), pull its main effects into the
        # model as well so the selection stays hierarchical, and raise the
        # variable budget accordingly.
        candidate_split = best_candidate.split(sep=":")
        if len(candidate_split) == 2:
            if candidate_split[0] not in selected and candidate_split[0] in remaining:
                remaining.remove(candidate_split[0])
                selected.append(candidate_split[0])
                maxvars += 1
            if candidate_split[1] not in selected and candidate_split[1] in remaining:
                remaining.remove(candidate_split[1])
                selected.append(candidate_split[1])
                maxvars += 1
        remaining.remove(best_candidate)
        selected.append(best_candidate)
        current_score = best_new_score
    # After the selection loop: refit the chosen model on the full data set.
    formula = "{} ~ {} + 1".format(response, ' + '.join(selected))
    model = smf.ols(formula, data).fit()
    return model
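For context, a complete forward-selection driver consistent with the fragment above could look like the sketch below; the function name and the adjusted R-squared scoring rule are assumptions, not taken from the original source.

import statsmodels.formula.api as smf

def forward_select(data, response, candidates, maxvars):
    """Greedy forward selection over candidates (which may include
    interaction terms such as "a:b"), scored by adjusted R-squared."""
    remaining, selected = list(candidates), []
    current_score = -float("inf")
    while remaining and len(selected) < maxvars:
        scores = []
        for candidate in remaining:
            formula = "{} ~ {} + 1".format(response, " + ".join(selected + [candidate]))
            scores.append((smf.ols(formula, data).fit().rsquared_adj, candidate))
        best_new_score, best_candidate = max(scores)
        if best_new_score <= current_score:
            break  # no remaining candidate improves the fit
        # ... interaction handling and bookkeeping as in the fragment above ...
        remaining.remove(best_candidate)
        selected.append(best_candidate)
        current_score = best_new_score
    formula = "{} ~ {} + 1".format(response, " + ".join(selected))
    return smf.ols(formula, data).fit()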
    # requires: import math; import statsmodels.api as sm;
    # from statsmodels.formula.api import ols
    def _get_best_configs(self, parameters, results, configs, lower_is_better,
                          alpha=0.05):
        """
        Implements the testing procedure itself and returns the reduced set
        of parameter configurations.
        """
        df = self._prep_df_for_linreg(parameters, results,
                                      configs, lower_is_better)
        # Binary search for the largest rank cutoff p at which a one-way
        # ANOVA no longer rejects equality of the objective across ranks.
        l = 1
        h = df.Rank.max()
        p = h
        while l != h:
            lm = ols('Objective ~ C(Rank)', data=df.loc[df.Rank <= p, :]).fit()
            p_value = sm.stats.anova_lm(lm, typ=2).loc["C(Rank)", "PR(>F)"]
            reject = p_value < alpha
            if reject:
                h = p - 1
            else:
                l = p
            p = math.ceil((l + h) / 2)
        return df.loc[df.Rank <= p, [param.name for param in parameters]] \
            .drop_duplicates().to_dict('records')
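The ANOVA step at the heart of that binary search can be reproduced in isolation; here is a self-contained sketch on synthetic data (the Rank and Objective column names follow the method above):

import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

rng = np.random.default_rng(4)
df = pd.DataFrame({"Rank": np.repeat([1, 2, 3], 20)})
df["Objective"] = df["Rank"] + rng.normal(size=60)

# One-way ANOVA: does the objective differ across rank groups?
lm = ols('Objective ~ C(Rank)', data=df).fit()
print(sm.stats.anova_lm(lm, typ=2).loc["C(Rank)", "PR(>F)"])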
    # Fragment of a bootstrap call: refit OLS on each resample and collect
    # the estimated coefficients over 399 bootstrap draws.
    estimator=lambda y, x: OLS(y, x).fit().params,
    nboot=399)
    # requires: import numpy as np; from scipy import stats;
    # from statsmodels.regression.linear_model import OLS;
    # from statsmodels.tools.tools import add_constant;
    # from statsmodels.tsa.tsatools import lagmat2ds

    # create lagmat of both time series
    dta = lagmat2ds(x, mxlg, trim='both')
    dta = np.delete(dta, -1, axis=1)  # remove the un-lagged second series
    # add constant
    if addconst:
        dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)
        dtajoint = add_constant(dta[:, 1:], prepend=False)
    else:
        raise NotImplementedError('Not Implemented')
        # dtaown = dta[:, 1:mxlg]
        # dtajoint = dta[:, 1:]

    # Run OLS on both models, without and with lags of the second variable
    res2down = OLS(dta[:, 0], dtaown).fit()
    res2djoint = OLS(dta[:, 0], dtajoint).fit()

    # for ssr-based tests see:
    # http://support.sas.com/rnd/app/examples/ets/granger/index.htm

    # Granger causality test using ssr (F statistic)
    fgc1 = ((res2down.ssr - res2djoint.ssr) /
            res2djoint.ssr / mxlg * res2djoint.df_resid)
    if verbose:
        print('ssr based F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
              ' df_num=%d' % (fgc1,
                              stats.f.sf(fgc1, mxlg, res2djoint.df_resid),
                              res2djoint.df_resid, mxlg))
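This fragment closely follows the internals of statsmodels.tsa.stattools.grangercausalitytests; a minimal sketch of calling that public entry point on synthetic series (the data-generating process here is illustrative only):

import numpy as np
from statsmodels.tsa.stattools import grangercausalitytests

rng = np.random.default_rng(0)
n = 200
x = rng.normal(size=n)
# y depends on lagged x, so x should Granger-cause y
y = np.r_[0.0, 0.8 * x[:-1]] + rng.normal(scale=0.5, size=n)
# Column order matters: the test asks whether the second column
# helps predict the first.
results = grangercausalitytests(np.column_stack([y, x]), maxlag=2)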
# requires: import sys; from statsmodels.stats.multitest import multipletests
if opts.multiTest == 'Bonferroni':
    method = 'bonferroni'
elif opts.multiTest == 'Holm':
    method = 'holm'
elif opts.multiTest == 'Hochberg':
    method = 'simes-hochberg'
elif opts.multiTest == 'Hommel':
    method = 'hommel'
elif opts.multiTest == 'BH':
    method = 'fdr_bh'
elif opts.multiTest == 'BY':
    method = 'fdr_by'
elif opts.multiTest == 'TSBH':
    method = 'fdr_tsbh'
else:
    sys.stderr.write("ERROR: The methods for multiple test correction can only accept 'Bonferroni', 'Holm', 'Hochberg', 'Hommel', 'BH', 'BY' or 'TSBH' as input.\n")
    sys.exit(1)

# Adjust only the p-values at the tested indices; leave the rest unchanged.
mtc = multipletests(pval[idx], alpha=0.1, method=method, returnsorted=False)
padj = pval.copy()
padj[idx] = mtc[1]  # mtc[1] holds the corrected p-values
data.padj = padj
return data
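A self-contained sketch of the correction step, using the current statsmodels entry point (the p-values are synthetic):

import numpy as np
from statsmodels.stats.multitest import multipletests

pvals = np.array([0.001, 0.02, 0.04, 0.30, 0.60])
reject, padj, _, _ = multipletests(pvals, alpha=0.1, method='fdr_bh')
print(reject)  # which hypotheses are rejected at FDR 0.1
print(padj)    # corrected p-values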
    # requires: import sys; import numpy as np; import statsmodels.api as sm;
    # from scipy.stats import chi2
    # Inside the per-gene loop (fragment): skip genes without an adjusted
    # dispersion estimate, then compare nested negative binomial GLMs.
    print('\r%i genes finished.' % num, end='')
    if opts.dispDiff and np.isnan(data.dispAdjRibo[i]):
        continue
    if not opts.dispDiff and np.isnan(data.dispAdj[i]):
        continue

    response = np.hstack([data.countRibo[i, :], data.countRna[i, :]])
    if opts.dispDiff:
        disp = np.hstack([np.repeat(data.dispAdjRibo[i], lenSampleRibo),
                          np.repeat(data.dispAdjRna[i], lenSampleRna)])
    else:
        disp = data.dispAdj[i]

    try:
        modNB0 = sm.GLM(response, explanatory0,
                        family=sm.families.NegativeBinomial(alpha=disp),
                        offset=np.log(librarySizes))
        modNB1 = sm.GLM(response, explanatory1,
                        family=sm.families.NegativeBinomial(alpha=disp),
                        offset=np.log(librarySizes))
        result0 = modNB0.fit()
        result1 = modNB1.fit()
    except sm.tools.sm_exceptions.PerfectSeparationError:
        errorCnt += 1
    else:
        # Likelihood-ratio style test on the deviance difference; when the
        # dispersions differ, the degrees of freedom are rescaled by 2.5.
        if not opts.dispDiff:
            pval[i] = 1 - chi2.cdf(result0.deviance - result1.deviance,
                                   explanatory1.shape[1] - explanatory0.shape[1])
        else:
            pval[i] = 1 - chi2.cdf(result0.deviance - result1.deviance,
                                   (explanatory1.shape[1] - explanatory0.shape[1]) / 2.5)

data.pval = pval
sys.stdout.write("Warning: Failed to do test: %i genes. P value set to 'nan'.\n" % errorCnt)
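A compact, self-contained sketch of the same modeling pattern: a negative binomial GLM with a fixed dispersion and log library sizes as an offset (the counts, design matrix, and library sizes are synthetic stand-ins):

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(1)
counts = rng.poisson(lam=20, size=8).astype(float)
design = sm.add_constant(np.repeat([0.0, 1.0], 4))  # intercept + condition flag
libsize = np.full(8, 1e6)

model = sm.GLM(counts, design,
               family=sm.families.NegativeBinomial(alpha=0.1),
               offset=np.log(libsize))
print(model.fit().params)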
n_samples, n_features = train.shape
print("n_samples:", n_samples, "n_features:", n_features)
print("histogram of target")
print(np.histogram(target, 3))
print("len(train):", len(train))
print("len(target):", len(target))
print("dataset shape:", dataset.shape)

if family != 'gaussian':
    raise NotImplementedError("Only the gaussian family is supported in this benchmark")
# fit a gaussian GLM with a log link and time it
gauss_log = sm_api.GLM(target, train,
                       family=sm_api.families.Gaussian(sm_api.families.links.Log()))
start = time.time()
gauss_log_results = gauss_log.fit()
print("sm_api.GLM took", time.time() - start, "seconds")
print(gauss_log_results.summary())
def __init__(self, y, x, z, data, alpha):
    # Robust linear regression of the outcome y on the treatment column(s) x
    # plus adjustment columns z (x and z are lists of column names).
    self.regression = sm.RLM(data[y], data[x + z])
    self.result = self.regression.fit()
    self.coefficient = self.result.params[x][0]
    # conf_int returns lower bounds in column 0 and upper bounds in column 1.
    confidence_interval = self.result.conf_int(alpha=alpha / 2.)
    self.upper = confidence_interval[1][x][0]
    self.lower = confidence_interval[0][x][0]
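A minimal sketch of the underlying sm.RLM call on synthetic pandas data (the column names are hypothetical):

import numpy as np
import pandas as pd
import statsmodels.api as sm

rng = np.random.default_rng(2)
df = pd.DataFrame({"treatment": rng.normal(size=50),
                   "confounder": rng.normal(size=50)})
df["outcome"] = 1.5 * df["treatment"] + df["confounder"] + rng.normal(size=50)

result = sm.RLM(df["outcome"], df[["treatment", "confounder"]]).fit()
print(result.params["treatment"])   # robust coefficient estimate
print(result.conf_int(alpha=0.05))  # 95% confidence intervals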
try:
    df = pd.concat([df, pd.read_csv(csvfile)])
except FileNotFoundError:
    pass

if len(df['pivot']) > 0:
    print("selective:", np.mean(df['pivot']), np.std(df['pivot']),
          np.mean(df['length']), np.std(df['length']), np.mean(df['coverage']))
    print("naive:", np.mean(df['naive_pivot']), np.std(df['naive_pivot']),
          np.mean(df['naive_length']), np.std(df['naive_length']),
          np.mean(df['naive_coverage']))
    print("len ratio selective divided by naive:",
          np.mean(np.array(df['length']) / np.array(df['naive_length'])))

    # Compare the empirical CDFs of the selective and naive pivots against
    # the uniform reference line.
    plt.clf()
    U = np.linspace(0, 1, 101)
    plt.plot(U, sm.distributions.ECDF(df['pivot'])(U), 'r', label='Selective', linewidth=3)
    plt.plot(U, sm.distributions.ECDF(df['naive_pivot'])(U), 'b', label='Naive', linewidth=3)
    plt.legend()
    plt.plot([0, 1], [0, 1], 'k--', linewidth=2)
    plt.savefig(csvfile[:-4] + '.pdf')  # strip ".csv" and save alongside the input

    plt.clf()
    plt.scatter(df['naive_length'], df['length'])
    plt.savefig(csvfile[:-4] + '_lengths.pdf')

df.to_csv(csvfile, index=False)
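A tiny standalone sketch of the ECDF diagnostic used above, on synthetic pivots; uniformly distributed pivots should track the 45-degree reference line:

import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

rng = np.random.default_rng(3)
pivots = rng.uniform(size=500)
U = np.linspace(0, 1, 101)
plt.plot(U, sm.distributions.ECDF(pivots)(U), label='pivot ECDF')
plt.plot([0, 1], [0, 1], 'k--', label='uniform reference')
plt.legend()
plt.savefig('ecdf_check.pdf')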