Dive into secure and efficient coding practices with our curated list of the top 10 examples showcasing 'spacy' in Python. Our advanced machine learning engine meticulously scans each line of code, cross-referencing millions of open source libraries to ensure your implementation is not just functional, but also robust and secure. Elevate your Python applications to new heights by mastering the art of handling side effects, API calls, and asynchronous operations with confidence and precision.
# NOTE(review): this excerpt ends inside the print(...) call at the bottom; the
# rest of the loop body (including any update of `step`/`epoch` and any exit
# from `while True`) is not visible here.
# Set the optimizer's L2 attribute to zero (presumably disables the L2
# penalty -- confirm against the optimizer implementation).
optimizer.L2 = 0.0
# Iterator of learning rates (consumed with next() below), built from
# learn_rate / 3, learn_rate * 3 and 2 * len(train_data) // batch_size.
learn_rates = cyclic_triangular_rate(
learn_rate / 3, learn_rate * 3, 2 * len(train_data) // batch_size
)
pbar = tqdm.tqdm(total=100, leave=False)
# Collects one (accuracy, step, epoch) tuple per evaluation.
results = []
epoch = 0
step = 0
# Evaluation runs whenever `step` is a non-zero multiple of this value.
eval_every = 100
# Not read anywhere in the visible lines (presumably an early-stopping
# threshold used further down -- confirm).
patience = 3
while True:
# Train and evaluate
# Fresh loss accumulator, passed to nlp.update via `losses=`.
losses = Counter()
random.shuffle(train_data)
batches = minibatch(train_data, size=batch_size)
for batch in batches:
# Pull the next scheduled learning rate before each update.
optimizer.trf_lr = next(learn_rates)
# Each batch item is a (text, annotation) pair; split into parallel tuples.
texts, annotations = zip(*batch)
nlp.update(texts, annotations, sgd=optimizer, drop=0.1, losses=losses)
pbar.update(1)
# `step and ...` skips the evaluation while step is still 0.
if step and (step % eval_every) == 0:
pbar.close()
# Evaluate with optimizer.averages swapped in as the parameters.
with nlp.use_params(optimizer.averages):
scores = evaluate_multiclass(nlp, eval_texts, eval_cats)
results.append((scores["textcat_acc"], step, epoch))
print(
"{0:.3f}\t{1:.3f}\t{2:.3f}\t{3:.3f}".format(
losses["trf_textcat"],
scores["textcat_acc"],
scores["textcat_cor"],
scores["textcat_wrg"],
def test_doc_retokenize_split_extension_attrs_invalid(en_vocab, underscore_attrs):
    """Expect ValueError when a split is given the supplied underscore attrs.

    Three Token extensions are (re-)registered first -- one with a default
    value, one with a getter and one with a method -- and the split of
    "LosAngeles" is then attempted with ``underscore_attrs`` under the "_" key.
    """
    # One extension of each kind: default value, getter, method.
    extension_specs = (
        ("x", {"default": False}),
        ("a", {"getter": lambda x: x}),
        ("b", {"method": lambda x: x}),
    )
    for ext_name, ext_kwargs in extension_specs:
        Token.set_extension(ext_name, force=True, **ext_kwargs)

    sample = Doc(en_vocab, words=["LosAngeles", "start"])
    split_attrs = {"_": underscore_attrs}
    with pytest.raises(ValueError), sample.retokenize() as retokenizer:
        new_heads = [(sample[0], 1), sample[1]]
        retokenizer.split(sample[0], ["Los", "Angeles"], new_heads, attrs=split_attrs)
""" Official evaluation script for v1.1 of the SQuAD dataset. """
from __future__ import print_function
from collections import Counter
import string
import re
import argparse
import json
import sys
import spacy
nlp = spacy.blank("en")
def word_tokenize(sent):
    """Return the text of each token the module-level ``nlp`` yields for ``sent``."""
    return [tok.text for tok in nlp(sent)]
# Text-normalisation helpers for an answer string `s`.
# NOTE(review): only the three nested helpers are visible in this excerpt; no
# statement that applies them to `s` or returns a value appears here, so the
# function body looks truncated.
def normalize_answer(s):
# Replace the whole words "a", "an" and "the" with a single space. The
# pattern is case-sensitive (no flags are passed to re.sub).
def remove_articles(text):
return re.sub(r'\b(a|an|the)\b', ' ', text)
# Collapse every run of whitespace to one space and trim both ends.
def white_space_fix(text):
return ' '.join(text.split())
# Drop every character that appears in string.punctuation.
def remove_punc(text):
exclude = set(string.punctuation)
return ''.join(ch for ch in text if ch not in exclude)
def test_doc_retokenize_split_extension_attrs_invalid(en_vocab, underscore_attrs):
    """Splitting with the given underscore attrs must raise ValueError.

    Token extensions "x" (default), "a" (getter) and "b" (method) are
    registered with ``force=True`` before the split is attempted.
    """

    def _getter(x):
        return x

    def _method(x):
        return x

    Token.set_extension("x", force=True, default=False)
    Token.set_extension("a", force=True, getter=_getter)
    Token.set_extension("b", force=True, method=_method)

    document = Doc(en_vocab, words=["LosAngeles", "start"])
    custom_attrs = {"_": underscore_attrs}
    with pytest.raises(ValueError), document.retokenize() as retokenizer:
        # First sub-token attaches to the second sub-token, which attaches to "start".
        heads = [(document[0], 1), document[1]]
        retokenizer.split(
            document[0],
            ["Los", "Angeles"],
            heads,
            attrs=custom_attrs,
        )
# NOTE(review): the `def` line of the test these statements belong to is not
# visible in this excerpt; `en_vocab` is presumably one of its parameters.
# Build a Doc whose words are the single-space-separated pieces of `text`.
text = "Wow 😀 This is really cool! 😂 😂"
doc = Doc(en_vocab, words=text.split(" "))
pos_emoji = ["😀", "😃", "😂", "🤣", "😊", "😍"]
# One single-token pattern per emoji, matching on the exact token text (ORTH).
pos_patterns = [[{"ORTH": emoji}] for emoji in pos_emoji]
# Callback registered with matcher.add below; receives the matcher, the doc,
# the index of the current match and the list of all matches.
def label_sentiment(matcher, doc, i, matches):
match_id, start, end = matches[i]
# The match id is resolved back to its string name through the vocab's strings.
if doc.vocab.strings[match_id] == "HAPPY":
doc.sentiment += 0.1
span = doc[start:end]
# Merge the matched span into a single token.
with doc.retokenize() as retokenizer:
retokenizer.merge(span)
token = doc[start]
# The norm is written to the vocab entry looked up by the token's text.
token.vocab[token.text].norm_ = "happy emoji"
matcher = Matcher(en_vocab)
# Register every emoji pattern under the "HAPPY" key with the callback above.
matcher.add("HAPPY", label_sentiment, *pos_patterns)
matcher(doc)
# The callback must have changed the sentiment, and the word at index 1
# ("😀" in the split text) must carry the new norm.
assert doc.sentiment != 0
assert doc[1].norm_ == "happy emoji"
def test_issue1537():
    """Test that Span.as_doc() doesn't segfault."""
    raw = "The sky is blue . The man is pink . The dog is purple ."
    doc = Doc(Vocab(), words=raw.split())

    # The first token opens a sentence; every later token opens one exactly
    # when the token before it is a full stop.
    doc[0].sent_start = True
    for token in doc[1:]:
        token.sent_start = token.nbor(-1).text == "."

    sentences = list(doc.sents)
    # Convert the first two sentences before checking either result.
    converted = [sentences[index].as_doc() for index in (0, 1)]
    for sentence_doc in converted:
        assert isinstance(sentence_doc, Doc)
def test_underscore_docstring(en_vocab):
    """Test that docstrings are available for extension methods, even though
    they're partials."""

    def test_method(doc, arg1=1, arg2=2):
        """I am a docstring"""
        return (arg1, arg2)

    # force=True matches the other set_extension calls in this file and keeps
    # the registration repeatable: without it, registering "test_docstrings"
    # a second time in the same process raises instead of running the test.
    Doc.set_extension("test_docstrings", method=test_method, force=True)
    doc = Doc(en_vocab, words=["hello", "world"])
    assert test_method.__doc__ == "I am a docstring"
    # Only the last ". "-separated piece of the extension's __doc__ is compared
    # with the original docstring.
    assert doc._.test_docstrings.__doc__.rsplit(". ")[-1] == "I am a docstring"
def test_doc_retokenize_merge_extension_attrs(en_vocab):
    """Check that merge() applies custom extension values passed under "_".

    Covers a single merge (together with a lemma) and two merges performed
    inside one retokenize block.
    """
    for ext_name, ext_default in (("a", False), ("b", "nothing")):
        Token.set_extension(ext_name, default=ext_default, force=True)

    # Test regular merging
    doc = Doc(en_vocab, words=["hello", "world", "!"])
    with doc.retokenize() as retokenizer:
        merge_attrs = {"lemma": "hello world", "_": {"a": True, "b": "1"}}
        retokenizer.merge(doc[0:2], attrs=merge_attrs)
    assert doc[0].lemma_ == "hello world"
    assert doc[0]._.a is True
    assert doc[0]._.b == "1"

    # Test bulk merging
    doc = Doc(en_vocab, words=["hello", "world", "!", "!"])
    first_custom = {"a": True, "b": "1"}
    second_custom = {"a": None, "b": "2"}
    with doc.retokenize() as retokenizer:
        retokenizer.merge(doc[0:2], attrs={"_": first_custom})
        retokenizer.merge(doc[2:4], attrs={"_": second_custom})
    # After both merges the doc has two tokens, one per merged span.
    head_token, tail_token = doc[0], doc[1]
    assert head_token._.a is True
    assert head_token._.b == "1"
    assert tail_token._.a is None
    assert tail_token._.b == "2"
def test_doc_retokenizer_merge_lex_attrs(en_vocab):
    """Check that lexical attributes given to merge() show up on the merged token.

    Setting a lexical attribute such as IS_STOP during retokenization is
    expected to land on the lexeme, i.e. to apply to "all tokens with that
    lexeme"; that ambiguity is accepted. Also see #2390.
    """
    # Test regular merging
    doc = Doc(en_vocab, words=["hello", "world", "!"])
    assert all(not tok.is_stop for tok in doc)
    single_attrs = {"lemma": "hello world", "is_stop": True}
    with doc.retokenize() as retokenizer:
        retokenizer.merge(doc[0:2], attrs=single_attrs)
    assert doc[0].lemma_ == "hello world"
    assert doc[0].is_stop

    # Test bulk merging
    doc = Doc(en_vocab, words=["eins", "zwei", "!", "!"])
    assert all(not tok.like_num for tok in doc)
    assert all(not tok.is_stop for tok in doc)
    with doc.retokenize() as retokenizer:
        retokenizer.merge(doc[0:2], attrs={"like_num": True})
        retokenizer.merge(doc[2:4], attrs={"is_stop": True})
    # Each merged token carries only the attribute set for its own span.
    number_token, punct_token = doc[0], doc[1]
    assert number_token.like_num
    assert punct_token.is_stop
    assert not number_token.is_stop
    assert not punct_token.like_num
def test_matcher_set_value_operator(en_vocab):
    """Check match counts for an optional set-valued ORTH token before "house".

    "In a house" is expected to give two matches and "my house" one.
    """
    matcher = Matcher(en_vocab)
    optional_determiner = {"ORTH": {"IN": ["a", "the"]}, "OP": "?"}
    noun = {"ORTH": "house"}
    matcher.add("DET_HOUSE", None, [optional_determiner, noun])

    cases = (
        (["In", "a", "house"], 2),
        (["my", "house"], 1),
    )
    for words, expected_count in cases:
        found = matcher(Doc(en_vocab, words=words))
        assert len(found) == expected_count