
Top 10 Examples of "nltk in functional component" in Python

Dive into secure and efficient coding practices with our curated list of the top 10 examples showcasing 'nltk' in functional components in Python. Our advanced machine learning engine meticulously scans each line of code, cross-referencing millions of open source libraries to ensure your implementation is not just functional, but also robust and secure. The snippets below cover common NLTK tasks such as POS mapping to WordNet, tokenization, stopword removal, lemmatization, and feeding preprocessed text into clustering and topic-modeling pipelines.

from nltk.corpus import wordnet as wn

def convert_to_wordnet_pos(senseval_pos):
    # Map a coarse Senseval POS tag to the corresponding WordNet constant.
    if senseval_pos == 'VERB':
        return wn.VERB
    elif senseval_pos == 'NOUN':
        return wn.NOUN
    elif senseval_pos == 'ADV':
        return wn.ADV
    elif senseval_pos == 'ADJ':
        return wn.ADJ
    else:
        return None
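For context, a minimal usage sketch of the mapping above. It assumes the NLTK WordNet corpus has been downloaded (nltk.download('wordnet')); the word 'bank' and the 'NOUN' tag are purely illustrative.

from nltk.corpus import wordnet as wn

pos = convert_to_wordnet_pos('NOUN')   # maps to wn.NOUN ('n')
if pos is not None:
    # Look up WordNet senses restricted to the mapped part of speech.
    print([s.name() for s in wn.synsets('bank', pos=pos)[:3]])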
def test_subtrees_for_phrase(self):
    # unittest method: the first NP subtree of the parsed sentence should be
    # an nltk.Tree whose last child's leaves start with "property".
    t = self._sentence.subtrees_for_phrase("NP")[0]
    self.assertIsInstance(t, Tree)
    self.assertEqual("property", t[-1].leaves()[0])
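Note that subtrees_for_phrase is a method of a custom sentence wrapper, not part of NLTK itself. A rough sketch of the equivalent filtering with a plain nltk.Tree (the parse string below is illustrative):

from nltk import Tree

t = Tree.fromstring("(S (NP (DT the) (NN property)) (VP (VBZ sells)))")
# Collect every NP subtree, mirroring subtrees_for_phrase("NP").
np_subtrees = [st for st in t.subtrees(filter=lambda st: st.label() == "NP")]
print(np_subtrees[0].leaves())   # ['the', 'property']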
import re
from nltk.corpus import stopwords

# `dictionary` (a spell-checker), `punctuations`, `hoax_stopwords`, and
# `lemmatizer` are module-level globals assumed to be defined elsewhere.
def preprocess(text):
    # Normalize stray unicode and punctuation artifacts before tokenizing.
    text = (text.encode('utf-8').decode("ascii", "replace")
            .replace(u"\ufffd", "_").replace("___", "'").replace("'s", " ")
            .replace("``", " ").replace("''", " ").replace("_", " ")
            .replace("'", " ").replace("`", " "))
    text = re.sub("[^0-9a-zA-Z !\"/:;<=>?.,!@#$%^&-_|()']+", " ", text)
    tokens = text.split(" ")
    result = ""
    for token in tokens:
        word = token.split(" ")[0]
        if word not in stopwords.words('english') and token not in punctuations and token not in hoax_stopwords:
            if len(word) > 0:
                if word.isupper() and dictionary.check(word.lower()):
                    # Known all-caps word: lemmatize, falling back to the verb lemma.
                    new_token = lemmatizer.lemmatize(token.lower())
                    if new_token == token.lower():
                        new_token = lemmatizer.lemmatize(token.lower(), pos='v')
                    result += new_token + " "
                elif word.isupper():
                    # Unknown all-caps token: keep it, title-cased.
                    result += token.title() + " "
                elif dictionary.check(word.lower()):
                    new_token = lemmatizer.lemmatize(token.lower())
                    if new_token == token.lower():
                        new_token = lemmatizer.lemmatize(token.lower(), pos='v')
                    result += new_token + " "
                else:
                    result += token + " "
    return result.strip()
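The function above relies on module-level globals that the snippet does not define (dictionary, punctuations, hoax_stopwords, lemmatizer). One plausible setup, assuming the spell-checker is pyenchant (suggested by the dictionary.check(...) call) and that hoax_stopwords is a domain-specific list whose contents are not shown:

import string
import enchant                      # assumption: pyenchant provides Dict.check()
from nltk.stem import WordNetLemmatizer

dictionary = enchant.Dict("en_US")  # spell-check dictionary
punctuations = set(string.punctuation)
hoax_stopwords = []                 # domain-specific stopwords, contents unknown
lemmatizer = WordNetLemmatizer()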
def create_dic(self, documents):
    # Tokenize, drop English stopwords, keep only tokens that occur more than
    # once, then build a gensim dictionary and bag-of-words corpus.
    # Assumes `from gensim import corpora` and `from nltk.corpus import stopwords`
    # at module level.
    texts = [[word for word in document.lower().split()
              if word not in stopwords.words('english')]
             for document in documents]

    from collections import defaultdict
    frequency = defaultdict(int)
    for text in texts:
        for token in text:
            frequency[token] += 1
    texts = [[token for token in text if frequency[token] > 1]
             for text in texts]

    dictionary = corpora.Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]
    return [dictionary, corpus]
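A short follow-on sketch of how the returned [dictionary, corpus] pair is typically consumed: the same pipeline inlined (without the frequency filter) and fed to a gensim LDA model. The documents are toy data, and gensim plus the NLTK stopword corpus are assumed to be installed.

from gensim import corpora, models
from nltk.corpus import stopwords

docs = ["the cat sat on the mat and the cat slept",
        "the dog chased the cat around the mat",
        "the dog sat on the log near the cat"]

texts = [[w for w in d.lower().split() if w not in stopwords.words('english')]
         for d in docs]
dictionary = corpora.Dictionary(texts)
corpus = [dictionary.doc2bow(t) for t in texts]

lda = models.LdaModel(corpus, id2word=dictionary, num_topics=2, passes=5)
for topic_id, terms in lda.print_topics():
    print(topic_id, terms)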
def create_word_features(self, words):
    # Flatten the input lines into tokens, drop English stopwords, and return
    # the remaining words as a single space-separated string.
    # Assumes `from nltk.corpus import stopwords` at module level.
    w = []
    for line in words:
        for wrd in line.split():
            w.append(wrd)
    useful_words = [word for word in w if word not in stopwords.words('english')]
    my_dict = ' '.join(useful_words)
    return my_dict
import collections
from nltk.corpus import stopwords
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer

def cluster_texts(texts, clusters=3):
    """Transform texts to Tf-Idf coordinates and cluster texts using K-Means."""
    # `process_text` is a custom tokenizer assumed to be defined elsewhere.
    vectorizer = TfidfVectorizer(tokenizer=process_text,
                                 stop_words=stopwords.words('english'),
                                 max_df=1.0,
                                 min_df=1,
                                 lowercase=True)

    tfidf_model = vectorizer.fit_transform(texts)
    km_model = KMeans(n_clusters=clusters, n_init=100, verbose=0, tol=1e-10)
    km_model.fit(tfidf_model)

    # Group document indices by their assigned cluster label.
    clustering = collections.defaultdict(list)
    for idx, label in enumerate(km_model.labels_):
        clustering[label].append(idx)

    return clustering
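A hedged usage sketch for cluster_texts: the page does not define process_text, so a simple word_tokenize-based stand-in is used here, and the four documents are illustrative (requires scikit-learn plus the NLTK punkt and stopwords data).

from nltk.tokenize import word_tokenize

def process_text(text):
    # Stand-in tokenizer; the original process_text is not shown on this page.
    return word_tokenize(text.lower())

texts = ["the cat chased the mouse across the yard",
         "dogs and cats chase each other",
         "stock prices fell sharply in early trading",
         "the market dropped again on weak earnings"]

clusters = cluster_texts(texts, clusters=2)
for label, doc_ids in sorted(clusters.items()):
    print(label, [texts[i] for i in doc_ids])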
import re
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer

def text_to_wordlist(text, remove_stopwords=False, stem_words=False):
    # Clean the text, with the option to remove stopwords and to stem words.

    # Convert words to lower case and split them
    text = text.lower().split()

    # Optionally, remove stop words
    if remove_stopwords:
        stops = set(stopwords.words("english"))
        text = [w for w in text if w not in stops]

    text = " ".join(text)

    # Clean the text
    text = re.sub(r"[^A-Za-z0-9^,!.\/'+-=]", " ", text)
    text = re.sub(r"what's", "what is ", text)
    text = re.sub(r"\'s", " ", text)
    text = re.sub(r"\'ve", " have ", text)
    text = re.sub(r"can't", "cannot ", text)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"i'm", "i am ", text)
    text = re.sub(r"\'re", " are ", text)
    text = re.sub(r"\'d", " would ", text)
    text = re.sub(r"\'ll", " will ", text)
    text = re.sub(r",", " ", text)

    # Optionally, stem the remaining words
    if stem_words:
        stemmer = SnowballStemmer("english")
        text = " ".join([stemmer.stem(word) for word in text.split()])

    return text
from nltk.corpus import stopwords

def isStopword(text):
    # Return 'Y' if any token in the text is an English stopword, otherwise 'N'.
    for w in text.split(' '):
        if w in stopwords.words('english'):
            return 'Y'
    return 'N'
import nltk
from nltk.tokenize import word_tokenize

def preprocess(text):
    """
    Preprocess text for encoder: split each document into sentences with the
    Punkt tokenizer, word-tokenize each sentence, and rejoin with spaces.
    """
    X = []
    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
    print("Loaded NLTK data")
    for t in text:
        sents = sent_detector.tokenize(t)
        result = ''
        for s in sents:
            tokens = word_tokenize(s)
            result += ' ' + ' '.join(tokens)
        X.append(result)
    return X
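A brief usage sketch for the encoder preprocessor above; it assumes the punkt tokenizer models have been downloaded (nltk.download('punkt')) and the two documents are illustrative:

docs = ["NLTK makes tokenization easy. It also ships sentence splitters.",
        "This is a single-sentence document."]
print(preprocess(docs))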
def _build_para_dict(self):
    # Load PPDB paraphrase data and build a dictionary mapping a tokenized
    # phrase to the list of its equivalent paraphrases. Assumes `read_lines`,
    # `tqdm`, and `word_tokenize` are imported/defined at module level.
    path = "data/ppdb-2.0-s-all"
    lines = read_lines(path)
    relations = [line.split(" ||| ")[-1] for line in lines]  # (unused)
    equivalent_pairs = []
    print("Preprocessing raw data...")
    for line in tqdm(lines):
        split = line.split(" ||| ")
        if split[-1] == "Equivalence":
            equivalent_pairs.append(tuple(split[1:3]))

    paraphrase_pairs = [line.split(" ||| ")[1:3] for line in lines]  # (unused)
    equivalent_pairs_ubuntu = []
    print("Extracting paraphrase pairs...")
    for pair in tqdm(equivalent_pairs):
        tokens_0 = word_tokenize(pair[0])
        tokens_1 = word_tokenize(pair[1])
        if not (self._contains_unknown(tokens_0) or self._contains_unknown(tokens_1)):
            equivalent_pairs_ubuntu.append((tokens_0, tokens_1))

    # Insert paraphrases in both directions
    print("Building dictionary...")
    self.paraphrase_dict = {}
    for (p0, p1) in tqdm(equivalent_pairs_ubuntu):
        p0 = tuple(p0)
        p1 = tuple(p1)
        try:
            self.paraphrase_dict[p0] = self.paraphrase_dict[p0] + [p1]
        except KeyError:
            self.paraphrase_dict[p0] = [p1]
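A small aside on the last few lines: the try/except KeyError pattern for appending to a list-valued dict is more commonly written with collections.defaultdict, as in this standalone sketch with toy pairs:

from collections import defaultdict

paraphrase_dict = defaultdict(list)
pairs = [(("good", "day"), ("nice", "day")), (("good", "day"), ("fine", "day"))]
for (p0, p1) in pairs:
    paraphrase_dict[tuple(p0)].append(tuple(p1))

print(paraphrase_dict[("good", "day")])   # [('nice', 'day'), ('fine', 'day')]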
