Source code for wrappers.spacy_wrapper

from typing import List

import spacy


class SpacyWrapper:
    """
    Wrapper object that loads a SpaCy pipeline and helps use it.
    """

    def __init__(self, spacy_module: str) -> None:
        """
        Load the requested SpaCy pipeline, downloading it first if needed.

        :param spacy_module: name of the SpaCy pipeline package to load
        """
        try:
            self._nlp = spacy.load(spacy_module)
        except OSError:
            # Loading failed with OSError: fetch the package through
            # spacy.cli.download, then retry the load once.
            spacy.cli.download(spacy_module)
            self._nlp = spacy.load(spacy_module)

    def sentence_tokenizer(self, text: str) -> List[str]:
        """
        Tokenize (split) text into sentences.

        for example:
            sentence_tokenizer("Hello, world. Here are two sentences.")
        will output:
            ['Hello, world.', 'Here are two sentences.']

        :param text: raw text to split into sentences
        :return: list of strings, each string is a sentence.
        """
        doc = self._nlp(text)
        # Use ``Span.text`` rather than ``Span.string``: the latter was
        # deprecated in spaCy 2.x and removed in spaCy 3, while ``text`` is
        # available in both. Surrounding whitespace is stripped either way.
        return [sent.text.strip() for sent in doc.sents]