"
],
"text/plain": [
" Token Tag Pos Description\n",
"0 example NN NOUN noun, singular or mass\n",
"1 text NN NOUN noun, singular or mass\n",
"2 Singapore NNP PROPN noun, proper singular\n",
"3 Sunil NNP PROPN noun, proper singular\n",
"4 Singh NNP PROPN noun, proper singular\n",
"5 6th JJ ADJ adjective (English), other noun-modifier (Chin...\n",
"6 August NNP PROPN noun, proper singular\n",
"7 2023 CD NUM cardinal number"
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the tokens of `doc` that are neither stop words nor punctuation,\n",
"# then tabulate each one's text, tag_, pos_ and spacy.explain(tag_).\n",
"kept_tokens = [tok for tok in doc if not (tok.is_stop or tok.is_punct)]\n",
"word = [str(tok) for tok in kept_tokens]\n",
"tag = [str(tok.tag_) for tok in kept_tokens]\n",
"pos = [tok.pos_ for tok in kept_tokens]\n",
"desc = [spacy.explain(tok.tag_) for tok in kept_tokens]\n",
"pd.DataFrame(data=dict(Token=word, Tag=tag, Pos=pos, Description=desc))\n"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"from sumy.parsers.plaintext import PlaintextParser\n",
"from sumy.nlp.tokenizers import Tokenizer\n",
"from sumy.summarizers.text_rank import TextRankSummarizer\n",
"from sumy.summarizers.lex_rank import LexRankSummarizer\n",
"from sumy.summarizers.lsa import LsaSummarizer\n",
"from dataclasses import dataclass\n",
"from dataclasses import field\n",
"\n",
"\n",
"@dataclass\n",
"class __AppConfig:\n",
"    \"\"\"app-wide configurations\n",
"\n",
"    Attributes:\n",
"        summarizers: maps a summarizer class name to the dotted path of the\n",
"            module that class is imported from.\n",
"    \"\"\"\n",
"\n",
"    # Annotated + default_factory: this is now a real dataclass field (it takes\n",
"    # part in __init__/__repr__/__eq__) and every instance gets its own dict\n",
"    # instead of one mutable dict shared through the class.\n",
"    summarizers: dict = field(\n",
"        default_factory=lambda: dict(\n",
"            TextRankSummarizer=\"sumy.summarizers.text_rank\",\n",
"            LexRankSummarizer=\"sumy.summarizers.lex_rank\",\n",
"            LsaSummarizer=\"sumy.summarizers.lsa\",\n",
"        )\n",
"    )\n",
"\n",
"\n",
"### shared config instance read by the cells below (e.g. get_summarizer)\n",
"app_config = __AppConfig()"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"def class_from_name(module, class_name):\n",
"    \"\"\"Return the attribute of ``module`` whose name is ``class_name``.\n",
"\n",
"    The lookup is a plain ``getattr``, so an ``AttributeError`` propagates\n",
"    when ``module`` has no attribute with that name.\n",
"    \"\"\"\n",
"    resolved = getattr(module, class_name)\n",
"    return resolved"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [],
"source": [
"method = \"TextRankSummarizer\"\n",
"\n",
"\n",
"def get_summarizer(method):\n",
"    \"\"\"Return the summarizer class registered under ``method``.\n",
"\n",
"    ``method`` must be a key of ``app_config.summarizers``; the mapped value\n",
"    is the dotted path of the module, and the class is looked up on that\n",
"    module under the same name as ``method``.\n",
"\n",
"    Raises:\n",
"        KeyError: if ``method`` is not a key of ``app_config.summarizers``.\n",
"    \"\"\"\n",
"    import importlib\n",
"\n",
"    module_path = app_config.summarizers.get(method)\n",
"    if module_path is None:\n",
"        # Previously this surfaced as an opaque ``KeyError: None`` from sys.modules.\n",
"        known = \", \".join(sorted(app_config.summarizers))\n",
"        raise KeyError(f\"unknown summarizer {method!r}; expected one of: {known}\")\n",
"    # import_module returns the already-loaded module when there is one and\n",
"    # imports it otherwise, so this no longer depends on an earlier cell having\n",
"    # imported the module into sys.modules.\n",
"    module = importlib.import_module(module_path)\n",
"    return class_from_name(module, method)"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
"# Sample two-paragraph passage (about the film Interstellar); it is the input\n",
"# handed to PlaintextParser.from_string in the next cell.\n",
"text = \"\"\"Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher Nolan. It stars Matthew McConaughey, Anne Hathaway, Jessica Chastain, Bill Irwin, Ellen Burstyn, Matt Damon, and Michael Caine. Set in a dystopian future where humanity is embroiled in a catastrophic blight and famine, the film follows a group of astronauts who travel through a wormhole near Saturn in search of a new home for humankind.\n",
"Brothers Christopher and Jonathan Nolan wrote the screenplay, which had its origins in a script Jonathan developed in 2007 and was originally set to be directed by Steven Spielberg. Kip Thorne, a Caltech theoretical physicist and 2017 Nobel laureate in Physics,[4] was an executive producer, acted as a scientific consultant, and wrote a tie-in book, The Science of Interstellar. Cinematographer Hoyte van Hoytema shot it on 35 mm movie film in the Panavision anamorphic format and IMAX 70 mm. Principal photography began in late 2013 and took place in Alberta, Iceland, and Los Angeles. Interstellar uses extensive practical and miniature effects, and the company Double Negative created additional digital effects.\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build the tokenizer first, then parse `text` with it; the bare `parser`\n",
"# on the last line displays the resulting object.\n",
"english_tokenizer = Tokenizer(\"english\")\n",
"parser = PlaintextParser.from_string(text, english_tokenizer)\n",
"parser"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"8"
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show how many sentences the parser found rather than dumping every sentence\n",
"# object; the output saved with this cell (8) is this count.\n",
"len(parser.document.sentences)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "nlp",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}