{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from pathlib import Path as pp\n",
    "from pandas.core.frame import DataFrame as df\n",
    "import os\n",
    "\n",
    "# directories holding the dataset files (relative to the notebook's working directory)\n",
    "right_dir: pp = pp(\"../data/right\")\n",
    "wrong_dir: pp = pp(\"../data/wrong\")\n",
    "\n",
    "# dataframes (start empty; right_df is populated by the loading cell)\n",
    "right_df : df = df()\n",
    "wrong_df : df = df()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# merging right datafiles into a dataframe\n",
    "#\n",
    "# Only regular files are read: a sub-directory inside right_dir (for example\n",
    "# .ipynb_checkpoints) would otherwise be handed to pd.read_csv and fail.\n",
    "# Files are sorted by name because directory listing order is arbitrary, which\n",
    "# would make the row order of right_df differ between runs.\n",
    "right_files = sorted(path for path in right_dir.iterdir() if path.is_file())\n",
    "if not right_files:\n",
    "    # fail early with the offending directory instead of pandas' generic\n",
    "    # \"No objects to concatenate\" error\n",
    "    raise FileNotFoundError(f\"no data files found in {right_dir.absolute()}\")\n",
    "\n",
    "# one dataframe per file; each keeps its own row labels, so labels repeat\n",
    "# across files in the concatenated result (no ignore_index, same as before)\n",
    "rdf_list = [pd.read_csv(path) for path in right_files]\n",
    "right_df = pd.concat(rdf_list)"
   ]
  },
  { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | index | \n", "content | \n", "
---|---|---|
0 | \n", "0 | \n", "Feeling annoyed? Follow this thread | \n", "
1 | \n", "1 | \n", "Thread, why you should vote for BJP. #PhirEKBa... | \n", "
2 | \n", "2 | \n", "The biggest festival of democracy is here! EC ... | \n", "
3 | \n", "3 | \n", "Slogan competition \\nShare your slogan idea ( ... | \n", "
4 | \n", "4 | \n", "4 जून की करो तैयारी,\\n\\nआ रहे हैं भगवाधारी.... | \n", "
... | \n", "... | \n", "... | \n", "
92 | \n", "92 | \n", "मैं बता रहा हूँ, बेंगलुरू मामले में भी बहुत ते... | \n", "
93 | \n", "93 | \n", "ईरान में करीब 1200 भारतीय फंसे हैं। 800 छात्र ... | \n", "
94 | \n", "94 | \n", "वैसे तो TikTok ने PMCares फंड में LAC पर चीन स... | \n", "
95 | \n", "95 | \n", "बंगाल में भीड़ से खचाखच भरी रैलियां हो सकती है... | \n", "
96 | \n", "96 | \n", "Thank You PM \\n@narendramodi\\n ji for follow b... | \n", "
1675 rows × 2 columns
\n", "