{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pokemon Attack Scraping Script\n",
    "\n",
    "Scrapes the physical-moves table from the Serebii AttackDex and loads it into the pandas DataFrame `moves`, dropping rows whose Effect text says the move can't be used."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "\n",
    "# Serebii AttackDex pages, one per move category.\n",
    "physical_moves = \"https://www.serebii.net/attackdex-swsh/physical.shtml\"\n",
    "special_moves = \"https://www.serebii.net/attackdex-swsh/special.shtml\"\n",
    "status_moves = \"https://www.serebii.net/attackdex-swsh/other.shtml\"\n",
    "\n",
    "# Seconds to wait on the server before giving up instead of hanging the kernel.\n",
    "REQUEST_TIMEOUT = 30\n",
    "# Rows whose Effect text contains this phrase are dropped from `moves`.\n",
    "UNUSABLE_MOVE_MARKER = \"This move can't be used\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fetch the page"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fail loudly on an HTTP error rather than parsing an error page further down.\n",
    "data = requests.get(physical_moves, timeout=REQUEST_TIMEOUT)\n",
    "data.raise_for_status()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Parse the move table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# No cell shown here reads `my_data`; it is kept in case a later cell does.\n",
    "my_data = []\n",
    "# Table-parsing approach adapted from:\n",
    "# https://www.kite.com/python/examples/4420/beautifulsoup-parse-an-html-table-and-write-to-a-csv\n",
    "html = BeautifulSoup(data.text, \"html.parser\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "soup = html\n",
    "# The table of moves is the second <table> element on the page (index 1).\n",
    "table = soup.find_all(\"table\")[1]\n",
    "\n",
    "# One list of stripped cell texts per <tr>; the first row is used as the header below.\n",
    "output_rows = [\n",
    "    [cell.text.strip() for cell in table_row.find_all(\"td\")]\n",
    "    for table_row in table.find_all(\"tr\")\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the `moves` DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "moves_raw = pd.DataFrame(output_rows)\n",
    "\n",
    "# Everything is derived from `moves_raw` inside this one cell, so re-running it\n",
    "# never drops an extra row the way a standalone `moves = moves[1:]` cell would.\n",
    "moves = moves_raw.iloc[1:].copy()\n",
    "moves.columns = moves_raw.iloc[0]  # promote the header row to column labels\n",
    "\n",
    "# `na=False` keeps rows whose Effect cell is missing instead of raising on None.\n",
    "is_unusable = moves[\"Effect\"].str.contains(UNUSABLE_MOVE_MARKER, regex=False, na=False)\n",
    "moves = moves[~is_unusable]"
   ]
  },
  {
"cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Name | \n", "Type | \n", "Cat. | \n", "PP | \n", "Att. | \n", "Acc. | \n", "Effect | \n", "
---|---|---|---|---|---|---|---|
1 | \n", "Accelerock | \n", "\n", " | \n", " | 20 | \n", "40 | \n", "100 | \n", "The user smashes into the target at high speed... | \n", "
3 | \n", "Acrobatics | \n", "\n", " | \n", " | 15 | \n", "55 | \n", "100 | \n", "The user nimbly strikes the target. If the use... | \n", "
4 | \n", "Aerial Ace | \n", "\n", " | \n", " | 20 | \n", "60 | \n", "101 | \n", "The user confounds the target with speed, then... | \n", "
6 | \n", "Anchor Shot | \n", "\n", " | \n", " | 20 | \n", "80 | \n", "100 | \n", "The user entangles the target with its anchor ... | \n", "
7 | \n", "Aqua Jet | \n", "\n", " | \n", " | 20 | \n", "40 | \n", "100 | \n", "The user lunges at the target at a speed that ... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
381 | \n", "Wood Hammer | \n", "\n", " | \n", " | 15 | \n", "120 | \n", "100 | \n", "The user slams its rugged body into the target... | \n", "
382 | \n", "Wrap | \n", "\n", " | \n", " | 20 | \n", "15 | \n", "90 | \n", "A long body, vines, or the like are used to wr... | \n", "
383 | \n", "X-Scissor | \n", "\n", " | \n", " | 15 | \n", "80 | \n", "100 | \n", "The user slashes at the target by crossing its... | \n", "
384 | \n", "Zen Headbutt | \n", "\n", " | \n", " | 15 | \n", "80 | \n", "90 | \n", "The user focuses its willpower to its head and... | \n", "
385 | \n", "Zing Zap | \n", "\n", " | \n", " | 10 | \n", "80 | \n", "100 | \n", "A strong electric blast crashes down on the ta... | \n", "
322 rows × 7 columns
\n", "