Spaces:
Runtime error
Runtime error
ziggycross
committed on
Commit
•
47cc2c7
1
Parent(s):
783d8d0
Added data loader from branch sean-csv-reader.
Browse files- loader-cleaner.ipynb +91 -0
loader-cleaner.ipynb
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import pandas as pd\n",
|
10 |
+
"import os"
|
11 |
+
]
|
12 |
+
},
|
13 |
+
{
|
14 |
+
"attachments": {},
|
15 |
+
"cell_type": "markdown",
|
16 |
+
"metadata": {},
|
17 |
+
"source": [
|
18 |
+
"### Load data from file into a pandas df"
|
19 |
+
]
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"cell_type": "code",
|
23 |
+
"execution_count": 2,
|
24 |
+
"metadata": {},
|
25 |
+
"outputs": [
|
26 |
+
{
|
27 |
+
"name": "stdout",
|
28 |
+
"output_type": "stream",
|
29 |
+
"text": [
|
30 |
+
"File 'hr.csv' loaded successfully. \n",
|
31 |
+
"Found 311 rows, 36 columns\n"
|
32 |
+
]
|
33 |
+
}
|
34 |
+
],
|
35 |
+
"source": [
|
36 |
+
"DATADIR=\"data/\"\n",
|
37 |
+
"FILENAME=None\n",
|
38 |
+
"\n",
|
39 |
+
"while FILENAME is None:\n",
|
40 |
+
" \n",
|
41 |
+
" file_candidate = input(\"Enter file name:\")\n",
|
42 |
+
" if file_candidate == \"\": break\n",
|
43 |
+
" \n",
|
44 |
+
" try:\n",
|
45 |
+
" print(f\"Assessing file '{file_candidate}'...\".ljust(120), end=\"\\r\")\n",
|
46 |
+
" file_path = DATADIR + file_candidate\n",
|
47 |
+
" extension = file_candidate.split(\".\")[-1] \n",
|
48 |
+
" match extension:\n",
|
49 |
+
" case \"csv\":\n",
|
50 |
+
" df = pd.read_csv(file_path)\n",
|
51 |
+
" case \"json\":\n",
|
52 |
+
" df = pd.read_json(file_path)\n",
|
53 |
+
" case \"xlsx\":\n",
|
54 |
+
" df = pd.read_excel(file_path)\n",
|
55 |
+
" case _:\n",
|
56 |
+
" print(f\"Error: Invalid extension '{extension}'\")\n",
|
57 |
+
" continue\n",
|
58 |
+
" print(f\"File '{file_candidate}' loaded successfully.\")\n",
|
59 |
+
" rows, columns = df.shape\n",
|
60 |
+
" print(f\"Found {rows} rows, {columns} columns\")\n",
|
61 |
+
" FILENAME = file_candidate\n",
|
62 |
+
" except FileNotFoundError:\n",
|
63 |
+
" print(f\"Error: '{file_candidate}' doesn't exist in {os.getcwd()}/{DATADIR}\")\n",
|
64 |
+
" except Exception as error:\n",
|
65 |
+
" print(f\"Error: Unable to read file '{file_candidate}' ({str(type(error))}: {error})\".ljust(120))"
|
66 |
+
]
|
67 |
+
}
|
68 |
+
],
|
69 |
+
"metadata": {
|
70 |
+
"kernelspec": {
|
71 |
+
"display_name": "Python 3",
|
72 |
+
"language": "python",
|
73 |
+
"name": "python3"
|
74 |
+
},
|
75 |
+
"language_info": {
|
76 |
+
"codemirror_mode": {
|
77 |
+
"name": "ipython",
|
78 |
+
"version": 3
|
79 |
+
},
|
80 |
+
"file_extension": ".py",
|
81 |
+
"mimetype": "text/x-python",
|
82 |
+
"name": "python",
|
83 |
+
"nbconvert_exporter": "python",
|
84 |
+
"pygments_lexer": "ipython3",
|
85 |
+
"version": "3.10.6"
|
86 |
+
},
|
87 |
+
"orig_nbformat": 4
|
88 |
+
},
|
89 |
+
"nbformat": 4,
|
90 |
+
"nbformat_minor": 2
|
91 |
+
}
|