Spaces:
Configuration error
Configuration error
Commit ·
3531f4a
1
Parent(s): de245a1
feat: update execution counts and enhance data processing in main notebook
Browse files- data/Kollywood 2020-2022 songs.csv +0 -0
- main.ipynb +10 -4
data/Kollywood 2020-2022 songs.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
main.ipynb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
-
"execution_count":
|
| 6 |
"metadata": {},
|
| 7 |
"outputs": [],
|
| 8 |
"source": [
|
|
@@ -36,7 +36,7 @@
|
|
| 36 |
},
|
| 37 |
{
|
| 38 |
"cell_type": "code",
|
| 39 |
-
"execution_count":
|
| 40 |
"metadata": {},
|
| 41 |
"outputs": [],
|
| 42 |
"source": [
|
|
@@ -117,7 +117,7 @@
|
|
| 117 |
},
|
| 118 |
{
|
| 119 |
"cell_type": "code",
|
| 120 |
-
"execution_count":
|
| 121 |
"metadata": {},
|
| 122 |
"outputs": [
|
| 123 |
{
|
|
@@ -136,7 +136,13 @@
|
|
| 136 |
"\n",
|
| 137 |
"df = pd.concat([df1, df2, df3])\n",
|
| 138 |
"logger.info(f\"Concatenated DataFrame shape: {df.shape}\")\n",
|
| 139 |
-
"logger.info(f\"Unique Track URIs: {df['Track URI'].unique().shape}\")"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
]
|
| 141 |
},
|
| 142 |
{
|
|
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
+
"execution_count": 2,
|
| 6 |
"metadata": {},
|
| 7 |
"outputs": [],
|
| 8 |
"source": [
|
|
|
|
| 36 |
},
|
| 37 |
{
|
| 38 |
"cell_type": "code",
|
| 39 |
+
"execution_count": 3,
|
| 40 |
"metadata": {},
|
| 41 |
"outputs": [],
|
| 42 |
"source": [
|
|
|
|
| 117 |
},
|
| 118 |
{
|
| 119 |
"cell_type": "code",
|
| 120 |
+
"execution_count": null,
|
| 121 |
"metadata": {},
|
| 122 |
"outputs": [
|
| 123 |
{
|
|
|
|
| 136 |
"\n",
|
| 137 |
"df = pd.concat([df1, df2, df3])\n",
|
| 138 |
"logger.info(f\"Concatenated DataFrame shape: {df.shape}\")\n",
|
| 139 |
+
"logger.info(f\"Unique Track URIs: {df['Track URI'].unique().shape}\")\n",
|
| 140 |
+
"\n",
|
| 141 |
+
"logger.info(f\"Before dropping duplicates: {df.shape}\")\n",
|
| 142 |
+
"df = df.drop_duplicates(subset=['Track URI'])\n",
|
| 143 |
+
"logger.info(f\"Dropped duplicates DataFrame shape: {df.shape}\")\n",
|
| 144 |
+
"\n",
|
| 145 |
+
"df.to_csv(\"data/Kollywood 2020-2022 songs.csv\", index=False)"
|
| 146 |
]
|
| 147 |
},
|
| 148 |
{
|