{ "cells": [ { "cell_type": "markdown", "id": "religious-agenda", "metadata": {}, "source": [ "# Pandas" ] }, { "cell_type": "markdown", "id": "internal-minnesota", "metadata": {}, "source": [ "Pandas is a library for tabular data (dataframe)." ] }, { "cell_type": "code", "execution_count": 27, "id": "premier-growth", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 74, "id": "mathematical-mortality", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xy
01.012181-0.077271
11.287084-0.330829
2-1.1620340.167187
3-0.319899-0.548682
40.821274-0.565926
\n", "
" ], "text/plain": [ " x y\n", "0 1.012181 -0.077271\n", "1 1.287084 -0.330829\n", "2 -1.162034 0.167187\n", "3 -0.319899 -0.548682\n", "4 0.821274 -0.565926" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame(np.random.randn(100, 2), columns=['x', 'y'])\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 29, "id": "informed-blues", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(100, 2)" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 30, "id": "distinguished-fence", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 100 entries, 0 to 99\n", "Data columns (total 2 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 x 100 non-null float64\n", " 1 y 100 non-null float64\n", "dtypes: float64(2)\n", "memory usage: 1.7 KB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": 3, "id": "confident-tractor", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xy
count100.000000100.000000
mean-0.190087-0.028231
std0.8550970.879615
min-2.183391-1.905329
25%-0.811593-0.705061
50%-0.230736-0.134842
75%0.4057740.646319
max1.5517951.802841
\n", "
" ], "text/plain": [ " x y\n", "count 100.000000 100.000000\n", "mean -0.190087 -0.028231\n", "std 0.855097 0.879615\n", "min -2.183391 -1.905329\n", "25% -0.811593 -0.705061\n", "50% -0.230736 -0.134842\n", "75% 0.405774 0.646319\n", "max 1.551795 1.802841" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe()" ] }, { "cell_type": "code", "execution_count": 75, "id": "educated-winning", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',\n", " '2021-01-05'],\n", " dtype='datetime64[ns]', freq='D')" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dates = pd.date_range(\"20210101\", periods=100)\n", "dates[0:5]" ] }, { "cell_type": "code", "execution_count": 76, "id": "later-grill", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xy
2021-01-01-1.823644-1.357490
2021-01-020.4070330.140167
2021-01-03-0.2190901.195670
2021-01-041.2800750.955749
2021-01-051.201438-1.155646
\n", "
" ], "text/plain": [ " x y\n", "2021-01-01 -1.823644 -1.357490\n", "2021-01-02 0.407033 0.140167\n", "2021-01-03 -0.219090 1.195670\n", "2021-01-04 1.280075 0.955749\n", "2021-01-05 1.201438 -1.155646" ] }, "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame(np.random.randn(100, 2), columns=['x', 'y'], index=dates)\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 78, "id": "metric-mounting", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "df.plot();" ] }, { "cell_type": "code", "execution_count": 6, "id": "shared-valley", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xy
2021-02-11-0.135543-1.036254
2021-02-122.0167050.791184
2021-02-130.656834-0.423262
2021-02-14-0.6308411.101213
2021-02-151.6848711.491705
\n", "
" ], "text/plain": [ " x y\n", "2021-02-11 -0.135543 -1.036254\n", "2021-02-12 2.016705 0.791184\n", "2021-02-13 0.656834 -0.423262\n", "2021-02-14 -0.630841 1.101213\n", "2021-02-15 1.684871 1.491705" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['2021-02-11':'2021-02-15']" ] }, { "cell_type": "code", "execution_count": 7, "id": "agreed-productivity", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xy
2021-01-01-1.0771200.342451
2021-01-030.071963-0.305716
2021-01-040.4692400.098572
2021-01-06-0.333955-0.482170
2021-01-08-0.384534-0.758705
.........
2021-04-06-0.9071232.424140
2021-04-07-0.2804480.262274
2021-04-08-0.4770092.236021
2021-04-09-0.1410622.128535
2021-04-10-0.301844-1.010541
\n", "

67 rows × 2 columns

\n", "
" ], "text/plain": [ " x y\n", "2021-01-01 -1.077120 0.342451\n", "2021-01-03 0.071963 -0.305716\n", "2021-01-04 0.469240 0.098572\n", "2021-01-06 -0.333955 -0.482170\n", "2021-01-08 -0.384534 -0.758705\n", "... ... ...\n", "2021-04-06 -0.907123 2.424140\n", "2021-04-07 -0.280448 0.262274\n", "2021-04-08 -0.477009 2.236021\n", "2021-04-09 -0.141062 2.128535\n", "2021-04-10 -0.301844 -1.010541\n", "\n", "[67 rows x 2 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.loc[df.x < 0.5]" ] }, { "cell_type": "code", "execution_count": 8, "id": "damaged-channel", "metadata": {}, "outputs": [], "source": [ "df['label'] = [chr(97 + int(num)) for num in abs(df.x.values) * 10]" ] }, { "cell_type": "code", "execution_count": 9, "id": "flexible-butter", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xylabel
2021-01-01-1.0771200.342451k
2021-01-022.7230080.196346|
2021-01-030.071963-0.305716a
2021-01-040.4692400.098572e
2021-01-051.2261761.319617m
\n", "
" ], "text/plain": [ " x y label\n", "2021-01-01 -1.077120 0.342451 k\n", "2021-01-02 2.723008 0.196346 |\n", "2021-01-03 0.071963 -0.305716 a\n", "2021-01-04 0.469240 0.098572 e\n", "2021-01-05 1.226176 1.319617 m" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 10, "id": "bibliographic-winning", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xylabel
2021-01-030.071963-0.305716a
2021-02-250.0915951.604293a
2021-03-120.088501-0.170577a
2021-03-310.0266160.661760a
2021-04-010.096728-2.732678a
\n", "
" ], "text/plain": [ " x y label\n", "2021-01-03 0.071963 -0.305716 a\n", "2021-02-25 0.091595 1.604293 a\n", "2021-03-12 0.088501 -0.170577 a\n", "2021-03-31 0.026616 0.661760 a\n", "2021-04-01 0.096728 -2.732678 a" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.loc[df.label == 'a']" ] }, { "cell_type": "code", "execution_count": 11, "id": "designed-watson", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEGCAYAAACUzrmNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAPAklEQVR4nO3dfWxd9X3H8ff3YmMbnA4vSUUXlwUGYmPIypDL2kZiVeGPdEVBrTWplaa1e1BUaexRXdKJqWqlPWju/pimVZsi6J4L6vBYqq5dAU1bkVooZk3cMGihFIbpOowbVrw6xun97g/faLFx/LvxvTfn3vj9kq5y7znnnvPh6HI//p1zfByZiSRJG6lVHUCS1P0sC0lSkWUhSSqyLCRJRZaFJKmor+oA52LHjh25e/fuqmNIUk957LHHXsrMna2so6fKYvfu3UxPT1cdQ5J6SkQ81+o6PAwlSSqyLCRJRZaFJKnIspAkFVkWkqQiy0KS2mB+YYljz7/M/MJS1VE6oqcunZWkbnTk6Ascmpqhv1ZjuV5ncmKM/Xt2VR2rrRxZSFIL5heWODQ1w8nlOq8sneLkcp2DUzMX3AjDspCkFsyeWKS/tvqrtL9WY/bEYkWJOsOykKQWjI4MsVyvr5q2XK8zOjJUUaLOsCwkqQXbhweYnBhjsL/GtoE+BvtrTE6MsX14oOpobeUJbklq0f49u9h79Q5mTywyOjJ0wRUFWBaS1BbbhwcuyJI4zcNQkqQiy0KSVGRZSJKKLAtJUpFlIUkqsiwkSUWWhSSpyLKQJBVZFpKkIstCklRkWUiSiiwLSVKRZSFJKrIsJElFloUkqciykCQVWRaSpCLLQpJUZFlIkoosC0lSUaVlERGfiIgXI+J4lTkkSRuremTxl8C+ijNIkgoqLYvM/ALwnSozSJLKqh5ZFEXEgYiYjojpubm5quNI0pbU9WWRmYczczwzx3fu3Fl1HEnakrq+LCRJ1bMsJElFVV86ezfwJeDaiJiNiF+sMo8kaX19VW48M99b5fYlSc3xMJQkqciykCQVWRaSpCLLQpJUZFlIkoosC0lS0ZYoi/mFJY49/zLzC0tVR5GknlTp71mcD0eOvsChqRn6azWW63UmJ8bYv2dX1bEkqadc0COL+YUlDk3NcHK5zitLpzi5XOfg1IwjDEk6Rxd0WcyeWKS/tvo/sb9WY/bEYkWJJKk3XdBlMToyxHK9vmracr3O6MhQRYkkqTdd0GWxfXiAyYkxBvtrbBvoY7C/xuTEGNuHB6qOJkk95YI/wb1/zy72Xr2D2ROLjI4MWRSStAkXfFnAygjDkpCkzbugD0NJktrDspAkFVkWkqQiy0KSVGRZSJKKLAtJUpFlIUkqsiwkSUWWhSSpyLKQJBVZFpKkIstCklRkWUiSiiwLSVKRZSFJKrIsJElFloUkqciykCQVWRaSpCLLQpJUZFlIkoosC0lSUaVlERH7IuJrEfF0RHyoyiySpLOrrCwi4iLg48A7gOuA90bEdVXlkSSdXZUjixuBpzPzmcx8F
bgHuK3CPJKks6iyLHYBz5/xerYxbZWIOBAR0xExPTc3d97CSZL+X5VlEetMy9dMyDycmeOZOb5z587zEEuStFaVZTELvPGM16PAtyrKIknaQJVl8ShwTURcGREXA+8BPl1hHknSWfRVteHMPBURtwOfBy4CPpGZj1eVR5J0dpWVBUBmfhb4bJUZJGmt+YUlZk8sMjoyxPbhgarjdIVKy0KSus2Roy9waGqG/lqN5XqdyYkx9u95zYWaW463+5CkhvmFJQ5NzXByuc4rS6c4uVzn4NQM8wtLVUernGUhSQ2zJxbpr63+Wuyv1Zg9sVhRou5hWUhSw+jIEMv1+qppy/U6oyNDFSXqHpaFJDVsHx5gcmKMwf4a2wb6GOyvMTkx5kluPMEtbUle7XN2+/fsYu/VO9w/a1gW0hbj1T5l24cHLIk1PAwlbSFe7aPNsiykLcSrfbRZloW0hXi1jzbLspC2EK/20WZ5glvaYrzaR5thWUhbkFf76Fx5GEqSVGRZSJKKLAtJUpFlIUkqsiwkSUWWhSSpqFgWEXF7RIycjzCSpO7UzMjicuDRiPhUROyLiOh0KElSdymWRWb+DnANcBfwfuCpiPj9iPiRDmeTJHWJps5ZZGYC3248TgEjwL0RMdnBbJKkLlG83UdE/CrwPuAl4E7gtzJzOSJqwFPAwc5GlCRVrZl7Q+0A3p2Zz505MTPrEXFrZ2JJkrpJsSwy88MbzHuivXEkSd3I37OQJBVZFpKkIstCklRkWUiSiiwLSVKRZSFJKrIsJElFloUkqciykCQVWRaSpCLLQpJUVElZRMTPRMTjEVGPiPEqMkiSmlfVyOI48G7gCxVtX5J0Dpq5RXnbnb5brX+hVZJ6Q9efs4iIAxExHRHTc3NzVceRpC2pYyOLiHgQuHydWXdk5pFm15OZh4HDAOPj49mmeJKkc9CxssjMWzq1bknS+dX1h6EkSdWr6tLZd0XELPAW4J8i4vNV5JAkNaeqq6HuA+6rYtuSpHPnYShJUpFlIUkqsiwkSUWWhSSpyLKQJBVZFpKkIstCklRkWUiSiiwLSVKRZSFJKrIsJElFloUkqciykCQVWRaSpCLLQpJUZFlIkoosC0lSkWUhSSqyLCRJRZaFJKnIspAkFVkWkqQiy0KSVGRZSJKKLAtJUpFlIUkqsiwkSUWWhSSpyLKQJBVZFpKkIstCklRkWUiSiiwLSVKRZSFJKrIsJElFloUkqaiSsoiIj0XEkxExExH3RcRlVeSQJDWnqpHFA8D1mTkGfB347YpySJKaUElZZOb9mXmq8fJhYLSKHJKk5nTDOYtfAD5XdQhJ0tn1dWrFEfEgcPk6s+7IzCONZe4ATgF/t8F6DgAHAK644ooOJJUklXSsLDLzlo3mR8T7gFuBmzMzN1jPYeAwwPj4+FmXkyR1TsfKYiMRsQ84BPxUZn6vigySpOZVdc7iT4FtwAMRcTQi/ryiHJKkJlQyssjMq6vYriRpc7rhaihJUpezLCRJRZaFJKnIspAkFVkWkqQiy0KSVGRZSJKKLAtJUpFlIUkqsiwkSUWWhSSpyLKQJBVZFpKkIstCklRkWUiSiiwLSVKRZSFJKrIsJElFloUkqciykCQVWRaSpCLLQpJUZFlIkoosC0lSkWUhSSqyLCSpy80vLHHs+ZeZX1iqLENfZVuWJBUdOfoCh6Zm6K/VWK7XmZwYY/+eXec9hyMLSepS8wtLHJqa4eRynVeWTnFyuc7BqZlKRhiWhSR1qdkTi/TXVn9N99dqzJ5YPO9ZLAtJ6lKjI0Ms1+urpi3X64yODJ33LJaFJHWp7cMDTE6MMdhfY9tAH4P9NSYnxtg+PHDes3iCW5K62P49u9h79Q5mTywyOjJUSVGAZSFJXW/78EBlJXGah6EkSUWWhSSpyLKQJBVZFpKkIstCklQUmVl1hqZFxBzwXAWb3gG8VMF2N6OXskJv5e2lrNBbec3aOTuASzNzZysr6amyqEpETGfmeNU5m
tFLWaG38vZSVuitvGbtnHbl9TCUJKnIspAkFVkWzTlcdYBz0EtZobfy9lJW6K28Zu2ctuT1nIUkqciRhSSpyLKQJBVt6bKIiH0R8bWIeDoiPrTO/IiIP2nMn4mIGxrTByPiyxFxLCIej4iPdnPeM+ZfFBFfiYjPdHPWiHg2Ir4aEUcjYrrTWduQ97KIuDcinoyIJyLiLd2YNSKubezT04/vRsSvd2PWxrzfaPz/dTwi7o6IwU5mbUPeX2tkfbzT+7XJrD8aEV+KiKWI+OC5vHddmbklH8BFwDeAq4CLgWPAdWuW+Wngc0AAbwYeaUwPYLjxvB94BHhzt+Y9Y/5vAp8EPtPNWYFngR298FlozPsr4Jcazy8GLuvWrGvW823gh7sxK7AL+CYw1Hj9KeD93fo5AK4HjgOXsPKnHx4Erqk46+uBNwG/B3zwXN673mMrjyxuBJ7OzGcy81XgHuC2NcvcBvx1rngYuCwi3tB4vdBYpr/x6PSVApvOCxARo8A7gTs7nLPlrBXYdN6IeB1wE3AXQGa+mpkvd2PWNcvcDHwjMzt5R4RWs/YBQxHRx8qX8Lc6mLXVvD8GPJyZ38vMU8C/Ae+qMmtmvpiZjwLL5/re9WzlstgFPH/G69nGtKaWaRzSOQq8CDyQmY90LurGWZpc5o+Bg0Cdzms1awL3R8RjEXGgYymby1Ja5ipgDviLxiG+OyPi0i7Neqb3AHe3Pd2551h3mcx8Afgj4D+B/wL+JzPv72DWs2ZpcpnjwE0RsT0iLmFlBPLGirO29b1buSxinWlrRwdnXSYzv5+Ze4BR4MaIuL698V5j03kj4lbgxcx8rP2x1tXSvgX2ZuYNwDuAX46Im9oZbh2t5O0DbgD+LDN/AvhfoLljwJvT6r4lIi4G9gN/38Zc62nlMzvCyk+7VwI/BFwaET/b5nxrbTpvZj4B/CHwAPDPrBzaOdXeeOUcnXzvVi6LWVY3/yivHeYWl2kccvhXYF/bE55jlg2W2Qvsj4hnWRlyvj0i/rZzUVvbt5l5+t8XgftYGTZ3Uit5Z4HZM0aW97JSHp3Sjs/tO4B/z8z/7kjC5nNstMwtwDczcy4zl4F/AN7awawbZWlqmcy8KzNvyMybgO8AT1Wctb3v7dQJmG5/sPIT4TOs/ORy+iTPj69Z5p2sPpn15cb0nTROYgJDwEPArd2ad80yb6PzJ7hb2beXAtvOeP5FYF+35m3Mewi4tvH8I8DHujVrY/49wM93cp+24XPwk8DjrJyrCFYuIviVbs3bmPf6xr9XAE8CI1VmPWPZj7D6BHfT7121nk5/YLr5wcpxxa+zcmXAHY1pHwA+0HgewMcb878KjDemjwFfAWZYOVb54W7Ou2Ydb6PDZdHivr2q8eE91viyuKPb9y2wB5hufB7+sZNfEm3IegkwD/xAD+zXj7LypXsc+BtgoMvzPgT8R+Oze3MXZL2clVHEd4GXG89fd7b3lh7e7kOSVLSVz1lIkppkWUiSiiwLSVKRZSFJKrIsJElFloUkqciykCQVWRZSCyLiTY2/azAYEZc2/pZBp+8TJp13/lKe1KKI+F1gkJVbv8xm5h9UHElqO8tCalHjLq6PAieBt2bm9yuOJLWdh6Gk1v0gMAxsY2WEIV1wHFlILYqIT7NyJ9crgTdk5u0VR5Larq/qAFIvi4ifA05l5icj4iLgixHx9sz8l6qzSe3kyEKSVOQ5C0lSkWUhSSqyLCRJRZaFJKnIspAkFVkWkqQiy0KSVPR/DqdD1GYrnbcAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "df.loc[df.label == 'a'].plot('x', 'y', kind='scatter');" ] }, { "cell_type": "code", "execution_count": 17, "id": "crazy-count", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby(by=df[\"label\"])" ] }, { "cell_type": "code", "execution_count": 16, "id": "celtic-chassis", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xy
label
a0.375404-0.942918
b0.057332-0.963949
c0.211865-2.870494
d0.435584-3.043571
e-0.3275974.056683
f1.2482950.227028
g0.6996171.612593
h0.7397510.201644
i0.925425-1.960625
j-1.9768306.748663
k0.016653-0.549432
l1.077869-3.301742
m-0.0670731.604039
n-4.061371-1.622024
o4.2955360.009890
p-1.506907-0.548990
q5.0028020.754617
r0.0055352.562163
t-1.9845811.133631
u2.0167050.791184
v-0.0186093.506080
x2.366788-0.014051
|2.7230080.196346
\n", "
" ], "text/plain": [ " x y\n", "label \n", "a 0.375404 -0.942918\n", "b 0.057332 -0.963949\n", "c 0.211865 -2.870494\n", "d 0.435584 -3.043571\n", "e -0.327597 4.056683\n", "f 1.248295 0.227028\n", "g 0.699617 1.612593\n", "h 0.739751 0.201644\n", "i 0.925425 -1.960625\n", "j -1.976830 6.748663\n", "k 0.016653 -0.549432\n", "l 1.077869 -3.301742\n", "m -0.067073 1.604039\n", "n -4.061371 -1.622024\n", "o 4.295536 0.009890\n", "p -1.506907 -0.548990\n", "q 5.002802 0.754617\n", "r 0.005535 2.562163\n", "t -1.984581 1.133631\n", "u 2.016705 0.791184\n", "v -0.018609 3.506080\n", "x 2.366788 -0.014051\n", "| 2.723008 0.196346" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby(by=df[\"label\"]).sum()" ] }, { "cell_type": "code", "execution_count": 63, "id": "incorrect-albania", "metadata": {}, "outputs": [], "source": [ "df1 = pd.DataFrame(np.random.randn(5, 2), columns=['x', 'y'])\n", "df2 = pd.DataFrame(np.random.randn(5, 2), columns=['x', 'y'])" ] }, { "cell_type": "code", "execution_count": 64, "id": "amber-clinton", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xy
01.0654470.508669
1-0.1112000.029376
20.251230-0.855050
3-0.206806-1.267250
4-0.195877-0.586469
\n", "
" ], "text/plain": [ " x y\n", "0 1.065447 0.508669\n", "1 -0.111200 0.029376\n", "2 0.251230 -0.855050\n", "3 -0.206806 -1.267250\n", "4 -0.195877 -0.586469" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1" ] }, { "cell_type": "code", "execution_count": 65, "id": "recovered-arbor", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xy
01.879820-1.054090
10.834352-2.434033
2-0.9151623.600316
3-0.7817190.633350
4-0.003787-0.752671
\n", "
" ], "text/plain": [ " x y\n", "0 1.879820 -1.054090\n", "1 0.834352 -2.434033\n", "2 -0.915162 3.600316\n", "3 -0.781719 0.633350\n", "4 -0.003787 -0.752671" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2" ] }, { "cell_type": "code", "execution_count": 71, "id": "acute-minister", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xy
01.0654470.508669
1-0.1112000.029376
20.251230-0.855050
3-0.206806-1.267250
4-0.195877-0.586469
51.879820-1.054090
60.834352-2.434033
7-0.9151623.600316
8-0.7817190.633350
9-0.003787-0.752671
\n", "
" ], "text/plain": [ " x y\n", "0 1.065447 0.508669\n", "1 -0.111200 0.029376\n", "2 0.251230 -0.855050\n", "3 -0.206806 -1.267250\n", "4 -0.195877 -0.586469\n", "5 1.879820 -1.054090\n", "6 0.834352 -2.434033\n", "7 -0.915162 3.600316\n", "8 -0.781719 0.633350\n", "9 -0.003787 -0.752671" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([df1, df2], ignore_index=True)" ] }, { "cell_type": "code", "execution_count": 72, "id": "unsigned-prime", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "2 Heikkinen, Miss. Laina female 26.0 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "4 Allen, Mr. William Henry male 35.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 NaN S \n", "1 0 PC 17599 71.2833 C85 C \n", "2 0 STON/O2. 3101282 7.9250 NaN S \n", "3 0 113803 53.1000 C123 S \n", "4 0 373450 8.0500 NaN S " ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic = pd.read_csv('https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv')\n", "df_titanic.head()" ] }, { "cell_type": "code", "execution_count": 73, "id": "statewide-spectacular", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "df_titanic_grouped = df_titanic.groupby('Embarked')\n", "\n", "(df_titanic_grouped.sum() / df_titanic_grouped.count()).plot.bar(\n", " y='Survived',\n", " ylabel='Passengers that survived per embarkment\\n(%)',\n", " xlabel='Port of Embarkation\\n(C = Cherbourg; Q = Queenstown; S = Southampton)'\n", ");" ] }, { "cell_type": "code", "execution_count": 25, "id": "beneficial-civilization", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\u001b[0;31mSignature:\u001b[0m\n", "\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpathlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mIO\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m~\u001b[0m\u001b[0mAnyStr\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0msep\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m','\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mdelimiter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mheader\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'infer'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mindex_col\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m 
\u001b[0musecols\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0msqueeze\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mprefix\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mmangle_dupe_cols\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mconverters\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mtrue_values\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mfalse_values\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mskipinitialspace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mskiprows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mskipfooter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mnrows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m 
\u001b[0mna_values\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mkeep_default_na\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mna_filter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mskip_blank_lines\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mparse_dates\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0minfer_datetime_format\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mkeep_date_col\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mdate_parser\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mdayfirst\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mcache_dates\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mchunksize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m 
\u001b[0mcompression\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'infer'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mthousands\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mdecimal\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'.'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mlineterminator\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mquotechar\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'\"'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mquoting\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mdoublequote\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mescapechar\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mcomment\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mdialect\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0merror_bad_lines\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mwarn_bad_lines\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m 
\u001b[0mdelim_whitespace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mlow_memory\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mmemory_map\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mfloat_precision\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mDocstring:\u001b[0m\n", "Read a comma-separated values (csv) file into DataFrame.\n", "\n", "Also supports optionally iterating or breaking of the file\n", "into chunks.\n", "\n", "Additional help can be found in the online docs for\n", "`IO Tools `_.\n", "\n", "Parameters\n", "----------\n", "filepath_or_buffer : str, path object or file-like object\n", " Any valid string path is acceptable. The string could be a URL. Valid\n", " URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is\n", " expected. A local file could be: file://localhost/path/to/table.csv.\n", "\n", " If you want to pass in a path object, pandas accepts any ``os.PathLike``.\n", "\n", " By file-like object, we refer to objects with a ``read()`` method, such as\n", " a file handler (e.g. via builtin ``open`` function) or ``StringIO``.\n", "sep : str, default ','\n", " Delimiter to use. If sep is None, the C engine cannot automatically detect\n", " the separator, but the Python parsing engine can, meaning the latter will\n", " be used and automatically detect the separator by Python's builtin sniffer\n", " tool, ``csv.Sniffer``. 
In addition, separators longer than 1 character and\n", " different from ``'\\s+'`` will be interpreted as regular expressions and\n", " will also force the use of the Python parsing engine. Note that regex\n", " delimiters are prone to ignoring quoted data. Regex example: ``'\\r\\t'``.\n", "delimiter : str, default ``None``\n", " Alias for sep.\n", "header : int, list of int, default 'infer'\n", " Row number(s) to use as the column names, and the start of the\n", " data. Default behavior is to infer the column names: if no names\n", " are passed the behavior is identical to ``header=0`` and column\n", " names are inferred from the first line of the file, if column\n", " names are passed explicitly then the behavior is identical to\n", " ``header=None``. Explicitly pass ``header=0`` to be able to\n", " replace existing names. The header can be a list of integers that\n", " specify row locations for a multi-index on the columns\n", " e.g. [0,1,3]. Intervening rows that are not specified will be\n", " skipped (e.g. 2 in this example is skipped). Note that this\n", " parameter ignores commented lines and empty lines if\n", " ``skip_blank_lines=True``, so ``header=0`` denotes the first line of\n", " data rather than the first line of the file.\n", "names : array-like, optional\n", " List of column names to use. If the file contains a header row,\n", " then you should explicitly pass ``header=0`` to override the column names.\n", " Duplicates in this list are not allowed.\n", "index_col : int, str, sequence of int / str, or False, default ``None``\n", " Column(s) to use as the row labels of the ``DataFrame``, either given as\n", " string name or column index. If a sequence of int / str is given, a\n", " MultiIndex is used.\n", "\n", " Note: ``index_col=False`` can be used to force pandas to *not* use the first\n", " column as the index, e.g. 
when you have a malformed file with delimiters at\n", " the end of each line.\n", "usecols : list-like or callable, optional\n", " Return a subset of the columns. If list-like, all elements must either\n", " be positional (i.e. integer indices into the document columns) or strings\n", " that correspond to column names provided either by the user in `names` or\n", " inferred from the document header row(s). For example, a valid list-like\n", " `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``.\n", " Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``.\n", " To instantiate a DataFrame from ``data`` with element order preserved use\n", " ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns\n", " in ``['foo', 'bar']`` order or\n", " ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]``\n", " for ``['bar', 'foo']`` order.\n", "\n", " If callable, the callable function will be evaluated against the column\n", " names, returning names where the callable function evaluates to True. An\n", " example of a valid callable argument would be ``lambda x: x.upper() in\n", " ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster\n", " parsing time and lower memory usage.\n", "squeeze : bool, default False\n", " If the parsed data only contains one column then return a Series.\n", "prefix : str, optional\n", " Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...\n", "mangle_dupe_cols : bool, default True\n", " Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than\n", " 'X'...'X'. Passing in False will cause data to be overwritten if there\n", " are duplicate names in the columns.\n", "dtype : Type name or dict of column -> type, optional\n", " Data type for data or columns. E.g. 
{'a': np.float64, 'b': np.int32,\n", " 'c': 'Int64'}\n", " Use `str` or `object` together with suitable `na_values` settings\n", " to preserve and not interpret dtype.\n", " If converters are specified, they will be applied INSTEAD\n", " of dtype conversion.\n", "engine : {'c', 'python'}, optional\n", " Parser engine to use. The C engine is faster while the python engine is\n", " currently more feature-complete.\n", "converters : dict, optional\n", " Dict of functions for converting values in certain columns. Keys can either\n", " be integers or column labels.\n", "true_values : list, optional\n", " Values to consider as True.\n", "false_values : list, optional\n", " Values to consider as False.\n", "skipinitialspace : bool, default False\n", " Skip spaces after delimiter.\n", "skiprows : list-like, int or callable, optional\n", " Line numbers to skip (0-indexed) or number of lines to skip (int)\n", " at the start of the file.\n", "\n", " If callable, the callable function will be evaluated against the row\n", " indices, returning True if the row should be skipped and False otherwise.\n", " An example of a valid callable argument would be ``lambda x: x in [0, 2]``.\n", "skipfooter : int, default 0\n", " Number of lines at bottom of file to skip (Unsupported with engine='c').\n", "nrows : int, optional\n", " Number of rows of file to read. Useful for reading pieces of large files.\n", "na_values : scalar, str, list-like, or dict, optional\n", " Additional strings to recognize as NA/NaN. If dict passed, specific\n", " per-column NA values. 
By default the following values are interpreted as\n", " NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan',\n", " '1.#IND', '1.#QNAN', '<NA>', 'N/A', 'NA', 'NULL', 'NaN', 'n/a',\n", " 'nan', 'null'.\n", "keep_default_na : bool, default True\n", " Whether or not to include the default NaN values when parsing the data.\n", " Depending on whether `na_values` is passed in, the behavior is as follows:\n", "\n", " * If `keep_default_na` is True, and `na_values` are specified, `na_values`\n", " is appended to the default NaN values used for parsing.\n", " * If `keep_default_na` is True, and `na_values` are not specified, only\n", " the default NaN values are used for parsing.\n", " * If `keep_default_na` is False, and `na_values` are specified, only\n", " the NaN values specified `na_values` are used for parsing.\n", " * If `keep_default_na` is False, and `na_values` are not specified, no\n", " strings will be parsed as NaN.\n", "\n", " Note that if `na_filter` is passed in as False, the `keep_default_na` and\n", " `na_values` parameters will be ignored.\n", "na_filter : bool, default True\n", " Detect missing value markers (empty strings and the value of na_values). In\n", " data without any NAs, passing na_filter=False can improve the performance\n", " of reading a large file.\n", "verbose : bool, default False\n", " Indicate number of NA values placed in non-numeric columns.\n", "skip_blank_lines : bool, default True\n", " If True, skip over blank lines rather than interpreting as NaN values.\n", "parse_dates : bool or list of int or names or list of lists or dict, default False\n", " The behavior is as follows:\n", "\n", " * boolean. If True -> try parsing the index.\n", " * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3\n", " each as a separate date column.\n", " * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as\n", " a single date column.\n", " * dict, e.g. 
{'foo' : [1, 3]} -> parse columns 1, 3 as date and call\n", " result 'foo'\n", "\n", " If a column or index cannot be represented as an array of datetimes,\n", " say because of an unparseable value or a mixture of timezones, the column\n", " or index will be returned unaltered as an object data type. For\n", " non-standard datetime parsing, use ``pd.to_datetime`` after\n", " ``pd.read_csv``. To parse an index or column with a mixture of timezones,\n", " specify ``date_parser`` to be a partially-applied\n", " :func:`pandas.to_datetime` with ``utc=True``. See\n", " :ref:`io.csv.mixed_timezones` for more.\n", "\n", " Note: A fast-path exists for iso8601-formatted dates.\n", "infer_datetime_format : bool, default False\n", " If True and `parse_dates` is enabled, pandas will attempt to infer the\n", " format of the datetime strings in the columns, and if it can be inferred,\n", " switch to a faster method of parsing them. In some cases this can increase\n", " the parsing speed by 5-10x.\n", "keep_date_col : bool, default False\n", " If True and `parse_dates` specifies combining multiple columns then\n", " keep the original columns.\n", "date_parser : function, optional\n", " Function to use for converting a sequence of string columns to an array of\n", " datetime instances. The default uses ``dateutil.parser.parser`` to do the\n", " conversion. 
Pandas will try to call `date_parser` in three different ways,\n", " advancing to the next if an exception occurs: 1) Pass one or more arrays\n", " (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the\n", " string values from the columns defined by `parse_dates` into a single array\n", " and pass that; and 3) call `date_parser` once for each row using one or\n", " more strings (corresponding to the columns defined by `parse_dates`) as\n", " arguments.\n", "dayfirst : bool, default False\n", " DD/MM format dates, international and European format.\n", "cache_dates : bool, default True\n", " If True, use a cache of unique, converted dates to apply the datetime\n", " conversion. May produce significant speed-up when parsing duplicate\n", " date strings, especially ones with timezone offsets.\n", "\n", " .. versionadded:: 0.25.0\n", "iterator : bool, default False\n", " Return TextFileReader object for iteration or getting chunks with\n", " ``get_chunk()``.\n", "chunksize : int, optional\n", " Return TextFileReader object for iteration.\n", " See the `IO Tools docs\n", " <https://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking>`_\n", " for more information on ``iterator`` and ``chunksize``.\n", "compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'\n", " For on-the-fly decompression of on-disk data. If 'infer' and\n", " `filepath_or_buffer` is path-like, then detect compression from the\n", " following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no\n", " decompression). If using 'zip', the ZIP file must contain only one data\n", " file to be read in. Set to None for no decompression.\n", "thousands : str, optional\n", " Thousands separator.\n", "decimal : str, default '.'\n", " Character to recognize as decimal point (e.g. use ',' for European data).\n", "lineterminator : str (length 1), optional\n", " Character to break file into lines. 
Only valid with C parser.\n", "quotechar : str (length 1), optional\n", " The character used to denote the start and end of a quoted item. Quoted\n", " items can include the delimiter and it will be ignored.\n", "quoting : int or csv.QUOTE_* instance, default 0\n", " Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of\n", " QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3).\n", "doublequote : bool, default ``True``\n", " When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate\n", " whether or not to interpret two consecutive quotechar elements INSIDE a\n", " field as a single ``quotechar`` element.\n", "escapechar : str (length 1), optional\n", " One-character string used to escape other characters.\n", "comment : str, optional\n", " Indicates remainder of line should not be parsed. If found at the beginning\n", " of a line, the line will be ignored altogether. This parameter must be a\n", " single character. Like empty lines (as long as ``skip_blank_lines=True``),\n", " fully commented lines are ignored by the parameter `header` but not by\n", " `skiprows`. For example, if ``comment='#'``, parsing\n", " ``#empty\\na,b,c\\n1,2,3`` with ``header=0`` will result in 'a,b,c' being\n", " treated as the header.\n", "encoding : str, optional\n", " Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python\n", " standard encodings\n", " <https://docs.python.org/3/library/codecs.html#standard-encodings>`_ .\n", "dialect : str or csv.Dialect, optional\n", " If provided, this parameter will override values (default or not) for the\n", " following parameters: `delimiter`, `doublequote`, `escapechar`,\n", " `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to\n", " override values, a ParserWarning will be issued. See csv.Dialect\n", " documentation for more details.\n", "error_bad_lines : bool, default True\n", " Lines with too many fields (e.g. 
a csv line with too many commas) will by\n", " default cause an exception to be raised, and no DataFrame will be returned.\n", " If False, then these \"bad lines\" will dropped from the DataFrame that is\n", " returned.\n", "warn_bad_lines : bool, default True\n", " If error_bad_lines is False, and warn_bad_lines is True, a warning for each\n", " \"bad line\" will be output.\n", "delim_whitespace : bool, default False\n", " Specifies whether or not whitespace (e.g. ``' '`` or ``' '``) will be\n", " used as the sep. Equivalent to setting ``sep='\\s+'``. If this option\n", " is set to True, nothing should be passed in for the ``delimiter``\n", " parameter.\n", "low_memory : bool, default True\n", " Internally process the file in chunks, resulting in lower memory use\n", " while parsing, but possibly mixed type inference. To ensure no mixed\n", " types either set False, or specify the type with the `dtype` parameter.\n", " Note that the entire file is read into a single DataFrame regardless,\n", " use the `chunksize` or `iterator` parameter to return the data in chunks.\n", " (Only valid with C parser).\n", "memory_map : bool, default False\n", " If a filepath is provided for `filepath_or_buffer`, map the file object\n", " directly onto memory and access the data directly from there. Using this\n", " option can improve performance because there is no longer any I/O overhead.\n", "float_precision : str, optional\n", " Specifies which converter the C engine should use for floating-point\n", " values. 
The options are `None` for the ordinary converter,\n", " `high` for the high-precision converter, and `round_trip` for the\n", " round-trip converter.\n", "\n", "Returns\n", "-------\n", "DataFrame or TextParser\n", " A comma-separated values (csv) file is returned as two-dimensional\n", " data structure with labeled axes.\n", "\n", "See Also\n", "--------\n", "DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.\n", "read_csv : Read a comma-separated values (csv) file into DataFrame.\n", "read_fwf : Read a table of fixed-width formatted lines into DataFrame.\n", "\n", "Examples\n", "--------\n", ">>> pd.read_csv('data.csv') # doctest: +SKIP\n", "\u001b[0;31mFile:\u001b[0m ~/miniconda3/envs/pangeo/lib/python3.8/site-packages/pandas/io/parsers.py\n", "\u001b[0;31mType:\u001b[0m function\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "pd.read_csv?" ] }, { "cell_type": "markdown", "id": "powerful-chain", "metadata": {}, "source": [ "For more information, see the [documentation](https://pandas.pydata.org/)." ] }, { "cell_type": "code", "execution_count": null, "id": "charitable-philippines", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "pangeo", "language": "python", "name": "pangeo" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.6" } }, "nbformat": 4, "nbformat_minor": 5 }