"
],
"text/plain": [
" PassengerId Survived Pclass \\\n",
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"\n",
" Name Sex Age SibSp \\\n",
"0 Braund, Mr. Owen Harris male 22.0 1 \n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"4 Allen, Mr. William Henry male 35.0 0 \n",
"\n",
" Parch Ticket Fare Cabin Embarked \n",
"0 0 A/5 21171 7.2500 NaN S \n",
"1 0 PC 17599 71.2833 C85 C \n",
"2 0 STON/O2. 3101282 7.9250 NaN S \n",
"3 0 113803 53.1000 C123 S \n",
"4 0 373450 8.0500 NaN S "
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Download the Titanic passenger table from GitHub and preview its first five rows.\n",
"df_titanic = pd.read_csv(\n",
"    filepath_or_buffer='https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv',\n",
")\n",
"df_titanic.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "statewide-spectacular",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Group passengers by port of embarkation (groupby drops rows whose port is missing by default).\n",
"df_titanic_grouped = df_titanic.groupby('Embarked')\n",
"\n",
"# 'Survived' is coded 0/1, so its per-group mean is the survival rate.\n",
"# Selecting the column first avoids summing the text columns (Name, Ticket, ...),\n",
"# and the factor of 100 makes the values match the '(%)' axis label.\n",
"survival_rate_pct = df_titanic_grouped['Survived'].mean() * 100\n",
"\n",
"survival_rate_pct.plot.bar(\n",
"    ylabel='Passengers that survived per embarkment\\n(%)',\n",
"    xlabel='Port of Embarkation\\n(C = Cherbourg; Q = Queenstown; S = Southampton)'\n",
");"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "beneficial-civilization",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\u001b[0;31mSignature:\u001b[0m\n",
"\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpathlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mIO\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m~\u001b[0m\u001b[0mAnyStr\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0msep\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m','\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mdelimiter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mheader\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'infer'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mindex_col\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0musecols\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0msqueeze\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mprefix\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mmangle_dupe_cols\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mconverters\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mtrue_values\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mfalse_values\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mskipinitialspace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mskiprows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mskipfooter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mnrows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mna_values\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mkeep_default_na\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mna_filter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mskip_blank_lines\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mparse_dates\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0minfer_datetime_format\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mkeep_date_col\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mdate_parser\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mdayfirst\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mcache_dates\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mchunksize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mcompression\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'infer'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mthousands\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mdecimal\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'.'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mlineterminator\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mquotechar\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'\"'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mquoting\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mdoublequote\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mescapechar\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mcomment\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mdialect\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0merror_bad_lines\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mwarn_bad_lines\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mdelim_whitespace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mlow_memory\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mmemory_map\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mfloat_precision\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mDocstring:\u001b[0m\n",
"Read a comma-separated values (csv) file into DataFrame.\n",
"\n",
"Also supports optionally iterating or breaking of the file\n",
"into chunks.\n",
"\n",
"Additional help can be found in the online docs for\n",
"`IO Tools `_.\n",
"\n",
"Parameters\n",
"----------\n",
"filepath_or_buffer : str, path object or file-like object\n",
" Any valid string path is acceptable. The string could be a URL. Valid\n",
" URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is\n",
" expected. A local file could be: file://localhost/path/to/table.csv.\n",
"\n",
" If you want to pass in a path object, pandas accepts any ``os.PathLike``.\n",
"\n",
" By file-like object, we refer to objects with a ``read()`` method, such as\n",
" a file handler (e.g. via builtin ``open`` function) or ``StringIO``.\n",
"sep : str, default ','\n",
" Delimiter to use. If sep is None, the C engine cannot automatically detect\n",
" the separator, but the Python parsing engine can, meaning the latter will\n",
" be used and automatically detect the separator by Python's builtin sniffer\n",
" tool, ``csv.Sniffer``. In addition, separators longer than 1 character and\n",
" different from ``'\\s+'`` will be interpreted as regular expressions and\n",
" will also force the use of the Python parsing engine. Note that regex\n",
" delimiters are prone to ignoring quoted data. Regex example: ``'\\r\\t'``.\n",
"delimiter : str, default ``None``\n",
" Alias for sep.\n",
"header : int, list of int, default 'infer'\n",
" Row number(s) to use as the column names, and the start of the\n",
" data. Default behavior is to infer the column names: if no names\n",
" are passed the behavior is identical to ``header=0`` and column\n",
" names are inferred from the first line of the file, if column\n",
" names are passed explicitly then the behavior is identical to\n",
" ``header=None``. Explicitly pass ``header=0`` to be able to\n",
" replace existing names. The header can be a list of integers that\n",
" specify row locations for a multi-index on the columns\n",
" e.g. [0,1,3]. Intervening rows that are not specified will be\n",
" skipped (e.g. 2 in this example is skipped). Note that this\n",
" parameter ignores commented lines and empty lines if\n",
" ``skip_blank_lines=True``, so ``header=0`` denotes the first line of\n",
" data rather than the first line of the file.\n",
"names : array-like, optional\n",
" List of column names to use. If the file contains a header row,\n",
" then you should explicitly pass ``header=0`` to override the column names.\n",
" Duplicates in this list are not allowed.\n",
"index_col : int, str, sequence of int / str, or False, default ``None``\n",
" Column(s) to use as the row labels of the ``DataFrame``, either given as\n",
" string name or column index. If a sequence of int / str is given, a\n",
" MultiIndex is used.\n",
"\n",
" Note: ``index_col=False`` can be used to force pandas to *not* use the first\n",
" column as the index, e.g. when you have a malformed file with delimiters at\n",
" the end of each line.\n",
"usecols : list-like or callable, optional\n",
" Return a subset of the columns. If list-like, all elements must either\n",
" be positional (i.e. integer indices into the document columns) or strings\n",
" that correspond to column names provided either by the user in `names` or\n",
" inferred from the document header row(s). For example, a valid list-like\n",
" `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``.\n",
" Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``.\n",
" To instantiate a DataFrame from ``data`` with element order preserved use\n",
" ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns\n",
" in ``['foo', 'bar']`` order or\n",
" ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]``\n",
" for ``['bar', 'foo']`` order.\n",
"\n",
" If callable, the callable function will be evaluated against the column\n",
" names, returning names where the callable function evaluates to True. An\n",
" example of a valid callable argument would be ``lambda x: x.upper() in\n",
" ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster\n",
" parsing time and lower memory usage.\n",
"squeeze : bool, default False\n",
" If the parsed data only contains one column then return a Series.\n",
"prefix : str, optional\n",
" Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...\n",
"mangle_dupe_cols : bool, default True\n",
" Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than\n",
" 'X'...'X'. Passing in False will cause data to be overwritten if there\n",
" are duplicate names in the columns.\n",
"dtype : Type name or dict of column -> type, optional\n",
" Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32,\n",
" 'c': 'Int64'}\n",
" Use `str` or `object` together with suitable `na_values` settings\n",
" to preserve and not interpret dtype.\n",
" If converters are specified, they will be applied INSTEAD\n",
" of dtype conversion.\n",
"engine : {'c', 'python'}, optional\n",
" Parser engine to use. The C engine is faster while the python engine is\n",
" currently more feature-complete.\n",
"converters : dict, optional\n",
" Dict of functions for converting values in certain columns. Keys can either\n",
" be integers or column labels.\n",
"true_values : list, optional\n",
" Values to consider as True.\n",
"false_values : list, optional\n",
" Values to consider as False.\n",
"skipinitialspace : bool, default False\n",
" Skip spaces after delimiter.\n",
"skiprows : list-like, int or callable, optional\n",
" Line numbers to skip (0-indexed) or number of lines to skip (int)\n",
" at the start of the file.\n",
"\n",
" If callable, the callable function will be evaluated against the row\n",
" indices, returning True if the row should be skipped and False otherwise.\n",
" An example of a valid callable argument would be ``lambda x: x in [0, 2]``.\n",
"skipfooter : int, default 0\n",
" Number of lines at bottom of file to skip (Unsupported with engine='c').\n",
"nrows : int, optional\n",
" Number of rows of file to read. Useful for reading pieces of large files.\n",
"na_values : scalar, str, list-like, or dict, optional\n",
" Additional strings to recognize as NA/NaN. If dict passed, specific\n",
" per-column NA values. By default the following values are interpreted as\n",
" NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan',\n",
" '1.#IND', '1.#QNAN', '', 'N/A', 'NA', 'NULL', 'NaN', 'n/a',\n",
" 'nan', 'null'.\n",
"keep_default_na : bool, default True\n",
" Whether or not to include the default NaN values when parsing the data.\n",
" Depending on whether `na_values` is passed in, the behavior is as follows:\n",
"\n",
" * If `keep_default_na` is True, and `na_values` are specified, `na_values`\n",
" is appended to the default NaN values used for parsing.\n",
" * If `keep_default_na` is True, and `na_values` are not specified, only\n",
" the default NaN values are used for parsing.\n",
" * If `keep_default_na` is False, and `na_values` are specified, only\n",
" the NaN values specified `na_values` are used for parsing.\n",
" * If `keep_default_na` is False, and `na_values` are not specified, no\n",
" strings will be parsed as NaN.\n",
"\n",
" Note that if `na_filter` is passed in as False, the `keep_default_na` and\n",
" `na_values` parameters will be ignored.\n",
"na_filter : bool, default True\n",
" Detect missing value markers (empty strings and the value of na_values). In\n",
" data without any NAs, passing na_filter=False can improve the performance\n",
" of reading a large file.\n",
"verbose : bool, default False\n",
" Indicate number of NA values placed in non-numeric columns.\n",
"skip_blank_lines : bool, default True\n",
" If True, skip over blank lines rather than interpreting as NaN values.\n",
"parse_dates : bool or list of int or names or list of lists or dict, default False\n",
" The behavior is as follows:\n",
"\n",
" * boolean. If True -> try parsing the index.\n",
" * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3\n",
" each as a separate date column.\n",
" * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as\n",
" a single date column.\n",
" * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call\n",
" result 'foo'\n",
"\n",
" If a column or index cannot be represented as an array of datetimes,\n",
" say because of an unparseable value or a mixture of timezones, the column\n",
" or index will be returned unaltered as an object data type. For\n",
" non-standard datetime parsing, use ``pd.to_datetime`` after\n",
" ``pd.read_csv``. To parse an index or column with a mixture of timezones,\n",
" specify ``date_parser`` to be a partially-applied\n",
" :func:`pandas.to_datetime` with ``utc=True``. See\n",
" :ref:`io.csv.mixed_timezones` for more.\n",
"\n",
" Note: A fast-path exists for iso8601-formatted dates.\n",
"infer_datetime_format : bool, default False\n",
" If True and `parse_dates` is enabled, pandas will attempt to infer the\n",
" format of the datetime strings in the columns, and if it can be inferred,\n",
" switch to a faster method of parsing them. In some cases this can increase\n",
" the parsing speed by 5-10x.\n",
"keep_date_col : bool, default False\n",
" If True and `parse_dates` specifies combining multiple columns then\n",
" keep the original columns.\n",
"date_parser : function, optional\n",
" Function to use for converting a sequence of string columns to an array of\n",
" datetime instances. The default uses ``dateutil.parser.parser`` to do the\n",
" conversion. Pandas will try to call `date_parser` in three different ways,\n",
" advancing to the next if an exception occurs: 1) Pass one or more arrays\n",
" (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the\n",
" string values from the columns defined by `parse_dates` into a single array\n",
" and pass that; and 3) call `date_parser` once for each row using one or\n",
" more strings (corresponding to the columns defined by `parse_dates`) as\n",
" arguments.\n",
"dayfirst : bool, default False\n",
" DD/MM format dates, international and European format.\n",
"cache_dates : bool, default True\n",
" If True, use a cache of unique, converted dates to apply the datetime\n",
" conversion. May produce significant speed-up when parsing duplicate\n",
" date strings, especially ones with timezone offsets.\n",
"\n",
" .. versionadded:: 0.25.0\n",
"iterator : bool, default False\n",
" Return TextFileReader object for iteration or getting chunks with\n",
" ``get_chunk()``.\n",
"chunksize : int, optional\n",
" Return TextFileReader object for iteration.\n",
" See the `IO Tools docs\n",
" `_\n",
" for more information on ``iterator`` and ``chunksize``.\n",
"compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'\n",
" For on-the-fly decompression of on-disk data. If 'infer' and\n",
" `filepath_or_buffer` is path-like, then detect compression from the\n",
" following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no\n",
" decompression). If using 'zip', the ZIP file must contain only one data\n",
" file to be read in. Set to None for no decompression.\n",
"thousands : str, optional\n",
" Thousands separator.\n",
"decimal : str, default '.'\n",
" Character to recognize as decimal point (e.g. use ',' for European data).\n",
"lineterminator : str (length 1), optional\n",
" Character to break file into lines. Only valid with C parser.\n",
"quotechar : str (length 1), optional\n",
" The character used to denote the start and end of a quoted item. Quoted\n",
" items can include the delimiter and it will be ignored.\n",
"quoting : int or csv.QUOTE_* instance, default 0\n",
" Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of\n",
" QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3).\n",
"doublequote : bool, default ``True``\n",
" When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate\n",
" whether or not to interpret two consecutive quotechar elements INSIDE a\n",
" field as a single ``quotechar`` element.\n",
"escapechar : str (length 1), optional\n",
" One-character string used to escape other characters.\n",
"comment : str, optional\n",
" Indicates remainder of line should not be parsed. If found at the beginning\n",
" of a line, the line will be ignored altogether. This parameter must be a\n",
" single character. Like empty lines (as long as ``skip_blank_lines=True``),\n",
" fully commented lines are ignored by the parameter `header` but not by\n",
" `skiprows`. For example, if ``comment='#'``, parsing\n",
" ``#empty\\na,b,c\\n1,2,3`` with ``header=0`` will result in 'a,b,c' being\n",
" treated as the header.\n",
"encoding : str, optional\n",
" Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python\n",
" standard encodings\n",
" `_ .\n",
"dialect : str or csv.Dialect, optional\n",
" If provided, this parameter will override values (default or not) for the\n",
" following parameters: `delimiter`, `doublequote`, `escapechar`,\n",
" `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to\n",
" override values, a ParserWarning will be issued. See csv.Dialect\n",
" documentation for more details.\n",
"error_bad_lines : bool, default True\n",
" Lines with too many fields (e.g. a csv line with too many commas) will by\n",
" default cause an exception to be raised, and no DataFrame will be returned.\n",
" If False, then these \"bad lines\" will dropped from the DataFrame that is\n",
" returned.\n",
"warn_bad_lines : bool, default True\n",
" If error_bad_lines is False, and warn_bad_lines is True, a warning for each\n",
" \"bad line\" will be output.\n",
"delim_whitespace : bool, default False\n",
" Specifies whether or not whitespace (e.g. ``' '`` or ``' '``) will be\n",
" used as the sep. Equivalent to setting ``sep='\\s+'``. If this option\n",
" is set to True, nothing should be passed in for the ``delimiter``\n",
" parameter.\n",
"low_memory : bool, default True\n",
" Internally process the file in chunks, resulting in lower memory use\n",
" while parsing, but possibly mixed type inference. To ensure no mixed\n",
" types either set False, or specify the type with the `dtype` parameter.\n",
" Note that the entire file is read into a single DataFrame regardless,\n",
" use the `chunksize` or `iterator` parameter to return the data in chunks.\n",
" (Only valid with C parser).\n",
"memory_map : bool, default False\n",
" If a filepath is provided for `filepath_or_buffer`, map the file object\n",
" directly onto memory and access the data directly from there. Using this\n",
" option can improve performance because there is no longer any I/O overhead.\n",
"float_precision : str, optional\n",
" Specifies which converter the C engine should use for floating-point\n",
" values. The options are `None` for the ordinary converter,\n",
" `high` for the high-precision converter, and `round_trip` for the\n",
" round-trip converter.\n",
"\n",
"Returns\n",
"-------\n",
"DataFrame or TextParser\n",
" A comma-separated values (csv) file is returned as two-dimensional\n",
" data structure with labeled axes.\n",
"\n",
"See Also\n",
"--------\n",
"DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.\n",
"read_csv : Read a comma-separated values (csv) file into DataFrame.\n",
"read_fwf : Read a table of fixed-width formatted lines into DataFrame.\n",
"\n",
"Examples\n",
"--------\n",
">>> pd.read_csv('data.csv') # doctest: +SKIP\n",
"\u001b[0;31mFile:\u001b[0m ~/miniconda3/envs/pangeo/lib/python3.8/site-packages/pandas/io/parsers.py\n",
"\u001b[0;31mType:\u001b[0m function\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# IPython help syntax: a leading (or trailing) `?` displays the object's signature and docstring.\n",
"?pd.read_csv"
]
},
{
"cell_type": "markdown",
"id": "powerful-chain",
"metadata": {},
"source": [
"For more information, see the [pandas documentation](https://pandas.pydata.org/docs/)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "charitable-philippines",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "pangeo",
"language": "python",
"name": "pangeo"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}