Adapt for pandas 2.x

gjbex · gjbex · commit bec5d674e958 · 2023-12-01T16:28:51.000+01:00
diff --git a/source-code/pandas/pivot_versus_pivot_table.ipynb b/source-code/pandas/pivot_versus_pivot_table.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# pivot versus pivot_tabel"
+    "# pivot versus pivot_table"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -24,6 +24,13 @@
     "pandas has two functions to restructure dataframes.  Although they are similar, each has its own applications."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -33,7 +40,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -44,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -62,7 +69,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -71,15 +78,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "<class 'pandas.core.frame.DataFrame'>\n",
-      "Int64Index: 62 entries, 0 to 61\n",
+      "RangeIndex: 62 entries, 0 to 61\n",
       "Data columns (total 6 columns):\n",
       " #   Column       Non-Null Count  Dtype         \n",
       "---  ------       --------------  -----         \n",
@@ -90,7 +97,7 @@
       " 4   gender       55 non-null     category      \n",
       " 5   condition    55 non-null     category      \n",
       "dtypes: category(2), datetime64[ns](1), float32(2), int64(1)\n",
-      "memory usage: 2.2 KB\n"
+      "memory usage: 1.8 KB\n"
      ]
     }
    ],
@@ -100,7 +107,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -191,7 +198,7 @@
        "4        1   0.0 2012-10-02 14:00:00    37.500000      M         A"
       ]
      },
-     "execution_count": 18,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -216,7 +223,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -225,7 +232,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -274,7 +281,7 @@
       " 34  (condition, 8)    7 non-null      category\n",
       " 35  (condition, 9)    7 non-null      category\n",
       "dtypes: category(18), float32(18)\n",
-      "memory usage: 1.7 KB\n"
+      "memory usage: 1.9 KB\n"
      ]
     }
    ],
@@ -298,7 +305,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -308,7 +315,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -358,16 +365,16 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The `pivot_table` method on the other hand will only take the numerical columns into account."
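+    "The `pivot_table` method, on the other hand, aggregates the values (by default using the mean), which only makes sense for numerical columns.  Since this dataframe contains categorical columns as well, the numerical columns to aggregate have to be selected explicitly using the `values` argument."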
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
-    "time_series_table = data.pivot_table(index='date', columns='patient')"
+    "time_series_table = data.pivot_table(index='date', columns='patient', values=['dose', 'temperature'])"
    ]
   },
   {
@@ -379,7 +386,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -427,20 +434,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 86,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
     "dose_table = data.pivot_table(index='date',\n",
     "                              values=['dose'],\n",
     "                              columns='patient',\n",
-    "                              aggfunc=np.sum,\n",
+    "                              aggfunc='sum',\n",
     "                              margins=True,)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -620,7 +627,7 @@
        "All                  6.0  15.0  13.0  10.0  27.0  8.0  30.0  0.0  30.0  139.0"
       ]
      },
-     "execution_count": 52,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -645,7 +652,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 73,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -710,15 +717,15 @@
        "       B            40.700001"
       ]
      },
-     "execution_count": 73,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "data.pivot_table(index=['gender', 'condition'],\n",
     "                 values='temperature',\n",
-    "                 aggfunc=np.max,)"
+    "                 aggfunc='max',)"
    ]
   },
   {
@@ -730,7 +737,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 76,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
@@ -767,43 +774,93 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th rowspan=\"3\" valign=\"top\">F</th>\n",
+       "      <th rowspan=\"9\" valign=\"top\">F</th>\n",
+       "      <th>1</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>15.0</td>\n",
        "      <td>39.400002</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
        "      <th>6</th>\n",
        "      <td>8.0</td>\n",
        "      <td>38.099998</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
        "      <th>8</th>\n",
        "      <td>0.0</td>\n",
        "      <td>37.900002</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th rowspan=\"5\" valign=\"top\">M</th>\n",
+       "      <th>9</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"9\" valign=\"top\">M</th>\n",
        "      <th>1</th>\n",
        "      <td>6.0</td>\n",
        "      <td>38.500000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>13.0</td>\n",
        "      <td>39.500000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
        "      <th>5</th>\n",
        "      <td>27.0</td>\n",
        "      <td>39.500000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
        "      <th>7</th>\n",
        "      <td>30.0</td>\n",
        "      <td>40.700001</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
        "      <th>9</th>\n",
        "      <td>30.0</td>\n",
        "      <td>40.200001</td>\n",
@@ -815,17 +872,27 @@
       "text/plain": [
        "                dose  temperature\n",
        "gender patient                   \n",
-       "F      2        15.0    39.400002\n",
+       "F      1         0.0          NaN\n",
+       "       2        15.0    39.400002\n",
+       "       3         0.0          NaN\n",
+       "       4         0.0          NaN\n",
+       "       5         0.0          NaN\n",
        "       6         8.0    38.099998\n",
+       "       7         0.0          NaN\n",
        "       8         0.0    37.900002\n",
+       "       9         0.0          NaN\n",
        "M      1         6.0    38.500000\n",
+       "       2         0.0          NaN\n",
        "       3        13.0    39.500000\n",
+       "       4         0.0          NaN\n",
        "       5        27.0    39.500000\n",
+       "       6         0.0          NaN\n",
        "       7        30.0    40.700001\n",
+       "       8         0.0          NaN\n",
        "       9        30.0    40.200001"
       ]
      },
-     "execution_count": 76,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -834,15 +901,15 @@
     "data.pivot_table(index=['gender', 'patient'],\n",
     "                 values=['temperature', 'dose'],\n",
     "                 aggfunc={\n",
-    "                     'temperature': np.max,\n",
-    "                     'dose': np.sum,\n",
+    "                     'temperature': 'max',\n",
+    "                     'dose': 'sum',\n",
     "                 },)"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -856,7 +923,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.6"
+   "version": "3.12.0"
   }
  },
  "nbformat": 4,