|
4 | 4 | "cell_type": "markdown",
|
5 | 5 | "metadata": {},
|
6 | 6 | "source": [
|
7 |
| - "# pivot versus pivot_tabel" |
| 7 | + "# pivot versus pivot_table" |
8 | 8 | ]
|
9 | 9 | },
|
10 | 10 | {
|
11 | 11 | "cell_type": "code",
|
12 |
| - "execution_count": 13, |
| 12 | + "execution_count": 1, |
13 | 13 | "metadata": {},
|
14 | 14 | "outputs": [],
|
15 | 15 | "source": [
|
|
24 | 24 | "pandas has two functions to restructure dataframes. Although they are similar, each has its own applications."
|
25 | 25 | ]
|
26 | 26 | },
|
| 27 | + { |
| 28 | + "cell_type": "markdown", |
| 29 | + "metadata": {}, |
| 30 | + "source": [ |
| 31 | + "## Data" |
| 32 | + ] |
| 33 | + }, |
27 | 34 | {
|
28 | 35 | "cell_type": "markdown",
|
29 | 36 | "metadata": {},
|
|
33 | 40 | },
|
34 | 41 | {
|
35 | 42 | "cell_type": "code",
|
36 |
| - "execution_count": 14, |
| 43 | + "execution_count": 2, |
37 | 44 | "metadata": {},
|
38 | 45 | "outputs": [],
|
39 | 46 | "source": [
|
|
44 | 51 | },
|
45 | 52 | {
|
46 | 53 | "cell_type": "code",
|
47 |
| - "execution_count": 15, |
| 54 | + "execution_count": 3, |
48 | 55 | "metadata": {},
|
49 | 56 | "outputs": [],
|
50 | 57 | "source": [
|
|
62 | 69 | },
|
63 | 70 | {
|
64 | 71 | "cell_type": "code",
|
65 |
| - "execution_count": 16, |
| 72 | + "execution_count": 4, |
66 | 73 | "metadata": {},
|
67 | 74 | "outputs": [],
|
68 | 75 | "source": [
|
|
71 | 78 | },
|
72 | 79 | {
|
73 | 80 | "cell_type": "code",
|
74 |
| - "execution_count": 17, |
| 81 | + "execution_count": 5, |
75 | 82 | "metadata": {},
|
76 | 83 | "outputs": [
|
77 | 84 | {
|
78 | 85 | "name": "stdout",
|
79 | 86 | "output_type": "stream",
|
80 | 87 | "text": [
|
81 | 88 | "<class 'pandas.core.frame.DataFrame'>\n",
|
82 |
| - "Int64Index: 62 entries, 0 to 61\n", |
| 89 | + "RangeIndex: 62 entries, 0 to 61\n", |
83 | 90 | "Data columns (total 6 columns):\n",
|
84 | 91 | " # Column Non-Null Count Dtype \n",
|
85 | 92 | "--- ------ -------------- ----- \n",
|
|
90 | 97 | " 4 gender 55 non-null category \n",
|
91 | 98 | " 5 condition 55 non-null category \n",
|
92 | 99 | "dtypes: category(2), datetime64[ns](1), float32(2), int64(1)\n",
|
93 |
| - "memory usage: 2.2 KB\n" |
| 100 | + "memory usage: 1.8 KB\n" |
94 | 101 | ]
|
95 | 102 | }
|
96 | 103 | ],
|
|
100 | 107 | },
|
101 | 108 | {
|
102 | 109 | "cell_type": "code",
|
103 |
| - "execution_count": 18, |
| 110 | + "execution_count": 6, |
104 | 111 | "metadata": {},
|
105 | 112 | "outputs": [
|
106 | 113 | {
|
|
191 | 198 | "4 1 0.0 2012-10-02 14:00:00 37.500000 M A"
|
192 | 199 | ]
|
193 | 200 | },
|
194 |
| - "execution_count": 18, |
| 201 | + "execution_count": 6, |
195 | 202 | "metadata": {},
|
196 | 203 | "output_type": "execute_result"
|
197 | 204 | }
|
|
216 | 223 | },
|
217 | 224 | {
|
218 | 225 | "cell_type": "code",
|
219 |
| - "execution_count": 19, |
| 226 | + "execution_count": 7, |
220 | 227 | "metadata": {},
|
221 | 228 | "outputs": [],
|
222 | 229 | "source": [
|
|
225 | 232 | },
|
226 | 233 | {
|
227 | 234 | "cell_type": "code",
|
228 |
| - "execution_count": 20, |
| 235 | + "execution_count": 8, |
229 | 236 | "metadata": {},
|
230 | 237 | "outputs": [
|
231 | 238 | {
|
|
274 | 281 | " 34 (condition, 8) 7 non-null category\n",
|
275 | 282 | " 35 (condition, 9) 7 non-null category\n",
|
276 | 283 | "dtypes: category(18), float32(18)\n",
|
277 |
| - "memory usage: 1.7 KB\n" |
| 284 | + "memory usage: 1.9 KB\n" |
278 | 285 | ]
|
279 | 286 | }
|
280 | 287 | ],
|
|
298 | 305 | },
|
299 | 306 | {
|
300 | 307 | "cell_type": "code",
|
301 |
| - "execution_count": 24, |
| 308 | + "execution_count": 9, |
302 | 309 | "metadata": {},
|
303 | 310 | "outputs": [],
|
304 | 311 | "source": [
|
|
308 | 315 | },
|
309 | 316 | {
|
310 | 317 | "cell_type": "code",
|
311 |
| - "execution_count": 25, |
| 318 | + "execution_count": 10, |
312 | 319 | "metadata": {},
|
313 | 320 | "outputs": [
|
314 | 321 | {
|
|
358 | 365 | "cell_type": "markdown",
|
359 | 366 | "metadata": {},
|
360 | 367 | "source": [
|
361 |
| - "The `pivot_table` method on the other hand will only take the numerical columns into account." |
| 368 | + "The `pivot_table` method, on the other hand, will only take the numerical columns into account. Hence it will not work on this dataframe as is, since it contains categorical data as well; the numerical columns have to be selected explicitly with the `values` argument." |
362 | 369 | ]
|
363 | 370 | },
|
364 | 371 | {
|
365 | 372 | "cell_type": "code",
|
366 |
| - "execution_count": 21, |
| 373 | + "execution_count": 11, |
367 | 374 | "metadata": {},
|
368 | 375 | "outputs": [],
|
369 | 376 | "source": [
|
370 |
| - "time_series_table = data.pivot_table(index='date', columns='patient')" |
| 377 | + "time_series_table = data.pivot_table(index='date', columns='patient', values=['dose', 'temperature'])" |
371 | 378 | ]
|
372 | 379 | },
|
373 | 380 | {
|
|
379 | 386 | },
|
380 | 387 | {
|
381 | 388 | "cell_type": "code",
|
382 |
| - "execution_count": 22, |
| 389 | + "execution_count": 12, |
383 | 390 | "metadata": {},
|
384 | 391 | "outputs": [
|
385 | 392 | {
|
|
427 | 434 | },
|
428 | 435 | {
|
429 | 436 | "cell_type": "code",
|
430 |
| - "execution_count": 86, |
| 437 | + "execution_count": 13, |
431 | 438 | "metadata": {},
|
432 | 439 | "outputs": [],
|
433 | 440 | "source": [
|
434 | 441 | "dose_table = data.pivot_table(index='date',\n",
|
435 | 442 | " values=['dose'],\n",
|
436 | 443 | " columns='patient',\n",
|
437 |
| - " aggfunc=np.sum,\n", |
| 444 | + " aggfunc='sum',\n", |
438 | 445 | " margins=True,)"
|
439 | 446 | ]
|
440 | 447 | },
|
441 | 448 | {
|
442 | 449 | "cell_type": "code",
|
443 |
| - "execution_count": 52, |
| 450 | + "execution_count": 14, |
444 | 451 | "metadata": {},
|
445 | 452 | "outputs": [
|
446 | 453 | {
|
|
620 | 627 | "All 6.0 15.0 13.0 10.0 27.0 8.0 30.0 0.0 30.0 139.0"
|
621 | 628 | ]
|
622 | 629 | },
|
623 |
| - "execution_count": 52, |
| 630 | + "execution_count": 14, |
624 | 631 | "metadata": {},
|
625 | 632 | "output_type": "execute_result"
|
626 | 633 | }
|
|
645 | 652 | },
|
646 | 653 | {
|
647 | 654 | "cell_type": "code",
|
648 |
| - "execution_count": 73, |
| 655 | + "execution_count": 15, |
649 | 656 | "metadata": {},
|
650 | 657 | "outputs": [
|
651 | 658 | {
|
|
710 | 717 | " B 40.700001"
|
711 | 718 | ]
|
712 | 719 | },
|
713 |
| - "execution_count": 73, |
| 720 | + "execution_count": 15, |
714 | 721 | "metadata": {},
|
715 | 722 | "output_type": "execute_result"
|
716 | 723 | }
|
717 | 724 | ],
|
718 | 725 | "source": [
|
719 | 726 | "data.pivot_table(index=['gender', 'condition'],\n",
|
720 | 727 | " values='temperature',\n",
|
721 |
| - " aggfunc=np.max,)" |
| 728 | + " aggfunc='max',)" |
722 | 729 | ]
|
723 | 730 | },
|
724 | 731 | {
|
|
730 | 737 | },
|
731 | 738 | {
|
732 | 739 | "cell_type": "code",
|
733 |
| - "execution_count": 76, |
| 740 | + "execution_count": 16, |
734 | 741 | "metadata": {},
|
735 | 742 | "outputs": [
|
736 | 743 | {
|
|
767 | 774 | " </thead>\n",
|
768 | 775 | " <tbody>\n",
|
769 | 776 | " <tr>\n",
|
770 |
| - " <th rowspan=\"3\" valign=\"top\">F</th>\n", |
| 777 | + " <th rowspan=\"9\" valign=\"top\">F</th>\n", |
| 778 | + " <th>1</th>\n", |
| 779 | + " <td>0.0</td>\n", |
| 780 | + " <td>NaN</td>\n", |
| 781 | + " </tr>\n", |
| 782 | + " <tr>\n", |
771 | 783 | " <th>2</th>\n",
|
772 | 784 | " <td>15.0</td>\n",
|
773 | 785 | " <td>39.400002</td>\n",
|
774 | 786 | " </tr>\n",
|
775 | 787 | " <tr>\n",
|
| 788 | + " <th>3</th>\n", |
| 789 | + " <td>0.0</td>\n", |
| 790 | + " <td>NaN</td>\n", |
| 791 | + " </tr>\n", |
| 792 | + " <tr>\n", |
| 793 | + " <th>4</th>\n", |
| 794 | + " <td>0.0</td>\n", |
| 795 | + " <td>NaN</td>\n", |
| 796 | + " </tr>\n", |
| 797 | + " <tr>\n", |
| 798 | + " <th>5</th>\n", |
| 799 | + " <td>0.0</td>\n", |
| 800 | + " <td>NaN</td>\n", |
| 801 | + " </tr>\n", |
| 802 | + " <tr>\n", |
776 | 803 | " <th>6</th>\n",
|
777 | 804 | " <td>8.0</td>\n",
|
778 | 805 | " <td>38.099998</td>\n",
|
779 | 806 | " </tr>\n",
|
780 | 807 | " <tr>\n",
|
| 808 | + " <th>7</th>\n", |
| 809 | + " <td>0.0</td>\n", |
| 810 | + " <td>NaN</td>\n", |
| 811 | + " </tr>\n", |
| 812 | + " <tr>\n", |
781 | 813 | " <th>8</th>\n",
|
782 | 814 | " <td>0.0</td>\n",
|
783 | 815 | " <td>37.900002</td>\n",
|
784 | 816 | " </tr>\n",
|
785 | 817 | " <tr>\n",
|
786 |
| - " <th rowspan=\"5\" valign=\"top\">M</th>\n", |
| 818 | + " <th>9</th>\n", |
| 819 | + " <td>0.0</td>\n", |
| 820 | + " <td>NaN</td>\n", |
| 821 | + " </tr>\n", |
| 822 | + " <tr>\n", |
| 823 | + " <th rowspan=\"9\" valign=\"top\">M</th>\n", |
787 | 824 | " <th>1</th>\n",
|
788 | 825 | " <td>6.0</td>\n",
|
789 | 826 | " <td>38.500000</td>\n",
|
790 | 827 | " </tr>\n",
|
791 | 828 | " <tr>\n",
|
| 829 | + " <th>2</th>\n", |
| 830 | + " <td>0.0</td>\n", |
| 831 | + " <td>NaN</td>\n", |
| 832 | + " </tr>\n", |
| 833 | + " <tr>\n", |
792 | 834 | " <th>3</th>\n",
|
793 | 835 | " <td>13.0</td>\n",
|
794 | 836 | " <td>39.500000</td>\n",
|
795 | 837 | " </tr>\n",
|
796 | 838 | " <tr>\n",
|
| 839 | + " <th>4</th>\n", |
| 840 | + " <td>0.0</td>\n", |
| 841 | + " <td>NaN</td>\n", |
| 842 | + " </tr>\n", |
| 843 | + " <tr>\n", |
797 | 844 | " <th>5</th>\n",
|
798 | 845 | " <td>27.0</td>\n",
|
799 | 846 | " <td>39.500000</td>\n",
|
800 | 847 | " </tr>\n",
|
801 | 848 | " <tr>\n",
|
| 849 | + " <th>6</th>\n", |
| 850 | + " <td>0.0</td>\n", |
| 851 | + " <td>NaN</td>\n", |
| 852 | + " </tr>\n", |
| 853 | + " <tr>\n", |
802 | 854 | " <th>7</th>\n",
|
803 | 855 | " <td>30.0</td>\n",
|
804 | 856 | " <td>40.700001</td>\n",
|
805 | 857 | " </tr>\n",
|
806 | 858 | " <tr>\n",
|
| 859 | + " <th>8</th>\n", |
| 860 | + " <td>0.0</td>\n", |
| 861 | + " <td>NaN</td>\n", |
| 862 | + " </tr>\n", |
| 863 | + " <tr>\n", |
807 | 864 | " <th>9</th>\n",
|
808 | 865 | " <td>30.0</td>\n",
|
809 | 866 | " <td>40.200001</td>\n",
|
|
815 | 872 | "text/plain": [
|
816 | 873 | " dose temperature\n",
|
817 | 874 | "gender patient \n",
|
818 |
| - "F 2 15.0 39.400002\n", |
| 875 | + "F 1 0.0 NaN\n", |
| 876 | + " 2 15.0 39.400002\n", |
| 877 | + " 3 0.0 NaN\n", |
| 878 | + " 4 0.0 NaN\n", |
| 879 | + " 5 0.0 NaN\n", |
819 | 880 | " 6 8.0 38.099998\n",
|
| 881 | + " 7 0.0 NaN\n", |
820 | 882 | " 8 0.0 37.900002\n",
|
| 883 | + " 9 0.0 NaN\n", |
821 | 884 | "M 1 6.0 38.500000\n",
|
| 885 | + " 2 0.0 NaN\n", |
822 | 886 | " 3 13.0 39.500000\n",
|
| 887 | + " 4 0.0 NaN\n", |
823 | 888 | " 5 27.0 39.500000\n",
|
| 889 | + " 6 0.0 NaN\n", |
824 | 890 | " 7 30.0 40.700001\n",
|
| 891 | + " 8 0.0 NaN\n", |
825 | 892 | " 9 30.0 40.200001"
|
826 | 893 | ]
|
827 | 894 | },
|
828 |
| - "execution_count": 76, |
| 895 | + "execution_count": 16, |
829 | 896 | "metadata": {},
|
830 | 897 | "output_type": "execute_result"
|
831 | 898 | }
|
|
834 | 901 | "data.pivot_table(index=['gender', 'patient'],\n",
|
835 | 902 | " values=['temperature', 'dose'],\n",
|
836 | 903 | " aggfunc={\n",
|
837 |
| - " 'temperature': np.max,\n", |
838 |
| - " 'dose': np.sum,\n", |
| 904 | + " 'temperature': 'max',\n", |
| 905 | + " 'dose': 'sum',\n", |
839 | 906 | " },)"
|
840 | 907 | ]
|
841 | 908 | }
|
842 | 909 | ],
|
843 | 910 | "metadata": {
|
844 | 911 | "kernelspec": {
|
845 |
| - "display_name": "Python 3", |
| 912 | + "display_name": "Python 3 (ipykernel)", |
846 | 913 | "language": "python",
|
847 | 914 | "name": "python3"
|
848 | 915 | },
|
|
856 | 923 | "name": "python",
|
857 | 924 | "nbconvert_exporter": "python",
|
858 | 925 | "pygments_lexer": "ipython3",
|
859 |
| - "version": "3.7.6" |
| 926 | + "version": "3.12.0" |
860 | 927 | }
|
861 | 928 | },
|
862 | 929 | "nbformat": 4,
|
|
0 commit comments