|
19 | 19 | {
|
20 | 20 | "attachments": {},
|
21 | 21 | "cell_type": "markdown",
|
| 22 | + "id": "5831c1ac", |
22 | 23 | "metadata": {},
|
23 | 24 | "source": [
|
24 | 25 | "<div class=\"alert alert-block alert-warning\">\n",
|
|
38 | 39 | "3. Demonstrate powerful AI Functions for text processing and analysis\n",
|
39 | 40 | "\n",
|
40 | 41 | "**Prerequisites**: Ensure AI Functions are enabled on your deployment (AI Services > AI & ML Functions)."
|
41 |
| - ], |
42 |
| - "id": "5831c1ac" |
| 42 | + ] |
43 | 43 | },
|
44 | 44 | {
|
45 | 45 | "attachments": {},
|
46 | 46 | "cell_type": "markdown",
|
| 47 | + "id": "ea429156", |
47 | 48 | "metadata": {},
|
48 | 49 | "source": [
|
49 | 50 | "## Create some simple tables\n",
|
50 | 51 | "\n",
|
51 | 52 | "This setup establishes a basic relational structure to store some reviews for restaurants. Ensure you have selected a database and have CREATE permissions to create/delete tables."
|
52 |
| - ], |
53 |
| - "id": "ea429156" |
| 53 | + ] |
54 | 54 | },
|
55 | 55 | {
|
56 | 56 | "cell_type": "code",
|
57 | 57 | "execution_count": 1,
|
| 58 | + "id": "1f8ccd75", |
58 | 59 | "metadata": {},
|
59 | 60 | "outputs": [
|
60 | 61 | {
|
|
97 | 98 | " Summary TEXT,\n",
|
98 | 99 | " Text TEXT\n",
|
99 | 100 | ");"
|
100 |
| - ], |
101 |
| - "id": "1f8ccd75" |
| 101 | + ] |
102 | 102 | },
|
103 | 103 | {
|
104 | 104 | "attachments": {},
|
105 | 105 | "cell_type": "markdown",
|
| 106 | + "id": "6a2118dd", |
106 | 107 | "metadata": {},
|
107 | 108 | "source": [
|
108 | 109 | "## Install the required packages"
|
109 |
| - ], |
110 |
| - "id": "6a2118dd" |
| 110 | + ] |
111 | 111 | },
|
112 | 112 | {
|
113 | 113 | "cell_type": "code",
|
114 | 114 | "execution_count": 2,
|
| 115 | + "id": "40350277", |
115 | 116 | "metadata": {},
|
116 | 117 | "outputs": [
|
117 | 118 | {
|
|
143 | 144 | ],
|
144 | 145 | "source": [
|
145 | 146 | "!pip install kagglehub pandas"
|
146 |
| - ], |
147 |
| - "id": "40350277" |
| 147 | + ] |
148 | 148 | },
|
149 | 149 | {
|
150 | 150 | "attachments": {},
|
151 | 151 | "cell_type": "markdown",
|
| 152 | + "id": "97437a79", |
152 | 153 | "metadata": {},
|
153 | 154 | "source": [
|
154 | 155 | "## Download and Load Dataset"
|
155 |
| - ], |
156 |
| - "id": "97437a79" |
| 156 | + ] |
157 | 157 | },
|
158 | 158 | {
|
159 | 159 | "cell_type": "code",
|
160 | 160 | "execution_count": 3,
|
| 161 | + "id": "cf62cc7e", |
161 | 162 | "metadata": {},
|
162 | 163 | "outputs": [
|
163 | 164 | {
|
|
349 | 350 | "print(f\"Columns: {list(df.columns)}\")\n",
|
350 | 351 | "print(\"\\nFirst few rows:\")\n",
|
351 | 352 | "df.head()"
|
352 |
| - ], |
353 |
| - "id": "cf62cc7e" |
| 353 | + ] |
354 | 354 | },
|
355 | 355 | {
|
356 | 356 | "attachments": {},
|
357 | 357 | "cell_type": "markdown",
|
| 358 | + "id": "0c938c99", |
358 | 359 | "metadata": {},
|
359 | 360 | "source": [
|
360 | 361 | "## Load Data into SingleStore"
|
361 |
| - ], |
362 |
| - "id": "0c938c99" |
| 362 | + ] |
363 | 363 | },
|
364 | 364 | {
|
365 | 365 | "cell_type": "code",
|
366 | 366 | "execution_count": 4,
|
| 367 | + "id": "4d427d08", |
367 | 368 | "metadata": {},
|
368 | 369 | "outputs": [
|
369 | 370 | {
|
|
396 | 397 | ")\n",
|
397 | 398 | "\n",
|
398 | 399 | "print(\"Data loaded successfully!\")"
|
399 |
| - ], |
400 |
| - "id": "4d427d08" |
| 400 | + ] |
401 | 401 | },
|
402 | 402 | {
|
403 | 403 | "attachments": {},
|
404 | 404 | "cell_type": "markdown",
|
| 405 | + "id": "ee21f51b", |
405 | 406 | "metadata": {},
|
406 | 407 | "source": [
|
407 | 408 | "## Verify Data Load"
|
408 |
| - ], |
409 |
| - "id": "ee21f51b" |
| 409 | + ] |
410 | 410 | },
|
411 | 411 | {
|
412 | 412 | "cell_type": "code",
|
413 | 413 | "execution_count": 5,
|
| 414 | + "id": "8423c269", |
414 | 415 | "metadata": {},
|
415 | 416 | "outputs": [
|
416 | 417 | {
|
|
458 | 459 | "%%sql\n",
|
459 | 460 | "-- Check the number of reviews loaded\n",
|
460 | 461 | "SELECT COUNT(*) as total_reviews FROM reviews;"
|
461 |
| - ], |
462 |
| - "id": "8423c269" |
| 462 | + ] |
463 | 463 | },
|
464 | 464 | {
|
465 | 465 | "attachments": {},
|
466 | 466 | "cell_type": "markdown",
|
| 467 | + "id": "d6c8e487", |
467 | 468 | "metadata": {},
|
468 | 469 | "source": [
|
469 | 470 | "## Sample Data Preview"
|
470 |
| - ], |
471 |
| - "id": "d6c8e487" |
| 471 | + ] |
472 | 472 | },
|
473 | 473 | {
|
474 | 474 | "cell_type": "code",
|
475 | 475 | "execution_count": 6,
|
| 476 | + "id": "ccefec53", |
476 | 477 | "metadata": {},
|
477 | 478 | "outputs": [
|
478 | 479 | {
|
|
602 | 603 | "SELECT Id, ProductId, Score, Summary, LEFT(Text, 100) as Review_Preview\n",
|
603 | 604 | "FROM reviews\n",
|
604 | 605 | "LIMIT 10;"
|
605 |
| - ], |
606 |
| - "id": "ccefec53" |
| 606 | + ] |
607 | 607 | },
|
608 | 608 | {
|
609 | 609 | "attachments": {},
|
610 | 610 | "cell_type": "markdown",
|
| 611 | + "id": "0bb3deb8", |
611 | 612 | "metadata": {},
|
612 | 613 | "source": [
|
613 | 614 | "## AI Functions Demonstrations\n",
|
614 | 615 | "\n",
|
615 | 616 | "Now let's explore the power of SingleStore AI Functions for text analysis and processing.\n",
|
616 | 617 | "Ensure that AI Functions are enabled for your organization and that you can list the available AI Functions."
|
617 |
| - ], |
618 |
| - "id": "0bb3deb8" |
| 618 | + ] |
619 | 619 | },
|
620 | 620 | {
|
621 | 621 | "cell_type": "code",
|
622 | 622 | "execution_count": 7,
|
| 623 | + "id": "bd293861", |
623 | 624 | "metadata": {},
|
624 | 625 | "outputs": [
|
625 | 626 | {
|
|
769 | 770 | "%%sql\n",
|
770 | 771 | "USE cluster;\n",
|
771 | 772 | "SHOW functions;"
|
772 |
| - ], |
773 |
| - "id": "bd293861" |
| 773 | + ] |
774 | 774 | },
|
775 | 775 | {
|
776 | 776 | "cell_type": "code",
|
777 | 777 | "execution_count": 8,
|
| 778 | + "id": "05d5d27a", |
778 | 779 | "metadata": {},
|
779 | 780 | "outputs": [
|
780 | 781 | {
|
|
824 | 825 | "SELECT cluster.AI_COMPLETE(\n",
|
825 | 826 | " 'What is SingleStore?'\n",
|
826 | 827 | ") AS completion;"
|
827 |
| - ], |
828 |
| - "id": "05d5d27a" |
| 828 | + ] |
829 | 829 | },
|
830 | 830 | {
|
831 | 831 | "cell_type": "code",
|
832 | 832 | "execution_count": 9,
|
| 833 | + "id": "9f842a0d", |
833 | 834 | "metadata": {},
|
834 | 835 | "outputs": [
|
835 | 836 | {
|
|
888 | 889 | "%%sql\n",
|
889 | 890 | "-- AI_SENTIMENT: Analyze sentiment of customer reviews for a specific product\n",
|
890 | 891 | "-- WHERE ProductId = <Your choice>\n",
|
891 |
| - "-- Remember to specific the datbase name. In this example 'temp' is the Database name\n", |
| 892 | + "-- Remember to specify the database name. In this example 'temp' is the Database name\n", |
892 | 893 | "SELECT\n",
|
893 | 894 | " Id,\n",
|
894 | 895 | " ProductId,\n",
|
|
898 | 899 | "FROM temp.reviews\n",
|
899 | 900 | "WHERE ProductId = 'B000NY8ODS'\n",
|
900 | 901 | "LIMIT 10;"
|
901 |
| - ], |
902 |
| - "id": "9f842a0d" |
| 902 | + ] |
903 | 903 | },
|
904 | 904 | {
|
905 | 905 | "cell_type": "code",
|
906 | 906 | "execution_count": 10,
|
| 907 | + "id": "56ff7a17", |
907 | 908 | "metadata": {},
|
908 | 909 | "outputs": [
|
909 | 910 | {
|
|
1015 | 1016 | " review_count,\n",
|
1016 | 1017 | " cluster.AI_SENTIMENT(combined_text) as overall_sentiment\n",
|
1017 | 1018 | "FROM grouped_reviews;"
|
1018 |
| - ], |
1019 |
| - "id": "56ff7a17" |
| 1019 | + ] |
1020 | 1020 | },
|
1021 | 1021 | {
|
1022 | 1022 | "cell_type": "code",
|
1023 | 1023 | "execution_count": 11,
|
| 1024 | + "id": "b9786b66", |
1024 | 1025 | "metadata": {},
|
1025 | 1026 | "outputs": [
|
1026 | 1027 | {
|
|
1122 | 1123 | " 15\n",
|
1123 | 1124 | " ) AS summary\n",
|
1124 | 1125 | "FROM long_reviews;"
|
1125 |
| - ], |
1126 |
| - "id": "b9786b66" |
| 1126 | + ] |
1127 | 1127 | },
|
1128 | 1128 | {
|
1129 | 1129 | "cell_type": "code",
|
1130 | 1130 | "execution_count": 12,
|
| 1131 | + "id": "4febc8e0", |
1131 | 1132 | "metadata": {},
|
1132 | 1133 | "outputs": [
|
1133 | 1134 | {
|
|
1263 | 1264 | " '[quality, price, shipping, taste]'\n",
|
1264 | 1265 | " ) AS classification\n",
|
1265 | 1266 | "FROM negative_reviews;"
|
1266 |
| - ], |
1267 |
| - "id": "4febc8e0" |
| 1267 | + ] |
1268 | 1268 | },
|
1269 | 1269 | {
|
1270 | 1270 | "cell_type": "code",
|
1271 | 1271 | "execution_count": 13,
|
| 1272 | + "id": "40f4cd14", |
1272 | 1273 | "metadata": {},
|
1273 | 1274 | "outputs": [
|
1274 | 1275 | {
|
|
1431 | 1432 | " 'Does this customer indicate they will buy this product again? Answer with yes, no, or unclear only'\n",
|
1432 | 1433 | " ) AS repeat_purchase_intent\n",
|
1433 | 1434 | "FROM positive_reviews;"
|
1434 |
| - ], |
1435 |
| - "id": "40f4cd14" |
| 1435 | + ] |
1436 | 1436 | },
|
1437 | 1437 | {
|
1438 | 1438 | "cell_type": "code",
|
1439 | 1439 | "execution_count": 14,
|
| 1440 | + "id": "a09f2d5b", |
1440 | 1441 | "metadata": {},
|
1441 | 1442 | "outputs": [
|
1442 | 1443 | {
|
|
1585 | 1586 | " 'Is this customer at high risk of not purchasing again? Answer with high, medium, or low only'\n",
|
1586 | 1587 | " ) AS churn_risk\n",
|
1587 | 1588 | "FROM low_rated_reviews;"
|
1588 |
| - ], |
1589 |
| - "id": "a09f2d5b" |
| 1589 | + ] |
1590 | 1590 | },
|
1591 | 1591 | {
|
1592 | 1592 | "cell_type": "code",
|
1593 | 1593 | "execution_count": 15,
|
| 1594 | + "id": "3d78f449", |
1594 | 1595 | "metadata": {},
|
1595 | 1596 | "outputs": [
|
1596 | 1597 | {
|
|
1685 | 1686 | " 'spanish'\n",
|
1686 | 1687 | " ) AS spanish_translation\n",
|
1687 | 1688 | "FROM translatable_reviews;"
|
1688 |
| - ], |
1689 |
| - "id": "3d78f449" |
| 1689 | + ] |
1690 | 1690 | },
|
1691 | 1691 | {
|
1692 | 1692 | "cell_type": "code",
|
1693 | 1693 | "execution_count": 16,
|
| 1694 | + "id": "082dc59a", |
1694 | 1695 | "metadata": {},
|
1695 | 1696 | "outputs": [
|
1696 | 1697 | {
|
|
1860 | 1861 | " cluster.AI_CLASSIFY(Text, '[quality, value, taste, packaging]') as category,\n",
|
1861 | 1862 | " cluster.AI_SUMMARIZE(Text, 'aifunctions_chat_default', 10) as brief_summary\n",
|
1862 | 1863 | "FROM product_reviews;"
|
1863 |
| - ], |
1864 |
| - "id": "082dc59a" |
| 1864 | + ] |
1865 | 1865 | },
|
1866 | 1866 | {
|
1867 | 1867 | "cell_type": "markdown",
|
|
0 commit comments