From 92d1153b5a152a0ad4becae0c088976d5fb2b94f Mon Sep 17 00:00:00 2001 From: Himanshu Parihar <94682026+Pariharx7@users.noreply.github.com> Date: Sat, 4 Oct 2025 13:21:42 +0000 Subject: [PATCH 1/2] Field Description #1088 --- .gitignore | 1 + examples/product_recommendation/main.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 138f19ae..7b2ee3b2 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ __pycache__/ # Distribution / packaging .venv*/ dist/ +venv/ .DS_Store diff --git a/examples/product_recommendation/main.py b/examples/product_recommendation/main.py index 4c2b9123..3ee8a75e 100644 --- a/examples/product_recommendation/main.py +++ b/examples/product_recommendation/main.py @@ -94,8 +94,12 @@ class ProductTaxonomyInfo: - complementary_taxonomies: Think about when customers buy this product, what else they might need as complementary products. Put labels for these complentary products. """ - taxonomies: list[ProductTaxonomy] - complementary_taxonomies: list[ProductTaxonomy] + taxonomies: list[ProductTaxonomy] = dataclasses.field( + metadata={"description": "Taxonomies for the current product."} + ) + complementary_taxonomies: list[ProductTaxonomy] = dataclasses.field( + metadata={"description": "Think about when customers buy this product, what else they might need as complementary products. 
Put labels for these complentary products."} + ) @cocoindex.op.function(behavior_version=2) From 013b35b277ceb0c1eaea4e302e09b4bc4982fb74 Mon Sep 17 00:00:00 2001 From: Himanshu Parihar <94682026+Pariharx7@users.noreply.github.com> Date: Tue, 14 Oct 2025 09:08:21 +0000 Subject: [PATCH 2/2] feat: pydantic classes --- examples/product_recommendation/main.py | 69 +++++++++++++++---------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/examples/product_recommendation/main.py b/examples/product_recommendation/main.py index eb7c8f85..a3f6ce8d 100644 --- a/examples/product_recommendation/main.py +++ b/examples/product_recommendation/main.py @@ -2,11 +2,16 @@ This example shows how to extract relationships from Markdown documents and build a knowledge graph. """ -import dataclasses +# New Pydantic Imports +from pydantic import BaseModel, Field import datetime import cocoindex from jinja2 import Template +# NOTE: dataclasses import is no longer strictly needed but kept for ProductInfo +import dataclasses + + neo4j_conn_spec = cocoindex.add_auth_entry( "Neo4jConnection", cocoindex.targets.Neo4jConnection( @@ -21,6 +26,15 @@ GraphDbDeclaration = cocoindex.targets.Neo4jDeclaration conn_spec = neo4j_conn_spec +<<<<<<< HEAD +======= +# Use Kuzu +#  GraphDbSpec = cocoindex.targets.Kuzu +#  GraphDbConnection = cocoindex.targets.KuzuConnection +#  GraphDbDeclaration = cocoindex.targets.KuzuDeclaration +#  conn_spec = kuzu_conn_spec + +>>>>>>> 5ad0ff8 (feat: Pydantic fields) # Template for rendering product information as markdown to provide information to LLMs PRODUCT_TEMPLATE = """ @@ -39,55 +53,55 @@ - {{ bullet }} {% endfor %} - """ + """ @dataclasses.dataclass class ProductInfo: + """Kept as dataclass, as it's not the LLM extraction target.""" id: str title: str price: float detail: str -@dataclasses.dataclass -class ProductTaxonomy: +# --- CONVERTED TO PYDANTIC --- +class ProductTaxonomy(BaseModel): """ Taxonomy for the product. 
- - A taxonomy is a concise noun (or short noun phrase), based on its core functionality, without specific details such as branding, style, etc. - - Always use the most common words in US English. - - Use lowercase without punctuation, unless it's a proper noun or acronym. - - A product may have multiple taxonomies. Avoid large categories like "office supplies" or "electronics". Use specific ones, like "pen" or "printer". """ - - name: str + name: str = Field( + ..., + description=( + "A taxonomy is a concise noun (or short noun phrase), based on its core functionality, " + "without specific details such as branding, style, etc. Always use the most common words in US English. " + "Use lowercase without punctuation, unless it's a proper noun or acronym. " + "A product may have multiple taxonomies. Avoid large categories like 'office supplies' or 'electronics'. " + "Use specific ones, like 'pen' or 'printer'." + ), + ) -@dataclasses.dataclass -class ProductTaxonomyInfo: +# --- CONVERTED TO PYDANTIC AND FIELD DESCRIPTIONS ADDED --- +class ProductTaxonomyInfo(BaseModel): """ Taxonomy information for the product. - - Fields: - - taxonomies: Taxonomies for the current product. - - complementary_taxonomies: Think about when customers buy this product, what else they might need as complementary products. Put labels for these complentary products. """ - - taxonomies: list[ProductTaxonomy] = dataclasses.field( - metadata={"description": "Taxonomies for the current product."} + # NOTE: The explicit "Fields:" section in the docstring has been removed. + + taxonomies: list[ProductTaxonomy] = Field( + ..., + description="Taxonomies for the current product." ) - complementary_taxonomies: list[ProductTaxonomy] = dataclasses.field( - metadata={"description": "Think about when customers buy this product, what else they might need as complementary products. 
Put labels for these complentary products."} + complementary_taxonomies: list[ProductTaxonomy] = Field( + ..., + description="Think about when customers buy this product, what else they might need as complementary products. Put labels for these complementary products." ) @cocoindex.op.function(behavior_version=2) def extract_product_info(product: cocoindex.Json, filename: str) -> ProductInfo: - # Print markdown for LLM to extract the taxonomy and complimentary taxonomy + # Print markdown for LLM to extract the taxonomy and complementary taxonomy return ProductInfo( id=f"{filename.removesuffix('.json')}", title=product["title"], @@ -118,6 +132,7 @@ def store_product_flow( .transform(cocoindex.functions.ParseJson(), language="json") .transform(extract_product_info, filename=product["filename"]) ) + # output_type still points to the refactored class taxonomy = data["detail"].transform( cocoindex.functions.ExtractByLlm( llm_spec=cocoindex.LlmSpec( @@ -208,4 +223,4 @@ def store_product_flow( ), ), primary_key_fields=["id"], - ) + ) \ No newline at end of file