新增模型字段到CreateDatasetInput类，优化FineTuningService以支持模型参数，更新前端页面以允许用户选择和导出数据集，添加微调任务进度对话框和整体数据集导出功能。

239573049 · 239573049 · commit 81efd4b1d2ac · 2025-05-22T03:34:19.000+08:00
diff --git a/src/KoalaWiki/Dto/FineTuningDto.cs b/src/KoalaWiki/Dto/FineTuningDto.cs
@@ -35,6 +35,8 @@ public class CreateDatasetInput
     /// 提示词
     /// </summary>
     public string Prompt { get; set; } = string.Empty;
+    
+    public string Model { get; set; } = string.Empty;
 }
 
 /// <summary>
diff --git a/src/KoalaWiki/Services/FineTuningService.cs b/src/KoalaWiki/Services/FineTuningService.cs
@@ -1,5 +1,6 @@
 using System.Text;
 using System.Text.Json;
+using System.Text.RegularExpressions;
 using FastService;
 using KoalaWiki.Core.DataAccess;
 using KoalaWiki.Domains;
@@ -48,6 +49,7 @@ public async Task<TrainingDataset> CreateDatasetAsync(CreateDatasetInput input)
             Status = TrainingDatasetStatus.NotStarted,
             Name = input.Name,
             Endpoint = input.Endpoint,
+            Model = input.Model,
             ApiKey = input.ApiKey,
             Prompt = input.Prompt
         };
@@ -293,11 +295,37 @@ await context.Response.WriteAsync($"data: {JsonSerializer.Serialize(new
                 await context.Response.Body.FlushAsync();
             }
 
-            await koala.FineTuningTasks.Where(x => x.Id == dataset.Id)
+            var datasetContent = sb.ToString();
+
+            // 提取<data>标签中的内容
+            var regex = new Regex(@"<data>(.*?)</data>", RegexOptions.Singleline);
+
+            var match = regex.Match(datasetContent);
+            if (match.Success)
+            {
+                datasetContent = match.Groups[1].Value;
+            }
+
+            // 提取```json```标签中的内容
+            regex = new Regex(@"```json(.*?)```", RegexOptions.Singleline);
+            match = regex.Match(datasetContent);
+            if (match.Success)
+            {
+                datasetContent = match.Groups[1].Value;
+            }
+
+
+            await koala.FineTuningTasks.Where(x => x.Id == task.Id)
                 .ExecuteUpdateAsync(x => x.SetProperty(a => a.Status, FineTuningTaskStatus.Completed)
-                        .SetProperty(x => x.Dataset, sb.ToString())
+                        .SetProperty(x => x.Dataset, datasetContent)
+                        .SetProperty(x=>x.OriginalDataset,sb.ToString())
                         .SetProperty(a => a.CompletedAt, DateTime.Now),
                     context.RequestAborted);
+            
+            await koala.TrainingDatasets.Where(x => x.Id == dataset.Id)
+                .ExecuteUpdateAsync(x => x.SetProperty(a => a.Status, TrainingDatasetStatus.Completed)
+                        .SetProperty(a => a.UpdatedAt, DateTime.Now),
+                    context.RequestAborted);
 
             await context.Response.WriteAsync($"data: {JsonSerializer.Serialize(new
             {
diff --git a/web/app/admin/finetune/create/page.tsx b/web/app/admin/finetune/create/page.tsx
@@ -214,6 +214,9 @@ export default function CreateDatasetPage() {
             <Select
               placeholder="选择用于生成数据集的模型"
               loading={modelLoading}
+              // 支持搜索
+              showSearch
+              // 支持自己填写模型
               notFoundContent={modelLoading ? <Spin size="small" /> : (
                 models.length === 0 ? "请先填写API端点和密钥并刷新" : "没有找到模型"
               )}
@@ -238,7 +241,7 @@ Here is the Markdown content you need to process:
 {{markdown_content}}
 </markdown_content>
 
-Your task is to create 10-15 high-quality instruction-response pairs based on this Markdown document. These pairs will be used to train an AI model, so they should be diverse, comprehensive, and accurately reflect the content of the document.
+Your task is to create 15-20 high-quality instruction-response pairs based on this Markdown document. These pairs will be used to train an AI model, so they should be diverse, comprehensive, and accurately reflect the content of the document.
 
 When creating instructions:
 1. Use a variety of formats (questions, commands, requests, etc.)
@@ -254,8 +257,7 @@ When creating responses:
 
 Ensure that your dataset comprehensively covers all core content and knowledge structures in the document.
 
-Your output should be in the following JSON format:
-
+Your output should be in the following JSON format using <data> tags:
 <data>
 [
   {"instruction": "Instruction content", "input": "", "output": "Response content"},
diff --git a/web/app/admin/finetune/dataset/[id]/page.tsx b/web/app/admin/finetune/dataset/[id]/page.tsx

Original file line number	Diff line number	Diff line change
`@@ -35,6 +35,8 @@ public class CreateDatasetInput`
`35`	`35`	`/// 提示词`
`36`	`36`	`/// </summary>`
`37`	`37`	`public string Prompt { get; set; } = string.Empty;`
	`38`	`+`
	`39`	`+ public string Model { get; set; } = string.Empty;`
`38`	`40`	`}`
`39`	`41`
`40`	`42`	`/// <summary>`