Skip to content

Commit 51e1ebe

Browse files
author
zhongzichao
authored
add paddle job resource support when using extensiontemplate (#944) (#946)
* add paddle job resource support when using extensiontemplate (#944)
1 parent f610d7e commit 51e1ebe

File tree

4 files changed

+989
-10
lines changed

4 files changed

+989
-10
lines changed

pkg/apiserver/controller/job/create.go

+98-4
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,27 @@ func validateJob(ctx *logger.RequestContext, request *CreateJobInfo) error {
104104
}
105105

106106
if len(request.ExtensionTemplate) != 0 {
107-
// extension template from user
108-
ctx.Logging().Infof("request ExtensionTemplate is not empty, pass validate members")
107+
// validate extension template from user
108+
if len(request.Members) == 0 {
109+
ctx.Logging().Infof("request ExtensionTemplate pass validate nil members")
110+
return nil
111+
}
112+
// todo validateMembers using these function
113+
// members not nil, continue validate
114+
if err := validateMembersRole(ctx, request); err != nil {
115+
ctx.Logging().Errorf("validate members role failed, err: %v", err)
116+
return err
117+
}
118+
// validate scheduleInfo in members
119+
if err := validateMembersScheduleInfo(ctx, request); err != nil {
120+
ctx.Logging().Errorf("validate members role failed, err: %v", err)
121+
return err
122+
}
123+
// validate resource in members
124+
if err := validateMembersResource(ctx, request); err != nil {
125+
ctx.Logging().Errorf("validate members role failed, err: %v", err)
126+
return err
127+
}
109128
} else {
110129
// validate members
111130
if err := validateJobMembers(ctx, request); err != nil {
@@ -116,6 +135,58 @@ func validateJob(ctx *logger.RequestContext, request *CreateJobInfo) error {
116135
return nil
117136
}
118137

138+
// validateScheduleInfo include
139+
func validateMembersScheduleInfo(ctx *logger.RequestContext, request *CreateJobInfo) error {
140+
var err error
141+
// validate queue
142+
for _, member := range request.Members {
143+
if err = validateMembersQueue(ctx, &member, request.SchedulingPolicy); err != nil {
144+
ctx.Logging().Errorf("Failed to check Members' Queue: %v", err)
145+
return err
146+
}
147+
// check members priority
148+
if err = checkPriority(&member.SchedulingPolicy, &request.SchedulingPolicy); err != nil {
149+
ctx.Logging().Errorf("Failed to check priority: %v", err)
150+
return err
151+
}
152+
}
153+
return nil
154+
}
155+
156+
func validateMembersResource(ctx *logger.RequestContext, request *CreateJobInfo) error {
157+
var err error
158+
sumResource := resources.EmptyResource()
159+
for index, member := range request.Members {
160+
member.Flavour, err = flavour.GetFlavourWithCheck(member.Flavour)
161+
if err != nil {
162+
log.Errorf("get flavour failed, err:%v", err)
163+
return err
164+
}
165+
request.Members[index].Flavour.ResourceInfo = member.Flavour.ResourceInfo
166+
memberRes, err := resources.NewResourceFromMap(member.Flavour.ResourceInfo.ToMap())
167+
if err != nil {
168+
ctx.Logging().Errorf("Failed to multiply replicas=%d and resourceInfo=%v, err: %v", member.Replicas, member.Flavour.ResourceInfo, err)
169+
ctx.ErrorCode = common.JobInvalidField
170+
return err
171+
}
172+
ctx.Logging().Debugf("member resource info %v", member.Flavour.ResourceInfo)
173+
if memberRes.CPU() == 0 || memberRes.Memory() == 0 {
174+
err = fmt.Errorf("flavour[%v] cpu or memory is empty", memberRes)
175+
ctx.Logging().Errorf("Failed to check flavour: %v", err)
176+
return err
177+
}
178+
memberRes.Multi(member.Replicas)
179+
sumResource.Add(memberRes)
180+
}
181+
// validate queue and total-member-resource
182+
if !sumResource.LessEqual(request.SchedulingPolicy.MaxResources) {
183+
errMsg := fmt.Sprintf("the flavour[%+v] is larger than queue's [%+v]", sumResource, request.SchedulingPolicy.MaxResources)
184+
ctx.Logging().Errorf(errMsg)
185+
return fmt.Errorf(errMsg)
186+
}
187+
return nil
188+
}
189+
119190
func validateCommonJobInfo(ctx *logger.RequestContext, requestCommonJobInfo *CommonJobInfo) error {
120191
// validate job id
121192
if requestCommonJobInfo.ID != "" {
@@ -146,6 +217,28 @@ func validateCommonJobInfo(ctx *logger.RequestContext, requestCommonJobInfo *Com
146217
return nil
147218
}
148219

220+
func validateMembersRole(ctx *logger.RequestContext, request *CreateJobInfo) error {
221+
log.Infof("validate job %s MembersRole", request.Name)
222+
frameworkRoles := getFrameworkRoles(request.Framework)
223+
for _, member := range request.Members {
224+
memberRole := schema.MemberRole(member.Role)
225+
_, find := frameworkRoles[memberRole]
226+
if !find {
227+
err := fmt.Errorf("the role[%s] for framework %s is not supported", member.Role, request.Framework)
228+
ctx.Logging().Errorf("Failed to check Members' role, err: %v", err)
229+
return err
230+
}
231+
frameworkRoles[memberRole] = frameworkRoles[memberRole] + member.Replicas
232+
}
233+
var err error
234+
request.Mode, err = checkMemberRole(request.Framework, frameworkRoles)
235+
if err != nil {
236+
ctx.Logging().Errorf("check member role for framework %s failed, err: %v", request.Framework, err)
237+
return err
238+
}
239+
return nil
240+
}
241+
149242
func validateJobMembers(ctx *logger.RequestContext, request *CreateJobInfo) error {
150243
if len(request.Members) == 0 {
151244
err := fmt.Errorf("request.Members is empty")
@@ -487,9 +580,10 @@ func buildJob(request *CreateJobInfo) (*model.Job, error) {
487580
var members []schema.Member
488581
var templateJson string
489582
var err error
490-
if len(request.ExtensionTemplate) == 0 {
583+
if len(request.Members) != 0 {
491584
members = buildMembers(request)
492-
} else {
585+
}
586+
if len(request.ExtensionTemplate) != 0 {
493587
templateJson, err = newExtensionTemplateJson(request.ExtensionTemplate)
494588
if err != nil {
495589
log.Errorf("parse extension template failed, err: %v", err)

0 commit comments

Comments
 (0)