diff --git a/fastdeploy/vision/ocr/ppocr/classifier.cc b/fastdeploy/vision/ocr/ppocr/classifier.cc index b7dcc502b1..396eb292d2 100644 --- a/fastdeploy/vision/ocr/ppocr/classifier.cc +++ b/fastdeploy/vision/ocr/ppocr/classifier.cc @@ -75,6 +75,22 @@ bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label, return true; } +bool Classifier::Predict(const cv::Mat& img, vision::OCRResult* ocr_result) { + ocr_result->cls_labels.resize(1); + ocr_result->cls_scores.resize(1); + if (!Predict(img, &(ocr_result->cls_labels[0]), + &(ocr_result->cls_scores[0]))) { + return false; + } + return true; +} + +bool Classifier::BatchPredict(const std::vector& images, + vision::OCRResult* ocr_result) { + return BatchPredict(images, &(ocr_result->cls_labels), + &(ocr_result->cls_scores)); +} + bool Classifier::BatchPredict(const std::vector& images, std::vector* cls_labels, std::vector* cls_scores) { diff --git a/fastdeploy/vision/ocr/ppocr/classifier.h b/fastdeploy/vision/ocr/ppocr/classifier.h index 324da828b4..f6d2bd526f 100755 --- a/fastdeploy/vision/ocr/ppocr/classifier.h +++ b/fastdeploy/vision/ocr/ppocr/classifier.h @@ -42,7 +42,7 @@ class FASTDEPLOY_DECL Classifier : public FastDeployModel { Classifier(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE); - + /** \brief Clone a new Classifier with less memory usage when multiple instances of the same model are created * * \return new Classifier* type unique pointer @@ -61,7 +61,24 @@ class FASTDEPLOY_DECL Classifier : public FastDeployModel { */ virtual bool Predict(const cv::Mat& img, int32_t* cls_label, float* cls_score); - + + /** \brief Predict the input image and get OCR classification model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. 
+ * \param[out] ocr_result The output of OCR classification model result will be written to this structure. + * \return true if the prediction is succeeded, otherwise false. + */ + virtual bool Predict(const cv::Mat& img, vision::OCRResult* ocr_result); + + /** \brief BatchPredict the input image and get OCR classification model result. + * + * \param[in] images The list of input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. + * \param[out] ocr_result The output of OCR classification model result will be written to this structure. + * \return true if the prediction is succeeded, otherwise false. + */ + virtual bool BatchPredict(const std::vector& images, + vision::OCRResult* ocr_result); + /** \brief BatchPredict the input image and get OCR classification model cls_result. * * \param[in] images The list of input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. diff --git a/fastdeploy/vision/ocr/ppocr/dbdetector.cc b/fastdeploy/vision/ocr/ppocr/dbdetector.cc index 914b952f28..14957e3791 100644 --- a/fastdeploy/vision/ocr/ppocr/dbdetector.cc +++ b/fastdeploy/vision/ocr/ppocr/dbdetector.cc @@ -72,6 +72,26 @@ bool DBDetector::Predict(const cv::Mat& img, return true; } +bool DBDetector::Predict(const cv::Mat& img, vision::OCRResult* ocr_result) { + if (!Predict(img, &(ocr_result->boxes))) { + return false; + } + return true; +} + +bool DBDetector::BatchPredict(const std::vector& images, + std::vector* ocr_results) { + std::vector>> det_results; + if (!BatchPredict(images, &det_results)) { + return false; + } + ocr_results->resize(det_results.size()); + for (int i = 0; i < det_results.size(); i++) { + (*ocr_results)[i].boxes = std::move(det_results[i]); + } + return true; +} + bool DBDetector::BatchPredict( const std::vector& images, std::vector>>* det_results) { diff --git a/fastdeploy/vision/ocr/ppocr/dbdetector.h b/fastdeploy/vision/ocr/ppocr/dbdetector.h index cab3a1d396..60c47016f3 100755 --- 
a/fastdeploy/vision/ocr/ppocr/dbdetector.h +++ b/fastdeploy/vision/ocr/ppocr/dbdetector.h @@ -62,6 +62,14 @@ class FASTDEPLOY_DECL DBDetector : public FastDeployModel { virtual bool Predict(const cv::Mat& img, std::vector>* boxes_result); + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. + * \param[out] ocr_result The output of OCR detection model result will be written to this structure. + * \return true if the prediction is succeeded, otherwise false. + */ + virtual bool Predict(const cv::Mat& img, vision::OCRResult* ocr_result); + /** \brief BatchPredict the input image and get OCR detection model result. * * \param[in] images The list input of image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. @@ -71,6 +79,15 @@ class FASTDEPLOY_DECL DBDetector : public FastDeployModel { virtual bool BatchPredict(const std::vector& images, std::vector>>* det_results); + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. + * \param[out] ocr_results The output of OCR detection model result will be written to this structure. + * \return true if the prediction is succeeded, otherwise false. 
+ */ + virtual bool BatchPredict(const std::vector& images, + std::vector* ocr_results); + /// Get preprocessor reference of DBDetectorPreprocessor virtual DBDetectorPreprocessor& GetPreprocessor() { return preprocessor_; diff --git a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc index a1ebd09c6d..c888884514 100644 --- a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc +++ b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc @@ -117,19 +117,19 @@ void BindPPOCRModel(pybind11::module& m) { .def("predict", [](vision::ocr::DBDetector& self, pybind11::array& data) { auto mat = PyArrayToCvMat(data); - std::vector> boxes_result; - self.Predict(mat, &boxes_result); - return boxes_result; + vision::OCRResult ocr_result; + self.Predict(mat, &ocr_result); + return ocr_result; }) .def("batch_predict", [](vision::ocr::DBDetector& self, std::vector& data) { std::vector images; - std::vector>> det_results; for (size_t i = 0; i < data.size(); ++i) { images.push_back(PyArrayToCvMat(data[i])); } - self.BatchPredict(images, &det_results); - return det_results; + std::vector ocr_results; + self.BatchPredict(images, &ocr_results); + return ocr_results; }); // Classifier @@ -215,21 +215,19 @@ void BindPPOCRModel(pybind11::module& m) { .def("predict", [](vision::ocr::Classifier& self, pybind11::array& data) { auto mat = PyArrayToCvMat(data); - int32_t cls_label; - float cls_score; - self.Predict(mat, &cls_label, &cls_score); - return std::make_pair(cls_label, cls_score); + vision::OCRResult ocr_result; + self.Predict(mat, &ocr_result); + return ocr_result; }) .def("batch_predict", [](vision::ocr::Classifier& self, std::vector& data) { std::vector images; - std::vector cls_labels; - std::vector cls_scores; for (size_t i = 0; i < data.size(); ++i) { images.push_back(PyArrayToCvMat(data[i])); } - self.BatchPredict(images, &cls_labels, &cls_scores); - return std::make_pair(cls_labels, cls_scores); + vision::OCRResult ocr_result; + 
self.BatchPredict(images, &ocr_result); + return ocr_result; }); // Recognizer @@ -315,21 +313,19 @@ void BindPPOCRModel(pybind11::module& m) { .def("predict", [](vision::ocr::Recognizer& self, pybind11::array& data) { auto mat = PyArrayToCvMat(data); - std::string text; - float rec_score; - self.Predict(mat, &text, &rec_score); - return std::make_pair(text, rec_score); + vision::OCRResult ocr_result; + self.Predict(mat, &ocr_result); + return ocr_result; }) .def("batch_predict", [](vision::ocr::Recognizer& self, std::vector& data) { std::vector images; - std::vector texts; - std::vector rec_scores; for (size_t i = 0; i < data.size(); ++i) { images.push_back(PyArrayToCvMat(data[i])); } - self.BatchPredict(images, &texts, &rec_scores); - return std::make_pair(texts, rec_scores); + vision::OCRResult ocr_result; + self.BatchPredict(images, &ocr_result); + return ocr_result; }); } } // namespace fastdeploy diff --git a/fastdeploy/vision/ocr/ppocr/recognizer.cc b/fastdeploy/vision/ocr/ppocr/recognizer.cc index 74a8a26a16..8cd16e35f2 100644 --- a/fastdeploy/vision/ocr/ppocr/recognizer.cc +++ b/fastdeploy/vision/ocr/ppocr/recognizer.cc @@ -79,12 +79,26 @@ bool Recognizer::Predict(const cv::Mat& img, std::string* text, return true; } +bool Recognizer::Predict(const cv::Mat& img, vision::OCRResult* ocr_result) { + ocr_result->text.resize(1); + ocr_result->rec_scores.resize(1); + if (!Predict(img, &(ocr_result->text[0]), &(ocr_result->rec_scores[0]))) { + return false; + } + return true; +} + bool Recognizer::BatchPredict(const std::vector& images, std::vector* texts, std::vector* rec_scores) { return BatchPredict(images, texts, rec_scores, 0, images.size(), {}); } +bool Recognizer::BatchPredict(const std::vector& images, + vision::OCRResult* ocr_result) { + return BatchPredict(images, &(ocr_result->text), &(ocr_result->rec_scores)); +} + bool Recognizer::BatchPredict(const std::vector& images, std::vector* texts, std::vector* rec_scores, diff --git 
a/fastdeploy/vision/ocr/ppocr/recognizer.h b/fastdeploy/vision/ocr/ppocr/recognizer.h index 072c191297..5cafb6852c 100755 --- a/fastdeploy/vision/ocr/ppocr/recognizer.h +++ b/fastdeploy/vision/ocr/ppocr/recognizer.h @@ -44,7 +44,7 @@ class FASTDEPLOY_DECL Recognizer : public FastDeployModel { const std::string& label_path = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE); - + /// Get model's name std::string ModelName() const { return "ppocr/ocr_rec"; } @@ -63,6 +63,23 @@ class FASTDEPLOY_DECL Recognizer : public FastDeployModel { */ virtual bool Predict(const cv::Mat& img, std::string* text, float* rec_score); + /** \brief Predict the input image and get OCR recognition model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. + * \param[out] ocr_result The output of OCR recognition model result will be written to this structure. + * \return true if the prediction is succeeded, otherwise false. + */ + virtual bool Predict(const cv::Mat& img, vision::OCRResult* ocr_result); + + /** \brief BatchPredict the input image and get OCR recognition model result. + * + * \param[in] images The list of input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. + * \param[out] ocr_result The output of OCR recognition model result will be written to this structure. + * \return true if the prediction is succeeded, otherwise false. + */ + virtual bool BatchPredict(const std::vector& images, + vision::OCRResult* ocr_result); + /** \brief BatchPredict the input image and get OCR recognition model result. * * \param[in] images The list of input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. 
@@ -72,7 +89,7 @@ class FASTDEPLOY_DECL Recognizer : public FastDeployModel { */ virtual bool BatchPredict(const std::vector& images, std::vector* texts, std::vector* rec_scores); - + virtual bool BatchPredict(const std::vector& images, std::vector* texts, std::vector* rec_scores, size_t start_index, size_t end_index,