PaddlePaddle · zxcd · Mar 21, 2025 · Mar 20, 2025 · Mar 20, 2025
diff --git a/audio/paddleaudio/compliance/kaldi.py b/audio/paddleaudio/compliance/kaldi.py
@@ -233,7 +233,7 @@ def spectrogram(waveform: Tensor,
         round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
             to FFT. Defaults to True.
         sr (int, optional): Sample rate of input waveform. Defaults to 16000.
-        snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it
+        snip_edges (bool, optional): Drop samples in the end of waveform that can't fit a signal frame when it
             is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True.
         subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False.
         window_type (str, optional): Choose type of window for FFT computation. Defaults to "povey".
@@ -443,7 +443,7 @@ def fbank(waveform: Tensor,
         round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
             to FFT. Defaults to True.
         sr (int, optional): Sample rate of input waveform. Defaults to 16000.
-        snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it
+        snip_edges (bool, optional): Drop samples in the end of waveform that can't fit a signal frame when it
             is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True.
         subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False.
         use_energy (bool, optional): Add an dimension with energy of spectrogram to the output. Defaults to False.
@@ -566,7 +566,7 @@ def mfcc(waveform: Tensor,
         round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
             to FFT. Defaults to True.
         sr (int, optional): Sample rate of input waveform. Defaults to 16000.
-        snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it
+        snip_edges (bool, optional): Drop samples in the end of waveform that can't fit a signal frame when it
             is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True.
         subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False.
         use_energy (bool, optional): Add an dimension with energy of spectrogram to the output. Defaults to False.

diff --git a/audio/paddleaudio/compliance/librosa.py b/audio/paddleaudio/compliance/librosa.py
@@ -527,7 +527,7 @@ def melspectrogram(x: np.ndarray,
     if fmax is None:
         fmax = sr // 2
     if fmin < 0 or fmin >= fmax:
-        raise ParameterError('fmin and fmax must statisfy 0<fmin<fmax')
+        raise ParameterError('fmin and fmax must satisfy 0<fmin<fmax')
 
     s = stft(
         x,

diff --git a/examples/aishell/asr3/cmd.sh b/examples/aishell/asr3/cmd.sh
@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
 # "sbatch" (Slurm)
 elif [ "${cmd_backend}" = slurm ]; then
     # The default setting is written in conf/slurm.conf.
-    # You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-    # To know the "partion" names, type "sinfo".
+    # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+    # To know the "partition" names, type "sinfo".
     # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
     # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
 

diff --git a/examples/librispeech/asr1/cmd.sh b/examples/librispeech/asr1/cmd.sh
@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
 # "sbatch" (Slurm)
 elif [ "${cmd_backend}" = slurm ]; then
     # The default setting is written in conf/slurm.conf.
-    # You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-    # To know the "partion" names, type "sinfo".
+    # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+    # To know the "partition" names, type "sinfo".
     # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
     # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
 

diff --git a/examples/librispeech/asr2/cmd.sh b/examples/librispeech/asr2/cmd.sh
@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
 # "sbatch" (Slurm)
 elif [ "${cmd_backend}" = slurm ]; then
     # The default setting is written in conf/slurm.conf.
-    # You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-    # To know the "partion" names, type "sinfo".
+    # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+    # To know the "partition" names, type "sinfo".
     # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
     # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
 

diff --git a/examples/librispeech/asr3/cmd.sh b/examples/librispeech/asr3/cmd.sh
@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
 # "sbatch" (Slurm)
 elif [ "${cmd_backend}" = slurm ]; then
     # The default setting is written in conf/slurm.conf.
-    # You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-    # To know the "partion" names, type "sinfo".
+    # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+    # To know the "partition" names, type "sinfo".
     # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
     # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
 

diff --git a/examples/librispeech/asr4/cmd.sh b/examples/librispeech/asr4/cmd.sh
@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
 # "sbatch" (Slurm)
 elif [ "${cmd_backend}" = slurm ]; then
     # The default setting is written in conf/slurm.conf.
-    # You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-    # To know the "partion" names, type "sinfo".
+    # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+    # To know the "partition" names, type "sinfo".
     # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
     # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
 

diff --git a/examples/librispeech/asr5/cmd.sh b/examples/librispeech/asr5/cmd.sh
@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
 # "sbatch" (Slurm)
 elif [ "${cmd_backend}" = slurm ]; then
     # The default setting is written in conf/slurm.conf.
-    # You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-    # To know the "partion" names, type "sinfo".
+    # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+    # To know the "partition" names, type "sinfo".
     # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
     # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
 

diff --git a/examples/mustc/st1/cmd.sh b/examples/mustc/st1/cmd.sh
@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
 # "sbatch" (Slurm)
 elif [ "${cmd_backend}" = slurm ]; then
     # The default setting is written in conf/slurm.conf.
-    # You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-    # To know the "partion" names, type "sinfo".
+    # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+    # To know the "partition" names, type "sinfo".
     # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
     # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
 

diff --git a/examples/other/ngram_lm/s0/local/kenlm_score_test.py b/examples/other/ngram_lm/s0/local/kenlm_score_test.py
@@ -97,7 +97,7 @@ def test_full_scores_words():
         if w not in model:
             print('"{0}" is an OOV'.format(w))
             oov.append(w)
-    # zh_giga.no_cna_cmn.prune01244.klm is chinese charactor LM 
+    # zh_giga.no_cna_cmn.prune01244.klm is a Chinese character LM
     assert oov == ["盘点", "不怕", "网站", "❗", "️", "海淘", "向来", "便宜", "保真",
                    "！"], 'error oov'
 

diff --git a/examples/ted_en_zh/st1/cmd.sh b/examples/ted_en_zh/st1/cmd.sh
@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
 # "sbatch" (Slurm)
 elif [ "${cmd_backend}" = slurm ]; then
     # The default setting is written in conf/slurm.conf.
-    # You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-    # To know the "partion" names, type "sinfo".
+    # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+    # To know the "partition" names, type "sinfo".
     # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
     # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
 

diff --git a/paddlespeech/dataset/aidatatang_200zh/aidatatang_200zh.py b/paddlespeech/dataset/aidatatang_200zh/aidatatang_200zh.py
@@ -62,7 +62,7 @@ def create_manifest(data_dir, manifest_path_prefix):
         if line == '':
             continue
         audio_id, text = line.split(' ', 1)
-        # remove withespace, charactor text
+        # remove whitespace, character text
         text = ''.join(text.split())
         transcript_dict[audio_id] = text
 

diff --git a/paddlespeech/dataset/aishell/aishell.py b/paddlespeech/dataset/aishell/aishell.py
@@ -65,7 +65,7 @@ def create_manifest(data_dir, manifest_path_prefix):
         if line == '':
             continue
         audio_id, text = line.split(' ', 1)
-        # remove withespace, charactor text
+        # remove whitespace, character text
         text = ''.join(text.split())
         transcript_dict[audio_id] = text
 
@@ -159,7 +159,7 @@ def check_dataset(data_dir):
         if line == '':
             continue
         audio_id, text = line.split(' ', 1)
-        # remove withespace, charactor text
+        # remove whitespace, character text
         text = ''.join(text.split())
         transcript_dict[audio_id] = text
 

diff --git a/paddlespeech/s2t/utils/error_rate.py b/paddlespeech/s2t/utils/error_rate.py
@@ -171,7 +171,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '):
 
 
 def cer(reference, hypothesis, ignore_case=False, remove_space=False):
-    """Calculate charactor error rate (CER). CER compares reference text and
+    """Calculate character error rate (CER). CER compares reference text and
     hypothesis text in char-level. CER is defined as:
 
     .. math::

diff --git a/paddlespeech/t2s/frontend/zh_frontend.py b/paddlespeech/t2s/frontend/zh_frontend.py
@@ -523,7 +523,7 @@ def _g2p_assign(self,
         initials = []
         finals = []
 
-        # to charactor list
+        # to character list
         words = self._split_word_to_char(words[0])
 
         for pinyin, char in zip(pinyin_spec, words):

diff --git a/paddlespeech/t2s/utils/error_rate.py b/paddlespeech/t2s/utils/error_rate.py
@@ -159,7 +159,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '):
 
 
 def cer(reference, hypothesis, ignore_case=False, remove_space=False):
-    """Calculate charactor error rate (CER). CER compares reference text and
+    """Calculate character error rate (CER). CER compares reference text and
     hypothesis text in char-level. CER is defined as:
     .. math::
         CER = (Sc + Dc + Ic) / Nc

diff --git a/runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder.cc b/runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder.cc
@@ -157,7 +157,7 @@ void CTCPrefixBeamSearch::AdvanceDecoding(
                     next_score.v_b = prefix_score.ViterbiScore() + prob;
                     next_score.times_b = prefix_score.Times();
 
-                    // Prefix not changed, copy the context from pefix
+                    // Prefix not changed, copy the context from prefix
                     if (context_graph_ && !next_score.has_context) {
                         next_score.CopyContext(prefix_score);
                         next_score.has_context = true;
@@ -183,7 +183,7 @@ void CTCPrefixBeamSearch::AdvanceDecoding(
                         }
                     }
 
-                    // Prefix not changed, copy the context from pefix
+                    // Prefix not changed, copy the context from prefix
                     if (context_graph_ && !next_score1.has_context) {
                         next_score1.CopyContext(prefix_score);
                         next_score1.has_context = true;

diff --git a/runtime/engine/common/frontend/cmvn.cc b/runtime/engine/common/frontend/cmvn.cc
@@ -72,7 +72,7 @@ bool CMVN::Read(std::vector<BaseFloat>* feats) {
         return false;
     }
 
-    // appply cmvn
+    // apply cmvn
     kaldi::Timer timer;
     Compute(feats);
     VLOG(1) << "CMVN::Read cost: " << timer.Elapsed() << " sec.";

diff --git a/runtime/engine/common/frontend/cmvn.h b/runtime/engine/common/frontend/cmvn.h
@@ -29,7 +29,7 @@ class CMVN : public FrontendInterface {
     // the length of feats = feature_row * feature_dim,
     // the Matrix is squashed into Vector
     virtual bool Read(std::vector<kaldi::BaseFloat>* feats);
-    // the dim_ is the feautre dim.
+    // the dim_ is the feature dim.
     virtual size_t Dim() const { return dim_; }
     virtual void SetFinished() { base_extractor_->SetFinished(); }
     virtual bool IsFinished() const { return base_extractor_->IsFinished(); }

diff --git a/runtime/engine/common/frontend/db_norm.h b/runtime/engine/common/frontend/db_norm.h
@@ -47,7 +47,7 @@ class DecibelNormalizer : public FrontendInterface {
         std::unique_ptr<FrontendInterface> base_extractor);
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& waves);
     virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
-    // noramlize audio, the dim is 1.
+    // normalize audio, the dim is 1.
     virtual size_t Dim() const { return dim_; }
     virtual void SetFinished() { base_extractor_->SetFinished(); }
     virtual bool IsFinished() const { return base_extractor_->IsFinished(); }

diff --git a/runtime/engine/common/matrix/kaldi-matrix.cc b/runtime/engine/common/matrix/kaldi-matrix.cc
@@ -244,8 +244,8 @@ void MatrixBase<Real>::SymAddMat2(const Real alpha,
   /// function will produce NaN in the output. This is a bug in the
   /// ATLAS library. To overcome this, the AddMatMat function, which calls
   /// cblas_Xgemm(...) rather than cblas_Xsyrk(...), is used in this special
-  /// sitation.
-  /// Wei Shi: Note this bug is observerd for single precision matrix
+  /// situation.
+  /// Wei Shi: Note this bug is observed for single precision matrix
   /// on a 64-bit machine
 #ifdef HAVE_ATLAS
   if (transA == kTrans && num_rows_ >= 56) {
@@ -683,7 +683,7 @@ empty.
   if (V_in == NULL) tmpV.Resize(1, this->num_cols_);  // work-space if V_in
 empty.
 
-  /// Impementation notes:
+  /// Implementation notes:
   /// Lapack works in column-order, therefore the dimensions of *this are
   /// swapped as well as the U and V matrices.
 
@@ -2378,7 +2378,7 @@ bool ReadHtk(std::istream &is, Matrix<Real> *M_ptr, HtkHeader *header_ptr)
   Matrix<Real> &M = *M_ptr;
   HtkHeader htk_hdr;
 
-  // TODO(arnab): this fails if the HTK file has CRC cheksum or is compressed.
+  // TODO(arnab): this fails if the HTK file has CRC checksum or is compressed.
   is.read((char*)&htk_hdr, sizeof(htk_hdr));  // we're being really POSIX here!
   if (is.fail()) {
     KALDI_WARN << "Could not read header from HTK feature file ";

diff --git a/runtime/engine/common/matrix/kaldi-vector.cc b/runtime/engine/common/matrix/kaldi-vector.cc
@@ -235,7 +235,7 @@ void VectorBase<Real>::CopyRowsFromMat(const MatrixBase<Real> &mat) {
         memcpy(inc_data, mat.Data(), cols * rows * sizeof(Real));
     } else {
         for (MatrixIndexT i = 0; i < rows; i++) {
-            // copy the data to the propper position
+            // copy the data to the proper position
             memcpy(inc_data, mat.RowData(i), cols * sizeof(Real));
             // set new copy position
             inc_data += cols;

diff --git a/runtime/engine/common/utils/file_utils.cc b/runtime/engine/common/utils/file_utils.cc
@@ -44,7 +44,7 @@ std::string ReadFile2String(const std::string& path) {
 }
 
 bool FileExists(const std::string& strFilename) { 
-    // this funciton if from:
+    // this function is from:
     // https://github.com/kaldi-asr/kaldi/blob/master/src/fstext/deterministic-fst-test.cc
     struct stat stFileInfo; 
     bool blnReturn; 

diff --git a/runtime/engine/kaldi/lat/kaldi-lattice.cc b/runtime/engine/kaldi/lat/kaldi-lattice.cc
@@ -407,7 +407,7 @@ bool WriteLattice(std::ostream &os, bool binary, const Lattice &t) {
     if (os.fail())
       KALDI_WARN << "Stream failure detected.";
     // Write another newline as a terminating character.  The read routine will
-    // detect this [this is a Kaldi mechanism, not somethig in the original
+    // detect this [this is a Kaldi mechanism, not something in the original
     // OpenFst code].
     os << '\n';
     return os.good();

diff --git a/runtime/examples/README.md b/runtime/examples/README.md
@@ -34,7 +34,7 @@ bash run.sh --stop_stage 4
 
 ## Display Model with [Netron](https://github.com/lutzroeder/netron)  
 
-If you have a model, we can using this commnd to show model graph.
+If you have a model, we can use this command to show the model graph.
 
 For example:
 ```

diff --git a/runtime/examples/audio_classification/README.md b/runtime/examples/audio_classification/README.md
@@ -74,7 +74,7 @@ includes/
 #### set path
 push resource into android phone
 
-1. change resource path in conf to gloabal path, such as:
+1. change resource path in conf to global path, such as:
 
     [CONF]
     wav_normal=true
@@ -92,9 +92,9 @@ push resource into android phone
     high_freq=14000
     dither=0.0
 2. adb push conf label_list scp test.wav /data/local/tmp/
-3. set reource path in android demo(android_demo/app/src/main/cpp/native-lib.cpp) to actual path, such as:
+3. set resource path in android demo(android_demo/app/src/main/cpp/native-lib.cpp) to actual path, such as:
 
 std::string conf_path = "/data/local/tmp/conf";
 std::string wav_path = "/data/local/tmp/test.wav";
 
-4. excecute android_demo in android studio
+4. execute android_demo in android studio
diff --git a/runtime/examples/text_lm/local/mmseg.py b/runtime/examples/text_lm/local/mmseg.py
@@ -253,7 +253,6 @@ def createChunks(self):
                             # print(word3.length, word3.text)
                             if word3.length == -1:
                                 chunk = Chunk(word1, word2)
-                                # print("Ture")
                             else:
                                 chunk = Chunk(word1, word2, word3)
                             chunks.append(chunk)

diff --git a/runtime/patch/openfst/src/include/fst/flags.h b/runtime/patch/openfst/src/include/fst/flags.h
@@ -181,8 +181,8 @@ template <typename T>
 class FlagRegisterer {
  public:
   FlagRegisterer(const string &name, const FlagDescription<T> &desc) {
-    auto registr = FlagRegister<T>::GetRegister();
-    registr->SetDescription(name, desc);
+    auto r = FlagRegister<T>::GetRegister();
+    r->SetDescription(name, desc);
   }
 
  private:

diff --git a/tests/test_tipc/conformer/scripts/aishell_tiny.py b/tests/test_tipc/conformer/scripts/aishell_tiny.py
@@ -62,7 +62,7 @@ def create_manifest(data_dir, manifest_path_prefix):
         if line == '':
             continue
         audio_id, text = line.split(' ', 1)
-        # remove withespace, charactor text
+        # remove whitespace, character text
         text = ''.join(text.split())
         transcript_dict[audio_id] = text
 

diff --git a/tests/unit/cli/aishell_test_prepare.py b/tests/unit/cli/aishell_test_prepare.py
@@ -63,7 +63,7 @@ def create_manifest(data_dir, manifest_path_prefix):
         if line == '':
             continue
         audio_id, text = line.split(' ', 1)
-        # remove withespace, charactor text
+        # remove whitespace, character text
         text = ''.join(text.split())
         transcript_dict[audio_id] = text
 

diff --git a/tests/unit/tts/test_snapshot.py b/tests/unit/tts/test_snapshot.py
@@ -30,7 +30,7 @@ def _test_snapshot():
     # use a simplest iterable object as dataloader
     dataloader = count()
 
-    # hack the training proecss: training does nothing except increse iteration
+    # hack the training process: training does nothing except increase iteration
     updater = StandardUpdater(model, optimizer, dataloader=dataloader)
     updater.update_core = lambda x: None
 

diff --git a/tools/extras/install_liblbfgs.sh b/tools/extras/install_liblbfgs.sh
@@ -17,7 +17,7 @@ cd liblbfgs-$VER
 ./configure --prefix=`pwd`
 make
 # due to the liblbfgs project directory structure, we have to use -i
-# but the erros are completely harmless
+# but the errors are completely harmless
 make -i install
 cd ..
 

diff --git a/utils/format_triplet_data.py b/utils/format_triplet_data.py
@@ -44,7 +44,7 @@
 # bpe
 add_arg('spm_model_prefix', str, None,
      "spm model prefix, spm_model_%(bpe_mode)_%(count_threshold), only need when `unit_type` is spm")
-add_arg('output_path',  str, None, "filepath of formated manifest.", required=True)
+add_arg('output_path',  str, None, "filepath of formatted manifest.", required=True)
 # yapf: disable
 args = parser.parse_args()
 

diff --git a/utils/tokenizer.perl b/utils/tokenizer.perl
@@ -79,7 +79,7 @@
         print "  -b     ... disable Perl buffering.\n";
         print "  -time  ... enable processing time calculation.\n";
         print "  -penn  ... use Penn treebank-like tokenization.\n";
-        print "  -protected FILE  ... specify file with patters to be protected in tokenisation.\n";
+        print "  -protected FILE  ... specify file with patterns to be protected in tokenisation.\n";
 	print "  -no-escape ... don't perform HTML escaping on apostrophy, quotes, etc.\n";
 	exit;
 }