
Commit 17953b3

Author: xutianbing

add TestUtil.h and TestUtil.cpp, moving from gserver/tests/ to testing/

1 parent 936b0ed · commit 17953b3

File tree

paddle/testing/TestUtil.cpp
paddle/testing/TestUtil.h

2 files changed: +297 additions, 0 deletions


paddle/testing/TestUtil.cpp

Lines changed: 219 additions & 0 deletions
@@ -0,0 +1,219 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "TestUtil.h"
#include <gflags/gflags.h>
#include "paddle/math/SparseMatrix.h"

DEFINE_int32(fixed_seq_length, 0, "Produce some sequence of fixed length");

namespace paddle {

std::string randStr(const int len) {
  std::string str =
      "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
  std::string s = "";
  for (int i = 0; i < len; ++i) s += str[(rand() % 62)];  // NOLINT
  return s;
}

MatrixPtr makeRandomSparseMatrix(size_t height,
                                 size_t width,
                                 bool withValue,
                                 bool useGpu,
                                 bool equalNnzPerSample) {
  std::vector<int64_t> ids(height);
  std::vector<int64_t> indices(height + 1);
  indices[0] = 0;

  std::function<size_t()> randomer = [] { return uniformRandom(10); };
  if (equalNnzPerSample) {
    size_t n = 0;
    do {
      n = uniformRandom(10);
    } while (!n);
    randomer = [=] { return n; };
  }
  for (size_t i = 0; i < height; ++i) {
    indices[i + 1] = indices[i] + std::min(randomer(), width);
    ids[i] = i;
  }

  if (!withValue) {
    std::vector<sparse_non_value_t> data;
    data.resize(indices[height] - indices[0]);
    for (size_t i = 0; i < data.size(); ++i) {
      data[i].col = uniformRandom(width);
    }
    auto mat = Matrix::createSparseMatrix(
        height, width, data.size(), NO_VALUE, SPARSE_CSR, false, useGpu);
    if (useGpu) {
      std::dynamic_pointer_cast<GpuSparseMatrix>(mat)->copyFrom(
          ids.data(), indices.data(), data.data(), HPPL_STREAM_DEFAULT);
    } else {
      std::dynamic_pointer_cast<CpuSparseMatrix>(mat)->copyFrom(
          ids.data(), indices.data(), data.data());
    }
    return mat;
  } else {
    std::vector<sparse_float_value_t> data;
    data.resize(indices[height] - indices[0]);
    for (size_t i = 0; i < data.size(); ++i) {
      data[i].col = uniformRandom(width);
      data[i].value = rand() / static_cast<float>(RAND_MAX);  // NOLINT
    }
    auto mat = Matrix::createSparseMatrix(
        height, width, data.size(), FLOAT_VALUE, SPARSE_CSR, false, useGpu);
    if (useGpu) {
      std::dynamic_pointer_cast<GpuSparseMatrix>(mat)->copyFrom(
          ids.data(), indices.data(), data.data(), HPPL_STREAM_DEFAULT);
    } else {
      std::dynamic_pointer_cast<CpuSparseMatrix>(mat)->copyFrom(
          ids.data(), indices.data(), data.data());
    }
    return mat;
  }
}

void generateSequenceStartPositions(size_t batchSize,
                                    IVectorPtr& sequenceStartPositions) {
  ICpuGpuVectorPtr gpuCpuVec;
  generateSequenceStartPositions(batchSize, gpuCpuVec);
  sequenceStartPositions = gpuCpuVec->getMutableVector(false);
}

void generateSequenceStartPositions(size_t batchSize,
                                    ICpuGpuVectorPtr& sequenceStartPositions) {
  int numSeqs;
  if (FLAGS_fixed_seq_length != 0) {
    numSeqs = std::ceil((float)batchSize / (float)FLAGS_fixed_seq_length);
  } else {
    numSeqs = batchSize / 10 + 1;
  }
  sequenceStartPositions =
      ICpuGpuVector::create(numSeqs + 1, /* useGpu= */ false);
  int* buf = sequenceStartPositions->getMutableData(false);
  int64_t pos = 0;
  int len = FLAGS_fixed_seq_length;
  int maxLen = 2 * batchSize / numSeqs;
  for (int i = 0; i < numSeqs; ++i) {
    if (FLAGS_fixed_seq_length == 0) {
      len = uniformRandom(
                std::min<int64_t>(maxLen, batchSize - pos - numSeqs + i)) +
            1;
    }
    buf[i] = pos;
    pos += len;
    VLOG(1) << " len=" << len;
  }
  buf[numSeqs] = batchSize;
}

void generateSubSequenceStartPositions(
    const ICpuGpuVectorPtr& sequenceStartPositions,
    ICpuGpuVectorPtr& subSequenceStartPositions) {
  int numSeqs = sequenceStartPositions->getSize() - 1;
  const int* buf = sequenceStartPositions->getData(false);
  int numOnes = 0;
  for (int i = 0; i < numSeqs; ++i) {
    if (buf[i + 1] - buf[i] == 1) {
      ++numOnes;
    }
  }
  // each seq has two sub-seq except length 1
  int numSubSeqs = numSeqs * 2 - numOnes;
  subSequenceStartPositions =
      ICpuGpuVector::create(numSubSeqs + 1, /* useGpu= */ false);
  int* subBuf = subSequenceStartPositions->getMutableData(false);
  int j = 0;
  for (int i = 0; i < numSeqs; ++i) {
    if (buf[i + 1] - buf[i] == 1) {
      subBuf[j++] = buf[i];
    } else {
      int len = uniformRandom(buf[i + 1] - buf[i] - 1) + 1;
      subBuf[j++] = buf[i];
      subBuf[j++] = buf[i] + len;
    }
  }
  subBuf[j] = buf[numSeqs];
}

void generateMDimSequenceData(const IVectorPtr& sequenceStartPositions,
                              IVectorPtr& cpuSequenceDims) {
  /* generate sequences with 2 dims */
  int numSeqs = sequenceStartPositions->getSize() - 1;
  int numDims = 2;

  cpuSequenceDims = IVector::create(numSeqs * numDims, /* useGpu= */ false);
  int* bufStarts = sequenceStartPositions->getData();
  int* bufDims = cpuSequenceDims->getData();

  for (int i = 0; i < numSeqs; i++) {
    int len = bufStarts[i + 1] - bufStarts[i];
    /* get width and height randomly */
    std::vector<int> dimVec;
    for (int j = 0; j < len; j++) {
      if (len % (j + 1) == 0) {
        dimVec.push_back(1);
      }
    }
    int idx = rand() % dimVec.size();  // NOLINT use rand_r
    bufDims[i * numDims] = dimVec[idx];
    bufDims[i * numDims + 1] = len / dimVec[idx];
  }
}

void generateMDimSequenceData(const ICpuGpuVectorPtr& sequenceStartPositions,
                              IVectorPtr& cpuSequenceDims) {
  /* generate sequences with 2 dims */
  int numSeqs = sequenceStartPositions->getSize() - 1;
  int numDims = 2;

  cpuSequenceDims = IVector::create(numSeqs * numDims, /* useGpu= */ false);
  const int* bufStarts = sequenceStartPositions->getData(false);
  int* bufDims = cpuSequenceDims->getData();

  for (int i = 0; i < numSeqs; i++) {
    int len = bufStarts[i + 1] - bufStarts[i];
    /* get width and height randomly */
    std::vector<int> dimVec;
    for (int j = 0; j < len; j++) {
      if (len % (j + 1) == 0) {
        dimVec.push_back(1);
      }
    }
    int idx = rand() % dimVec.size();  // NOLINT use rand_r
    bufDims[i * numDims] = dimVec[idx];
    bufDims[i * numDims + 1] = len / dimVec[idx];
  }
}

void checkMatrixEqual(const MatrixPtr& a, const MatrixPtr& b) {
  EXPECT_EQ(a->getWidth(), b->getWidth());
  EXPECT_EQ(a->getHeight(), b->getHeight());
  EXPECT_EQ(a->isTransposed(), b->isTransposed());
  for (size_t r = 0; r < a->getHeight(); ++r) {
    for (size_t c = 0; c < a->getWidth(); ++c) {
      EXPECT_FLOAT_EQ(a->getElement(r, c), b->getElement(r, c));
    }
  }
}

void checkVectorEqual(const IVectorPtr& a, const IVectorPtr& b) {
  EXPECT_EQ(a->getSize(), b->getSize());
  for (size_t r = 0; r < a->getSize(); ++r) {
    EXPECT_FLOAT_EQ(a->get(r), b->get(r));
  }
}
}  // namespace paddle
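
As a rough illustration of the start-position convention produced above: generateSequenceStartPositions fills numSeqs + 1 offsets where sequence i covers rows [buf[i], buf[i + 1]) and the last entry equals batchSize, and passing --fixed_seq_length=N via gflags makes every sequence N rows long instead of a random length. Below is a minimal sketch of a test that consumes the helper; the test name, batch size, and assertions are illustrative and not part of this commit.

// Minimal usage sketch (illustrative, not part of this commit): verify the
// invariants of the offsets produced by generateSequenceStartPositions.
#include <gtest/gtest.h>
#include "paddle/testing/TestUtil.h"  // assumed include path after the move

using namespace paddle;  // NOLINT

TEST(TestUtil, sequenceStartPositions) {
  const size_t batchSize = 100;  // illustrative value
  ICpuGpuVectorPtr starts;
  generateSequenceStartPositions(batchSize, starts);

  // numSeqs + 1 offsets: starts at 0, ends at batchSize, strictly increasing.
  const int* buf = starts->getData(false);
  size_t numSeqs = starts->getSize() - 1;
  EXPECT_EQ(buf[0], 0);
  EXPECT_EQ(buf[numSeqs], static_cast<int>(batchSize));
  for (size_t i = 0; i < numSeqs; ++i) {
    EXPECT_LT(buf[i], buf[i + 1]);  // every sequence is at least one row long
  }
}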

paddle/testing/TestUtil.h

Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <gtest/gtest.h>
#include "paddle/math/Matrix.h"

namespace paddle {

std::string randStr(const int len);

inline int uniformRandom(int n) { return n == 0 ? 0 : rand() % n; }

inline bool approximatelyEqual(float a, float b, float epsilon) {
  return fabs(a - b) <= ((fabs(a) < fabs(b) ? fabs(b) : fabs(a)) * epsilon);
}

MatrixPtr makeRandomSparseMatrix(size_t height,
                                 size_t width,
                                 bool withValue,
                                 bool useGpu,
                                 bool equalNnzPerSample = false);

/**
 * @brief generate sequenceStartPositions for INPUT_SEQUENCE_DATA,
 *        INPUT_HASSUB_SEQUENCE_DATA and INPUT_SEQUENCE_LABEL
 *
 * @param batchSize batchSize
 *        sequenceStartPositions[out] generation output
 */
void generateSequenceStartPositions(size_t batchSize,
                                    IVectorPtr& sequenceStartPositions);

void generateSequenceStartPositions(size_t batchSize,
                                    ICpuGpuVectorPtr& sequenceStartPositions);

/**
 * @brief generate subSequenceStartPositions for INPUT_HASSUB_SEQUENCE_DATA
 *        according to sequenceStartPositions
 *
 * @param sequenceStartPositions[in] input
 *        subSequenceStartPositions[out] generation output
 */
void generateSubSequenceStartPositions(const IVectorPtr& sequenceStartPositions,
                                       IVectorPtr& subSequenceStartPositions);

void generateSubSequenceStartPositions(
    const ICpuGpuVectorPtr& sequenceStartPositions,
    ICpuGpuVectorPtr& subSequenceStartPositions);

/**
 * @brief generate cpuSequenceDims for INPUT_SEQUENCE_MDIM_DATA according to
 *        sequenceStartPositions
 *
 * @param sequenceStartPositions[in] input
 *        cpuSequenceDims[out] generation output
 */
void generateMDimSequenceData(const IVectorPtr& sequenceStartPositions,
                              IVectorPtr& cpuSequenceDims);
void generateMDimSequenceData(const ICpuGpuVectorPtr& sequenceStartPositions,
                              IVectorPtr& cpuSequenceDims);

void checkMatrixEqual(const MatrixPtr& a, const MatrixPtr& b);

void checkVectorEqual(const IVectorPtr& a, const IVectorPtr& b);
}  // namespace paddle
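
The inline helpers declared here (uniformRandom, approximatelyEqual, randStr) can be exercised directly from any gtest target that links against paddle/testing. Below is a minimal sketch of such a test; the test name, constants, and include path are illustrative assumptions, not part of this commit.

// Minimal usage sketch (illustrative, not part of this commit) of the
// header's inline helpers.
#include <gtest/gtest.h>
#include "paddle/testing/TestUtil.h"  // assumed include path after the move

using namespace paddle;  // NOLINT

TEST(TestUtil, inlineHelpers) {
  // uniformRandom(n) returns a value in [0, n); n == 0 maps to 0.
  for (int i = 0; i < 100; ++i) {
    int v = uniformRandom(10);
    EXPECT_GE(v, 0);
    EXPECT_LT(v, 10);
  }
  EXPECT_EQ(uniformRandom(0), 0);

  // approximatelyEqual scales the tolerance by the larger magnitude.
  EXPECT_TRUE(approximatelyEqual(1.0000f, 1.0001f, 1e-3f));
  EXPECT_FALSE(approximatelyEqual(1.0f, 2.0f, 1e-3f));

  // randStr(len) returns a random alphanumeric string of exactly len chars.
  EXPECT_EQ(randStr(16).size(), 16u);
}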
