I found something strange when running code on a Xavier. The code is as follows:
#include <iostream>
#include <chrono>
#include <vector>
/// Micro-benchmark of a per-pixel arg-max over a planar score buffer.
///
/// The buffer holds `all_channel` planes of `rows * cols` floats each; the
/// value of channel `c` at pixel `idx` lives at `c * size + idx`. For every
/// pixel the loop records the index of the channel with the largest value
/// (the lowest channel wins ties, because the comparison is a strict `<`).
///
/// The three test_funcN() methods run the same loop nest and differ only in:
///   - test_func1: 800 x 768, channel count read from all_class_num_
///   - test_func2: 800 x 800, channel count read from all_class_num_
///   - test_func3: 800 x 768, channel count is the literal 7
/// The loop nest is deliberately written out in each method rather than shared
/// through a helper, so each variant is compiled exactly as it is spelled here.
class test{
public:
    /// Sets the channel count used by test_func1() and test_func2() to 7.
    test() : all_class_num_(7) {}

    /// Times the arg-max loop on an 800 x 768 image with the channel count
    /// taken from the member all_class_num_, and prints "1 COST: <ms> ms. ".
    void test_func1(){
        int rows = 800;
        int cols = 768;
        int size = rows * cols;
        int all_channel = all_class_num_;
        // Both vectors are value-initialised (all zeros) before the timer starts.
        std::vector<float> classify_map1(size);
        std::vector<float> classify_data1(size * all_channel);
        const Clock::time_point start = Clock::now();
        for (int row = 0; row < rows; ++row)
        {
            int idx = row * cols;
            for (int col = 0; col < cols; ++col, ++idx)
            {
                float maxval = classify_data1[idx];
                int index = 0;
                for (int c = 1; c < all_channel; ++c)
                {
                    float tmp_val = classify_data1[c * size + idx];
                    if (maxval < tmp_val)
                    {
                        maxval = tmp_val;
                        index = c;
                    }
                }
                classify_map1[idx] = static_cast<float>(index);
            }
        }
        // Take the end timestamp before touching std::cout so that no output
        // work is counted as part of the measured interval.
        const Clock::time_point stop = Clock::now();
        report("1 COST: ", start, stop);
    }

    /// Times the arg-max loop on an 800 x 800 image with the channel count
    /// taken from the member all_class_num_, and prints "2 COST: <ms> ms. ".
    void test_func2(){
        int rows = 800;
        int cols = 800;
        int size = rows * cols;
        int all_channel = all_class_num_;
        std::vector<float> classify_map1(size);
        std::vector<float> classify_data1(size * all_channel);
        const Clock::time_point start = Clock::now();
        for (int row = 0; row < rows; ++row)
        {
            int idx = row * cols;
            for (int col = 0; col < cols; ++col, ++idx)
            {
                float maxval = classify_data1[idx];
                int index = 0;
                for (int c = 1; c < all_channel; ++c)
                {
                    float tmp_val = classify_data1[c * size + idx];
                    if (maxval < tmp_val)
                    {
                        maxval = tmp_val;
                        index = c;
                    }
                }
                classify_map1[idx] = static_cast<float>(index);
            }
        }
        const Clock::time_point stop = Clock::now();
        report("2 COST: ", start, stop);
    }

    /// Times the arg-max loop on an 800 x 768 image with the channel count
    /// given as the literal 7, and prints "3 COST: <ms> ms. ".
    void test_func3(){
        int rows = 800;
        int cols = 768;
        int size = rows * cols;
        int all_channel = 7;
        std::vector<float> classify_map1(size);
        std::vector<float> classify_data1(size * all_channel);
        const Clock::time_point start = Clock::now();
        for (int row = 0; row < rows; ++row)
        {
            int idx = row * cols;
            for (int col = 0; col < cols; ++col, ++idx)
            {
                float maxval = classify_data1[idx];
                int index = 0;
                for (int c = 1; c < all_channel; ++c)
                {
                    float tmp_val = classify_data1[c * size + idx];
                    if (maxval < tmp_val)
                    {
                        maxval = tmp_val;
                        index = c;
                    }
                }
                classify_map1[idx] = static_cast<float>(index);
            }
        }
        const Clock::time_point stop = Clock::now();
        report("3 COST: ", start, stop);
    }

    /// Returns the current std::chrono::system_clock time as seconds since
    /// that clock's epoch. The benchmarks above do not use it for interval
    /// timing (they use a monotonic clock); it is kept for existing callers.
    double getTime(void) {
        const auto t = std::chrono::system_clock::now();
        const auto t_sec = std::chrono::duration_cast<std::chrono::duration<double>>(t.time_since_epoch());
        return t_sec.count();
    }

private:
    // steady_clock never goes backwards, so differences are valid intervals.
    using Clock = std::chrono::steady_clock;

    /// Prints "<label><elapsed milliseconds> ms. " followed by a newline.
    static void report(const char* label, Clock::time_point start, Clock::time_point stop){
        const std::chrono::duration<double, std::milli> elapsed = stop - start;
        std::cout << label << elapsed.count() << " ms. " << '\n';
    }

    int all_class_num_;  // channel count used by test_func1() and test_func2()
};
// Runs the three benchmark variants back to back for 100 rounds.
int main(){
    const int n = 100;
    test t1;
    for (int remaining = n; remaining > 0; --remaining) {
        t1.test_func1();
        t1.test_func2();
        t1.test_func3();
    }
}
For this code, I found that 1 costs nearly 30 ms, 2 costs just 6 ms, and 3 costs 8 ms, even though they all have the same logic and nothing is really different. I tested on two more Xaviers and the result is the same.
I can’t answer, but it would be worth seeing if the first test simply takes longer. What happens if you change main()
as follows:
// Warm-up experiment: one discarded test_func1() call, then 100 rounds in
// which the three variants are each run twice in sequence.
int main(){
    const int n = 100;
    test t1;
    // Throw-away first run of variant 1, in case caches or other transient
    // state influence the timing of the very first call.
    t1.test_func1();
    int round = 0;
    while (round < n) {
        // First pass over all three variants.
        t1.test_func1();
        t1.test_func2();
        t1.test_func3();
        // Second pass, to see whether test_func1() changes because of
        // transient effects.
        t1.test_func1();
        t1.test_func2();
        t1.test_func3();
        ++round;
    }
}
1 COST: 66.4811 ms.
1 COST: 50.1866 ms.
2 COST: 12.2271 ms.
3 COST: 12.8722 ms.
1 COST: 48.3041 ms.
2 COST: 12.5794 ms.
3 COST: 13.4814 ms.
1 COST: 53.2944 ms.
2 COST: 8.66556 ms.
3 COST: 6.001 ms.
1 COST: 53.5984 ms.
2 COST: 8.51417 ms.
3 COST: 5.98359 ms.
1 COST: 57.905 ms.
2 COST: 9.15194 ms.
3 COST: 6.89697 ms.
1 COST: 53.7579 ms.
2 COST: 8.74329 ms.
3 COST: 5.9824 ms.
1 COST: 52.9311 ms.
2 COST: 8.82268 ms.
3 COST: 6.04916 ms.
1 COST: 52.8085 ms.
2 COST: 8.6503 ms.
3 COST: 7.48086 ms.
1 COST: 53.8533 ms.
2 COST: 8.48484 ms.
3 COST: 6.26445 ms.
1 COST: 55.4714 ms.
2 COST: 8.40855 ms.
3 COST: 6.22916 ms.
1 COST: 53.1902 ms.
2 COST: 8.40759 ms.
3 COST: 6.02627 ms.
1 COST: 53.3967 ms.
2 COST: 8.41641 ms.
3 COST: 6.17576 ms.
1 COST: 52.5427 ms.
2 COST: 8.38852 ms.
3 COST: 5.92113 ms.
1 COST: 52.5582 ms.
2 COST: 8.44216 ms.
3 COST: 5.84698 ms.
1 COST: 52.8677 ms.
2 COST: 8.40569 ms.
3 COST: 5.90205 ms.
1 COST: 53.0884 ms.
2 COST: 9.12142 ms.
3 COST: 6.75368 ms.
1 COST: 52.8762 ms.
2 COST: 10.7553 ms.
3 COST: 5.8949 ms.
1 COST: 53.1464 ms.
2 COST: 9.59063 ms.
3 COST: 5.89299 ms.
1 COST: 52.6597 ms.
2 COST: 8.37326 ms.
3 COST: 5.74017 ms.
1 COST: 52.7501 ms.
2 COST: 8.37874 ms.
3 COST: 5.79929 ms.
1 COST: 52.9511 ms.
2 COST: 8.38208 ms.
3 COST: 5.85079 ms.
1 COST: 53.1948 ms.
2 COST: 8.47721 ms.
3 COST: 5.85604 ms.
1 COST: 52.5942 ms.
2 COST: 8.38614 ms.
3 COST: 7.2279 ms.
1 COST: 53.1187 ms.
2 COST: 8.41212 ms.
3 COST: 5.79214 ms.
1 COST: 52.7303 ms.
2 COST: 9.55915 ms.
3 COST: 7.23958 ms.
1 COST: 52.8436 ms.
2 COST: 9.49407 ms.
3 COST: 8.7235 ms.
1 COST: 52.8786 ms.
2 COST: 8.39949 ms.
3 COST: 5.74756 ms.
1 COST: 53.339 ms.
2 COST: 8.35443 ms.
3 COST: 5.89776 ms.
1 COST: 55.6509 ms.
2 COST: 8.42857 ms.
3 COST: 6.14238 ms.
1 COST: 53.2033 ms.
2 COST: 8.32772 ms.
3 COST: 5.85198 ms.
1 COST: 52.5994 ms.
2 COST: 8.42953 ms.
3 COST: 5.71299 ms.
1 COST: 52.6462 ms.
2 COST: 8.35061 ms.
3 COST: 5.86534 ms.
1 COST: 53.7517 ms.
2 COST: 9.62758 ms.
3 COST: 6.67119 ms.
1 COST: 53.0837 ms.
2 COST: 9.48787 ms.
3 COST: 5.84149 ms.
1 COST: 52.9647 ms.
2 COST: 8.43382 ms.
3 COST: 5.89776 ms.
1 COST: 53.1213 ms.
2 COST: 8.25548 ms.
3 COST: 5.68938 ms.
1 COST: 52.8872 ms.
2 COST: 8.37636 ms.
3 COST: 5.759 ms.
1 COST: 53.1888 ms.
2 COST: 8.27765 ms.
3 COST: 7.24173 ms.
1 COST: 52.8986 ms.
2 COST: 8.36778 ms.
3 COST: 5.80263 ms.
1 COST: 53.1559 ms.
2 COST: 8.30746 ms.
3 COST: 5.77164 ms.
1 COST: 52.438 ms.
2 COST: 9.54032 ms.
3 COST: 5.74946 ms.
1 COST: 52.3658 ms.
2 COST: 9.51409 ms.
3 COST: 5.88393 ms.
1 COST: 51.9323 ms.
2 COST: 8.39806 ms.
3 COST: 5.77474 ms.
1 COST: 51.9047 ms.
2 COST: 8.26001 ms.
3 COST: 5.6839 ms.
1 COST: 51.9321 ms.
2 COST: 8.28004 ms.
3 COST: 7.2422 ms.
1 COST: 51.899 ms.
2 COST: 8.32891 ms.
3 COST: 5.83553 ms.
1 COST: 51.8959 ms.
2 COST: 8.3065 ms.
3 COST: 5.68342 ms.
1 COST: 51.8646 ms.
2 COST: 8.28266 ms.
3 COST: 5.80072 ms.
1 COST: 51.8782 ms.
2 COST: 9.48906 ms.
3 COST: 5.75662 ms.
1 COST: 51.8916 ms.
2 COST: 9.56011 ms.
3 COST: 7.28297 ms.
1 COST: 51.8858 ms.
2 COST: 8.29315 ms.
3 COST: 5.78022 ms.
1 COST: 52.279 ms.
2 COST: 8.30722 ms.
3 COST: 5.71036 ms.
1 COST: 51.9407 ms.
2 COST: 8.28338 ms.
3 COST: 5.75376 ms.
1 COST: 51.9457 ms.
2 COST: 8.27622 ms.
3 COST: 5.67579 ms.
1 COST: 52.3186 ms.
2 COST: 8.30865 ms.
3 COST: 6.65617 ms.
1 COST: 52.2528 ms.
2 COST: 8.33035 ms.
3 COST: 5.8527 ms.
1 COST: 52.0058 ms.
2 COST: 9.57942 ms.
3 COST: 5.85175 ms.
1 COST: 51.8904 ms.
2 COST: 9.50265 ms.
3 COST: 5.78284 ms.
1 COST: 52.3548 ms.
2 COST: 8.31699 ms.
3 COST: 5.82933 ms.
1 COST: 52.5563 ms.
2 COST: 8.3158 ms.
3 COST: 6.04534 ms.
1 COST: 51.9121 ms.
2 COST: 8.32176 ms.
3 COST: 5.76496 ms.
1 COST: 52.4931 ms.
2 COST: 8.33321 ms.
3 COST: 5.84412 ms.
1 COST: 52.4418 ms.
2 COST: 8.33488 ms.
3 COST: 5.8105 ms.
1 COST: 52.1014 ms.
2 COST: 8.32391 ms.
3 COST: 7.89189 ms.
1 COST: 52.2532 ms.
2 COST: 8.32272 ms.
3 COST: 5.73778 ms.
1 COST: 54.4202 ms.
2 COST: 9.57441 ms.
3 COST: 5.83339 ms.
1 COST: 51.8193 ms.
2 COST: 9.51672 ms.
3 COST: 7.24316 ms.
1 COST: 51.8064 ms.
2 COST: 8.3437 ms.
3 COST: 5.85055 ms.
1 COST: 51.8951 ms.
2 COST: 8.31938 ms.
3 COST: 5.75209 ms.
1 COST: 52.2833 ms.
2 COST: 8.2581 ms.
3 COST: 5.69248 ms.
1 COST: 51.816 ms.
2 COST: 8.27169 ms.
3 COST: 5.71036 ms.
1 COST: 51.8861 ms.
2 COST: 8.28362 ms.
3 COST: 5.69344 ms.
1 COST: 51.8365 ms.
2 COST: 8.24976 ms.
3 COST: 5.62072 ms.
1 COST: 52.2089 ms.
2 COST: 9.45187 ms.
3 COST: 5.71132 ms.
1 COST: 51.9099 ms.
2 COST: 9.48572 ms.
3 COST: 5.78308 ms.
1 COST: 52.0639 ms.
2 COST: 8.30317 ms.
3 COST: 5.70703 ms.
1 COST: 51.8997 ms.
2 COST: 8.31723 ms.
3 COST: 5.73754 ms.
1 COST: 51.5256 ms.
2 COST: 8.32963 ms.
3 COST: 7.22718 ms.
1 COST: 51.856 ms.
2 COST: 8.32605 ms.
3 COST: 7.18951 ms.
1 COST: 52.078 ms.
2 COST: 8.30936 ms.
3 COST: 5.80311 ms.
1 COST: 54.4484 ms.
2 COST: 8.322 ms.
3 COST: 5.79906 ms.
1 COST: 51.8372 ms.
2 COST: 9.47237 ms.
3 COST: 5.86414 ms.
1 COST: 52.1023 ms.
2 COST: 9.5129 ms.
3 COST: 5.84126 ms.
1 COST: 52.31 ms.
2 COST: 8.29911 ms.
3 COST: 5.81884 ms.
1 COST: 52.0194 ms.
2 COST: 8.32939 ms.
3 COST: 6.57415 ms.
1 COST: 53.3748 ms.
2 COST: 8.34322 ms.
3 COST: 5.82933 ms.
1 COST: 52.0277 ms.
2 COST: 8.32152 ms.
3 COST: 5.71465 ms.
1 COST: 54.5907 ms.
2 COST: 8.28052 ms.
3 COST: 5.93495 ms.
1 COST: 51.8346 ms.
2 COST: 8.30984 ms.
3 COST: 6.47449 ms.
1 COST: 51.95 ms.
2 COST: 9.32479 ms.
3 COST: 5.78594 ms.
1 COST: 52.1998 ms.
2 COST: 9.46569 ms.
3 COST: 5.80549 ms.
1 COST: 52.0458 ms.
2 COST: 8.33154 ms.
3 COST: 5.76806 ms.
1 COST: 52.3317 ms.
2 COST: 8.32295 ms.
3 COST: 5.73969 ms.
1 COST: 52.0184 ms.
2 COST: 8.33535 ms.
3 COST: 5.87177 ms.
1 COST: 51.8813 ms.
2 COST: 8.31628 ms.
3 COST: 5.79739 ms.
1 COST: 51.9943 ms.
2 COST: 9.40084 ms.
3 COST: 5.75161 ms.
1 COST: 51.8513 ms.
2 COST: 8.32152 ms.
3 COST: 5.77188 ms.
1 COST: 52.0039 ms.
2 COST: 8.30579 ms.
3 COST: 5.79691 ms.
1 COST: 52.285 ms.
2 COST: 8.31175 ms.
3 COST: 5.81026 ms.
1 COST: 52.0294 ms.
2 COST: 8.30579 ms.
3 COST: 5.78475 ms.
1 COST: 51.7836 ms.
2 COST: 8.28218 ms.
3 COST: 7.09414 ms.
1 COST: 52.0051 ms.
2 COST: 8.34322 ms.
3 COST: 7.43747 ms.
1 COST: 52.1696 ms.
2 COST: 8.358 ms.
3 COST: 5.83959 ms.
1 COST: 52.0515 ms.
2 COST: 8.35586 ms.
3 COST: 5.81908 ms.
1 COST: 51.9576 ms.
2 COST: 8.29482 ms.
3 COST: 5.69129 ms.
1 COST: 51.9178 ms.
2 COST: 8.26836 ms.
3 COST: 5.74446 ms.
1 COST: 51.9662 ms.
2 COST: 8.27837 ms.
3 COST: 5.69105 ms.
1 COST: 51.8963 ms.
2 COST: 9.31072 ms.
3 COST: 5.73277 ms.
1 COST: 52.0017 ms.
2 COST: 8.28934 ms.
3 COST: 5.71227 ms.
1 COST: 52.0656 ms.
2 COST: 8.33654 ms.
3 COST: 5.85842 ms.
1 COST: 54.755 ms.
2 COST: 8.33917 ms.
3 COST: 6.63614 ms.
1 COST: 52.1076 ms.
2 COST: 8.31485 ms.
3 COST: 5.80287 ms.
1 COST: 52.0229 ms.
2 COST: 8.34417 ms.
3 COST: 5.83029 ms.
1 COST: 51.8756 ms.
2 COST: 8.31509 ms.
3 COST: 5.97906 ms.
1 COST: 52.0275 ms.
2 COST: 8.31175 ms.
3 COST: 5.88369 ms.
1 COST: 51.7797 ms.
2 COST: 8.31342 ms.
3 COST: 6.03294 ms.
1 COST: 51.9655 ms.
2 COST: 8.30173 ms.
3 COST: 5.89156 ms.
1 COST: 51.9745 ms.
2 COST: 8.31628 ms.
3 COST: 5.7981 ms.
1 COST: 52.0422 ms.
2 COST: 8.35037 ms.
3 COST: 5.85604 ms.
1 COST: 51.5277 ms.
2 COST: 8.32462 ms.
3 COST: 5.82457 ms.
1 COST: 52.7437 ms.
2 COST: 8.31604 ms.
3 COST: 5.83172 ms.
1 COST: 52.0449 ms.
2 COST: 8.27432 ms.
3 COST: 5.7323 ms.
1 COST: 52.0289 ms.
2 COST: 8.26502 ms.
3 COST: 7.09486 ms.
1 COST: 52.104 ms.
2 COST: 8.32748 ms.
3 COST: 5.79023 ms.
1 COST: 52.074 ms.
2 COST: 8.33273 ms.
3 COST: 5.76758 ms.
1 COST: 54.2047 ms.
2 COST: 8.28576 ms.
3 COST: 7.22432 ms.
1 COST: 51.9505 ms.
2 COST: 8.3189 ms.
3 COST: 5.83053 ms.
1 COST: 51.8956 ms.
2 COST: 8.29148 ms.
3 COST: 5.74851 ms.
1 COST: 51.9142 ms.
2 COST: 8.29458 ms.
3 COST: 5.66792 ms.
1 COST: 51.847 ms.
2 COST: 8.28338 ms.
3 COST: 5.7025 ms.
1 COST: 51.8296 ms.
2 COST: 8.27718 ms.
3 COST: 5.69272 ms.
1 COST: 51.9016 ms.
2 COST: 8.27479 ms.
3 COST: 5.65386 ms.
1 COST: 51.9185 ms.
2 COST: 8.26335 ms.
3 COST: 5.77188 ms.
1 COST: 51.9371 ms.
2 COST: 8.28075 ms.
3 COST: 5.71346 ms.
1 COST: 51.9831 ms.
2 COST: 8.25953 ms.
3 COST: 5.7137 ms.
1 COST: 52.0558 ms.
2 COST: 8.28886 ms.
3 COST: 7.19762 ms.
1 COST: 52.053 ms.
2 COST: 8.31509 ms.
3 COST: 5.83529 ms.
1 COST: 54.1608 ms.
2 COST: 8.41403 ms.
3 COST: 5.72491 ms.
1 COST: 52.0711 ms.
2 COST: 8.2922 ms.
3 COST: 5.70416 ms.
1 COST: 52.2485 ms.
2 COST: 9.48596 ms.
3 COST: 6.24967 ms.
1 COST: 52.4774 ms.
2 COST: 9.56631 ms.
3 COST: 5.98931 ms.
1 COST: 52.0403 ms.
2 COST: 8.31556 ms.
3 COST: 5.80239 ms.
1 COST: 56.8178 ms.
2 COST: 8.35848 ms.
3 COST: 5.71895 ms.
1 COST: 53.1688 ms.
2 COST: 8.63695 ms.
3 COST: 6.94704 ms.
1 COST: 55.8786 ms.
2 COST: 8.27575 ms.
3 COST: 5.78189 ms.
1 COST: 52.0422 ms.
2 COST: 8.24714 ms.
3 COST: 5.7025 ms.
1 COST: 54.0986 ms.
2 COST: 8.34036 ms.
3 COST: 5.74303 ms.
1 COST: 56.3061 ms.
2 COST: 9.27758 ms.
3 COST: 7.37405 ms.
1 COST: 54.4386 ms.
2 COST: 9.94325 ms.
3 COST: 5.76735 ms.
1 COST: 52.0685 ms.
2 COST: 8.32248 ms.
3 COST: 5.87821 ms.
1 COST: 52.305 ms.
2 COST: 8.64911 ms.
3 COST: 6.58703 ms.
1 COST: 52.5908 ms.
2 COST: 8.26693 ms.
3 COST: 5.75542 ms.
1 COST: 55.2542 ms.
2 COST: 8.92997 ms.
3 COST: 6.60276 ms.
1 COST: 51.9495 ms.
2 COST: 8.32009 ms.
3 COST: 5.84459 ms.
1 COST: 52.4678 ms.
2 COST: 8.33082 ms.
3 COST: 5.80215 ms.
1 COST: 51.9686 ms.
2 COST: 9.5799 ms.
3 COST: 5.75805 ms.
1 COST: 52.12 ms.
2 COST: 8.92663 ms.
3 COST: 5.67055 ms.
1 COST: 51.9292 ms.
2 COST: 8.32701 ms.
3 COST: 5.82504 ms.
1 COST: 52.5486 ms.
2 COST: 8.35085 ms.
3 COST: 5.72705 ms.
1 COST: 51.9519 ms.
2 COST: 8.322 ms.
3 COST: 5.82147 ms.
1 COST: 52.4158 ms.
2 COST: 8.32748 ms.
3 COST: 5.75376 ms.
1 COST: 51.9948 ms.
2 COST: 8.33321 ms.
3 COST: 6.32024 ms.
1 COST: 52.2554 ms.
2 COST: 8.8737 ms.
3 COST: 5.77569 ms.
1 COST: 51.9464 ms.
2 COST: 9.60064 ms.
3 COST: 5.74136 ms.
1 COST: 51.9869 ms.
2 COST: 8.31962 ms.
3 COST: 5.86367 ms.
1 COST: 52.4952 ms.
2 COST: 8.65221 ms.
3 COST: 6.89483 ms.
1 COST: 52.0208 ms.
2 COST: 8.27765 ms.
3 COST: 5.67865 ms.
1 COST: 52.3565 ms.
2 COST: 8.31294 ms.
3 COST: 5.77712 ms.
1 COST: 52.0391 ms.
2 COST: 8.32009 ms.
3 COST: 5.83792 ms.
1 COST: 54.3962 ms.
2 COST: 8.33464 ms.
3 COST: 5.85914 ms.
1 COST: 52.0165 ms.
2 COST: 8.82578 ms.
3 COST: 5.72801 ms.
1 COST: 51.8732 ms.
2 COST: 9.57775 ms.
3 COST: 7.00831 ms.
1 COST: 53.0245 ms.
2 COST: 8.7359 ms.
3 COST: 6.5732 ms.
1 COST: 52.9208 ms.
2 COST: 8.31461 ms.
3 COST: 5.8341 ms.
1 COST: 54.2641 ms.
2 COST: 8.32272 ms.
3 COST: 5.79882 ms.
1 COST: 56.5708 ms.
2 COST: 8.3344 ms.
3 COST: 5.86247 ms.
1 COST: 54.1701 ms.
2 COST: 8.33082 ms.
3 COST: 5.72252 ms.
1 COST: 52.0413 ms.
2 COST: 8.34394 ms.
3 COST: 5.77855 ms.
1 COST: 52.0153 ms.
2 COST: 9.73535 ms.
3 COST: 8.71038 ms.
1 COST: 52.2449 ms.
2 COST: 9.45902 ms.
3 COST: 5.77641 ms.
1 COST: 54.6386 ms.
2 COST: 8.32033 ms.
3 COST: 5.68533 ms.
1 COST: 52.1328 ms.
2 COST: 8.27456 ms.
3 COST: 5.80239 ms.
1 COST: 52.027 ms.
2 COST: 8.30221 ms.
3 COST: 5.6808 ms.
1 COST: 52.0282 ms.
2 COST: 8.26073 ms.
3 COST: 5.80144 ms.
1 COST: 51.9633 ms.
2 COST: 8.27432 ms.
3 COST: 5.68151 ms.
1 COST: 53.0481 ms.
2 COST: 8.27241 ms.
3 COST: 5.73587 ms.
1 COST: 51.9454 ms.
2 COST: 9.46617 ms.
3 COST: 5.75137 ms.
1 COST: 51.9414 ms.
2 COST: 9.552 ms.
3 COST: 5.83959 ms.
1 COST: 51.8746 ms.
2 COST: 8.30531 ms.
3 COST: 5.75733 ms.
1 COST: 51.868 ms.
2 COST: 8.255 ms.
3 COST: 5.66888 ms.
1 COST: 51.8878 ms.
2 COST: 8.25858 ms.
3 COST: 6.84857 ms.
1 COST: 51.8451 ms.
2 COST: 8.30936 ms.
3 COST: 5.74327 ms.
1 COST: 53.9057 ms.
2 COST: 8.42357 ms.
3 COST: 6.93774 ms.
1 COST: 57.7743 ms.
2 COST: 8.28052 ms.
3 COST: 5.73635 ms.
1 COST: 54.5475 ms.
2 COST: 8.27694 ms.
3 COST: 5.66268 ms.
1 COST: 54.2135 ms.
2 COST: 9.44233 ms.
3 COST: 7.47967 ms.
1 COST: 52.002 ms.
2 COST: 9.53007 ms.
3 COST: 5.76997 ms.
1 COST: 52.1691 ms.
2 COST: 8.72374 ms.
3 COST: 6.84357 ms.
1 COST: 53.8597 ms.
2 COST: 8.28195 ms.
3 COST: 6.93583 ms.
1 COST: 54.0051 ms.
2 COST: 8.63576 ms.
3 COST: 6.94346 ms.
I tested it as suggested, but the pattern does not seem to change: test_func1 still takes much more time.
Bibek
December 2, 2020, 10:34am
7
It seems to be due to a cache effect.
Can you run the same logic as C code?
Only the 768-column size has the problem, and perhaps some other sizes a little further away.
Can you convert these into C:
std::vector<float> classify_map1(size);
std::vector<float> classify_data1(size * all_channel);
Um… I am really not good at writing pure C code; my code is below. I am not sure the code is right.
#include <cstdlib>
#include <ctime>
#include <iostream>
//#include <chrono>
#include <vector>
#ifdef __cplusplus
extern "C" {
#endif
/// C-style (malloc/clock) variant of the per-pixel arg-max micro-benchmark.
///
/// The score buffer holds `all_channel` planes of `rows * cols` floats each;
/// channel `c` of pixel `idx` lives at `c * size + idx`. For every pixel the
/// loop records the index of the channel with the largest value (the lowest
/// channel wins ties, because the comparison is a strict `<`).
///
/// The three test_funcN() methods run the same loop nest and differ only in:
///   - test_func1: 800 x 768, channel count read from all_class_num_
///   - test_func2: 800 x 800, channel count read from all_class_num_
///   - test_func3: 800 x 768, channel count is the literal 7
/// The loop nest is deliberately written out in each method rather than shared
/// through a helper, so each variant is compiled exactly as it is spelled here.
///
/// Timing uses clock(), i.e. processor time consumed by the program, converted
/// to milliseconds via CLOCKS_PER_SEC.
class test {
public:
    /// Sets the channel count used by test_func1() and test_func2() to 7.
    test() : all_class_num_(7) {}

    /// Times the arg-max loop on an 800 x 768 image with the channel count
    /// taken from all_class_num_, and prints "1 COST: <ms> ms. ".
    /// Prints a message to std::cerr and returns if allocation fails.
    void test_func1() {
        int rows = 800;
        int cols = 768;
        int size = rows * cols;
        int all_channel = all_class_num_;
        const std::size_t map_count = static_cast<std::size_t>(size);
        const std::size_t data_count = map_count * static_cast<std::size_t>(all_channel);
        float *classify_map1 = alloc_zeroed(map_count);
        float *classify_data1 = alloc_zeroed(data_count);
        if (classify_map1 == nullptr || classify_data1 == nullptr) {
            std::cerr << "test_func1: out of memory" << std::endl;
            std::free(classify_data1);  // free(nullptr) is a no-op
            std::free(classify_map1);
            return;
        }
        const std::clock_t start_t = std::clock();
        for (int row = 0; row < rows; ++row) {
            int idx = row * cols;
            for (int col = 0; col < cols; ++col, ++idx) {
                float maxval = classify_data1[idx];
                int index = 0;
                for (int c = 1; c < all_channel; ++c) {
                    float tmp_val = classify_data1[c * size + idx];
                    if (maxval < tmp_val) {
                        maxval = tmp_val;
                        index = c;
                    }
                }
                classify_map1[idx] = static_cast<float>(index);
            }
        }
        const std::clock_t end_t = std::clock();
        double total_t = (double) (end_t - start_t) / CLOCKS_PER_SEC;
        std::cout << "1 COST: " << (total_t) * 1000. << " ms. " << std::endl;
        std::free(classify_data1);
        std::free(classify_map1);
    }

    /// Times the arg-max loop on an 800 x 800 image with the channel count
    /// taken from all_class_num_, and prints "2 COST: <ms> ms. ".
    /// Prints a message to std::cerr and returns if allocation fails.
    void test_func2() {
        int rows = 800;
        int cols = 800;
        int size = rows * cols;
        int all_channel = all_class_num_;
        const std::size_t map_count = static_cast<std::size_t>(size);
        const std::size_t data_count = map_count * static_cast<std::size_t>(all_channel);
        float *classify_map1 = alloc_zeroed(map_count);
        float *classify_data1 = alloc_zeroed(data_count);
        if (classify_map1 == nullptr || classify_data1 == nullptr) {
            std::cerr << "test_func2: out of memory" << std::endl;
            std::free(classify_data1);
            std::free(classify_map1);
            return;
        }
        const std::clock_t start_t = std::clock();
        for (int row = 0; row < rows; ++row) {
            int idx = row * cols;
            for (int col = 0; col < cols; ++col, ++idx) {
                float maxval = classify_data1[idx];
                int index = 0;
                for (int c = 1; c < all_channel; ++c) {
                    float tmp_val = classify_data1[c * size + idx];
                    if (maxval < tmp_val) {
                        maxval = tmp_val;
                        index = c;
                    }
                }
                classify_map1[idx] = static_cast<float>(index);
            }
        }
        const std::clock_t end_t = std::clock();
        double total_t = (double) (end_t - start_t) / CLOCKS_PER_SEC;
        std::cout << "2 COST: " << (total_t) * 1000. << " ms. " << std::endl;
        std::free(classify_data1);
        std::free(classify_map1);
    }

    /// Times the arg-max loop on an 800 x 768 image with the channel count
    /// given as the literal 7, and prints "3 COST: <ms> ms. ".
    /// Prints a message to std::cerr and returns if allocation fails.
    void test_func3() {
        int rows = 800;
        int cols = 768;
        int size = rows * cols;
        int all_channel = 7;
        const std::size_t map_count = static_cast<std::size_t>(size);
        const std::size_t data_count = map_count * static_cast<std::size_t>(all_channel);
        float *classify_map1 = alloc_zeroed(map_count);
        float *classify_data1 = alloc_zeroed(data_count);
        if (classify_map1 == nullptr || classify_data1 == nullptr) {
            std::cerr << "test_func3: out of memory" << std::endl;
            std::free(classify_data1);
            std::free(classify_map1);
            return;
        }
        const std::clock_t start_t = std::clock();
        for (int row = 0; row < rows; ++row) {
            int idx = row * cols;
            for (int col = 0; col < cols; ++col, ++idx) {
                float maxval = classify_data1[idx];
                int index = 0;
                for (int c = 1; c < all_channel; ++c) {
                    float tmp_val = classify_data1[c * size + idx];
                    if (maxval < tmp_val) {
                        maxval = tmp_val;
                        index = c;
                    }
                }
                classify_map1[idx] = static_cast<float>(index);
            }
        }
        const std::clock_t end_t = std::clock();
        double total_t = (double) (end_t - start_t) / CLOCKS_PER_SEC;
        std::cout << "3 COST: " << (total_t) * 1000. << " ms. " << std::endl;
        std::free(classify_data1);
        std::free(classify_map1);
    }

private:
    /// Allocates `count` floats with malloc() and sets every element to 0.0f.
    /// Returns nullptr if malloc() fails; the caller releases with free().
    ///
    /// malloc() leaves the storage uninitialised, so the explicit fill gives
    /// the benchmark loop defined input values and writes to the whole buffer
    /// before the timer is started.
    static float *alloc_zeroed(std::size_t count) {
        float *buffer = static_cast<float *>(std::malloc(count * sizeof(float)));
        if (buffer != nullptr) {
            for (std::size_t i = 0; i < count; ++i) {
                buffer[i] = 0.0f;
            }
        }
        return buffer;
    }

    int all_class_num_;  // channel count used by test_func1() and test_func2()
};
#ifdef __cplusplus
}
#endif
// Drives the benchmark: 100 rounds, each running variants 1, 2 and 3 in order.
int main(){
    const int n = 100;
    test t1;
    int round = 0;
    while (round != n) {
        t1.test_func1();
        t1.test_func2();
        t1.test_func3();
        ++round;
    }
}
The result is as follows:
1 COST: 47.62 ms.
2 COST: 30.662 ms.
3 COST: 13.97 ms.
1 COST: 43.357 ms.
2 COST: 24.241 ms.
3 COST: 10.533 ms.
1 COST: 35.24 ms.
2 COST: 24.531 ms.
3 COST: 6.911 ms.
1 COST: 34.539 ms.
2 COST: 25.47 ms.
3 COST: 7.358 ms.
1 COST: 34.202 ms.
2 COST: 23.208 ms.
3 COST: 8.705 ms.
1 COST: 39.11 ms.
2 COST: 24.129 ms.
3 COST: 11.775 ms.
1 COST: 34.349 ms.
2 COST: 23.57 ms.
3 COST: 11.659 ms.
1 COST: 33.455 ms.
2 COST: 23.568 ms.
3 COST: 11.124 ms.
1 COST: 32.893 ms.
2 COST: 23.574 ms.
3 COST: 11.001 ms.
1 COST: 34.406 ms.
2 COST: 23.223 ms.
3 COST: 10.94 ms.
1 COST: 39.74 ms.
2 COST: 23.536 ms.
3 COST: 10.89 ms.
1 COST: 34.493 ms.
2 COST: 23.953 ms.
3 COST: 11.034 ms.
1 COST: 34.237 ms.
2 COST: 25 ms.
3 COST: 10.906 ms.
1 COST: 32.422 ms.
2 COST: 22.99 ms.
3 COST: 10.875 ms.
1 COST: 35.325 ms.
2 COST: 23.617 ms.
3 COST: 10.91 ms.
1 COST: 39.816 ms.
2 COST: 23.251 ms.
3 COST: 10.883 ms.
1 COST: 34.38 ms.
2 COST: 23.176 ms.
3 COST: 10.929 ms.
1 COST: 33.5 ms.
2 COST: 23.253 ms.
3 COST: 11.019 ms.
1 COST: 34.472 ms.
2 COST: 23.216 ms.
3 COST: 11.04 ms.
1 COST: 35.34 ms.
2 COST: 23.249 ms.
3 COST: 10.991 ms.
1 COST: 39.694 ms.
2 COST: 23.538 ms.
3 COST: 10.926 ms.
1 COST: 34.398 ms.
2 COST: 24.963 ms.
3 COST: 11.029 ms.
1 COST: 33.44 ms.
2 COST: 23.278 ms.
3 COST: 10.94 ms.
1 COST: 32.432 ms.
2 COST: 23.408 ms.
3 COST: 11.715 ms.
1 COST: 33.866 ms.
2 COST: 23.236 ms.
3 COST: 13.561 ms.
1 COST: 39.799 ms.
2 COST: 23.53 ms.
3 COST: 11.023 ms.
1 COST: 32.502 ms.
2 COST: 23.245 ms.
3 COST: 11.607 ms.
1 COST: 32.48 ms.
2 COST: 24.874 ms.
3 COST: 11.098 ms.
1 COST: 32.448 ms.
2 COST: 23.396 ms.
3 COST: 11.039 ms.
1 COST: 33.73 ms.
2 COST: 23.366 ms.
3 COST: 11.069 ms.
1 COST: 39.76 ms.
2 COST: 23.562 ms.
3 COST: 10.945 ms.
1 COST: 32.49 ms.
2 COST: 23.514 ms.
3 COST: 10.965 ms.
1 COST: 32.442 ms.
2 COST: 23.602 ms.
3 COST: 10.962 ms.
1 COST: 32.514 ms.
2 COST: 22.907 ms.
3 COST: 11.711 ms.
1 COST: 35.158 ms.
2 COST: 23.004 ms.
3 COST: 11.059 ms.
1 COST: 39.656 ms.
2 COST: 23.044 ms.
3 COST: 10.96 ms.
1 COST: 32.436 ms.
2 COST: 23.288 ms.
3 COST: 11.158 ms.
1 COST: 32.562 ms.
2 COST: 23.2 ms.
3 COST: 11.55 ms.
1 COST: 32.451 ms.
2 COST: 23.181 ms.
3 COST: 11.163 ms.
1 COST: 34.676 ms.
2 COST: 23.127 ms.
3 COST: 10.984 ms.
1 COST: 41.249 ms.
2 COST: 23.264 ms.
3 COST: 10.962 ms.
1 COST: 30.244 ms.
2 COST: 23.096 ms.
3 COST: 10.888 ms.
1 COST: 34.407 ms.
2 COST: 23.076 ms.
3 COST: 13.364 ms.
1 COST: 33.402 ms.
2 COST: 23.421 ms.
3 COST: 16.866 ms.
1 COST: 33.39 ms.
2 COST: 23.113 ms.
3 COST: 13.201 ms.
1 COST: 32.806 ms.
2 COST: 23.095 ms.
3 COST: 10.92 ms.
1 COST: 36.561 ms.
2 COST: 23.093 ms.
3 COST: 10.883 ms.
1 COST: 32.434 ms.
2 COST: 23.231 ms.
3 COST: 11.032 ms.
1 COST: 32.411 ms.
2 COST: 23.206 ms.
3 COST: 10.892 ms.
1 COST: 33.384 ms.
2 COST: 23.351 ms.
3 COST: 10.945 ms.
1 COST: 40.994 ms.
2 COST: 23.068 ms.
3 COST: 11.002 ms.
1 COST: 39.986 ms.
2 COST: 23.066 ms.
3 COST: 10.913 ms.
1 COST: 34.357 ms.
2 COST: 23.269 ms.
3 COST: 10.89 ms.
1 COST: 33.388 ms.
2 COST: 23.116 ms.
3 COST: 10.922 ms.
1 COST: 33.441 ms.
2 COST: 23.1 ms.
3 COST: 10.929 ms.
1 COST: 32.67 ms.
2 COST: 23.733 ms.
3 COST: 10.908 ms.
1 COST: 33.429 ms.
2 COST: 23.618 ms.
3 COST: 10.924 ms.
1 COST: 35.127 ms.
2 COST: 24.337 ms.
3 COST: 10.972 ms.
1 COST: 35.267 ms.
2 COST: 23.26 ms.
3 COST: 10.908 ms.
1 COST: 34.458 ms.
2 COST: 23.345 ms.
3 COST: 11.002 ms.
1 COST: 34.388 ms.
2 COST: 23.563 ms.
3 COST: 10.969 ms.
1 COST: 38.737 ms.
2 COST: 23.61 ms.
3 COST: 10.941 ms.
1 COST: 33.411 ms.
2 COST: 23.105 ms.
3 COST: 10.926 ms.
1 COST: 33.396 ms.
2 COST: 23.399 ms.
3 COST: 10.939 ms.
1 COST: 32.423 ms.
2 COST: 23.492 ms.
3 COST: 10.935 ms.
1 COST: 32.435 ms.
2 COST: 23.483 ms.
3 COST: 10.92 ms.
1 COST: 40.364 ms.
2 COST: 23.545 ms.
3 COST: 11.003 ms.
1 COST: 39.734 ms.
2 COST: 23.016 ms.
3 COST: 10.963 ms.
1 COST: 34.412 ms.
2 COST: 23.576 ms.
3 COST: 10.951 ms.
1 COST: 32.429 ms.
2 COST: 23.172 ms.
3 COST: 13.172 ms.
1 COST: 32.457 ms.
2 COST: 23.067 ms.
3 COST: 11.315 ms.
1 COST: 32.742 ms.
2 COST: 23.204 ms.
3 COST: 11.268 ms.
1 COST: 32.45 ms.
2 COST: 23.2 ms.
3 COST: 11.06 ms.
1 COST: 32.419 ms.
2 COST: 23.315 ms.
3 COST: 10.901 ms.
1 COST: 32.441 ms.
2 COST: 23.206 ms.
3 COST: 10.884 ms.
1 COST: 39.842 ms.
2 COST: 23.168 ms.
3 COST: 10.975 ms.
1 COST: 39.78 ms.
2 COST: 23.093 ms.
3 COST: 10.95 ms.
1 COST: 32.376 ms.
2 COST: 23.177 ms.
3 COST: 10.901 ms.
1 COST: 32.434 ms.
2 COST: 23.201 ms.
3 COST: 11.004 ms.
1 COST: 32.41 ms.
2 COST: 23.16 ms.
3 COST: 10.902 ms.
1 COST: 37.799 ms.
2 COST: 23.042 ms.
3 COST: 10.894 ms.
1 COST: 30.233 ms.
2 COST: 23.215 ms.
3 COST: 13.335 ms.
1 COST: 32.357 ms.
2 COST: 23.14 ms.
3 COST: 13.377 ms.
1 COST: 32.426 ms.
2 COST: 23.317 ms.
3 COST: 10.904 ms.
1 COST: 32.411 ms.
2 COST: 23.138 ms.
3 COST: 10.869 ms.
1 COST: 34.096 ms.
2 COST: 23.186 ms.
3 COST: 10.987 ms.
1 COST: 39.769 ms.
2 COST: 23.211 ms.
3 COST: 10.899 ms.
1 COST: 33.432 ms.
2 COST: 23.09 ms.
3 COST: 10.947 ms.
1 COST: 34.335 ms.
2 COST: 23.199 ms.
3 COST: 10.943 ms.
1 COST: 32.924 ms.
2 COST: 23.155 ms.
3 COST: 10.929 ms.
1 COST: 33.439 ms.
2 COST: 23.272 ms.
3 COST: 10.925 ms.
1 COST: 36.702 ms.
2 COST: 23.137 ms.
3 COST: 10.968 ms.
1 COST: 40.058 ms.
2 COST: 23.249 ms.
3 COST: 10.941 ms.
1 COST: 34.431 ms.
2 COST: 23.152 ms.
3 COST: 10.895 ms.
1 COST: 33.359 ms.
2 COST: 23.18 ms.
3 COST: 10.971 ms.
1 COST: 33.373 ms.
2 COST: 23.216 ms.
3 COST: 10.924 ms.
1 COST: 34.331 ms.
2 COST: 23.329 ms.
3 COST: 10.932 ms.
1 COST: 35.397 ms.
2 COST: 23.272 ms.
3 COST: 10.999 ms.
1 COST: 35.203 ms.
2 COST: 23.256 ms.
3 COST: 11.29 ms.
1 COST: 32.645 ms.
2 COST: 23.352 ms.
3 COST: 13.246 ms.
It still looks strange…