caffe 添加 weight diff

    xiaoxiao2022-07-03  119

    https://github.com/happynear/caffe-windows/commit/5027e7d424923872658855aa618add3017079ee3

     

     

    string gradient_norm = "layer blob norm:"; for (int k = 0; k < this->net_->blob_names().size(); k++) { if (this->net_->blob_names()[k].find("Convolution") != string::npos || this->net_->blob_names()[k].find("InnerProduct") != string::npos || this->net_->blob_names()[k].find("conv") != string::npos || this->net_->blob_names()[k].find("fc") != string::npos || this->net_->blob_names()[k].find("ip") != string::npos) { gradient_norm += std::to_string(this->net_->blobs()[k]->asum_diff() / this->net_->blobs()[k]->count()) + " "; } } if (gradient_norm.size() > 20) LOG(INFO) << gradient_norm; string scale_layers = "scale layer:"; for (int k = 0; k < this->net_->layers().size(); k++) { if (strstr(this->net_->layers()[k]->type(), "Scale") != NULL && this->net_->layers()[k]->blobs().size() > 0) { scale_layers += std::to_string(this->net_->layers()[k]->blobs()[0]->asum_data() / this->net_->layers()[k]->blobs()[0]->count()) + " "; } } if (scale_layers.size() > 20) LOG(INFO) << scale_layers; string parameter_layers = "parameter layer:"; for (int k = 0; k < this->net_->layers().size(); k++) { if (strstr(this->net_->layers()[k]->type(), "Parameter") != NULL && this->net_->layers()[k]->blobs().size() > 0) { parameter_layers += std::to_string(this->net_->layers()[k]->blobs()[0]->asum_data() / this->net_->layers()[k]->blobs()[0]->count()) + " "; } } if (parameter_layers.size() > 20) LOG(INFO) << parameter_layers; string prelu_layers = "prelu slope:"; for (int k = 0; k < this->net_->layers().size(); k++) { if (strstr(this->net_->layers()[k]->type(), "PReLU") != NULL && this->net_->layers()[k]->blobs().size() > 0) { prelu_layers += std::to_string(this->net_->layers()[k]->blobs()[0]->asum_data() / this->net_->layers()[k]->blobs()[0]->count()) + " "; } } if (prelu_layers.size() > 20) LOG(INFO) << prelu_layers; string weight_gradient_norm = "weight diff/data:"; for (int k = 0; k < this->net_->layers().size(); k++) { if (strstr(this->net_->layers()[k]->type(), "Convolution") != NULL || strstr(this->net_->layers()[k]->type(), "InnerProduct") != NULL || strstr(this->net_->layers()[k]->type(), "InnerDistance") != NULL) { if (this->net_->layers()[k]->blobs().size() > 0) { Blob<Dtype> diff_data_ratio; diff_data_ratio.ReshapeLike(*this->net_->layers()[k]->blobs()[0]); caffe_div(this->net_->layers()[k]->blobs()[0]->count(), this->net_->layers()[k]->blobs()[0]->cpu_diff(), this->net_->layers()[k]->blobs()[0]->cpu_data(), diff_data_ratio.mutable_cpu_data()); weight_gradient_norm += std::to_string(diff_data_ratio.asum_data() / diff_data_ratio.count()) + " "; } } } if (weight_gradient_norm.size() > 20) LOG(INFO) << weight_gradient_norm;

     

    最新回复(0)