Commit 8b05a021 authored by Jeff Donahue

Merge pull request #2410 from sguada/datum_transform

Datum transform
parents 7e981456 e048b174
......@@ -62,6 +62,7 @@ class DataTransformer {
*/
void Transform(const vector<cv::Mat> & mat_vector,
Blob<Dtype>* transformed_blob);
/**
* @brief Applies the transformation defined in the data layer's
* transform_param block to a cv::Mat
......@@ -87,6 +88,41 @@ class DataTransformer {
*/
void Transform(Blob<Dtype>* input_blob, Blob<Dtype>* transformed_blob);
/**
* @brief Infers the shape the transformed_blob will have when
* the transformation is applied to the data.
*
* @param datum
* Datum containing the data to be transformed.
*/
vector<int> InferBlobShape(const Datum& datum);
/**
* @brief Infers the shape the transformed_blob will have when
* the transformation is applied to the data.
* It uses the first element to infer the shape of the blob.
*
* @param datum_vector
* A vector of Datum containing the data to be transformed.
*/
vector<int> InferBlobShape(const vector<Datum> & datum_vector);
/**
* @brief Infers the shape the transformed_blob will have when
* the transformation is applied to the data.
* It uses the first element to infer the shape of the blob.
*
* @param mat_vector
* A vector of Mat containing the data to be transformed.
*/
vector<int> InferBlobShape(const vector<cv::Mat> & mat_vector);
/**
* @brief Infers the shape the transformed_blob will have when
* the transformation is applied to the data.
*
* @param cv_img
* cv::Mat containing the data to be transformed.
*/
vector<int> InferBlobShape(const cv::Mat& cv_img);
protected:
/**
* @brief Generates a random integer from Uniform({0, 1, ..., n-1}).
......
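A minimal usage sketch of the new InferBlobShape/Transform pair (not part of this diff; the 3x256x256 dummy Datum, the crop_size value, and the function name are illustrative assumptions):

#include <string>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/proto/caffe.pb.h"

void InferShapeSketch() {
  caffe::TransformationParameter param;
  param.set_crop_size(227);  // center crop in TEST phase
  caffe::DataTransformer<float> transformer(param, caffe::TEST);
  transformer.InitRand();

  caffe::Datum datum;  // dummy 3x256x256 datum with zeroed pixel bytes
  datum.set_channels(3);
  datum.set_height(256);
  datum.set_width(256);
  datum.set_data(std::string(3 * 256 * 256, '\0'));

  // With crop_size set, InferBlobShape returns {1, 3, 227, 227};
  // with crop_size == 0 it would return {1, 3, 256, 256}.
  std::vector<int> shape = transformer.InferBlobShape(datum);
  caffe::Blob<float> transformed;
  transformed.Reshape(shape);
  transformer.Transform(datum, &transformed);
}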
......@@ -125,10 +125,31 @@ void DataTransformer<Dtype>::Transform(const Datum& datum,
template<typename Dtype>
void DataTransformer<Dtype>::Transform(const Datum& datum,
Blob<Dtype>* transformed_blob) {
// If datum is encoded, decode it and transform the resulting cv::Mat.
if (datum.encoded()) {
CHECK(!param_.force_color() && !param_.force_gray())
<< "cannot set both force_color and force_gray";
cv::Mat cv_img;
if (param_.force_color() || param_.force_gray()) {
// If force_color then decode in color otherwise decode in gray.
cv_img = DecodeDatumToCVMat(datum, param_.force_color());
} else {
cv_img = DecodeDatumToCVMatNative(datum);
}
// Transform the cv::Mat into the blob.
return Transform(cv_img, transformed_blob);
} else {
if (param_.force_color() || param_.force_gray()) {
LOG(ERROR) << "force_color and force_gray only for encoded datum";
}
}
const int crop_size = param_.crop_size();
const int datum_channels = datum.channels();
const int datum_height = datum.height();
const int datum_width = datum.width();
// Check dimensions.
const int channels = transformed_blob->channels();
const int height = transformed_blob->height();
const int width = transformed_blob->width();
......@@ -139,8 +160,6 @@ void DataTransformer<Dtype>::Transform(const Datum& datum,
CHECK_LE(width, datum_width);
CHECK_GE(num, 1);
const int crop_size = param_.crop_size();
if (crop_size) {
CHECK_EQ(crop_size, height);
CHECK_EQ(crop_size, width);
......@@ -196,10 +215,12 @@ void DataTransformer<Dtype>::Transform(const vector<cv::Mat> & mat_vector,
template<typename Dtype>
void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img,
Blob<Dtype>* transformed_blob) {
const int crop_size = param_.crop_size();
const int img_channels = cv_img.channels();
const int img_height = cv_img.rows;
const int img_width = cv_img.cols;
// Check dimensions.
const int channels = transformed_blob->channels();
const int height = transformed_blob->height();
const int width = transformed_blob->width();
......@@ -212,7 +233,6 @@ void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img,
CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte";
const int crop_size = param_.crop_size();
const Dtype scale = param_.scale();
const bool do_mirror = param_.mirror() && Rand(2);
const bool has_mean_file = param_.has_mean_file();
......@@ -297,11 +317,23 @@ void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img,
template<typename Dtype>
void DataTransformer<Dtype>::Transform(Blob<Dtype>* input_blob,
Blob<Dtype>* transformed_blob) {
const int crop_size = param_.crop_size();
const int input_num = input_blob->num();
const int input_channels = input_blob->channels();
const int input_height = input_blob->height();
const int input_width = input_blob->width();
if (transformed_blob->count() == 0) {
// Initialize transformed_blob with the right shape.
if (crop_size) {
transformed_blob->Reshape(input_num, input_channels,
crop_size, crop_size);
} else {
transformed_blob->Reshape(input_num, input_channels,
input_height, input_width);
}
}
const int num = transformed_blob->num();
const int channels = transformed_blob->channels();
const int height = transformed_blob->height();
......@@ -313,7 +345,7 @@ void DataTransformer<Dtype>::Transform(Blob<Dtype>* input_blob,
CHECK_GE(input_height, height);
CHECK_GE(input_width, width);
const int crop_size = param_.crop_size();
const Dtype scale = param_.scale();
const bool do_mirror = param_.mirror() && Rand(2);
const bool has_mean_file = param_.has_mean_file();
......@@ -395,6 +427,82 @@ void DataTransformer<Dtype>::Transform(Blob<Dtype>* input_blob,
}
}
template<typename Dtype>
vector<int> DataTransformer<Dtype>::InferBlobShape(const Datum& datum) {
if (datum.encoded()) {
CHECK(!param_.force_color() && !param_.force_gray())
<< "cannot set both force_color and force_gray";
cv::Mat cv_img;
if (param_.force_color() || param_.force_gray()) {
// If force_color then decode in color otherwise decode in gray.
cv_img = DecodeDatumToCVMat(datum, param_.force_color());
} else {
cv_img = DecodeDatumToCVMatNative(datum);
}
// InferBlobShape using the decoded cv::Mat.
return InferBlobShape(cv_img);
}
const int crop_size = param_.crop_size();
const int datum_channels = datum.channels();
const int datum_height = datum.height();
const int datum_width = datum.width();
// Check dimensions.
CHECK_GT(datum_channels, 0);
CHECK_GE(datum_height, crop_size);
CHECK_GE(datum_width, crop_size);
// Build BlobShape.
vector<int> shape(4);
shape[0] = 1;
shape[1] = datum_channels;
shape[2] = (crop_size)? crop_size: datum_height;
shape[3] = (crop_size)? crop_size: datum_width;
return shape;
}
template<typename Dtype>
vector<int> DataTransformer<Dtype>::InferBlobShape(
const vector<Datum> & datum_vector) {
const int num = datum_vector.size();
CHECK_GT(num, 0) << "There is no datum in the vector";
// Use first datum in the vector to InferBlobShape.
vector<int> shape = InferBlobShape(datum_vector[0]);
// Adjust num to the size of the vector.
shape[0] = num;
return shape;
}
template<typename Dtype>
vector<int> DataTransformer<Dtype>::InferBlobShape(const cv::Mat& cv_img) {
const int crop_size = param_.crop_size();
const int img_channels = cv_img.channels();
const int img_height = cv_img.rows;
const int img_width = cv_img.cols;
// Check dimensions.
CHECK_GT(img_channels, 0);
CHECK_GE(img_height, crop_size);
CHECK_GE(img_width, crop_size);
// Build BlobShape.
vector<int> shape(4);
shape[0] = 1;
shape[1] = img_channels;
shape[2] = (crop_size)? crop_size: img_height;
shape[3] = (crop_size)? crop_size: img_width;
return shape;
}
template<typename Dtype>
vector<int> DataTransformer<Dtype>::InferBlobShape(
const vector<cv::Mat> & mat_vector) {
const int num = mat_vector.size();
CHECK_GT(num, 0) << "There is no cv_img in the vector";
// Use first cv_img in the vector to InferBlobShape.
vector<int> shape = InferBlobShape(mat_vector[0]);
// Adjust num to the size of the vector.
shape[0] = num;
return shape;
}
template <typename Dtype>
void DataTransformer<Dtype>::InitRand() {
const bool needs_rand = param_.mirror() ||
......
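One behavioral note on the Blob-to-Blob Transform above: when the output blob is still empty (count() == 0), it is now reshaped automatically. A hedged sketch, assuming a caller that already holds a filled NxCxHxW input blob (the scale value and function name are illustrative):

#include "caffe/blob.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/proto/caffe.pb.h"

void TransformBatchSketch(caffe::Blob<float>* batch_in) {
  caffe::TransformationParameter param;
  param.set_scale(0.00390625f);  // 1/255, a common raw-pixel scaling
  caffe::DataTransformer<float> transformer(param, caffe::TEST);
  transformer.InitRand();

  // transformed starts with count() == 0, so Transform reshapes it to match
  // batch_in (or to crop_size x crop_size when cropping is enabled).
  caffe::Blob<float> transformed;
  transformer.Transform(batch_in, &transformed);
}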
......@@ -20,11 +20,11 @@ void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
} else {
output_labels_ = true;
}
// The subclasses should setup the size of bottom and top
DataLayerSetUp(bottom, top);
data_transformer_.reset(
new DataTransformer<Dtype>(transform_param_, this->phase_));
data_transformer_->InitRand();
// The subclasses should setup the size of bottom and top
DataLayerSetUp(bottom, top);
}
template <typename Dtype>
......@@ -62,13 +62,15 @@ void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
JoinPrefetchThread();
DLOG(INFO) << "Thread joined";
// Reshape to loaded data.
top[0]->Reshape(this->prefetch_data_.num(), this->prefetch_data_.channels(),
this->prefetch_data_.height(), this->prefetch_data_.width());
top[0]->ReshapeLike(prefetch_data_);
// Copy the data
caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),
top[0]->mutable_cpu_data());
DLOG(INFO) << "Prefetch copied";
if (this->output_labels_) {
// Reshape to loaded labels.
top[1]->ReshapeLike(prefetch_label_);
// Copy the labels.
caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),
top[1]->mutable_cpu_data());
}
......
......@@ -10,12 +10,14 @@ void BasePrefetchingDataLayer<Dtype>::Forward_gpu(
// First, join the thread
JoinPrefetchThread();
// Reshape to loaded data.
top[0]->Reshape(this->prefetch_data_.num(), this->prefetch_data_.channels(),
this->prefetch_data_.height(), this->prefetch_data_.width());
top[0]->ReshapeLike(this->prefetch_data_);
// Copy the data
caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),
top[0]->mutable_gpu_data());
if (this->output_labels_) {
// Reshape to loaded labels.
top[1]->ReshapeLike(prefetch_label_);
// Copy the labels.
caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),
top[1]->mutable_gpu_data());
}
......
......@@ -38,32 +38,17 @@ void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
cursor_->Next();
}
}
// Read a data point, and use it to initialize the top blob.
// Read a data point, and use it to initialize the prefetch and top blobs.
Datum datum;
datum.ParseFromString(cursor_->value());
// Use data_transformer to infer the expected blob shape from datum.
vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
this->transformed_data_.Reshape(top_shape);
// Reshape top[0] and prefetch_data according to the batch_size.
top_shape[0] = this->layer_param_.data_param().batch_size();
this->prefetch_data_.Reshape(top_shape);
top[0]->ReshapeLike(this->prefetch_data_);
bool force_color = this->layer_param_.data_param().force_encoded_color();
if ((force_color && DecodeDatum(&datum, true)) ||
DecodeDatumNative(&datum)) {
LOG(INFO) << "Decoding Datum";
}
// image
int crop_size = this->layer_param_.transform_param().crop_size();
if (crop_size > 0) {
top[0]->Reshape(this->layer_param_.data_param().batch_size(),
datum.channels(), crop_size, crop_size);
this->prefetch_data_.Reshape(this->layer_param_.data_param().batch_size(),
datum.channels(), crop_size, crop_size);
this->transformed_data_.Reshape(1, datum.channels(), crop_size, crop_size);
} else {
top[0]->Reshape(
this->layer_param_.data_param().batch_size(), datum.channels(),
datum.height(), datum.width());
this->prefetch_data_.Reshape(this->layer_param_.data_param().batch_size(),
datum.channels(), datum.height(), datum.width());
this->transformed_data_.Reshape(1, datum.channels(),
datum.height(), datum.width());
}
LOG(INFO) << "output data size: " << top[0]->num() << ","
<< top[0]->channels() << "," << top[0]->height() << ","
<< top[0]->width();
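The same shape bookkeeping recurs in the prefetch thread below; a hypothetical free-standing helper (name and signature are mine, not Caffe's) that captures the pattern of inferring the per-item shape once and then widening dim 0 to the batch size:

#include <vector>
#include "caffe/blob.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/proto/caffe.pb.h"

void ShapePrefetchBuffers(caffe::DataTransformer<float>* transformer,
                          const caffe::Datum& first_datum,
                          int batch_size,
                          caffe::Blob<float>* transformed_item,
                          caffe::Blob<float>* prefetch_batch) {
  // Infer {1, C, H, W} for a single transformed item from the first datum.
  std::vector<int> top_shape = transformer->InferBlobShape(first_datum);
  transformed_item->Reshape(top_shape);
  // Widen dim 0 to the batch size; C, H, W stay the same for the whole batch.
  top_shape[0] = batch_size;
  prefetch_batch->Reshape(top_shape);
}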
......@@ -86,25 +71,17 @@ void DataLayer<Dtype>::InternalThreadEntry() {
CHECK(this->prefetch_data_.count());
CHECK(this->transformed_data_.count());
// Reshape on single input batches for inputs of varying dimension.
// Reshape according to the first datum of each batch; on single-input
// batches this allows for inputs of varying dimension.
const int batch_size = this->layer_param_.data_param().batch_size();
const int crop_size = this->layer_param_.transform_param().crop_size();
bool force_color = this->layer_param_.data_param().force_encoded_color();
if (batch_size == 1 && crop_size == 0) {
Datum datum;
datum.ParseFromString(cursor_->value());
if (datum.encoded()) {
if (force_color) {
DecodeDatum(&datum, true);
} else {
DecodeDatumNative(&datum);
}
}
this->prefetch_data_.Reshape(1, datum.channels(),
datum.height(), datum.width());
this->transformed_data_.Reshape(1, datum.channels(),
datum.height(), datum.width());
}
Datum datum;
datum.ParseFromString(cursor_->value());
// Use data_transformer to infer the expected blob shape from datum.
vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
this->transformed_data_.Reshape(top_shape);
// Reshape prefetch_data according to the batch_size.
top_shape[0] = batch_size;
this->prefetch_data_.Reshape(top_shape);
Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
Dtype* top_label = NULL; // suppress warnings about uninitialized variables
......@@ -112,48 +89,31 @@ void DataLayer<Dtype>::InternalThreadEntry() {
if (this->output_labels_) {
top_label = this->prefetch_label_.mutable_cpu_data();
}
timer.Start();
for (int item_id = 0; item_id < batch_size; ++item_id) {
timer.Start();
// get a blob
// get a datum
Datum datum;
datum.ParseFromString(cursor_->value());
cv::Mat cv_img;
if (datum.encoded()) {
if (force_color) {
cv_img = DecodeDatumToCVMat(datum, true);
} else {
cv_img = DecodeDatumToCVMatNative(datum);
}
if (cv_img.channels() != this->transformed_data_.channels()) {
LOG(WARNING) << "Your dataset contains encoded images with mixed "
<< "channel sizes. Consider adding a 'force_color' flag to the "
<< "model definition, or rebuild your dataset using "
<< "convert_imageset.";
}
}
read_time += timer.MicroSeconds();
timer.Start();
// Apply data transformations (mirror, scale, crop...)
int offset = this->prefetch_data_.offset(item_id);
this->transformed_data_.set_cpu_data(top_data + offset);
if (datum.encoded()) {
this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
} else {
this->data_transformer_->Transform(datum, &(this->transformed_data_));
}
this->data_transformer_->Transform(datum, &(this->transformed_data_));
// Copy label.
if (this->output_labels_) {
top_label[item_id] = datum.label();
}
trans_time += timer.MicroSeconds();
// go to the next iter
timer.Start();
// go to the next item.
cursor_->Next();
if (!cursor_->valid()) {
DLOG(INFO) << "Restarting data prefetching from start.";
cursor_->SeekToFirst();
}
}
timer.Stop();
batch_timer.Stop();
DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
DLOG(INFO) << " Read time: " << read_time / 1000 << " ms.";
......
......@@ -62,21 +62,15 @@ void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
// Read an image, and use it to initialize the top blob.
cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
new_height, new_width, is_color);
const int channels = cv_img.channels();
const int height = cv_img.rows;
const int width = cv_img.cols;
// image
const int crop_size = this->layer_param_.transform_param().crop_size();
// Use data_transformer to infer the expected blob shape from a cv_image.
vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
this->transformed_data_.Reshape(top_shape);
// Reshape prefetch_data and top[0] according to the batch_size.
const int batch_size = this->layer_param_.image_data_param().batch_size();
if (crop_size > 0) {
top[0]->Reshape(batch_size, channels, crop_size, crop_size);
this->prefetch_data_.Reshape(batch_size, channels, crop_size, crop_size);
this->transformed_data_.Reshape(1, channels, crop_size, crop_size);
} else {
top[0]->Reshape(batch_size, channels, height, width);
this->prefetch_data_.Reshape(batch_size, channels, height, width);
this->transformed_data_.Reshape(1, channels, height, width);
}
top_shape[0] = batch_size;
this->prefetch_data_.Reshape(top_shape);
top[0]->ReshapeLike(this->prefetch_data_);
LOG(INFO) << "output data size: " << top[0]->num() << ","
<< top[0]->channels() << "," << top[0]->height() << ","
<< top[0]->width();
......@@ -107,19 +101,19 @@ void ImageDataLayer<Dtype>::InternalThreadEntry() {
const int batch_size = image_data_param.batch_size();
const int new_height = image_data_param.new_height();
const int new_width = image_data_param.new_width();
const int crop_size = this->layer_param_.transform_param().crop_size();
const bool is_color = image_data_param.is_color();
string root_folder = image_data_param.root_folder();
// Reshape on single input batches for inputs of varying dimension.
if (batch_size == 1 && crop_size == 0 && new_height == 0 && new_width == 0) {
cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
0, 0, is_color);
this->prefetch_data_.Reshape(1, cv_img.channels(),
cv_img.rows, cv_img.cols);
this->transformed_data_.Reshape(1, cv_img.channels(),
cv_img.rows, cv_img.cols);
}
// Reshape according to the first image of each batch; on single-input
// batches this allows for inputs of varying dimension.
cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
new_height, new_width, is_color);
// Use data_transformer to infer the expected blob shape from a cv_img.
vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
this->transformed_data_.Reshape(top_shape);
// Reshape prefetch_data according to the batch_size.
top_shape[0] = batch_size;
this->prefetch_data_.Reshape(top_shape);
Dtype* prefetch_data = this->prefetch_data_.mutable_cpu_data();
Dtype* prefetch_label = this->prefetch_label_.mutable_cpu_data();
......
......@@ -365,6 +365,10 @@ message TransformationParameter {
// or can be repeated the same number of times as channels
// (would subtract them from the corresponding channel)
repeated float mean_value = 5;
// Force the decoded image to have 3 color channels.
optional bool force_color = 6 [default = false];
// Force the decoded image to have 1 color channel.
optional bool force_gray = 7 [default = false];
}
// Message that stores parameters shared by loss layers
......
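A hedged sketch of how the new force_color option could be exercised from C++ (the encoded_datum argument is assumed to already hold JPEG/PNG bytes, e.g. as written by convert_imageset with encoding enabled; the function name is illustrative):

#include <vector>
#include "caffe/blob.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/proto/caffe.pb.h"

void ForceColorSketch(const caffe::Datum& encoded_datum) {
  caffe::TransformationParameter param;
  param.set_force_color(true);  // decode grayscale sources to 3 channels
  caffe::DataTransformer<float> transformer(param, caffe::TEST);
  transformer.InitRand();

  // Both InferBlobShape and Transform decode the image internally when
  // datum.encoded() is true, so the caller never touches cv::Mat directly.
  std::vector<int> shape = transformer.InferBlobShape(encoded_datum);
  caffe::Blob<float> transformed;
  transformed.Reshape(shape);
  transformer.Transform(encoded_datum, &transformed);
}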