Add support for Conv1DTranspose and Conv2DTranspose layers, closes #435 (#436)

Dobiasd · web-flow · commit e205219ae211 · 2025-03-10T12:03:27.000+01:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -6,7 +6,7 @@ list(APPEND CMAKE_MODULE_PATH "${FDEEP_TOP_DIR}/cmake")
 
 include(cmake/hunter.cmake) # default off
 
-project(frugally-deep VERSION 0.16.3)
+project(frugally-deep VERSION 0.17.0)
 
 message(STATUS "===( ${PROJECT_NAME} ${PROJECT_VERSION} )===")
 
diff --git a/FAQ.md b/FAQ.md
@@ -458,29 +458,6 @@ int main()
 }
 ```
 
-Why are `Conv2DTranspose` layers not supported?
------------------------------------------------
-
-The combination of `UpSampling2D` and `Conv2D` layers seems to be the better alternative:
-https://distill.pub/2016/deconv-checkerboard/
-
-Basically, instead of this:
-
-```python
-x = Conv2DTranspose(8, (3, 3), strides=(2, 2), padding='same')(x)
-```
-
-one uses that:
-
-```python
-x = Conv2D(8, (3, 3), padding='same')(UpSampling2D(2)(x))
-```
-
-In case you are not in the position to change your model's
-architecture to make that change,
-feel free to implement `Conv2DTranspose` in frugally-deep and
-submit a [pull request](https://github.com/Dobiasd/frugally-deep/pulls). :)
-
 How can I use `BatchNormalization` and `Dropout` layers with `training=True`?
 -----------------------------------------------------------------------------
 
diff --git a/INSTALL.md b/INSTALL.md
@@ -63,7 +63,7 @@ Just add a *conanfile.txt* with frugally-deep as a requirement and chose the gen
 
 ```
 [requires]
-frugally-deep/v0.16.3@dobiasd/stable
+frugally-deep/v0.17.0@dobiasd/stable
 
 [generators]
 cmake
diff --git a/README.md b/README.md
@@ -43,6 +43,7 @@ Would you like to build/train a model using Keras/Python? And would you like to
 * `AveragePooling1D/2D/3D`, `GlobalAveragePooling1D/2D/3D`
 * `TimeDistributed`
 * `Conv1D/2D`, `SeparableConv2D`, `DepthwiseConv2D`
+* `Conv1DTranspose`, `Conv2DTranspose`
 * `Cropping1D/2D/3D`, `ZeroPadding1D/2D/3D`, `CenterCrop`
 * `BatchNormalization`, `Dense`, `Flatten`, `Normalization`
 * `Dropout`, `AlphaDropout`, `GaussianDropout`, `GaussianNoise`
@@ -59,7 +60,6 @@ Would you like to build/train a model using Keras/Python? And would you like to
 * `Embedding`, `CategoryEncoding`
 * `Attention`, `AdditiveAttention`, `MultiHeadAttention`
 
-
 ### Also supported
 
 * multiple inputs and outputs
@@ -72,7 +72,6 @@ Would you like to build/train a model using Keras/Python? And would you like to
 
 ### Currently not supported are the following:
 
-`Conv2DTranspose` ([why](FAQ.md#why-are-conv2dtranspose-layers-not-supported)),
 `Lambda` ([why](FAQ.md#why-are-lambda-layers-not-supported)),
 `Conv3D`, `ConvLSTM1D`, `ConvLSTM2D`, `Discretization`,
 `GRUCell`, `Hashing`,
diff --git a/include/fdeep/convolution.hpp b/include/fdeep/convolution.hpp
@@ -225,7 +225,8 @@ namespace internal {
         const shape2& strides,
         padding pad_type,
         std::size_t input_shape_height,
-        std::size_t input_shape_width)
+        std::size_t input_shape_width,
+        bool transposed)
     {
         // https://www.tensorflow.org/api_guides/python/nn#Convolution
         const int filter_height = static_cast<int>(filter_shape.height_);
@@ -242,15 +243,27 @@ namespace internal {
             out_height = fplus::ceil(static_cast<float>(in_height) / static_cast<float>(strides_y) - 0.001);
             out_width = fplus::ceil(static_cast<float>(in_width) / static_cast<float>(strides_x) - 0.001);
         } else {
-            out_height = fplus::ceil(static_cast<float>(in_height - filter_height + 1) / static_cast<float>(strides_y) - 0.001);
-            out_width = fplus::ceil(static_cast<float>(in_width - filter_width + 1) / static_cast<float>(strides_x) - 0.001);
+            if (transposed) {
+                out_height = fplus::ceil(static_cast<float>(in_height + filter_height - 1) / static_cast<float>(strides_y) - 0.001);
+                out_width = fplus::ceil(static_cast<float>(in_width + filter_width - 1) / static_cast<float>(strides_x) - 0.001);
+            } else {
+                out_height = fplus::ceil(static_cast<float>(in_height - filter_height + 1) / static_cast<float>(strides_y) - 0.001);
+                out_width = fplus::ceil(static_cast<float>(in_width - filter_width + 1) / static_cast<float>(strides_x) - 0.001);
+            }
         }
 
         int pad_top = 0;
         int pad_bottom = 0;
         int pad_left = 0;
         int pad_right = 0;
 
+        if (transposed) {
+            pad_top = filter_height - 1;
+            pad_bottom = filter_height - 1;
+            pad_left = filter_width - 1;
+            pad_right = filter_width - 1;
+        }
+
         if (pad_type == padding::same) {
             int pad_along_height = 0;
             int pad_along_width = 0;
@@ -296,7 +309,7 @@ namespace internal {
 
         const auto conv_cfg = preprocess_convolution(
             filter_mat.filter_shape_.without_depth(),
-            strides, pad_type, input.shape().height_, input.shape().width_);
+            strides, pad_type, input.shape().height_, input.shape().width_, false);
 
         // The padding step usually (on a VGG19 net) only takes about 1% of the overall runtime.
         // So the increased code complexity of doing it inside the convolution step
@@ -312,5 +325,32 @@ namespace internal {
             in_padded);
     }
 
+    inline tensor convolve_transposed( // Transposed convolution expressed as a regular stride-1 convolution over a dilated and padded input.
+        const shape2& strides, // Used as the dilation rate of the input below, not as strides of the convolution itself.
+        const padding& pad_type,
+        const convolution_filter_matrices& filter_mat, // NOTE(review): presumably built from filters flipped along height and width (generate_filters with transpose == true) - confirm at call sites.
+        const tensor& input)
+    {
+        assertion(filter_mat.filter_shape_.depth_ == input.shape().depth_,
+            "invalid filter depth");
+
+        const auto input_dilated = dilate_tensor(strides, input, pad_type == padding::same); // With "same" padding, dilate_tensor additionally grows the result by (stride - 1) zero rows/columns.
+
+        const auto conv_cfg = preprocess_convolution(
+            filter_mat.filter_shape_.without_depth(),
+            shape2(1, 1), pad_type, input_dilated.shape().height_, input_dilated.shape().width_, // Strides are (1, 1) because the striding was already realized by dilating the input.
+            true); // true: transposed mode, which starts from (filter size - 1) padding on every side.
+
+        const auto in_padded = pad_tensor(0, 0, 0,
+            conv_cfg.pad_top_, conv_cfg.pad_bottom_, conv_cfg.pad_left_, conv_cfg.pad_right_,
+            input_dilated);
+
+        return convolve_accumulative(
+            conv_cfg.out_height_, conv_cfg.out_width_,
+            1, 1, // Stride 1 in both directions, matching the shape2(1, 1) passed to preprocess_convolution.
+            filter_mat,
+            in_padded);
+    }
+
 }
 }
diff --git a/include/fdeep/depthwise_convolution.hpp b/include/fdeep/depthwise_convolution.hpp
@@ -85,7 +85,7 @@ namespace internal {
 
         const auto conv_cfg = preprocess_convolution(
             filter_mat.filter_shape_.without_depth(),
-            strides, pad_type, input.shape().height_, input.shape().width_);
+            strides, pad_type, input.shape().height_, input.shape().width_, false);
 
         const auto in_padded = pad_tensor(0, 0, 0,
             conv_cfg.pad_top_, conv_cfg.pad_bottom_, conv_cfg.pad_left_, conv_cfg.pad_right_,
diff --git a/include/fdeep/filter.hpp b/include/fdeep/filter.hpp
@@ -62,14 +62,15 @@ namespace internal {
 
     inline filter dilate_filter(const shape2& dilation_rate, const filter& undilated)
     {
-        return filter(dilate_tensor(dilation_rate, undilated.get_tensor()),
+        return filter(dilate_tensor(dilation_rate, undilated.get_tensor(), false), // false: no trailing-zero expansion, so filters are dilated exactly as before this parameter existed.
             undilated.get_bias());
     }
 
     inline filter_vec generate_filters(
         const shape2& dilation_rate,
         const tensor_shape& filter_shape, std::size_t k,
-        const float_vec& weights, const float_vec& bias)
+        const float_vec& weights, const float_vec& bias,
+        bool transpose)
     {
         filter_vec filters(k, filter(tensor(filter_shape, 0), 0));
 
@@ -90,6 +91,10 @@ namespace internal {
         for (auto& filt : filters) {
             filt.set_params(*it_filter_val, *it_filter_bias);
             filt = dilate_filter(dilation_rate, filt);
+            if (transpose) {
+                filt = filter(reverse_height_dimension(filt.get_tensor()), filt.get_bias());
+                filt = filter(reverse_width_dimension(filt.get_tensor()), filt.get_bias());
+            }
             ++it_filter_val;
             ++it_filter_bias;
         }
diff --git a/include/fdeep/import_model.hpp b/include/fdeep/import_model.hpp
@@ -37,6 +37,7 @@
 #include "fdeep/layers/centercrop_layer.hpp"
 #include "fdeep/layers/concatenate_layer.hpp"
 #include "fdeep/layers/conv_2d_layer.hpp"
+#include "fdeep/layers/conv_2d_transpose_layer.hpp"
 #include "fdeep/layers/cropping_3d_layer.hpp"
 #include "fdeep/layers/dense_layer.hpp"
 #include "fdeep/layers/depthwise_conv_2d_layer.hpp"
@@ -403,6 +404,36 @@ namespace internal {
             dilation_rate, weights, bias);
     }
 
+    inline layer_ptr create_conv_2d_transpose_layer(const get_param_f& get_param, // Builds a conv_2d_transpose_layer from the JSON layer description `data` and the parameters stored under `name`.
+        const nlohmann::json& data,
+        const std::string& name)
+    {
+        const std::string padding_str = data["config"]["padding"];
+        const auto pad_type = create_padding(padding_str);
+
+        const shape2 strides = create_shape2(data["config"]["strides"]);
+        const shape2 dilation_rate = create_shape2(data["config"]["dilation_rate"]);
+
+        const auto filter_count = create_size_t(data["config"]["filters"]);
+        float_vec bias(filter_count, 0); // Zero bias per filter; replaced below when the layer config has use_bias set.
+        const bool use_bias = data["config"]["use_bias"];
+        if (use_bias)
+            bias = decode_floats(get_param(name, "bias"));
+        assertion(bias.size() == filter_count, "size of bias does not match");
+
+        const float_vec weights = decode_floats(get_param(name, "weights"));
+        const shape2 kernel_size = create_shape2(data["config"]["kernel_size"]);
+        assertion(kernel_size.area() > 0 && filter_count > 0 && weights.size() % (kernel_size.area() * filter_count) == 0, // Checks the exact divisor used below: rejects zero divisors and weight counts that would be silently truncated.
+            "invalid number of weights");
+        const std::size_t filter_depths = weights.size() / (kernel_size.area() * filter_count); // Depth of each filter, derived from the total weight count.
+        const tensor_shape filter_shape(
+            kernel_size.height_, kernel_size.width_, filter_depths);
+
+        return std::make_shared<conv_2d_transpose_layer>(name,
+            filter_shape, filter_count, strides, pad_type,
+            dilation_rate, weights, bias);
+    }
+
     inline layer_ptr create_separable_conv_2D_layer(const get_param_f& get_param,
         const nlohmann::json& data,
         const std::string& name)
@@ -1145,6 +1176,8 @@ namespace internal {
             { "Identity", create_identity_layer },
             { "Conv1D", create_conv_2d_layer },
             { "Conv2D", create_conv_2d_layer },
+            { "Conv1DTranspose", create_conv_2d_transpose_layer },
+            { "Conv2DTranspose", create_conv_2d_transpose_layer },
             { "SeparableConv1D", create_separable_conv_2D_layer },
             { "SeparableConv2D", create_separable_conv_2D_layer },
             { "DepthwiseConv2D", create_depthwise_conv_2D_layer },
diff --git a/include/fdeep/layers/conv_2d_layer.hpp b/include/fdeep/layers/conv_2d_layer.hpp
@@ -30,7 +30,7 @@ namespace internal {
             const float_vec& weights, const float_vec& bias)
             : layer(name)
             , filters_(generate_im2col_filter_matrix(
-                  generate_filters(dilation_rate, filter_shape, k, weights, bias)))
+                  generate_filters(dilation_rate, filter_shape, k, weights, bias, false)))
             , strides_(strides)
             , padding_(p)
         {
diff --git a/include/fdeep/layers/conv_2d_transpose_layer.hpp b/include/fdeep/layers/conv_2d_transpose_layer.hpp
@@ -0,0 +1,56 @@
+// Copyright 2016, Tobias Hermann.
+// https://github.com/Dobiasd/frugally-deep
+// Distributed under the MIT License.
+// (See accompanying LICENSE file or at
+//  https://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include "fdeep/convolution.hpp"
+#include "fdeep/filter.hpp"
+#include "fdeep/layers/layer.hpp"
+#include "fdeep/shape2.hpp"
+#include "fdeep/tensor_shape.hpp"
+
+#include <fplus/fplus.hpp>
+
+#include <cstddef>
+#include <string>
+#include <vector>
+
+namespace fdeep {
+namespace internal {
+
+    class conv_2d_transpose_layer : public layer { // Layer that applies a transposed convolution to its input via convolve_transposed.
+    public:
+        explicit conv_2d_transpose_layer(
+            const std::string& name, const tensor_shape& filter_shape,
+            std::size_t k, const shape2& strides, padding p, // k is the number of filters; it must be greater than zero (asserted below).
+            const shape2& dilation_rate,
+            const float_vec& weights, const float_vec& bias)
+            : layer(name)
+            , filters_(generate_im2col_filter_matrix(
+                  generate_filters(dilation_rate, filter_shape, k, weights, bias, true))) // true: every filter is reversed along height and width after dilation.
+            , dilation_rate_(dilation_rate) // NOTE(review): stored but not read anywhere in this class; the dilation is already applied to filters_ above - confirm whether the member is needed.
+            , strides_(strides)
+            , padding_(p)
+        {
+            assertion(k > 0, "needs at least one filter");
+            assertion(filter_shape.volume() > 0, "filter must have volume");
+            assertion(strides.area() > 0, "invalid strides");
+        }
+
+    protected:
+        tensors apply_impl(const tensors& inputs) const override // Returns exactly one tensor: the transposed convolution of the tensor obtained from single_tensor_from_tensors.
+        {
+            const auto& input = single_tensor_from_tensors(inputs);
+            return { convolve_transposed(strides_, padding_, filters_, input) };
+        }
+        convolution_filter_matrices filters_; // im2col filter matrices built once in the constructor.
+        shape2 dilation_rate_;
+        shape2 strides_; // Passed to convolve_transposed, which uses it to dilate the input.
+        padding padding_;
+    };
+
+}
+}
diff --git a/include/fdeep/layers/depthwise_conv_2d_layer.hpp b/include/fdeep/layers/depthwise_conv_2d_layer.hpp
@@ -35,7 +35,7 @@ namespace internal {
             : layer(name)
             , filters_(generate_im2col_filter_matrix(
                   generate_filters(dilation_rate, filter_shape,
-                      input_depth, depthwise_weights, bias)))
+                      input_depth, depthwise_weights, bias, false)))
             , strides_(strides)
             , padding_(p)
         {
diff --git a/include/fdeep/layers/separable_conv_2d_layer.hpp b/include/fdeep/layers/separable_conv_2d_layer.hpp
@@ -39,7 +39,7 @@ namespace internal {
                   depthwise_weights, bias_0)
             , filters_pointwise_(generate_im2col_filter_matrix(
                   generate_filters(shape2(1, 1),
-                      tensor_shape(input_depth), k, pointwise_weights, bias)))
+                      tensor_shape(input_depth), k, pointwise_weights, bias, false)))
         {
         }
 
diff --git a/include/fdeep/tensor.hpp b/include/fdeep/tensor.hpp
@@ -546,6 +546,36 @@ namespace internal {
         return out;
     }
 
+    inline tensor reverse_depth_dimension(const tensor& in) // Returns a tensor of the same shape as `in` with the element order along the depth axis reversed.
+    {
+        tensor out = tensor(in.shape(), static_cast<float_type>(0)); // Zero-initialized; every position is overwritten by the loop below.
+        loop_over_all_dims(in.shape(), [&in, &out](std::size_t dim5, std::size_t dim4, std::size_t y, std::size_t x, std::size_t z) {
+            out.set_ignore_rank(tensor_pos(dim5, dim4, y, x, in.shape().depth_ - z - 1), // Mirrored depth index: z maps to depth_ - 1 - z.
+                in.get_ignore_rank(tensor_pos(dim5, dim4, y, x, z)));
+        });
+        return out;
+    }
+
+    inline tensor reverse_width_dimension(const tensor& in) // Returns a tensor of the same shape as `in` with the element order along the width axis reversed.
+    {
+        tensor out = tensor(in.shape(), static_cast<float_type>(0)); // Zero-initialized; every position is overwritten by the loop below.
+        loop_over_all_dims(in.shape(), [&in, &out](std::size_t dim5, std::size_t dim4, std::size_t y, std::size_t x, std::size_t z) {
+            out.set_ignore_rank(tensor_pos(dim5, dim4, y, in.shape().width_ - x - 1, z), // Mirrored width index: x maps to width_ - 1 - x.
+                in.get_ignore_rank(tensor_pos(dim5, dim4, y, x, z)));
+        });
+        return out;
+    }
+
+    inline tensor reverse_height_dimension(const tensor& in) // Returns a tensor of the same shape as `in` with the element order along the height axis reversed.
+    {
+        tensor out = tensor(in.shape(), static_cast<float_type>(0)); // Zero-initialized; every position is overwritten by the loop below.
+        loop_over_all_dims(in.shape(), [&in, &out](std::size_t dim5, std::size_t dim4, std::size_t y, std::size_t x, std::size_t z) {
+            out.set_ignore_rank(tensor_pos(dim5, dim4, in.shape().height_ - y - 1, x, z), // Mirrored height index: y maps to height_ - 1 - y.
+                in.get_ignore_rank(tensor_pos(dim5, dim4, y, x, z)));
+        });
+        return out;
+    }
+
     inline tensor transpose(const tensor& in)
     {
         return permute_tensor(in, std::vector<std::size_t>({ 2, 1 }));
@@ -577,20 +607,28 @@ namespace internal {
         return result;
     }
 
-    inline tensor dilate_tensor(const shape2& dilation_rate, const tensor& in)
+    inline tensor dilate_tensor(const shape2& dilation_rate, const tensor& in, bool trailing_zeros)
     {
         assertion(in.shape().rank() <= 3, "Invalid rank for dilation");
         if (dilation_rate == shape2(1, 1)) {
             return in;
         }
 
-        tensor result(dilate_tensor_shape(dilation_rate, in.shape()), 0);
+        const std::size_t expansion_x = trailing_zeros ? (dilation_rate.width_ - 1) : 0;
+        const std::size_t expansion_y = trailing_zeros ? (dilation_rate.height_ - 1) : 0;
+
+        auto dilated_shape = dilate_tensor_shape(dilation_rate, in.shape());
+        dilated_shape.width_ += expansion_x;
+        dilated_shape.height_ += expansion_y;
+        const std::size_t offset_x = expansion_x - expansion_x / 2;
+        const std::size_t offset_y = expansion_y - expansion_y / 2;
+        tensor result(dilated_shape, 0);
         for (std::size_t y = 0; y < in.shape().height_; ++y) {
             for (std::size_t x = 0; x < in.shape().width_; ++x) {
                 for (std::size_t z = 0; z < in.shape().depth_; ++z) {
                     result.set_ignore_rank(tensor_pos(
-                                               y * dilation_rate.height_,
-                                               x * dilation_rate.width_,
+                                               y * dilation_rate.height_ + offset_y,
+                                               x * dilation_rate.width_ + offset_x,
                                                z),
                         in.get_ignore_rank(tensor_pos(y, x, z)));
                 }
diff --git a/keras_export/convert_model.py b/keras_export/convert_model.py
diff --git a/keras_export/generate_test_models.py b/keras_export/generate_test_models.py

Original file line number	Diff line number	Diff line change
`@@ -30,7 +30,7 @@ namespace internal {`
`30`	`30`	`const float_vec& weights, const float_vec& bias)`
`31`	`31`	`: layer(name)`
`32`	`32`	`, filters_(generate_im2col_filter_matrix(`
`33`		`- generate_filters(dilation_rate, filter_shape, k, weights, bias)))`
	`33`	`+ generate_filters(dilation_rate, filter_shape, k, weights, bias, false)))`
`34`	`34`	`, strides_(strides)`
`35`	`35`	`, padding_(p)`
`36`	`36`	`{`