File size: 6,467 Bytes
34d1f8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#pragma once
#include <torch/extension.h>

typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t;

namespace voxelization {

int hard_voxelize_cpu(const at::Tensor &points, at::Tensor &voxels,
                      at::Tensor &coors, at::Tensor &num_points_per_voxel,
                      const std::vector<float> voxel_size,
                      const std::vector<float> coors_range,
                      const int max_points, const int max_voxels,
                      const int NDim = 3);

void dynamic_voxelize_cpu(const at::Tensor &points, at::Tensor &coors,
                          const std::vector<float> voxel_size,
                          const std::vector<float> coors_range,
                          const int NDim = 3);

std::vector<at::Tensor> dynamic_point_to_voxel_cpu(
    const at::Tensor &points, const at::Tensor &voxel_mapping,
    const std::vector<float> voxel_size, const std::vector<float> coors_range);

#ifdef WITH_CUDA
int hard_voxelize_gpu(const at::Tensor &points, at::Tensor &voxels,
                      at::Tensor &coors, at::Tensor &num_points_per_voxel,
                      const std::vector<float> voxel_size,
                      const std::vector<float> coors_range,
                      const int max_points, const int max_voxels,
                      const int NDim = 3);

int nondisterministic_hard_voxelize_gpu(const at::Tensor &points, at::Tensor &voxels,
                                        at::Tensor &coors, at::Tensor &num_points_per_voxel,
                                        const std::vector<float> voxel_size,
                                        const std::vector<float> coors_range,
                                        const int max_points, const int max_voxels,
                                        const int NDim = 3);

void dynamic_voxelize_gpu(const at::Tensor &points, at::Tensor &coors,
                          const std::vector<float> voxel_size,
                          const std::vector<float> coors_range,
                          const int NDim = 3);

std::vector<torch::Tensor> dynamic_point_to_voxel_forward_gpu(const torch::Tensor &feats,
                                                              const torch::Tensor &coors,
                                                              const reduce_t reduce_type);

void dynamic_point_to_voxel_backward_gpu(torch::Tensor &grad_feats,
                                         const torch::Tensor &grad_reduced_feats,
                                         const torch::Tensor &feats,
                                         const torch::Tensor &reduced_feats,
                                         const torch::Tensor &coors_idx,
                                         const torch::Tensor &reduce_count,
                                         const reduce_t reduce_type);
#endif

// Interface for Python
inline int hard_voxelize(const at::Tensor &points, at::Tensor &voxels,
                         at::Tensor &coors, at::Tensor &num_points_per_voxel,
                         const std::vector<float> voxel_size,
                         const std::vector<float> coors_range,
                         const int max_points, const int max_voxels,
                         const int NDim = 3, const bool deterministic = true) {
  if (points.device().is_cuda()) {
#ifdef WITH_CUDA
    if (deterministic) {
      return hard_voxelize_gpu(points, voxels, coors, num_points_per_voxel,
                               voxel_size, coors_range, max_points, max_voxels,
                               NDim);
    }
    return nondisterministic_hard_voxelize_gpu(points, voxels, coors, num_points_per_voxel,
                                               voxel_size, coors_range, max_points, max_voxels,
                                               NDim);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  return hard_voxelize_cpu(points, voxels, coors, num_points_per_voxel,
                           voxel_size, coors_range, max_points, max_voxels,
                           NDim);
}

inline void dynamic_voxelize(const at::Tensor &points, at::Tensor &coors,
                             const std::vector<float> voxel_size,
                             const std::vector<float> coors_range,
                             const int NDim = 3) {
  if (points.device().is_cuda()) {
#ifdef WITH_CUDA
    return dynamic_voxelize_gpu(points, coors, voxel_size, coors_range, NDim);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  return dynamic_voxelize_cpu(points, coors, voxel_size, coors_range, NDim);
}

inline reduce_t convert_reduce_type(const std::string &reduce_type) {
  if (reduce_type == "max")
    return reduce_t::MAX;
  else if (reduce_type == "sum")
    return reduce_t::SUM;
  else if (reduce_type == "mean")
    return reduce_t::MEAN;
  else TORCH_CHECK(false, "do not support reduce type " + reduce_type)
  return reduce_t::SUM;
}

inline std::vector<torch::Tensor> dynamic_point_to_voxel_forward(const torch::Tensor &feats,
                                                                 const torch::Tensor &coors,
                                                                 const std::string &reduce_type) {
  if (feats.device().is_cuda()) {
#ifdef WITH_CUDA
    return dynamic_point_to_voxel_forward_gpu(feats, coors, convert_reduce_type(reduce_type));
#else
    TORCH_CHECK(false, "Not compiled with GPU support");
#endif
  }
  TORCH_CHECK(false, "do not support cpu yet");
  return std::vector<torch::Tensor>();
}

inline void dynamic_point_to_voxel_backward(torch::Tensor &grad_feats,
                                            const torch::Tensor &grad_reduced_feats,
                                            const torch::Tensor &feats,
                                            const torch::Tensor &reduced_feats,
                                            const torch::Tensor &coors_idx,
                                            const torch::Tensor &reduce_count,
                                            const std::string &reduce_type) {
  if (grad_feats.device().is_cuda()) {
#ifdef WITH_CUDA
    dynamic_point_to_voxel_backward_gpu(
        grad_feats, grad_reduced_feats, feats, reduced_feats, coors_idx, reduce_count,
        convert_reduce_type(reduce_type));
    return;
#else
    TORCH_CHECK(false, "Not compiled with GPU support");
#endif
  }
  TORCH_CHECK(false, "do not support cpu yet");
}

}  // namespace voxelization