SemiDiscreteOT 1.0
Semi-Discrete Optimal Transport Library
Loading...
Searching...
No Matches
PointCloudHierarchy.cc
Go to the documentation of this file.
2#include <filesystem>
3#include <fstream>
4#include <iostream>
5#include <cmath>
6#include <stdexcept>
7#include <vector>
8#include <algorithm>
9#include <random>
10#include <numeric>
11#include <limits>
12#include <chrono>
13
14namespace fs = std::filesystem;
15
16namespace PointCloudHierarchy {
17
19 : min_points_(min_points)
20 , max_points_(max_points)
21 , num_levels_(0)
22{}
23
25 max_points_ = max_points;
26}
27
29 min_points_ = min_points;
30}
31
35
37 if (level < 0 || level >= static_cast<int>(level_point_counts_.size())) {
38 throw std::out_of_range("Level index out of range");
39 }
40 return level_point_counts_[level];
41}
42
43const std::vector<std::vector<size_t>>& PointCloudHierarchyManager::getParentIndices(int level) const {
44 if (level <= 0 || level >= num_levels_) {
45 throw std::out_of_range("Level index out of range for parent indices");
46 }
47 return parent_indices_[level-1];
48}
49
50const std::vector<std::vector<size_t>>& PointCloudHierarchyManager::getChildIndices(int level) const {
51 if (level < 0 || level >= num_levels_-1) {
52 throw std::out_of_range("Level index out of range for child indices");
53 }
54 return child_indices_[level];
55}
56
57void PointCloudHierarchyManager::ensureDirectoryExists(const std::string& path) const {
58 if (!fs::exists(path)) {
59 fs::create_directories(path);
60 }
61}
62
63int PointCloudHierarchyManager::getPointsForLevel(int base_points, int level) const {
64 // Level 0 always uses original point cloud size
65 if (level == 0) {
66 return base_points;
67 }
68 // Level 1 starts with max_points (if smaller than base_points)
69 if (level == 1) {
70 return std::min(base_points, max_points_);
71 }
72 // For subsequent levels, use a reduction factor of 4 based on level 1's size
73 int level1_points = std::min(base_points, max_points_);
74 int points = static_cast<int>(level1_points / std::pow(4.0, level - 1));
75 return std::max(points, min_points_);
76}
77
78template <int spacedim>
79std::tuple<std::vector<std::array<double, spacedim>>, std::vector<double>, std::vector<int>>
81 const std::vector<std::array<double, spacedim>>& points,
82 const std::vector<double>& densities,
83 size_t k) {
84
85 // const size_t n_points = points.size();
86
87 if (points.size() <= k) {
88 // If fewer points than clusters, return original points
89 std::vector<int> assignments(points.size());
90 std::iota(assignments.begin(), assignments.end(), 0); // Each point is its own cluster
91 return {points, densities, assignments};
92 }
93
94 // Set up clustering parameters
96
97
98 // Start timing
99 auto start = std::chrono::high_resolution_clock::now();
100
101 // Run parallel k-means clustering
102 std::vector<std::array<double, spacedim>> centers;
103 std::vector<uint32_t> cluster_assignments;
104 std::tie(centers, cluster_assignments) = dkm::kmeans_lloyd_parallel(points, k);
105
106 // Stop timing
107 auto stop = std::chrono::high_resolution_clock::now();
108 auto duration = std::chrono::duration_cast<std::chrono::seconds>(stop - start);
109
110 std::cout << " K-means clustering took " << duration.count() << " seconds" << std::endl;
111
112 // Convert cluster assignments to int
113 std::vector<int> assignments(cluster_assignments.begin(), cluster_assignments.end());
114
115 // Compute densities for each cluster
116 std::vector<double> cluster_densities(k, 0.0);
117 for (size_t i = 0; i < points.size(); ++i) {
118 int cluster = assignments[i];
119 double point_density = densities.empty() ? 1.0 : densities[i];
120 cluster_densities[cluster] += point_density;
121 }
122
123 return {centers, cluster_densities, assignments};
124}
125
126template <int spacedim>
128 const std::vector<Point<spacedim>>& input_points,
129 const std::vector<double>& input_densities,
130 const std::string& output_dir) {
131
132 // Validate inputs
133 if (input_points.empty()) {
134 throw std::runtime_error("Input point cloud is empty");
135 }
136
137 // Start timing the entire hierarchy generation
138 auto start_total = std::chrono::high_resolution_clock::now();
139
140 // Create uniform densities if none provided
141 std::vector<double> densities;
142 if (input_densities.empty()) {
143 densities.resize(input_points.size(), 1.0 / input_points.size());
144 } else {
145 if (input_densities.size() != input_points.size()) {
146 throw std::runtime_error("density vector size doesn't match point cloud size");
147 }
148 densities = input_densities;
149 }
150
151 // Create output directories
152 ensureDirectoryExists(output_dir);
153
154 // Calculate number of levels
155 const int total_points = input_points.size();
156 const int level1_points = std::min(total_points, max_points_);
157
158 num_levels_ = std::min(5, static_cast<int>(std::log(level1_points / min_points_) / std::log(4.0)) + 2);
159 num_levels_ = std::max(1, num_levels_); // At least one level
160
161 std::cout << "Initial point cloud has " << total_points << " points" << std::endl;
162 std::cout << "Level 1 will have maximum of " << max_points_ << " points" << std::endl;
163 std::cout << "Creating " << num_levels_ << " levels of point clouds" << std::endl;
164
165 // Reset level point counts and parent-child relationships
166 level_point_counts_.clear();
167 parent_indices_.clear();
168 child_indices_.clear();
169
170 // Resize parent and child indices containers
171 parent_indices_.resize(num_levels_ - 1);
172 child_indices_.resize(num_levels_ - 1);
173
174 // Convert input points to std::array format
175 std::vector<std::array<double, spacedim>> points_array(input_points.size());
176 for (size_t i = 0; i < input_points.size(); ++i) {
177 for (int d = 0; d < spacedim; ++d) {
178 points_array[i][d] = input_points[i][d];
179 }
180 }
181
182 // Store all point clouds for each level
183 std::vector<std::vector<std::array<double, spacedim>>> level_points_vec(num_levels_);
184 std::vector<std::vector<double>> level_densities_vec(num_levels_);
185
186 // Level 0 is always the original point cloud
187 level_points_vec[0] = points_array;
188 level_densities_vec[0] = densities;
189 level_point_counts_.push_back(input_points.size());
190
191 std::cout << "Level 0: using original point cloud with " << level_points_vec[0].size() << " points" << std::endl;
192
193 // Generate coarser levels
194 for (int level = 1; level < num_levels_; ++level) {
195 int points_for_level = getPointsForLevel(total_points, level);
196 std::cout << "Level " << level << ": targeting " << points_for_level << " points" << std::endl;
197
198 // Use k-means clustering to create coarser point cloud with parent-child tracking
199 std::vector<std::array<double, spacedim>> coarse_points;
200 std::vector<double> coarse_densities;
201 std::vector<int> assignments;
202
203 std::tie(coarse_points, coarse_densities, assignments) = kmeansClustering<spacedim>(
204 level_points_vec[level-1], level_densities_vec[level-1], points_for_level);
205
206 level_points_vec[level] = coarse_points;
207 level_densities_vec[level] = coarse_densities;
208 level_point_counts_.push_back(coarse_points.size());
209
210 std::cout << " Generated " << coarse_points.size() << " points after clustering" << std::endl;
211
212 // Build parent-child relationships
213 const size_t n_fine_points = level_points_vec[level-1].size();
214 const size_t n_coarse_points = coarse_points.size();
215
216 // Initialize parent indices for previous (finer) level
217 parent_indices_[level-1].resize(n_fine_points);
218
219 // Initialize child indices for this (coarser) level
220 child_indices_[level-1].resize(n_coarse_points);
221
222 // Populate parent-child relationships
223 for (size_t i = 0; i < n_fine_points; ++i) {
224 int coarse_point_idx = assignments[i];
225 if (coarse_point_idx >= 0 && coarse_point_idx < static_cast<int>(n_coarse_points)) {
226 // Add the coarse point as parent of this fine point
227 parent_indices_[level-1][i].push_back(coarse_point_idx);
228
229 // Add this fine point as child of the coarse point
230 child_indices_[level-1][coarse_point_idx].push_back(i);
231 }
232 }
233 }
234
235 // Save the point clouds and parent-child relationships for each level
236 for (int level = 0; level < num_levels_; ++level) {
237 std::string points_file = output_dir + "/level_" + std::to_string(level) + "_points.txt";
238 std::string densities_file = output_dir + "/level_" + std::to_string(level) + "_density.txt";
239
240 std::ofstream points_out(points_file);
241 std::ofstream densities_out(densities_file);
242
243 if (!points_out || !densities_out) {
244 throw std::runtime_error("Failed to open output files for level " + std::to_string(level));
245 }
246
247 // Write points and densities
248 for (size_t i = 0; i < level_points_vec[level].size(); ++i) {
249 for (int d = 0; d < spacedim; ++d) {
250 points_out << level_points_vec[level][i][d] << (d < spacedim - 1 ? " " : "");
251 }
252 points_out << std::endl;
253
254 densities_out << level_densities_vec[level][i] << std::endl;
255 }
256
257 // Save parent-child relationships for non-boundary levels
258 // Parents: For each point at level L, save its parent at level L+1
259 if (level < num_levels_ - 1) {
260 std::string parents_file = output_dir + "/level_" + std::to_string(level) + "_parents.txt";
261 std::ofstream parents_out(parents_file);
262
263 if (!parents_out) {
264 throw std::runtime_error("Failed to open parents file for level " + std::to_string(level));
265 }
266
267 // Write parent indices for points at current level
268 for (size_t i = 0; i < parent_indices_[level].size(); ++i) {
269 parents_out << parent_indices_[level][i].size();
270 for (const auto& parent_idx : parent_indices_[level][i]) {
271 parents_out << " " << parent_idx;
272 }
273 parents_out << std::endl;
274 }
275 }
276
277 // Children: For each point at level L, save its children at level L-1
278 if (level > 0) {
279 std::string children_file = output_dir + "/level_" + std::to_string(level) + "_children.txt";
280 std::ofstream children_out(children_file);
281
282 if (!children_out) {
283 throw std::runtime_error("Failed to open children file for level " + std::to_string(level));
284 }
285
286 // Write child indices for points at current level
287 for (size_t i = 0; i < child_indices_[level-1].size(); ++i) {
288 children_out << child_indices_[level-1][i].size();
289 for (const auto& child_idx : child_indices_[level-1][i]) {
290 children_out << " " << child_idx;
291 }
292 children_out << std::endl;
293 }
294 }
295
296 std::cout << "Saved level " << level << " point cloud and relationships to " << output_dir << std::endl;
297 }
298
299 // Stop timing the entire hierarchy generation
300 auto stop_total = std::chrono::high_resolution_clock::now();
301 auto duration_total = std::chrono::duration_cast<std::chrono::seconds>(stop_total - start_total);
302
303 std::cout << "Total hierarchy generation took " << duration_total.count() << " seconds" << std::endl;
304
305 return num_levels_;
306}
307
308// Explicit template instantiations for dimensions 2 and 3
309template int PointCloudHierarchyManager::generateHierarchy<2>(
310 const std::vector<Point<2>>&, const std::vector<double>&, const std::string&);
311template int PointCloudHierarchyManager::generateHierarchy<3>(
312 const std::vector<Point<3>>&, const std::vector<double>&, const std::string&);
313
314// Explicit template instantiations
315template std::tuple<std::vector<std::array<double, 2>>, std::vector<double>, std::vector<int>>
316PointCloudHierarchyManager::kmeansClustering<2>(
317 const std::vector<std::array<double, 2>>&, const std::vector<double>&, size_t);
318template std::tuple<std::vector<std::array<double, 3>>, std::vector<double>, std::vector<int>>
319PointCloudHierarchyManager::kmeansClustering<3>(
320 const std::vector<std::array<double, 3>>&, const std::vector<double>&, size_t);
321
322} // namespace PointCloudHierarchy
std::vector< std::vector< std::vector< size_t > > > parent_indices_
int getPointCount(int level) const
Get the number of points at a specific level.
const std::vector< std::vector< size_t > > & getParentIndices(int level) const
Get the parent indices for points at level L-1.
int getNumLevels() const
Get the number of levels in the last generated hierarchy.
void setMaxPoints(int max_points)
Set the maximum number of points for level 1.
std::tuple< std::vector< std::array< double, spacedim > >, std::vector< double >, std::vector< int > > kmeansClustering(const std::vector< std::array< double, spacedim > > &points, const std::vector< double > &densities, size_t k)
Performs parallel k-means clustering on a set of points with parent-child tracking.
int getPointsForLevel(int base_points, int level) const
Calculate number of points for a given level.
int generateHierarchy(const std::vector< Point< spacedim > > &input_points, const std::vector< double > &input_density, const std::string &output_dir)
Generate hierarchy of point clouds from input points.
const std::vector< std::vector< size_t > > & getChildIndices(int level) const
Get the child indices for points at level L.
void ensureDirectoryExists(const std::string &path) const
Ensure directory exists, create if it doesn't.
void setMinPoints(int min_points)
Set the minimum number of points for coarsest level.
PointCloudHierarchyManager(int min_points=100, int max_points=1000)
Constructor.
std::vector< std::vector< std::vector< size_t > > > child_indices_
std::tuple< std::vector< std::array< T, N > >, std::vector< uint32_t > > kmeans_lloyd_parallel(const std::vector< std::array< T, N > > &data, const clustering_parameters< T > &parameters)