dejavu
Fast probabilistic symmetry detection.
Loading...
Searching...
No Matches
graph.h
Go to the documentation of this file.
1// Copyright 2024 Markus Anders
2// This file is part of dejavu 2.0.
3// See LICENSE for extended copyright information.
4
5#ifndef SASSY_GRAPH_BUILDER_H
6#define SASSY_GRAPH_BUILDER_H
7
8#include<unordered_map>
9#include <fstream>
10#include "utility.h"
11#include "coloring.h"
12
13namespace dejavu {
19 class sgraph {
20 struct vertexComparator {
21 explicit vertexComparator(const sgraph &g) : graph(g) {}
22
23 const sgraph &graph;
24
25 bool operator()(const int &v1, const int &v2) const {
26 return graph.d[v1] < graph.d[v2];
27 }
28 };
29
30 struct vertexComparatorColor {
31 vertexComparatorColor(const sgraph &g, const int *vtocol) : graph(g), vertex_to_col(vtocol) {}
32
33 const sgraph &graph;
34 const int *vertex_to_col;
35
36 bool operator()(const int &v1, const int &v2) const {
37 //return (g.d[v1] < g.d[v2]) || ((g.d[v1] == g.d[v2]) && (vertex_to_col[v1] < vertex_to_col[v2]));
38 return (vertex_to_col[v1] < vertex_to_col[v2]);
39 }
40 };
41
42 public:
43 bool initialized = false;
44 int *v = nullptr;
45 int *d = nullptr;
46 int *e = nullptr;
47
48 int v_size = 0;
49 int e_size = 0;
50
51 bool dense = false;
52
53 void initialize(int nv, int ne) {
54 initialized = true;
55 v = new int[nv];
56 d = new int[nv];
57 e = new int[ne];
58 }
59
60 // initialize a coloring of this sgraph, partitioning degrees of vertices
61 void initialize_coloring(ds::coloring *c, int *vertex_to_col) {
62 c->initialize(this->v_size);
63 std::memset(c->ptn, 1, sizeof(int) * v_size);
64
65 if (this->v_size < c->domain_size) {
66 c->domain_size = this->v_size;
67 }
68
69 if (v_size == 0)
70 return;
71
72 int cells = 0;
73 int last_new_cell = 0;
74
75 if (vertex_to_col == nullptr) {
76 for (int i = 0; i < v_size; i++) {
77 c->lab[i] = i;
78 }
79 std::sort(c->lab, c->lab + c->domain_size, vertexComparator(*this));
80 for (int i = 0; i < c->domain_size; i++) {
81 c->vertex_to_col[c->lab[i]] = last_new_cell;
82 c->vertex_to_lab[c->lab[i]] = i;
83 if (i + 1 == c->domain_size) {
84 cells += 1;
85 c->ptn[last_new_cell] = i - last_new_cell;
86 c->ptn[i] = 0;
87 break;
88 }
89 dej_assert(this->d[c->lab[i]] <= this->d[c->lab[i + 1]]);
90 if (this->d[c->lab[i]] != this->d[c->lab[i + 1]]) {
91 c->ptn[i] = 0;
92 cells += 1;
93 c->ptn[last_new_cell] = i - last_new_cell;
94 last_new_cell = i + 1;
95 continue;
96 }
97 }
98 } else {
99 int col = 0;
100
101 int min_col = INT32_MAX;
102 int max_col = INT32_MIN;
103 for (int i = 0; i < v_size; i++) {
104 if (vertex_to_col[i] < min_col)
105 min_col = vertex_to_col[i];
106 if (vertex_to_col[i] > max_col)
107 max_col = vertex_to_col[i];
108 }
109
110 std::vector<int> colsize;
111 colsize.reserve(std::min(this->v_size, (max_col - min_col) + 1));
112
113 if (min_col < 0 || max_col > 4 * this->v_size) {
114 std::unordered_map<int, int> colors; // TODO: should not use unordered_map!
115 colors.reserve(this->v_size);
116 for (int i = 0; i < v_size; i++) {
117 auto it = colors.find(vertex_to_col[i]);
118 if (it == colors.end()) {
119 colors.insert(std::pair<int, int>(vertex_to_col[i], col));
120 colsize.push_back(1);
121 dej_assert(col < this->v_size);
122 vertex_to_col[i] = col;
123 ++col;
124 } else {
125 const int found_col = it->second;
126 dej_assert(found_col < this->v_size);
127 vertex_to_col[i] = found_col;
128 ++colsize[found_col];
129 }
130 }
131 } else {
132 std::vector<int> colors;
133 colors.reserve(max_col + 1);
134 for (int i = 0; i < max_col + 1; ++i)
135 colors.push_back(-1);
136 for (int i = 0; i < v_size; i++) {
137 if (colors[vertex_to_col[i]] == -1) {
138 colors[vertex_to_col[i]] = col;
139 colsize.push_back(1);
140 dej_assert(col < this->v_size);
141 vertex_to_col[i] = col;
142 ++col;
143 } else {
144 const int found_col = colors[vertex_to_col[i]];
145 dej_assert(found_col < this->v_size);
146 vertex_to_col[i] = found_col;
147 ++colsize[found_col];
148 }
149 }
150 }
151
152 int increment = 0;
153 for (int &i: colsize) {
154 const int col_sz = i;
155 i += increment;
156 increment += col_sz;
157 }
158 dej_assert(increment == v_size);
159
160 for (int i = 0; i < v_size; i++) {
161 const int v_col = vertex_to_col[i];
162 --colsize[v_col];
163 const int v_lab_pos = colsize[v_col];
164 c->lab[v_lab_pos] = i;
165 }
166
167 /*for(int i = 0; i < v_size; i++) {
168 c->lab[i] = i;
169 }
170 std::sort(c->lab, c->lab + c->lab_sz, vertexComparatorColor(*this, vertex_to_col));*/
171 for (int i = 0; i < c->domain_size; i++) {
172 c->vertex_to_col[c->lab[i]] = last_new_cell;
173 c->vertex_to_lab[c->lab[i]] = i;
174 if (i + 1 == c->domain_size) {
175 cells += 1;
176 c->ptn[last_new_cell] = i - last_new_cell;
177 c->ptn[i] = 0;
178 break;
179 }
180 //assert(this->d[c->lab[i]] <= this->d[c->lab[i + 1]]);
181 //if(this->d[c->lab[i]] < this->d[c->lab[i + 1]] || (this->d[c->lab[i]] == this->d[c->lab[i + 1]]
182 //&& (vertex_to_col[c->lab[i]] < vertex_to_col[c->lab[i + 1]]))) {
183 if (vertex_to_col[c->lab[i]] != vertex_to_col[c->lab[i + 1]]) {
184 c->ptn[i] = 0;
185 cells += 1;
186 c->ptn[last_new_cell] = i - last_new_cell;
187 last_new_cell = i + 1;
188 continue;
189 }
190 }
191 }
192
193 c->cells = cells;
194 }
195
197#if defined(DEJDEBUG) && !defined(NDEBUG)
198 for(int i = 0; i < v_size; ++i) {
199 dej_assert(d[i]>0?v[i] < e_size:true);
200 dej_assert(d[i]>0?v[i] >= 0:true);
201 dej_assert(d[i] >= 0);
202 dej_assert(d[i] < v_size);
203 }
204 for(int i = 0; i < e_size; ++i) {
205 dej_assert(e[i] < v_size);
206 dej_assert(e[i] >= 0);
207 }
208
209 // multiedge test
210 dejavu::ds::markset multiedge_test;
211 multiedge_test.initialize(v_size);
212 for(int i = 0; i < v_size; ++i) {
213 multiedge_test.reset();
214 for(int j = 0; j < d[i]; ++j) {
215 const int neigh = e[v[i] + j];
216 dej_assert(!multiedge_test.get(neigh));
217 multiedge_test.set(neigh);
218 }
219 }
220
221 // fwd - bwd test
222 multiedge_test.initialize(v_size);
223 for(int i = 0; i < v_size; ++i) {
224 multiedge_test.reset();
225 for(int j = 0; j < d[i]; ++j) {
226 const int neigh = e[v[i] + j];
227 dej_assert(neigh >= 0);
228 dej_assert(neigh < v_size);
229 bool found = false;
230 for(int k = 0; k < d[neigh]; ++k) {
231 const int neigh_neigh = e[v[neigh] + k];
232 if(neigh_neigh == i) {
233 found = true;
234 break;
235 }
236 }
237 dej_assert(found);
238 }
239 }
240#endif
241 }
242
244 if (initialized) {
245 delete[] v;
246 delete[] d;
247 delete[] e;
248 }
249 initialize(g->v_size, g->e_size);
250
251 memcpy(v, g->v, g->v_size * sizeof(int));
252 memcpy(d, g->d, g->v_size * sizeof(int));
253 memcpy(e, g->e, g->e_size * sizeof(int));
254 v_size = g->v_size;
255 e_size = g->e_size;
256 }
257
258 void sort_edgelist() const {
259 for (int i = 0; i < v_size; ++i) {
260 const int estart = v[i];
261 const int eend = estart + d[i];
262 std::sort(e + estart, e + eend);
263 }
264 }
265
267 if (initialized) {
268 delete[] v;
269 delete[] d;
270 delete[] e;
271 }
272 }
273 };
274
293 private:
294 sgraph g;
295 int* c = nullptr;
296 int* edge_cnt = nullptr;
297 unsigned int num_vertices_defined = 0;
298 unsigned int num_edges_defined = 0;
299 unsigned int num_deg_edges_defined = 0;
300 bool initialized;
301 bool finalized = false;
302
303 private:
304 void finalize() {
305 if(!finalized) {
306 if (!initialized)
307 throw std::logic_error("uninitialized graph");
308 if (num_vertices_defined != (unsigned int) g.v_size)
309 throw std::logic_error("did not add the number of vertices requested by constructor");
310 if (num_edges_defined != (unsigned int) g.e_size) {
311 std::cout << num_edges_defined << " vs. " << g.e_size << std::endl;
312 throw std::logic_error("did not add the number of edges requested by constructor");
313 }
314 sanity_check();
315 finalized = true;
316 }
317 }
318 public:
319 static_graph(const int nv, const int ne) {
320 if(nv <= 0) throw std::out_of_range("number of vertices must be positive");
321 if(ne <= 0) throw std::out_of_range("number of edges must be positive");
322 g.initialize(nv, 2*ne);
323 g.v_size = nv;
324 g.e_size = 2*ne;
325 c = new int[nv];
326 edge_cnt = new int[nv];
327 for(int i = 0; i < nv; ++i) edge_cnt[i] = 0;
328 initialized = true;
329 };
330
332 initialized = false;
333 }
334
336 if(initialized && c != nullptr)
337 delete[] c;
338 if(initialized && edge_cnt != nullptr)
339 delete[] edge_cnt;
340 }
341
342 void initialize_graph(const unsigned int nv, const unsigned int ne) {
343 if(initialized || finalized)
344 throw std::logic_error("can not initialize a graph that is already initialized");
345 initialized = true;
346 g.initialize((int) nv, (int) (2*ne));
347 g.v_size = (int) nv;
348 g.e_size = (int) (2*ne);
349 c = new int[nv];
350 edge_cnt = new int[nv];
351 for(unsigned int i = 0; i < nv; ++i)
352 edge_cnt[i] = 0;
353 };
354
355 unsigned int add_vertex(const int color, const int deg) {
356 if(!initialized)
357 throw std::logic_error("uninitialized graph");
358 if(finalized)
359 throw std::logic_error("can not change finalized graph");
360 const unsigned int vertex = num_vertices_defined;
361 ++num_vertices_defined;
362 if(num_vertices_defined > (unsigned int) g.v_size)
363 throw std::out_of_range("vertices out-of-range, define more vertices initially");
364 c[vertex] = color;
365 g.d[vertex] = deg;
366 g.v[vertex] = (int) num_deg_edges_defined;
367 num_deg_edges_defined += deg;
368 return vertex;
369 };
370
371 void add_edge(const unsigned int v1, const unsigned int v2) {
372 if(!initialized)
373 throw std::logic_error("uninitialized graph");
374 if(finalized)
375 throw std::logic_error("can not change finalized graph");
376 if(v1 > v2 || v1 == v2)
377 throw std::invalid_argument("invalid edge: v1 < v2 must hold");
378 if(v1 >= num_vertices_defined)
379 throw std::out_of_range("v1 is not a defined vertex, use add_vertex to add vertices");
380 if(v2 >= num_vertices_defined)
381 throw std::out_of_range("v2 is not a defined vertex, use add_vertex to add vertices");
382 if(static_cast<int>(num_edges_defined + 2) > g.e_size)
383 throw std::out_of_range("too many edges");
384 if(v1 > INT32_MAX)
385 throw std::out_of_range("v1 too large, must be < INT32_MAX");
386 if(v2 > INT32_MAX)
387 throw std::out_of_range("v2 too large, must be < INT32_MAX");
388 ++edge_cnt[v1];
389 const int edg_cnt1 = edge_cnt[v1];
390 if(edg_cnt1 > g.d[v1])
391 throw std::out_of_range("too many edges incident to v1");
392 g.e[g.v[v1] + edg_cnt1 - 1] = static_cast<int>(v2);
393
394 ++edge_cnt[v2];
395 const int edg_cnt2 = edge_cnt[v2];
396 if(edg_cnt2 > g.d[v2])
397 throw std::out_of_range("too many edges incident to v2");
398 g.e[g.v[v2] + edg_cnt2 - 1] = static_cast<int>(v1);
399
400 num_edges_defined += 2;
401 };
402
404 g.sanity_check();
405 }
406
407 void dump_dimacs(const std::string& filename) {
408 finalize();
409 std::ofstream dumpfile;
410 dumpfile.open (filename, std::ios::out);
411
412 dumpfile << "p edge " << g.v_size << " " << g.e_size/2 << std::endl;
413
414 for(int i = 0; i < g.v_size; ++i) {
415 dumpfile << "n " << i+1 << " " << c[i] << std::endl;
416 }
417
418 for(int i = 0; i < g.v_size; ++i) {
419 for(int j = g.v[i]; j < g.v[i]+g.d[i]; ++j) {
420 const int neighbour = g.e[j];
421 if(neighbour < i) {
422 dumpfile << "e " << neighbour+1 << " " << i+1 << std::endl;
423 }
424 }
425 }
426 }
427
429 finalize();
430 return &g;
431 };
432
434 finalize();
435 return c;
436 };
437 };
438}
439
440static inline void parse_dimacs(const std::string& filename, dejavu::sgraph* g, int** colmap, bool silent=true,
441 int seed_permute=0) {
442 std::chrono::high_resolution_clock::time_point timer = std::chrono::high_resolution_clock::now();
443 std::ifstream infile(filename);
444
445 std::vector<int> reshuffle;
446
447 std::vector<std::vector<int>> incidence_list;
448 std::set<int> degrees;
449 std::set<int> colors;
450 std::string line;
451 std::string nv_str, ne_str;
452 std::string nv1_string, nv2_string;
453 int nv1, nv2;
454 size_t i;
455 int nv = 0;
456 int ne = 0;
457 while (std::getline(infile, line)) {
458 char m = line[0];
459 int average_d;
460 switch (m) {
461 case 'p':
462 for(i = 7; i < line.size() && line[i] != ' '; ++i) {
463 nv_str += line[i];
464 }
465
466 ++i;
467 for(; i < line.size() && line[i] != ' '; ++i) {
468 ne_str += line[i];
469 }
470 nv = std::stoi(nv_str);
471 ne = std::stoi(ne_str);
472 average_d = (ne / nv) + 3;
473 g->initialize(nv, ne * 2);
474
475 reshuffle.reserve(nv);
476 for(int j = 0; j < nv; ++j) reshuffle.push_back(j + 1);
477 if(seed_permute != 0) {
478 std::mt19937 eng(seed_permute);
479 std::shuffle(std::begin(reshuffle), std::end(reshuffle), eng);
480 }
481
482 incidence_list.reserve(nv);
483 for(int j = 0; j < nv; ++j) {
484 incidence_list.emplace_back();
485 incidence_list[incidence_list.size() - 1].reserve(average_d);
486 }
487 break;
488 case 'e':
489 nv1_string = "";
490 nv2_string = "";
491 for(i = 2; i < line.size() && line[i] != ' '; ++i) {
492 nv1_string += line[i];
493 }
494 ++i;
495 for(; i < line.size() && line[i] != ' '; ++i) {
496 nv2_string += line[i];
497 }
498
499 nv1 = reshuffle[std::stoi(nv1_string)-1];
500 nv2 = reshuffle[std::stoi(nv2_string)-1];
501
502 incidence_list[nv1 - 1].push_back(nv2 - 1);
503 incidence_list[nv2 - 1].push_back(nv1 - 1);
504 break;
505 case 'n':
506 if(*colmap == nullptr)
507 *colmap = (int *) calloc(nv, sizeof(int));
508 nv1_string = "";
509 nv2_string = "";
510 for(i = 2; i < line.size() && line[i] != ' '; ++i) {
511 nv1_string += line[i];
512 }
513 ++i;
514 for(; i < line.size() && line[i] != ' '; ++i) {
515 nv2_string += line[i];
516 }
517
518 nv1 = reshuffle[std::stoi(nv1_string)-1];
519 nv2 = std::stoi(nv2_string);
520 (*colmap)[nv1 - 1] = nv2;
521 break;
522 default:
523 break;
524 }
525 }
526
527 int epos = 0;
528 int vpos = 0;
529
530 int maxd = 0;
531
532 for(auto & incident : incidence_list) {
533 g->v[vpos] = epos;
534 g->d[vpos] = (int) incident.size();
535 //degrees.insert(g->d[vpos]);
536 if(g->d[vpos] > maxd)
537 maxd = g->d[vpos];
538 vpos += 1;
539 for(int j : incident) {
540 g->e[epos] = j;
541 epos += 1;
542 }
543 }
544
545 g->v_size = nv;
546 g->e_size = 2 * ne;
547
548 dej_assert(nv == g->v_size);
549 dej_assert(2 * ne == g->e_size);
550 const double parse_time = (double) (std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now() - timer).count());
551 if(!silent) std::cout << std::setprecision(2) << "parse_time=" << parse_time / 1000000.0 << "ms";
552}
553
554#endif //SASSY_GRAPH_BUILDER_H
Vertex coloring for a graph.
Definition: coloring.h:23
void initialize(int new_domain_size)
Definition: coloring.h:149
Set specialized for quick resets.
Definition: ds.h:546
bool get(int pos)
Definition: ds.h:605
void reset()
Definition: ds.h:634
void initialize(int size)
Definition: ds.h:588
void set(int pos)
Definition: ds.h:616
Internal graph data structure.
Definition: graph.h:19
void sanity_check()
Definition: graph.h:196
void initialize(int nv, int ne)
Definition: graph.h:53
void sort_edgelist() const
Definition: graph.h:258
int * e
Definition: graph.h:46
bool initialized
Definition: graph.h:43
void copy_graph(sgraph *g)
Definition: graph.h:243
int e_size
Definition: graph.h:49
void initialize_coloring(ds::coloring *c, int *vertex_to_col)
Definition: graph.h:61
int v_size
Definition: graph.h:48
int * v
Definition: graph.h:44
bool dense
Definition: graph.h:51
int * d
Definition: graph.h:45
Graph with static number of vertices and edges.
Definition: graph.h:292
void add_edge(const unsigned int v1, const unsigned int v2)
Definition: graph.h:371
unsigned int add_vertex(const int color, const int deg)
Definition: graph.h:355
void initialize_graph(const unsigned int nv, const unsigned int ne)
Definition: graph.h:342
void dump_dimacs(const std::string &filename)
Definition: graph.h:407
void sanity_check()
Definition: graph.h:403
int * get_coloring()
Definition: graph.h:433
static_graph(const int nv, const int ne)
Definition: graph.h:319
dejavu::sgraph * get_sgraph()
Definition: graph.h:428
Definition: bfs.h:10
#define dej_assert(expr)
Definition: utility.h:40