Feat: implemented trie to store and check mutation history

This commit is contained in:
Richard Wong 2024-03-14 11:18:29 +09:00
parent aa3c3e3b20
commit 078007cc6d
Signed by: richard
GPG Key ID: 5BD36BA2E9EE33D0
7 changed files with 120 additions and 65 deletions

View File

@ -1 +1 @@
This branch tests the idea of using fuzzing to choose the direction of mutation This branch tests the idea of storing all mutation traversals with a trie structure

View File

@ -69,7 +69,7 @@ public:
int &traversal_index, int &traversal_index,
const unordered_map<int, int> &node_id_to_pos, const unordered_map<int, int> &node_id_to_pos,
int mutate_point, int mutate_point,
mt19937 rng); mt19937& rng);
// custom comparator function to sort nodes according to order in given vector // custom comparator function to sort nodes according to order in given vector
bool sortByOrder(const unordered_map<int, int>& node_id_to_pos, node* a, node* b); bool sortByOrder(const unordered_map<int, int>& node_id_to_pos, node* a, node* b);
@ -212,4 +212,19 @@ private:
vector<node*> _new_node_list; //Newly added nodes. vector<node*> _new_node_list; //Newly added nodes.
}; };
// Prefix-tree node used to remember which post-orders have already been tried.
//
// Every node carries one node id (`_node_id`) and a list of child nodes
// (`_next_node_list`). Child nodes are heap-allocated by add_new_post_order
// and are owned by their parent: destroying a node destroys its whole subtree.
class trie {
public:
    // Creates a node labelled `node_id` with no children.
    trie(int node_id);

    // Releases every child node (and, through their destructors, every
    // descendant). Without this the nodes created with `new` are never freed.
    ~trie() {
        for (trie* child : _next_node_list) {
            delete child;
        }
    }

    // A copy would share the raw child pointers with the original and both
    // destructors would free them, so copying is disabled.
    trie(const trie&) = delete;
    trie& operator=(const trie&) = delete;

    // all functions here

    // Stores `post_order` below this node. The sequence is consumed from its
    // last element towards its first; `current_position` is the number of
    // trailing elements to skip (0 when called on the root).
    void add_new_post_order(vector<int> post_order, int current_position);

    // Returns true when the remaining elements of `post_order` (read from the
    // back, skipping `current_position` trailing elements) can be matched along
    // child nodes below this node, or when no elements remain to be matched.
    bool check_post_order(vector<int> post_order, int current_position);

private:
    int _node_id;                   // id stored at this node
    vector<trie*> _next_node_list;  // owned child nodes
};
#endif // for MPS_H #endif // for MPS_H

View File

@ -24,8 +24,8 @@ int compute_removed_edge_size(const ogdf::Graph &G, vector<int> post_order);
// but their signatures are not in mps.h, hence they are declared here // but their signatures are not in mps.h, hence they are declared here
ogdf::Graph read_from_gml(string input_file); ogdf::Graph read_from_gml(string input_file);
vector<int> generate_post_order(const ogdf::Graph &G); vector<int> generate_post_order(const ogdf::Graph &G);
vector<int> generate_mutated_post_order(const ogdf::Graph &G, vector<int> post_order, int mutate_point); vector<int> generate_mutated_post_order(const ogdf::Graph &G, const vector<int> &post_order, int mutate_start_point);
vector<int> generate_guided_post_order(const ogdf::Graph &G, vector<int> post_order); vector<int> generate_guided_post_order(const ogdf::Graph &G, const vector<int> &post_order);
void compute_mps(const ogdf::Graph &G, int mutate_point, vector<int> &post_order, int &return_edge_size); void compute_mps(const ogdf::Graph &G, int mutate_point, vector<int> &post_order, int &return_edge_size);
void vector_printer(const vector<int>& state) { void vector_printer(const vector<int>& state) {
@ -36,7 +36,7 @@ void vector_printer(const vector<int>& state) {
} }
vector<int> repeated_mutation(const ogdf::Graph &G, int k_max) { vector<int> repeated_mutation(const ogdf::Graph &G, int k_max, trie* trie_store) {
// generate first post order // generate first post order
// std::cout << "generate first post order" << std::endl; // std::cout << "generate first post order" << std::endl;
vector<int> old_order = generate_post_order(G); vector<int> old_order = generate_post_order(G);
@ -45,27 +45,23 @@ vector<int> repeated_mutation(const ogdf::Graph &G, int k_max) {
int new_removed_size; int new_removed_size;
int old_removed_size = INT_MAX; int old_removed_size = INT_MAX;
// prepare random selection
std::random_device rd;
std::mt19937 gen{rd()}; // seed the generator
int first_value = 0;
// we want the index of the third last value
// at a given traversal index, only the next iteration has the mutated value
int last_value = (old_order.size() - 1) - 2;
std::uniform_int_distribution<> dist{first_value, last_value}; // set min and max
for (int k = 0; k < k_max; ++k) { for (int k = 0; k < k_max; ++k) {
// function compute new post_order and new_removed_size
// temp_order and new_removed_size will be updated with new values
#ifdef TIME // compute_mps(G, dist(gen), temp_order, new_removed_size);
auto start = std::chrono::high_resolution_clock::now(); // we set mutate_start_point to 0 to mutate everything every round
#endif // temp_order = generate_mutated_post_order(G, temp_order, 0);
compute_mps(G, dist(gen), temp_order, new_removed_size); temp_order = generate_guided_post_order(G, temp_order);
#ifdef TIME temp_order = generate_guided_post_order(G, temp_order);
auto end = std::chrono::high_resolution_clock::now(); // test if something is in the trie_store
std::cout << "compute_mps: " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() << std::endl; // returns false when no order is found in trie_store
#endif if (!(trie_store->check_post_order(temp_order, 0))) {
trie_store->add_new_post_order(temp_order, 0);
} else {
std::cout << "repeated mutation found" << std::endl;
}
new_removed_size = compute_removed_edge_size(G, temp_order);
// if there is an improvement // if there is an improvement
// 1. update the removed size to use the new smaller size // 1. update the removed size to use the new smaller size
@ -92,9 +88,10 @@ int main(int argc, char* argv[]) {
int k_max = std::stoi(argv[2]); int k_max = std::stoi(argv[2]);
const ogdf::Graph G = read_from_gml(input_file); const ogdf::Graph G = read_from_gml(input_file);
trie* trie_store = new trie(-1);
// generate order here // generate order here
vector<int> post_order = repeated_mutation(G, k_max); vector<int> post_order = repeated_mutation(G, k_max, trie_store);
// test timing of function // test timing of function
// test_correctness(G); // test_correctness(G);

View File

@ -102,7 +102,7 @@ maximal_planar_subgraph_finder::guidedPostOrderTraversal(const vector<int> &post
// take in a post-order argument then traces the graph in the same order // take in a post-order argument then traces the graph in the same order
// return is by reference via _post_order_list // return is by reference via _post_order_list
void void
maximal_planar_subgraph_finder::mutatedPostOrderTraversal(const vector<int> &post_order, int mutate_point) { maximal_planar_subgraph_finder::mutatedPostOrderTraversal(const vector<int> &post_order, int mutate_start_point) {
node::init_mark(); node::init_mark();
// // implementation 1: use vector // // implementation 1: use vector
@ -130,7 +130,7 @@ maximal_planar_subgraph_finder::mutatedPostOrderTraversal(const vector<int> &pos
int start = 0; int start = 0;
// if we mutate first node, we will select a random starting node // if we mutate first node, we will select a random starting node
if (mutate_point == 0) { if (mutate_start_point == 0) {
int first_value = 0; int first_value = 0;
int last_value = post_order.size() - 1; int last_value = post_order.size() - 1;
std::uniform_int_distribution<> dist{first_value, last_value}; std::uniform_int_distribution<> dist{first_value, last_value};
@ -156,13 +156,13 @@ maximal_planar_subgraph_finder::mutatedPostOrderTraversal(const vector<int> &pos
{ {
if (!_node_list[i]->is_marked()) if (!_node_list[i]->is_marked())
{ {
_node_list[i]->mutated_DFS_visit(_post_order_list, _node_list, postOrderID, traversal_index, node_id_to_pos, mutate_point, rng); _node_list[i]->mutated_DFS_visit(_post_order_list, _node_list, postOrderID, traversal_index, node_id_to_pos, mutate_start_point, rng);
} }
break; break;
} }
if (!_node_list[i]->is_marked()) if (!_node_list[i]->is_marked())
{ {
_node_list[i]->mutated_DFS_visit(_post_order_list, _node_list, postOrderID, traversal_index, node_id_to_pos, mutate_point, rng); _node_list[i]->mutated_DFS_visit(_post_order_list, _node_list, postOrderID, traversal_index, node_id_to_pos, mutate_start_point, rng);
} }
i = (i + 1) % end_condition; i = (i + 1) % end_condition;
} }

View File

@ -44,12 +44,12 @@ vector<int> generate_post_order(const ogdf::Graph &G) {
return m.generate_post_order(G); return m.generate_post_order(G);
} }
vector<int> generate_mutated_post_order(const ogdf::Graph &G, vector<int> post_order, int mutate_point) { vector<int> generate_mutated_post_order(const ogdf::Graph &G, const vector<int> &post_order, int mutate_start_point) {
maximal_planar_subgraph_finder m; maximal_planar_subgraph_finder m;
return m.generate_mutated_post_order(G, post_order, mutate_point); return m.generate_mutated_post_order(G, post_order, mutate_start_point);
} }
vector<int> generate_guided_post_order(const ogdf::Graph &G, vector<int> post_order) { vector<int> generate_guided_post_order(const ogdf::Graph &G, const vector<int> &post_order) {
maximal_planar_subgraph_finder m; maximal_planar_subgraph_finder m;
return m.generate_guided_post_order(G, post_order); return m.generate_guided_post_order(G, post_order);
} }
@ -80,26 +80,6 @@ int maximal_planar_subgraph_finder::find_mps(const ogdf::Graph &G) {
vector<int> maximal_planar_subgraph_finder::generate_post_order(const ogdf::Graph &G) { vector<int> maximal_planar_subgraph_finder::generate_post_order(const ogdf::Graph &G) {
init_from_graph(G); init_from_graph(G);
postOrderTraversal(); postOrderTraversal();
#ifdef DEBUG
std::cout << "standard post order traversal" << std::endl;
print_post_order();
#endif
return return_post_order();
}
// result of this will be used as input to "compute_removed_edge_size"
vector<int> maximal_planar_subgraph_finder::generate_mutated_post_order(const ogdf::Graph &G, const vector<int> &post_order, int mutate_point) {
init_from_graph(G);
mutatedPostOrderTraversal(post_order, mutate_point);
#ifdef DEBUG
std::cout << "mutated post order traversal" << std::endl;
print_post_order();
#endif
return return_post_order(); return return_post_order();
} }
@ -107,15 +87,15 @@ vector<int> maximal_planar_subgraph_finder::generate_mutated_post_order(const og
vector<int> maximal_planar_subgraph_finder::generate_guided_post_order(const ogdf::Graph &G, const vector<int> &post_order) { vector<int> maximal_planar_subgraph_finder::generate_guided_post_order(const ogdf::Graph &G, const vector<int> &post_order) {
init_from_graph(G); init_from_graph(G);
guidedPostOrderTraversal(post_order); guidedPostOrderTraversal(post_order);
// #ifdef DEBUG
// std::cout << "guided post order traversal" << std::endl;
// print_post_order();
// #endif
return return_post_order(); return return_post_order();
} }
// Runs the mutated post-order traversal on G and returns the resulting order.
// The result of this will be used as input to "compute_removed_edge_size".
//
// G                  - graph passed to init_from_graph before traversing
// post_order         - existing post order, forwarded to mutatedPostOrderTraversal
// mutate_start_point - forwarded unchanged to mutatedPostOrderTraversal
//                      NOTE(review): node::mutated_DFS_visit shuffles the neighbour
//                      list once traversal_index >= mutate_start_point, so 0
//                      presumably mutates the whole traversal - confirm.
// Returns the value of return_post_order() after the traversal has run.
vector<int> maximal_planar_subgraph_finder::generate_mutated_post_order(const ogdf::Graph &G, const vector<int> &post_order, int mutate_start_point) {
// the three calls must stay in this order: initialise, traverse, read back
init_from_graph(G);
mutatedPostOrderTraversal(post_order, mutate_start_point);
return return_post_order();
}
int maximal_planar_subgraph_finder::compute_removed_edge_size(const ogdf::Graph &G, vector<int> post_order) { int maximal_planar_subgraph_finder::compute_removed_edge_size(const ogdf::Graph &G, vector<int> post_order) {
@ -127,14 +107,15 @@ int maximal_planar_subgraph_finder::compute_removed_edge_size(const ogdf::Graph
sort_adj_list(); sort_adj_list();
determine_edges(); determine_edges();
back_edge_traversal(); back_edge_traversal();
return output_removed_edge_size(); return output_removed_edge_size();
} }
// deprecated
void maximal_planar_subgraph_finder::reset_state() { void maximal_planar_subgraph_finder::reset_state() {
_post_order_list.clear(); _post_order_list.clear();
} }
// deprecated
void maximal_planar_subgraph_finder::compute_mps(const ogdf::Graph &G, int mutate_point, vector<int> &post_order, int &return_edge_size) { void maximal_planar_subgraph_finder::compute_mps(const ogdf::Graph &G, int mutate_point, vector<int> &post_order, int &return_edge_size) {
init_from_graph(G); init_from_graph(G);
mutatedPostOrderTraversal(post_order, mutate_point); mutatedPostOrderTraversal(post_order, mutate_point);
@ -145,11 +126,9 @@ void maximal_planar_subgraph_finder::compute_mps(const ogdf::Graph &G, int mutat
// now we get the canonical representation of the post order // now we get the canonical representation of the post order
vector<int> temp_post_order = return_post_order(); vector<int> temp_post_order = return_post_order();
reset_state(); // clear the _post_order_list reset_state(); // clear the _post_order_list
// perform guided Post Order Traversal to flip the tree // perform guided Post Order Traversal to flip the tree
guidedPostOrderTraversal(temp_post_order); guidedPostOrderTraversal(temp_post_order);
post_order = return_post_order(); post_order = return_post_order();
} }

View File

@ -144,8 +144,8 @@ void node::mutated_DFS_visit(vector<node *> &dfsList,
int &return_index, int &return_index,
int &traversal_index, int &traversal_index,
const unordered_map<int, int> &node_id_to_pos, const unordered_map<int, int> &node_id_to_pos,
int mutate_point, int mutate_start_point,
mt19937 rng) mt19937& rng)
{ {
// mark current node // mark current node
@ -172,8 +172,8 @@ void node::mutated_DFS_visit(vector<node *> &dfsList,
// } // }
vector<node*> neighbor_list = _adj_list; vector<node*> neighbor_list = _adj_list;
// if the current index matches the mutate_point, then we know this is the cycle to mutate // if the current index matches or exceeds the mutate_point, then we know this is the cycle to mutate
if (traversal_index == mutate_point) { if (traversal_index >= mutate_start_point) {
// we shuffle the neighbor list // we shuffle the neighbor list
std::shuffle(neighbor_list.begin(), neighbor_list.end(), rng); std::shuffle(neighbor_list.begin(), neighbor_list.end(), rng);
// otherwise just sort based on the order set by node_id_to_pos, which is // otherwise just sort based on the order set by node_id_to_pos, which is
@ -204,7 +204,7 @@ void node::mutated_DFS_visit(vector<node *> &dfsList,
if (!neighbor_list[i]->is_marked()) if (!neighbor_list[i]->is_marked())
{ {
neighbor_list[i]->_parent = this; neighbor_list[i]->_parent = this;
neighbor_list[i]->mutated_DFS_visit(dfsList, node_list, return_index, traversal_index, node_id_to_pos, mutate_point, rng); neighbor_list[i]->mutated_DFS_visit(dfsList, node_list, return_index, traversal_index, node_id_to_pos, mutate_start_point, rng);
} }
} }

View File

@ -0,0 +1,64 @@
/* The trie data structure is used to store all DFS traversals of the tree.
* Although not strictly correct, we treat a canonical rotation representation as
* a kind of traversal order.
*
* We will store a separate traversal tree for each starting root node.
*
* The objective is to identify previously tried post-orders so that we do not
* blindly re-test a post-order whose result we already know.
*/
#include "mps.h"
//-----------------------------------------------------------------------------------
// CONSTRUCTOR
//-----------------------------------------------------------------------------------
// Builds a trie node labelled with `node_id`; its child list starts out empty.
trie::trie(int node_id)
    : _node_id(node_id)
{
}
//-----------------------------------------------------------------------------------
// METHODS
//-----------------------------------------------------------------------------------
// add post order to trie structure
// root Trie node will have current_position = 0
void trie::add_new_post_order(vector<int> post_order, int current_position) {
int last_index = post_order.size() - 1;
int current_index = last_index - current_position;
// we only add when the current_index is still valid
if (current_index >= 0) {
int node_id = post_order[current_index];
// std::cout << "added trie node for: " << node_id << std::endl;
trie* new_trie_node = new trie(node_id);
new_trie_node->add_new_post_order(post_order, current_position+1);
_next_node_list.push_back(new_trie_node);
}
}
bool trie::check_post_order(vector<int> post_order, int current_position) {
bool return_value = false;
int last_index = post_order.size() - 1;
int current_index = last_index - current_position;
// while current_index is still valid
if (current_index >= 0) {
int node_id = post_order[current_index];
// we want to check for existing node_id in the _next_node_list
// Assuming 'it' is an iterator pointing to an element in 'vec'
auto it = std::find_if(_next_node_list.begin(), _next_node_list.end(), [node_id](trie* obj) {
return obj->_node_id == node_id;
});
// found
if (it != _next_node_list.end()) {
std::cout << (*it)->_node_id << std::endl;
return_value = (*it)->check_post_order(post_order, current_position+1);
// not found
} else {
; // return_value is already false by default
}
} else { // we have reached the base case of reaching the end of the trie
return true;
}
return return_value;
}