Map fully commented barring hard_erase

This commit is contained in:
Pagwin 2026-01-13 16:30:16 -05:00
parent 5963998902
commit 34afff74a0
No known key found for this signature in database
GPG key ID: 81137023740CA260

130
Map.hpp
View file

@ -1,20 +1,25 @@
#ifndef _POWELL_CS440
#define _POWELL_CS440
#include <algorithm>
// uncomment on submission/performance test
// #define NDEBUG
#include <cassert>
#include <memory>
#include <stdexcept>
#include <utility>
namespace cs440 {
// universal type defs here
// unnamed namespace used to hide implementation details
// from the end user
namespace {
// used to specify direction for specifying which child of a node
// is being referred to as well as for specifying the direction of
// rotation for the implementation of red-black tree rules
enum class Direction { Left, Right };
// technically having this stuff here instead of in a C++ file is wasteful but
// 1) I'm lazy
// 2) idk how to make the Color enum value an implementation detail which the
// template can use but external code can't other than this
enum class Direction { Left, Right };
Direction operator!(Direction dir) {
switch (dir) {
case Direction::Left:
@ -27,21 +32,27 @@ Direction operator!(Direction dir) {
}
enum class Color { Red, Black };
} // namespace
template <typename Key_T, typename Mapped_T> class Map {
// Type definitions here
using ValueType = std::pair<const Key_T, Mapped_T>;
using internal_ValueType = std::pair<Key_T, Mapped_T>;
// Internal Tree node type, tree is implemented via pointers
struct Node {
// magic number used to check if a node has been moved from
int valid = 0x13371337;
// all raw pointers are non-owning
Node *parent = nullptr;
Node *prev;
Node *next;
Map *map;
std::unique_ptr<internal_ValueType> val;
std::unique_ptr<Node> left;
std::unique_ptr<Node> right;
Color color;
Node *prev;
Node *next;
Map *map;
Node(internal_ValueType val, Map *map)
: parent{nullptr}, val{new internal_ValueType{val}}, left{}, right{},
color{Color::Red}, prev{nullptr}, next{nullptr}, map{map} {}
@ -130,6 +141,8 @@ template <typename Key_T, typename Mapped_T> class Map {
this->map = rhs.map;
return *this;
}
// Node::child is useful when a non-owning reference to a child is needed
Node *child(Direction dir) {
switch (dir) {
case Direction::Left:
@ -150,6 +163,9 @@ template <typename Key_T, typename Mapped_T> class Map {
assert(false);
}
}
// uchild is used when ownership of the child is needed in the same
// instance it is accessed
std::unique_ptr<Node> uchild(Direction dir) {
switch (dir) {
case Direction::Left:
@ -163,6 +179,10 @@ template <typename Key_T, typename Mapped_T> class Map {
std::unique_ptr<Node> uchild(Node *child) {
return this->uchild(this->which_child(child));
}
// set_child is used due to varous methods returning a Direction
// and the code for setting the correct child given a direction
// being repetitive, trivial and important to get correct
std::unique_ptr<Node> &set_child(Direction dir,
std::unique_ptr<Node> new_child) {
@ -193,12 +213,16 @@ template <typename Key_T, typename Mapped_T> class Map {
}
assert(false);
}
void erase_child(Node *n) { this->erase_child(this->which_child(n)); }
void erase_child(Direction dir) {
// if the child being erased is the min or the max we need to update
// the min/max
bool minmax = this->child(dir) == this->map->min ||
this->child(dir) == this->map->max;
// bringing ownership to this function scope so Deleter gets called at end
// of function and we can do reordering things
// bringing ownership to this function scope so destructor gets
// called at the end of function and we can do reordering things
std::unique_ptr<Node> dropping;
switch (dir) {
case Direction::Right:
@ -221,6 +245,9 @@ template <typename Key_T, typename Mapped_T> class Map {
dropping->next->prev = dropping->prev;
}
// Correctness: We can only remove the min and the max at the same time
// if there's only 1 node left in which case we can't have children
// and this method wouldn't be called
if (minmax) {
switch (dir) {
case Direction::Left:
@ -233,6 +260,9 @@ template <typename Key_T, typename Mapped_T> class Map {
}
}
}
// Utility to reset previous and next pointers when we add/remove nodes
// from the tree
void restore_ordering() {
this->prev = this->calc_pred();
this->next = this->calc_succ();
@ -243,6 +273,10 @@ template <typename Key_T, typename Mapped_T> class Map {
this->next->prev = this;
}
}
// predecessor will be one of the following
// 1) our Right-most child tracing down from our left child
// 2) The first ancestor which has us to their right
Node *calc_pred() {
if (this->left) {
Node *ret = this->left.get();
@ -260,6 +294,10 @@ template <typename Key_T, typename Mapped_T> class Map {
return ret;
}
}
// succcessor will be one of the following
// 1) our Left-most child tracing down from our right child
// 2) The first ancestor which has us to their left
Node *calc_succ() {
if (this->right) {
Node *ret = this->right.get();
@ -277,14 +315,20 @@ template <typename Key_T, typename Mapped_T> class Map {
return ret;
}
}
// Rotation is an operation that applies to three nodes of the red-black
// tree, namely the node it's applying to, that node's child and of
// opposite the given direction and that child's child in the direction
// replacing this node with it's child and that child with the grandchild
// alongside various other bits of shuffling needed to make that all work
void rotate(Direction dir) {
// cannot rotate nullptr
assert(this != nullptr);
// we can't be root for this rotate operation
assert(this->parent != nullptr);
// if we're missing the child on the opposite direction this is an invalid
// rotation
// if we're missing the child on the opposite direction
// this is an invalid rotation
assert(this->child(!dir));
Direction m_dir = this->parent->which_child(this);
@ -309,7 +353,7 @@ template <typename Key_T, typename Mapped_T> class Map {
void restore_red_black_insert(Direction dir) {
Node *self = this;
// infinite loop for case 2's sake, if tail recursion optimization was
// "infinite" loop for case 2's sake, if tail recursion optimization was
// guaranteed I'd use tail recursion
while (true) {
Node *parent = self->parent;
@ -320,7 +364,8 @@ template <typename Key_T, typename Mapped_T> class Map {
}
dir = parent->which_child(self);
// if this is violated it's a bug
// if this is violated it's a bug in which_child
assert(parent->child(dir) == self);
// parent is black so no violation no-op (case 1)
@ -372,9 +417,13 @@ template <typename Key_T, typename Mapped_T> class Map {
}
};
// data needed for implementation
// Map: data needed for implementation
std::unique_ptr<Node> root;
// Recalculating the size every time size is called is obviously bad for
// performance considering we can just increment/decrement this counter as
// needed
std::size_t _size;
// as in the case of Node raw pointers are non-owning
Node *min;
Node *max;
@ -384,6 +433,8 @@ public:
class ReverseIterator;
// public type definitions
class Iterator {
// two pointers are used to handling incrementing/decrementing
// outside the bounds of the iterator
Node *underlying;
Node *store;
Iterator(Node *ptr, Node *potential = nullptr)
@ -394,6 +445,7 @@ public:
friend ConstIterator;
friend ReverseIterator;
Iterator() = delete;
// we don't own any data so no reason not to have all default
Iterator(const Iterator &rhs) = default;
Iterator &operator=(const Iterator &) = default;
~Iterator() = default;
@ -449,6 +501,7 @@ public:
return !(lhs == rhs);
}
};
// Const Iterator simply exposes a const interface to Iterator
class ConstIterator {
Iterator underlying;
@ -499,6 +552,9 @@ public:
return !(lhs == rhs);
}
};
// ReverseIterator simply swaps increment and decrement operators on the
// normal iterator for it's implementation
class ReverseIterator {
Iterator underlying;
ReverseIterator(const Iterator &underlying) : underlying{underlying} {}
@ -539,7 +595,11 @@ public:
}
};
Map() : root{}, _size{0}, min{nullptr}, max{nullptr} {}
Map() : root{nullptr}, _size{0}, min{nullptr}, max{nullptr} {}
// Not initializing min and max in member initializer_list may not be
// strictly correct but they get initialized as the first thing in
// the constructor so practically speaking that should be fine
Map(const Map &rhs)
: root{std::make_unique<Node>(*rhs.root)}, _size{rhs._size} {
this->min = this->root.get();
@ -552,6 +612,10 @@ public:
}
}
Map(Map &&rhs) : root{std::move(rhs.root)}, _size{rhs._size} {
// This code is being kept as is to avoid accidentally breaking something
// when all I'm intending to do is retroactively add comments
// However I think it can be removed in favor of simply copying
// min and max from rhs due to everything being done via pointers
this->min = this->root.get();
this->max = this->root.get();
while (min->left) {
@ -587,6 +651,9 @@ public:
}
return *this;
}
// No reason to redo making an empty map or inserting values from an
// iterator when I already have that functionality via other methods
Map(std::initializer_list<std::pair<const Key_T, Mapped_T>> items) : Map{} {
this->insert(items.begin(), items.end());
}
@ -595,6 +662,14 @@ public:
private:
// private helpers
// This method was originally written due to root originally being
// std::optional<Node> rather than std::unique_ptr<Node>, the code can
// probably be refactored such that only node rotations are used
// the reason it wasn't is because it wasn't necessary to complete the
// assignment and I'd already done a full rewrite once and now I'm here
// just to add comments with minimal changes that should only affect
// style/clarity
void rotate_root(Direction dir) {
assert(root);
@ -639,6 +714,13 @@ private:
}
}
// this method is templated because previously in development this trace
// template var was used in conjunction with if constexpr to enable/disable
// debug logging which showed the trace of how locate traversed the tree
// the likely reasoning for removing the logging is due to the C++ compiler
// available on university lab machines (which I expected this to be tested
// on) was too old to allow if constexpr and the default template value
// meant removing that logging was trivial
template <bool trace = false>
std::pair<Node const *, Direction> locate(const Key_T &key) const {
Node const *ret_parent;
@ -669,11 +751,17 @@ private:
}
return std::make_pair(ret_parent, ret_dir);
}
// the hard case of removing a black leaf node in a red black tree
// All other cases are handled in core_erase
void hard_erase(Node *n) {
assert(n->parent);
Node *parent = n->parent;
Direction dir = parent->which_child(n);
parent->erase_child(n);
// case 1 should be handled first for all but the first loop
// so it's skipped in the first loop
goto skip;
while (true) {
parent = n->parent;
@ -694,7 +782,10 @@ private:
Color close_color = close ? close->color : Color::Black;
Color distant_color = distant ? distant->color : Color::Black;
// Macro is used for simplicity and avoiding repetition
#define redcheck(v) if ((v) == Color::Red)
// TODO: continue comment writing here
// it kinda sucks but I think that goto is genuinely the best solution
// here, making methods for cases 4,5 and 6 is a lot of unneeded
// bookkeeping
@ -765,6 +856,17 @@ private:
}
}
}
// https://en.wikipedia.org/wiki/Red%E2%80%93black_tree#Simple_cases
// core_erase is the low level erase level that handles
// all the easy cases for erasure, namely
// 1) nodes with two children (replacing the node with it's successor)
// 2) nodes with one child (replacing the node with it's child)
// 3) the root node (tree is empty)
// 4) red leaf nodes with no children (can simply be removed)
// Black leaf nodes are handled by calling out to hard_erase
// return value specifies whether or not successor and predecessor should
// be updated
bool core_erase(Node *erasing) {
Color c = erasing->color;
// 2 children