Page 1 of 1

red black tree

Posted: Sat May 16, 2009 9:49 am
by FlashBurn
I´ve made an implementation of a red black tree, but my code seems to have a failure, because I have at least 1 node which shouldn´t be on the tree.

This is my code:

Code: Select all

struct RBTreeNode_t *rbTreeGrandparent(struct RBTreeNode_t *node) {
	return node->parent->parent;
}

struct RBTreeNode_t *rbTreeUncle(struct RBTreeNode_t *node) {
	return (node->parent == rbTreeGrandparent(node)->left) ? rbTreeGrandparent(node)->right : rbTreeGrandparent(node)->left;
}

void rbTreeRotateLeft(struct RBTreeNode_t *node) {
	struct RBTreeNode_t *ptr1= node->right->left, *ptr2= node->right;
	
	if(likely(node->parent != 0)) {
		if(node->parent->left == node) {
			node->parent->left= ptr2;
		} else {
			node->parent->right= ptr2;
		}
	}
	ptr2->parent= node->parent;
	ptr2->left= node;
	node->right= ptr1;
	if(node->right != 0)
		node->right->parent= node;
	node->parent= ptr2;
	
	if(unlikely(node == kernelArgs.kernelSymTab)) {
		kernelArgs.kernelSymTab= ptr2;
	}
}

void rbTreeRotateRight(struct RBTreeNode_t *node) {
	struct RBTreeNode_t *ptr1= node->left->right, *ptr2= node->left;
	
	if(likely(node->parent != 0)) {
		if(node->parent->left == node) {
			node->parent->left= ptr2;
		} else {
			node->parent->right= ptr2;
		}
	}
	ptr2->parent= node->parent;
	ptr2->right= node;
	node->left= ptr1;
	if(node->left != 0)
		node->left->parent= node;
	node->parent= ptr2;
	
	if(unlikely(node == kernelArgs.kernelSymTab)) {
		kernelArgs.kernelSymTab= ptr2;
	}
}

void rbTreeInsertCheck(struct RBTreeNode_t *node) {
	if(unlikely(node->parent == 0)) {
		node->color= RBTREE_BLACK;
	} else {
		if(unlikely(node->parent->color == RBTREE_BLACK)) {
			return;
		} else {
			if(unlikely(rbTreeUncle(node) != 0 && rbTreeUncle(node)->color == RBTREE_RED)) {
				node->parent->color= RBTREE_RED;
				rbTreeUncle(node)->color= RBTREE_BLACK;
				rbTreeGrandparent(node)->color= RBTREE_RED;
				rbTreeInsertCheck(rbTreeGrandparent(node));
			} else {
				if(node == node->parent->right && node->parent == rbTreeGrandparent(node)->left) {
					rbTreeRotateLeft(node->parent);
					node= node->left;
				} else if (node == node->parent->left && node->parent == rbTreeGrandparent(node)->right) {
					rbTreeRotateRight(node->parent);
					node= node->right;
				}
				node->parent->color= RBTREE_BLACK;
				rbTreeGrandparent(node)->color= RBTREE_RED;
				if(node == node->parent->left && node->parent == rbTreeGrandparent(node)->left) {
					rbTreeRotateRight(rbTreeGrandparent(node));
				} else {
					rbTreeRotateLeft(rbTreeGrandparent(node));
				}
			}
		}
	}
}

void rbTreeInsert(struct RBTreeNode_t *startNode, struct RBTreeNode_t *insertNode) {
	uint32t res;
	
	if(unlikely(startNode == 0)) {
		insertNode->left= 0;
		insertNode->right= 0;
		insertNode->parent= 0;
		insertNode->color= RBTREE_BLACK;
		kernelArgs.kernelSymTab= insertNode;
	}
	
	if(unlikely(!(res= strcmp(startNode->str,insertNode->str))))
		return;
	if(res > 0) {
		if(likely(startNode->right != 0)) {
			rbTreeInsert(startNode->right,insertNode);
		} else {
			insertNode->left= 0;
			insertNode->right= 0;
			insertNode->parent= startNode;
			insertNode->color= RBTREE_RED;
			startNode->right= insertNode;
			rbTreeInsertCheck(insertNode);
		}
	} else {
		if(likely(startNode->left != 0)) {
			rbTreeInsert(startNode->left,insertNode);
		} else {
			insertNode->left= 0;
			insertNode->right= 0;
			insertNode->parent= startNode;
			insertNode->color= RBTREE_RED;
			startNode->left= insertNode;
			rbTreeInsertCheck(insertNode);
		}
	}
}

void rbTreeDebug(struct RBTreeNode_t *node) {
	if(likely(node != 0)) {
		rbTreeDebug(node->left);
		printf("str: %s, val: %#X, left: %#X, right: %#X\n",node->str,node->value,node->left,node->right);
		rbTreeDebug(node->right);
	}
}
It´s mostly just copy & paste from the German Wikipedia article.

Re: red black tree

Posted: Sun May 17, 2009 3:45 am
by FlashBurn
If anyone here is interested, I searched the net and found a paper from sedgewick where he revisited the red black tree and I converted his code to c and it is simpler and it works :D

Code: Select all

#define RBTREE_BLACK 0
#define RBTREE_RED !RBTREE_BLACK

#pragma pack(1)

struct RBTreeNode_t {
	struct RBTreeNode_t *left;
	struct RBTreeNode_t *right;
	uint32t color;
	uint32t value;
	char *str;
};

static struct RBTreeNode_t *rbTreeRotateLeft(struct RBTreeNode_t *node);
static struct RBTreeNode_t *rbTreeRotateRight(struct RBTreeNode_t *node);
static void rbTreeColorFlip(struct RBTreeNode_t *node);

struct RBTreeNode_t *rbTreeRotateLeft(struct RBTreeNode_t *node) {
	struct RBTreeNode_t *ptr= node->right;
	
	node->right= ptr->left;
	ptr->left= node;
	ptr->color= ptr->left->color;
	ptr->left->color= RBTREE_RED;
	
	return ptr;
}

struct RBTreeNode_t *rbTreeRotateRight(struct RBTreeNode_t *node) {
	struct RBTreeNode_t *ptr= node->left;
	
	node->left= ptr->right;
	ptr->right= node;
	ptr->color= ptr->right->color;
	ptr->right->color= RBTREE_RED;
	
	return ptr;
}

void rbTreeColorFlip(struct RBTreeNode_t *node) {
	node->color= !node->color;
	node->left->color= !node->left->color;
	node->right->color= !node->right->color;
}

struct RBTreeNode_t *rbTreeInsert(struct RBTreeNode_t *startNode, struct RBTreeNode_t *node) {
	uint32t res;
	
	if(unlikely(startNode == 0))
		return node;
	
	res= strcmp(startNode->str,node->str);
	
	if(res < 0)
		startNode->left= rbTreeInsert(startNode->left,node);
	else
		startNode->right= rbTreeInsert(startNode->right,node);
	
	if(startNode->right->color == RBTREE_RED)
		startNode= rbTreeRotateLeft(startNode);
	if((startNode->left->color == RBTREE_RED) && (startNode->left->left->color == RBTREE_RED))
		startNode= rbTreeRotateRight(startNode);
	if((startNode->left->color == RBTREE_RED) && (startNode->right->color == RBTREE_RED))
		rbTreeColorFlip(startNode);
	
	return startNode;
}

Re: red black tree

Posted: Mon Jun 08, 2009 6:49 am
by yemista
Just out of curiosity, why did you do this? Do you plan to use this in your os? If so, why?

Re: red black tree

Posted: Mon Jun 08, 2009 6:57 am
by FlashBurn
I use a red black tree for the kernel symbol table. I know that the symbl won´t be used often, but it is much faster than a linked list and the memory "overhead" is so small that you can´t really call it so. I think I will use it for some more things in my kernel. My 1st approach was to limit such things as tasks, threads and so on, but with a tree I can get fast search times and I have no limits.

Re: red black tree

Posted: Mon Jun 08, 2009 8:14 am
by whowhatwhere
FlashBurn wrote:I use a red black tree for the kernel symbol table. I know that the symbl won´t be used often, but it is much faster than a linked list and the memory "overhead" is so small that you can´t really call it so. I think I will use it for some more things in my kernel. My 1st approach was to limit such things as tasks, threads and so on, but with a tree I can get fast search times and I have no limits.
I've found that linked lists with an proper MTF heuristic is really fast for jobs like this. So are skip lists. I don't know how they compare to red black trees, but it's much simpler to implement.

EDIT:
> You are in dangerous and treacherous territory! I've heard that #PRAGMA will get you eaten by a GRUE. Be careful!
>

Re: red black tree

Posted: Wed Jun 10, 2009 6:02 am
by JamesM
syntropy wrote:
FlashBurn wrote:I use a red black tree for the kernel symbol table. I know that the symbl won´t be used often, but it is much faster than a linked list and the memory "overhead" is so small that you can´t really call it so. I think I will use it for some more things in my kernel. My 1st approach was to limit such things as tasks, threads and so on, but with a tree I can get fast search times and I have no limits.
I've found that linked lists with an proper MTF heuristic is really fast for jobs like this. So are skip lists. I don't know how they compare to red black trees, but it's much simpler to implement.

EDIT:
> You are in dangerous and treacherous territory! I've heard that #PRAGMA will get you eaten by a GRUE. Be careful!
>
Much faster than linked lists is a patricia trie. I use it for my symbol table. It has O(k) time complexity, where k is the number of characters in the key you're searching for. It is completely irrespective of the number of items in the trie.

Red-black trees are good too, but for a string problem like symbol tables I'd go with a trie every time. I use AVL trees myself (because the rotation and removal operations are easier) for my Map class, mapping arbitrary integers/pointers to integers/pointers.

EDIT: You don't need that pragma. Be aware of what you're using and why you're using it. The only time you should be using __attribute__((packed)) is when you're dealing with data structures to be exported elsewhere (for example hardware or file formats). Otherwise, you want the performance gain by natural alignment.

Re: red black tree

Posted: Wed Jun 10, 2009 6:15 am
by FlashBurn
Because some of you say that I shouldn´t use pragma, I want to know why. I use it, because sometimes I need it for reading structs which I haven´t created and so I´m using it the whole time, because only so the struct is saved in memory as I want it. I also find it better to use pragma instead of the attribute thing.

Maybe I will have a look at a trie (especially the patricia trie).

Re: red black tree

Posted: Wed Jun 10, 2009 6:21 am
by jal
FlashBurn wrote:Because some of you say that I shouldn´t use pragma, I want to know why.
JamesM wrote:Otherwise, you want the performance gain by natural alignment
I wish people would just read for once...


JAL

Re: red black tree

Posted: Wed Jun 10, 2009 6:33 am
by FlashBurn
Any as I´ve said it, I use it because there are always structs which are not aligned as the compiler thinks and I want my structs to be saved as I like it. So I use pragma for it!

Re: red black tree

Posted: Wed Jun 10, 2009 6:39 am
by JamesM
FlashBurn wrote:Any as I´ve said it, I use it because there are always structs which are not aligned as the compiler thinks and I want my structs to be saved as I like it. So I use pragma for it!
OK, here's a quick primer on packing.

The C standard states that all types in a struct must be naturally aligned. So, a 16-bit value must start on a 16-bit boundary, a 32-bit value on a 32-bit boundary etc.

This gives a significant performance improvement. Most CPUs pipeline better when fetching naturally-aligned values. Many will refuse to load non naturally aligned values, MIPS being an example, and the compiler is forced to insert extra instructions to ensure that the value is in a register before performing calculations on it (this basically being two naturally-aligned loads, an AND, a shift and an OR). Others like the x86 will compensate in microcode or just have poorer performance.

This is obviously a big problem. So, you should always let the compiler choose where to put variables in your struct, unless you're interfacing with hardware or file formats like you mentioned.

The C standard states that no variables can be rearranged inside a struct by the compiler, so all variables will be in the order you want, there may just be some padding inserted. This is why the structure:

Code: Select all

struct
{
    uint32_t a;
    uint8_t b;
    uint32_t c;
    uint8_t d;
}
Takes up 16 bytes, whereas the equivalent:

Code: Select all

struct
{
    uint32_t a;
    uint8_t b;
    uint8_t d;
    uint32_t c;
}
Takes up only 12. If you can't see why, reply and I'll explain.

PS: Using a pragma is worse than using the explicit __attribute__(()). The pragma applies to all structs, the __attribute__ can be applied to only one.

Re: red black tree

Posted: Wed Jun 10, 2009 7:01 am
by FlashBurn
See, I know the things you try to tell me.

As I´m only deving for the x86 arch the problem with unaligned addresses (if you ignore performance loss) isn´t there. And as I know that aligned addresses are better than unaligned I mostly use 32bit vars in my structs. I´m only too lazy to write an "atribute ..." every time I need it and so I use pragma instead. The other point is that I also use assembly code and I handle structs in assembly as if the vars are written one after another without padding. As I´ve said, I want it to be the way I like it and as I come from assembly language I think I know what I´m doing (I hope so ;) ).

Re: red black tree

Posted: Wed Jun 10, 2009 7:21 am
by JamesM
As I´ve said, I want it to be the way I like it and as I come from assembly language I think I know what I´m doing (I hope so ;) ).
I doubt it.

Re: red black tree

Posted: Wed Jun 10, 2009 1:10 pm
by Creature
I was just randomly browsing by this topic and wanted to thank JamesM for your simple explanation. I'm an intermediate C++ programmer and don't know too much about advanced things like bit-fields and packing and all that (only some basics like why you should pack or align your structures differently), but your explanation gave me a much better idea of how structure sizes can be influenced by just positioning the member variables differently. So if I got this right, what you're saying is:

Code: Select all

struct
{
    uint32_t a; //Starts on a 32-bit boundary. For example 'bit 0'.
    uint8_t b; //Starts on an 8-bit boundary. Following the previous variable: 'bit 32'.  
    uint32_t c; //Wants to start at a 32-bit boundary, but is at bit '40' so it is padded with 24 bits so it is on 'bit 64' (next 32-bit boundary).
    uint8_t d; //Starts on an 8-bit boundary. Lies on 'Bit 96'.
}
But won't that make 13 bytes? Or are the 24 bits after the 'd' member variable padded as well? And here:

Code: Select all

struct
{
    uint32_t a; //Starts on a 32-bit boundary. For example 'bit 0'.
    uint8_t b; //Starts on an 8-bit boundary. Following the previous variable: 'bit 32'.  
    uint8_t d; //Starts on an 8-bit boundary. Lies on 'Bit 40'.
    uint32_t c; //Wants to start at a 32-bit boundary, but is at bit '48' so it is padded with 16 bits so it is on 'bit 64' (next 32-bit boundary).  
}
Making 12 bytes together, is this correct?

Re: red black tree

Posted: Wed Jun 10, 2009 1:31 pm
by JamesM
Hi,
Creature wrote:I was just randomly browsing by this topic and wanted to thank JamesM for your simple explanation. I'm an intermediate C++ programmer and don't know too much about advanced things like bit-fields and packing and all that (only some basics like why you should pack or align your structures differently), but your explanation gave me a much better idea of how structure sizes can be influenced by just positioning the member variables differently. So if I got this right, what you're saying is:

Code: Select all

struct
{
    uint32_t a; //Starts on a 32-bit boundary. For example 'bit 0'.
    uint8_t b; //Starts on an 8-bit boundary. Following the previous variable: 'bit 32'.  
    uint32_t c; //Wants to start at a 32-bit boundary, but is at bit '40' so it is padded with 24 bits so it is on 'bit 64' (next 32-bit boundary).
    uint8_t d; //Starts on an 8-bit boundary. Lies on 'Bit 96'.
}
But won't that make 13 bytes? Or are the 24 bits after the 'd' member variable padded as well? And here:

Code: Select all

struct
{
    uint32_t a; //Starts on a 32-bit boundary. For example 'bit 0'.
    uint8_t b; //Starts on an 8-bit boundary. Following the previous variable: 'bit 32'.  
    uint8_t d; //Starts on an 8-bit boundary. Lies on 'Bit 40'.
    uint32_t c; //Wants to start at a 32-bit boundary, but is at bit '48' so it is padded with 16 bits so it is on 'bit 64' (next 32-bit boundary).  
}
Making 12 bytes together, is this correct?

Exactly right.

The first example you have right. It however takes up 16 bytes not 13 as the first member of the struct has 32-bit alignment. So the struct itself must have 32-bit alignment (else you wouldn't be able to have an array of them).

The second example is correct too.

Cheers,

James

Re: red black tree

Posted: Sun Jun 28, 2009 10:46 am
by gravaera
JamesM wrote:
As I´ve said, I want it to be the way I like it and as I come from assembly language I think I know what I´m doing (I hope so ;) ).
I doubt it.
That one made me smile a little.

Anyway: I've been trying to understand the alignment concept properly for a while, and now I think I have it down.

Thanks, JamesM. Your constant posts are appreciated. It's useful to just read topics at random. Even something like this one, which I'd assumed would make no sense, helped me a lot.

@op: Well, if you're not going to use the red-black tree very often, it may be a bit better, if for no other reason than readability, to have a more common structure in place for that purpose. It's also easier to post up questions when people are familiar with your code.