A prefix tree is a data structure with many uses. I won't go into details about this data structure Wikipedia does a better job than I could. What I will do is post a C# implementation of this data structure and in a following post I will show how to use this data structure when implementing an auto-complete solution.
These are the properties of a prefix tree:
- it stores strings(finite sequences of chars)
- it had nodes :) (doooooooh!)
- each node stores a character
- each node can have a number of subnodes(0,1,2, ... infinity)
- the char stored in a subnode is the one following the char stored by the node in a string
- each node has a flag which is raised if a word ends there(this doesn't mean it can't have subnodes for example a prefix tree can store the words "cat" and "catastrophy" in this case node 3 stores 't' and has a subnode that stores 'a')
The node:
The tree:
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Node | |
{ | |
public char Char; | |
public bool AWordEndsHere; | |
public List<Node> Subtree; | |
public Node(char c) | |
{ | |
Char = c; | |
Subtree = new List<Node>(); | |
} | |
public Node(char c, bool wordends):this(c) | |
{ | |
AWordEndsHere = wordends; | |
} | |
public Node GetChild(char c) | |
{ | |
if (Subtree.Count != 0) | |
foreach (var node in Subtree) | |
if (node.Char == c) | |
return node; | |
return null; | |
} | |
} |
The tree:
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Tree | |
{ | |
private List<string> subsequentStrings; | |
private Node _root; | |
public Node Root | |
{ | |
get | |
{ | |
return _root; | |
} | |
} | |
public Tree() | |
{ | |
_root = new Node(' '); | |
subsequentStrings = new List<string>(); | |
} | |
public void AddWord(string word) | |
{ | |
char[] chars = word.ToCharArray(); | |
Node currentNode = _root; | |
Node child = null; | |
for (int counter = 0; counter < chars.Length; counter++) | |
{ | |
child = currentNode.GetChild(word[counter]); | |
if(child == null) | |
{ | |
var newNode = new Node(word[counter]); | |
currentNode.Subtree.Add(newNode); | |
currentNode = newNode; | |
} | |
else | |
currentNode = child; | |
if(counter == chars.Length - 1) | |
currentNode.AWordEndsHere = true; | |
} | |
} | |
public bool Find(string query) | |
{ | |
char[] chars = query.ToCharArray(); | |
Node currentNode = _root; | |
Node child = null; | |
for (int counter = 0; counter < chars.Length; counter++) | |
{ | |
child = currentNode.GetChild(chars[counter]); | |
if (child == null) | |
return false; | |
currentNode = child; | |
} | |
return true; | |
} | |
public List<string> GetMatches(string query) | |
{ | |
StringBuilder sbPrevious = new StringBuilder(); | |
char[] chars = query.ToCharArray(); | |
Node currentNode = _root; | |
Node child = null; | |
for (int counter = 0; counter < chars.Length; counter++) | |
{ | |
if(counter < chars.Length - 1) | |
sbPrevious.Append(chars[counter]); | |
child = currentNode.GetChild(chars[counter]); | |
if (child == null) | |
break; | |
currentNode = child; | |
} | |
subsequentStrings.Clear(); | |
GenerateSubsequentStrings(currentNode, sbPrevious.ToString()); | |
return subsequentStrings; | |
} | |
private void GenerateSubsequentStrings( Node node, | |
string subsequentString) | |
{ | |
if(node == null) | |
return; | |
subsequentString = subsequentString + node.Char; | |
if(node.AWordEndsHere) | |
{ | |
subsequentStrings.Add(subsequentString); | |
//return; | |
} | |
foreach (var subnode in node.Subtree) | |
GenerateSubsequentStrings(subnode, subsequentString); | |
} | |
} |
The main program:
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
static void Main(string[] args) | |
{ | |
Tree prefixTree = new Tree(); | |
prefixTree.AddWord("abc"); | |
prefixTree.AddWord("abcd"); | |
prefixTree.AddWord("abcde"); | |
prefixTree.AddWord("abcdef"); | |
prefixTree.AddWord("ab123cd"); | |
prefixTree.AddWord("abc123d"); | |
prefixTree.AddWord("abc132d"); | |
string word = "abc"; | |
if (prefixTree.Find(word)) | |
{ | |
var matches = prefixTree.GetMatches("abc"); | |
Console.WriteLine("gasit"); | |
Console.WriteLine("Autocomplete:"); | |
if (matches.Count > 0) | |
foreach (var m in matches) | |
Console.WriteLine(m); | |
} | |
else | |
Console.WriteLine("nu gasii nimic"); | |
} |
The AddWord and Find function seem to be incomplete. I dont understand why they have the XML nodes and the Node class ends with . Am I missing something?
RăspundețiȘtergere@Mujahid Shariff yes there seems to be some kind of an error with the code highlighting script
RăspundețiȘtergereI'll try and fix it first time I get