Graph Structure DFS/BFS Traversal
Prerequisite Knowledge
Before reading this article, you should learn:
In One Sentence
Graph traversal is an extension of N-ary tree traversal. The main methods are still depth-first search (DFS) and breadth-first search (BFS).
The only difference is that trees do not have cycles, but graphs may have cycles. So we need to mark visited nodes to avoid infinite loops in cycles.
Because graphs are more complex, we usually have three scenarios: traversing "nodes", "edges", and "paths". Each scenario needs slightly different code.
When traversing "nodes" or "edges", use a visited
array to mark nodes before recursion to avoid repeats. When traversing "paths", use an onPath
array to mark nodes before recursion and remove the mark after recursion.
The visualization panel allows you to create graphs and see DFS/BFS traversal paths. You can clearly see that, although graphs look more complex than trees, traversing a graph is still like traversing a tree.
Let's look at DFS first. Open the visualization panel below. Click the console.log
line multiple times to see how DFS traverses the graph. The traverse
function is basically traversing an N-ary recursive tree:
Algorithm Visualization
Now let's look at BFS. Open the visualization panel below. Click the console.log
line multiple times to see how BFS traverses the graph. It is like level-order traversing an N-ary tree:
Algorithm Visualization
Let's explain in detail below.
Depth-First Search (DFS)
In the previous article Graph Basics and Implementation, we said that we usually do not use a Vertex
class to store a graph. But here, I will use this class for now, so you can compare graph traversal with multi-way tree traversal. Later, I will give code examples based on adjacency list and adjacency matrix.
Traverse All Nodes (visited
Array)
Let’s compare the multi-way tree traversal framework with the graph traversal framework:
// N-ary tree node
class Node {
int val;
List<Node> children;
}
// Traversal framework for N-ary tree
void traverse(Node root) {
// base case
if (root == null) {
return;
}
// Pre-order position
System.out.println("visit " + root.val);
for (Node child : root.children) {
traverse(child);
}
// Post-order position
}
// Graph node
class Vertex {
int id;
Vertex[] neighbors;
}
// Traversal framework for graph
// Need a visited array to record visited nodes
// Avoid going back to prevent infinite loops
void traverse(Vertex s, boolean[] visited) {
// base case
if (s == null) {
return;
}
if (visited[s.id]) {
// Prevent infinite loops
return;
}
// Pre-order position
visited[s.id] = true;
System.out.println("visit " + s.id);
for (Vertex neighbor : s.neighbors) {
traverse(neighbor, visited);
}
// Post-order position
}
// N-ary tree node
class Node {
public:
int val;
std::vector<Node*> children;
};
// traversal framework for N-ary tree
void traverse(Node* root) {
// base case
if (root == nullptr) {
return;
}
// pre-order position
std::cout << "visit " << root->val << std::endl;
for (auto child : root->children) {
traverse(child);
}
// post-order position
}
// graph vertex
class Vertex {
public:
int id;
std::vector<Vertex*> neighbors;
};
// traversal framework for graph
// a visited array is needed to record the visited vertices
// avoid going back and falling into an infinite loop
void traverse(Vertex* s, std::vector<bool>& visited) {
// base case
if (s == nullptr) {
return;
}
if (visited[s->id]) {
// prevent infinite loop
return;
}
// pre-order position
visited[s->id] = true;
std::cout << "visit " << s->id << std::endl;
for (auto neighbor : s->neighbors) {
traverse(neighbor);
}
// post-order position
}
# N-ary tree node
class Node:
def __init__(self, val=0, children=None):
self.val = val
self.children = children if children is not None else []
# Traversal framework for N-ary tree
def traverse(root):
# base case
if root is None:
return
# pre-order position
print(f"visit {root.val}")
for child in root.children:
traverse(child)
# post-order position
# Graph node
class Vertex:
def __init__(self, id=0, neighbors=None):
self.id = id
self.neighbors = neighbors if neighbors is not None else []
# Traversal framework for graph
# Need a visited array to record visited nodes
# Avoid going backwards and falling into infinite loop
def traverse_graph(s: Vertex, visited: List[bool]):
# base case
if s is None:
return
if visited[s.id]:
# Prevent infinite loop
return
# pre-order position
visited[s.id] = True
print(f"visit {s.id}")
for neighbor in s.neighbors:
traverse_graph(neighbor, visited)
# post-order position
// Node multi-way tree node
type Node struct {
val int
children []*Node
}
// Framework for traversing a multi-way tree
func traverse(root *Node) {
// base case
if root == nil {
return
}
// Pre-order position
fmt.Println("visit", root.val)
for _, child := range root.children {
traverse(child)
}
// Post-order position
}
// Vertex graph node
type Vertex struct {
id int
neighbors []*Vertex
}
// Framework for graph traversal
// Need a visited array to keep track of visited nodes
// Avoid going back and falling into an infinite loop
func traverse(s *Vertex, visited []bool) {
// base case
if s == nil {
return
}
if visited[s.id] {
// Prevent infinite loop
return
}
// Pre-order position
visited[s.id] = true
fmt.Println("visit", s.id)
for _, neighbor := range s.neighbors {
traverse(neighbor, visited)
}
// Post-order position
}
// N-ary tree node
class Node {
constructor(val) {
this.val = val;
this.children = [];
}
}
// N-ary tree traversal framework
var traverseTree = function(root) {
// base case
if (root === null) {
return;
}
// pre-order position
console.log("visit " + root.val);
for (var i = 0; i < root.children.length; i++) {
traverseTree(root.children[i]);
}
// post-order position
};
// graph node
class Vertex {
constructor(id) {
this.id = id;
this.neighbors = [];
}
}
// graph traversal framework
// need a visited array to record visited nodes
// avoid backtracking into a loop
var traverseGraph = function(s, visited) {
// base case
if (s === null) {
return;
}
if (visited[s.id]) {
// prevent infinite loop
return;
}
// pre-order position
visited[s.id] = true;
console.log("visit " + s.id);
for (var i = 0; i < s.neighbors.length; i++) {
traverseGraph(s.neighbors[i]);
}
// post-order position
};
You can see that graph traversal uses an extra visited
array compared to multi-way tree traversal. This array keeps track of visited nodes to avoid infinite loops when there are cycles.
Why do cycles cause infinite loops?
Here is a simple example. There is an edge from 1 -> 2
, and another edge from 2 -> 1
. Nodes 1
and 2
form a cycle:
1 <=> 2
If we do not mark visited nodes, starting from 1
, we go to 2
, then back to 1
, then 2
again, then 1
again, and so on: 1->2->1->2->...
forever.
With a visited
array, the first time we visit 1
, we mark it as visited. If we see 1->2->1
, we find that 1
is already visited, so we return immediately. This stops the recursion and avoids an infinite loop.
With this understanding, we can write graph traversal code using adjacency list or adjacency matrix. Although their storage is different, they both provide the same API, so you can use the Graph
interface from Graph Basics and Implementation:
// traverse all nodes of the graph
void traverse(Graph graph, int s, boolean[] visited) {
// base case
if (s < 0 || s >= graph.size()) {
return;
}
if (visited[s]) {
// prevent infinite loop
return;
}
// pre-order position
visited[s] = true;
System.out.println("visit " + s);
for (Edge e : graph.neighbors(s)) {
traverse(graph, e.to, visited);
}
// post-order position
}
// traverse all nodes of the graph
void traverse(const Graph& graph, int s, std::vector<bool>& visited) {
// base case
if (s < 0 || s >= graph.size()) {
return;
}
if (visited[s]) {
// prevent infinite loop
return;
}
// pre-order position
visited[s] = true;
std::cout << "visit " << s << std::endl;
for (const Graph::Edge& e : graph.neighbors(s)) {
traverse(graph, e.to, visited);
}
// post-order position
}
# traverse all nodes of the graph
def traverse(graph, s, visited):
# base case
if s < 0 or s >= len(graph):
return
if visited[s]:
# prevent infinite loop
return
# pre-order position
visited[s] = True
print("visit", s)
for e in graph.neighbors(s):
traverse(graph, e.to, visited)
# post-order position
// traverse all nodes of the graph
func traverse(graph Graph, s int, visited []bool) {
// base case
if s < 0 || s >= len(graph) {
return
}
if visited[s] {
// prevent infinite loop
return
}
// pre-order position
visited[s] = true
fmt.Println("visit", s)
for _, e := range graph.neighbors(s) {
traverse(graph, e.to, visited)
}
// post-order position
}
// traverse all nodes of the graph
var traverse = function(graph, s, visited) {
// base case
if (s < 0 || s >= graph.size()) {
return;
}
if (visited[s]) {
// prevent infinite loop
return;
}
// pre-order position
visited[s] = true;
console.log("visit " + s);
for (var e of graph.neighbors(s)) {
traverse(graph, e.to, visited);
}
// post-order position
};
You can open the visual panel below and click the line with console.log
several times to see how DFS visits the graph.
Algorithm Visualization
Because of the visited
array, this function will visit every node once and try to visit every edge once. So the time complexity is , where E
is the number of edges and V
is the number of nodes.
Why is the time complexity ?
When we talked about Binary Tree Traversal, we said the time complexity is , where is the number of nodes.
Since a graph is like an extension of a tree, why is the time complexity of graph traversal , not just ?
This is a good question. Think about it for two minutes. The answer is below.
Click to see the answer
Actually, for binary trees or multi-way trees, you should also count the edges. But in trees, the number of edges and nodes is about the same, so the total is .
In a tree, each edge goes from a parent to a child. Except for the root, every node pairs with the edge from its parent, so the number of edges is almost the same as the number of nodes.
But in graphs, any two nodes can be connected. The number of edges and nodes can be very different. That's why graph traversal is .
Traverse All Edges (2D visited
Array)
In graphs, traversing all edges is not common. It is mainly used when Finding Eulerian Path, so we will mention it briefly.
In the previous code, a 1D visited
array ensures every node is visited once. The simplest way to ensure every edge is visited once is to use a 2D visited
array (where visited[u][v]
means the edge u->v
has been visited).
Let’s compare with multi-way tree traversal:
// N-ary tree node
class Node {
int val;
List<Node> children;
}
// Traverse the branches of an N-ary tree
void traverseBranch(Node root) {
// base case
if (root == null) {
return;
}
for (Node child : root.children) {
System.out.println("visit branch: " + root.val + " -> " + child.val);
traverseBranch(child);
}
}
// Graph node
class Vertex {
int id;
Vertex[] neighbors;
}
// Traverse the edges of a graph
// Need a 2D visited array to record visited edges,
// visited[u][v] indicates that edge u->v has been visited
void traverseEdges(Vertex s, boolean[][] visited) {
// base case
if (s == null) {
return;
}
for (Vertex neighbor : s.neighbors) {
// if the edge has been visited, skip it
if (visited[s.id][neighbor.id]) {
continue;
}
// mark and visit the edge
visited[s.id][neighbor.id] = true;
System.out.println("visit edge: " + s.id + " -> " + neighbor.id);
traverseEdges(neighbor, visited);
}
}
// traverse all edges of the graph starting from vertex s
void traverseEdges(const Graph& graph, int s, std::vector<std::vector<bool>>& visited) {
// base case
if (s < 0 || s >= graph.size()) {
return;
}
for (const Graph::Edge& e : graph.neighbors(s)) {
// if the edge has been visited, skip it
if (visited[s][e.to]) {
continue;
}
// mark and visit the edge
visited[s][e.to] = true;
std::cout << "visit edge: " << s << " -> " << e.to << std::endl;
traverseEdges(graph, e.to, visited);
}
}
# traverse all edges of the graph starting from vertex s
def traverse_edges(graph, s, visited):
# base case
if s < 0 or s >= len(graph):
return
for e in graph.neighbors(s):
# if the edge has been visited, skip it
if visited[s][e.to]:
continue
# mark and visit the edge
visited[s][e.to] = True
print(f"visit edge: {s} -> {e.to}")
traverse_edges(graph, e.to, visited)
// traverse all edges of the graph starting from vertex s
func traverseEdges(graph Graph, s int, visited [][]bool) {
// base case
if s < 0 || s >= len(graph) {
return
}
for _, e := range graph.neighbors(s) {
// if the edge has been visited, skip it
if visited[s][e.to] {
continue
}
// mark and visit the edge
visited[s][e.to] = true
fmt.Printf("visit edge: %d -> %d\n", s, e.to)
traverseEdges(graph, e.to, visited)
}
}
// traverse all edges of the graph starting from vertex s
var traverseEdges = function(graph, s, visited) {
// base case
if (s < 0 || s >= graph.size()) {
return;
}
var neighbors = graph.neighbors(s);
for (var i = 0; i < neighbors.length; i++) {
var e = neighbors[i];
// if the edge has been visited, skip it
if (visited[s][e.to]) {
continue;
}
// mark and visit the edge
visited[s][e.to] = true;
console.log("visit edge: " + s + " -> " + e.to);
traverseEdges(graph, e.to, visited);
}
};
Note
Since an edge connects two nodes, we need to put related code inside the for loop.
Now, we can use the Graph
interface from Graph Basics and Implementation:
// traverse all edges of the graph starting from vertex s
void traverseEdges(Graph graph, int s, boolean[][] visited) {
// base case
if (s < 0 || s >= graph.size()) {
return;
}
for (Edge e : graph.neighbors(s)) {
// if the edge has been visited, skip it
if (visited[s][e.to]) {
continue;
}
// mark and visit the edge
visited[s][e.to] = true;
System.out.println("visit edge: " + s + " -> " + e.to);
traverseEdges(graph, e.to, visited);
}
}
// traverse all edges of the graph starting from vertex s
void traverseEdges(const Graph& graph, int s, std::vector<std::vector<bool>>& visited) {
// base case
if (s < 0 || s >= graph.size()) {
return;
}
for (const Graph::Edge& e : graph.neighbors(s)) {
// if the edge has been visited, skip it
if (visited[s][e.to]) {
continue;
}
// mark and visit the edge
visited[s][e.to] = true;
std::cout << "visit edge: " << s << " -> " << e.to << std::endl;
traverseEdges(graph, e.to, visited);
}
}
# traverse all edges of the graph starting from vertex s
def traverse_edges(graph, s, visited):
# base case
if s < 0 or s >= len(graph):
return
for e in graph.neighbors(s):
# if the edge has been visited, skip it
if visited[s][e.to]:
continue
# mark and visit the edge
visited[s][e.to] = True
print(f"visit edge: {s} -> {e.to}")
traverse_edges(graph, e.to, visited)
// traverse all edges of the graph starting from vertex s
func traverseEdges(graph Graph, s int, visited [][]bool) {
// base case
if s < 0 || s >= len(graph) {
return
}
for _, e := range graph.neighbors(s) {
// if the edge has been visited, skip it
if visited[s][e.to] {
continue
}
// mark and visit the edge
visited[s][e.to] = true
fmt.Printf("visit edge: %d -> %d\n", s, e.to)
traverseEdges(graph, e.to, visited)
}
}
// traverse all edges of the graph starting from vertex s
var traverseEdges = function(graph, s, visited) {
// base case
if (s < 0 || s >= graph.size()) {
return;
}
var neighbors = graph.neighbors(s);
for (var i = 0; i < neighbors.length; i++) {
var e = neighbors[i];
// if the edge has been visited, skip it
if (visited[s][e.to]) {
continue;
}
// mark and visit the edge
visited[s][e.to] = true;
console.log("visit edge: " + s + " -> " + e.to);
traverseEdges(graph, e.to, visited);
}
};
Obviously, using a 2D visited
array is not very efficient, because it needs to create a 2D array. The time complexity is and the space complexity is , where is the number of edges and is the number of nodes.
When we talk about Hierholzer's Algorithm for Euler Path, we will show a simple way to avoid using a 2D visited
array. We will not go into details here.
Traverse All Paths (onPath
Array)
For trees, traversing all "paths" is almost the same as traversing all "nodes". But for graphs, there is a difference between traversing all "paths" and all "nodes".
In trees, each edge goes from parent to child. So from the root root
to any node targetNode
, there is only one path. In other words, after visiting all nodes once, you have already found the only path from root
to targetNode
: