Graph Structure DFS/BFS Traversal

labuladongOriginalAbout 8183 words

Prerequisite Knowledge

Before reading this article, you should learn:

In One Sentence

Graph traversal is an extension of N-ary tree traversal. The main methods are still depth-first search (DFS) and breadth-first search (BFS).

The only difference is that trees do not have cycles, but graphs may have cycles. So we need to mark visited nodes to avoid infinite loops in cycles.

Because graphs are more complex, we usually have three scenarios: traversing "nodes", "edges", and "paths". Each scenario needs slightly different code.

When traversing "nodes" or "edges", use a visited array to mark nodes before recursion to avoid repeats. When traversing "paths", use an onPath array to mark nodes before recursion and remove the mark after recursion.

The visualization panel allows you to create graphs and see DFS/BFS traversal paths. You can clearly see that, although graphs look more complex than trees, traversing a graph is still like traversing a tree.

Let's look at DFS first. Open the visualization panel below. Click the console.log line multiple times to see how DFS traverses the graph. The traverse function is basically traversing an N-ary recursive tree:

Algorithm Visualization

Now let's look at BFS. Open the visualization panel below. Click the console.log line multiple times to see how BFS traverses the graph. It is like level-order traversing an N-ary tree:

Algorithm Visualization

Let's explain in detail below.

Depth-First Search (DFS)

In the previous article Graph Basics and Implementation, we said that we usually do not use a Vertex class to store a graph. But here, I will use this class for now, so you can compare graph traversal with multi-way tree traversal. Later, I will give code examples based on adjacency list and adjacency matrix.

Traverse All Nodes (`visited` Array)

Let’s compare the multi-way tree traversal framework with the graph traversal framework:

java

// N-ary tree node
class Node {
    int val;
    List<Node> children;
}

// Traversal framework for N-ary tree
void traverse(Node root) {
    // base case
    if (root == null) {
        return;
    }
    // Pre-order position
    System.out.println("visit " + root.val);
    for (Node child : root.children) {
        traverse(child);
    }
    // Post-order position
}


// Graph node
class Vertex {
    int id;
    Vertex[] neighbors;
}

// Traversal framework for graph
// Need a visited array to record visited nodes
// Avoid going back to prevent infinite loops
void traverse(Vertex s, boolean[] visited) {
    // base case
    if (s == null) {
        return;
    }
    if (visited[s.id]) {
        // Prevent infinite loops
        return;
    }
    // Pre-order position
    visited[s.id] = true;
    System.out.println("visit " + s.id);
    for (Vertex neighbor : s.neighbors) {
        traverse(neighbor, visited);
    }
    // Post-order position
}

cpp

// N-ary tree node
class Node {
public:
    int val;
    std::vector<Node*> children;
};

// traversal framework for N-ary tree
void traverse(Node* root) {
    // base case
    if (root == nullptr) {
        return;
    }
    // pre-order position
    std::cout << "visit " << root->val << std::endl;
    for (auto child : root->children) {
        traverse(child);
    }
    // post-order position
}

// graph vertex
class Vertex {
public:
    int id;
    std::vector<Vertex*> neighbors;
};

// traversal framework for graph
// a visited array is needed to record the visited vertices
// avoid going back and falling into an infinite loop
void traverse(Vertex* s, std::vector<bool>& visited) {
    // base case
    if (s == nullptr) {
        return;
    }
    if (visited[s->id]) {
        // prevent infinite loop
        return;
    }
    // pre-order position
    visited[s->id] = true;
    std::cout << "visit " << s->id << std::endl;
    for (auto neighbor : s->neighbors) {
        traverse(neighbor);
    }
    // post-order position
}

python

# N-ary tree node
class Node:
    def __init__(self, val=0, children=None):
        self.val = val
        self.children = children if children is not None else []

# Traversal framework for N-ary tree
def traverse(root):
    # base case
    if root is None:
        return
    # pre-order position
    print(f"visit {root.val}")
    for child in root.children:
        traverse(child)
    # post-order position

# Graph node
class Vertex:
    def __init__(self, id=0, neighbors=None):
        self.id = id
        self.neighbors = neighbors if neighbors is not None else []

# Traversal framework for graph
# Need a visited array to record visited nodes
# Avoid going backwards and falling into infinite loop
def traverse_graph(s: Vertex, visited: List[bool]):
    # base case
    if s is None:
        return
    if visited[s.id]:
        # Prevent infinite loop
        return
    # pre-order position
    visited[s.id] = True
    print(f"visit {s.id}")
    for neighbor in s.neighbors:
        traverse_graph(neighbor, visited)
    # post-order position

// Node multi-way tree node
type Node struct {
    val      int
    children []*Node
}

// Framework for traversing a multi-way tree
func traverse(root *Node) {
    // base case
    if root == nil {
        return
    }
    // Pre-order position
    fmt.Println("visit", root.val)
    for _, child := range root.children {
        traverse(child)
    }
    // Post-order position
}

// Vertex graph node
type Vertex struct {
    id        int
    neighbors []*Vertex
}


// Framework for graph traversal
// Need a visited array to keep track of visited nodes
// Avoid going back and falling into an infinite loop
func traverse(s *Vertex, visited []bool) {
    // base case
    if s == nil {
        return
    }
    if visited[s.id] {
        // Prevent infinite loop
        return
    }
    // Pre-order position
    visited[s.id] = true
    fmt.Println("visit", s.id)
    for _, neighbor := range s.neighbors {
        traverse(neighbor, visited)
    }
    // Post-order position
}

javascript

// N-ary tree node
class Node {
    constructor(val) {
        this.val = val;
        this.children = [];
    }
}

// N-ary tree traversal framework
var traverseTree = function(root) {
    // base case
    if (root === null) {
        return;
    }
    // pre-order position
    console.log("visit " + root.val);
    for (var i = 0; i < root.children.length; i++) {
        traverseTree(root.children[i]);
    }
    // post-order position
};

// graph node
class Vertex {
    constructor(id) {
        this.id = id;
        this.neighbors = [];
    }
}

// graph traversal framework
// need a visited array to record visited nodes
// avoid backtracking into a loop
var traverseGraph = function(s, visited) {
    // base case
    if (s === null) {
        return;
    }
    if (visited[s.id]) {
        // prevent infinite loop
        return;
    }
    // pre-order position
    visited[s.id] = true;
    console.log("visit " + s.id);
    for (var i = 0; i < s.neighbors.length; i++) {
        traverseGraph(s.neighbors[i]);
    }
    // post-order position
};

You can see that graph traversal uses an extra visited array compared to multi-way tree traversal. This array keeps track of visited nodes to avoid infinite loops when there are cycles.

Why do cycles cause infinite loops?

Here is a simple example. There is an edge from 1 -> 2, and another edge from 2 -> 1. Nodes 1 and 2 form a cycle:

1 <=> 2

If we do not mark visited nodes, starting from 1, we go to 2, then back to 1, then 2 again, then 1 again, and so on: 1->2->1->2->... forever.

With a visited array, the first time we visit 1, we mark it as visited. If we see 1->2->1, we find that 1 is already visited, so we return immediately. This stops the recursion and avoids an infinite loop.

With this understanding, we can write graph traversal code using adjacency list or adjacency matrix. Although their storage is different, they both provide the same API, so you can use the Graph interface from Graph Basics and Implementation:

java

// traverse all nodes of the graph
void traverse(Graph graph, int s, boolean[] visited) {
    // base case
    if (s < 0 || s >= graph.size()) {
        return;
    }
    if (visited[s]) {
        // prevent infinite loop
        return;
    }
    // pre-order position
    visited[s] = true;
    System.out.println("visit " + s);
    for (Edge e : graph.neighbors(s)) {
        traverse(graph, e.to, visited);
    }
    // post-order position
}

cpp

// traverse all nodes of the graph
void traverse(const Graph& graph, int s, std::vector<bool>& visited) {
    // base case
    if (s < 0 || s >= graph.size()) {
        return;
    }
    if (visited[s]) {
        // prevent infinite loop
        return;
    }
    // pre-order position
    visited[s] = true;
    std::cout << "visit " << s << std::endl;
    for (const Graph::Edge& e : graph.neighbors(s)) {
        traverse(graph, e.to, visited);
    }
    // post-order position
}

python

# traverse all nodes of the graph
def traverse(graph, s, visited):
    # base case
    if s < 0 or s >= len(graph):
        return
    if visited[s]:
        # prevent infinite loop
        return
    # pre-order position
    visited[s] = True
    print("visit", s)
    for e in graph.neighbors(s):
        traverse(graph, e.to, visited)
    # post-order position

// traverse all nodes of the graph
func traverse(graph Graph, s int, visited []bool) {
    // base case
    if s < 0 || s >= len(graph) {
        return
    }
    if visited[s] {
        // prevent infinite loop
        return
    }
    // pre-order position
    visited[s] = true
    fmt.Println("visit", s)
    for _, e := range graph.neighbors(s) {
        traverse(graph, e.to, visited)
    }
    // post-order position
}

javascript

// traverse all nodes of the graph
var traverse = function(graph, s, visited) {
    // base case
    if (s < 0 || s >= graph.size()) {
        return;
    }
    if (visited[s]) {
        // prevent infinite loop
        return;
    }
    // pre-order position
    visited[s] = true;
    console.log("visit " + s);
    for (var e of graph.neighbors(s)) {
        traverse(graph, e.to, visited);
    }
    // post-order position
};

You can open the visual panel below and click the line with console.log several times to see how DFS visits the graph.

Algorithm Visualization

Because of the visited array, this function will visit every node once and try to visit every edge once. So the time complexity is $O(E + V)$ , where E is the number of edges and V is the number of nodes.

Why is the time complexity $O(E + V)$ ?

When we talked about Binary Tree Traversal, we said the time complexity is $O(N)$ , where $N$ is the number of nodes.

Since a graph is like an extension of a tree, why is the time complexity of graph traversal $O(E + V)$ , not just $O(V)$ ?

This is a good question. Think about it for two minutes. The answer is below.

Click to see the answer

Actually, for binary trees or multi-way trees, you should also count the edges. But in trees, the number of edges and nodes is about the same, so the total is $O(N + N) = O(N)$ .

In a tree, each edge goes from a parent to a child. Except for the root, every node pairs with the edge from its parent, so the number of edges is almost the same as the number of nodes.

But in graphs, any two nodes can be connected. The number of edges and nodes can be very different. That's why graph traversal is $O(E + V)$ .

Traverse All Edges (2D `visited` Array)

In graphs, traversing all edges is not common. It is mainly used when Finding Eulerian Path, so we will mention it briefly.

In the previous code, a 1D visited array ensures every node is visited once. The simplest way to ensure every edge is visited once is to use a 2D visited array (where visited[u][v] means the edge u->v has been visited).

Let’s compare with multi-way tree traversal:

java

// N-ary tree node
class Node {
    int val;
    List<Node> children;
}

// Traverse the branches of an N-ary tree
void traverseBranch(Node root) {
    // base case
    if (root == null) {
        return;
    }
    for (Node child : root.children) {
        System.out.println("visit branch: " + root.val + " -> " + child.val);
        traverseBranch(child);
    }
}

// Graph node
class Vertex {
    int id;
    Vertex[] neighbors;
}

// Traverse the edges of a graph
// Need a 2D visited array to record visited edges,
// visited[u][v] indicates that edge u->v has been visited
void traverseEdges(Vertex s, boolean[][] visited) {
    // base case
    if (s == null) {
        return;
    }
    for (Vertex neighbor : s.neighbors) {
      // if the edge has been visited, skip it
      if (visited[s.id][neighbor.id]) {
        continue;
      }
      // mark and visit the edge
      visited[s.id][neighbor.id] = true;
      System.out.println("visit edge: " + s.id + " -> " + neighbor.id);
      traverseEdges(neighbor, visited);
    }
}

cpp

// traverse all edges of the graph starting from vertex s
void traverseEdges(const Graph& graph, int s, std::vector<std::vector<bool>>& visited) {
    // base case
    if (s < 0 || s >= graph.size()) {
        return;
    }
    for (const Graph::Edge& e : graph.neighbors(s)) {
        // if the edge has been visited, skip it
        if (visited[s][e.to]) {
            continue;
        }
        // mark and visit the edge
        visited[s][e.to] = true;
        std::cout << "visit edge: " << s << " -> " << e.to << std::endl;
        traverseEdges(graph, e.to, visited);
    }
}

python

# traverse all edges of the graph starting from vertex s
def traverse_edges(graph, s, visited):
    # base case
    if s < 0 or s >= len(graph):
        return
    for e in graph.neighbors(s):
        # if the edge has been visited, skip it
        if visited[s][e.to]:
            continue
        # mark and visit the edge
        visited[s][e.to] = True
        print(f"visit edge: {s} -> {e.to}")
        traverse_edges(graph, e.to, visited)

// traverse all edges of the graph starting from vertex s
func traverseEdges(graph Graph, s int, visited [][]bool) {
    // base case
    if s < 0 || s >= len(graph) {
        return
    }
    for _, e := range graph.neighbors(s) {
        // if the edge has been visited, skip it
        if visited[s][e.to] {
            continue
        }
        // mark and visit the edge
        visited[s][e.to] = true
        fmt.Printf("visit edge: %d -> %d\n", s, e.to)
        traverseEdges(graph, e.to, visited)
    }
}

javascript

// traverse all edges of the graph starting from vertex s
var traverseEdges = function(graph, s, visited) {
    // base case
    if (s < 0 || s >= graph.size()) {
        return;
    }
    var neighbors = graph.neighbors(s);
    for (var i = 0; i < neighbors.length; i++) {
        var e = neighbors[i];
        // if the edge has been visited, skip it
        if (visited[s][e.to]) {
            continue;
        }
        // mark and visit the edge
        visited[s][e.to] = true;
        console.log("visit edge: " + s + " -> " + e.to);
        traverseEdges(graph, e.to, visited);
    }
};

Note

Since an edge connects two nodes, we need to put related code inside the for loop.

Now, we can use the Graph interface from Graph Basics and Implementation:

java

// traverse all edges of the graph starting from vertex s
void traverseEdges(Graph graph, int s, boolean[][] visited) {
    // base case
    if (s < 0 || s >= graph.size()) {
        return;
    }
    for (Edge e : graph.neighbors(s)) {
      // if the edge has been visited, skip it
      if (visited[s][e.to]) {
        continue;
      }
      // mark and visit the edge
      visited[s][e.to] = true;
      System.out.println("visit edge: " + s + " -> " + e.to);
      traverseEdges(graph, e.to, visited);
    }
}

cpp

// traverse all edges of the graph starting from vertex s
void traverseEdges(const Graph& graph, int s, std::vector<std::vector<bool>>& visited) {
    // base case
    if (s < 0 || s >= graph.size()) {
        return;
    }
    for (const Graph::Edge& e : graph.neighbors(s)) {
      // if the edge has been visited, skip it
      if (visited[s][e.to]) {
        continue;
      }
      // mark and visit the edge
      visited[s][e.to] = true;
      std::cout << "visit edge: " << s << " -> " << e.to << std::endl;
      traverseEdges(graph, e.to, visited);
    }
}

python

# traverse all edges of the graph starting from vertex s
def traverse_edges(graph, s, visited):
    # base case
    if s < 0 or s >= len(graph):
        return
    for e in graph.neighbors(s):
        # if the edge has been visited, skip it
        if visited[s][e.to]:
            continue
        # mark and visit the edge
        visited[s][e.to] = True
        print(f"visit edge: {s} -> {e.to}")
        traverse_edges(graph, e.to, visited)

// traverse all edges of the graph starting from vertex s
func traverseEdges(graph Graph, s int, visited [][]bool) {
    // base case
    if s < 0 || s >= len(graph) {
        return
    }
    for _, e := range graph.neighbors(s) {
        // if the edge has been visited, skip it
        if visited[s][e.to] {
            continue
        }
        // mark and visit the edge
        visited[s][e.to] = true
        fmt.Printf("visit edge: %d -> %d\n", s, e.to)
        traverseEdges(graph, e.to, visited)
    }
}

javascript

// traverse all edges of the graph starting from vertex s
var traverseEdges = function(graph, s, visited) {
    // base case
    if (s < 0 || s >= graph.size()) {
        return;
    }
    var neighbors = graph.neighbors(s);
    for (var i = 0; i < neighbors.length; i++) {
        var e = neighbors[i];
        // if the edge has been visited, skip it
        if (visited[s][e.to]) {
            continue;
        }
        // mark and visit the edge
        visited[s][e.to] = true;
        console.log("visit edge: " + s + " -> " + e.to);
        traverseEdges(graph, e.to, visited);
    }
};

Obviously, using a 2D visited array is not very efficient, because it needs to create a 2D array. The time complexity is $O(E + V^2)$ and the space complexity is $O(V^2)$ , where $E$ is the number of edges and $V$ is the number of nodes.

When we talk about Hierholzer's Algorithm for Euler Path, we will show a simple way to avoid using a 2D visited array. We will not go into details here.

Graph Structure DFS/BFS Traversal

797. All Paths From Source to Target https://leetcode.com/problems/all-paths-from-source-to-target

Depth-First Search (DFS)

Traverse All Nodes (`visited` Array)

Traverse All Edges (2D `visited` Array)

Traverse All Paths (`onPath` Array)

Using Both `visited` and `onPath` Arrays

Not Using `visited` or `onPath` Arrays

Breadth-First Search (BFS)

Pattern 1

Pattern 2

Pattern 3