#	User	Rating
1	tourist	3985
2	jiangly	3814
3	jqdai0815	3682
4	Benq	3529
5	orzdevinwang	3526
6	ksun48	3517
7	Radewoosh	3410
8	hos.lyric	3399
9	ecnerwala	3392
9	Um_nik	3392

#	User	Contrib.
1	cry	169
2	maomao90	162
2	Um_nik	162
4	atcoder_official	161
5	djm03178	158
6	-is-this-fft-	157
7	adamant	155
8	awoo	154
8	Dominater069	154
10	luogu_official	150

Introduction:

This blog covers how a DSU can keep all of its previous versions available after several union operations, a topic for which there seems to be a lack of resources. Thanks to MinaRagy06 for helping me write this blog!

A basic DSU can be implemented in the following way:

DSU Code

// parent[v]: DSU parent of v (v is a root when parent[v] == v).
// size[v]: number of nodes in v's tree, maintained for roots by union_sets.
int parent[MAX_N], size[MAX_N];

// Returns the representative of the set containing `node` by following
// parent links upward until a vertex that is its own parent is reached.
int get_root(int node){
	while(parent[node] != node)
		node = parent[node];
	return node;
}
// Merges the sets containing u and v. The root with the smaller recorded size
// is attached under the other one (on a tie, u's root goes under v's root).
// Does nothing when both nodes already share a root.
void union_sets(int u, int v){
	const int ru = get_root(u);
	const int rv = get_root(v);
	if(ru == rv)
		return;
	const bool u_tree_larger = size[ru] > size[rv];
	const int child = u_tree_larger ? rv : ru;   // root that stops being a root
	const int keeper = u_tree_larger ? ru : rv;  // root of the merged set
	size[keeper] += size[child];
	parent[child] = keeper;
}

Next, I will explain how to store more information to answer queries that require time travelling.

Main Idea:

Problem 1 (Easy) :

Let's start by solving a simple CSES Problem.

The problem can be reduced to binary searching on the first time nodes $$$a$$$ and $$$b$$$ are connected. Now let's learn how to check connectivity during any time using persistent DSU.

We maintain an extra array $$$\text{time_changed}$$$ that stores, for each node, the time (the index of the added edge, with edges added in order) at which it stopped being a root. Now we can run $$$\text{get_root}$$$ with an extra parameter $$$\text{time}$$$ to find the root of a node at a given time, and during the binary search we check whether nodes $$$a$$$ and $$$b$$$ have the same root at that time.

Time Complexity: $$$O(N + M \cdot \log N + Q \cdot \log M \cdot \log N)$$$

Solution Code

#include <bits/stdc++.h>
using namespace std;
// Capacity of the per-node arrays below (nodes are indexed from 1 in main).
const int MAX_N = 2e5+5;
// n, m, q: counts read from the input in main (nodes, edges, queries).
// parent[v]: DSU parent of v; time_changed[v]: the `time` passed to the
// union_sets call that attached v under another root; size[v]: component
// size, maintained for roots.
// NOTE(review): with `using namespace std;` the name `size` can clash with
// std::size when compiled as C++17 or later.
int n, m, q, parent[MAX_N], time_changed[MAX_N], size[MAX_N];
// Returns the representative of `node` as of moment `time`: climbs parent
// links, but only through nodes whose link was created at or before `time`.
int get_root(int node, int time){
	while(parent[node] != node && time_changed[node] <= time)
		node = parent[node];
	return node;
}
void union_sets(int a, int b, int time){
	a = get_root(a, time);
	b = get_root(b, time);
	if(a == b)
		return;
	if(size[a] > size[b])
		swap(a, b);
	size[b] += size[a];
	parent[a] = b;
	time_changed[a] = time;
}
int main(){
	ios::sync_with_stdio(0); cin.tie(0);
	cin >> n >> m >> q;
	for(int i = 1; i <= n; i++){
		parent[i] = i;
		size[i] = 1;
	}
	for(int i = 1; i <= m; i++){
		int a, b; cin >> a >> b;
		union_sets(a, b, i);
	}
	while (q--) {
		int a, b; cin >> a >> b;
		int l = 0, r = m, mid, ans = -1;
		while (l <= r) {
			mid = (l + r) / 2;
			if (get_root(a, mid) == get_root(b, mid)) {
				ans = mid;
				r = mid - 1;
			} else {
				l = mid + 1;
			}
		}
		cout << ans << "\n";
	}
	return 0;
}

Problem 2 (Medium) :

Consider the following problem: there is a graph consisting of $$$N$$$ nodes and initially no edges. You are given $$$Q$$$ queries, which you have to answer online, of the following types:

Add an Edge between node $$$A$$$ and node $$$B$$$
Check Whether Node $$$A$$$ and Node $$$B$$$ are in the same connected component after the $$$X$$$-th Query
Find the Number of Nodes in the connected component of Node $$$A$$$ after the $$$X$$$-th Query

Constraints:

$$$1 \le N,Q \le 2 \cdot 10^5$$$

$$$1 \le A,B \le N$$$

$$$1 \le X_i \le i$$$

In this problem, we also have to use the $$$\text{time_changed}$$$ array. In addition, we need to save the versions of each node such that it was a root after adding an edge in its component along with the size (or any new information we need to save in other problems).

We save two vectors for each node, $$$\text{version}$$$ and $$$\text{size}$$$. Whenever we add an edge, we push back the time to the new root's $$$\text{version}$$$ vector along with the new total size to its $$$\text{size}$$$ vector.

Now whenever we want to get the size of the component of $$$A$$$ at time $$$T$$$, we find the root $$$R$$$ of $$$A$$$ at time $$$T$$$, binary search for the largest index $$$pos$$$ such that $$$version[R][pos] \le T$$$, and return $$$size[R][pos]$$$ as the answer.

Time complexity: $$$O(N + Q \cdot \log N)$$$

Solution Code

#include <bits/stdc++.h>
using namespace std;
// Capacity of the per-node arrays below (nodes are indexed from 1 in main).
const int MAX_N = 2e5+5;
// parent[v]: DSU parent of v; time_changed[v]: the `time` of the union that
// attached v under another root.
int n, q, parent[MAX_N], time_changed[MAX_N];
// For each node v, version[v][k] is a moment at which v was a root and its
// component size became size[v][k]; both vectors grow together.
// NOTE(review): with `using namespace std;` the name `size` can clash with
// std::size when compiled as C++17 or later.
vector<int>version[MAX_N], size[MAX_N];
// Returns the node that was the root of `node`'s component at moment `time`.
// A parent link is followed only if it already existed at that moment.
int get_root(int node, int time){
	for(;;){
		const bool is_root_now = parent[node] == node;
		if(is_root_now || time_changed[node] > time)
			return node;
		node = parent[node];
	}
}
void union_sets(int a, int b, int time){
	a = get_root(a, time);
	b = get_root(b, time);
	if(a == b)
		return;
	if(size[a] > size[b])
		swap(a, b);
	parent[a] = b;
	time_changed[a] = time;
	version[b].push_back(time);
	size[b].push_back(size[a].back() + size[b].back());
}
int main(){
	ios::sync_with_stdio(0); cin.tie(0);
	cin >> n >> q;
	for(int i = 1; i <= n; i++){
		parent[i] = i;
		version[i].push_back(0);
		size[i].push_back(1);
	}
	for(int i=1; i <= q;i++){
		int type; cin >> type;
		if(type == 1){
			int a, b; cin >> a >> b;
			union_sets(a, b, i);
		}else if(type == 2){
			int a, b, X; cin >> a >> b >> X;
			cout << (get_root(a, X) == get_root(b, X)? "YES" : "NO") << "\n";
		}else{
			int a, X; cin >> a >> X;
			a = get_root(a, X);
			int pos = upper_bound(version[a].begin(),version[a].end(),X) - version[a].begin();
			cout << size[a][pos-1] << "\n";
		}
	}
	return 0;
}

Problem 3 (Hard) :

$$$\textbf{Prerequisites: Persistent segment tree}$$$

Consider the following problem: initially you have $$$M = 1$$$ graph of $$$N$$$ nodes with no edges, and you are given $$$Q$$$ queries, which you have to answer online, of the following types:

Copy the $$$k$$$-th graph and label the new graph $$$M + 1$$$ and set $$$M = M + 1$$$ then add a new edge connecting nodes $$$A$$$ and $$$B$$$ in this graph.
Check whether node $$$A$$$ and node $$$B$$$ are in the same connected component in the $$$k$$$-th graph.
Find the number of nodes in the connected component of node $$$A$$$ in the $$$k$$$-th graph.

Constraints:

$$$ 1 \le N,Q \le 2 \cdot 10^5 $$$

$$$ 1 \le K \le M $$$

$$$ 1 \le A,B \le N $$$

We can’t do the DSU in the same way mentioned in the previous problem since it allows us to update only the last version of the DSU but here we may have to update an older version. To solve this, we can use a persistent segment tree for every array instead, leaf $$$i$$$ would store the value of parent and size for index $$$i$$$ and any non-leaf won’t store anything except the left and right child. When updating $$$parent_i$$$ and $$$size_i$$$ for some index $$$i$$$ for a particular DSU version $$$k$$$, we can just refer to the node $$$i$$$ in the segment tree with root $$$k$$$ and change the leaf values we need to and label the new root $$$M + 1$$$ which will correspond to the DSU version $$$M + 1$$$.

Time complexity: $$$O(N + Q \cdot \log^2 N)$$$

Solution Code

#include <bits/stdc++.h>
using namespace std;
// Capacity of the root[] array, i.e. the maximum number of stored versions.
const int MAX_N = 2e5+5;
// One node of the persistent segment tree. Children are referenced by index
// into the node pool; every field starts at -1, meaning "not set".
struct Node{
	int l = -1;    // index of the left child
	int r = -1;    // index of the right child
	int sz = -1;   // component size (set on leaves)
	int par = -1;  // DSU parent (set on leaves)
};
// Pool holding every segment-tree node of every version; nodes refer to each
// other by index into this vector.
vector<Node>t;
// Builds the subtree rooted at pool index i covering positions [l, r].
// Each leaf for position p starts as its own DSU root: par = p, sz = 1.
void build(int i, int l, int r){
	if(l == r){
		t[i].sz = 1;
		t[i].par = l;
		return;
	}
	const int mid = (l + r) / 2;

	// Allocate each child first, then store its index (no reference into the
	// vector is kept across the emplace_back).
	const int left_child = t.size();
	t.emplace_back();
	t[i].l = left_child;
	build(left_child, l, mid);

	const int right_child = t.size();
	t.emplace_back();
	t[i].r = right_child;
	build(right_child, mid+1, r);
}
int nw(int j){
	int i = t.size();
	t.emplace_back();
	t[i].l = t[j].l;
	t[i].r = t[j].r;
	t[i].par = t[j].par;
	t[i].sz = t[j].sz;
	return i;
}
// Point update at position p in the tree whose (already private) node i covers
// [l, r]. Every child on the path to p is replaced by a fresh copy before it
// is modified, so nodes shared with other versions are never changed.
//   v1: new parent value for the leaf, or -1 to keep the current one.
//   v2: new size value for the leaf, or -1 to keep the current one.
void upd(int i, int l, int r, int p, int v1, int v2){
	if(l == r){
		if(v1 != -1)t[i].par = v1;
		if(v2 != -1)t[i].sz = v2;
		return;
	}
	const int mid = (l+r)/2;
	if(p <= mid){
		// nw() grows the pool and may reallocate it. Finish the call before
		// indexing t[i] for the store, so no stale reference is written through
		// (before C++17, `t[i].l = nw(...)` may bind t[i] first).
		const int child = nw(t[i].l);
		t[i].l = child;
		upd(child, l, mid, p, v1, v2);
	}else{
		const int child = nw(t[i].r);
		t[i].r = child;
		upd(child, mid+1, r, p, v1, v2);
	}
}
// Returns the `sz` stored in the leaf for position p, descending from node i
// which covers [l, r].
int get_sz(int i, int l, int r, int p){
	while(l != r){
		const int mid = (l + r) / 2;
		if(p <= mid){
			i = t[i].l;
			r = mid;
		}else{
			i = t[i].r;
			l = mid + 1;
		}
	}
	return t[i].sz;
}
// Returns the `par` stored in the leaf for position p, descending from node i
// which covers [l, r].
int get_par(int i, int l, int r, int p){
	if(l != r){
		const int mid = (l + r) / 2;
		return p <= mid ? get_par(t[i].l, l, mid, p)
		                : get_par(t[i].r, mid+1, r, p);
	}
	return t[i].par;
}
// n, q: read from the input in main. m: index of the most recently created
// version (starts at 0). root[v]: pool index of the segment-tree root of
// version v; root[0] is 0, the node main builds the initial tree on.
int n, m, q, root[MAX_N];
// Finds the DSU root of element j in the version whose segment-tree root is
// node i, and compresses the path by pointing every visited element directly
// at that root. Returns the DSU root.
int compress(int i, int j){
	const int nxt = get_par(i, 1, n, j);
	if(nxt == j)return j;
	const int top = compress(i, nxt);
	// Only write when the parent actually changes: a no-op write would still
	// copy a whole root-to-leaf path of segment-tree nodes.
	if(top != nxt)
		upd(i, 1, n, j, top, -1);
	return top;
}
// Reads n and q, builds version 0 (no edges) and processes q queries. The
// version number k in the input is 1-based and is converted to a 0-based
// index into root[].
//   type 1 (k a b): create a new version from version k and add edge a-b to it.
//   type 2 (k a b): print YES/NO, whether a and b are connected in version k.
//   type 3 (k a):   print the size of a's component in version k.
int main(){
	ios::sync_with_stdio(0); cin.tie(0);
	cin >> n >> q;
	t.emplace_back();
	build(0, 1, n);
	while(q--){
		int type; cin >> type;
		if(type==1){
			int k, a, b; cin >> k >> a >> b;
			k--;
			// Compress in the source version first, then snapshot it, so the
			// new version shares the already-compressed paths.
			a = compress(root[k], a);
			b = compress(root[k], b);
			m++;
			root[m]=nw(root[k]);
			if(a == b)continue;
			int szl = get_sz(root[k] ,1 ,n ,a), szr = get_sz(root[k], 1, n, b);
			if(szl > szr)swap(a, b);
			// a is now the smaller root: hang it under b in the new version.
			upd(root[m], 1, n, b, -1, szl+szr);
			upd(root[m], 1, n, a, b, -1);
		}else if(type==2){
			int k ,a ,b; cin >> k >> a >> b;
			k--;
			// Separate statements: both calls modify the node pool, so their
			// order must be fixed.
			const int ra = compress(root[k], a);
			const int rb = compress(root[k], b);
			cout << (ra == rb? "YES" : "NO") << "\n";
		}else{
			int k, a; cin >> k >> a;
			k--;
			const int ra = compress(root[k], a);
			cout << (get_sz(root[k], 1, n, ra)) << "\n";
		}
	}
	return 0;
}

Extra Problems:

https://qoj.ac/problem/1217

https://mirror.codeforces.com/gym/104468/problem/B

https://oj.uz/problem/view/APIO20_swap

Thanks for reading!

Full text and comments »

The segment tree is a powerful data structure, and it can still be optimized further. In this blog I will explain one optimization that can make a basic segment tree slightly faster and easier to write. (Idea and code by me.)

This does not work on range update range query segment trees.

Introduction:

Let's consider a point update range query segment tree. While answering a query we move from the root downwards and visit many useless nodes along the way.

As you can see, there are nodes (marked in red) that are not needed during the recursion, and we only need to visit the important nodes (marked in green).

This is only true when querying in a point update segment tree or updating in a point query segment tree.

Main Idea:

We can solve a query range $$$[l, r]$$$ by noticing we can make it a smaller range $$$[l + X , r]$$$, where $$$X$$$ is any power of two but we need it to be maximum (in order to reduce the time complexity) and these two conditions should be true:

$$$l + X - 1 \le r$$$. (We cannot go out of the range)
$$$[l, l + X - 1]$$$ is a valid node in the segment tree.

The first condition:

The second condition

Putting both together, $$$X = 2^K$$$ with $$$K = \min(\lfloor \log_2(r-l+1) \rfloor, \log_2(M \,\&\, -M))$$$, where $$$M = N + l - 1$$$ is the index of the leaf for position $$$l$$$; this satisfies the first and second conditions and is the maximum value possible.

C++ Code:

We can preprocess $$$log_2(K)$$$ for each $$$1 \le K \le N$$$ in an array.

Note that this only works when $$$N$$$ (the number of leaves) is a power of 2.

At each step we calculate the size of the movement $$$X$$$ which is equal to $$$2^K$$$

The following codes calculate sum in the range $$$L$$$ to $$$R$$$, assuming the segment tree is built after possibly several update queries.

Recursive:

// Recursive range sum over positions l..r (inclusive). Position p is stored
// at leaf index N + p - 1. Each call takes the largest aligned block that
// starts at l and fits inside [l, r], then continues after it.
long long query(int l, int r){
	if(l > r)
		return 0;
	const int leaf = N + l - 1;
	const int by_alignment = logs[leaf & -leaf];  // largest block starting at this leaf
	const int by_length = logs[r - l + 1];        // largest block that still fits
	const int K = by_alignment < by_length ? by_alignment : by_length;
	const long long rest = query(l + (1 << K), r);
	return rest + seg[leaf >> K];
}

Iterative:

// Iterative range sum over positions l..r (inclusive). Position p is stored
// at leaf index N + p - 1. Repeatedly adds the largest aligned block starting
// at l that fits in the remaining range, then jumps past it.
long long query(int l, int r){
	long long ret = 0;
	for(int K = 0; l <= r; l += (1 << K)){
		const int node = N + l - 1;
		K = min(logs[node & -node], logs[r - l + 1]);
		ret += seg[node >> K];
	}
	return ret;
}

This can also be applied to range update point query segment trees:

// Adds val to every block of the decomposition of [l, r]: the range is split
// into maximal aligned blocks from left to right, and for each block's node
// both seg[] and lazy[] are increased by val.
void update(int l, int r, int val){
	int cur = l;
	while(!(cur > r)){
		const int node = N + cur - 1;
		const int K = min(logs[node & -node], logs[r - cur + 1]);
		const int target = node >> K;
		seg[target] += val;
		lazy[target] += val;
		cur += 1 << K;
	}
}

Benchmark:

Test-Cases Generator

#include <bits/stdc++.h>
using namespace std;
// Benchmark input generator. Writes to input.txt:
//   line 1: "N N" (array size and number of queries),
//   line 2: N random values in [0, MX],
//   then N queries, each either
//     "1 pos val" - point assignment, pos in [0, N-1], val in [0, MX], or
//     "2 l r"     - range query with 0 <= l < r <= N.
int main(){
    ios::sync_with_stdio(0);cin.tie(0);
    freopen("input.txt","w",stdout);
    mt19937 mt1(time(NULL));
    const int N = (1<<23),MX=1e9;
    cout<<N<<" "<<N<<"\n";
    for(int i=1;i<=N;i++){
        cout<<(mt1()%(MX+1))<<" ";
    }
    cout<<"\n";
    for(int i=1;i<=N;i++){
        int q = mt1()&1;
        cout<<q+1<<" ";
        if(q==0){
            cout<<(mt1()%N)<<" "<<(mt1()%(MX+1))<<"\n";
        }else{
            int l = mt1()%N;
            // Offset is in [0, N-l-1], so r stays within [l+1, N] and never
            // points past the last array element.
            int r = (mt1()%(N-l))+l+1;
            cout<<l<<" "<<r<<"\n";
        }
    }
    return 0;
}

SD-Segment-Tree Code

#include <bits/stdc++.h>
using namespace std;
// Number of leaves; a power of two.
const int N = (1<<23);
// Tree in heap order: node i has children 2i and 2i+1; main reads the array
// into seg[N], seg[N+1], ...
long long seg[N<<2];
// x: number of array elements read from the input; q: number of queries.
int x,q;
// Point assignment: sets 1-based position l to value r, then recomputes the
// sum of every ancestor up to the root (node 1).
void upd(int l,int r){
	int node = l + N - 1;
	seg[node] = r;
	for(node >>= 1; node >= 1; node >>= 1)
		seg[node] = seg[2*node] + seg[2*node+1];
}
// Range sum over 1-based positions l..r (inclusive). Walks the leaf level
// from left to right; at each step it adds the largest aligned block that
// starts at the current leaf and fits in the remaining range, then skips it.
long long qry(int l,int r){
	long long total = 0;
	int node = l + N - 1;         // leaf index of position l
	const int last = r + N - 1;   // leaf index of position r
	while(node <= last){
		const int shift = min(__lg(node & -node), __lg(last - node + 1));
		total += seg[node >> shift];
		node += 1 << shift;
	}
	return total;
}
// Benchmark driver: reads the array and queries from input.txt, writes the
// query answers to output.txt and the elapsed time to time.txt.
// Query format: "1 pos val" assigns val at 0-based pos; "2 l r" asks for the
// sum of the 0-based half-open range [l, r).
int main(){
	ios::sync_with_stdio(0);cin.tie(0);
	freopen("input.txt","r",stdin);
	ofstream time("time.txt");
	ofstream out("output.txt");
	auto st = chrono::steady_clock::now().time_since_epoch().count();
	cin>>x>>q;
	for(int i=0;i<x;i++)cin>>seg[N+i];
	for(int i=N-1;i>=1;i--)seg[i]=seg[i<<1]+seg[i<<1|1];
	for(int i=1;i<=q;i++){
		int t,l,r;cin>>t>>l>>r;
		if(t==1)upd(l+1,r);
		// qry takes a 1-based inclusive range, so the 0-based l is shifted by
		// one (same convention as upd); passing l unshifted would read an
		// internal node for l == 0.
		else if(t==2)out << qry(l+1,r) << "\n";
	}
	auto en = chrono::steady_clock::now().time_since_epoch().count();
	// NOTE(review): dividing by 1e9 assumes steady_clock ticks are nanoseconds.
	time << (en - st) / 1e9 << 's';
	return 0;
}

Iterative-Segment-Tree Code

#include <bits/stdc++.h>
using namespace std;
// Number of leaves; a power of two.
const int N = (1<<23);
// Tree in heap order: node i has children 2i and 2i+1; main reads the array
// into seg[N], seg[N+1], ...
long long seg[N<<2];
// x: number of array elements read from the input; q: number of queries.
int x,q;
// Point assignment: stores value r at 1-based position l and refreshes the
// sums on the path from that leaf up to the root (node 1).
void upd(int l,int r){
	l += N-1;
	seg[l] = r;
	while(l > 1){
		l >>= 1;
		seg[l] = seg[l<<1] + seg[(l<<1)|1];
	}
}
// Bottom-up range sum over the half-open range [l, r) of 0-based positions
// (position p lives at leaf N + p).
long long qry(int l, int r) {
	long long res = 0;
	l += N;
	r += N;
	while (l < r) {
		if ((l & 1) != 0) {   // l is a right child: take it and step right
			res += seg[l];
			++l;
		}
		if ((r & 1) != 0) {   // r is a right child: take its left sibling
			--r;
			res += seg[r];
		}
		l >>= 1;
		r >>= 1;
	}
	return res;
}
// Benchmark driver: reads the array and queries from input.txt, writes the
// query answers to output.txt and the elapsed time to time.txt.
// Query format: "1 pos val" assigns val at 0-based pos; "2 l r" asks for the
// sum of the 0-based half-open range [l, r).
int main(){
	ios::sync_with_stdio(0);cin.tie(0);
	freopen("input.txt","r",stdin);
	ofstream time("time.txt");
	ofstream out("output.txt");
	auto st = chrono::steady_clock::now().time_since_epoch().count();
	cin>>x>>q;
	for(int i=0;i<x;i++)cin>>seg[N+i];
	for(int i=N-1;i>=1;i--)seg[i]=seg[i<<1]+seg[i<<1|1];
	for(int i=1;i<=q;i++){
		int t,l,r;cin>>t>>l>>r;
		if(t==1)upd(l+1,r);
		// qry already works on the 0-based half-open range [l, r), so l is
		// passed unshifted; shifting it would drop element l from the sum.
		else if(t==2)out << qry(l,r) << "\n";
	}
	auto en = chrono::steady_clock::now().time_since_epoch().count();
	// NOTE(review): dividing by 1e9 assumes steady_clock ticks are nanoseconds.
	time << (en - st) / 1e9 << 's';
	return 0;
}

Recursive-Segment-Tree Code

#include <bits/stdc++.h>
using namespace std;
// Number of leaves; the root (node 1) covers positions 1..N.
const int N = (1<<23);
// Tree in heap order: node i has children 2i and 2i+1; main reads the array
// into seg[N], seg[N+1], ...
long long seg[N<<2];
// x: number of array elements read from the input; q: number of queries.
int x,q;
// Point assignment on the subtree of node i covering [l, r]: sets position s
// to val and recomputes the sums of the nodes on the way back up.
void upd(int i,int l,int r,int s,int val){
	if(l != r){
		const int mid = (l + r) >> 1;
		const int lc = i << 1;
		const int rc = i << 1 | 1;
		if(s > mid)
			upd(rc, mid+1, r, s, val);
		else
			upd(lc, l, mid, s, val);
		seg[i] = seg[lc] + seg[rc];
		return;
	}
	seg[i] = val;
}
// Sum of positions s..e (inclusive) restricted to node i, which covers [l, r].
// A node fully inside the query contributes its stored sum; otherwise the
// query descends into each child that overlaps [s, e].
long long qry(int i,int l,int r,int s,int e){
	const bool fully_covered = s <= l && r <= e;
	if(fully_covered)
		return seg[i];
	const int mid = (l + r) >> 1;
	const long long left_part = (s <= mid) ? qry(i<<1, l, mid, s, e) : 0;
	const long long right_part = (mid < e) ? qry(i<<1|1, mid+1, r, s, e) : 0;
	return left_part + right_part;
}
// Benchmark driver: reads the array and queries from input.txt, writes the
// query answers to output.txt and the elapsed time to time.txt.
// Query "1 pos val" assigns val at position pos+1; query "2 l r" prints the
// sum of positions l+1..r (1-based, inclusive).
int main(){
	ios::sync_with_stdio(0);
	cin.tie(0);
	freopen("input.txt","r",stdin);
	ofstream time("time.txt");
	ofstream out("output.txt");
	const auto st = chrono::steady_clock::now().time_since_epoch().count();

	cin >> x >> q;
	for(int i = 0; i < x; i++)
		cin >> seg[N+i];
	// Build the internal nodes bottom-up from the leaves.
	for(int i = N-1; i >= 1; i--)
		seg[i] = seg[2*i] + seg[2*i+1];

	for(int done = 0; done < q; done++){
		int t, l, r;
		cin >> t >> l >> r;
		switch(t){
			case 1:
				upd(1, 1, N, l+1, r);
				break;
			case 2:
				out << qry(1, 1, N, l+1, r) << "\n";
				break;
			default:
				break;
		}
	}

	const auto en = chrono::steady_clock::now().time_since_epoch().count();
	// NOTE(review): dividing by 1e9 assumes steady_clock ticks are nanoseconds.
	time << (en - st) / 1e9 << 's';
	return 0;
}

Size of the array and the number of queries	Time of SD-Segment-Tree /S	Time of Recursive-Segment-Tree /S	Time of Iterative-Segment-Tree /S
$$$N,Q = 2^{16}$$$	00.2847	00.3163	00.2292
$$$N,Q = 2^{17}$$$	00.4311	00.5335	00.4414
$$$N,Q = 2^{18}$$$	00.8322	00.9534	00.9729
$$$N,Q = 2^{19}$$$	01.9915	02.1086	01.6837
$$$N,Q = 2^{20}$$$	03.6747	04.4253	03.7347
$$$N,Q = 2^{21}$$$	08.0204	08.6896	07.7844
$$$N,Q = 2^{22}$$$	20.9266	27.0589	24.3542
$$$N,Q = 2^{23}$$$	50.0656	61.9385	49.8065

Conclusion:

This variation has the same time complexity as the normal segment tree $$$O(log(N))$$$ per query, but might need more memory if you preprocess Logs array.

The constant factor is smaller because we skip the unnecessary nodes, but in practice the running time is not significantly different from the normal segment tree for smaller array sizes.

This can only be useful for squeezing in time limits or for becoming an easier way to implement segment trees because it is shorter.

UPD: Added Benchmark

Full text and comments »

sword060's blog

Introduction:

Main Idea:

Problem 1 (Easy) :

Problem 2 (Medium) :

Problem 3 (Hard) :

Extra Problems:

Introduction:

Main Idea:

C++ Code:

Recursive:

Iterative:

Benchmark:

Conclusion:

UPD: Added Benchmark