We want to thank you all for participating in the contest, and hope you enjoyed it. Any feedback would be appreciated!
A — Modular Station Assembly
Writer: amartya110
Even Dimensions: If $$$n$$$ or $$$m$$$ is even, the grid can be perfectly tiled using only $$$1 \times 2$$$ I-Beams, making the answer YES.Thin Grids: If $$$n=1$$$ or $$$m=1$$$ (and the other is odd), the C-Core module cannot fit physically as it requires a $$$2 \times 3$$$ footprint, leaving an odd area that $$$1 \times 2$$$ beams cannot fill (NO).Small Odd Grids ($$$3 \times 3$$$): Although the area is 9, placing a $$$2 \times 3$$$ footprint C-Core in a $$$3 \times 3$$$ space creates geometric obstructions that prevent the remaining 4 cells from forming two valid $$$1 \times 2$$$ blocks (NO).Large Odd Grids: For all other odd $$$n, m \ge 3$$$, there is sufficient "maneuvering room" to place one C-Core such that the remaining even area is contiguous and tileable by I-Beams (YES).
#include <bits/stdc++.h>
using namespace std;
#define ll long long int
int main() {
ios::sync_with_stdio(false);
cin.tie(nullptr);
int t;
cin >> t;
while (t--) {
ll n,m; cin>>n>>m;
if(n%2==0 || m%2==0) cout<<"YES"<<"\n";
else if(n==1 || m==1) cout<<"NO"<<"\n";
else if(n==3 && m==3) cout<<"NO"<<"\n";
else cout<<"YES"<<"\n";
}
}
B — The Stoichiometry Constraint
Writer: amartya110
The problem requires finding integers $$$a$$$ and $$$b$$$ such that $$$ax + b = y$$$ under the constraints $$$b \gt 0$$$ and $$$a \gt 2b$$$. By substituting $$$b = y - ax$$$ into the second inequality, we derive $$$a \gt 2(y - ax)$$$, which simplifies to $$$a(2x + 1) \gt 2y$$$, or $$$a \gt \frac{2y}{2x + 1}$$$. To find the smallest valid integer $$$a$$$, we calculate $$$a = \lfloor \frac{2y}{2x + 1} \rfloor + 1$$$. Once $$$a$$$ is determined, we calculate the corresponding $$$b = y - ax$$$; if $$$b$$$ remains strictly positive ($$$ax \lt y$$$), the pair $$$(a, b)$$$ is a valid solution, otherwise, no such pair exists because increasing $$$a$$$ further would only decrease $$$b$$$ into non-positive values. Since $$$x$$$ and $$$y$$$ can reach $$$10^{14}$$$, the product $$$a \cdot x$$$ can exceed the capacity of a 64-bit integer, necessitating the use of __int128 for overflow-safe intermediate calculations.
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
// Solve one query: find the smallest a with a*x + b = y, b > 0 and a > 2b,
// or print -1 when no such pair exists.
//
// From b = y - a*x and a > 2b we get a > 2y / (2x + 1), so the minimal
// candidate is a = floor(2y / (2x + 1)) + 1.  If even this a yields b <= 0,
// no answer exists: increasing a only shrinks b further.
void solve() {
    long long x, y;
    std::cin >> x >> y;
    // 2*y fits in 64 bits (y <= 1e14), so this division is overflow-safe.
    long long a = (2 * y) / (2 * x + 1) + 1;
    // a*x may exceed 64 bits, so the comparison is done in 128-bit arithmetic.
    if ((__int128)a * x < (__int128)y) {
        long long b = y - a * x;  // here a*x < y <= 1e14, so 64-bit is safe
        // '\n' instead of endl: avoid flushing the stream on every query.
        std::cout << a << " " << b << "\n";
    } else {
        std::cout << -1 << "\n";
    }
}
// Entry point: read the number of test cases and dispatch each one to solve().
int main() {
    ios_base::sync_with_stdio(false);
    cin.tie(NULL);
    int tests;
    cin >> tests;
    for (int i = 0; i < tests; ++i) {
        solve();
    }
    return 0;
}
C — Server Reliability Level
Writer: mahesh_9996
The goal is to find the maximum integer $$$R$$$ (similar to the H-index) such that at least $$$R$$$ servers have $$$R$$$ or more requests, leveraging the fact that the input is already sorted. By picking an index $$$i$$$, we know there are exactly $$$n - i$$$ servers with at least $$$requests[i]$$$ requests; therefore, we seek the largest $$$R$$$ where $$$R \le n - i$$$ and $$$R \le requests[i]$$$. Since the "count of servers" $$$(n-i)$$$ decreases as $$$i$$$ increases, while the "request value" $$$requests[i]$$$ increases, the optimal $$$R$$$ occurs at the intersection of these two constraints. The binary search efficiently locates the leftmost index $$$mid$$$ where $$$requests[mid] \ge (n - mid)$$$, allowing us to identify the maximum possible $$$R$$$ as $$$n - left$$$ in $$$O(\log n)$$$ time.
#include <bits/stdc++.h>
using namespace std;
// Largest R such that at least R servers have >= R requests (H-index style).
//
// Precondition: `requests` is sorted in non-decreasing order.
// Binary search for the leftmost index where requests[mid] >= n - mid
// (n - mid elements from mid onward all have at least that many requests);
// the answer is the count of elements from that index to the end.
// O(log n); returns 0 for an empty input.
// Taken by const reference: the input is read-only (was a mutable ref).
int reliabilityLevel(const std::vector<long long int>& requests) {
    long long int n = requests.size();
    long long int left = 0, right = n - 1;
    while (left <= right) {
        long long int mid = left + (right - left) / 2;
        long long int servers = n - mid;  // servers with >= requests[mid] requests
        if (requests[mid] >= servers)
            right = mid - 1;  // feasible here; try to include more servers
        else
            left = mid + 1;
    }
    // Explicit cast: the result is at most n, which fits int for the given limits.
    return (int)(n - left);
}
// Driver: for each test case read n request counts (already sorted) and print
// the server reliability level.
int main() {
    long long int t;
    cin >> t;
    for (; t > 0; --t) {
        long long int n;
        cin >> n;
        vector<long long int> a(n);
        for (long long int i = 0; i < n; ++i) {
            cin >> a[i];
        }
        cout << reliabilityLevel(a) << "\n";
    }
}
D — The Mystic Sequence of 21 (Multitest Version)
Writer: mahesh_9996
To solve "The Gateway of 21," we use dynamic programming to find the longest subsequence that forms a number divisible by 21 while respecting the "no leading zeros" rule. We maintain a DP table of size $$$3 \times 7$$$ to track the maximum length of a subsequence for every possible combination of remainders modulo 3 and modulo 7. As we iterate through the string, a non-zero digit can either start a new sequence or extend an existing one by updating the remainders $$$( (rem3 + digit) \pmod 3, (rem7 \times 10 + digit) \pmod 7 )$$$, whereas a '0' is tracked separately as a potential single-digit solution since it cannot lead a multi-digit number. The final answer is the total string length minus the maximum length found in the dp[0][0] state (or length 1 if only a '0' is available); if no such subsequence exists, we return -1.
#include <bits/stdc++.h>
using namespace std;
// Problem D: remove the fewest characters of s so that the remaining
// subsequence reads as a number divisible by 21 (= 3 * 7) with no leading zero.
// Equivalently: find the LONGEST valid subsequence and subtract from |s|.
int main() {
ios::sync_with_stdio(false);
cin.tie(nullptr);
int t;
cin>>t;
while(t--){
string s;
if(!(cin >> s)) return 0;
int n = (int)s.size();
// dp_nz[r3][r7]: max length of a subsequence that starts with a NON-zero
// digit, whose digit sum is r3 (mod 3) and whose numeric value is r7 (mod 7);
// -1 marks an unreachable state.  (Value mod 3 equals digit sum mod 3.)
int dp_nz[3][7];
for(int i=0;i<3;i++) for(int j=0;j<7;j++) dp_nz[i][j] = -1;
// A lone '0' is itself divisible by 21 but cannot lead a longer number,
// so it is tracked separately from the DP table.
int has_zero = 0;
for(char ch : s) {
int d = ch - '0';
// Work on a copy so every transition reads the state from BEFORE this digit.
int new_dp[3][7];
for(int i=0;i<3;i++) for(int j=0;j<7;j++) new_dp[i][j] = dp_nz[i][j];
int new_has_zero = has_zero;
if(d == 0) {
new_has_zero = max(new_has_zero, 1); // '0' may only be a single-digit answer
} else {
int r3 = d % 3;
int r7 = d % 7;
new_dp[r3][r7] = max(new_dp[r3][r7], 1); // start a new subsequence at d
}
// Extend every reachable subsequence with digit d: appending a digit
// multiplies the value by 10 (hence r7*10 + d mod 7) and adds d to the
// digit sum (hence r3 + d mod 3).  This runs for d == 0 too — zeros may
// appear anywhere except the leading position.
for(int r3 = 0; r3 < 3; ++r3) {
for(int r7 = 0; r7 < 7; ++r7) {
if(dp_nz[r3][r7] == -1) continue;
int nr3 = (r3 + d) % 3;
int nr7 = (r7 * 10 + d) % 7;
new_dp[nr3][nr7] = max(new_dp[nr3][nr7], dp_nz[r3][r7] + 1);
}
}
for(int i=0;i<3;i++) for(int j=0;j<7;j++) dp_nz[i][j] = new_dp[i][j];
has_zero = new_has_zero;
}
// Best valid subsequence: remainder (0 mod 3, 0 mod 7) => divisible by 21,
// or the single '0' fallback of length 1.
int best = -1;
if(dp_nz[0][0] != -1) best = max(best, dp_nz[0][0]);
if(has_zero) best = max(best, 1);
if(best <= 0) cout << -1 << '\n';
else cout << (n - best) << '\n';
}
return 0;
}
E — House Cleaning
Writer: mahesh_9996
This problem is a classic application of Graph Theory on a 2D grid. To solve it, we must transform the geometric representation of houses and workers into a bipartite graph.
- The Core Insight: Bipartite Mapping
Each house at coordinates $$$(x, y)$$$ represents a "connection" between a specific row and a specific column. Since a worker can only be assigned to a row or a column, we can model this as follows:
a. Set $$$U$$$ (Left Side): All unique $$$x$$$-coordinates (representing rows).
b. Set $$$V$$$ (Right Side): All unique $$$y$$$-coordinates (representing columns).
c. Edges: For every house at $$$(x_i, y_i)$$$, draw an edge connecting node $$$x_i$$$ in $$$U$$$ to node $$$y_i$$$ in $$$V$$$.
Problem Transformation: The goal is to select the minimum number of rows ($$$x$$$-nodes) or columns ($$$y$$$-nodes) such that every house (edge) is "covered" (connected to at least one selected node). In graph theory, this is known as the Minimum Vertex Cover problem.
Kőnig's Theorem: A fundamental theorem in graph theory, Kőnig's Theorem, states that in any bipartite graph, the number of vertices in a minimum vertex cover is exactly equal to the number of edges in a maximum matching. Maximum Matching: The largest set of edges that do not share any common vertices. Why it works: Finding the minimum number of workers (Vertex Cover) is mathematically equivalent to finding the maximum number of independent houses that cannot be serviced by the same row or column (Matching).
The Algorithm: Hopcroft-Karp: While a simple DFS-based matching algorithm works for smaller constraints, this problem has $$$N = 2 \times 10^5$$$. We use the Hopcroft-Karp Algorithm to efficiently find the maximum matching.
Step 1: Coordinate Compression. Since $$$x$$$ and $$$y$$$ can be up to $$$10^9$$$, we map them to smaller integers ($$$1$$$ to $$$N$$$) using sorting and lower_bound. This makes the graph manageable.
Step 2: BFS Phase. The algorithm uses a Breadth-First Search to find the shortest "augmenting paths" (paths that start and end at unmatched nodes and alternate between matched and unmatched edges).
Step 3: DFS Phase. A Depth-First Search then travels along these shortest paths to flip the edges (unmatched becomes matched), increasing the total matching count.
Step 4: Iteration. This continues until no more augmenting paths can be found.
- Complexity & Conclusion
The time complexity of Hopcroft-Karp is $$$O(E\sqrt{V})$$$, where $$$E$$$ is the number of houses and $$$V$$$ is the number of unique coordinates. Given $$$N = 2 \times 10^5$$$, this algorithm is significantly faster than the standard $$$O(VE)$$$ approaches, allowing it to pass within the time limit. The final "Maximum Matching" value returned is our answer.
#include <bits/stdc++.h>
using namespace std;
// Dinic's maximum-flow algorithm.  On the unit-capacity bipartite graph built
// by this problem it runs in O(E * sqrt(V)) — the same bound as Hopcroft-Karp.
struct Dinic {
    // One directed edge; `rev` is the index of the paired reverse edge in adj[to].
    struct Edge {
        int to, cap, flow, rev;
    };
    std::vector<std::vector<Edge>> adj;  // adjacency lists of edges
    std::vector<int> level, ptr;         // BFS levels / per-node DFS edge cursor
    Dinic(int n) : adj(n), level(n), ptr(n) {}
    // Add a directed edge from -> to with capacity `cap`, plus the
    // zero-capacity reverse edge used to cancel flow.
    void addEdge(int from, int to, int cap) {
        adj[from].push_back({to, cap, 0, (int)adj[to].size()});
        adj[to].push_back({from, 0, 0, (int)adj[from].size() - 1});
    }
    // Build the level graph on residual edges; false when t is unreachable
    // (no augmenting path remains, the algorithm terminates).
    bool bfs(int s, int t) {
        std::fill(level.begin(), level.end(), -1);
        level[s] = 0;
        std::queue<int> q;
        q.push(s);
        while (!q.empty()) {
            int v = q.front();
            q.pop();
            for (auto& edge : adj[v]) {
                if (edge.cap - edge.flow > 0 && level[edge.to] == -1) {
                    level[edge.to] = level[v] + 1;
                    q.push(edge.to);
                }
            }
        }
        return level[t] != -1;
    }
    // Push up to `pushed` units from v toward t along level-increasing
    // residual edges.  `ptr[v]` persists across calls within one phase so
    // dead edges are never rescanned.
    int dfs(int v, int t, int pushed) {
        if (pushed == 0 || v == t) return pushed;
        // (int) cast fixes the signed/unsigned comparison of the original.
        for (int& cid = ptr[v]; cid < (int)adj[v].size(); ++cid) {
            auto& edge = adj[v][cid];
            int tr = edge.to;
            if (level[v] + 1 != level[tr] || edge.cap - edge.flow == 0) continue;
            int push = dfs(tr, t, std::min(pushed, edge.cap - edge.flow));
            if (push == 0) continue;
            edge.flow += push;
            adj[tr][edge.rev].flow -= push;
            return push;
        }
        return 0;
    }
    // Total maximum flow from s to t: repeat BFS phases, saturating all
    // shortest augmenting paths in each phase.
    int max_flow(int s, int t) {
        int flow = 0;
        while (bfs(s, t)) {
            std::fill(ptr.begin(), ptr.end(), 0);
            while (int pushed = dfs(s, t, 1e9)) {
                flow += pushed;
            }
        }
        return flow;
    }
};
// Problem E: minimum number of workers (each cleaning one full row or one
// full column) covering every house.  Houses are edges of a bipartite graph
// between compressed x-coordinates and compressed y-coordinates; by Kőnig's
// theorem the minimum vertex cover equals the maximum matching, which we
// compute as a unit-capacity maximum flow.
int main() {
    ios::sync_with_stdio(false);
    cin.tie(NULL);
    int n;
    cin >> n;
    vector<int> xs(n), ys(n);
    vector<int> vx, vy;
    for (int i = 0; i < n; i++) {
        cin >> xs[i] >> ys[i];
        vx.push_back(xs[i]);
        vy.push_back(ys[i]);
    }
    // Coordinate compression: values reach 1e9 but only n distinct ones matter.
    sort(vx.begin(), vx.end());
    vx.erase(unique(vx.begin(), vx.end()), vx.end());
    sort(vy.begin(), vy.end());
    vy.erase(unique(vy.begin(), vy.end()), vy.end());
    int nx = vx.size();
    int ny = vy.size();
    // Node layout: source 0, rows 1..nx, columns nx+1..nx+ny, sink nx+ny+1.
    int S = 0, T = nx + ny + 1;
    Dinic dinic(T + 1);
    for (int i = 0; i < nx; i++) dinic.addEdge(S, i + 1, 1);
    for (int i = 0; i < ny; i++) dinic.addEdge(nx + i + 1, T, 1);
    for (int i = 0; i < n; i++) {
        int r = lower_bound(vx.begin(), vx.end(), xs[i]) - vx.begin() + 1;
        int c = lower_bound(vy.begin(), vy.end(), ys[i]) - vy.begin() + 1 + nx;
        dinic.addEdge(r, c, 1);  // one unit edge per house
    }
    // '\n' instead of endl: no need to flush explicitly before returning.
    cout << dinic.max_flow(S, T) << "\n";
    return 0;
}
F — The Binary Alchemist
Writer: mahesh_9996
$$$1.$$$ Understanding the Core Constraint
The most important rule in this problem is the relationship between the energy $$$a_i$$$ and its "bit power" (popcount). We need:
$$$\sum a_i = m$$$ $$$\bigoplus \text{popcount}(a_i) = k$$$ The target XOR value $$$k$$$ is small ($$$k \le 31$$$), which suggests we should look at the binary representation of $$$k$$$ itself to construct our initial set of crystals.
$$$2.$$$ Construction Strategy: The "Power of Two" Approach
A clever way to satisfy the XOR condition is to pick numbers whose popcounts are themselves powers of two. If $$$k$$$ has a 1 at the $$$i$$$-th bit (representing $$$2^i$$$), we can include a crystal whose popcount is exactly $$$2^i$$$.
Step 1: Satisfying $$$k$$$. For every bit $$$i$$$ set in $$$k$$$, we create a crystal $$$a_i = 2^{2^i} - 1$$$. Example: If the $$$0$$$-th bit of $$$k$$$ is set ($$$2^0=1$$$), we create a number with popcount 1, like $$$(1 \ll 1) - 1 = 1$$$. Example: If the $$$2$$$-nd bit of $$$k$$$ is set ($$$2^2=4$$$), we create a number with popcount 4, like $$$(1 \ll 4) - 1 = 15$$$ (binary 1111).
Step 2: Calculating the Deficit. Let $$$x$$$ be the sum of these initial crystals. We calculate the remaining energy needed: $$$d = m - x$$$.
$$$3.$$$ Handling the Remainder ($$$d$$$)
We need to add $$$d$$$ to our sum without changing the XOR sum of the popcounts. The XOR sum remains unchanged if we add crystals in pairs with the same popcount, because $$$P \oplus P = 0$$$. If $$$d \lt 0$$$: Our initial construction exceeded $$$m$$$. This specific greedy approach fails (Output -1). If $$$d = 0$$$: We are done. The initial crystals sum perfectly to $$$m$$$. If $$$d$$$ is even and $$$d \ge 2$$$: We can simply add two crystals, each being $$$d/2$$$. Since they are identical, their popcounts are identical, and $$$pop(d/2) \oplus pop(d/2) = 0$$$. If $$$d$$$ is odd: This is trickier. We can't just split $$$d$$$ into two equal integers. Instead, we use a small constant set like $$$\{1, 2, (d-3)/2, (d-3)/2\}$$$. $$$1$$$ and $$$2$$$ both have a popcount of 1, so their popcounts cancel: $$$1 \oplus 1 = 0$$$. The two $$$(d-3)/2$$$ values are identical, so their XOR contribution is 0. The sum is $$$1 + 2 + \frac{d-3}{2} + \frac{d-3}{2} = 3 + d - 3 = d$$$.
$$$4.$$$ Edge Cases and Adjustments
The $$$d=1$$$ case: If $$$d=1$$$, we can't use the pair strategy. The code attempts to "absorb" the 1 into an existing crystal. If we have a crystal $$$a[0]=1$$$ (popcount 1), we change it to $$$2$$$ (also popcount 1). The sum increases by 1, but the XOR sum is preserved. The $$$-1$$$ condition: If $$$d$$$ is negative, or if $$$d=1$$$ but we have no suitable crystal to increment, we output -1.
Summary of Complexity
Time Complexity: $$$O(T \log K)$$$ per test case to iterate through bits of $$$K$$$. Given $$$K \le 31$$$, this is effectively $$$O(1)$$$ relative to $$$M$$$. Space Complexity: $$$O(N)$$$ to store the sequence of crystals.
#include <iostream>
#include <vector>
using namespace std;
// Problem F: build crystals a_i with sum m whose popcounts XOR to k (k <= 31).
// For every set bit 2^i of k emit one crystal with popcount exactly 2^i
// (value 2^(2^i) - 1); then pad the remaining energy d = m - sum with pairs
// of equal values, which leave the popcount XOR unchanged (P xor P = 0).
int main(){
    cin.tie(0) -> sync_with_stdio(0);
    int T; cin >> T;
    while(T--){
        int m, k; cin >> m >> k;
        vector<int> a;      // crystals realising the XOR target k
        int x = 0;          // their total energy
        for(int i = 0; i < 5; i++){        // k <= 31, so 5 bits suffice
            if(k & (1 << i)){
                a.push_back((1 << (1 << i)) - 1);  // popcount is exactly 2^i
                x += a.back();
            }
        }
        int d = m - x;      // energy still missing
        vector<int> b;      // padding crystals with zero XOR contribution
        int flag = 0;
        if(d < 0){
            flag = 1;       // base construction already exceeds m
        }else if(d >= 2){
            if(d % 2 == 0){
                b = {d/2, d/2};            // equal pair: popcounts cancel
            }else{
                // pop(1) = pop(2) = 1 cancel each other; the equal pair cancels
                // too, and 1 + 2 + 2*(d-3)/2 = d.
                // NOTE(review): for d == 3 this emits two zero-valued crystals —
                // confirm the statement allows zeros.
                b = {1, 2, (d-3)/2, (d-3)/2};
            }
        }else if(d == 0){
            b = {};
        }else{
            // d == 1: absorb it by turning a crystal of value 1 into 2 (same
            // popcount, sum grows by 1).  Bug fix: guard against an empty `a`
            // (k == 0), where the original read a[0] out of bounds.
            if(!a.empty() && a[0] == 1){
                a[0] = 2;
            }else{
                flag = 1;   // normalised from -1: any non-zero flag means failure
            }
        }
        if(flag){
            cout << "-1\n";
        }else{
            cout << a.size() + b.size() << "\n";
            for(int i : a)
                cout << i << " ";
            for(int i : b)
                cout << i << " " ;
            cout << "\n";
        }
    }
}
G — Random Walk Inversions
Writer: Lucifer_K
- Symmetry and Linearity of Expectation
Let $$$I_{i,j}$$$ be an indicator variable that is 1 if $$$S_{i} \gt S_{j}$$$ and 0 otherwise. The expected number of inversions is:
$$$E[inversions]=E[\sum_{i=1}^{N-1}\sum_{j=i+1}^{N}I_{ij}]=\sum_{i=1}^{N-1}\sum_{j=i+1}^{N}P(S_{i} \gt S_{j})$$$
Because every bot performs an independent random walk on the same graph starting from the same vertex, the random variables $$$S_{1},...,S_{N}$$$ are independent and identically distributed (i.i.d.). Therefore, the probability $$$P(S_{i} \gt S_{j})$$$ is identical for all pairs. There are exactly $$$\binom{N}{2}$$$ such pairs: $$$E[inversions] = \frac{N(N-1)}{2} \cdot P(S_1 \gt S_2)$$$
By symmetry of the identical distributions, $$$P(S_{1} \gt S_{2})=P(S_{1} \lt S_{2})$$$. Since the sum of all mutually exclusive outcomes must be 1:
$$$2\cdot P(S_{1} \gt S_{2})+P(S_{1}=S_{2})=1\Rightarrow P(S_{1} \gt S_{2})=\frac{1-P(S_{1}=S_{2})}{2}$$$
We have reduced the problem from tracking multiple bots to finding the probability that two independent bots tie, which allows us to calculate the probability distribution for just a single bot.
- Graph Distances and the Transition Matrix
First, run a Breadth-First Search (BFS) from vertex 1 to find $$$D[u]$$$, the shortest path distance from vertex 1 to every vertex $$$u$$$. Next, construct the transition matrix $$$T$$$ for a single bot.
Let $$$T_{v,u}$$$ be the probability of transitioning from $$$u$$$ to $$$v$$$. If there is an edge between $$$u$$$ and $$$v$$$, $$$T_{v,u}=deg(u)^{-1}$$$ (mod 998244353). Otherwise, $$$T_{v,u}=0$$$.
Let $$$P_{0}$$$ be the initial state column vector of size $$$M$$$, where $$$P_{0}[1]=1$$$ and all other entries are 0. The state vector after $$$K$$$ steps is:
$$$P_{K}=T^{K}\times P_{0}$$$
Because $$$K\le10^{18}$$$, this is computed using binary matrix exponentiation in $$$O(M^{3}\log K)$$$ time.
- Computing the Tie Probability
After computing $$$P_{K}$$$, the value $$$P_{K}[u]$$$ gives the probability that a single bot ends at vertex $$$u$$$. We group these probabilities by the final distance. Let $$$prob[d]$$$ be the probability that a bot ends at a vertex with distance $$$d$$$:
$$$prob[d] = \sum_{u \text{ where } D[u]=d} P_K[u]$$$
The probability that two independent bots tie is the sum of the probabilities that they both end up at the same distance:
$$$P(S_{1}=S_{2})=\sum_{d}(prob[d])^{2}$$$
Finally, we substitute $$$P(S_{1}=S_{2})$$$ back into our symmetry formula to find $$$P(S_{1} \gt S_{2})$$$, multiply by $$$\binom{N}{2}$$$ (mod 998244353), and print the result.
Time Complexity: $$$O(M^{3}\log K+E)$$$ Space Complexity: $$$O(M^{2})$$$
#pragma GCC optimize("O3,unroll-loops")
#include <bits/stdc++.h>
using namespace std;
using ll = long long;
using vl = vector<ll>;
using vvl = vector<vl>;
// Global modulus used throughout this solution (998244353, prime).
long long md = 998244353;

// Binary exponentiation: returns b^p modulo md in O(log p) multiplications.
long long pwr(long long b, long long p) {
    long long res = 1;
    for (; p > 0; p >>= 1) {
        if (p & 1) res = res * b % md;
        b = b * b % md;
    }
    return res;
}
// Multiply two m x m matrices modulo md.
// Bug-adjacent perf fix: the operands are now taken by const reference — the
// original copied both matrices (O(m^2) each) on every call of the O(log K)
// exponentiation loop.  Writing into the separate result `c` keeps the
// aliasing call mul(t, t, m) correct.  The i-k-j loop order keeps the inner
// loop walking rows contiguously (cache-friendly).
vvl mul(const vvl& a, const vvl& b, int m) {
    vvl c(m, vl(m, 0));
    for(int i = 0; i < m; ++i)
        for(int k = 0; k < m; ++k)
            for(int j = 0; j < m; ++j)
                c[i][j] = (c[i][j] + a[i][k] * b[k][j]) % md;
    return c;
}
// Problem G: expected number of inversions among N i.i.d. random walks of K
// steps, reduced (see editorial) to C(N,2) * (1 - P(tie)) / 2 modulo md.
int main() {
ios_base::sync_with_stdio(0); cin.tie(0);
// n = number of bots, k = number of steps, m = vertices, e = edges.
ll n, k, m, e;
cin >> n >> k >> m >> e;
vector<vl> g(m);
// d = vertex degrees, c = BFS distance from vertex 0 (-1 = unreachable).
vl d(m, 0), c(m, -1);
for(int i = 0; i < e; ++i) {
int u, v; cin >> u >> v; --u; --v;
g[u].push_back(v); g[v].push_back(u);
d[u]++; d[v]++;
}
// BFS from the start vertex to label every vertex with its distance.
queue<int> q;
q.push(0); c[0] = 0;
while(!q.empty()) {
int u = q.front(); q.pop();
for(int v : g[u]) if(c[v] == -1) {
c[v] = c[u] + 1;
q.push(v);
}
}
// t = single-step transition matrix (t[j][i] = 1/deg(i) for edge i-j, as a
// modular inverse); r starts as the identity and accumulates t^k.
vvl t(m, vl(m, 0)), r(m, vl(m, 0));
for(int i = 0; i < m; ++i) r[i][i] = 1;
for(int i = 0; i < m; ++i)
for(int j : g[i]) t[j][i] = pwr(d[i], md - 2);
// Binary matrix exponentiation: k can be up to 1e18.
while(k) {
if(k & 1) r = mul(r, t, m);
t = mul(t, t, m);
k >>= 1;
}
// r[i][0] = probability a single bot ends at vertex i (start vertex is 0).
// Group those probabilities by final BFS distance.
map<int, ll> p;
for(int i = 0; i < m; ++i)
if(c[i] != -1) p[c[i]] = (p[c[i]] + r[i][0]) % md;
// s = P(two independent bots tie) = sum over distances of prob[d]^2.
ll s = 0;
for(auto x : p) s = (s + x.second * x.second) % md;
// x = P(S1 > S2) = (1 - s) / 2; y = C(n, 2) = n(n-1)/2, both mod md.
ll x = (1 - s + md) % md * pwr(2, md - 2) % md;
ll y = (n % md) * ((n - 1) % md) % md * pwr(2, md - 2) % md;
cout << x * y % md << "\n";
return 0;
}
H — The Inverted Array
Writer: Lucifer_K
To solve this problem, we must shift our perspective from iterating over permutations to isolating the contribution of the first element, $$$P_{1}$$$.
Observation 1: The First Element's Impact
If we fix $$$P_{1}=x$$$, how many inversions does it create? Since $$$x$$$ is at the very front of the array, every element smaller than $$$x$$$ will appear after it. There are exactly $$$x-1$$$ such elements. Therefore, setting $$$P_{1}=x$$$ immediately contributes exactly $$$x-1$$$ inversions.
Observation 2: The Remaining Subproblem
The remaining $$$N-1$$$ elements must form a permutation of length $$$N-1$$$. For the entire permutation to have exactly $$$K$$$ inversions, the remaining $$$N-1$$$ elements must internally contain exactly $$$K-(x-1)$$$ inversions.
Let $$$I(n,k)$$$ be the number of permutations of length $$$n$$$ with $$$k$$$ inversions. The answer is given by:
$$$Ans=\sum_{x=1}^{min(N,K+1)}x\cdot I(N-1,K-x+1)$$$
Observation 3: Shifting to Generating Functions
Since $$$N\le10^{9}$$$, an $$$O(N\cdot K)$$$ DP to compute $$$I(n,k)$$$ will exceed the time limit (TLE). We must view this summation as the convolution of two sequences: the sequence of weights $$$W_{x}=x$$$, and the sequence of inversion counts $$$I(N-1,k)$$$. This naturally leads us to multiply their generating functions and extract the $$$K$$$-th coefficient in $$$O(K\sqrt{K})$$$ time using partition math.
Deep Mathematical Proof
Lemma 1. The generating function for the number of inversions in a permutation of length $$$n$$$ is:
$$$I_{n}(q)=\sum_{k=0}^{\infty}I(n,k)q^{k}=\prod_{i=1}^{n}\frac{1-q^{i}}{1-q}$$$
Proof. We can construct a permutation of length $$$n$$$ by inserting the element $$$i$$$ (from $$$i=1$$$ to $$$n$$$) into a permutation of length $$$i-1$$$. When inserting the element $$$i$$$ (which is currently the maximum element), we can place it in $$$i$$$ different positions. If placed at the very end, it creates 0 inversions. If placed one spot before the end, it creates 1 inversion, and so on, up to $$$i-1$$$ inversions. Thus, the insertion of element $$$i$$$ contributes a factor of $$$(1+q+q^{2}+\cdot\cdot\cdot+q^{i-1})=\frac{1-q^{i}}{1-q}$$$. Since these choices are independent, we multiply these factors for all $$$i$$$.
Theorem 1. The final answer is the coefficient of $$$q^{K}$$$ in the polynomial $$$G(q)=A(q)\cdot B(q)\cdot C(q)$$$ where:
$$$A(q)=\prod_{i=1}^{N-1}(1-q^{i})$$$
$$$B(q)=\frac{1}{(1-q)^{N+1}}$$$
$$$C(q)=1-(N+1)q^{N}+Nq^{N+1}$$$
Proof. The total sum is the coefficient of $$$q^{K}$$$ in the product of the inversion generating function $$$I_{N-1}(q)$$$ and the weight generating function $$$W(q)=\sum_{x=1}^{N}xq^{x-1}$$$.
$$$W(q) = \frac{d}{dq} \left( \frac{1-q^{N+1}}{1-q} \right) = \frac{1-(N+1)q^{N}+Nq^{N+1}}{(1-q)^{2}}$$$
Multiplying $$$W(q)$$$ by $$$I_{N-1}(q)$$$: $$$G(q)=\left(\frac{1}{(1-q)^{N-1}}\prod_{i=1}^{N-1}(1-q^{i})\right)\cdot\frac{1-(N+1)q^{N}+Nq^{N+1}}{(1-q)^{2}}$$$
Grouping the denominators yields $$$A(q)\cdot B(q)\cdot C(q)$$$.
Computing $$$A(q)$$$ in $$$O(K\sqrt{K})$$$
If $$$N-1\ge K$$$: $$$A(q)$$$ is simply the expansion of Euler's function $$$\phi(q)=\prod_{i=1}^{\infty}(1-q^{i})$$$. By Euler's Pentagonal Number Theorem, $$$\phi(q)=\sum_{m=-\infty}^{\infty}(-1)^{m}q^{m(3m-1)/2}$$$. We only need terms up to $$$q^{K}$$$, requiring $$$O(\sqrt{K})$$$ terms.
If $$$N-1 \lt K$$$: We use the identity: $$$\prod_{i=1}^{N-1}(1-q^{i})=\phi(q)\cdot\prod_{i=N}^{\infty}\frac{1}{1-q^{i}}$$$. The term $$$\prod_{i=N}^{\infty}\frac{1}{1-q^{i}}$$$ is the generating function for integer partitions where all parts are strictly $$$\ge N$$$. Since the minimum part size is $$$N$$$, any partition of $$$k\le K$$$ can have at most $$$K/N$$$ parts. We use DP where $$$dp[i][j]$$$ is the number of partitions of $$$j$$$ into exactly $$$i$$$ parts, each $$$\ge N$$$: $$$dp[i][j]=dp[i][j-i]+dp[i-1][j-N]$$$
Since this branch is only taken when $$$n \gt \sqrt{K}$$$ (otherwise we multiply the factors out directly in $$$O(nK)$$$), every part exceeds $$$\sqrt{K}$$$, so the number of parts satisfies $$$i \le K/n \lt \sqrt{K}$$$, ensuring the $$$O(K\sqrt{K})$$$ state space is optimal.
#pragma GCC optimize("O3,unroll-loops")
#include <bits/stdc++.h>
using namespace std;
// Prime modulus for Problem H.
const int MOD = 998244353;

// Binary exponentiation: base^exp (mod MOD) in O(log exp) steps.
long long power(long long base, long long exp) {
    long long result = 1;
    base %= MOD;
    for (; exp > 0; exp >>= 1) {
        if (exp & 1) result = result * base % MOD;
        base = base * base % MOD;
    }
    return result;
}
// Modular inverse via Fermat's little theorem (MOD is prime): n^(MOD-2) mod MOD.
long long modInverse(long long n) { return power(n, MOD - 2); }
// Factorial and inverse-factorial tables modulo MOD, for binomial
// coefficients with arguments below MAXK.
const int MAXK = 100005;
long long fact[MAXK], invFact[MAXK];

// Fill `fact` ascending, then derive every inverse factorial from one modular
// inverse by walking downwards: invFact[i] = invFact[i+1] * (i+1).
void precompute() {
    fact[0] = 1;
    for (int i = 1; i < MAXK; i++) fact[i] = fact[i - 1] * i % MOD;
    invFact[MAXK - 1] = modInverse(fact[MAXK - 1]);
    for (int i = MAXK - 2; i >= 0; i--) invFact[i] = invFact[i + 1] * (i + 1) % MOD;
}
// Binomial coefficient C(n, r) modulo MOD from the precomputed tables;
// 0 when r lies outside [0, n].  Requires n < MAXK and a prior precompute().
long long nCr_small(long long n, long long r) {
    if (r < 0 || r > n) return 0;
    long long num = fact[n];
    long long den = invFact[r] * invFact[n - r] % MOD;
    return num * den % MOD;
}
// Problem H: Ans = sum_x x * I(N-1, K-x+1), extracted as the coefficient of
// q^K in A(q) * B(q) * C(q) where (see editorial proof above)
//   A(q) = prod_{i=1}^{N-1} (1 - q^i)          -> array A[]
//   B(q) = 1 / (1-q)^{N+1}                     -> coefficients C(y+N, N)
//   C(q) = 1 - (N+1) q^N + N q^{N+1}           -> correction applied inline
int main() {
ios_base::sync_with_stdio(false);
cin.tie(NULL);
precompute();
long long N; int K;
cin>>N>>K;
// K == 0: only x = 1 contributes, with weight 1 (the identity tail).
if (K == 0) { cout << 1 << "\n"; return 0; }
// N == 1: the single permutation has 0 inversions; K > 0 here, so 0.
if (N == 1) { cout << (K == 0 ? 1 : 0) << "\n"; return 0; }
long long n = N - 1;
vector<long long> A(K + 1, 0);
if (n >= K) {
// All factors (1 - q^i) with i <= K are present, so up to q^K the product
// equals Euler's function; expand it with the pentagonal number theorem
// (exponents k(3k-1)/2 and k(3k+1)/2, sign (-1)^k), O(sqrt(K)) terms.
A[0] = 1;
for (int k = 1; ; k++) {
int p1 = k * (3 * k - 1) / 2, p2 = k * (3 * k + 1) / 2;
if (p1 > K && p2 > K) break;
long long sign = (k % 2 == 1) ? -1 : 1;
if (p1 <= K) A[p1] = (A[p1] + sign + MOD) % MOD;
if (p2 <= K) A[p2] = (A[p2] + sign + MOD) % MOD;
}
} else {
int THRESHOLD = sqrt(K) + 2;
if (n <= THRESHOLD) {
// Few factors: multiply the (1 - q^i) out directly, O(n*K) <= O(K*sqrt(K)).
A[0] = 1;
for (int i = 1; i <= n; i++)
for (int j = K; j >= i; j--)
A[j] = (A[j] - A[j - i] + MOD) % MOD;
} else {
// n > sqrt(K): use prod_{i=1}^{n}(1-q^i) = phi(q) * prod_{i>n} 1/(1-q^i).
// P_inf = pentagonal expansion of phi(q) up to q^K.
vector<long long> P_inf(K + 1, 0);
P_inf[0] = 1;
for (int k = 1; ; k++) {
int p1 = k * (3 * k - 1) / 2, p2 = k * (3 * k + 1) / 2;
if (p1 > K && p2 > K) break;
long long sign = (k % 2 == 1) ? -1 : 1;
if (p1 <= K) P_inf[p1] = (P_inf[p1] + sign + MOD) % MOD;
if (p2 <= K) P_inf[p2] = (P_inf[p2] + sign + MOD) % MOD;
}
// Partitions with all parts >= n+1 have at most K/(n+1) < sqrt(K) parts.
int max_parts = K / (n + 1) + 1;
// dp[i][j] = partitions of j into exactly i parts, each >= n+1:
// either add 1 to every part (dp[i][j-i]) or open a new minimal part
// of size n+1 (dp[i-1][j-(n+1)]).  H accumulates over all part counts.
vector<vector<long long>> dp(max_parts + 1, vector<long long>(K + 1, 0));
dp[0][0] = 1;
vector<long long> H(K + 1, 0);
H[0] = 1;
for (int i = 1; i <= max_parts; i++) {
for (int j = 0; j <= K; j++) {
if (j >= i) dp[i][j] = (dp[i][j] + dp[i][j - i]) % MOD;
if (j >= n + 1) dp[i][j] = (dp[i][j] + dp[i - 1][j - (n + 1)]) % MOD;
H[j] = (H[j] + dp[i][j]) % MOD;
}
}
// The O(K*sqrt(K)) optimization block
// Convolve the sparse P_inf (O(sqrt(K)) non-zeros) with H to obtain A.
for (int j = 0; j <= K; j++) {
if (P_inf[j] == 0) continue;
for (int i = j; i <= K; i++) {
A[i] = (A[i] + P_inf[j] * H[i - j]) % MOD;
}
}
}
}
// C_base[y] = [q^y] 1/(1-q)^{N+1} = C(y+N, N), built incrementally since N
// may exceed the factorial tables: C(y+N,N) = C(y-1+N,N) * (y+N) / y.
vector<long long> C_base(K + 1, 0);
C_base[0] = 1;
long long N_mod = N % MOD;
for (int y = 1; y <= K; y++) {
long long num = (y + N_mod) % MOD;
long long den = modInverse(y);
C_base[y] = C_base[y - 1] * num % MOD * den % MOD;
}
// Final convolution: ans = sum_m A[m] * [q^{K-m}] (B(q) * C(q)).  The C(q)
// terms -(N+1)q^N and +Nq^{N+1} shift B's coefficients, giving C(y, N) and
// C(y-1, N); they only matter when N <= K (small enough for nCr_small).
long long ans = 0;
for (int m = 0; m <= K; m++) {
int y = K - m;
long long C_y = C_base[y];
if (N <= K) {
if (y >= N) C_y = (C_y - (N_mod + 1) * nCr_small(y, N) % MOD + MOD) % MOD;
if (y >= N + 1) C_y = (C_y + N_mod * nCr_small(y - 1, N) % MOD) % MOD;
}
ans = (ans + A[m] * C_y) % MOD;
}
cout << ans << "\n";
return 0;
}
Results will be published soon!








Problem sets are too good
well done