We want to thank you all for participating in the contest, and hope you enjoyed it. Any feedback would be appreciated!
A — Modular Station Assembly
Writer: amartya110
Even Dimensions: If $$$n$$$ or $$$m$$$ is even, the grid can be perfectly tiled using only $$$1 \times 2$$$ I-Beams, making the answer YES.Thin Grids: If $$$n=1$$$ or $$$m=1$$$ (and the other is odd), the C-Core module cannot fit physically as it requires a $$$2 \times 3$$$ footprint, leaving an odd area that $$$1 \times 2$$$ beams cannot fill (NO).Small Odd Grids ($$$3 \times 3$$$): Although the area is 9, placing a $$$2 \times 3$$$ footprint C-Core in a $$$3 \times 3$$$ space creates geometric obstructions that prevent the remaining 4 cells from forming two valid $$$1 \times 2$$$ blocks (NO).Large Odd Grids: For all other odd $$$n, m \ge 3$$$, there is sufficient "maneuvering room" to place one C-Core such that the remaining even area is contiguous and tileable by I-Beams (YES).
#include <bits/stdc++.h>
using namespace std;
#define ll long long int
int main() {
ios::sync_with_stdio(false);
cin.tie(nullptr);
int t;
cin >> t;
while (t--) {
ll n,m; cin>>n>>m;
if(n%2==0 || m%2==0) cout<<"YES"<<"\n";
else if(n==1 || m==1) cout<<"NO"<<"\n";
else if(n==3 && m==3) cout<<"NO"<<"\n";
else cout<<"YES"<<"\n";
}
}
B — The Stoichiometry Constraint
Writer: amartya110
The problem requires finding integers $$$a$$$ and $$$b$$$ such that $$$ax + b = y$$$ under the constraints $$$b \gt 0$$$ and $$$a \gt 2b$$$. By substituting $$$b = y - ax$$$ into the second inequality, we derive $$$a \gt 2(y - ax)$$$, which simplifies to $$$a(2x + 1) \gt 2y$$$, or $$$a \gt \frac{2y}{2x + 1}$$$. To find the smallest valid integer $$$a$$$, we calculate $$$a = \lfloor \frac{2y}{2x + 1} \rfloor + 1$$$. Once $$$a$$$ is determined, we calculate the corresponding $$$b = y - ax$$$; if $$$b$$$ remains strictly positive ($$$ax \lt y$$$), the pair $$$(a, b)$$$ is a valid solution, otherwise, no such pair exists because increasing $$$a$$$ further would only decrease $$$b$$$ into non-positive values. Since $$$x$$$ and $$$y$$$ can reach $$$10^{14}$$$, the product $$$a \cdot x$$$ can exceed the capacity of a 64-bit integer, necessitating the use of __int128 for overflow-safe intermediate calculations.
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
// Solve one query: find the smallest a with a*x + b = y, b > 0 and a > 2b,
// or print -1 when no such pair exists.
//
// From b = y - a*x and a > 2b we get a > 2y / (2x + 1), so the minimal
// candidate is a = floor(2y / (2x + 1)) + 1.  If even this a yields b <= 0,
// no answer exists: increasing a only shrinks b further.
void solve() {
    long long x, y;
    std::cin >> x >> y;
    // 2*y fits in 64 bits (y <= 1e14), so this division is overflow-safe.
    long long a = (2 * y) / (2 * x + 1) + 1;
    // a*x may exceed 64 bits, so the comparison is done in 128-bit arithmetic.
    if ((__int128)a * x < (__int128)y) {
        long long b = y - a * x;  // here a*x < y <= 1e14, so 64-bit is safe
        // '\n' instead of endl: avoid flushing the stream on every query.
        std::cout << a << " " << b << "\n";
    } else {
        std::cout << -1 << "\n";
    }
}
// Entry point: read the number of test cases and dispatch each one to solve().
int main() {
    ios_base::sync_with_stdio(false);
    cin.tie(NULL);
    int tests;
    cin >> tests;
    for (int i = 0; i < tests; ++i) {
        solve();
    }
    return 0;
}
C — Server Reliability Level
Writer: mahesh_9996
The goal is to find the maximum integer $$$R$$$ (similar to the H-index) such that at least $$$R$$$ servers have $$$R$$$ or more requests, leveraging the fact that the input is already sorted. By picking an index $$$i$$$, we know there are exactly $$$n - i$$$ servers with at least $$$requests[i]$$$ requests; therefore, we seek the largest $$$R$$$ where $$$R \le n - i$$$ and $$$R \le requests[i]$$$. Since the "count of servers" $$$(n-i)$$$ decreases as $$$i$$$ increases, while the "request value" $$$requests[i]$$$ increases, the optimal $$$R$$$ occurs at the intersection of these two constraints. The binary search efficiently locates the leftmost index $$$mid$$$ where $$$requests[mid] \ge (n - mid)$$$, allowing us to identify the maximum possible $$$R$$$ as $$$n - left$$$ in $$$O(\log n)$$$ time.
#include <bits/stdc++.h>
using namespace std;
// Largest R such that at least R servers have >= R requests (H-index style).
//
// Precondition: `requests` is sorted in non-decreasing order.
// Binary search for the leftmost index where requests[mid] >= n - mid
// (n - mid elements from mid onward all have at least that many requests);
// the answer is the count of elements from that index to the end.
// O(log n); returns 0 for an empty input.
// Taken by const reference: the input is read-only (was a mutable ref).
int reliabilityLevel(const std::vector<long long int>& requests) {
    long long int n = requests.size();
    long long int left = 0, right = n - 1;
    while (left <= right) {
        long long int mid = left + (right - left) / 2;
        long long int servers = n - mid;  // servers with >= requests[mid] requests
        if (requests[mid] >= servers)
            right = mid - 1;  // feasible here; try to include more servers
        else
            left = mid + 1;
    }
    // Explicit cast: the result is at most n, which fits int for the given limits.
    return (int)(n - left);
}
// Driver: for each test case read n request counts (already sorted) and print
// the server reliability level.
int main() {
    long long int t;
    cin >> t;
    for (; t > 0; --t) {
        long long int n;
        cin >> n;
        vector<long long int> a(n);
        for (long long int i = 0; i < n; ++i) {
            cin >> a[i];
        }
        cout << reliabilityLevel(a) << "\n";
    }
}
D — The Mystic Sequence of 21 (Multitest Version)
Writer: mahesh_9996
To solve "The Gateway of 21," we use dynamic programming to find the longest subsequence that forms a number divisible by 21 while respecting the "no leading zeros" rule. We maintain a DP table of size $$$3 \times 7$$$ to track the maximum length of a subsequence for every possible combination of remainders modulo 3 and modulo 7. As we iterate through the string, a non-zero digit can either start a new sequence or extend an existing one by updating the remainders $$$( (rem3 + digit) \pmod 3, (rem7 \times 10 + digit) \pmod 7 )$$$, whereas a '0' is tracked separately as a potential single-digit solution since it cannot lead a multi-digit number. The final answer is the total string length minus the maximum length found in the dp[0][0] state (or length 1 if only a '0' is available); if no such subsequence exists, we return -1.
#include <bits/stdc++.h>
using namespace std;
// Problem D: remove the fewest characters of s so that the remaining
// subsequence reads as a number divisible by 21 (= 3 * 7) with no leading zero.
// Equivalently: find the LONGEST valid subsequence and subtract from |s|.
int main() {
ios::sync_with_stdio(false);
cin.tie(nullptr);
int t;
cin>>t;
while(t--){
string s;
if(!(cin >> s)) return 0;
int n = (int)s.size();
// dp_nz[r3][r7]: max length of a subsequence that starts with a NON-zero
// digit, whose digit sum is r3 (mod 3) and whose numeric value is r7 (mod 7);
// -1 marks an unreachable state.  (Value mod 3 equals digit sum mod 3.)
int dp_nz[3][7];
for(int i=0;i<3;i++) for(int j=0;j<7;j++) dp_nz[i][j] = -1;
// A lone '0' is itself divisible by 21 but cannot lead a longer number,
// so it is tracked separately from the DP table.
int has_zero = 0;
for(char ch : s) {
int d = ch - '0';
// Work on a copy so every transition reads the state from BEFORE this digit.
int new_dp[3][7];
for(int i=0;i<3;i++) for(int j=0;j<7;j++) new_dp[i][j] = dp_nz[i][j];
int new_has_zero = has_zero;
if(d == 0) {
new_has_zero = max(new_has_zero, 1); // '0' may only be a single-digit answer
} else {
int r3 = d % 3;
int r7 = d % 7;
new_dp[r3][r7] = max(new_dp[r3][r7], 1); // start a new subsequence at d
}
// Extend every reachable subsequence with digit d: appending a digit
// multiplies the value by 10 (hence r7*10 + d mod 7) and adds d to the
// digit sum (hence r3 + d mod 3).  This runs for d == 0 too — zeros may
// appear anywhere except the leading position.
for(int r3 = 0; r3 < 3; ++r3) {
for(int r7 = 0; r7 < 7; ++r7) {
if(dp_nz[r3][r7] == -1) continue;
int nr3 = (r3 + d) % 3;
int nr7 = (r7 * 10 + d) % 7;
new_dp[nr3][nr7] = max(new_dp[nr3][nr7], dp_nz[r3][r7] + 1);
}
}
for(int i=0;i<3;i++) for(int j=0;j<7;j++) dp_nz[i][j] = new_dp[i][j];
has_zero = new_has_zero;
}
// Best valid subsequence: remainder (0 mod 3, 0 mod 7) => divisible by 21,
// or the single '0' fallback of length 1.
int best = -1;
if(dp_nz[0][0] != -1) best = max(best, dp_nz[0][0]);
if(has_zero) best = max(best, 1);
if(best <= 0) cout << -1 << '\n';
else cout << (n - best) << '\n';
}
return 0;
}
E — House Cleaning
Writer: mahesh_9996
This problem is a classic application of Graph Theory on a 2D grid. To solve it, we must transform the geometric representation of houses and workers into a bipartite graph.
- The Core Insight: Bipartite Mapping
Each house at coordinates $$$(x, y)$$$ represents a "connection" between a specific row and a specific column. Since a worker can only be assigned to a row or a column, we can model this as follows:
a. Set $$$U$$$ (Left Side): All unique $$$x$$$-coordinates (representing rows).
b. Set $$$V$$$ (Right Side): All unique $$$y$$$-coordinates (representing columns).
c. Edges: For every house at $$$(x_i, y_i)$$$, draw an edge connecting node $$$x_i$$$ in $$$U$$$ to node $$$y_i$$$ in $$$V$$$.
Problem Transformation: The goal is to select the minimum number of rows ($$$x$$$-nodes) or columns ($$$y$$$-nodes) such that every house (edge) is "covered" (connected to at least one selected node). In graph theory, this is known as the Minimum Vertex Cover problem.
Kőnig's Theorem: A fundamental theorem in graph theory, Kőnig's Theorem, states that in any bipartite graph, the number of vertices in a minimum vertex cover is exactly equal to the number of edges in a maximum matching. Maximum Matching: The largest set of edges that do not share any common vertices. Why it works: Finding the minimum number of workers (Vertex Cover) is mathematically equivalent to finding the maximum number of independent houses that cannot be serviced by the same row or column (Matching).
The Algorithm: Hopcroft-Karp: While a simple DFS-based matching algorithm works for smaller constraints, this problem has $$$N = 2 \times 10^5$$$. We use the Hopcroft-Karp Algorithm to efficiently find the maximum matching.
Step 1: Coordinate Compression. Since $$$x$$$ and $$$y$$$ can be up to $$$10^9$$$, we map them to smaller integers ($$$1$$$ to $$$N$$$) using sorting and lower_bound. This makes the graph manageable.
Step 2: BFS Phase. The algorithm uses a Breadth-First Search to find the shortest "augmenting paths" (paths that start and end at unmatched nodes and alternate between matched and unmatched edges).
Step 3: DFS Phase. A Depth-First Search then travels along these shortest paths to flip the edges (unmatched becomes matched), increasing the total matching count.
Step 4: Iteration. This continues until no more augmenting paths can be found.
- Complexity & Conclusion
The time complexity of Hopcroft-Karp is $$$O(E\sqrt{V})$$$, where $$$E$$$ is the number of houses and $$$V$$$ is the number of unique coordinates. Given $$$N = 2 \times 10^5$$$, this algorithm is significantly faster than the standard $$$O(VE)$$$ approaches, allowing it to pass within the time limit. The final "Maximum Matching" value returned is our answer.
#include <bits/stdc++.h>
using namespace std;
// Dinic's maximum-flow algorithm.  On the unit-capacity bipartite graph built
// by this problem it runs in O(E * sqrt(V)) — the same bound as Hopcroft-Karp.
struct Dinic {
    // One directed edge; `rev` is the index of the paired reverse edge in adj[to].
    struct Edge {
        int to, cap, flow, rev;
    };
    std::vector<std::vector<Edge>> adj;  // adjacency lists of edges
    std::vector<int> level, ptr;         // BFS levels / per-node DFS edge cursor
    Dinic(int n) : adj(n), level(n), ptr(n) {}
    // Add a directed edge from -> to with capacity `cap`, plus the
    // zero-capacity reverse edge used to cancel flow.
    void addEdge(int from, int to, int cap) {
        adj[from].push_back({to, cap, 0, (int)adj[to].size()});
        adj[to].push_back({from, 0, 0, (int)adj[from].size() - 1});
    }
    // Build the level graph on residual edges; false when t is unreachable
    // (no augmenting path remains, the algorithm terminates).
    bool bfs(int s, int t) {
        std::fill(level.begin(), level.end(), -1);
        level[s] = 0;
        std::queue<int> q;
        q.push(s);
        while (!q.empty()) {
            int v = q.front();
            q.pop();
            for (auto& edge : adj[v]) {
                if (edge.cap - edge.flow > 0 && level[edge.to] == -1) {
                    level[edge.to] = level[v] + 1;
                    q.push(edge.to);
                }
            }
        }
        return level[t] != -1;
    }
    // Push up to `pushed` units from v toward t along level-increasing
    // residual edges.  `ptr[v]` persists across calls within one phase so
    // dead edges are never rescanned.
    int dfs(int v, int t, int pushed) {
        if (pushed == 0 || v == t) return pushed;
        // (int) cast fixes the signed/unsigned comparison of the original.
        for (int& cid = ptr[v]; cid < (int)adj[v].size(); ++cid) {
            auto& edge = adj[v][cid];
            int tr = edge.to;
            if (level[v] + 1 != level[tr] || edge.cap - edge.flow == 0) continue;
            int push = dfs(tr, t, std::min(pushed, edge.cap - edge.flow));
            if (push == 0) continue;
            edge.flow += push;
            adj[tr][edge.rev].flow -= push;
            return push;
        }
        return 0;
    }
    // Total maximum flow from s to t: repeat BFS phases, saturating all
    // shortest augmenting paths in each phase.
    int max_flow(int s, int t) {
        int flow = 0;
        while (bfs(s, t)) {
            std::fill(ptr.begin(), ptr.end(), 0);
            while (int pushed = dfs(s, t, 1e9)) {
                flow += pushed;
            }
        }
        return flow;
    }
};
// Problem E: minimum number of workers (each cleaning one full row or one
// full column) covering every house.  Houses are edges of a bipartite graph
// between compressed x-coordinates and compressed y-coordinates; by Kőnig's
// theorem the minimum vertex cover equals the maximum matching, which we
// compute as a unit-capacity maximum flow.
int main() {
    ios::sync_with_stdio(false);
    cin.tie(NULL);
    int n;
    cin >> n;
    vector<int> xs(n), ys(n);
    vector<int> vx, vy;
    for (int i = 0; i < n; i++) {
        cin >> xs[i] >> ys[i];
        vx.push_back(xs[i]);
        vy.push_back(ys[i]);
    }
    // Coordinate compression: values reach 1e9 but only n distinct ones matter.
    sort(vx.begin(), vx.end());
    vx.erase(unique(vx.begin(), vx.end()), vx.end());
    sort(vy.begin(), vy.end());
    vy.erase(unique(vy.begin(), vy.end()), vy.end());
    int nx = vx.size();
    int ny = vy.size();
    // Node layout: source 0, rows 1..nx, columns nx+1..nx+ny, sink nx+ny+1.
    int S = 0, T = nx + ny + 1;
    Dinic dinic(T + 1);
    for (int i = 0; i < nx; i++) dinic.addEdge(S, i + 1, 1);
    for (int i = 0; i < ny; i++) dinic.addEdge(nx + i + 1, T, 1);
    for (int i = 0; i < n; i++) {
        int r = lower_bound(vx.begin(), vx.end(), xs[i]) - vx.begin() + 1;
        int c = lower_bound(vy.begin(), vy.end(), ys[i]) - vy.begin() + 1 + nx;
        dinic.addEdge(r, c, 1);  // one unit edge per house
    }
    // '\n' instead of endl: no need to flush explicitly before returning.
    cout << dinic.max_flow(S, T) << "\n";
    return 0;
}
F — The Binary Alchemist
Writer: mahesh_9996
$$$1.$$$ Understanding the Core Constraint
The most important rule in this problem is the relationship between the energy $$$a_i$$$ and its "bit power" (popcount). We need:
$$$\sum a_i = m$$$ $$$\bigoplus \text{popcount}(a_i) = k$$$ The target XOR value $$$k$$$ is small ($$$k \le 31$$$), which suggests we should look at the binary representation of $$$k$$$ itself to construct our initial set of crystals.
$$$2.$$$ Construction Strategy: The "Power of Two" Approach
A clever way to satisfy the XOR condition is to pick numbers whose popcounts are themselves powers of two. If $$$k$$$ has a 1 at the $$$i$$$-th bit (representing $$$2^i$$$), we can include a crystal whose popcount is exactly $$$2^i$$$.
Step 1: Satisfying $$$k$$$. For every bit $$$i$$$ set in $$$k$$$, we create a crystal $$$a_i = 2^{2^i} - 1$$$. Example: If the $$$0$$$-th bit of $$$k$$$ is set ($$$2^0=1$$$), we create a number with popcount 1, like $$$(1 \ll 1) - 1 = 1$$$. Example: If the $$$2$$$-nd bit of $$$k$$$ is set ($$$2^2=4$$$), we create a number with popcount 4, like $$$(1 \ll 4) - 1 = 15$$$ (binary 1111).
Step 2: Calculating the Deficit. Let $$$x$$$ be the sum of these initial crystals. We calculate the remaining energy needed: $$$d = m - x$$$.
$$$3.$$$ Handling the Remainder ($$$d$$$)
We need to add $$$d$$$ to our sum without changing the XOR sum of the popcounts. The XOR sum remains unchanged if we add crystals in pairs with the same popcount, because $$$P \oplus P = 0$$$. If $$$d \lt 0$$$: Our initial construction exceeded $$$m$$$. This specific greedy approach fails (Output -1). If $$$d = 0$$$: We are done. The initial crystals sum perfectly to $$$m$$$. If $$$d$$$ is even and $$$d \ge 2$$$: We can simply add two crystals, each being $$$d/2$$$. Since they are identical, their popcounts are identical, and $$$pop(d/2) \oplus pop(d/2) = 0$$$. If $$$d$$$ is odd: This is trickier. We can't just split $$$d$$$ into two equal integers. Instead, we use a small constant set like $$$\{1, 2, (d-3)/2, (d-3)/2\}$$$. $$$1$$$ and $$$2$$$ both have a popcount of 1, so their popcounts cancel: $$$1 \oplus 1 = 0$$$. The two $$$(d-3)/2$$$ values are identical, so their XOR contribution is 0. The sum is $$$1 + 2 + \frac{d-3}{2} + \frac{d-3}{2} = 3 + d - 3 = d$$$.
$$$4.$$$ Edge Cases and Adjustments
The $$$d=1$$$ case: If $$$d=1$$$, we can't use the pair strategy. The code attempts to "absorb" the 1 into an existing crystal. If we have a crystal $$$a[0]=1$$$ (popcount 1), we change it to $$$2$$$ (also popcount 1). The sum increases by 1, but the XOR sum is preserved. The $$$-1$$$ condition: If $$$d$$$ is negative, or if $$$d=1$$$ but we have no suitable crystal to increment, we output -1.
Summary of Complexity
Time Complexity: $$$O(T \log K)$$$ per test case to iterate through bits of $$$K$$$. Given $$$K \le 31$$$, this is effectively $$$O(1)$$$ relative to $$$M$$$. Space Complexity: $$$O(N)$$$ to store the sequence of crystals.
#include <iostream>
#include <vector>
using namespace std;
// Problem F: build crystals a_i with sum m whose popcounts XOR to k (k <= 31).
// For every set bit 2^i of k emit one crystal with popcount exactly 2^i
// (value 2^(2^i) - 1); then pad the remaining energy d = m - sum with pairs
// of equal values, which leave the popcount XOR unchanged (P xor P = 0).
int main(){
    cin.tie(0) -> sync_with_stdio(0);
    int T; cin >> T;
    while(T--){
        int m, k; cin >> m >> k;
        vector<int> a;      // crystals realising the XOR target k
        int x = 0;          // their total energy
        for(int i = 0; i < 5; i++){        // k <= 31, so 5 bits suffice
            if(k & (1 << i)){
                a.push_back((1 << (1 << i)) - 1);  // popcount is exactly 2^i
                x += a.back();
            }
        }
        int d = m - x;      // energy still missing
        vector<int> b;      // padding crystals with zero XOR contribution
        int flag = 0;
        if(d < 0){
            flag = 1;       // base construction already exceeds m
        }else if(d >= 2){
            if(d % 2 == 0){
                b = {d/2, d/2};            // equal pair: popcounts cancel
            }else{
                // pop(1) = pop(2) = 1 cancel each other; the equal pair cancels
                // too, and 1 + 2 + 2*(d-3)/2 = d.
                // NOTE(review): for d == 3 this emits two zero-valued crystals —
                // confirm the statement allows zeros.
                b = {1, 2, (d-3)/2, (d-3)/2};
            }
        }else if(d == 0){
            b = {};
        }else{
            // d == 1: absorb it by turning a crystal of value 1 into 2 (same
            // popcount, sum grows by 1).  Bug fix: guard against an empty `a`
            // (k == 0), where the original read a[0] out of bounds.
            if(!a.empty() && a[0] == 1){
                a[0] = 2;
            }else{
                flag = 1;   // normalised from -1: any non-zero flag means failure
            }
        }
        if(flag){
            cout << "-1\n";
        }else{
            cout << a.size() + b.size() << "\n";
            for(int i : a)
                cout << i << " ";
            for(int i : b)
                cout << i << " " ;
            cout << "\n";
        }
    }
}
G — Random Walk Inversions
Writer: Lucifer_K
- Symmetry and Linearity of Expectation
Let $$$I_{i,j}$$$ be an indicator variable that is 1 if $$$S_{i} \gt S_{j}$$$ and 0 otherwise. The expected number of inversions is:
$$$E[inversions]=E[\sum_{i=1}^{N-1}\sum_{j=i+1}^{N}I_{ij}]=\sum_{i=1}^{N-1}\sum_{j=i+1}^{N}P(S_{i} \gt S_{j})$$$
Because every bot performs an independent random walk on the same graph starting from the same vertex, the random variables $$$S_{1},...,S_{N}$$$ are independent and identically distributed (i.i.d.). Therefore, the probability $$$P(S_{i} \gt S_{j})$$$ is identical for all pairs. There are exactly $$$\binom{N}{2}$$$ such pairs: $$$E[inversions] = \frac{N(N-1)}{2} \cdot P(S_1 \gt S_2)$$$
By symmetry of the identical distributions, $$$P(S_{1} \gt S_{2})=P(S_{1} \lt S_{2})$$$. Since the sum of all mutually exclusive outcomes must be 1:
$$$2\cdot P(S_{1} \gt S_{2})+P(S_{1}=S_{2})=1\Rightarrow P(S_{1} \gt S_{2})=\frac{1-P(S_{1}=S_{2})}{2}$$$
We have reduced the problem from tracking multiple bots to finding the probability that two independent bots tie, which allows us to calculate the probability distribution for just a single bot.
- Graph Distances and the Transition Matrix
First, run a Breadth-First Search (BFS) from vertex 1 to find $$$D[u]$$$, the shortest path distance from vertex 1 to every vertex $$$u$$$. Next, construct the transition matrix $$$T$$$ for a single bot.
Let $$$T_{v,u}$$$ be the probability of transitioning from $$$u$$$ to $$$v$$$. If there is an edge between $$$u$$$ and $$$v$$$, $$$T_{v,u}=deg(u)^{-1}$$$ (mod 998244353). Otherwise, $$$T_{v,u}=0$$$.
Let $$$P_{0}$$$ be the initial state column vector of size $$$M$$$, where $$$P_{0}[1]=1$$$ and all other entries are 0. The state vector after $$$K$$$ steps is:
$$$P_{K}=T^{K}\times P_{0}$$$
Because $$$K\le10^{18}$$$, this is computed using binary matrix exponentiation in $$$O(M^{3}\log K)$$$ time.
- Computing the Tie Probability
After computing $$$P_{K}$$$, the value $$$P_{K}[u]$$$ gives the probability that a single bot ends at vertex $$$u$$$. We group these probabilities by the final distance. Let $$$prob[d]$$$ be the probability that a bot ends at a vertex with distance $$$d$$$:
$$$prob[d] = \sum_{u \text{ where } D[u]=d} P_K[u]$$$
The probability that two independent bots tie is the sum of the probabilities that they both end up at the same distance:
$$$P(S_{1}=S_{2})=\sum_{d}(prob[d])^{2}$$$
Finally, we substitute $$$P(S_{1}=S_{2})$$$ back into our symmetry formula to find $$$P(S_{1} \gt S_{2})$$$, multiply by $$$\binom{N}{2}$$$ (mod 998244353), and print the result.
Time Complexity: $$$O(M^{3}\log K+E)$$$ Space Complexity: $$$O(M^{2})$$$
#pragma GCC optimize("O3,unroll-loops")
#include <bits/stdc++.h>
using namespace std;
using ll = long long;
using vl = vector<ll>;
using vvl = vector<vl>;
// Global modulus used throughout this solution (998244353, prime).
long long md = 998244353;

// Binary exponentiation: returns b^p modulo md in O(log p) multiplications.
long long pwr(long long b, long long p) {
    long long res = 1;
    for (; p > 0; p >>= 1) {
        if (p & 1) res = res * b % md;
        b = b * b % md;
    }
    return res;
}
// Multiply two m x m matrices modulo md.
// Bug-adjacent perf fix: the operands are now taken by const reference — the
// original copied both matrices (O(m^2) each) on every call of the O(log K)
// exponentiation loop.  Writing into the separate result `c` keeps the
// aliasing call mul(t, t, m) correct.  The i-k-j loop order keeps the inner
// loop walking rows contiguously (cache-friendly).
vvl mul(const vvl& a, const vvl& b, int m) {
    vvl c(m, vl(m, 0));
    for(int i = 0; i < m; ++i)
        for(int k = 0; k < m; ++k)
            for(int j = 0; j < m; ++j)
                c[i][j] = (c[i][j] + a[i][k] * b[k][j]) % md;
    return c;
}
// Problem G: expected number of inversions among N i.i.d. random walks of K
// steps, reduced (see editorial) to C(N,2) * (1 - P(tie)) / 2 modulo md.
int main() {
ios_base::sync_with_stdio(0); cin.tie(0);
// n = number of bots, k = number of steps, m = vertices, e = edges.
ll n, k, m, e;
cin >> n >> k >> m >> e;
vector<vl> g(m);
// d = vertex degrees, c = BFS distance from vertex 0 (-1 = unreachable).
vl d(m, 0), c(m, -1);
for(int i = 0; i < e; ++i) {
int u, v; cin >> u >> v; --u; --v;
g[u].push_back(v); g[v].push_back(u);
d[u]++; d[v]++;
}
// BFS from the start vertex to label every vertex with its distance.
queue<int> q;
q.push(0); c[0] = 0;
while(!q.empty()) {
int u = q.front(); q.pop();
for(int v : g[u]) if(c[v] == -1) {
c[v] = c[u] + 1;
q.push(v);
}
}
// t = single-step transition matrix (t[j][i] = 1/deg(i) for edge i-j, as a
// modular inverse); r starts as the identity and accumulates t^k.
vvl t(m, vl(m, 0)), r(m, vl(m, 0));
for(int i = 0; i < m; ++i) r[i][i] = 1;
for(int i = 0; i < m; ++i)
for(int j : g[i]) t[j][i] = pwr(d[i], md - 2);
// Binary matrix exponentiation: k can be up to 1e18.
while(k) {
if(k & 1) r = mul(r, t, m);
t = mul(t, t, m);
k >>= 1;
}
// r[i][0] = probability a single bot ends at vertex i (start vertex is 0).
// Group those probabilities by final BFS distance.
map<int, ll> p;
for(int i = 0; i < m; ++i)
if(c[i] != -1) p[c[i]] = (p[c[i]] + r[i][0]) % md;
// s = P(two independent bots tie) = sum over distances of prob[d]^2.
ll s = 0;
for(auto x : p) s = (s + x.second * x.second) % md;
// x = P(S1 > S2) = (1 - s) / 2; y = C(n, 2) = n(n-1)/2, both mod md.
ll x = (1 - s + md) % md * pwr(2, md - 2) % md;
ll y = (n % md) * ((n - 1) % md) % md * pwr(2, md - 2) % md;
cout << x * y % md << "\n";
return 0;
}
H — The Inverted Array
Writer: Lucifer_K
To solve this problem, we must shift our perspective from iterating over permutations to isolating the contribution of the first element, $$$P_{1}$$$.
Observation 1: The First Element's Impact
If we fix $$$P_{1}=x$$$, how many inversions does it create? Since $$$x$$$ is at the very front of the array, every element smaller than $$$x$$$ will appear after it. There are exactly $$$x-1$$$ such elements. Therefore, setting $$$P_{1}=x$$$ immediately contributes exactly $$$x-1$$$ inversions.
Observation 2: The Remaining Subproblem
The remaining $$$N-1$$$ elements must form a permutation of length $$$N-1$$$. For the entire permutation to have exactly $$$K$$$ inversions, the remaining $$$N-1$$$ elements must internally contain exactly $$$K-(x-1)$$$ inversions.
Let $$$I(n,k)$$$ be the number of permutations of length $$$n$$$ with $$$k$$$ inversions. The answer is given by:
$$$Ans=\sum_{x=1}^{min(N,K+1)}x\cdot I(N-1,K-x+1)$$$
Observation 3: Shifting to Generating Functions
Since $$$N\le10^{9}$$$, an $$$O(N\cdot K)$$$ DP to compute $$$I(n,k)$$$ will exceed the time limit (TLE). We must view this summation as the convolution of two sequences: the sequence of weights $$$W_{x}=x$$$, and the sequence of inversion counts $$$I(N-1,k)$$$. This naturally leads us to multiply their generating functions and extract the $$$K$$$-th coefficient in $$$O(K\sqrt{K})$$$ time using partition math.
Deep Mathematical Proof
Lemma 1. The generating function for the number of inversions in a permutation of length $$$n$$$ is:
$$$I_{n}(q)=\sum_{k=0}^{\infty}I(n,k)q^{k}=\prod_{i=1}^{n}\frac{1-q^{i}}{1-q}$$$
Proof. We can construct a permutation of length $$$n$$$ by inserting the element $$$i$$$ (from $$$i=1$$$ to $$$n$$$) into a permutation of length $$$i-1$$$. When inserting the element $$$i$$$ (which is currently the maximum element), we can place it in $$$i$$$ different positions. If placed at the very end, it creates 0 inversions. If placed one spot before the end, it creates 1 inversion, and so on, up to $$$i-1$$$ inversions. Thus, the insertion of element $$$i$$$ contributes a factor of $$$(1+q+q^{2}+\cdot\cdot\cdot+q^{i-1})=\frac{1-q^{i}}{1-q}$$$. Since these choices are independent, we multiply these factors for all $$$i$$$.
Theorem 1. The final answer is the coefficient of $$$q^{K}$$$ in the polynomial $$$G(q)=A(q)\cdot B(q)\cdot C(q)$$$ where:
$$$A(q)=\prod_{i=1}^{N-1}(1-q^{i})$$$
$$$B(q)=\frac{1}{(1-q)^{N+1}}$$$
$$$C(q)=1-(N+1)q^{N}+Nq^{N+1}$$$
Proof. The total sum is the coefficient of $$$q^{K}$$$ in the product of the inversion generating function $$$I_{N-1}(q)$$$ and the weight generating function $$$W(q)=\sum_{x=1}^{N}xq^{x-1}$$$.
$$$W(q) = \frac{d}{dq} \left( \frac{1-q^{N+1}}{1-q} \right) = \frac{1-(N+1)q^{N}+Nq^{N+1}}{(1-q)^{2}}$$$
Multiplying $$$W(q)$$$ by $$$I_{N-1}(q)$$$: $$$G(q)=\left(\frac{1}{(1-q)^{N-1}}\prod_{i=1}^{N-1}(1-q^{i})\right)\cdot\frac{1-(N+1)q^{N}+Nq^{N+1}}{(1-q)^{2}}$$$
Grouping the denominators yields $$$A(q)\cdot B(q)\cdot C(q)$$$.
Computing $$$A(q)$$$ in $$$O(K\sqrt{K})$$$
If $$$N-1\ge K$$$: $$$A(q)$$$ is simply the expansion of Euler's function $$$\phi(q)=\prod_{i=1}^{\infty}(1-q^{i})$$$. By Euler's Pentagonal Number Theorem, $$$\phi(q)=\sum_{m=-\infty}^{\infty}(-1)^{m}q^{m(3m-1)/2}$$$. We only need terms up to $$$q^{K}$$$, requiring $$$O(\sqrt{K})$$$ terms.
If $$$N-1 \lt K$$$: We use the identity: $$$\prod_{i=1}^{N-1}(1-q^{i})=\phi(q)\cdot\prod_{i=N}^{\infty}\frac{1}{1-q^{i}}$$$. The term $$$\prod_{i=N}^{\infty}\frac{1}{1-q^{i}}$$$ is the generating function for integer partitions where all parts are strictly $$$\ge N$$$. Since the minimum part size is $$$N$$$, any partition of $$$k\le K$$$ can have at most $$$K/N$$$ parts. We use DP where $$$dp[i][j]$$$ is the number of partitions of $$$j$$$ into exactly $$$i$$$ parts, each $$$\ge N$$$: $$$dp[i][j]=dp[i][j-i]+dp[i-1][j-N]$$$
Since this branch is only taken when $$$n \gt \sqrt{K}$$$ (otherwise we multiply the factors out directly in $$$O(nK)$$$), every part exceeds $$$\sqrt{K}$$$, so the number of parts satisfies $$$i \le K/n \lt \sqrt{K}$$$, ensuring the $$$O(K\sqrt{K})$$$ state space is optimal.
#pragma GCC optimize("O3,unroll-loops")
#include <bits/stdc++.h>
using namespace std;
// Prime modulus for Problem H.
const int MOD = 998244353;

// Binary exponentiation: base^exp (mod MOD) in O(log exp) steps.
long long power(long long base, long long exp) {
    long long result = 1;
    base %= MOD;
    for (; exp > 0; exp >>= 1) {
        if (exp & 1) result = result * base % MOD;
        base = base * base % MOD;
    }
    return result;
}
// Modular inverse via Fermat's little theorem (MOD is prime): n^(MOD-2) mod MOD.
long long modInverse(long long n) { return power(n, MOD - 2); }
// Factorial and inverse-factorial tables modulo MOD, for binomial
// coefficients with arguments below MAXK.
const int MAXK = 100005;
long long fact[MAXK], invFact[MAXK];

// Fill `fact` ascending, then derive every inverse factorial from one modular
// inverse by walking downwards: invFact[i] = invFact[i+1] * (i+1).
void precompute() {
    fact[0] = 1;
    for (int i = 1; i < MAXK; i++) fact[i] = fact[i - 1] * i % MOD;
    invFact[MAXK - 1] = modInverse(fact[MAXK - 1]);
    for (int i = MAXK - 2; i >= 0; i--) invFact[i] = invFact[i + 1] * (i + 1) % MOD;
}
// Binomial coefficient C(n, r) modulo MOD from the precomputed tables;
// 0 when r lies outside [0, n].  Requires n < MAXK and a prior precompute().
long long nCr_small(long long n, long long r) {
    if (r < 0 || r > n) return 0;
    long long num = fact[n];
    long long den = invFact[r] * invFact[n - r] % MOD;
    return num * den % MOD;
}
// Problem H: Ans = sum_x x * I(N-1, K-x+1), extracted as the coefficient of
// q^K in A(q) * B(q) * C(q) where (see editorial proof above)
//   A(q) = prod_{i=1}^{N-1} (1 - q^i)          -> array A[]
//   B(q) = 1 / (1-q)^{N+1}                     -> coefficients C(y+N, N)
//   C(q) = 1 - (N+1) q^N + N q^{N+1}           -> correction applied inline
int main() {
ios_base::sync_with_stdio(false);
cin.tie(NULL);
precompute();
long long N; int K;
cin>>N>>K;
// K == 0: only x = 1 contributes, with weight 1 (the identity tail).
if (K == 0) { cout << 1 << "\n"; return 0; }
// N == 1: the single permutation has 0 inversions; K > 0 here, so 0.
if (N == 1) { cout << (K == 0 ? 1 : 0) << "\n"; return 0; }
long long n = N - 1;
vector<long long> A(K + 1, 0);
if (n >= K) {
// All factors (1 - q^i) with i <= K are present, so up to q^K the product
// equals Euler's function; expand it with the pentagonal number theorem
// (exponents k(3k-1)/2 and k(3k+1)/2, sign (-1)^k), O(sqrt(K)) terms.
A[0] = 1;
for (int k = 1; ; k++) {
int p1 = k * (3 * k - 1) / 2, p2 = k * (3 * k + 1) / 2;
if (p1 > K && p2 > K) break;
long long sign = (k % 2 == 1) ? -1 : 1;
if (p1 <= K) A[p1] = (A[p1] + sign + MOD) % MOD;
if (p2 <= K) A[p2] = (A[p2] + sign + MOD) % MOD;
}
} else {
int THRESHOLD = sqrt(K) + 2;
if (n <= THRESHOLD) {
// Few factors: multiply the (1 - q^i) out directly, O(n*K) <= O(K*sqrt(K)).
A[0] = 1;
for (int i = 1; i <= n; i++)
for (int j = K; j >= i; j--)
A[j] = (A[j] - A[j - i] + MOD) % MOD;
} else {
// n > sqrt(K): use prod_{i=1}^{n}(1-q^i) = phi(q) * prod_{i>n} 1/(1-q^i).
// P_inf = pentagonal expansion of phi(q) up to q^K.
vector<long long> P_inf(K + 1, 0);
P_inf[0] = 1;
for (int k = 1; ; k++) {
int p1 = k * (3 * k - 1) / 2, p2 = k * (3 * k + 1) / 2;
if (p1 > K && p2 > K) break;
long long sign = (k % 2 == 1) ? -1 : 1;
if (p1 <= K) P_inf[p1] = (P_inf[p1] + sign + MOD) % MOD;
if (p2 <= K) P_inf[p2] = (P_inf[p2] + sign + MOD) % MOD;
}
// Partitions with all parts >= n+1 have at most K/(n+1) < sqrt(K) parts.
int max_parts = K / (n + 1) + 1;
// dp[i][j] = partitions of j into exactly i parts, each >= n+1:
// either add 1 to every part (dp[i][j-i]) or open a new minimal part
// of size n+1 (dp[i-1][j-(n+1)]).  H accumulates over all part counts.
vector<vector<long long>> dp(max_parts + 1, vector<long long>(K + 1, 0));
dp[0][0] = 1;
vector<long long> H(K + 1, 0);
H[0] = 1;
for (int i = 1; i <= max_parts; i++) {
for (int j = 0; j <= K; j++) {
if (j >= i) dp[i][j] = (dp[i][j] + dp[i][j - i]) % MOD;
if (j >= n + 1) dp[i][j] = (dp[i][j] + dp[i - 1][j - (n + 1)]) % MOD;
H[j] = (H[j] + dp[i][j]) % MOD;
}
}
// The O(K*sqrt(K)) optimization block
// Convolve the sparse P_inf (O(sqrt(K)) non-zeros) with H to obtain A.
for (int j = 0; j <= K; j++) {
if (P_inf[j] == 0) continue;
for (int i = j; i <= K; i++) {
A[i] = (A[i] + P_inf[j] * H[i - j]) % MOD;
}
}
}
}
// C_base[y] = [q^y] 1/(1-q)^{N+1} = C(y+N, N), built incrementally since N
// may exceed the factorial tables: C(y+N,N) = C(y-1+N,N) * (y+N) / y.
vector<long long> C_base(K + 1, 0);
C_base[0] = 1;
long long N_mod = N % MOD;
for (int y = 1; y <= K; y++) {
long long num = (y + N_mod) % MOD;
long long den = modInverse(y);
C_base[y] = C_base[y - 1] * num % MOD * den % MOD;
}
// Final convolution: ans = sum_m A[m] * [q^{K-m}] (B(q) * C(q)).  The C(q)
// terms -(N+1)q^N and +Nq^{N+1} shift B's coefficients, giving C(y, N) and
// C(y-1, N); they only matter when N <= K (small enough for nCr_small).
long long ans = 0;
for (int m = 0; m <= K; m++) {
int y = K - m;
long long C_y = C_base[y];
if (N <= K) {
if (y >= N) C_y = (C_y - (N_mod + 1) * nCr_small(y, N) % MOD + MOD) % MOD;
if (y >= N + 1) C_y = (C_y + N_mod * nCr_small(y - 1, N) % MOD) % MOD;
}
ans = (ans + A[m] * C_y) % MOD;
}
cout << ans << "\n";
return 0;
}
Results will be published soon!








Problem sets are too good
well done