Code of ABC336G 16 integers (with detailed comments) in $$$O(B^3 + N)$$$ time ($$$O(B^4 + N)$$$ in the editorial)
Code of ABC336G
#pragma GCC optimize("Ofast,unroll-loops")
#pragma GCC target("avx2,bmi,bmi2,popcnt,lzcnt")
#include <bits/stdc++.h>
using namespace std;
//In the following context, `ref` refers to https://oi-wiki.org/graph/matrix-tree/
//(1)There is no theorem to count Euler trail directly. Fortunately, BEST theorem
//could count the Euler circuit. Therefore, we brute force the start vertex and the terminal vertex, add an edge from
//the start and the terminal, and transform Euler trail counting into Euler circuit counting.
//(1.1)The graph should be connected as a undirected graph, if we omit the **isolation vertices** (weak-connectivity).
//A dsu is used to check this.
//(1.2)Second, for each vertex v, either indeg(v) == outdeg(v), or |indeg(v) - outdeg(v)| = 1, and the latter situation
//could happen on exactly 0 or 2 vertices. If their is no |indeg(v) - outdeg(v)| = 1 vertex v, we are counting Euler circuits, otherwise,
//there are s such that indeg(s) - outdeg(s) == 1, and t such that outdeg(t) - indeg(t) == 1, and we are counting Euler trails from
//s to t.
//The pair (Start vertex, Euler circuit) <--> (Length N+3 sequence) forms a bijection. Starting contributes three characters, while
//the Euler circuit contributes n.
//(2)The formula ec(G) = t^{root}(G, k) \prod (deg(v) - 1)! counts equivalence classes up to cyclic isomorphism
//e.g, A->B->A and B->A->B are considered the same Euler circuit.
//The number of Euler circuits starting at v_i is
//[Observation2]ec(G, vi) = t^{root}(G, k) deg(v_i)\prod (deg(v) - 1)!, and sum up ec(G, vi) w.r.t vi, it is exactly |E(G)|ec(G, k). It has a
//combinatorical meaning. Each isomorphic class is counted |E(G)| times by cyclic shift.
//To count Euler trail s->t, we increase the wt[s][t]. In such case, we should count up to **equivalent class**, as within one equivalent
//class, only one of Euler trail ends up with s->t (For example, s->t->k->s, t->k->s->t, k->s->t->k, only the second one is desired).
//In the Euler trail case, the Start vertex is fixed with only one possibility, s!
//In the Euler circuit case, we should count all trails, instead of isomorphic classes only.
//We use the Observation2 to reduce the complexity from O(B^{w+1} + N) to O(B^w + N), although it is subtle improvement since B is small.
//(3)To handle self-loops, i.e., x->x, Becareful that in the formula Lap_{ii} = D_{ii} - Wt_{ii}, Wt_{ii} > 0.
//I am used to the `fact` that Wt_{ii} = 0 is `always true` if Wt is an adjacent matrix, but in the self-loop cases, this is not true.
//For example, A<-->B<-->C
// (two self-loops at B)
//The Lap matrix should be
//* A B C
//A 1 -1 0
//B -1 2 -1
//C 0 -1 1
//Lap_{bb} = D_{bb} - Wt_{bb} = 2 + (2self-loops at B) - (2self-loops at B) = 2.
#define ll long long
#define P 998244353
#define C 1000005
#define add(x, y) (((ll)(x)+(ll)(y))%(P))
#define sub(x, y) (((x) - (y) + (P))%(P))
#define mul(x, y) ((1ll*(x)*(y))%(P))
#define inv(x) (power((x), ((P)-2)))
#define inc(x) ((x) = add((x), 1))
#define dec(x) ((x) = sub((x), 1))
#define NO { std::cout << "0\n"; return; }
int tmpwt[8][8], tmpin[8], tmpout[8];
int f[8], fa[8];
int fac[1000007], ifac[1000007];
int power(int a, int b)
{
int res = 1;
while(b>0)
{
if(b&1)
res = 1ll * res * a % P;
b = b>>1;
a = 1ll * a * a%P;
}
return res;
}
int find(int x){
if(fa[x] == x) return x;
return fa[x] = find(fa[x]);
}
void merge(int x, int y){
int fax = find(x), fay = find(y);
if(fax != fay){
fa[fax] = fay;
}
}
int det(vector<vector<int>>&& lap){
int m = lap.size();
int res = 1, w = 1;
//Gaussian Elimination
//https://www.luogu.com.cn/problem/P7112
for(int i = 0; i < m; i++)
{
for(int j = i+1; j < m; ++j)
{
while(lap[i][i])
{
int div = lap[j][i]/lap[i][i];
for(int k = i; k < m; ++k)
{
lap[j][k] = sub(lap[j][k], mul(div, lap[i][k]));
}
swap(lap[i], lap[j]);
w = w ? P - w : 0;
}
swap(lap[i], lap[j]);
w = w ? P - w : 0;
}
}
for(int i = 0; i < m; i++) res = mul(res, lap[i][i]);
return mul(res, w);
}
int calc(vector<vector<int>>& wt, int s, int t, bool needadd=true){
if(needadd) inc(wt[s][t]); //In the Euler trail case, we need to enclose the t->s trail by adding an edge s->t
int m = wt.size();
vector<vector<int>> lap(m, vector<int>(m));
int facmul = 1;
for(int i = 0; i < m; ++i){
int sm = 0;
for(int j = 0; j < m; ++j){
if(i != j) {
lap[i][j] = wt[i][j] == 0 ? 0 : P - wt[i][j];
lap[i][i] = add(lap[i][i], wt[i][j]);
}
sm = add(sm, wt[i][j]);
}
facmul = mul(fac[sm-1], facmul);
}
//Note that the BEST lemma is using an co-factor at (i, i) for arbitrary i, for convenience
//we use i == 0
//We shape the matrix into
// 1 0 0 ... 0
// 0 R R ... R
// 0 R R ... R
// 0 R R ... R
// ...
// 0 R R ... R
//The R part remains the same, therefore, the det of this matrix is equal to the cofactor of the original matrix at (0, 0)
for(int i = 1; i < m; ++i) lap[0][i] = lap[i][0] = 0;
lap[0][0] = 1;
int res = det(std::move(lap));
//printf("In calc(%d %d), res==%d, facmul==%d\n", s, t, res, facmul); for(int i = 0; i < m; ++i) { for(int j = 0; j < m; ++j) printf("%d ", lap[i][j]); printf("\n");}
if(needadd) dec(wt[s][t]);
return mul(facmul, res);
}
void solve(){
int ifacmul = 1;
for(int i = 0; i < 8; ++i) fa[i] = i;
for(int i = 0; i < 16; ++i){
int a = i >> 1, b = i & 7;
cin >> tmpwt[a][b];
ifacmul = mul(ifacmul, ifac[tmpwt[a][b]]);
tmpout[a] = add(tmpout[a], tmpwt[a][b]);
tmpin[b] = add(tmpin[b], tmpwt[a][b]);
if(tmpwt[a][b]) merge(a, b);
}
vector<int> niso;
int s = -1, t = -1, globalfa = -1;
for(int i = 0; i < 8; ++i){
if(!tmpin[i] && !tmpout[i]){
//Filtering isolated vertices
continue;
}
if(globalfa == -1) globalfa = find(i);
else if(globalfa != find(i)){
NO; //Not connected as a undirected graph
}
f[niso.size()] = i;
if(tmpin[i] > tmpout[i]){
//First, Euler trail only allows one vertex such that indeg - tmpout == 1
if(tmpin[i] - tmpout[i] >= 2) NO;
if(s != -1) NO;
s = niso.size();
}
if(tmpin[i] < tmpout[i]){
if(tmpout[i] - tmpin[i] >= 2) NO;
if(t != -1) NO;
t = niso.size();
}
niso.push_back(i);
}
if((s != -1 && t == -1) || (s == -1 && t != -1)) NO;
int m = niso.size();
//printf("SURVIVED, m==%d, s==f[%d]==%d, t==f[%d]==%d\n", m, s, f[s], t, f[t]);
vector<vector<int>> wt(m, vector<int>(m));
int sm = 0;
for(int i = 0; i < m; ++i){
for(int j = 0; j < m; ++j){
//Renumbering after filtering isolated vertices.
wt[i][j] = tmpwt[f[i]][f[j]];
sm = add(sm, wt[i][j]);
}
}
int ans = 0;
if(s != -1){
ans = calc(wt, s, t);
}else{
//for(int i = 0; i < m; ++i) ans = add(ans, calc(wt, i, i)); //AC, ZLT
ans = mul(sm, calc(wt, 0, 0, false)); //Improve using Observation 2
}
ans = mul(ans, ifacmul); //Remove multiple edges between two vertices i->j, as they generate the same sequences
cout << ans << "\n";
}
int main(void){
cin.tie(0) -> sync_with_stdio(0);
fac[0] = ifac[0] = 1;
for(int i = 1; i <= C; ++i){
fac[i] = mul(i, fac[i-1]);
ifac[i] = mul(inv(i), ifac[i-1]);
}
solve();
}