My solution: each bit is independent so let us consider them separately.
Now notice that if the first row and colmn are already filled, the whole grid is uniquely determined.
Use a dsu to maintain the equality relationship between the positions and the final time complexity is $$$\mathcal O((n + q) \log V \alpha n)$$$.
I have tested the code in mashups and it runs 2499ms, which means that the constant is too big. Can any one help me to optimize the code?
#include<bits/stdc++.h>
#ifdef DEBUG_LOCAL
#include <mydebug/debug.h>
#endif
using ll = long long;
const int N = 2e5+5,mod = 1e9 + 7,inv = (1e9 + 8) / 2;
using namespace std;
int T,n,m,k,q,x,y,z,fg[30][2],pw[N * 2]; vector<pair<int,int>> g[N];
struct dsu{
int tg[N],w[N],fa[N],as,fg,siz[N];
int fd(int x){
if(x == fa[x]) return x;
int t = fa[x]; fa[x] = fd(fa[x]),w[x] ^= w[t];
return fa[x];
}void mg(int u,int v,int c){
int ou = fd(u),ov = fd(v);
if(siz[ou] < siz[ov]) swap(ou,ov),swap(u,v);
if(ou == ov){
if(w[u] ^ w[v] ^ c) fg = 1;
return ;
}siz[ou] += siz[ov];
if(tg[ou] == -1) as --;
if(tg[ov] == -1) as --;
fa[ov] = ou,w[ov] = w[v] ^ w[u] ^ c;
if(tg[ov] >= 0) tg[ov] ^= w[ov];
tg[ou] = (tg[ou] == -2 ? -2 : (tg[ou] == -1 ? tg[ov] : (tg[ov] == -1 || tg[ou] == tg[ov] ? tg[ou] : -2)));
if(tg[ou] == -1) as ++;
if(tg[ou] == -2) fg = 1;
}void init(){
as = 0,fg = 0;
for(int i = 1;i <= n + m;i ++) fa[i] = i,siz[i] = 1,w[i] = 0,tg[i] = -1;
for(int i = 2;i <= n + m;i ++) if(i != m + 1) as ++;
}void tt(int x,int y){
int ox = fd(x); y ^= w[x];
if(tg[ox] == -1) as --;
tg[ox] = (tg[ox] == -2 ? -2 : (tg[ox] == -1 ? y : (y == -1 || y == tg[ox] ? tg[ox] : -2)));
if(tg[ox] == -2) fg = 1;
}
}d[30][2];
inline int calc(int x,int y,int z){
int res = 1,oz = z;
for(int i = 0;i < 30;i ++){
int sm = 0;
for(int j = 0;j < 2;j ++){
z = (oz >> i) & 1;
if(fg[i][j] || d[i][j].fg) continue;
if(x == 1 && y == 1) {if(z != j) {fg[i][j] = 1;continue;}}
else if(y == 1) d[i][j].tt(m + x,z);
else if(x == 1) d[i][j].tt(y,z);
else d[i][j].mg(x + m,y,j ^ z);
if(fg[i][j] || d[i][j].fg) continue;
sm = (sm + pw[d[i][j].as]) % mod;
}res = 1ll * res * sm % mod;
}
return res;
}void los(){
cin >> n >> m >> k >> q; int as = 1; pw[0] = 1;
for(int i = 1;i <= n + m;i ++) pw[i] = 2ll * pw[i - 1] % mod;
for(int i = 0;i < 30;i ++) for(int j = 0;j < 2;j ++) d[i][j].init(),fg[i][j] = 0;
for(int i = 1;i < n + m;i ++) as = 1ll * as * (1ll << 30) % mod;
for(int i = 1;i <= k;i ++) if(cin >> x >> y >> z,as) as = calc(x,y,z);
cout << as << "\n";
for(int i = 1;i <= q;i ++) cin >> x >> y >> z,as = as ? calc(x,y,z) : 0,cout << as << "\n";
}int main(){
ios::sync_with_stdio(0),cin.tie(0);
for(cin >> T;T --;) los();
}