When I first got MLE, I suspected it might be because of #define int long long
, but even after changing that I still got MLE. I couldn't figure out what was wrong as I was sure that the space was O(n).
When I used std::set
instead of std::deque
it used only 100MB memory.
std::deque code
...
#include <bits/stdc++.h>
#include <ext/pb_ds/assoc_container.hpp>
#include <ext/pb_ds/tree_policy.hpp>
// #define int long long
#define float double
#define all(x) (x).begin(), (x).end()
#define pnl cout << "\n"
#ifndef ONLINE_JUDGE
#define dbg(x) cerr << (#x) << " " << (x) << "\n"
#else
#define dbg(x) 42
#endif
using namespace std;
using namespace __gnu_pbds;
template <typename T>
using ordered_set = tree<T, null_type, less<T>, rb_tree_tag, tree_order_statistics_node_update>;
using vi = vector<int>;
using vvi = vector<vi>;
using ii = array<int, 2>;
using vii = vector<ii>;
template <typename T>
istream &operator>>(istream &in, vector<T> &a) {
for (auto &x : a) in >> x;
return in;
}
template <typename T>
ostream &operator<<(ostream &out, vector<T> &a) {
for (auto &x : a) out << x << " ";
return out;
}
mt19937_64 rng(chrono::steady_clock::now().time_since_epoch().count());
const int inf = numeric_limits<int>::max();
struct Node {
Node *l = 0, *r = 0;
int lo, hi, mset = inf, madd = 0, val = 0;
Node(int lo,int hi):lo(lo),hi(hi){} // Large interval of 0
Node(vector<int>& v, int lo, int hi) : lo(lo), hi(hi) {
if (lo + 1 < hi) {
int mid = lo + (hi - lo) / 2;
l = new Node(v, lo, mid); r = new Node(v, mid, hi);
val = l->val + r->val;
}
else val = v[lo];
}
int query(int L, int R) {
if (R <= lo || hi <= L) return 0;
if (L <= lo && hi <= R) return val;
push();
return l->query(L, R) + r->query(L, R);
}
void set(int L, int R, int x) {
if (R <= lo || hi <= L) return;
if (L <= lo && hi <= R) {
mset = x;
madd = 0;
val = x * (hi - lo);
}
else {
push();
l->set(L, R, x);
r->set(L, R, x);
val = l->val + r->val;
}
}
void add(int L, int R, int x) {
if (R <= lo || hi <= L) return;
if (L <= lo && hi <= R) {
if (mset != inf) mset += x;
else madd += x;
val += x * (hi - lo);
}
else {
push();
l->add(L, R, x);
r->add(L, R, x);
val = l->val + r->val;
}
}
void push() {
if (!l) {
int mid = lo + (hi - lo) / 2;
l = new Node(lo, mid); r = new Node(mid, hi);
}
if (mset != inf) {
l->set(lo, hi, mset);
r->set(lo, hi, mset);
mset = inf;
}
else if (madd) {
l->add(lo, hi, madd);
r->add(lo, hi, madd);
madd = 0;
}
}
};
void solve(int testcase) {
int n; cin >> n;
vi a(n); cin >> a;
map<int, deque<int>> pos;
for (int i = 0; i < n; i++) {
pos[a[i]].push_back(i);
}
vi op(n);
Node st(a, 0, n);
Node st2(op, 0, n);
int m; cin >> m;
while (m--) {
int clr; cin >> clr;
if (pos.find(clr) == pos.end()) continue;
auto &dq = pos[clr];
if (dq.size() < 2) continue;
while (dq.size()) {
int l = dq.front();
if (st2.query(l, l + 1)) {
dq.pop_front();
continue;
}
break;
}
while (dq.size()) {
int r = dq.back();
if (st2.query(r, r + 1)) {
dq.pop_back();
continue;
}
break;
}
if (dq.size() >= 2) {
int l = dq.front();
int r = dq.back();
st.set(l, r + 1, a[l]);
if (r > l + 1) st2.add(l + 1, r, 1);
}
}
for (int i = 0; i < n; i++) {
cout << st.query(i, i + 1) << " ";
}
pnl;
}
int32_t main() {
using namespace chrono;
ios::sync_with_stdio(false); cin.tie(nullptr); cout.tie(nullptr);
auto start = high_resolution_clock::now();
int T = 1; // cin >> T;
for (int t = 1; t <= T; t++) {
solve(t);
}
auto end = high_resolution_clock::now();
auto duration = duration_cast<milliseconds>(end - start);
#ifndef ONLINE_JUDGE
cerr << "Time: " << duration.count() << " ms\n";
#endif
return 0;
}
std::set code
...
#include <bits/stdc++.h>
#include <ext/pb_ds/assoc_container.hpp>
#include <ext/pb_ds/tree_policy.hpp>
// #define int long long
#define float double
#define all(x) (x).begin(), (x).end()
#define pnl cout << "\n"
#ifndef ONLINE_JUDGE
#define dbg(x) cerr << (#x) << " " << (x) << "\n"
#else
#define dbg(x) 42
#endif
using namespace std;
using namespace __gnu_pbds;
template <typename T>
using ordered_set = tree<T, null_type, less<T>, rb_tree_tag, tree_order_statistics_node_update>;
using vi = vector<int>;
using vvi = vector<vi>;
using ii = array<int, 2>;
using vii = vector<ii>;
template <typename T>
istream &operator>>(istream &in, vector<T> &a) {
for (auto &x : a) in >> x;
return in;
}
template <typename T>
ostream &operator<<(ostream &out, vector<T> &a) {
for (auto &x : a) out << x << " ";
return out;
}
mt19937_64 rng(chrono::steady_clock::now().time_since_epoch().count());
const int inf = numeric_limits<int>::max();
struct Node {
Node *l = 0, *r = 0;
int lo, hi, mset = inf, madd = 0, val = 0;
Node(int lo,int hi):lo(lo),hi(hi){} // Large interval of 0
Node(vector<int>& v, int lo, int hi) : lo(lo), hi(hi) {
if (lo + 1 < hi) {
int mid = lo + (hi - lo) / 2;
l = new Node(v, lo, mid); r = new Node(v, mid, hi);
val = l->val + r->val;
}
else val = v[lo];
}
int query(int L, int R) {
if (R <= lo || hi <= L) return 0;
if (L <= lo && hi <= R) return val;
push();
return l->query(L, R) + r->query(L, R);
}
void set(int L, int R, int x) {
if (R <= lo || hi <= L) return;
if (L <= lo && hi <= R) {
mset = x;
madd = 0;
val = x * (hi - lo);
}
else {
push();
l->set(L, R, x);
r->set(L, R, x);
val = l->val + r->val;
}
}
void add(int L, int R, int x) {
if (R <= lo || hi <= L) return;
if (L <= lo && hi <= R) {
if (mset != inf) mset += x;
else madd += x;
val += x * (hi - lo);
}
else {
push();
l->add(L, R, x);
r->add(L, R, x);
val = l->val + r->val;
}
}
void push() {
if (!l) {
int mid = lo + (hi - lo) / 2;
l = new Node(lo, mid); r = new Node(mid, hi);
}
if (mset != inf) {
l->set(lo, hi, mset);
r->set(lo, hi, mset);
mset = inf;
}
else if (madd) {
l->add(lo, hi, madd);
r->add(lo, hi, madd);
madd = 0;
}
}
};
void solve(int testcase) {
int n; cin >> n;
vi a(n); cin >> a;
map<int, set<int>> pos;
for (int i = 0; i < n; i++) {
pos[a[i]].insert(i);
}
vi op(n);
Node st(a, 0, n);
Node st2(op, 0, n);
int m; cin >> m;
while (m--) {
int clr; cin >> clr;
if (pos.find(clr) == pos.end()) continue;
auto &dq = pos[clr];
if (dq.size() < 2) {
pos.erase(clr);
continue;
}
while (dq.size()) {
int l = *dq.begin();
if (st2.query(l, l + 1)) {
dq.erase(dq.begin());
continue;
}
break;
}
while (dq.size()) {
int r = *prev(dq.end());
if (st2.query(r, r + 1)) {
dq.erase(prev(dq.end()));
continue;
}
break;
}
if (dq.size() >= 2) {
int l = *dq.begin();
int r = *prev(dq.end());
st.set(l, r + 1, a[l]);
if (r > l + 1) st2.add(l + 1, r, 1);
} else {
pos.erase(clr);
}
}
for (int i = 0; i < n; i++) {
cout << st.query(i, i + 1) << " ";
}
pnl;
}
int32_t main() {
using namespace chrono;
ios::sync_with_stdio(false); cin.tie(nullptr); cout.tie(nullptr);
auto start = high_resolution_clock::now();
int T = 1; // cin >> T;
for (int t = 1; t <= T; t++) {
solve(t);
}
auto end = high_resolution_clock::now();
auto duration = duration_cast<milliseconds>(end - start);
#ifndef ONLINE_JUDGE
cerr << "Time: " << duration.count() << " ms\n";
#endif
return 0;
}
Can anybody explain why std::deque
has such high memory usage?
Auto comment: topic has been updated by ArushN27 (previous revision, new revision, compare).
deque isn't stored contiguously in memory
This doesn't affect memory consumption though, only cache utilization.
Because its retarded. Deque needs like 2 kilos on its own to prepare for a lot of the shenanigans it can maintain (i.e. random access in constant time). All its subderivates also have the same thing (i.e. queue and stack). So try your best not to use them if you have a lot of them instantiated in parallel.
As a fun fact, you can force stacks to use
std::vector
by initializing them asstack<int,vector<int>>
, but I don't know how this affects performance.Random access in constant time is not the reason for why deque is memory hungry...
Deque internally allocates data in constant sized blocks. Depending on which implementation of the standard library you use, the block size can vary a lot.
According to https://devblogs.microsoft.com/oldnewthing/20230810-00/?p=108587, libstdc++ (gcc) uses 512 bytes per block, libc++ (clang) uses 4096 bytes per block, and msvc uses 16 bytes. (I don't know why msvc went with just 16 bytes. It completely kills the performance of its deque.)
Unlike a vector, deque simply allocates a new block everytime it fills up. This makes it so any pointer/reference to an element in a deque is never invalidated on push_back (this is the main feature of deque!). This feature can be very handy if you are working with pointers. Note also that if you want to store a lot of elements, a deque can be significantly more memory efficient compared to a vector since a deque doesn't have to over allocate by as much.
I'm sure a quick Google search would get you to quite a few instances of people crying about deque's memory usage, and some SE answers trying to explain why, so I won't attempt to do that.
As a generally helpful replacement for deque,
std::list
exists. We tend to largely ignore Linked lists in CP, but the STL implementation is fairly good at basic stuff like popping or inserting at both sides. This fortunately covers almost all uses of deque in most problems, and with less funky behaviour in my experience. It doesn't support O(1) random access though, and it doesn't look like you need it either. Your current code ACs with <100MB using lists.Less useful: For this specific submission, you can also AC by submitting in C++17, or deleting all deques of size 1 from the map.
Also, your
Node
has memory leaks. You don't deletel
andr
. Doesn't fix the issue here though.It's from kactl, I didn't bother.
Implementing a deque yourself using a circular buffer probably wouldn't be that hard either.
Rough idea: maintain a buffer of length $$$2^k$$$ as well as a pointer to the first element and the length of the deque. Accessing the $$$i$$$-th element is then done as
buf[(i + first) % 1 << k]
;pop_back
islength--
,pop_front
isfirst++; first %= 1 << k;
. Topush_back
andpush_front
, you can normally write the data to the correct location, but if the length of the deque would exceed $$$2^k$$$, migrate to a buffer of length $$$2^{k + 1}$$$ before (like vectors do).Reminds me of ...
Defining $$$10^6$$$
std::deque
in 512MB ML would definitely lead to MLE.A famous sentence in Chinese programming community.
can you share some other famous sentences in Chinese programming community?
About the queue-optimized Bellman-Ford algorithm (known as "SPFA" in Chinese programming community):It's dead.
Please share more Chinese wisdom