AtCoder DP Editorial

Neo Wang, Dong Liu / April 22, 2021

– views

# Introduction

This post contains the editorials for all tasks contained in the AtCoder DP Contest, since there is no official editorial.

## A - Frog 1

Time Complexity: $\mathcal{O}(N)$

Use dynamic programming and define $\texttt{dp}[i]$ as the minimum cost to reach stone $i$. Then, there are only two transitions:

• Jump one stone:

$\texttt{dp}[i + 1] = \min(\texttt{dp}[i + 1], \texttt{dp}[i] + |\texttt{A}[i] - \texttt{A}[i + 1]|)$
• Jump two stones:

$\texttt{dp}[i + 2] = \min_{0 \le i + j < N}(\texttt{dp}[i + 2], \texttt{dp}[i] + |\texttt{A}[i] - \texttt{A}[i + 2]|)$
#include <bits/stdc++.h>

using namespace std;

const int mx = 1e5+1;

int A[mx], dp[mx];

int main() {
cin.tie(0)->sync_with_stdio(0);

int N; cin >> N;

for(int i = 0; i < N; ++i) {
cin >> A[i];
dp[i] = 1e9 + 7;
}

dp[0] = 0;

for(int i = 0; i < N; ++i) {
if(i + 1 < N) dp[i + 1] = min(dp[i + 1], dp[i] + abs(A[i] - A[i + 1]));
if(i + 2 < N) dp[i + 2] = min(dp[i + 2], dp[i] + abs(A[i] - A[i + 2]));
}

cout << dp[N - 1] << endl;
}


## B - Frog 2

Time Complexity: $\mathcal{O}(NK)$

This is the exact same problem as Frog 1, just with variable distances. Simply loop through each of the possible jumps, and use the previous transition where $\texttt{dp}[i]$ represents the best value for stone $i$.

$\texttt{dp}[i + j] = \min_{0 \le i + j < N}(\texttt{dp}[i + j], \texttt{dp}[i] + |\texttt{A}[i] - \texttt{A}[i + j]|)$
#include <bits/stdc++.h>

using namespace std;

const int mx = 1e5+1;

int A[mx], dp[mx];

int main() {
cin.tie(0)->sync_with_stdio(0);

int N, K; cin >> N >> K;

for(int i = 0; i < N; ++i) {
cin >> A[i];
dp[i] = 1e9 + 7;
}

dp[0] = 0;

for(int i = 0; i < N; ++i) {
for(int j = 1; j <= K; ++j) { // j is jump
if(i + j < N) dp[i + j] = min(dp[i + j], dp[i] + abs(A[i] - A[i + j]));
}
}

cout << dp[N - 1] << endl;
}


## C - Vacation

Time Complexity: $\mathcal{O}(N)$

Since Taro can't do the activities for two or more consecutive days, we can instead define $\texttt{dp}[i][j]$ as the best possible value on the $i$-th day that ends on activity $j$. Hence, the best we can do for any day $A$ is either the previous best ending on day $B$ added to the happiness attained from $C$, or the previous best ending on day $C$ added to the happiness attained from $B$. The same goes for days $B, C$. In this sense, our formulation is:

$\texttt{dp}[i][j] = \max_{k \neq j}(dp[i-1][k] + V[d] : d \neq j, d \neq k)$
#include <bits/stdc++.h>

using namespace std;

const int mx = 1e5+1;

bool ckmax(int& a, const int& b) {
return a < b ? a = b, 1 : 0;
}

int dp[mx][3];

int main() {
cin.tie(0)->sync_with_stdio(0);

int N; cin >> N;

for(int i = 1; i <= N; ++i) {
int a, b, c; cin >> a >> b >> c;
ckmax(dp[i][0], max(dp[i - 1][1] + b, dp[i - 1][2] + c));
ckmax(dp[i][1], max(dp[i - 1][0] + a, dp[i - 1][2] + c));
ckmax(dp[i][2], max(dp[i - 1][0] + a, dp[i - 1][1] + b));
}

cout << max(dp[N][0], max(dp[N][1], dp[N][2])) << endl;
}


## D - Knapsack 1

Time Complexity: $\mathcal{O}(NW)$

This is the classical knapsack problem. Notice that because $v_i \le 10^9$, it is not feasible to store $v_i$ in our $\texttt{dp}$ array. Instead, store the possible values of $W (W \le 10^5)$. Let $\texttt{dp}[i][j]$ represent the maximum value that can be attained by the first $i$ weights with a weight of $j$. Then, this turns into the classical knapsack problem.

#include <bits/stdc++.h>

using namespace std;

const int mx = 1e5+1;

template<class T> bool ckmax(T& a, const T& b) {
return a < b ? a = b, 1 : 0;
}

long long dp[101][mx];
int w[101], v[101];

int main() {
cin.tie(0)->sync_with_stdio(0);

int N, W; cin >> N >> W;
for(int i = 0; i < N; ++i) cin >> w[i] >> v[i];

for(int i = 0; i < N; ++i) for(int j = 0; j <= W; ++j) {
if(j + w[i] <= W) ckmax(dp[i + 1][j + w[i]], dp[i][j] + v[i]);
ckmax(dp[i + 1][j], dp[i][j]);
}

cout << dp[N][W] << endl;
}


## E - Knapsack 2

Time Complexity: $\mathcal{O}(N^2V)$

This is the exact same problem except instead of a high value of $v_i$, there is a high value of $w_i$. Now, we must minimize the value of $w_i$ for any given $v_i$, and then try to find out the maximum value of $v_i$ that can be reached.

Define $\texttt{dp}[i]$ as the lowest weight we can achieve for value $i$. The transition then, is:

$\texttt{dp}[j + \texttt{v}[i]] = \min (\texttt{dp}[j + \texttt{v}[i]],\texttt{dp}[j] + \texttt{w}[i])$
#include <bits/stdc++.h>

using namespace std;

const int mx = 1e5+1;

template<class T> bool ckmin(T& a, const T& b) {
return a > b ? a = b, 1 : 0;
}

long long dp[mx];
int w[101], v[101];

int main() {
cin.tie(0)->sync_with_stdio(0);

int N, W; cin >> N >> W;
for(int i = 0; i < N; ++i) cin >> w[i] >> v[i];
for(int i = 0; i < mx; ++i) dp[i] = 1e18;

dp[0] = 0;

for(int i = 0; i < N; ++i) {
for(int j = mx - 1; j >= 0; j--) {
if(dp[j] + w[i] <= W) ckmin(dp[j + v[i]], dp[j] + w[i]);
}
}

for(int i = mx - 1; i >= 0; i--) {
if(dp[i] != 1e18) {
cout << i << endl;
break;
}
}
}


## F - LCS

Time Complexity: $\mathcal{O}(N^2)$

First read this, then following the according $\texttt{dp}$ model, build the string accordingly.

#include <bits/stdc++.h>

using namespace std;

template<class T> bool ckmax(T& a, const T& b) {
return a < b ? a = b, 1 : 0;
}

int dp[3001][3001];

int main() {
cin.tie(0)->sync_with_stdio(0);
string s, t; cin >> s >> t;
int n = s.size(), m = t.size();

for(int i = 0 ; i <= n; ++i) {
for(int j = 0; j <= m; ++j)  {
if(!i || !j) dp[i][j] = 0;
else if(s[i - 1] == t[j - 1]) dp[i][j] = 1 + dp[i - 1][j - 1];
else dp[i][j] = max(dp[i - 1][j], dp[i][j - 1]);
}
}

string ret = "";

while(n && m) {
if(s[n - 1] == t[m - 1]) {
ret += s[n - 1];
n--;
m--;
}
else if(dp[n - 1][m] > dp[n][m - 1]) n--;
else m--;
}

reverse(ret.begin(), ret.end());

cout << ret << endl;
}


## G - Longest Path

Time Complexity: $\mathcal{O}(N + M)$

Simply perform a DFS on the graph, defining $\texttt{dp}[i]$ as the longest path that node $i$ can take. Notice how the optimal substructure is formed: the best path for any node $x$ is one added to the best path for any of its children.

#include <bits/stdc++.h>

using namespace std;

vector<int> dp(100001);
vector<vector<int>> adj(100001);

int dfs(int x) {
if (dp[x]) return dp[x];
for (auto e : adj[x]){
dp[e] = dfs(e);
dp[x] = max(dp[e] + 1, dp[x]);
}
return dp[x];
}

int main(){
int n,m;
cin >> n >> m;

for(int i = 0; i < m; ++i) {
int a, b;
cin >> a >> b;
a--; b--;
adj[a].push_back(b);
}

for (int i = 0; i < n; ++i) {
dfs(i);
}

int ans = 0;

for (int i = 0;i < n; ++i) {
ans = max(dp[i], ans);
}

cout << ans;
}


## H - Grid 1

Time Complexity: $\mathcal{O}(N^2)$

A full tutorial can be found here.

#include <bits/stdc++.h>

using namespace std;

bool ok[1000][1000];
long long dp[1000][1000];

int main() {
cin.tie(0)->sync_with_stdio(0);

int n; cin >> n;
for(int i = 0; i < n; ++i) {
string s;
cin >> s;
for(int j = 0; j < n; ++j) {
if(s[j] == '.') ok[i][j] = true;
else ok[i][j] = false;
}
}

dp[0][0] = 1;
for(int i = 0; i < n; ++i) {
for(int j = 0; j < n; ++j) {
if(!ok[i][j]) dp[i][j] = 0;
else {
if(i > 0) dp[i][j] += dp[i - 1][j];
if(j > 0) dp[i][j] += dp[i][j - 1];
dp[i][j] %= 1000000007;
}
}
}

cout << dp[n - 1][n - 1] << "\n";

return 0;
}


## I - Coins

Time Complexity: $\mathcal{O}(N^2)$

Define $\texttt{dp}[i][j]$ to be the probability after tossing the first $i$ coins, and receiving $j$ heads. Then, our probability of flipping $j$ heads from the first $i$ coins is the addition of the following:

• either we flipped a head with probability $p$
• then use $\texttt{dp}[i-1][j-1]$, the probability of receiving $j-1$ heads from the previous toss.
• we flipped a tail with probability $1-p$
• then use $\texttt{dp}[i-1][j]\cdot(1-\texttt{p}[i-1])$, the probability of receiving $j$ heads from the previous toss.
$\texttt{dp}[i][j] = \texttt{dp}[i-1][j-1]\cdot \texttt{p}[i-1] + \texttt{dp}[i-1][j]\cdot(1-\texttt{p}[i-1])$
#include<bits/stdc++.h>

using namespace std;

long double dp[3001][3001];

int main() {
cin.tie(0)->sync_with_stdio(0);

int n;
cin >> n;

vector<long double> p(n);

for(int i = 0; i < n; ++i) cin >> p[i];

int leastHeads = n / 2 + 1;

for(int i = 0; i <= n; ++i) {
dp[i][0] = 1;
}

for(int i = 1; i <= n; ++i) {
for(int j = 1; j <= leastHeads; ++j) {
dp[i][j] = dp[i - 1][j - 1] * p[i - 1] + dp[i - 1][j] * (1 - p[i - 1]);
}
}

cout << fixed << setprecision(10) << dp[n][leastHeads] << endl;
}


## J - Sushi

Time Complexity: $\mathcal{O}(N^3)$

Let $\texttt{dp}[x][y][z]$ represent the expected moves for $x$ number of plates 1-sushi remaining, $y$ number of plates 2-sushi remaining, $z$ number of plates 3-sushi remaining.

Then, we can use the relation

$\texttt{dp}[x][y][z] = n + x \cdot \texttt{dp}[x-1][y][z] + y \cdot \texttt{dp}[x+1][y-1][z] + z \cdot \texttt{dp}[x][y+1][z-1]$

Note that we add $1$ for the $y$ and $z$ equations because we take from one sushi platter which transitions into one more sushi in another grouping of either $x,y$. For example, by taking one sushi away from a group of size $2$ then there is a corresponding increase in a sushi group of size $1$.

#include <bits/stdc++.h>

using namespace std;

int n;

long double dp[301][301][301];
// let dp[i][j][k] be
// i dishes of 1 sushi
// j dishes of 2 sushi
// k dishes of 3 sushi

double memo(int x, int y, int z) {
if(x < 0 || y < 0 || z < 0) return 0;
if(x == 0 && y == 0 && z == 0) return 0;
// if(x + y + z < 1) return 0;
if(dp[x][y][z] > 0) return dp[x][y][z];

long double ret = n + x * memo(x - 1, y, z)  // # 1 sushi
+ y * memo(x + 1, y - 1, z)  // # 2 sushi
+ z * memo(x, y + 1, z - 1); // # 3 sushi

return dp[x][y][z] = ret / (x + y + z);
}

int main() {
cin.tie(0)->sync_with_stdio(0);

cin >> n;
vector<int> a(n); for(int i = 0; i < n; ++i) cin >> a[i];

vector<int> freq(3);
for(int x : a) freq[x - 1]++;

memset(dp, -1, sizeof dp);

cout << fixed << setprecision(10) << memo(freq[0], freq[1], freq[2]) << endl;
}


## K - Stones

Time Complexity: $\mathcal{O}(N^2)$

Define $\texttt{dp}[i]$ as if it's possible to win with $i$ stones remaining. Then keep two loops, one for values of $i (1 \le i \le K)$, and the other for each value in $A$. If $i \ge j$ and it was not possible to win a game with $i - a_j$ stones, then - because the turns alternate - a game with $i$ stones is winnable.

#include <bits/stdc++.h>

using namespace std;

int main() {
cin.tie(0)->sync_with_stdio(0);

int n, k;
cin >> n >> k;
vector<int> a(n);
for (int& x : a) cin >> x;
vector<bool> dp(k + 1);
for (int i = 1; i <= k; ++i)
for (int j : a)
if (i >= j && !dp[i - j])
dp[i] = 1;
cout << (dp[k] ? "First" : "Second") << '\n';
}


## L - Deque

Time Complexity: $\mathcal{O}(N^2)$

Define $\texttt{dp}[i][j]$ as the optimal score for Jiro $(X - Y)$ using the range $[i, j]$. Then, we can either choose $a_i$ to append to the left of the range, or $a_j$ on the right. Then, our two transitions are

$\texttt{dp}[i][j] = \max_{j > i} \left\{\begin{matrix} a_i - \texttt{dp}[i+1][j] \\ a_j - \texttt{dp}[i][j-1] \end{matrix}\right.$

Our initial states are $\texttt{dp}[i][i] = 0$, since any range of size $0$ will have difference $0$.

#include <bits/stdc++.h>

using namespace std;

long long dp[3001][3001];

int main() {
cin.tie(0)->sync_with_stdio(0);

int n;
cin >> n;

vector<int> a(n);
for (int& x : a) cin >> x;

for (int i = 0; i < n; ++i) dp[i][i] = a[i];

for (int i = n - 1; i >= 0; i--)
for (int j = i + 1; j < n; ++j)
dp[i][j] = max(a[i] - dp[i + 1][j], a[j] - dp[i][j - 1]);
cout << dp[0][n - 1] << '\n';
}


## M - Candies

Time Complexity: $\mathcal{O}(NK)$

Let $\texttt{dp}[i][j]$ represent the number of ways to distribute $j$ candies to the first $i$ children.

Then we have

$\texttt{dp}[i][j]=\sum{\texttt{dp}[i-1][j']}$

such that $0\leq j - j' \leq a_i$.

We can use prefix sums to speed this up from $\mathcal O(N\cdot K^2)$ to $\mathcal O(N\cdot K)$.

#include <bits/stdc++.h>

using namespace std;

const int MAXK = 100000;
const int MOD = 1000000007;

int add(int i, int j) {
if ((i += j) >= MOD)
i -= MOD;
return i;
}

int sub(int i, int j) {
if ((i -= j) < 0)
i += MOD;
return i;
}

int main() {
cin.tie(0)->sync_with_stdio(0);

int n, k, a;
cin >> n >> k;

static int dp[MAXK + 1], dq[MAXK + 1];
dp[0] = 1;
for (int i = 1; i <= n; ++i) {
cin >> a;
for (int j = 1; j <= k; ++j)
dp[j] = add(dp[j - 1], dp[j]);
for (int j = 0; j <= k; ++j)
dq[j] = sub(dp[j], (j > a ? dp[j - a - 1] : 0));
swap(dp, dq);
}
cout << dp[k] << '\n';
}


## N - Slimes

Let $\texttt{dp}[i][j]$ represent the minimum cost to merge the slimes $i\dots j$ into one slime.

To calculate $\texttt{dp}[i][j]$, we want to merge two smaller (combined) slimes in the range $[i, j]$. To do this, we loop through all $k$ in the range $[i, j - 1]$ and simulate merging $[i, k]$ and $[k + 1, j]$. Note that the cost of this is $\texttt{dp}[i][k] + \texttt{dp}[k + 1][j] + a[i] + a[i + 1] + \dots + a[j]$. Here, $a[i] + a[i + 1] + \dots + a[j]$ can be precalculated with prefix sums.

This takes $\mathcal O(N^3)$ which fits in the Time Limit but can be optimized to $\mathcal O(N^2)$ with Knuth's Optimization.

The code below is $\mathcal O(N^3)$.

#include <bits/stdc++.h>
using namespace std;

const int MAXN = 400;
const long long INF = 0x3f3f3f3f3f3f3f3f;

int main() {
cin.tie(0)->sync_with_stdio(0);

int n;
cin >> n;
vector<long long> a(n), p(n);
for (long long& x : a) cin >> x;
for (int i = 0; i < n; ++i)
p[i] = (i == 0 ? 0 : p[i - 1]) + a[i];
auto query = [&](int l, int r) {
return p[r] - (l == 0 ? 0 : p[l - 1]); };

static long long dp[MAXN][MAXN];
memset(dp, 0x3f, sizeof(dp));
for (int i = 0; i + 1 < n; ++i) dp[i][i + 1] = a[i] + a[i + 1];
for (int i = 0; i < n; ++i) dp[i][i] = 0;
for (int i = n - 1; i >= 0; --i)
for (int j = i + 2; j < n; ++j) {
long long best = INF;
for (int k = i; k < j; ++k)
best = min(best, dp[i][k] + dp[k + 1][j]);
dp[i][j] = query(i, j) + best;
}
cout << dp[0][n - 1] << '\n';
}


## O - Matching

Time Complexity: $\mathcal{O}(N\cdot 2^N)$

If we define $\texttt{dp}[S]$ to be the number of matchings for females in the set $S$ to the first $|S|$ males, this problem boils down to the following:

$\texttt{dp}[S] = \sum \texttt{dp}[S\backslash x]: \texttt{compatible}[|S|][x]$

(The : stands for "such that".) In English, this is equivalent to the following:

The number of matchings in a subset $S$ to include a certain female $x$ is equivalent to the sum of all the matchings without female $x$ where female $x$ is compatible with the $|S|$-th male.

Our base case is the empty set, which has a value of $1$ (the empty set can be considered as a single matching involving zero pairs).

#include <bits/stdc++.h>

using namespace std;

const int MOD = 1e9 + 7;
const int MAX_N = 21;

bool compat[MAX_N][MAX_N];
int dp[1 << MAX_N];

int main() {
int N;
cin >> N;
for (int i = 0; i < N; ++i) {
for (int j = 0; j < N; ++j) {
cin >> compat[i][j];
}
}

dp[0] = 1;

for (int s = 0; s < (1 << N); s++) {
int pair_num = __builtin_popcount(s);
for (int w = 0; w < N; w++) {
/*
* check that
* 1. this woman hasn't been paired already
* 2. she's also compatible with the {pair_num + 1}th man
*/
if ((s & (1 << w)) || !compat[pair_num][w])
continue;

// add the amount to future dp states
dp[s | (1 << w)] += dp[s];
dp[s | (1 << w)] %= MOD;
}
}

cout << dp[(1 << N) - 1] << endl;
}


## P - Independent Set

Time Complexity: $\mathcal{O}(N)$

First, lets root the tree arbitrary. Let $\texttt{dp}[i][j]$ represent the number of ways to color subtree $i$, coloring node $i$ color $j$.

Here, $j=0$ is black and $j=1$ is white. If we color node $i$ black, then it's children must be all white. So, we have

$\texttt{dp}[i][0]=\prod \texttt{dp}[c][1]$

such that $c$ is $i$'s child, and if we color node $i$ white, then it's children can be either white or black:

$\texttt{dp}[i][1]=\prod({\texttt{dp}[c][0] + \texttt{dp}[c][1]})$
#include <bits/stdc++.h>

using namespace std;

const int MAXN = 1e5;
const int MOD = 1e9 + 7;

int add(int i, int j) {
if ((i += j) >= MOD)
i -= MOD;
return i;
}
int mul(int i, int j) {
return int((long long) i * j % MOD);
}

vector<int> adj[MAXN];
int dp[MAXN][2];

void dfs(int i, int p) {
dp[i][0] = dp[i][1] = 1;
for (int j : adj[i]) if (j != p) {
dfs(j, i);
dp[i][0] = mul(dp[i][0], dp[j][1]);
dp[i][1] = mul(dp[i][1], add(dp[j][0], dp[j][1]));
}
}

int main() {
cin.tie(0)->sync_with_stdio(0);

int n;
cin >> n;
for (int i = 0, a, b; i + 1 < n; ++i) {
cin >> a >> b, --a, --b;
adj[a].push_back(b), adj[b].push_back(a);
}
dfs(0, 0);
cout << add(dp[0][0], dp[0][1]) << '\n';
}


## Q - Flowers

Time Complexity: $\mathcal{O}(N\log N)$

Let $\texttt{dp}[i]$ represent the maximum beauty of a subsequence of flowers ending at $i$.

$\texttt{dp}[i]=\max(\texttt{dp}[j]) + a_i$

such that $j < i$ and $h_j < h_i$.

However, looping through all values of $j$ would result in a complexity of $\mathcal O(N^2)$, which would TLE.

We can use a data-structure to speed this up. Notice that when calculating $\texttt{dp}[i]$, we're querying the maximum $\texttt{dp}[j]$ such that $h_j < h_i$ and adding $a_i$.

In other words, we're querying the maximum $\texttt{dp}[j]$ which $h_j\in[1, h_i - 1]$, which can be handled with a data-structure like a segment tree or a binary indexed tree.

#include <bits/stdc++.h>

using namespace std;

const int MAXN = 2e5;

int n;
long long bit[MAXN + 1];

void update(int i, long long x) {
for ( ; i <= n; i += i & -i)
bit[i] = max(bit[i], x);
}

long long query(int i) {
long long ans = 0;
for ( ; i > 0; i -= i & -i)
ans = max(ans, bit[i]);
return ans;
}

int main() {
cin.tie(0)->sync_with_stdio(0);

cin >> n;
vector<int> h(n), a(n);
for (int i = 0; i < n; ++i) cin >> h[i];
for (int i = 0; i < n; ++i) cin >> a[i];
for (int i = 0; i < n; ++i) {
update(h[i], query(h[i] - 1) + a[i]);
}
cout << query(n) << '\n';
}


## R - Walk

We're given a adjacency matrix $a$ and we want to calculate the number of paths of length $k$. First, lets convert $a$ into a Matrix, $m$.

Notice that when we multiply $m$ by $m$, we get the number of paths with length $2$. If we take $m^p$, we end up with the number of paths of length $p$ from each $i$ to $j$.

Thus, we're looking for $m^k$, which can be calculated with binary exponentiation.

Time Complexity: $\mathcal{O}(N^3\log K)$

#include <bits/stdc++.h>

using namespace std;

const int MOD = 1e9 + 7;

int add(int i, int j) {
if ((i += j) >= MOD)
i -= MOD;
return i;
}
int mul(int i, int j) {
return int((long long) i * j % MOD);
}

template<class T> struct Matrix {
T **mat; int a, b;

Matrix() : a(0), b(0) {}
Matrix(int a_, int b_) : a(a_), b(b_) {
int i, j;

mat = new T*[a];
for (i = 0; i < a; ++i) {
mat[i] = new T[b];
for (j = 0; j < b; ++j)
mat[i][j] = 0;
}
}
Matrix(const vector<vector<T>>& vt) {
int i, j;

*this = Matrix((int) vt.size(), (int) vt[0].size());
for (i = 0; i < a; ++i)
for (j = 0; j < b; ++j)
mat[i][j] = vt[i][j];
}
Matrix operator*(const Matrix& m) {
int i, j, k;

assert(b == m.a);
Matrix r(a, m.b);
for (i = 0; i < a; ++i)
for (j = 0; j < m.b; ++j)
for (k = 0; k < b; ++k)
r.mat[i][j] = add(r.mat[i][j],
mul(mat[i][k], m.mat[k][j]));
return r;
}
Matrix& operator*=(const Matrix& m) {
return *this = (*this) * m;
}
friend Matrix power(Matrix m, long long p) {
int i;

assert(m.a == m.b);
Matrix r(m.a, m.b);
for (i = 0; i < m.a; ++i)
r.mat[i][i] = 1;
for ( ; p > 0; p >>= 1, m *= m)
if (p & 1)
r *= m;
return r;
}
};

int main() {
cin.tie(0)->sync_with_stdio(0);

int n; long long k;
cin >> n >> k;
vector<vector<int>> adj(n, vector<int>(n));
for (int i = 0; i < n; ++i)
for (int j = 0; j < n; ++j)
cin >> adj[i][j];
Matrix<int> mat(adj);
mat = power(mat, k);
int ans = 0;
for (int i = 0; i < n; ++i)
for (int j = 0; j < n; ++j)
ans = add(ans, mat.mat[i][j]);
cout << ans << '\n';
}


## S - Digit Sum

Time Complexity: $\mathcal{O}(|K|D)$

We can use digit dp to solve this problem. Let $\texttt{dp}[i][j]$ represent the number of ways to create a length $|K|$ number which is a multiple of $D$ given that the $i$ digits is fixed and has a sum equivalent to $j \pmod D$.

To find the answer, we will loop through all prefixes of $K$ and calculate the number of ways to create the remaining suffix, which can be mapped to a dp state.

#include <bits/stdc++.h>

using namespace std;

const int MAXL = 10000;
const int MAXD = 100;
const int MOD = 1e9 + 7;

int dp[MAXL + 1][MAXD];

int add(int i, int j) {
if ((i += j) >= MOD)
i -= MOD;
return i;
}

void init(int l, int d) {
dp[l][0] = 1;
for (int i = l - 1; i >= 0; --i)
for (int j = 0; j < d; ++j)
for (int k = 0; k < 10; ++k)
dp[i][j] = add(dp[i][j], dp[i + 1][(j + k) % d]);
}

int query(const string& s, int d) {
int ans = 0, carry = 0;
for (int i = 0; i < (int) s.length(); ++i) {
for (int j = 0; j < (s[i] - '0'); ++j)
ans = add(ans, dp[i + 1][(carry + j) % d]);
carry = (carry + (s[i] - '0')) % d;
}
if (carry == 0) ans++;
if (--ans < 0) ans += MOD;
return ans;
}

int main() {
cin.tie(0)->sync_with_stdio(0);

string s; int d;
cin >> s >> d;
init((int) s.size(), d);
cout << query(s, d) << '\n';
}


## T - Permutation

Time Complexity: $\mathcal{O}(N^2)$

Let $\texttt{dp}[i][j]$ represent the number of ways to create a permutation with length $i+1$ the first $i$ signs, using $j$ as the last element of the permutation.

If $s[i]$ is $>$, then, $\texttt{dp}[i][j]=\sum{\texttt{dp}[i-1][k]}$ for all $k\geq j$.

Otherwise, $\texttt{dp}[i][j]=\sum{\texttt{dp}[i-1][k]}$ for all $k < j$. This can be sped up to $\mathcal O(N^2)$ with prefix sums.

#include <bits/stdc++.h>

using namespace std;

const int MAXN = 3000;

const int MOD = 1e9 + 7;

int add(int i, int j) {
if ((i += j) >= MOD)
i -= MOD;
return i;
}
int sub(int i, int j) {
if ((i -= j) < 0)
i += MOD;
return i;
}

int main() {
cin.tie(0)->sync_with_stdio(0);

int n;
string s;

cin >> n >> s;
static int dp[MAXN + 1], dq[MAXN + 1];
dp[1] = 1;
for (int i = 1; i < n; ++i) {
for (int j = 1; j <= i + 1; ++j)
dp[j] = add(dp[j], dp[j - 1]);
for (int j = 1; j <= i + 1; ++j)
if (s[i - 1] == '>')
dq[j] = sub(dp[i], dp[j - 1]);
else
dq[j] = dp[j - 1];
swap(dp, dq);
}
int ans = 0;
for (int i = 1; i <= n; ++i)
ans = add(ans, dp[i]);
cout << ans << '\n';
}


## U - Grouping

Time Complexity: $\mathcal{O}(3^N + 2^N\cdot N^2)$

Let $\texttt{cost}[S]$ represent the score of assigning all Rabbits in set $S$ into the same group. This can be calculated in $\mathcal O(2^N \cdot N^2)$.

Now, we calculate $\texttt{dp}[S]$, the maximum score of assigning the rabbits in set $S$. To calculate $\texttt{dp}[S]$, we have to loop through all subsets of $S$ and simulate putting all the rabbits in the subset in the same group. Fortunately, you can do this in $\mathcal O(3^N)$ (see https://cp-algorithms.com/algebra/all-submasks.html#toc-tgt-1 ), which runs in time for $N\leq 16$.

#include <bits/stdc++.h>
using namespace std;

const int MAXN = 16;

int main() {
cin.tie(0)->sync_with_stdio(0);

int n;
static int a[MAXN][MAXN];
static long long cost[1 << MAXN];
static long long dp[1 << MAXN];
cin >> n;
for (int i = 0; i < n; ++i)
for (int j = 0; j < n; ++j)
cin >> a[i][j];
for (int i = 0; i < (1 << n); ++i)
for (int j = 0; j < n; ++j)
for (int k = j + 1; k < n; ++k)
if (i & (1 << j) && i & (1 << k))
cost[i] += a[j][k];
for (int i = 0, j; i < (1 << n); ++i) {
j = ((1 << n) - 1) ^ i;
for (int k = j; k; k = (k - 1) & j)
dp[i ^ k] = max(dp[i ^ k], dp[i] + cost[k]);
}
cout << dp[(1 << n) - 1] << '\n';
}


## V - Subtrees

This explains it pretty well.

## W - Intervals

Let $\texttt{dp}[i]$ represent the maximum score of a string with the $i$th bit turned on.

For our relation, we have

$\texttt{dp}[i] = \max(\texttt{dp}[j] + \texttt{cost}(i, j))$

such that $j < i$ and $\texttt{cost}(i, j)$ contains the sum of all $a$ such that $l\leq i\leq r$ but not $l\leq j\leq r$.

This works, in $\mathcal O(N^2)$, which unfortunately, TLEs.

However, we can use a segment tree to speed this up. To calculate $\texttt{dp}[i]$, we can query the maximum of $[0, i - 1]$ in the segment tree. Now, we just need to support activating/deactivating each query.

• When we reach a $l$, we add $a$ to each dp value in the range $[0, a - 1]$
• When we reach a $r$, we subtract $a$ from each dp value in the range $[0, a - 1]$

Now, we can just query the maximum of $\texttt{dp}[0\dots i-1]$ and update it into our segment tree!

#include <bits/stdc++.h>
using namespace std;

const long long INF = 0x3f3f3f3f3f3f3f3f;

template<class T> struct LazySeg {
int sz; vector<T> st, lz;

T comb(T a, T b) {
return max(a, b);
}
void init(int n) {
sz = 1; while (sz < n) sz <<= 1;
st.assign(2 * sz, 0), lz.assign(2 * sz, 0);
}
void pull(int i) {
st[i] = comb(st[i << 1], st[i << 1 | 1]);
}
void push(int i, int l, int r) {
st[i] += lz[i];
if (l != r) lz[i << 1] += lz[i], lz[i << 1 | 1] += lz[i];
lz[i] = 0;
}
void update(int ql, int qr, T x, int i, int l, int r) {
push(i, l, r); if (ql > r || qr < l) return;
if (ql <= l && qr >= r) { lz[i] += x; return void(push(i, l, r)); }
int m = (l + r) >> 1; update(ql, qr, x, i << 1, l, m);
update(ql, qr, x, i << 1 | 1, m + 1, r); pull(i);
}
void update(int ql, int qr, T x) {
update(ql, qr, x, 1, 0, sz - 1);
}
T query(int ql, int qr, int i, int l, int r) {
push(i, l, r); if (ql > r || qr < l) return -INF;
if (ql <= l && qr >= r) return st[i]; int m = (l + r) >> 1;
return comb(query(ql, qr, i << 1, l, m), query(ql, qr, i << 1 | 1, m + 1, r));
}
T query(int ql, int qr) {
return query(ql, qr, 1, 0, sz - 1);
}
};

LazySeg<long long> st;

int main() {
cin.tie(0)->sync_with_stdio(0);

int n, m;
cin >> n >> m;
vector<long long> add(n + 2);
vector<vector<array<int, 2>>> todo(n + 2);
while (m--) {
int l, r, a;
cin >> l >> r >> a;
add[l] += a;
todo[r + 1].push_back({a, l});
}
st.init(n + 1);
for (int i = 1; i <= n + 1; ++i) {
st.update(0, i - 1, add[i]);
for (array<int, 2> ar : todo[i])
st.update(0, ar[1] - 1, -ar[0]);
st.update(i, i, st.query(0, i - 1));
}
cout << st.query(0, n) << '\n';
}


## X - Towers

Notice that the optimal sorting of elements is by the sum of their weight and strength.

Consider two orderings for blocks $a$ and $b$.

• If the tower $a$ is on the bottom, then the strength of the tower is
$s_a - w_b$
• If the tower $b$ is on the bottom, then the strength of the tower is
$s_b - w_a$

This produces the equation

$s_a - w_b \overset{?}{=} s_b - w_a$
$s_a + w_a \overset{?}{=} s_b + w_b$

Obviously, we want to maximize the strength of our tower, so put the one with less $s_i + w_i$ on the top.

Then, define $\texttt{dp}[i]$ as the maximum value we can achieve for a weight of $i$. We process the tower top-down, only calculating weights $j : j \le s_i$ since any more would break the tower. Our relation is then:

$\texttt{dp}[j + \texttt{w}[x]] = \max(j + \texttt{dp}[\texttt{w}[x]], \texttt{dp}[j] + \texttt{v}[x])$
#include <bits/stdc++.h>

using namespace std;

const int MAX_N = 1e3+1;

long long dp[20001];
int w[MAX_N], s[MAX_N], v[MAX_N], p[MAX_N];

int main() {
cin.tie(0)->sync_with_stdio(0);

int N; cin >> N;
for(int i = 0; i < N; ++i) cin >> w[i] >> s[i] >> v[i];

iota(p, p + N, 0);

sort(p, p + N, [&](const int &a, const int &b) {
return w[a] + s[a] < w[b] + s[b];
});

for(int i = 0; i < N; ++i) {
int x = p[i];
for(int j = s[x]; j >= 0; --j) {
dp[j + w[x]] = max(dp[j + w[x]], dp[j] + v[x]);
}
}

cout << *max_element(dp, dp + 20001) << endl;
}


## Y - Grids 2

Time Complexity: $\mathcal O(H+W+N^2)$

First, note that the number of ways to walk from cell $(i, j)$ to $(x, y)$ is ${x-i+y-j}\choose{x-i}$, which can be calculated in $\mathcal O(1)$ if you precompute factorials.

Let $\texttt{dp}[i]$ represent the number of ways to walk from $(1, 1)$ to the ith square without passing any other squares other than the ith one.

To calculate this, note that if there are no squares in between $(1, 1)$ and $(r_i, c_i)$, then there are a total of ${r_i-1+c_i-1}\choose{r_i-1}$ ways to do this.

We can subtract this value from the number of ways to pass through a square in the path from $(1, 1)$ to $(r_i, c_i$). This can be done by looping through all squares $j$ such that $r_j\leq r_i$ and $c_j\leq c_i$.

To calculate the answer, we can simply add an extra square at $(H, W)$ and take the resulting $\texttt{dp}$ value.

#include <bits/stdc++.h>
using namespace std;

#define N   100000
#define M   3000
#define MOD 1000000007

int n, m, k, dp[M + 1];
long long fac[N << 1], ifac[N << 1];
struct P {
int x, y;
bool operator<(const P& other) const {
if (x == other.x) return y < other.y;
return x < other.x;
}
} point[M + 1];

int power(long long x, int p) {
long long res = 1;
while (p > 0) {
if (p & 1) res = res * x % MOD;
x = x * x % MOD, p >>= 1;
}
return res;
}

void genFac(int size) {
fac[0] = 1;
for (int i = 1; i <= size; ++i)
fac[i] = fac[i - 1] * i % MOD;
ifac[size] = power(fac[size], MOD - 2);
for (int i = size; i >= 1; --i)
ifac[i - 1] = ifac[i] * i % MOD;
}
int choose(int n, int k) {
assert(n >= k);
return fac[n] * ifac[k] % MOD * ifac[n - k] % MOD;
}
int path(int i, int j, int x, int y) {
assert(i <= x && j <= y);
return choose(x - i + y - j, x - i);
}

int main() {
scanf("%d%d%d", &n, &m, &k);
genFac(n + m - 1);
for (int i = 0; i < k; ++i)
scanf("%d%d", &point[i].x, &point[i].y);
point[k] = {n, m};
sort(point, point + k + 1);
for (int i = 0; i <= k; ++i) {
long long sum = path(1, 1, point[i].x, point[i].y);
for (int j = 0; j < i; ++j)
if (point[i].y - point[j].y >= 0) {
sum = sum - (long long) dp[j] * path(point[j].x, point[j].y, point[i].x, point[i].y) % MOD;
if (sum < 0) sum += MOD;
}
dp[i] = sum;
}
printf("%d\n", dp[k]);
}


## Z - Frog 3

Time Complexity: $\mathcal O(N)$

Lets think of a $\mathcal O(N^2)$ dp first. Let $\texttt{dp}[i]$ represent the minimum cost to travel from stone $1$ to stone $i$. We have the following recurrence:

$\texttt{dp}[i]=\max(\texttt{dp}[j] + (h_i-h_j)^2 + C)$

Lets expand the equation take out the constants and we have

$\texttt{dp}[i]=C+h_i^2 + \max(\texttt{dp}[j] + -2h_ih_j+h_j^2)$

$\texttt{dp}[j] + -2h_ih_j+h_j^2$ can be rerwritten as $(-2h_j)x+(\texttt{dp}[j] + h_j^2)$ where $x=h_i$. This forms a line ($mx+b$). Now, we just want to query the minimum value with $x=h_i$. This can be achieved with Convex Hull Trick.

#include <bits/stdc++.h>
using namespace std;

using ll = long long;
struct Line {
mutable ll k, m, p;
bool operator<(const Line& o) const { return k < o.k; }
bool operator<(ll x) const { return p < x; }
long long eval(long long x) const { return k * x + m; }
};

struct CHT {
deque<Line> hull;
static const ll inf = LLONG_MAX;

ll div(ll a, ll b) { // floored division
return a / b - ((a ^ b) < 0 && a % b); }
bool isect(Line &x, const Line &y) {
if (x.k == y.k) x.p = x.m > y.m ? inf : -inf;
else x.p = div(y.m - x.m, x.k - y.k);
return x.p >= y.p;
}
void add(long long k, long long m) {
Line L = {k, m, 0};
while ((int) hull.size() >= 2 && (isect(L, hull.back()),
isect(hull.back(), hull[(int) hull.size() - 2]), L.p < hull.back().p))
hull.pop_back();
hull.push_back(L);
}
long long query(long long x) {
while ((int) hull.size() >= 2 && hull[0].eval(x) >= hull[1].eval(x))
hull.pop_front();
return hull[0].eval(x);
}
};

int main() {
cin.tie(0)->sync_with_stdio(0);

int n; long long c;
cin >> n >> c;
vector<long long> h(n), dp(n);
for (long long&x : h) cin >> x;

CHT ch;
auto ins = [&](int i) {
ch.add(-h[i] * 2, (dp[i] + (h[i] * h[i])));
};
dp[0] = 0; ins(0);
for (int i = 1; i < n; ++i) {
long long x = ch.query(h[i]);
dp[i] = c + (h[i] * h[i]) + x;
ins(i);
}
cout << dp[n - 1] << '\n';
}

🌓