Introduction

This post contains the editorials for all tasks contained in the AtCoder DP Contest, since there is no official editorial.

A - Frog 1

Time Complexity: $\mathcal{O}(N)$

Use dynamic programming and define $\texttt{dp}[i]$ as the minimum cost to reach stone $i$ . Then, there are only two transitions:

Jump one stone:
$\texttt{dp}[i + 1] = \min(\texttt{dp}[i + 1], \texttt{dp}[i] + |\texttt{A}[i] - \texttt{A}[i + 1]|)$
Jump two stones:
$\texttt{dp}[i + 2] = \min_{0 \le i + j < N}(\texttt{dp}[i + 2], \texttt{dp}[i] + |\texttt{A}[i] - \texttt{A}[i + 2]|)$

#include <bits/stdc++.h>

using namespace std;

const int mx = 1e5+1;

int A[mx], dp[mx];

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int N; cin >> N;

	for(int i = 0; i < N; ++i) {
		cin >> A[i];
		dp[i] = 1e9 + 7;
	}

	dp[0] = 0;

	for(int i = 0; i < N; ++i) {
		if(i + 1 < N) dp[i + 1] = min(dp[i + 1], dp[i] + abs(A[i] - A[i + 1]));
		if(i + 2 < N) dp[i + 2] = min(dp[i + 2], dp[i] + abs(A[i] - A[i + 2]));
	}

	cout << dp[N - 1] << endl;
}

This is the exact same problem as Frog 1, just with variable distances. Simply loop through each of the possible jumps, and use the previous transition where $\texttt{dp}[i]$ represents the best value for stone $i$ .

\texttt{dp}[i + j] = \min_{0 \le i + j < N}(\texttt{dp}[i + j], \texttt{dp}[i] + |\texttt{A}[i] - \texttt{A}[i + j]|)

#include <bits/stdc++.h>

using namespace std;

const int mx = 1e5+1;

int A[mx], dp[mx];

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int N, K; cin >> N >> K;

	for(int i = 0; i < N; ++i) {
		cin >> A[i];
		dp[i] = 1e9 + 7;
	}

	dp[0] = 0;

	for(int i = 0; i < N; ++i) {
		for(int j = 1; j <= K; ++j) { // j is jump
			if(i + j < N) dp[i + j] = min(dp[i + j], dp[i] + abs(A[i] - A[i + j]));
		}
	}

	cout << dp[N - 1] << endl;
}

C - Vacation

Time Complexity: $\mathcal{O}(N)$

Since Taro can't do the activities for two or more consecutive days, we can instead define $\texttt{dp}[i][j]$ as the best possible value on the $i$ -th day that ends on activity $j$ . Hence, the best we can do for any day $A$ is either the previous best ending on day $B$ added to the happiness attained from $C$ , or the previous best ending on day $C$ added to the happiness attained from $B$ . The same goes for days $B, C$ . In this sense, our formulation is:

\texttt{dp}[i][j] = \max_{k \neq j}(dp[i-1][k] + V[d] : d \neq j, d \neq k)

#include <bits/stdc++.h>

using namespace std;

const int mx = 1e5+1;

bool ckmax(int& a, const int& b) {
	return a < b ? a = b, 1 : 0;
}

int dp[mx][3];

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int N; cin >> N;

	for(int i = 1; i <= N; ++i) {
		int a, b, c; cin >> a >> b >> c;
		ckmax(dp[i][0], max(dp[i - 1][1] + b, dp[i - 1][2] + c));
		ckmax(dp[i][1], max(dp[i - 1][0] + a, dp[i - 1][2] + c));
		ckmax(dp[i][2], max(dp[i - 1][0] + a, dp[i - 1][1] + b));
	}

	cout << max(dp[N][0], max(dp[N][1], dp[N][2])) << endl;
}

D - Knapsack 1

Time Complexity: $\mathcal{O}(NW)$

This is the classical knapsack problem. Notice that because $v_i \le 10^9$ , it is not feasible to store $v_i$ in our $\texttt{dp}$ array. Instead, store the possible values of $W (W \le 10^5)$ . Let $\texttt{dp}[i][j]$ represent the maximum value that can be attained by the first $i$ weights with a weight of $j$ . Then, this turns into the classical knapsack problem.

#include <bits/stdc++.h>

using namespace std;

const int mx = 1e5+1;

template<class T> bool ckmax(T& a, const T& b) {
	return a < b ? a = b, 1 : 0;
}

long long dp[101][mx];
int w[101], v[101];

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int N, W; cin >> N >> W;
	for(int i = 0; i < N; ++i) cin >> w[i] >> v[i];

	for(int i = 0; i < N; ++i) for(int j = 0; j <= W; ++j) {
		if(j + w[i] <= W) ckmax(dp[i + 1][j + w[i]], dp[i][j] + v[i]);
		ckmax(dp[i + 1][j], dp[i][j]);
	}

	cout << dp[N][W] << endl;
}

E - Knapsack 2

Time Complexity: $\mathcal{O}(N^2V)$

This is the exact same problem except instead of a high value of $v_i$ , there is a high value of $w_i$ . Now, we must minimize the value of $w_i$ for any given $v_i$ , and then try to find out the maximum value of $v_i$ that can be reached.

Define $\texttt{dp}[i]$ as the lowest weight we can achieve for value $i$ . The transition then, is:

\texttt{dp}[j + \texttt{v}[i]] = \min (\texttt{dp}[j + \texttt{v}[i]],\texttt{dp}[j] + \texttt{w}[i])

#include <bits/stdc++.h>

using namespace std;

const int mx = 1e5+1;

template<class T> bool ckmin(T& a, const T& b) {
	return a > b ? a = b, 1 : 0;
}

long long dp[mx];
int w[101], v[101];

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int N, W; cin >> N >> W;
	for(int i = 0; i < N; ++i) cin >> w[i] >> v[i];
	for(int i = 0; i < mx; ++i) dp[i] = 1e18;

	dp[0] = 0;

	for(int i = 0; i < N; ++i) {
		for(int j = mx - 1; j >= 0; j--) {
			if(dp[j] + w[i] <= W) ckmin(dp[j + v[i]], dp[j] + w[i]);
		}
	}

	for(int i = mx - 1; i >= 0; i--) {
		if(dp[i] != 1e18) {
			cout << i << endl;
			break;
		}
	}
}

F - LCS

Time Complexity: $\mathcal{O}(N^2)$

First read this, then following the according $\texttt{dp}$ model, build the string accordingly.

#include <bits/stdc++.h>

using namespace std;

template<class T> bool ckmax(T& a, const T& b) {
	return a < b ? a = b, 1 : 0;
}

int dp[3001][3001];

int main() {
	cin.tie(0)->sync_with_stdio(0);
	string s, t; cin >> s >> t;
	int n = s.size(), m = t.size();

	for(int i = 0 ; i <= n; ++i) {
		for(int j = 0; j <= m; ++j)  {
			if(!i || !j) dp[i][j] = 0;
			else if(s[i - 1] == t[j - 1]) dp[i][j] = 1 + dp[i - 1][j - 1];
			else dp[i][j] = max(dp[i - 1][j], dp[i][j - 1]);
		}
	}

	string ret = "";

	while(n && m) {
		if(s[n - 1] == t[m - 1]) {
			ret += s[n - 1];
			n--;
			m--;
		}
		else if(dp[n - 1][m] > dp[n][m - 1]) n--;
		else m--;
	 }

	 reverse(ret.begin(), ret.end());

	 cout << ret << endl;
}

G - Longest Path

Time Complexity: $\mathcal{O}(N + M)$

Simply perform a DFS on the graph, defining $\texttt{dp}[i]$ as the longest path that node $i$ can take. Notice how the optimal substructure is formed: the best path for any node $x$ is one added to the best path for any of its children.

#include <bits/stdc++.h>

using namespace std;

vector<int> dp(100001);
vector<vector<int>> adj(100001);

int dfs(int x) {
	if (dp[x]) return dp[x];
	for (auto e : adj[x]){
			dp[e] = dfs(e);
			dp[x] = max(dp[e] + 1, dp[x]);
	}
	return dp[x];
}

int main(){
	int n,m;
	cin >> n >> m;

	for(int i = 0; i < m; ++i) {
		int a, b;
		cin >> a >> b;
		a--; b--;
		adj[a].push_back(b);
	}

	for (int i = 0; i < n; ++i) {
		dfs(i);
	}

	int ans = 0;

	for (int i = 0;i < n; ++i) {
		ans = max(dp[i], ans);
	}

	cout << ans;
}

H - Grid 1

Time Complexity: $\mathcal{O}(N^2)$

A full tutorial can be found here.

#include <bits/stdc++.h>

using namespace std;

bool ok[1000][1000];
long long dp[1000][1000];

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int n; cin >> n;
	for(int i = 0; i < n; ++i) {
		string s;
		cin >> s;
		for(int j = 0; j < n; ++j) {
			if(s[j] == '.') ok[i][j] = true;
			else ok[i][j] = false;
		}
	}

	dp[0][0] = 1;
	for(int i = 0; i < n; ++i) {
		for(int j = 0; j < n; ++j) {
			if(!ok[i][j]) dp[i][j] = 0;
			else {
				if(i > 0) dp[i][j] += dp[i - 1][j];
				if(j > 0) dp[i][j] += dp[i][j - 1];
				dp[i][j] %= 1000000007;
			}
		}
	}

	cout << dp[n - 1][n - 1] << "\n";

	return 0;
}

I - Coins

Time Complexity: $\mathcal{O}(N^2)$

Define $\texttt{dp}[i][j]$ to be the probability after tossing the first $i$ coins, and receiving $j$ heads. Then, our probability of flipping $j$ heads from the first $i$ coins is the addition of the following:

either we flipped a head with probability $p$
- then use $\texttt{dp}[i-1][j-1]$ , the probability of receiving $j-1$ heads from the previous toss.
we flipped a tail with probability $1-p$
- then use $\texttt{dp}[i-1][j]\cdot(1-\texttt{p}[i-1])$ , the probability of receiving $j$ heads from the previous toss.

\texttt{dp}[i][j] = \texttt{dp}[i-1][j-1]\cdot \texttt{p}[i-1] + \texttt{dp}[i-1][j]\cdot(1-\texttt{p}[i-1])

#include<bits/stdc++.h>

using namespace std;

long double dp[3001][3001];

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int n;
	cin >> n;

	vector<long double> p(n);

	for(int i = 0; i < n; ++i) cin >> p[i];

	int leastHeads = n / 2 + 1;

	for(int i = 0; i <= n; ++i) {
			dp[i][0] = 1;
	}

	for(int i = 1; i <= n; ++i) {
		for(int j = 1; j <= leastHeads; ++j) {
			dp[i][j] = dp[i - 1][j - 1] * p[i - 1] + dp[i - 1][j] * (1 - p[i - 1]);
		}
	}

	cout << fixed << setprecision(10) << dp[n][leastHeads] << endl;
}

J - Sushi

Time Complexity: $\mathcal{O}(N^3)$

Let $\texttt{dp}[x][y][z]$ represent the expected moves for $x$ number of plates 1-sushi remaining, $y$ number of plates 2-sushi remaining, $z$ number of plates 3-sushi remaining.

Then, we can use the relation

\texttt{dp}[x][y][z] = n + x \cdot \texttt{dp}[x-1][y][z] + y \cdot \texttt{dp}[x+1][y-1][z] + z \cdot \texttt{dp}[x][y+1][z-1]

Note that we add $1$ for the $y$ and $z$ equations because we take from one sushi platter which transitions into one more sushi in another grouping of either $x,y$ . For example, by taking one sushi away from a group of size $2$ then there is a corresponding increase in a sushi group of size $1$ .

#include <bits/stdc++.h>

using namespace std;

int n;

long double dp[301][301][301];
// let dp[i][j][k] be
// i dishes of 1 sushi
// j dishes of 2 sushi
// k dishes of 3 sushi

double memo(int x, int y, int z) {
	if(x < 0 || y < 0 || z < 0) return 0;
	if(x == 0 && y == 0 && z == 0) return 0;
	// if(x + y + z < 1) return 0;
	if(dp[x][y][z] > 0) return dp[x][y][z];

	long double ret = n + x * memo(x - 1, y, z)  // # 1 sushi
					+ y * memo(x + 1, y - 1, z)  // # 2 sushi
					+ z * memo(x, y + 1, z - 1); // # 3 sushi

	return dp[x][y][z] = ret / (x + y + z);
}

int main() {
	cin.tie(0)->sync_with_stdio(0);

	cin >> n;
	vector<int> a(n); for(int i = 0; i < n; ++i) cin >> a[i];

	vector<int> freq(3);
	for(int x : a) freq[x - 1]++;

	memset(dp, -1, sizeof dp);

	cout << fixed << setprecision(10) << memo(freq[0], freq[1], freq[2]) << endl;
}

K - Stones

Time Complexity: $\mathcal{O}(N^2)$

Define $\texttt{dp}[i]$ as if it's possible to win with $i$ stones remaining. Then keep two loops, one for values of $i (1 \le i \le K)$ , and the other for each value in $A$ . If $i \ge j$ and it was not possible to win a game with $i - a_j$ stones, then - because the turns alternate - a game with $i$ stones is winnable.

#include <bits/stdc++.h>

using namespace std;

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int n, k;
	cin >> n >> k;
	vector<int> a(n);
	for (int& x : a) cin >> x;
	vector<bool> dp(k + 1);
	for (int i = 1; i <= k; ++i)
		for (int j : a)
			if (i >= j && !dp[i - j])
				dp[i] = 1;
	cout << (dp[k] ? "First" : "Second") << '\n';
}

L - Deque

Time Complexity: $\mathcal{O}(N^2)$

Define $\texttt{dp}[i][j]$ as the optimal score for Jiro $(X - Y)$ using the range $[i, j]$ . Then, we can either choose $a_i$ to append to the left of the range, or $a_j$ on the right. Then, our two transitions are

\texttt{dp}[i][j] = \max_{j > i} \left\{\begin{matrix} a_i - \texttt{dp}[i+1][j] \\ a_j - \texttt{dp}[i][j-1] \end{matrix}\right.

Our initial states are $\texttt{dp}[i][i] = 0$ , since any range of size $0$ will have difference $0$ .

#include <bits/stdc++.h>

using namespace std;

long long dp[3001][3001];

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int n;
	cin >> n;

	vector<int> a(n);
	for (int& x : a) cin >> x;

	for (int i = 0; i < n; ++i) dp[i][i] = a[i];

	for (int i = n - 1; i >= 0; i--)
		for (int j = i + 1; j < n; ++j)
			dp[i][j] = max(a[i] - dp[i + 1][j], a[j] - dp[i][j - 1]);
	cout << dp[0][n - 1] << '\n';
}

M - Candies

Time Complexity: $\mathcal{O}(NK)$

Let $\texttt{dp}[i][j]$ represent the number of ways to distribute $j$ candies to the first $i$ children.

Then we have

\texttt{dp}[i][j]=\sum{\texttt{dp}[i-1][j']}

such that $0\leq j - j' \leq a_i$ .

We can use prefix sums to speed this up from $\mathcal O(N\cdot K^2)$ to $\mathcal O(N\cdot K)$ .

#include <bits/stdc++.h>

using namespace std;

const int MAXK = 100000;
const int MOD = 1000000007;

int add(int i, int j) {
	if ((i += j) >= MOD)
		i -= MOD;
	return i;
}

int sub(int i, int j) {
	if ((i -= j) < 0)
		i += MOD;
	return i;
}

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int n, k, a;
	cin >> n >> k;

	static int dp[MAXK + 1], dq[MAXK + 1];
	dp[0] = 1;
	for (int i = 1; i <= n; ++i) {
		cin >> a;
		for (int j = 1; j <= k; ++j)
			dp[j] = add(dp[j - 1], dp[j]);
		for (int j = 0; j <= k; ++j)
			dq[j] = sub(dp[j], (j > a ? dp[j - a - 1] : 0));
		swap(dp, dq);
	}
	cout << dp[k] << '\n';
}

N - Slimes

Let $\texttt{dp}[i][j]$ represent the minimum cost to merge the slimes $i\dots j$ into one slime.

To calculate $\texttt{dp}[i][j]$ , we want to merge two smaller (combined) slimes in the range $[i, j]$ . To do this, we loop through all $k$ in the range $[i, j - 1]$ and simulate merging $[i, k]$ and $[k + 1, j]$ . Note that the cost of this is $\texttt{dp}[i][k] + \texttt{dp}[k + 1][j] + a[i] + a[i + 1] + \dots + a[j]$ . Here, $a[i] + a[i + 1] + \dots + a[j]$ can be precalculated with prefix sums.

This takes $\mathcal O(N^3)$ which fits in the Time Limit but can be optimized to $\mathcal O(N^2)$ with Knuth's Optimization.

The code below is $\mathcal O(N^3)$ .

#include <bits/stdc++.h>
using namespace std;

const int MAXN = 400;
const long long INF = 0x3f3f3f3f3f3f3f3f;

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int n;
	cin >> n;
	vector<long long> a(n), p(n);
	for (long long& x : a) cin >> x;
	for (int i = 0; i < n; ++i)
		p[i] = (i == 0 ? 0 : p[i - 1]) + a[i];
	auto query = [&](int l, int r) {
		return p[r] - (l == 0 ? 0 : p[l - 1]); };

	static long long dp[MAXN][MAXN];
	memset(dp, 0x3f, sizeof(dp));
	for (int i = 0; i + 1 < n; ++i) dp[i][i + 1] = a[i] + a[i + 1];
	for (int i = 0; i < n; ++i) dp[i][i] = 0;
	for (int i = n - 1; i >= 0; --i)
		for (int j = i + 2; j < n; ++j) {
			long long best = INF;
			for (int k = i; k < j; ++k)
				best = min(best, dp[i][k] + dp[k + 1][j]);
			dp[i][j] = query(i, j) + best;
		}
	cout << dp[0][n - 1] << '\n';
}

O - Matching

Time Complexity: $\mathcal{O}(N\cdot 2^N)$

If we define $\texttt{dp}[S]$ to be the number of matchings for females in the set $S$ to the first $|S|$ males, this problem boils down to the following:

\texttt{dp}[S] = \sum \texttt{dp}[S\backslash x]: \texttt{compatible}[|S|][x]

(The : stands for "such that".) In English, this is equivalent to the following:

The number of matchings in a subset $S$ to include a certain female $x$ is equivalent to the sum of all the matchings without female $x$ where female $x$ is compatible with the $|S|$ -th male.

Our base case is the empty set, which has a value of $1$ (the empty set can be considered as a single matching involving zero pairs).

#include <bits/stdc++.h>

using namespace std;

const int MOD = 1e9 + 7;
const int MAX_N = 21;

bool compat[MAX_N][MAX_N];
int dp[1 << MAX_N];

int main() {
	int N;
	cin >> N;
	for (int i = 0; i < N; ++i) {
		for (int j = 0; j < N; ++j) {
			cin >> compat[i][j];
		}
	}

	dp[0] = 1;

	for (int s = 0; s < (1 << N); s++) {
		int pair_num = __builtin_popcount(s);
		for (int w = 0; w < N; w++) {
			/*
			 * check that
			 * 1. this woman hasn't been paired already
			 * 2. she's also compatible with the {pair_num + 1}th man
			 */
			if ((s & (1 << w)) || !compat[pair_num][w])
				continue;

			// add the amount to future dp states
			dp[s | (1 << w)] += dp[s];
			dp[s | (1 << w)] %= MOD;
		}
	}

   	cout << dp[(1 << N) - 1] << endl;
}

P - Independent Set

Time Complexity: $\mathcal{O}(N)$

First, lets root the tree arbitrary. Let $\texttt{dp}[i][j]$ represent the number of ways to color subtree $i$ , coloring node $i$ color $j$ .

Here, $j=0$ is black and $j=1$ is white. If we color node $i$ black, then it's children must be all white. So, we have

\texttt{dp}[i][0]=\prod \texttt{dp}[c][1]

such that $c$ is $i$ 's child, and if we color node $i$ white, then it's children can be either white or black:

\texttt{dp}[i][1]=\prod({\texttt{dp}[c][0] + \texttt{dp}[c][1]})

#include <bits/stdc++.h>

using namespace std;

const int MAXN = 1e5;
const int MOD = 1e9 + 7;

int add(int i, int j) {
	if ((i += j) >= MOD)
		i -= MOD;
	return i;
}
int mul(int i, int j) {
	return int((long long) i * j % MOD);
}

vector<int> adj[MAXN];
int dp[MAXN][2];

void dfs(int i, int p) {
	dp[i][0] = dp[i][1] = 1;
	for (int j : adj[i]) if (j != p) {
		dfs(j, i);
		dp[i][0] = mul(dp[i][0], dp[j][1]);
		dp[i][1] = mul(dp[i][1], add(dp[j][0], dp[j][1]));
	}
}

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int n;
	cin >> n;
	for (int i = 0, a, b; i + 1 < n; ++i) {
		cin >> a >> b, --a, --b;
		adj[a].push_back(b), adj[b].push_back(a);
	}
	dfs(0, 0);
	cout << add(dp[0][0], dp[0][1]) << '\n';
}

Q - Flowers

Time Complexity: $\mathcal{O}(N\log N)$

Let $\texttt{dp}[i]$ represent the maximum beauty of a subsequence of flowers ending at $i$ .

\texttt{dp}[i]=\max(\texttt{dp}[j]) + a_i

such that $j < i$ and $h_j < h_i$ .

However, looping through all values of $j$ would result in a complexity of $\mathcal O(N^2)$ , which would TLE.

We can use a data-structure to speed this up. Notice that when calculating $\texttt{dp}[i]$ , we're querying the maximum $\texttt{dp}[j]$ such that $h_j < h_i$ and adding $a_i$ .

In other words, we're querying the maximum $\texttt{dp}[j]$ which $h_j\in[1, h_i - 1]$ , which can be handled with a data-structure like a segment tree or a binary indexed tree.

#include <bits/stdc++.h>

using namespace std;

const int MAXN = 2e5;

int n;
long long bit[MAXN + 1];

void update(int i, long long x) {
	for ( ; i <= n; i += i & -i)
		bit[i] = max(bit[i], x);
}

long long query(int i) {
	long long ans = 0;
	for ( ; i > 0; i -= i & -i)
		ans = max(ans, bit[i]);
	return ans;
}

int main() {
	cin.tie(0)->sync_with_stdio(0);

	cin >> n;
	vector<int> h(n), a(n);
	for (int i = 0; i < n; ++i) cin >> h[i];
	for (int i = 0; i < n; ++i) cin >> a[i];
	for (int i = 0; i < n; ++i) {
		update(h[i], query(h[i] - 1) + a[i]);
	}
	cout << query(n) << '\n';
}

R - Walk

We're given a adjacency matrix $a$ and we want to calculate the number of paths of length $k$ . First, lets convert $a$ into a Matrix, $m$ .

Notice that when we multiply $m$ by $m$ , we get the number of paths with length $2$ . If we take $m^p$ , we end up with the number of paths of length $p$ from each $i$ to $j$ .

Thus, we're looking for $m^k$ , which can be calculated with binary exponentiation.

Time Complexity: $\mathcal{O}(N^3\log K)$

#include <bits/stdc++.h>

using namespace std;

const int MOD = 1e9 + 7;

int add(int i, int j) {
	if ((i += j) >= MOD)
		i -= MOD;
	return i;
}
int mul(int i, int j) {
	return int((long long) i * j % MOD);
}

template<class T> struct Matrix {
	T **mat; int a, b;

	Matrix() : a(0), b(0) {}
	Matrix(int a_, int b_) : a(a_), b(b_) {
		int i, j;

		mat = new T*[a];
		for (i = 0; i < a; ++i) {
			mat[i] = new T[b];
			for (j = 0; j < b; ++j)
				mat[i][j] = 0;
		}
	}
	Matrix(const vector<vector<T>>& vt) {
		int i, j;

		*this = Matrix((int) vt.size(), (int) vt[0].size());
		for (i = 0; i < a; ++i)
			for (j = 0; j < b; ++j)
				mat[i][j] = vt[i][j];
	}
	Matrix operator*(const Matrix& m) {
		int i, j, k;

		assert(b == m.a);
		Matrix r(a, m.b);
		for (i = 0; i < a; ++i)
			for (j = 0; j < m.b; ++j)
				for (k = 0; k < b; ++k)
					r.mat[i][j] = add(r.mat[i][j],
						mul(mat[i][k], m.mat[k][j]));
		return r;
	}
	Matrix& operator*=(const Matrix& m) {
		return *this = (*this) * m;
	}
	friend Matrix power(Matrix m, long long p) {
		int i;

		assert(m.a == m.b);
		Matrix r(m.a, m.b);
		for (i = 0; i < m.a; ++i)
			r.mat[i][i] = 1;
		for ( ; p > 0; p >>= 1, m *= m)
			if (p & 1)
				r *= m;
		return r;
	}
};

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int n; long long k;
	cin >> n >> k;
	vector<vector<int>> adj(n, vector<int>(n));
	for (int i = 0; i < n; ++i)
		for (int j = 0; j < n; ++j)
			cin >> adj[i][j];
	Matrix<int> mat(adj);
	mat = power(mat, k);
	int ans = 0;
	for (int i = 0; i < n; ++i)
		for (int j = 0; j < n; ++j)
			ans = add(ans, mat.mat[i][j]);
	cout << ans << '\n';
}

S - Digit Sum

Time Complexity: $\mathcal{O}(|K|D)$

We can use digit dp to solve this problem. Let $\texttt{dp}[i][j]$ represent the number of ways to create a length $|K|$ number which is a multiple of $D$ given that the $i$ digits is fixed and has a sum equivalent to $j \pmod D$ .

To find the answer, we will loop through all prefixes of $K$ and calculate the number of ways to create the remaining suffix, which can be mapped to a dp state.

#include <bits/stdc++.h>

using namespace std;

const int MAXL = 10000;
const int MAXD = 100;
const int MOD = 1e9 + 7;

int dp[MAXL + 1][MAXD];

int add(int i, int j) {
	if ((i += j) >= MOD)
		i -= MOD;
	return i;
}

void init(int l, int d) {
	dp[l][0] = 1;
	for (int i = l - 1; i >= 0; --i)
		for (int j = 0; j < d; ++j)
			for (int k = 0; k < 10; ++k)
				dp[i][j] = add(dp[i][j], dp[i + 1][(j + k) % d]);
}

int query(const string& s, int d) {
	int ans = 0, carry = 0;
	for (int i = 0; i < (int) s.length(); ++i) {
		for (int j = 0; j < (s[i] - '0'); ++j)
			ans = add(ans, dp[i + 1][(carry + j) % d]);
		carry = (carry + (s[i] - '0')) % d;
	}
	if (carry == 0) ans++;
	if (--ans < 0) ans += MOD;
	return ans;
}

int main() {
	cin.tie(0)->sync_with_stdio(0);

	string s; int d;
	cin >> s >> d;
	init((int) s.size(), d);
	cout << query(s, d) << '\n';
}

T - Permutation

Time Complexity: $\mathcal{O}(N^2)$

Let $\texttt{dp}[i][j]$ represent the number of ways to create a permutation with length $i+1$ the first $i$ signs, using $j$ as the last element of the permutation.

If $s[i]$ is $>$ , then, $\texttt{dp}[i][j]=\sum{\texttt{dp}[i-1][k]}$ for all $k\geq j$ .

Otherwise, $\texttt{dp}[i][j]=\sum{\texttt{dp}[i-1][k]}$ for all $k < j$ . This can be sped up to $\mathcal O(N^2)$ with prefix sums.

#include <bits/stdc++.h>

using namespace std;

const int MAXN = 3000;

const int MOD = 1e9 + 7;

int add(int i, int j) {
	if ((i += j) >= MOD)
		i -= MOD;
	return i;
}
int sub(int i, int j) {
	if ((i -= j) < 0)
		i += MOD;
	return i;
}

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int n;
	string s;

	cin >> n >> s;
	static int dp[MAXN + 1], dq[MAXN + 1];
	dp[1] = 1;
	for (int i = 1; i < n; ++i) {
		for (int j = 1; j <= i + 1; ++j)
			dp[j] = add(dp[j], dp[j - 1]);
		for (int j = 1; j <= i + 1; ++j)
			if (s[i - 1] == '>')
				dq[j] = sub(dp[i], dp[j - 1]);
			else
				dq[j] = dp[j - 1];
		swap(dp, dq);
	}
	int ans = 0;
	for (int i = 1; i <= n; ++i)
		ans = add(ans, dp[i]);
	cout << ans << '\n';
}

U - Grouping

Time Complexity: $\mathcal{O}(3^N + 2^N\cdot N^2)$

Let $\texttt{cost}[S]$ represent the score of assigning all Rabbits in set $S$ into the same group. This can be calculated in $\mathcal O(2^N \cdot N^2)$ .

Now, we calculate $\texttt{dp}[S]$ , the maximum score of assigning the rabbits in set $S$ . To calculate $\texttt{dp}[S]$ , we have to loop through all subsets of $S$ and simulate putting all the rabbits in the subset in the same group. Fortunately, you can do this in $\mathcal O(3^N)$ (see https://cp-algorithms.com/algebra/all-submasks.html#toc-tgt-1 ), which runs in time for $N\leq 16$ .

#include <bits/stdc++.h>
using namespace std;

const int MAXN = 16;

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int n;
	static int a[MAXN][MAXN];
	static long long cost[1 << MAXN];
	static long long dp[1 << MAXN];
	cin >> n;
	for (int i = 0; i < n; ++i)
		for (int j = 0; j < n; ++j)
			cin >> a[i][j];
	for (int i = 0; i < (1 << n); ++i)
		for (int j = 0; j < n; ++j)
			for (int k = j + 1; k < n; ++k)
				if (i & (1 << j) && i & (1 << k))
					cost[i] += a[j][k];
	for (int i = 0, j; i < (1 << n); ++i) {
		j = ((1 << n) - 1) ^ i;
		for (int k = j; k; k = (k - 1) & j)
			dp[i ^ k] = max(dp[i ^ k], dp[i] + cost[k]);
	}
	cout << dp[(1 << n) - 1] << '\n';
}

V - Subtrees

This explains it pretty well.

W - Intervals

Let $\texttt{dp}[i]$ represent the maximum score of a string with the $i$ th bit turned on.

For our relation, we have

\texttt{dp}[i] = \max(\texttt{dp}[j] + \texttt{cost}(i, j))

such that $j < i$ and $\texttt{cost}(i, j)$ contains the sum of all $a$ such that $l\leq i\leq r$ but not $l\leq j\leq r$ .

This works, in $\mathcal O(N^2)$ , which unfortunately, TLEs.

However, we can use a segment tree to speed this up. To calculate $\texttt{dp}[i]$ , we can query the maximum of $[0, i - 1]$ in the segment tree. Now, we just need to support activating/deactivating each query.

When we reach a $l$ , we add $a$ to each dp value in the range $[0, a - 1]$
When we reach a $r$ , we subtract $a$ from each dp value in the range $[0, a - 1]$

Now, we can just query the maximum of $\texttt{dp}[0\dots i-1]$ and update it into our segment tree!

#include <bits/stdc++.h>
using namespace std;

const long long INF = 0x3f3f3f3f3f3f3f3f;

template<class T> struct LazySeg {
	int sz; vector<T> st, lz;

	T comb(T a, T b) {
		return max(a, b);
	}
	void init(int n) {
		sz = 1; while (sz < n) sz <<= 1;
		st.assign(2 * sz, 0), lz.assign(2 * sz, 0);
	}
	void pull(int i) {
		st[i] = comb(st[i << 1], st[i << 1 | 1]);
	}
	void push(int i, int l, int r) {
		st[i] += lz[i];
		if (l != r) lz[i << 1] += lz[i], lz[i << 1 | 1] += lz[i];
		lz[i] = 0;
	}
	void update(int ql, int qr, T x, int i, int l, int r) {
		push(i, l, r); if (ql > r || qr < l) return;
		if (ql <= l && qr >= r) { lz[i] += x; return void(push(i, l, r)); }
		int m = (l + r) >> 1; update(ql, qr, x, i << 1, l, m);
		update(ql, qr, x, i << 1 | 1, m + 1, r); pull(i);
	}
	void update(int ql, int qr, T x) {
		update(ql, qr, x, 1, 0, sz - 1);
	}
	T query(int ql, int qr, int i, int l, int r) {
		push(i, l, r); if (ql > r || qr < l) return -INF;
		if (ql <= l && qr >= r) return st[i]; int m = (l + r) >> 1;
		return comb(query(ql, qr, i << 1, l, m), query(ql, qr, i << 1 | 1, m + 1, r));
	}
	T query(int ql, int qr) {
		return query(ql, qr, 1, 0, sz - 1);
	}
};

LazySeg<long long> st;

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int n, m;
	cin >> n >> m;
	vector<long long> add(n + 2);
	vector<vector<array<int, 2>>> todo(n + 2);
	while (m--) {
		int l, r, a;
		cin >> l >> r >> a;
		add[l] += a;
		todo[r + 1].push_back({a, l});
	}
	st.init(n + 1);
	for (int i = 1; i <= n + 1; ++i) {
		st.update(0, i - 1, add[i]);
		for (array<int, 2> ar : todo[i])
			st.update(0, ar[1] - 1, -ar[0]);
		st.update(i, i, st.query(0, i - 1));
	}
	cout << st.query(0, n) << '\n';
}

X - Towers

Notice that the optimal sorting of elements is by the sum of their weight and strength.

Consider two orderings for blocks $a$ and $b$ .

If the tower $a$ is on the bottom, then the strength of the tower is

s_a - w_b

If the tower $b$ is on the bottom, then the strength of the tower is

s_b - w_a

This produces the equation

s_a - w_b \overset{?}{=} s_b - w_a

s_a + w_a \overset{?}{=} s_b + w_b

Obviously, we want to maximize the strength of our tower, so put the one with less $s_i + w_i$ on the top.

Then, define $\texttt{dp}[i]$ as the maximum value we can achieve for a weight of $i$ . We process the tower top-down, only calculating weights $j : j \le s_i$ since any more would break the tower. Our relation is then:

\texttt{dp}[j + \texttt{w}[x]] = \max(j + \texttt{dp}[\texttt{w}[x]], \texttt{dp}[j] + \texttt{v}[x])

#include <bits/stdc++.h>

using namespace std;

const int MAX_N = 1e3+1;

long long dp[20001];
int w[MAX_N], s[MAX_N], v[MAX_N], p[MAX_N];

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int N; cin >> N;
	for(int i = 0; i < N; ++i) cin >> w[i] >> s[i] >> v[i];

	iota(p, p + N, 0);

	sort(p, p + N, [&](const int &a, const int &b) {
		return w[a] + s[a] < w[b] + s[b];
	});

	for(int i = 0; i < N; ++i) {
		int x = p[i];
		for(int j = s[x]; j >= 0; --j) {
			dp[j + w[x]] = max(dp[j + w[x]], dp[j] + v[x]);
		}
	}

	cout << *max_element(dp, dp + 20001) << endl;
}

Y - Grids 2

Time Complexity: $\mathcal O(H+W+N^2)$

First, note that the number of ways to walk from cell $(i, j)$ to $(x, y)$ is ${x-i+y-j}\choose{x-i}$ , which can be calculated in $\mathcal O(1)$ if you precompute factorials.

Let $\texttt{dp}[i]$ represent the number of ways to walk from $(1, 1)$ to the ith square without passing any other squares other than the ith one.

To calculate this, note that if there are no squares in between $(1, 1)$ and $(r_i, c_i)$ , then there are a total of ${r_i-1+c_i-1}\choose{r_i-1}$ ways to do this.

We can subtract this value from the number of ways to pass through a square in the path from $(1, 1)$ to $(r_i, c_i$ ). This can be done by looping through all squares $j$ such that $r_j\leq r_i$ and $c_j\leq c_i$ .

To calculate the answer, we can simply add an extra square at $(H, W)$ and take the resulting $\texttt{dp}$ value.

#include <bits/stdc++.h>
using namespace std;

#define N	100000
#define M	3000
#define MOD	1000000007

int n, m, k, dp[M + 1];
long long fac[N << 1], ifac[N << 1];
struct P {
	int x, y;
	bool operator<(const P& other) const {
		if (x == other.x) return y < other.y;
		return x < other.x;
	}
} point[M + 1];

int power(long long x, int p) {
	long long res = 1;
	while (p > 0) {
		if (p & 1) res = res * x % MOD;
		x = x * x % MOD, p >>= 1;
	}
	return res;
}

void genFac(int size) {
	fac[0] = 1;
	for (int i = 1; i <= size; ++i)
		fac[i] = fac[i - 1] * i % MOD;
	ifac[size] = power(fac[size], MOD - 2);
	for (int i = size; i >= 1; --i)
		ifac[i - 1] = ifac[i] * i % MOD;
}
int choose(int n, int k) {
	assert(n >= k);
	return fac[n] * ifac[k] % MOD * ifac[n - k] % MOD;
}
int path(int i, int j, int x, int y) {
	assert(i <= x && j <= y);
	return choose(x - i + y - j, x - i);
}

int main() {
	scanf("%d%d%d", &n, &m, &k);
	genFac(n + m - 1);
	for (int i = 0; i < k; ++i)
		scanf("%d%d", &point[i].x, &point[i].y);
	point[k] = {n, m};
	sort(point, point + k + 1);
	for (int i = 0; i <= k; ++i) {
		long long sum = path(1, 1, point[i].x, point[i].y);
		for (int j = 0; j < i; ++j)
			if (point[i].y - point[j].y >= 0) {
				sum = sum - (long long) dp[j] * path(point[j].x, point[j].y, point[i].x, point[i].y) % MOD;
				if (sum < 0) sum += MOD;
			}
		dp[i] = sum;
	}
	printf("%d\n", dp[k]);
}

Z - Frog 3

Time Complexity: $\mathcal O(N)$

Lets think of a $\mathcal O(N^2)$ dp first. Let $\texttt{dp}[i]$ represent the minimum cost to travel from stone $1$ to stone $i$ . We have the following recurrence:

\texttt{dp}[i]=\max(\texttt{dp}[j] + (h_i-h_j)^2 + C)

Lets expand the equation take out the constants and we have

\texttt{dp}[i]=C+h_i^2 + \max(\texttt{dp}[j] + -2h_ih_j+h_j^2)

$\texttt{dp}[j] + -2h_ih_j+h_j^2$ can be rerwritten as $(-2h_j)x+(\texttt{dp}[j] + h_j^2)$ where $x=h_i$ . This forms a line ( $mx+b$ ). Now, we just want to query the minimum value with $x=h_i$ . This can be achieved with Convex Hull Trick.

#include <bits/stdc++.h>
using namespace std;

using ll = long long;
struct Line {
	mutable ll k, m, p;
	bool operator<(const Line& o) const { return k < o.k; }
	bool operator<(ll x) const { return p < x; }
	long long eval(long long x) const { return k * x + m; }
};

struct CHT {
	deque<Line> hull;
	static const ll inf = LLONG_MAX;

	ll div(ll a, ll b) { // floored division
		return a / b - ((a ^ b) < 0 && a % b); }
	bool isect(Line &x, const Line &y) {
		if (x.k == y.k) x.p = x.m > y.m ? inf : -inf;
		else x.p = div(y.m - x.m, x.k - y.k);
		return x.p >= y.p;
	}
	void add(long long k, long long m) {
		Line L = {k, m, 0};
		while ((int) hull.size() >= 2 && (isect(L, hull.back()),
			isect(hull.back(), hull[(int) hull.size() - 2]), L.p < hull.back().p))
			hull.pop_back();
		hull.push_back(L);
	}
	long long query(long long x) {
		while ((int) hull.size() >= 2 && hull[0].eval(x) >= hull[1].eval(x))
			hull.pop_front();
		return hull[0].eval(x);
	}
};

int main() {
	cin.tie(0)->sync_with_stdio(0);

	int n; long long c;
	cin >> n >> c;
	vector<long long> h(n), dp(n);
	for (long long&x : h) cin >> x;

	CHT ch;
	auto ins = [&](int i) {
		ch.add(-h[i] * 2, (dp[i] + (h[i] * h[i])));
	};
	dp[0] = 0; ins(0);
	for (int i = 1; i < n; ++i) {
		long long x = ch.query(h[i]);
		dp[i] = c + (h[i] * h[i]) + x;
		ins(i);
	}
	cout << dp[n - 1] << '\n';
}