AtCoder DP Editorial

Neo Wang, Dong Liu / April 22, 2021

– views

Introduction

This post contains the editorials for all tasks contained in the AtCoder DP Contest, since there is no official editorial.

A - Frog 1

Time Complexity: O(N)\mathcal{O}(N)

Use dynamic programming and define dp[i]\texttt{dp}[i] as the minimum cost to reach stone ii. Then, there are only two transitions:

  • Jump one stone:

    dp[i+1]=min(dp[i+1],dp[i]+A[i]A[i+1])\texttt{dp}[i + 1] = \min(\texttt{dp}[i + 1], \texttt{dp}[i] + |\texttt{A}[i] - \texttt{A}[i + 1]|)
  • Jump two stones:

    dp[i+2]=min0i+j<N(dp[i+2],dp[i]+A[i]A[i+2])\texttt{dp}[i + 2] = \min_{0 \le i + j < N}(\texttt{dp}[i + 2], \texttt{dp}[i] + |\texttt{A}[i] - \texttt{A}[i + 2]|)
#include <bits/stdc++.h>

using namespace std;

const int mx = 1e5+1;

int A[mx], dp[mx];

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int N; cin >> N;

    for(int i = 0; i < N; ++i) {
        cin >> A[i];
        dp[i] = 1e9 + 7;
    }

    dp[0] = 0;

    for(int i = 0; i < N; ++i) {
        if(i + 1 < N) dp[i + 1] = min(dp[i + 1], dp[i] + abs(A[i] - A[i + 1]));
        if(i + 2 < N) dp[i + 2] = min(dp[i + 2], dp[i] + abs(A[i] - A[i + 2]));
    }

    cout << dp[N - 1] << endl;
}

B - Frog 2

Time Complexity: O(NK)\mathcal{O}(NK)

This is the exact same problem as Frog 1, just with variable distances. Simply loop through each of the possible jumps, and use the previous transition where dp[i]\texttt{dp}[i] represents the best value for stone ii.

dp[i+j]=min0i+j<N(dp[i+j],dp[i]+A[i]A[i+j])\texttt{dp}[i + j] = \min_{0 \le i + j < N}(\texttt{dp}[i + j], \texttt{dp}[i] + |\texttt{A}[i] - \texttt{A}[i + j]|)
#include <bits/stdc++.h>

using namespace std;

const int mx = 1e5+1;

int A[mx], dp[mx];

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int N, K; cin >> N >> K;

    for(int i = 0; i < N; ++i) {
        cin >> A[i];
        dp[i] = 1e9 + 7;
    }

    dp[0] = 0;

    for(int i = 0; i < N; ++i) {
        for(int j = 1; j <= K; ++j) { // j is jump
            if(i + j < N) dp[i + j] = min(dp[i + j], dp[i] + abs(A[i] - A[i + j]));
        }
    }

    cout << dp[N - 1] << endl;
}

C - Vacation

Time Complexity: O(N)\mathcal{O}(N)

Since Taro can't do the activities for two or more consecutive days, we can instead define dp[i][j]\texttt{dp}[i][j] as the best possible value on the ii-th day that ends on activity jj. Hence, the best we can do for any day AA is either the previous best ending on day BB added to the happiness attained from CC, or the previous best ending on day CC added to the happiness attained from BB. The same goes for days B,CB, C. In this sense, our formulation is:

dp[i][j]=maxkj(dp[i1][k]+V[d]:dj,dk)\texttt{dp}[i][j] = \max_{k \neq j}(dp[i-1][k] + V[d] : d \neq j, d \neq k)
#include <bits/stdc++.h>

using namespace std;

const int mx = 1e5+1;

bool ckmax(int& a, const int& b) {
    return a < b ? a = b, 1 : 0;
}

int dp[mx][3];

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int N; cin >> N;

    for(int i = 1; i <= N; ++i) {
        int a, b, c; cin >> a >> b >> c;
        ckmax(dp[i][0], max(dp[i - 1][1] + b, dp[i - 1][2] + c));
        ckmax(dp[i][1], max(dp[i - 1][0] + a, dp[i - 1][2] + c));
        ckmax(dp[i][2], max(dp[i - 1][0] + a, dp[i - 1][1] + b));
    }

    cout << max(dp[N][0], max(dp[N][1], dp[N][2])) << endl;
}

D - Knapsack 1

Time Complexity: O(NW)\mathcal{O}(NW)

This is the classical knapsack problem. Notice that because vi109v_i \le 10^9, it is not feasible to store viv_i in our dp\texttt{dp} array. Instead, store the possible values of W(W105)W (W \le 10^5). Let dp[i][j]\texttt{dp}[i][j] represent the maximum value that can be attained by the first ii weights with a weight of jj. Then, this turns into the classical knapsack problem.

#include <bits/stdc++.h>

using namespace std;

const int mx = 1e5+1;

template<class T> bool ckmax(T& a, const T& b) {
    return a < b ? a = b, 1 : 0;
}

long long dp[101][mx];
int w[101], v[101];

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int N, W; cin >> N >> W;
    for(int i = 0; i < N; ++i) cin >> w[i] >> v[i];

    for(int i = 0; i < N; ++i) for(int j = 0; j <= W; ++j) {
        if(j + w[i] <= W) ckmax(dp[i + 1][j + w[i]], dp[i][j] + v[i]);
        ckmax(dp[i + 1][j], dp[i][j]);
    }

    cout << dp[N][W] << endl;
}

E - Knapsack 2

Time Complexity: O(N2V)\mathcal{O}(N^2V)

This is the exact same problem except instead of a high value of viv_i, there is a high value of wiw_i. Now, we must minimize the value of wiw_i for any given viv_i, and then try to find out the maximum value of viv_i that can be reached.

Define dp[i]\texttt{dp}[i] as the lowest weight we can achieve for value ii. The transition then, is:

dp[j+v[i]]=min(dp[j+v[i]],dp[j]+w[i])\texttt{dp}[j + \texttt{v}[i]] = \min (\texttt{dp}[j + \texttt{v}[i]],\texttt{dp}[j] + \texttt{w}[i])
#include <bits/stdc++.h>

using namespace std;

const int mx = 1e5+1;

template<class T> bool ckmin(T& a, const T& b) {
    return a > b ? a = b, 1 : 0;
}

long long dp[mx];
int w[101], v[101];

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int N, W; cin >> N >> W;
    for(int i = 0; i < N; ++i) cin >> w[i] >> v[i];
    for(int i = 0; i < mx; ++i) dp[i] = 1e18;

    dp[0] = 0;

    for(int i = 0; i < N; ++i) {
        for(int j = mx - 1; j >= 0; j--) {
            if(dp[j] + w[i] <= W) ckmin(dp[j + v[i]], dp[j] + w[i]);
        }
    }

    for(int i = mx - 1; i >= 0; i--) {
        if(dp[i] != 1e18) {
            cout << i << endl;
            break;
        }
    }
}

F - LCS

Time Complexity: O(N2)\mathcal{O}(N^2)

First read this, then following the according dp\texttt{dp} model, build the string accordingly.

#include <bits/stdc++.h>

using namespace std;

template<class T> bool ckmax(T& a, const T& b) {
    return a < b ? a = b, 1 : 0;
}

int dp[3001][3001];

int main() {
    cin.tie(0)->sync_with_stdio(0);
    string s, t; cin >> s >> t;
    int n = s.size(), m = t.size();

    for(int i = 0 ; i <= n; ++i) {
        for(int j = 0; j <= m; ++j)  {
            if(!i || !j) dp[i][j] = 0;
            else if(s[i - 1] == t[j - 1]) dp[i][j] = 1 + dp[i - 1][j - 1];
            else dp[i][j] = max(dp[i - 1][j], dp[i][j - 1]);
        }
    }

    string ret = "";

    while(n && m) {
        if(s[n - 1] == t[m - 1]) {
            ret += s[n - 1];
            n--;
            m--;
        }
        else if(dp[n - 1][m] > dp[n][m - 1]) n--;
        else m--;
     }

     reverse(ret.begin(), ret.end());

     cout << ret << endl;
}

G - Longest Path

Time Complexity: O(N+M)\mathcal{O}(N + M)

Simply perform a DFS on the graph, defining dp[i]\texttt{dp}[i] as the longest path that node ii can take. Notice how the optimal substructure is formed: the best path for any node xx is one added to the best path for any of its children.

#include <bits/stdc++.h>

using namespace std;

vector<int> dp(100001);
vector<vector<int>> adj(100001);

int dfs(int x) {
    if (dp[x]) return dp[x];
    for (auto e : adj[x]){
            dp[e] = dfs(e);
            dp[x] = max(dp[e] + 1, dp[x]);
    }
    return dp[x];
}

int main(){
    int n,m;
    cin >> n >> m;

    for(int i = 0; i < m; ++i) {
        int a, b;
        cin >> a >> b;
        a--; b--;
        adj[a].push_back(b);
    }

    for (int i = 0; i < n; ++i) {
        dfs(i);
    }

    int ans = 0;

    for (int i = 0;i < n; ++i) {
        ans = max(dp[i], ans);
    }

    cout << ans;
}

H - Grid 1

Time Complexity: O(N2)\mathcal{O}(N^2)

A full tutorial can be found here.

#include <bits/stdc++.h>

using namespace std;

bool ok[1000][1000];
long long dp[1000][1000];

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int n; cin >> n;
    for(int i = 0; i < n; ++i) {
        string s;
        cin >> s;
        for(int j = 0; j < n; ++j) {
            if(s[j] == '.') ok[i][j] = true;
            else ok[i][j] = false;
        }
    }

    dp[0][0] = 1;
    for(int i = 0; i < n; ++i) {
        for(int j = 0; j < n; ++j) {
            if(!ok[i][j]) dp[i][j] = 0;
            else {
                if(i > 0) dp[i][j] += dp[i - 1][j];
                if(j > 0) dp[i][j] += dp[i][j - 1];
                dp[i][j] %= 1000000007;
            }
        }
    }

    cout << dp[n - 1][n - 1] << "\n";

    return 0;
}

I - Coins

Time Complexity: O(N2)\mathcal{O}(N^2)

Define dp[i][j]\texttt{dp}[i][j] to be the probability after tossing the first ii coins, and receiving jj heads. Then, our probability of flipping jj heads from the first ii coins is the addition of the following:

  • either we flipped a head with probability pp
    • then use dp[i1][j1]\texttt{dp}[i-1][j-1], the probability of receiving j1j-1 heads from the previous toss.
  • we flipped a tail with probability 1p1-p
    • then use dp[i1][j](1p[i1])\texttt{dp}[i-1][j]\cdot(1-\texttt{p}[i-1]), the probability of receiving jj heads from the previous toss.
dp[i][j]=dp[i1][j1]p[i1]+dp[i1][j](1p[i1])\texttt{dp}[i][j] = \texttt{dp}[i-1][j-1]\cdot \texttt{p}[i-1] + \texttt{dp}[i-1][j]\cdot(1-\texttt{p}[i-1])
#include<bits/stdc++.h>

using namespace std;

long double dp[3001][3001];

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int n;
    cin >> n;

    vector<long double> p(n);

    for(int i = 0; i < n; ++i) cin >> p[i];

    int leastHeads = n / 2 + 1;

    for(int i = 0; i <= n; ++i) {
            dp[i][0] = 1;
    }

    for(int i = 1; i <= n; ++i) {
        for(int j = 1; j <= leastHeads; ++j) {
            dp[i][j] = dp[i - 1][j - 1] * p[i - 1] + dp[i - 1][j] * (1 - p[i - 1]);
        }
    }

    cout << fixed << setprecision(10) << dp[n][leastHeads] << endl;
}

J - Sushi

Time Complexity: O(N3)\mathcal{O}(N^3)

Let dp[x][y][z]\texttt{dp}[x][y][z] represent the expected moves for xx number of plates 1-sushi remaining, yy number of plates 2-sushi remaining, zz number of plates 3-sushi remaining.

Then, we can use the relation

dp[x][y][z]=n+xdp[x1][y][z]+ydp[x+1][y1][z]+zdp[x][y+1][z1]\texttt{dp}[x][y][z] = n + x \cdot \texttt{dp}[x-1][y][z] + y \cdot \texttt{dp}[x+1][y-1][z] + z \cdot \texttt{dp}[x][y+1][z-1]

Note that we add 11 for the yy and zz equations because we take from one sushi platter which transitions into one more sushi in another grouping of either x,yx,y. For example, by taking one sushi away from a group of size 22 then there is a corresponding increase in a sushi group of size 11.

#include <bits/stdc++.h>

using namespace std;

int n;

long double dp[301][301][301];
// let dp[i][j][k] be
// i dishes of 1 sushi
// j dishes of 2 sushi
// k dishes of 3 sushi

double memo(int x, int y, int z) {
    if(x < 0 || y < 0 || z < 0) return 0;
    if(x == 0 && y == 0 && z == 0) return 0;
    // if(x + y + z < 1) return 0;
    if(dp[x][y][z] > 0) return dp[x][y][z];

    long double ret = n + x * memo(x - 1, y, z)  // # 1 sushi
                    + y * memo(x + 1, y - 1, z)  // # 2 sushi
                    + z * memo(x, y + 1, z - 1); // # 3 sushi

    return dp[x][y][z] = ret / (x + y + z);
}

int main() {
    cin.tie(0)->sync_with_stdio(0);

    cin >> n;
    vector<int> a(n); for(int i = 0; i < n; ++i) cin >> a[i];

    vector<int> freq(3);
    for(int x : a) freq[x - 1]++;

    memset(dp, -1, sizeof dp);

    cout << fixed << setprecision(10) << memo(freq[0], freq[1], freq[2]) << endl;
}

K - Stones

Time Complexity: O(N2)\mathcal{O}(N^2)

Define dp[i]\texttt{dp}[i] as if it's possible to win with ii stones remaining. Then keep two loops, one for values of i(1iK)i (1 \le i \le K), and the other for each value in AA. If iji \ge j and it was not possible to win a game with iaji - a_j stones, then - because the turns alternate - a game with ii stones is winnable.

#include <bits/stdc++.h>

using namespace std;

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int n, k;
    cin >> n >> k;
    vector<int> a(n);
    for (int& x : a) cin >> x;
    vector<bool> dp(k + 1);
    for (int i = 1; i <= k; ++i)
        for (int j : a)
            if (i >= j && !dp[i - j])
                dp[i] = 1;
    cout << (dp[k] ? "First" : "Second") << '\n';
}

L - Deque

Time Complexity: O(N2)\mathcal{O}(N^2)

Define dp[i][j]\texttt{dp}[i][j] as the optimal score for Jiro (XY)(X - Y) using the range [i,j][i, j]. Then, we can either choose aia_i to append to the left of the range, or aja_j on the right. Then, our two transitions are

dp[i][j]=maxj>i{aidp[i+1][j]ajdp[i][j1]\texttt{dp}[i][j] = \max_{j > i} \left\{\begin{matrix} a_i - \texttt{dp}[i+1][j] \\ a_j - \texttt{dp}[i][j-1] \end{matrix}\right.

Our initial states are dp[i][i]=0\texttt{dp}[i][i] = 0, since any range of size 00 will have difference 00.

#include <bits/stdc++.h>

using namespace std;

long long dp[3001][3001];

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int n;
    cin >> n;

    vector<int> a(n);
    for (int& x : a) cin >> x;

    for (int i = 0; i < n; ++i) dp[i][i] = a[i];

    for (int i = n - 1; i >= 0; i--)
        for (int j = i + 1; j < n; ++j)
            dp[i][j] = max(a[i] - dp[i + 1][j], a[j] - dp[i][j - 1]);
    cout << dp[0][n - 1] << '\n';
}

M - Candies

Time Complexity: O(NK)\mathcal{O}(NK)

Let dp[i][j]\texttt{dp}[i][j] represent the number of ways to distribute jj candies to the first ii children.

Then we have

dp[i][j]=dp[i1][j]\texttt{dp}[i][j]=\sum{\texttt{dp}[i-1][j']}

such that 0jjai0\leq j - j' \leq a_i.

We can use prefix sums to speed this up from O(NK2)\mathcal O(N\cdot K^2) to O(NK)\mathcal O(N\cdot K).

#include <bits/stdc++.h>

using namespace std;

const int MAXK = 100000;
const int MOD = 1000000007;

int add(int i, int j) {
    if ((i += j) >= MOD)
        i -= MOD;
    return i;
}

int sub(int i, int j) {
    if ((i -= j) < 0)
        i += MOD;
    return i;
}

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int n, k, a;
    cin >> n >> k;

    static int dp[MAXK + 1], dq[MAXK + 1];
    dp[0] = 1;
    for (int i = 1; i <= n; ++i) {
        cin >> a;
        for (int j = 1; j <= k; ++j)
            dp[j] = add(dp[j - 1], dp[j]);
        for (int j = 0; j <= k; ++j)
            dq[j] = sub(dp[j], (j > a ? dp[j - a - 1] : 0));
        swap(dp, dq);
    }
    cout << dp[k] << '\n';
}

N - Slimes

Let dp[i][j]\texttt{dp}[i][j] represent the minimum cost to merge the slimes iji\dots j into one slime.

To calculate dp[i][j]\texttt{dp}[i][j], we want to merge two smaller (combined) slimes in the range [i,j][i, j]. To do this, we loop through all kk in the range [i,j1][i, j - 1] and simulate merging [i,k][i, k] and [k+1,j][k + 1, j]. Note that the cost of this is dp[i][k]+dp[k+1][j]+a[i]+a[i+1]++a[j]\texttt{dp}[i][k] + \texttt{dp}[k + 1][j] + a[i] + a[i + 1] + \dots + a[j]. Here, a[i]+a[i+1]++a[j]a[i] + a[i + 1] + \dots + a[j] can be precalculated with prefix sums.

This takes O(N3)\mathcal O(N^3) which fits in the Time Limit but can be optimized to O(N2)\mathcal O(N^2) with Knuth's Optimization.

The code below is O(N3)\mathcal O(N^3).

#include <bits/stdc++.h>
using namespace std;

const int MAXN = 400;
const long long INF = 0x3f3f3f3f3f3f3f3f;

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int n;
    cin >> n;
    vector<long long> a(n), p(n);
    for (long long& x : a) cin >> x;
    for (int i = 0; i < n; ++i)
        p[i] = (i == 0 ? 0 : p[i - 1]) + a[i];
    auto query = [&](int l, int r) {
        return p[r] - (l == 0 ? 0 : p[l - 1]); };

    static long long dp[MAXN][MAXN];
    memset(dp, 0x3f, sizeof(dp));
    for (int i = 0; i + 1 < n; ++i) dp[i][i + 1] = a[i] + a[i + 1];
    for (int i = 0; i < n; ++i) dp[i][i] = 0;
    for (int i = n - 1; i >= 0; --i)
        for (int j = i + 2; j < n; ++j) {
            long long best = INF;
            for (int k = i; k < j; ++k)
                best = min(best, dp[i][k] + dp[k + 1][j]);
            dp[i][j] = query(i, j) + best;
        }
    cout << dp[0][n - 1] << '\n';
}

O - Matching

Time Complexity: O(N2N)\mathcal{O}(N\cdot 2^N)

If we define dp[S]\texttt{dp}[S] to be the number of matchings for females in the set SS to the first S|S| males, this problem boils down to the following:

dp[S]=dp[S\x]:compatible[S][x]\texttt{dp}[S] = \sum \texttt{dp}[S\backslash x]: \texttt{compatible}[|S|][x]

(The : stands for "such that".) In English, this is equivalent to the following:

The number of matchings in a subset SS to include a certain female xx is equivalent to the sum of all the matchings without female xx where female xx is compatible with the S|S|-th male.

Our base case is the empty set, which has a value of 11 (the empty set can be considered as a single matching involving zero pairs).

#include <bits/stdc++.h>

using namespace std;

const int MOD = 1e9 + 7;
const int MAX_N = 21;

bool compat[MAX_N][MAX_N];
int dp[1 << MAX_N];

int main() {
    int N;
    cin >> N;
    for (int i = 0; i < N; ++i) {
        for (int j = 0; j < N; ++j) {
            cin >> compat[i][j];
        }
    }

    dp[0] = 1;

    for (int s = 0; s < (1 << N); s++) {
        int pair_num = __builtin_popcount(s);
        for (int w = 0; w < N; w++) {
            /*
             * check that
             * 1. this woman hasn't been paired already
             * 2. she's also compatible with the {pair_num + 1}th man
             */
            if ((s & (1 << w)) || !compat[pair_num][w])
                continue;

            // add the amount to future dp states
            dp[s | (1 << w)] += dp[s];
            dp[s | (1 << w)] %= MOD;
        }
    }

    cout << dp[(1 << N) - 1] << endl;
}

P - Independent Set

Time Complexity: O(N)\mathcal{O}(N)

First, lets root the tree arbitrary. Let dp[i][j]\texttt{dp}[i][j] represent the number of ways to color subtree ii, coloring node ii color jj.

Here, j=0j=0 is black and j=1j=1 is white. If we color node ii black, then it's children must be all white. So, we have

dp[i][0]=dp[c][1]\texttt{dp}[i][0]=\prod \texttt{dp}[c][1]

such that cc is ii's child, and if we color node ii white, then it's children can be either white or black:

dp[i][1]=(dp[c][0]+dp[c][1])\texttt{dp}[i][1]=\prod({\texttt{dp}[c][0] + \texttt{dp}[c][1]})
#include <bits/stdc++.h>

using namespace std;

const int MAXN = 1e5;
const int MOD = 1e9 + 7;

int add(int i, int j) {
    if ((i += j) >= MOD)
        i -= MOD;
    return i;
}
int mul(int i, int j) {
    return int((long long) i * j % MOD);
}

vector<int> adj[MAXN];
int dp[MAXN][2];

void dfs(int i, int p) {
    dp[i][0] = dp[i][1] = 1;
    for (int j : adj[i]) if (j != p) {
        dfs(j, i);
        dp[i][0] = mul(dp[i][0], dp[j][1]);
        dp[i][1] = mul(dp[i][1], add(dp[j][0], dp[j][1]));
    }
}

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int n;
    cin >> n;
    for (int i = 0, a, b; i + 1 < n; ++i) {
        cin >> a >> b, --a, --b;
        adj[a].push_back(b), adj[b].push_back(a);
    }
    dfs(0, 0);
    cout << add(dp[0][0], dp[0][1]) << '\n';
}

Q - Flowers

Time Complexity: O(NlogN)\mathcal{O}(N\log N)

Let dp[i]\texttt{dp}[i] represent the maximum beauty of a subsequence of flowers ending at ii.

dp[i]=max(dp[j])+ai\texttt{dp}[i]=\max(\texttt{dp}[j]) + a_i

such that j<ij < i and hj<hih_j < h_i.

However, looping through all values of jj would result in a complexity of O(N2)\mathcal O(N^2), which would TLE.

We can use a data-structure to speed this up. Notice that when calculating dp[i]\texttt{dp}[i], we're querying the maximum dp[j]\texttt{dp}[j] such that hj<hih_j < h_i and adding aia_i.

In other words, we're querying the maximum dp[j]\texttt{dp}[j] which hj[1,hi1]h_j\in[1, h_i - 1], which can be handled with a data-structure like a segment tree or a binary indexed tree.

#include <bits/stdc++.h>

using namespace std;

const int MAXN = 2e5;

int n;
long long bit[MAXN + 1];

void update(int i, long long x) {
    for ( ; i <= n; i += i & -i)
        bit[i] = max(bit[i], x);
}

long long query(int i) {
    long long ans = 0;
    for ( ; i > 0; i -= i & -i)
        ans = max(ans, bit[i]);
    return ans;
}

int main() {
    cin.tie(0)->sync_with_stdio(0);

    cin >> n;
    vector<int> h(n), a(n);
    for (int i = 0; i < n; ++i) cin >> h[i];
    for (int i = 0; i < n; ++i) cin >> a[i];
    for (int i = 0; i < n; ++i) {
        update(h[i], query(h[i] - 1) + a[i]);
    }
    cout << query(n) << '\n';
}

R - Walk

We're given a adjacency matrix aa and we want to calculate the number of paths of length kk. First, lets convert aa into a Matrix, mm.

Notice that when we multiply mm by mm, we get the number of paths with length 22. If we take mpm^p, we end up with the number of paths of length pp from each ii to jj.

Thus, we're looking for mkm^k, which can be calculated with binary exponentiation.

Time Complexity: O(N3logK)\mathcal{O}(N^3\log K)

#include <bits/stdc++.h>

using namespace std;

const int MOD = 1e9 + 7;

int add(int i, int j) {
    if ((i += j) >= MOD)
        i -= MOD;
    return i;
}
int mul(int i, int j) {
    return int((long long) i * j % MOD);
}

template<class T> struct Matrix {
    T **mat; int a, b;

    Matrix() : a(0), b(0) {}
    Matrix(int a_, int b_) : a(a_), b(b_) {
        int i, j;

        mat = new T*[a];
        for (i = 0; i < a; ++i) {
            mat[i] = new T[b];
            for (j = 0; j < b; ++j)
                mat[i][j] = 0;
        }
    }
    Matrix(const vector<vector<T>>& vt) {
        int i, j;

        *this = Matrix((int) vt.size(), (int) vt[0].size());
        for (i = 0; i < a; ++i)
            for (j = 0; j < b; ++j)
                mat[i][j] = vt[i][j];
    }
    Matrix operator*(const Matrix& m) {
        int i, j, k;

        assert(b == m.a);
        Matrix r(a, m.b);
        for (i = 0; i < a; ++i)
            for (j = 0; j < m.b; ++j)
                for (k = 0; k < b; ++k)
                    r.mat[i][j] = add(r.mat[i][j],
                        mul(mat[i][k], m.mat[k][j]));
        return r;
    }
    Matrix& operator*=(const Matrix& m) {
        return *this = (*this) * m;
    }
    friend Matrix power(Matrix m, long long p) {
        int i;

        assert(m.a == m.b);
        Matrix r(m.a, m.b);
        for (i = 0; i < m.a; ++i)
            r.mat[i][i] = 1;
        for ( ; p > 0; p >>= 1, m *= m)
            if (p & 1)
                r *= m;
        return r;
    }
};

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int n; long long k;
    cin >> n >> k;
    vector<vector<int>> adj(n, vector<int>(n));
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j)
            cin >> adj[i][j];
    Matrix<int> mat(adj);
    mat = power(mat, k);
    int ans = 0;
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j)
            ans = add(ans, mat.mat[i][j]);
    cout << ans << '\n';
}

S - Digit Sum

Time Complexity: O(KD)\mathcal{O}(|K|D)

We can use digit dp to solve this problem. Let dp[i][j]\texttt{dp}[i][j] represent the number of ways to create a length K|K| number which is a multiple of DD given that the ii digits is fixed and has a sum equivalent to j(modD)j \pmod D.

To find the answer, we will loop through all prefixes of KK and calculate the number of ways to create the remaining suffix, which can be mapped to a dp state.

#include <bits/stdc++.h>

using namespace std;

const int MAXL = 10000;
const int MAXD = 100;
const int MOD = 1e9 + 7;

int dp[MAXL + 1][MAXD];

int add(int i, int j) {
    if ((i += j) >= MOD)
        i -= MOD;
    return i;
}

void init(int l, int d) {
    dp[l][0] = 1;
    for (int i = l - 1; i >= 0; --i)
        for (int j = 0; j < d; ++j)
            for (int k = 0; k < 10; ++k)
                dp[i][j] = add(dp[i][j], dp[i + 1][(j + k) % d]);
}

int query(const string& s, int d) {
    int ans = 0, carry = 0;
    for (int i = 0; i < (int) s.length(); ++i) {
        for (int j = 0; j < (s[i] - '0'); ++j)
            ans = add(ans, dp[i + 1][(carry + j) % d]);
        carry = (carry + (s[i] - '0')) % d;
    }
    if (carry == 0) ans++;
    if (--ans < 0) ans += MOD;
    return ans;
}

int main() {
    cin.tie(0)->sync_with_stdio(0);

    string s; int d;
    cin >> s >> d;
    init((int) s.size(), d);
    cout << query(s, d) << '\n';
}

T - Permutation

Time Complexity: O(N2)\mathcal{O}(N^2)

Let dp[i][j]\texttt{dp}[i][j] represent the number of ways to create a permutation with length i+1i+1 the first ii signs, using jj as the last element of the permutation.

If s[i]s[i] is >>, then, dp[i][j]=dp[i1][k]\texttt{dp}[i][j]=\sum{\texttt{dp}[i-1][k]} for all kjk\geq j.

Otherwise, dp[i][j]=dp[i1][k]\texttt{dp}[i][j]=\sum{\texttt{dp}[i-1][k]} for all k<jk < j. This can be sped up to O(N2)\mathcal O(N^2) with prefix sums.

#include <bits/stdc++.h>

using namespace std;

const int MAXN = 3000;

const int MOD = 1e9 + 7;

int add(int i, int j) {
    if ((i += j) >= MOD)
        i -= MOD;
    return i;
}
int sub(int i, int j) {
    if ((i -= j) < 0)
        i += MOD;
    return i;
}

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int n;
    string s;

    cin >> n >> s;
    static int dp[MAXN + 1], dq[MAXN + 1];
    dp[1] = 1;
    for (int i = 1; i < n; ++i) {
        for (int j = 1; j <= i + 1; ++j)
            dp[j] = add(dp[j], dp[j - 1]);
        for (int j = 1; j <= i + 1; ++j)
            if (s[i - 1] == '>')
                dq[j] = sub(dp[i], dp[j - 1]);
            else
                dq[j] = dp[j - 1];
        swap(dp, dq);
    }
    int ans = 0;
    for (int i = 1; i <= n; ++i)
        ans = add(ans, dp[i]);
    cout << ans << '\n';
}

U - Grouping

Time Complexity: O(3N+2NN2)\mathcal{O}(3^N + 2^N\cdot N^2)

Let cost[S]\texttt{cost}[S] represent the score of assigning all Rabbits in set SS into the same group. This can be calculated in O(2NN2)\mathcal O(2^N \cdot N^2).

Now, we calculate dp[S]\texttt{dp}[S], the maximum score of assigning the rabbits in set SS. To calculate dp[S]\texttt{dp}[S], we have to loop through all subsets of SS and simulate putting all the rabbits in the subset in the same group. Fortunately, you can do this in O(3N)\mathcal O(3^N) (see https://cp-algorithms.com/algebra/all-submasks.html#toc-tgt-1 ), which runs in time for N16N\leq 16.

#include <bits/stdc++.h>
using namespace std;

const int MAXN = 16;

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int n;
    static int a[MAXN][MAXN];
    static long long cost[1 << MAXN];
    static long long dp[1 << MAXN];
    cin >> n;
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j)
            cin >> a[i][j];
    for (int i = 0; i < (1 << n); ++i)
        for (int j = 0; j < n; ++j)
            for (int k = j + 1; k < n; ++k)
                if (i & (1 << j) && i & (1 << k))
                    cost[i] += a[j][k];
    for (int i = 0, j; i < (1 << n); ++i) {
        j = ((1 << n) - 1) ^ i;
        for (int k = j; k; k = (k - 1) & j)
            dp[i ^ k] = max(dp[i ^ k], dp[i] + cost[k]);
    }
    cout << dp[(1 << n) - 1] << '\n';
}

V - Subtrees

This explains it pretty well.

W - Intervals

Let dp[i]\texttt{dp}[i] represent the maximum score of a string with the iith bit turned on.

For our relation, we have

dp[i]=max(dp[j]+cost(i,j))\texttt{dp}[i] = \max(\texttt{dp}[j] + \texttt{cost}(i, j))

such that j<ij < i and cost(i,j)\texttt{cost}(i, j) contains the sum of all aa such that lirl\leq i\leq r but not ljrl\leq j\leq r.

This works, in O(N2)\mathcal O(N^2), which unfortunately, TLEs.

However, we can use a segment tree to speed this up. To calculate dp[i]\texttt{dp}[i], we can query the maximum of [0,i1][0, i - 1] in the segment tree. Now, we just need to support activating/deactivating each query.

  • When we reach a ll, we add aa to each dp value in the range [0,a1][0, a - 1]
  • When we reach a rr, we subtract aa from each dp value in the range [0,a1][0, a - 1]

Now, we can just query the maximum of dp[0i1]\texttt{dp}[0\dots i-1] and update it into our segment tree!

#include <bits/stdc++.h>
using namespace std;

const long long INF = 0x3f3f3f3f3f3f3f3f;

template<class T> struct LazySeg {
    int sz; vector<T> st, lz;

    T comb(T a, T b) {
        return max(a, b);
    }
    void init(int n) {
        sz = 1; while (sz < n) sz <<= 1;
        st.assign(2 * sz, 0), lz.assign(2 * sz, 0);
    }
    void pull(int i) {
        st[i] = comb(st[i << 1], st[i << 1 | 1]);
    }
    void push(int i, int l, int r) {
        st[i] += lz[i];
        if (l != r) lz[i << 1] += lz[i], lz[i << 1 | 1] += lz[i];
        lz[i] = 0;
    }
    void update(int ql, int qr, T x, int i, int l, int r) {
        push(i, l, r); if (ql > r || qr < l) return;
        if (ql <= l && qr >= r) { lz[i] += x; return void(push(i, l, r)); }
        int m = (l + r) >> 1; update(ql, qr, x, i << 1, l, m);
        update(ql, qr, x, i << 1 | 1, m + 1, r); pull(i);
    }
    void update(int ql, int qr, T x) {
        update(ql, qr, x, 1, 0, sz - 1);
    }
    T query(int ql, int qr, int i, int l, int r) {
        push(i, l, r); if (ql > r || qr < l) return -INF;
        if (ql <= l && qr >= r) return st[i]; int m = (l + r) >> 1;
        return comb(query(ql, qr, i << 1, l, m), query(ql, qr, i << 1 | 1, m + 1, r));
    }
    T query(int ql, int qr) {
        return query(ql, qr, 1, 0, sz - 1);
    }
};

LazySeg<long long> st;

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int n, m;
    cin >> n >> m;
    vector<long long> add(n + 2);
    vector<vector<array<int, 2>>> todo(n + 2);
    while (m--) {
        int l, r, a;
        cin >> l >> r >> a;
        add[l] += a;
        todo[r + 1].push_back({a, l});
    }
    st.init(n + 1);
    for (int i = 1; i <= n + 1; ++i) {
        st.update(0, i - 1, add[i]);
        for (array<int, 2> ar : todo[i])
            st.update(0, ar[1] - 1, -ar[0]);
        st.update(i, i, st.query(0, i - 1));
    }
    cout << st.query(0, n) << '\n';
}

X - Towers

Notice that the optimal sorting of elements is by the sum of their weight and strength.

Consider two orderings for blocks aa and bb.

  • If the tower aa is on the bottom, then the strength of the tower is
sawbs_a - w_b
  • If the tower bb is on the bottom, then the strength of the tower is
sbwas_b - w_a

This produces the equation

sawb=?sbwas_a - w_b \overset{?}{=} s_b - w_a
sa+wa=?sb+wbs_a + w_a \overset{?}{=} s_b + w_b

Obviously, we want to maximize the strength of our tower, so put the one with less si+wis_i + w_i on the top.

Then, define dp[i]\texttt{dp}[i] as the maximum value we can achieve for a weight of ii. We process the tower top-down, only calculating weights j:jsij : j \le s_i since any more would break the tower. Our relation is then:

dp[j+w[x]]=max(j+dp[w[x]],dp[j]+v[x])\texttt{dp}[j + \texttt{w}[x]] = \max(j + \texttt{dp}[\texttt{w}[x]], \texttt{dp}[j] + \texttt{v}[x])
#include <bits/stdc++.h>

using namespace std;

const int MAX_N = 1e3+1;

long long dp[20001];
int w[MAX_N], s[MAX_N], v[MAX_N], p[MAX_N];

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int N; cin >> N;
    for(int i = 0; i < N; ++i) cin >> w[i] >> s[i] >> v[i];

    iota(p, p + N, 0);

    sort(p, p + N, [&](const int &a, const int &b) {
        return w[a] + s[a] < w[b] + s[b];
    });

    for(int i = 0; i < N; ++i) {
        int x = p[i];
        for(int j = s[x]; j >= 0; --j) {
            dp[j + w[x]] = max(dp[j + w[x]], dp[j] + v[x]);
        }
    }

    cout << *max_element(dp, dp + 20001) << endl;
}

Y - Grids 2

Time Complexity: O(H+W+N2)\mathcal O(H+W+N^2)

First, note that the number of ways to walk from cell (i,j)(i, j) to (x,y)(x, y) is (xi+yjxi){x-i+y-j}\choose{x-i}, which can be calculated in O(1)\mathcal O(1) if you precompute factorials.

Let dp[i]\texttt{dp}[i] represent the number of ways to walk from (1,1)(1, 1) to the ith square without passing any other squares other than the ith one.

To calculate this, note that if there are no squares in between (1,1)(1, 1) and (ri,ci)(r_i, c_i), then there are a total of (ri1+ci1ri1){r_i-1+c_i-1}\choose{r_i-1} ways to do this.

We can subtract this value from the number of ways to pass through a square in the path from (1,1)(1, 1) to (ri,ci(r_i, c_i). This can be done by looping through all squares jj such that rjrir_j\leq r_i and cjcic_j\leq c_i.

To calculate the answer, we can simply add an extra square at (H,W)(H, W) and take the resulting dp\texttt{dp} value.

#include <bits/stdc++.h>
using namespace std;

#define N   100000
#define M   3000
#define MOD 1000000007

int n, m, k, dp[M + 1];
long long fac[N << 1], ifac[N << 1];
struct P {
    int x, y;
    bool operator<(const P& other) const {
        if (x == other.x) return y < other.y;
        return x < other.x;
    }
} point[M + 1];

int power(long long x, int p) {
    long long res = 1;
    while (p > 0) {
        if (p & 1) res = res * x % MOD;
        x = x * x % MOD, p >>= 1;
    }
    return res;
}

void genFac(int size) {
    fac[0] = 1;
    for (int i = 1; i <= size; ++i)
        fac[i] = fac[i - 1] * i % MOD;
    ifac[size] = power(fac[size], MOD - 2);
    for (int i = size; i >= 1; --i)
        ifac[i - 1] = ifac[i] * i % MOD;
}
int choose(int n, int k) {
    assert(n >= k);
    return fac[n] * ifac[k] % MOD * ifac[n - k] % MOD;
}
int path(int i, int j, int x, int y) {
    assert(i <= x && j <= y);
    return choose(x - i + y - j, x - i);
}

int main() {
    scanf("%d%d%d", &n, &m, &k);
    genFac(n + m - 1);
    for (int i = 0; i < k; ++i)
        scanf("%d%d", &point[i].x, &point[i].y);
    point[k] = {n, m};
    sort(point, point + k + 1);
    for (int i = 0; i <= k; ++i) {
        long long sum = path(1, 1, point[i].x, point[i].y);
        for (int j = 0; j < i; ++j)
            if (point[i].y - point[j].y >= 0) {
                sum = sum - (long long) dp[j] * path(point[j].x, point[j].y, point[i].x, point[i].y) % MOD;
                if (sum < 0) sum += MOD;
            }
        dp[i] = sum;
    }
    printf("%d\n", dp[k]);
}

Z - Frog 3

Time Complexity: O(N)\mathcal O(N)

Lets think of a O(N2)\mathcal O(N^2) dp first. Let dp[i]\texttt{dp}[i] represent the minimum cost to travel from stone 11 to stone ii. We have the following recurrence:

dp[i]=max(dp[j]+(hihj)2+C)\texttt{dp}[i]=\max(\texttt{dp}[j] + (h_i-h_j)^2 + C)

Lets expand the equation take out the constants and we have

dp[i]=C+hi2+max(dp[j]+2hihj+hj2)\texttt{dp}[i]=C+h_i^2 + \max(\texttt{dp}[j] + -2h_ih_j+h_j^2)

dp[j]+2hihj+hj2\texttt{dp}[j] + -2h_ih_j+h_j^2 can be rerwritten as (2hj)x+(dp[j]+hj2)(-2h_j)x+(\texttt{dp}[j] + h_j^2) where x=hix=h_i. This forms a line (mx+bmx+b). Now, we just want to query the minimum value with x=hix=h_i. This can be achieved with Convex Hull Trick.

#include <bits/stdc++.h>
using namespace std;

using ll = long long;
struct Line {
    mutable ll k, m, p;
    bool operator<(const Line& o) const { return k < o.k; }
    bool operator<(ll x) const { return p < x; }
    long long eval(long long x) const { return k * x + m; }
};

struct CHT {
    deque<Line> hull;
    static const ll inf = LLONG_MAX;

    ll div(ll a, ll b) { // floored division
        return a / b - ((a ^ b) < 0 && a % b); }
    bool isect(Line &x, const Line &y) {
        if (x.k == y.k) x.p = x.m > y.m ? inf : -inf;
        else x.p = div(y.m - x.m, x.k - y.k);
        return x.p >= y.p;
    }
    void add(long long k, long long m) {
        Line L = {k, m, 0};
        while ((int) hull.size() >= 2 && (isect(L, hull.back()),
            isect(hull.back(), hull[(int) hull.size() - 2]), L.p < hull.back().p))
            hull.pop_back();
        hull.push_back(L);
    }
    long long query(long long x) {
        while ((int) hull.size() >= 2 && hull[0].eval(x) >= hull[1].eval(x))
            hull.pop_front();
        return hull[0].eval(x);
    }
};

int main() {
    cin.tie(0)->sync_with_stdio(0);

    int n; long long c;
    cin >> n >> c;
    vector<long long> h(n), dp(n);
    for (long long&x : h) cin >> x;

    CHT ch;
    auto ins = [&](int i) {
        ch.add(-h[i] * 2, (dp[i] + (h[i] * h[i])));
    };
    dp[0] = 0; ins(0);
    for (int i = 1; i < n; ++i) {
        long long x = ch.query(h[i]);
        dp[i] = c + (h[i] * h[i]) + x;
        ins(i);
    }
    cout << dp[n - 1] << '\n';
}
🌓