// 演算法:把相同值的下標分在一組(用 qsort 排序 {值, 下標} 配對後,
// 相同值會自然相鄰)。再對每組用前綴和,讓位置 m 的答案在 O(1) 內算出。
// Algorithm: sort {value, index} pairs so equal values are adjacent, then for
// each value-group sweep once using a running leftSum to get each answer in O(1).

#include <stdlib.h>   // 為了 malloc / calloc / free / qsort / for malloc, calloc, free, qsort

// 把「值 + 原下標」綁成一個型別,方便一起排序 / group value and original index together for sorting
typedef struct {
    int val;   // nums[i] 的數值 / the value
    int idx;   // 它在 nums 中的原始下標 / its original position in nums
} Pair;

// qsort 的比較函式:先按 val,再按 idx(idx 其實本來就遞增,但寫出來更安全)
// Comparator for qsort: order by val, then by idx (indices are already in order,
// but specifying it makes behavior deterministic).
static int cmp(const void *a, const void *b) {
    const Pair *pa = (const Pair*)a;   // 轉回 Pair 指標 / cast void* back to Pair*
    const Pair *pb = (const Pair*)b;
    if (pa->val != pb->val)
        // 不能直接 return pa->val - pb->val(可能溢位);用比較結果 -1/0/1 / avoid subtraction overflow
        return (pa->val > pb->val) - (pa->val < pb->val);
    return (pa->idx > pb->idx) - (pa->idx < pb->idx);
}

// LeetCode 指定的函式簽名:回傳 long long 陣列,並透過 *returnSize 回傳長度
// LeetCode-required signature: returns a heap-allocated long long array and writes length into *returnSize.
long long* distance(int* nums, int numsSize, int* returnSize) {
    *returnSize = numsSize;                                                      // 設定回傳長度 / set output length

    // calloc 會把每個元素初始化為 0,單元素組直接就是正確答案 / calloc zero-fills, which is already
    // correct for groups of size 1 (they never get overwritten).
    long long *ans = (long long*)calloc((size_t)numsSize, sizeof(long long));

    // 為每個下標建立一個 Pair,準備排序 / build one Pair per index, ready to sort
    Pair *arr = (Pair*)malloc(sizeof(Pair) * (size_t)numsSize);
    for (int i = 0; i < numsSize; i++) {
        arr[i].val = nums[i];  // 複製值 / copy the value
        arr[i].idx = i;        // 記住原本位置 / remember the original index
    }

    // 排序後,相同 val 的 Pair 會排在連續區段 / after sorting, equal values form contiguous runs
    qsort(arr, (size_t)numsSize, sizeof(Pair), cmp);

    // 用兩個指標 l、r 掃出每一段「相同值」的區間 [l, r) / sweep contiguous runs of equal values
    int l = 0;
    while (l < numsSize) {
        int r = l;
        // 把 r 推到下一個不同值的位置 / advance r past all equal values
        while (r < numsSize && arr[r].val == arr[l].val) r++;

        int k = r - l;             // 本組的元素個數 / size of this value-group

        // 先算整組下標總和 total / first compute the total of all indices in this group
        long long total = 0;
        for (int i = l; i < r; i++) total += arr[i].idx;

        // leftSum 在掃的過程中累加:目前位置左邊所有下標的和 / running sum of indices already visited
        long long leftSum = 0;
        for (int i = l; i < r; i++) {
            int m = i - l;                      // m 是在組內的位置(從 0 算起)/ position within the group
            long long pm = arr[i].idx;          // 這個元素的原下標 / original index of current element
            // 右邊的總和就是總和扣掉左邊與自己 / right-side sum is what remains after removing leftSum and pm
            long long rightSum = total - leftSum - pm;
            // 左邊貢獻:m 個 (pm - p_j) 加起來 = m*pm - leftSum / left part: m*pm minus indices before me
            long long left  = (long long)m * pm - leftSum;
            // 右邊貢獻:(k-1-m) 個 (p_j - pm) 加起來 = rightSum - (k-1-m)*pm / right part
            long long right = rightSum - (long long)(k - 1 - m) * pm;
            ans[arr[i].idx] = left + right;     // 寫回原本下標的位置 / write to the original slot
            leftSum += pm;                      // 下一輪前把自己併入 leftSum / fold current index into leftSum
        }

        l = r;   // 跳到下一組 / jump to next group
    }

    free(arr);   // 歸還 Pair 陣列的記憶體 / release the Pair buffer
    return ans;  // ans 由呼叫端釋放(LeetCode 的慣例)/ caller frees ans, per LeetCode convention
}
