uLipSync の Timeline エディタのパフォーマンス改善を行ってみた

はじめに

先日、uLipSync のタイムラインの機能で長いデータを割り当てるとパフォーマンスが低下する、とのコメントを頂きました。実際に 3 分程のクリップを割り当ててみると顕著にパフォーマンスが落ちています。

原因を調べてみると、どうやら波形描画に用いている AudioCurveRendering の実行が重いようです。これはアンチエイリアスつきの波形描画を行ってくれるものなのですが、テクスチャを返す API ではなく、指定した領域（Rect）に波形画像を描画するものになります。そのためキャッシュもできず、描画更新が起こるタイミングで毎回生成処理が走ってしまいます。

そこで、今回はこのテクスチャを自前で生成することで処理の改善を試みてみました。愚直な Texture2D.SetPixels() を使う方式から Texture2D.GetPixelData() の利用、そして Job + Burst 化まで行います。本エントリは解説というよりは作業の備忘録になります。

ダウンロード

github.com

問題点

以前はこんな感じで Timeline のエディタ描画の更新があるたびに AudioUtil.GetMinMaxData() および AudioCurveRendering.DrawMinMaxFilledCurve() が呼ばれていました。

/// <summary>
/// Original (uncached) Timeline clip editor for <see cref="uLipSyncClip"/>.
/// Every call to <see cref="DrawBackground"/> hands the clip's audio to
/// EditorUtil.DrawWave, so the waveform is regenerated on each repaint.
/// </summary>
[CustomTimelineEditor(typeof(uLipSyncClip))]
public class uLipSyncClipTimelineEditor : ClipEditor
{
    ...
    /// <summary>
    /// Draws the waveform of the clip's baked audio behind the Timeline clip.
    /// </summary>
    /// <param name="clip">Timeline clip whose asset is a uLipSyncClip.</param>
    /// <param name="region">Region of the clip that is being drawn.</param>
    public override void DrawBackground(TimelineClip clip, ClipBackgroundRegion region)
    {
        var ls = clip.asset as uLipSyncClip;
        var data = ls.bakedData;
        ...
        EditorUtil.DrawWave(rect, data.audioClip, new EditorUtil.DrawWaveOption()
        {
            // ... coloring logic
        });
    }
}


/// <summary>
/// Static helper class that contains the waveform drawing routine
/// <see cref="DrawWave"/> and its option type.
/// </summary>
public static class EditorUtil
{
    ...
    /// <summary>
    /// Options passed to <see cref="DrawWave"/>.
    /// </summary>
    public class DrawWaveOption
    {
        // Invoked by DrawWave as colorFunc(x) to obtain the color for position x.
        public System.Func<float, Color> colorFunc;
        // NOTE(review): presumably scales the drawn amplitude; its use is elided in this excerpt.
        public float waveScale;
    }

    /// <summary>
    /// Draws the waveform of <paramref name="clip"/> into <paramref name="rect"/>.
    /// The min/max data lookup and the curve rendering both run on every call.
    /// </summary>
    /// <param name="rect">Area the curve is drawn into.</param>
    /// <param name="clip">Audio clip whose min/max data is fetched.</param>
    /// <param name="option">Supplies the per-position color callback.</param>
    public static void DrawWave(Rect rect, AudioClip clip, DrawWaveOption option)
    {
        ...
        // Fetch the waveform
        var minMaxData = AudioUtil.GetMinMaxData(clip);
        ...
        // Evaluator handed to DrawMinMaxFilledCurve: for a given x it outputs
        // the color and the min/max values of the curve.
        AudioCurveRendering.AudioMinMaxCurveAndColorEvaluator dlg = delegate(
            float x, 
            out Color col, 
            out float minValue, 
            out float maxValue)
        {
            col = option.colorFunc(x);
            ...
            minValue = ...;
            maxValue = ...;
            ...
        };

        // Draw
        AudioCurveRendering.DrawMinMaxFilledCurve(rect, dlg);
    }
}

描画の更新は例えば Timeline 再生中などは毎フレーム行われています。また内部では uLipSync.BakedData を参照してそれぞれの時間にどのような音素があるかによって色付けをしています。これが結構重く、3 分程度の歌のようなデータを描画している場合は私の環境では 20 fps 程度までパフォーマンスが低下してしまっていました。

docs.unity3d.com

このように Timeline など毎フレーム更新しうる場所に表示する項目の計算が重い場合は他にも色々ありそうですね。

対処法

以前、Timeline の装飾の記事でも書いたのですが、生成したテクスチャをキャッシュすることでパフォーマンスを改善することができます。

tips.hecomi.com

要は先程は毎回テクスチャ生成処理が走ってしまっていたのですが、これを変更がない限り使い回す、というものです。タイムラインのズーム率が変わって横幅が変わったり、データが変更されて波形が変わるタイミングでテクスチャを再生成するようにしておきます。

ランタイムでももしかしたら使う機会があるかもしれないと思い、Texture2D の生成関数を BakedData に次のように追加してみました。

/// <summary>
/// One baked frame: a volume value plus a list of per-phoneme ratios.
/// </summary>
[System.Serializable]
public struct BakedFrame
{
    // Compared against the normalized vertical distance in BakedData.CreateTexture
    // to decide whether a pixel is opaque.
    public float volume;
    // Each entry's ratio weights the matching BakedData.phonemeColors entry
    // when BakedData.CreateTexture computes the column color.
    public List<BakedPhonemeRatio> phonemes;
    ...
}

...
public class BakedData : ScriptableObject
{
    /// <summary>
    /// Returns the baked frame for time <paramref name="t"/>.
    /// </summary>
    public BakedFrame GetFrame(float t)
    {
        ...
    }

    ...
    /// <summary>
    /// Palette used to tint the waveform; phoneme i uses entry
    /// i modulo the palette length.
    /// </summary>
    public static Color[] phonemeColors =
    {
        Color.red,
        Color.cyan,
        Color.yellow,
        Color.magenta,
        Color.green,
        Color.blue,
        Color.gray,
    };

    /// <summary>
    /// Builds a <paramref name="width"/> x <paramref name="height"/> texture
    /// in which each column is tinted by the phoneme ratios of the frame at
    /// that time and is opaque only within the frame's volume around the
    /// vertical center.
    /// </summary>
    /// <param name="width">Texture width in pixels (one sampled frame per column).</param>
    /// <param name="height">Texture height in pixels.</param>
    /// <returns>The newly created texture with the pixels applied.</returns>
    public Texture2D CreateTexture(int width, int height)
    {
        ...
        var pixels = new Color[width * height];
        var smoothedColor = new Color();
        var smooth = 0.15f;
        var halfHeight = height / 2f;

        for (int column = 0; column < width; ++column)
        {
            var time = (float)column / width * duration;
            var frame = GetFrame(time);

            // Blend the palette by the phoneme ratios of this frame.
            var blended = new Color();
            for (int p = 0; p < frame.phonemes.Count; ++p)
            {
                var paletteColor = phonemeColors[p % phonemeColors.Length];
                blended += paletteColor * frame.phonemes[p].ratio;
            }

            // Ease toward the blended color so neighboring columns change gradually.
            smoothedColor += (blended - smoothedColor) * smooth;

            for (int row = 0; row < height; ++row)
            {
                // Squared normalized distance from the vertical center.
                var distance = Mathf.Pow(Mathf.Abs(((float)row - halfHeight) / halfHeight), 2f);

                var pixel = smoothedColor;
                if (distance > frame.volume)
                {
                    pixel.a = 0f;
                }
                else
                {
                    pixel.a = 1f;
                }

                pixels[width * row + column] = pixel;
            }
        }

        var texture = new Texture2D(width, height);
        texture.SetPixels(pixels);
        texture.Apply();

        return texture;
    }
}

まずは、テクスチャ生成を愚直に Color[] 配列へ詰めて Texture2D.SetPixels() する方式で書いてみました。音量に応じて波形の上下の幅を決め、音素に応じて色付けしたテクスチャを生成するコードになっています。

この生成したテクスチャをキャッシュしながら表示する処理は ClipEditor 側で行います。少し長いですが、次のように初回、クリップが変化したとき、ズーム率などで横幅や縦幅が大きく変化したときにテクスチャを再生成するようにしています。

/// <summary>
/// Timeline clip editor for <see cref="uLipSyncClip"/> that draws the baked
/// waveform from a per-clip cached texture. The texture is rebuilt only on
/// first use, after <see cref="OnClipChanged"/>, or when the requested size
/// differs noticeably from the cached one.
/// </summary>
[CustomTimelineEditor(typeof(uLipSyncClip))]
public class uLipSyncClipTimelineEditor : ClipEditor
{
    // Range the generated texture width is clamped to.
    const int MinTextureWidth = 128;
    const int MaxTextureWidth = 4096;

    // Size difference (in pixels) above which the cached texture is rebuilt.
    const int ResizeThreshold = 10;

    internal class TextureCache
    {
        public Texture2D texture;
        // Set by OnClipChanged to request a rebuild on the next draw.
        public bool forceUpdate = false;
    }

    Dictionary<uLipSyncClip, TextureCache> _textures 
        = new Dictionary<uLipSyncClip, TextureCache>();

    /// <summary>
    /// Drops the cache entry of <paramref name="clip"/> (if any) and destroys
    /// its texture.
    /// </summary>
    void RemoveCachedTexture(uLipSyncClip clip)
    {
        if (!_textures.TryGetValue(clip, out var cache)) return;

        // BakedData.CreateTexture may hand back the shared Texture2D.whiteTexture
        // for invalid data; that built-in texture must never be destroyed.
        if (cache.texture != Texture2D.whiteTexture)
        {
            Object.DestroyImmediate(cache.texture);
        }
        _textures.Remove(clip);
    }

    /// <summary>
    /// Rebuilds the texture of <paramref name="clip"/> and stores it in the cache.
    /// </summary>
    /// <returns>The new texture, or null when the clip has no baked data.</returns>
    Texture2D CreateCachedTexture(
        uLipSyncClip clip, 
        int width, 
        int height)
    {
        RemoveCachedTexture(clip);

        var data = clip.bakedData;
        if (!data) return null;

        width = Mathf.Clamp(width, MinTextureWidth, MaxTextureWidth);
        var tex = data.CreateTexture(width, height);
        var cache = new TextureCache { texture = tex };
        _textures.Add(clip, cache);

        return tex;
    }

    /// <summary>
    /// Returns the cached texture of <paramref name="clip"/>, rebuilding it
    /// when there is none, when an update was requested, or when the
    /// requested size differs from the cached size by more than the threshold.
    /// </summary>
    Texture2D GetOrCreateCachedTexture(
        uLipSyncClip clip, 
        int width, 
        int height)
    {
        // Compare against the width the texture is actually created with.
        // Without this, a request outside the clamp range never matches the
        // cached texture and the texture is regenerated on every repaint.
        width = Mathf.Clamp(width, MinTextureWidth, MaxTextureWidth);

        if (!_textures.TryGetValue(clip, out var cache) ||
            cache.forceUpdate ||
            !cache.texture)
        {
            return CreateCachedTexture(clip, width, height);
        }

        var dw = Mathf.Abs(cache.texture.width - width);
        var dh = Mathf.Abs(cache.texture.height - height);
        if (dw > ResizeThreshold || dh > ResizeThreshold)
        {
            return CreateCachedTexture(clip, width, height);
        }

        return cache.texture;
    }

    /// <summary>
    /// Draws the dimmed background rectangle and then the waveform texture.
    /// </summary>
    public override void DrawBackground(
        TimelineClip clip, 
        ClipBackgroundRegion region)
    {
        DrawBackground(region);
        DrawWave(clip, region);
    }

    void DrawBackground(ClipBackgroundRegion region)
    {
        EditorUtil.DrawBackgroundRect(
            region.position, 
            new Color(0f, 0f, 0f, 0.3f), 
            Color.clear);
    }

    /// <summary>
    /// Draws the cached waveform texture, stretched so that the whole audio
    /// clip maps onto the Timeline and shifted by the visible start offset.
    /// </summary>
    void DrawWave(
        TimelineClip timelineClip, 
        ClipBackgroundRegion region)
    {
        var clip = timelineClip.asset as uLipSyncClip;
        if (clip == null) return;

        var data = clip.bakedData;
        if (!data) return;

        var audioClip = data.audioClip;
        if (!audioClip) return;

        var rect = region.position;
        var duration = region.endTime - region.startTime;
        // Width the full audio clip would occupy at the current zoom.
        var width = (float)(rect.width * audioClip.length / duration);
        var left = Mathf.Max(
            (float)timelineClip.clipIn, 
            (float)region.startTime);
        var offset = (float)(width * left / audioClip.length);
        rect.x -= offset;
        rect.width = width;

        var tex = GetOrCreateCachedTexture(
            clip, 
            (int)rect.width, 
            (int)rect.height);
        if (!tex) return;

        GUI.DrawTexture(rect, tex);
    }

    /// <summary>
    /// Flags the cached texture of the changed clip for regeneration.
    /// </summary>
    public override void OnClipChanged(TimelineClip timelineClip)
    {
        var clip = timelineClip.asset as uLipSyncClip;
        // A null key would make the dictionary lookup throw.
        if (clip == null) return;

        if (_textures.TryGetValue(clip, out var cache))
        {
            cache.forceUpdate = true;
        }
    }
}

これで見た目もそこそこにパフォーマンス改善することができました。

`Texture2D.GetPixelData()` 方式

Color[] に詰めて SetPixels() するのもオーバーヘッドがあるので、Texture2D.GetPixelData() を使ってみます。これは直接カラーバッファを書き換え可能な NativeArray を返してくれる便利関数です。

docs.unity3d.com

以下のように書けば古い Unity では SetPixels() を、対応しているバージョンでは Texture2D.GetPixelData() を使うようにできます。

/// <summary>
/// Builds the waveform texture, writing pixels straight into the texture's
/// buffer where Texture2D.GetPixelData() is available and falling back to a
/// managed Color32 array on older Unity versions.
/// </summary>
/// <param name="width">Texture width in pixels.</param>
/// <param name="height">Texture height in pixels.</param>
/// <returns>The newly created texture with the pixels applied.</returns>
public Texture2D CreateTexture(int width, int height)
{
    ...
    var tex = new Texture2D(width, height);
#if UNITY_2020_1_OR_NEWER
    // NativeArray over the texture's own color buffer; no copy-back is needed.
    var colors = tex.GetPixelData<Color32>(0);
#else
    var colors = new Color32[width * height];
#endif
    ...
    for (int x = 0; x < width; ++x)
    {
        ...
        colors[index] = color;
        ...
    }

#if !UNITY_2020_1_OR_NEWER
    // The fallback buffer is Color32[], so it has to be uploaded with
    // SetPixels32(); SetPixels() only accepts Color[].
    tex.SetPixels32(colors);
#endif
    tex.Apply();

    return tex;
}

ジョブ化

この Texture2D.GetPixelData() とジョブを組み合わせた公式サンプルが以下に公開されています。

github.com

これを元に Burst + Job 化をしてみましょう。ちょっと uLipSync 特有コードが多くあまり参考にならないかもしれませんが...。

using Unity.Burst;
using Unity.Collections;
using Unity.Jobs;
using Unity.Mathematics;
...

/// <summary>
/// Baked lip-sync data. This version generates the waveform texture with a
/// Burst-compiled job that writes into the buffer returned by
/// Texture2D.GetPixelData().
/// </summary>
[BurstCompile]
...
public class BakedData : ScriptableObject
{
    ...
    /// <summary>
    /// Job that fills <see cref="texColors"/> column by column from
    /// pre-sampled per-column phoneme ratios and volumes.
    /// </summary>
    [BurstCompile]
    struct CreateTextureJob : IJob
    {
        // Destination pixels, indexed as width * y + x.
        [WriteOnly] public NativeArray<Color32> texColors;

        // The three input arrays below carry [DeallocateOnJobCompletion];
        // CreateTexture allocates them with Allocator.TempJob and never
        // disposes them itself.
        [DeallocateOnJobCompletion][ReadOnly] 
        public NativeArray<Color> phonemeColors;

        // Flattened ratios, indexed as x * phonemeCount + i.
        [DeallocateOnJobCompletion][ReadOnly] 
        public NativeArray<float> phonemeRatios;

        // One volume value per column x.
        [DeallocateOnJobCompletion][ReadOnly] 
        public NativeArray<float> volumes;

        [ReadOnly] public int width;
        [ReadOnly] public int height;
        [ReadOnly] public int phonemeCount;
        [ReadOnly] public float smooth;

        public void Execute()
        {
            // Carried over from one column to the next, so column order matters.
            var currentColor = new Color();

            for (int x = 0; x < width; ++x)
            {
                // Blend the palette by this column's phoneme ratios.
                var targetColor = new Color();
                for (int i = 0; i < phonemeCount; ++i)
                {
                    var colorIndex = i % phonemeColors.Length;
                    var ratioIndex = x * phonemeCount + i;
                    var color = phonemeColors[colorIndex];
                    float ratio = phonemeRatios[ratioIndex];
                    targetColor += color * ratio;
                }

                // Ease toward the blended color by the smooth factor.
                currentColor += (targetColor - currentColor) * smooth;

                for (int y = 0; y < height; ++y)
                {
                    var index = width * y + x;
                    var color = currentColor;
                    // Squared normalized distance from the vertical center.
                    var dy = ((float)y - height / 2f) / (height / 2f);
                    dy = math.abs(dy);
                    dy = math.pow(dy, 2f);
                    // Opaque only where the distance does not exceed the column's volume.
                    color.a = dy > volumes[x] ? 0f : 1f;
                    // A Color is stored into a Color32 element here (implicit conversion).
                    texColors[index] = color;
                }
            }
        }
    }

    /// <summary>
    /// Creates a <paramref name="width"/> x <paramref name="height"/> waveform
    /// texture. Frames are sampled here with GetFrame and copied into native
    /// arrays; the pixel fill itself runs in <see cref="CreateTextureJob"/>.
    /// </summary>
    /// <param name="width">Texture width in pixels (one sampled frame per column).</param>
    /// <param name="height">Texture height in pixels.</param>
    /// <returns>
    /// The generated texture, or Texture2D.whiteTexture when isValid is false.
    /// </returns>
    public Texture2D CreateTexture(int width, int height)
    {
        if (!isValid) return Texture2D.whiteTexture;

        var tex = new Texture2D(width, height);
        var texColors = tex.GetPixelData<Color32>(0);
        var phonemeColorsTmp = new NativeArray<Color>(
            phonemeColors, 
            Allocator.TempJob);
        // NOTE(review): assumes frames is non-empty when isValid is true and that
        // every frame holds at least this many phonemes - confirm.
        int phonemeCount = frames[0].phonemes.Count;
        var phonemeRatiosTmp = new NativeArray<float>(
            width * phonemeCount, 
            Allocator.TempJob);
        var volumesTmp = new NativeArray<float>(
            width, 
            Allocator.TempJob);

        // Sample one frame per column and flatten it into the job inputs.
        for (int x = 0; x < width; ++x)
        {
            var t = (float)x / width * duration;
            var frame = GetFrame(t);
            for (int i = 0; i < phonemeCount; ++i)
            {
                int index = x * phonemeCount + i;
                phonemeRatiosTmp[index] = frame.phonemes[i].ratio;
            }
            volumesTmp[x] = frame.volume;
        }

        var job = new CreateTextureJob()
        {
            texColors = texColors,
            phonemeColors = phonemeColorsTmp,
            phonemeRatios = phonemeRatiosTmp,
            volumes = volumesTmp,
            width = width,
            height = height,
            phonemeCount = phonemeCount,
            smooth = 0.15f,
        };
        // Scheduled and completed immediately, so this method still blocks
        // until the texture data is written.
        job.Schedule().Complete();

        tex.Apply();
        return tex;
    }
}