/*
 * Decompiled with CFR 0.152.
 */
package com.nvidia.viper.analysis;

import com.nvidia.viper.analysis.AnalysisBase;
import com.nvidia.viper.analysis.AnalysisDescriptor;
import com.nvidia.viper.analysis.AnalysisResult;
import com.nvidia.viper.analysis.AnalysisResultGPUComputeEfficiency;
import com.nvidia.viper.analysis.AnalysisResultGPUOverlap;
import com.nvidia.viper.analysis.AnalysisResultKernelConcurrency;
import com.nvidia.viper.analysis.AnalysisTimeData;
import com.nvidia.viper.analysis.TimeRanges;
import com.nvidia.viper.model.Session;
import com.nvidia.viper.model.Timeline;
import com.nvidia.viper.model.TimelineDevice;
import com.nvidia.viper.model.TimelineIntervalKind;
import com.nvidia.viper.model.TimelineKind;
import java.util.Arrays;
import java.util.List;

/**
 * GPU utilization analysis over the device timelines of a {@link Session}.
 *
 * <p>For every device timeline the analysis can report up to three findings, each collected
 * into its own {@link AnalysisResult}:
 * <ul>
 *   <li>kernel time that is small relative to memcpy time not overlapped by kernels,</li>
 *   <li>little overlap between kernel and memcpy activity,</li>
 *   <li>little overlap between the kernel/memset activity of different streams.</li>
 * </ul>
 */
public class GPUAnalysis extends AnalysisBase {
    /** A device is flagged when (kernel time / non-overlapped memcpy time) is at or below this value. */
    private static final double BAD_COMPUTE_MEMCPY_RATIO = 2.0;

    /**
     * A device is flagged when kernel/memcpy overlap time, as a fraction of
     * min(total memcpy time, total kernel time), is at or below this value.
     */
    private static final double BAD_OVERLAP = 0.1;

    /**
     * A device is flagged when cross-stream overlap time, as a fraction of the busiest
     * stream's kernel+memset time, is at or below this value.
     */
    private static final double NO_CONCURRENCY = 0.1;

    /**
     * Runs the GPU analyses and appends their findings to {@code results}.
     *
     * @param session session whose timelines are analyzed
     * @param results list that receives the produced results
     * @param generateAllResults when {@code true} and device timelines exist, the
     *        "no timeline" and "no device" results as well as all analysis results are added
     *        regardless of whether the corresponding condition was detected
     * @return {@code false} only when the session has no timelines at all; {@code true}
     *         otherwise (including when there are timelines but none of kind DEVICE)
     */
    @Override
    public boolean run(Session session, List<AnalysisResult> results, boolean generateAllResults) {
        if (session.getTimelines().isEmpty()) {
            results.add(new AnalysisResult(AnalysisDescriptor.GPU_UTILIZATION_NO_TIMELINE));
            return false;
        }

        List<Timeline> deviceTimelines = session.getTimelines(TimelineKind.DEVICE);
        if (deviceTimelines.isEmpty()) {
            results.add(new AnalysisResult(AnalysisDescriptor.GPU_UTILIZATION_NO_DEVICE));
            return true;
        }

        if (generateAllResults) {
            results.add(new AnalysisResult(AnalysisDescriptor.GPU_UTILIZATION_NO_TIMELINE));
            results.add(new AnalysisResult(AnalysisDescriptor.GPU_UTILIZATION_NO_DEVICE));
        }
        this.runOverlapAnalysis(results, deviceTimelines, generateAllResults);
        // The returned flag is not consulted here; the method below always returns true.
        this.runKernelConcurrentAnalysis(session, results, deviceTimelines, generateAllResults);
        return true;
    }

    /**
     * Checks, per device, the kernel-to-memcpy time ratio and the kernel/memcpy overlap.
     *
     * <p>Two results are built: one listing devices whose ratio is at or below
     * {@link #BAD_COMPUTE_MEMCPY_RATIO}, and one listing devices whose overlap fraction is at
     * or below {@link #BAD_OVERLAP}. Each result is appended to {@code results} when it
     * contains at least one device or when {@code generateAllResults} is set.
     *
     * @param results list that receives the produced results
     * @param deviceTimelines device timelines to inspect
     * @param generateAllResults when {@code true}, both results are added even if empty
     */
    protected void runOverlapAnalysis(List<AnalysisResult> results, List<Timeline> deviceTimelines, boolean generateAllResults) {
        AnalysisResultGPUComputeEfficiency badComputeEfficiency = new AnalysisResultGPUComputeEfficiency(AnalysisDescriptor.GPU_COMPUTE_MEMCPY_EFFICIENCY_BAD);
        AnalysisResultGPUOverlap badOverlap = new AnalysisResultGPUOverlap(AnalysisDescriptor.GPU_COMPUTE_MEMCPY_OVERLAP_BAD);

        for (Timeline deviceTimeline : deviceTimelines) {
            TimeRanges kernelRanges = unionOf(deviceTimeline.getDescendants(TimelineKind.KERNELS));
            TimeRanges memcpyRanges = unionOf(deviceTimeline.getDescendants(Arrays.asList(TimelineKind.MEMCPY_DTOH, TimelineKind.MEMCPY_HTOD, TimelineKind.MEMCPY_DTOD, TimelineKind.MEMCPY_PTOP)));

            // Time during which both a kernel and a memcpy are active.
            TimeRanges overlapRanges = new TimeRanges(kernelRanges);
            overlapRanges.intersect(memcpyRanges);

            long totalComputeTime = kernelRanges.getTotalTime();
            long totalMemcpyTime = memcpyRanges.getTotalTime();
            long overlapTime = overlapRanges.getTotalTime();
            long totalNonOverlapMemcpyTime = totalMemcpyTime - overlapTime;

            // Floating-point division: a zero denominator yields Infinity or NaN, neither of
            // which compares <= the threshold, so such a device is not flagged here.
            double ratio = (double) totalComputeTime / (double) totalNonOverlapMemcpyTime;
            if (ratio <= BAD_COMPUTE_MEMCPY_RATIO) {
                badComputeEfficiency.addTimeline(deviceTimeline, new AnalysisTimeData(totalComputeTime, totalNonOverlapMemcpyTime));
            }

            // Overlap can never exceed the shorter of the two activities.
            long maxOverlapTime = Math.min(totalMemcpyTime, totalComputeTime);
            if (maxOverlapTime <= 0L) {
                continue;
            }
            double overlapFraction = (double) overlapTime / (double) maxOverlapTime;
            if (overlapFraction <= BAD_OVERLAP) {
                badOverlap.addTimeline(deviceTimeline, new AnalysisTimeData(overlapTime, maxOverlapTime));
            }
        }

        if (badComputeEfficiency.getTimelineCount() > 0 || generateAllResults) {
            results.add(badComputeEfficiency);
        }
        if (badOverlap.getTimelineCount() > 0 || generateAllResults) {
            results.add(badOverlap);
        }
    }

    /**
     * Checks, per device that supports concurrent kernels, how much the kernel/memset
     * activity of its streams overlaps.
     *
     * <p>For each stream the kernel and memset intervals are merged. The overlap is the union,
     * over all streams, of each stream's ranges intersected with the merged ranges of every
     * other stream. A device is listed when that overlap, relative to the busiest single
     * stream's time, is at or below {@link #NO_CONCURRENCY}.
     *
     * @param session the analyzed session (not used by this implementation)
     * @param results list that receives the produced result
     * @param deviceTimelines device timelines to inspect
     * @param generateAllResults when {@code true}, the result is added even if empty
     * @return always {@code true}
     */
    protected boolean runKernelConcurrentAnalysis(Session session, List<AnalysisResult> results, List<Timeline> deviceTimelines, boolean generateAllResults) {
        AnalysisResultKernelConcurrency noConcurrency = new AnalysisResultKernelConcurrency(AnalysisDescriptor.GPU_COMPUTE_OVERLAP_BAD);

        for (Timeline deviceTimeline : deviceTimelines) {
            // NOTE(review): assumes every timeline passed in is a TimelineDevice; any other
            // implementation would raise ClassCastException here - confirm against callers.
            if (!((TimelineDevice) deviceTimeline).supportsConcurrentKernels()) {
                continue;
            }

            List<Timeline> streamTimelines = deviceTimeline.getDescendants(TimelineKind.STREAM);
            TimeRanges[] streamRanges = new TimeRanges[streamTimelines.size()];
            long maxOverlapTime = 0L;
            for (int i = 0; i < streamRanges.length; i++) {
                Timeline stream = streamTimelines.get(i);
                streamRanges[i] = new TimeRanges(stream, TimelineIntervalKind.KERNEL);
                streamRanges[i].union(new TimeRanges(stream, TimelineIntervalKind.MEMSET));
                maxOverlapTime = Math.max(maxOverlapTime, streamRanges[i].getTotalTime());
            }

            TimeRanges overlapRanges = new TimeRanges();
            for (int i = 0; i < streamRanges.length; i++) {
                // Merge the activity of every stream except stream i ...
                TimeRanges otherRanges = new TimeRanges();
                for (int j = 0; j < streamRanges.length; j++) {
                    if (j != i) {
                        otherRanges.union(streamRanges[j]);
                    }
                }
                // ... and keep the part of stream i that coincides with it.
                TimeRanges shared = new TimeRanges(streamRanges[i]);
                shared.intersect(otherRanges);
                overlapRanges.union(shared);
            }

            if (maxOverlapTime <= 0L) {
                continue;
            }
            long overlapTime = overlapRanges.getTotalTime();
            double overlapFraction = (double) overlapTime / (double) maxOverlapTime;
            if (overlapFraction <= NO_CONCURRENCY) {
                noConcurrency.addTimeline(deviceTimeline, new AnalysisTimeData(overlapTime, maxOverlapTime));
            }
        }

        if (noConcurrency.getTimelineCount() > 0 || generateAllResults) {
            results.add(noConcurrency);
        }
        return true;
    }

    /** Builds a single {@link TimeRanges} by merging the ranges of each given timeline. */
    private static TimeRanges unionOf(List<Timeline> timelines) {
        TimeRanges merged = new TimeRanges();
        for (Timeline timeline : timelines) {
            merged.union(new TimeRanges(timeline));
        }
        return merged;
    }
}

