vscode-codeql/extensions/ql-vscode/src/log-insights/performance-comparison.ts at e718ea630c36c253846be585f2d14a4768e54874 · asgerf/vscode-codeql · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
import { createHash } from "crypto";
import type { EvaluationLogScanner } from "./log-scanner";
import type { SummaryEvent } from "./log-summary";

export interface PipelineSummary {
  steps: string[];
  /** Total counts for each step in the RA array, across all iterations */
  counts: number[];
  hash: string;
}

/**
 * Data extracted from a log for the purpose of doing a performance comparison.
 *
 * Memory compactness is important since we keep this data in memory; once for
 * each side of the comparison.
 *
 * This object must be able to survive a `postMessage` transfer from the extension host
 * to a web view (which rules out `Map` values, for example).
 */
export interface PerformanceComparisonDataFromLog {
  /**
   * Names of predicates mentioned in the log.
   *
   * For compactness, details of these predicates are stored in a "struct of arrays" style.
   *
   * All fields (except those ending with `Indices`) should contain an array of the same length as `names`;
   * details of a given predicate should be stored at the same index in each of those arrays.
   */
  names: string[];

  /** RA hash of the `i`th predicate event */
  raHashes: string[];

  /** Number of milliseconds spent evaluating the `i`th predicate from the `names` array. */
  timeCosts: number[];

  /** Number of tuples seen in pipelines evaluating the `i`th predicate from the `names` array. */
  tupleCosts: number[];

  /** Number of iterations seen when evaluating the `i`th predicate from the `names` array. */
  iterationCounts: number[];

  /** Number of executions of pipelines evaluating the `i`th predicate from the `names` array. */
  evaluationCounts: number[];

  /**
   * List of indices into the `names` array for which we have seen a cache hit.
   */
  cacheHitIndices: number[];

  /**
   * List of indices into the `names` array where the predicate was deemed empty due to a sentinel check.
   */
  sentinelEmptyIndices: number[];

  /**
   * All the pipeline runs seen for the `i`th predicate from the `names` array.
   */
  pipelineSummaryList: Array<Record<string, PipelineSummary>>;

  /** All dependencies of the `i`th predicate from the `names` array, encoded as a list of indices in `names`. */
  dependencyLists: number[][];
}

export class PerformanceOverviewScanner implements EvaluationLogScanner {
  private readonly data: PerformanceComparisonDataFromLog = {
    names: [],
    raHashes: [],
    timeCosts: [],
    tupleCosts: [],
    cacheHitIndices: [],
    sentinelEmptyIndices: [],
    pipelineSummaryList: [],
    evaluationCounts: [],
    iterationCounts: [],
    dependencyLists: [],
  };
  private readonly raToIndex = new Map<string, number>();
  private readonly mainHashToRepr = new Map<string, number>();
  private readonly nameToIndex = new Map<string, number>();

  private getPredicateIndex(name: string, ra: string): number {
    let index = this.raToIndex.get(ra);
    if (index === undefined) {
      index = this.raToIndex.size;
      this.raToIndex.set(ra, index);
      const {
        names,
        raHashes,
        timeCosts,
        tupleCosts,
        iterationCounts,
        evaluationCounts,
        pipelineSummaryList,
        dependencyLists,
      } = this.data;
      names.push(name);
      raHashes.push(ra);
      timeCosts.push(0);
      tupleCosts.push(0);
      iterationCounts.push(0);
      evaluationCounts.push(0);
      pipelineSummaryList.push({});
      dependencyLists.push([]);
    }
    return index;
  }

  getData(): PerformanceComparisonDataFromLog {
    return this.data;
  }

  onEvent(event: SummaryEvent): void {
    const { completionType, evaluationStrategy, predicateName, raHash } = event;
    if (completionType !== undefined && completionType !== "SUCCESS") {
      return; // Skip any evaluation that wasn't successful
    }

    switch (evaluationStrategy) {
      case "EXTENSIONAL": {
        break;
      }
      case "COMPUTED_EXTENSIONAL": {
        if (predicateName.startsWith("cached_")) {
          // Add a dependency from a cached COMPUTED_EXTENSIONAL to the predicate with the actual contents.
          // The raHash of the this event may appear in a CACHE_HIT events in the other event log. The dependency
          // we're adding here is needed in order to associate the original predicate with such a cache hit.
          const originalName = predicateName.substring("cached_".length);
          const originalIndex = this.nameToIndex.get(originalName);
          if (originalIndex != null) {
            const index = this.getPredicateIndex(predicateName, raHash);
            this.data.dependencyLists[index].push(originalIndex);
          }
        }
        break;
      }
      case "CACHE_HIT":
      case "CACHACA": {
        // Record a cache hit, but only if the predicate has not been seen before.
        // We're mainly interested in the reuse of caches from an earlier query run as they can distort comparisons.
        if (!this.raToIndex.has(raHash)) {
          this.data.cacheHitIndices.push(
            this.getPredicateIndex(predicateName, raHash),
          );
        }
        break;
      }
      case "SENTINEL_EMPTY": {
        const index = this.getPredicateIndex(predicateName, raHash);
        this.data.sentinelEmptyIndices.push(index);
        const sentinelIndex = this.raToIndex.get(event.sentinelRaHash);
        if (sentinelIndex != null) {
          this.data.dependencyLists[index].push(sentinelIndex); // needed for matching up cache hits
        }
        break;
      }
      case "COMPUTE_RECURSIVE":
      case "COMPUTE_SIMPLE":
      case "NAMED_LOCAL":
      case "IN_LAYER": {
        const index = this.getPredicateIndex(predicateName, raHash);
        this.nameToIndex.set(predicateName, index);
        let totalTime = 0;
        let totalTuples = 0;
        if (evaluationStrategy === "COMPUTE_SIMPLE") {
          totalTime += event.millis;
        } else {
          // Make a best-effort estimate of the total time by adding up the positive iteration times (they can be negative).
          // Note that for COMPUTE_RECURSIVE the "millis" field contain the total time of the SCC, not just that predicate,
          // but we don't have a good way to show that in the UI, so we rely on the accumulated iteration times.
          for (const millis of event.predicateIterationMillis ?? []) {
            if (millis > 0) {
              totalTime += millis;
            }
          }
        }
        const {
          timeCosts,
          tupleCosts,
          iterationCounts,
          evaluationCounts,
          pipelineSummaryList,
          dependencyLists,
        } = this.data;
        const pipelineSummaries = pipelineSummaryList[index];
        const dependencyList = dependencyLists[index];
        for (const { counts, raReference } of event.pipelineRuns ?? []) {
          // Get or create the pipeline summary for this RA
          const pipelineSummary = (pipelineSummaries[raReference] ??= {
            steps: event.ra[raReference],
            counts: counts.map(() => 0),
            hash: getPipelineHash(event.ra[raReference]),
          });
          const { counts: totalTuplesPerStep } = pipelineSummary;
          for (let i = 0, length = counts.length; i < length; ++i) {
            const count = counts[i];
            if (count < 0) {
              // Empty RA lines have a tuple count of -1. Do not count them when aggregating.
              // But retain the fact that this step had a negative count for rendering purposes.
              totalTuplesPerStep[i] = count;
              continue;
            }
            totalTuples += count;
            totalTuplesPerStep[i] += count;
          }
        }
        for (const dependencyHash of Object.values(event.dependencies ?? {})) {
          const dependencyIndex = this.raToIndex.get(dependencyHash);
          if (dependencyIndex != null) {
            dependencyList.push(dependencyIndex);
          }
        }
        // For predicates in the same SCC, add two-way dependencies with an arbitrary SCC member
        const sccHash =
          event.mainHash ??
          (evaluationStrategy === "COMPUTE_RECURSIVE" ? raHash : null);
        if (sccHash != null) {
          const mainIndex = this.mainHashToRepr.get(sccHash);
          if (mainIndex == null) {
            this.mainHashToRepr.set(sccHash, index);
          } else {
            dependencyLists[index].push(mainIndex);
            dependencyLists[mainIndex].push(index);
          }
        }
        timeCosts[index] += totalTime;
        tupleCosts[index] += totalTuples;
        iterationCounts[index] += event.pipelineRuns?.length ?? 0;
        evaluationCounts[index] += 1;
        break;
      }
    }
  }

  onDone(): void {}
}

function getPipelineHash(steps: string[]) {
  const md5 = createHash("md5");
  for (const step of steps) {
    md5.write(step);
  }
  return md5.digest("base64");
}