Coverage report for lib/src/line_splitting/solve_state.dart

Line coverage: 153 / 168 (91.1%)

All files > lib/src/line_splitting/solve_state.dart

1
// Copyright (c) 2015, the Dart project authors.  Please see the AUTHORS file
2
// for details. All rights reserved. Use of this source code is governed by a
3
// BSD-style license that can be found in the LICENSE file.
4
5
library dart_style.src.line_splitting.solve_state;
6
7
import '../debug.dart' as debug;
8
import '../nesting_level.dart';
9
import '../rule/rule.dart';
10
import '../whitespace.dart';
11
import 'line_splitter.dart';
12
import 'rule_set.dart';
13
14
/// A possibly incomplete solution in the line splitting search space.
15
///
16
/// A single [SolveState] binds some subset of the rules to values while
17
/// leaving the rest unbound. If every rule is bound, the solve state describes
18
/// a complete solution to the line splitting problem. Even if rules are
19
/// unbound, a state can also usually be used as a solution by treating all
20
/// unbound rules as unsplit. (The usually is because a state that constrains
21
/// an unbound rule to split can't be used with that rule unsplit.)
22
///
23
/// From a given solve state, we can explore the search tree to more refined
24
/// solve states by producing new ones that add more bound rules to the current
25
/// state.
26
class SolveState {
27
  final LineSplitter _splitter;
28
  final RuleSet _ruleValues;
29
30
  /// The set of [Rule]s that are bound by [_ruleValues].
31
  ///
32
  /// Cached by [_ensureConstraints] for use by [_ensureUnboundConstraints].
33
  Set<Rule> _boundRules;
34
35
  /// The set of [Rule]s that are not bound by [_ruleValues].
36
  ///
37
  /// Cached by [_ensureConstraints] for use by [_ensureUnboundConstraints].
38
  Set<Rule> _unboundRules;
39
40
  /// The unbound rules in this state that can be bound to produce new more
41
  /// refined states.
42
  ///
43
  /// Keeping this set small is the key to make the entire line splitter
44
  /// perform well. If we consider too many rules at each state, our
45
  /// exploration of the solution space is too branchy and we waste time on
46
  /// dead end solutions.
47
  ///
48
  /// Here is the key insight. The line splitter treats any unbound rule as
49
  /// being unsplit. This means refining a solution always means taking a rule
50
  /// that is unsplit and making it split. That monotonically increases the
51
  /// cost, but may help fit the solution inside the page.
52
  ///
53
  /// We want to keep the cost low, so the only reason to consider making a
54
  /// rule split is if it reduces an overflowing line. It's also the case that
55
  /// splitting an earlier rule will often reshuffle the rest of the line.
56
  ///
57
  /// Taking that into account, the only rules we consider binding to extend a
58
  /// solve state are *unbound rules inside the first line that is overflowing*.
59
  /// Even if a line has dozens of rules, this generally keeps the branching
60
  /// down to a few. It also means rules inside lines that already fit are
61
  /// never touched.
62
  ///
63
  /// There is one other set of rules that go in here. Sometimes a bound rule
64
  /// in the solve state constrains some other unbound rule to split. In that
65
  /// case, we also consider that active so we know to not leave it at zero.
66
  final _liveRules = new Set<Rule>();
67
68
  /// The set of splits chosen for this state.
696
  SplitSet get splits => _splits;
70
  SplitSet _splits;
71
72
  /// The number of characters that do not fit inside the page with this set of
73
  /// splits.
746
  int get overflowChars => _overflowChars;
75
  int _overflowChars;
76
77
  /// Whether we can treat this state as a complete solution by leaving its
78
  /// unbound rules unsplit.
79
  ///
80
  /// This is generally true but will be false if the state contains any
81
  /// unbound rules that are constrained to not be zero by other bound rules.
82
  /// This avoids picking a solution that leaves those rules at zero when they
83
  /// aren't allowed to be.
84
  bool _isComplete = true;
85
86
  /// The constraints the bound rules in this state have on the remaining
87
  /// unbound rules.
88
  Map<Rule, int> _constraints;
89
90
  /// The unbound rule values that are disallowed because they would place
91
  /// invalid constraints on the currently bound values.
92
  ///
93
  /// For example, say rule A is bound to 1 and B is unbound. If B has value
94
  /// 1, it constrains A to be 1. Likewise, if B is 2, it constrains A to be
95
  /// 2 as well. Since we already know A is 1, that means we know B cannot be
96
  /// bound to value 2. This means B is more limited in this state than it
97
  /// might be in another state that binds A to a different value.
98
  ///
99
  /// It's important to track this, because we can't allow to states to overlap
100
  /// if one permits more values for some unbound rule than the other does.
101
  Map<Rule, Set<int>> _unboundConstraints;
102
103
  /// The bound rules that appear inside lines also containing unbound rules.
104
  ///
105
  /// By appearing in the same line, it means these bound rules may affect the
106
  /// results of binding those unbound rules. This is used to tell if two
107
  /// states may diverge by binding unbound rules or not.
108
  Set<Rule> _boundRulesInUnboundLines;
109
1103
  SolveState(this._splitter, this._ruleValues) {
1113
    _calculateSplits();
1123
    _calculateCost();
113
  }
114
115
  /// Gets the value to use for [rule], either the bound value or
116
  /// [Rule.unsplit] if it isn't bound.
1179
  int getValue(Rule rule) => _ruleValues.getValue(rule);
118
119
  /// Returns `true` if this state is a better solution to use as the final
120
  /// result than [other].
1213
  bool isBetterThan(SolveState other) {
122
    // If this state contains an unbound rule that we know can't be left
123
    // unsplit, we can't pick this as a solution.
1243
    if (!_isComplete) return false;
125
126
    // Anything is better than nothing.
127
    if (other == null) return true;
128
129
    // Prefer the solution that fits the most in the page.
1306
    if (overflowChars != other.overflowChars) {
1316
      return overflowChars < other.overflowChars;
132
    }
133
134
    // Otherwise, prefer the best cost.
1355
    return splits.cost < other.splits.cost;
136
  }
137
138
  /// Determines if this state "overlaps" [other].
139
  ///
140
  /// Two states overlap if they currently have the same score and we can tell
141
  /// for certain that they won't diverge as their unbound rules are bound. If
142
  /// that's the case, then whichever state is better now (based on their
143
  /// currently bound rule values) is the one that will always win, regardless
144
  /// of how they get expanded.
145
  ///
146
  /// In other words, their entire expanded solution trees also overlap. In
147
  /// that case, there's no point in expanding both, so we can just pick the
148
  /// winner now and discard the other.
149
  ///
150
  /// For this to be true, we need to prove that binding an unbound rule won't
151
  /// affect one state differently from the other. We have to show that they
152
  /// are parallel.
153
  ///
154
  /// Two things could cause this *not* to be the case.
155
  ///
156
  /// 1. If one state's bound rules place different constraints on the unbound
157
  ///    rules than the other.
158
  ///
159
  /// 2. If one state's different bound rules are in the same line as an
160
  ///    unbound rule. That affects the indentation and length of the line,
161
  ///    which affects the context where the unbound rule is being chosen.
162
  ///
163
  /// If neither of these is the case, the states overlap. Returns `<0` if this
164
  /// state is better, or `>0` if [other] wins. If the states do not overlap,
165
  /// returns `0`.
1662
  int compareOverlap(SolveState other) {
1672
    if (!_isOverlapping(other)) return 0;
168
169
    // They do overlap, so see which one wins.
1706
    for (var rule in _splitter.rules) {
1714
      var value = _ruleValues.getValue(rule);
1724
      var otherValue = other._ruleValues.getValue(rule);
173
1744
      if (value != otherValue) return value.compareTo(otherValue);
175
    }
176
177
    // The way SolveStates are expanded should guarantee that we never generate
178
    // the exact same state twice. Getting here implies that that failed.
179
    throw "unreachable";
180
  }
181
182
  /// Enqueues more solve states to consider based on this one.
183
  ///
184
  /// For each unbound rule in this state that occurred in the first long line,
185
  /// enqueue solve states that bind that rule to each value it can have and
186
  /// bind all previous rules to zero. (In other words, try all subsolutions
187
  /// where that rule becomes the first new rule to split at.)
1882
  void expand() {
1894
    var unsplitRules = _ruleValues.clone();
190
191
    // Walk down the rules looking for unbound ones to try.
192
    var triedRules = 0;
1936
    for (var rule in _splitter.rules) {
1944
      if (_liveRules.contains(rule)) {
195
        // We found one worth trying, so try all of its values.
1966
        for (var value = 1; value < rule.numValues; value++) {
1972
          var boundRules = unsplitRules.clone();
198
199
          List<Rule> mustSplitRules;
2007
          var valid = boundRules.tryBind(_splitter.rules, rule, value, (rule) {
2011
            if (mustSplitRules == null) mustSplitRules = [];
2021
            mustSplitRules.add(rule);
203
          });
204
205
          // Make sure we don't violate the constraints of the bound rules.
206
          if (!valid) continue;
207
2084
          var state = new SolveState(_splitter, boundRules);
209
210
          // If some unbound rules are constrained to split, remember that.
211
          if (mustSplitRules != null) {
2121
            state._isComplete = false;
2132
            state._liveRules.addAll(mustSplitRules);
214
          }
215
2164
          _splitter.enqueue(state);
217
        }
218
219
        // Stop once we've tried all of the ones we can.
2208
        if (++triedRules == _liveRules.length) break;
221
      }
222
223
      // Fill in previous unbound rules with zero.
2244
      if (!_ruleValues.contains(rule)) {
225
        // Pass a dummy callback because zero will never fail. (If it would
226
        // have, that rule would already be bound to some other value.)
2276
        if (!unsplitRules.tryBind(_splitter.rules, rule, 0, (_) {})) {
228
          break;
229
        }
230
      }
231
    }
232
  }
233
234
  /// Returns `true` if [other] overlaps this state.
2352
  bool _isOverlapping(SolveState other) {
236
    // Lines that contain both bound and unbound rules must have the same
237
    // bound values.
2382
    _ensureBoundRulesInUnboundLines();
2392
    other._ensureBoundRulesInUnboundLines();
2406
    if (_boundRulesInUnboundLines.length !=
2414
        other._boundRulesInUnboundLines.length) {
242
      return false;
243
    }
244
2454
    for (var rule in _boundRulesInUnboundLines) {
2464
      if (!other._boundRulesInUnboundLines.contains(rule)) return false;
24710
      if (_ruleValues.getValue(rule) != other._ruleValues.getValue(rule)) {
248
        return false;
249
      }
250
    }
251
2522
    _ensureConstraints();
2532
    other._ensureConstraints();
25410
    if (_constraints.length != other._constraints.length) return false;
255
2565
    for (var rule in _constraints.keys) {
2575
      if (_constraints[rule] != other._constraints[rule]) return false;
258
    }
259
2602
    _ensureUnboundConstraints();
2612
    other._ensureUnboundConstraints();
26210
    if (_unboundConstraints.length != other._unboundConstraints.length) {
263
      return false;
264
    }
265
2666
    for (var rule in _unboundConstraints.keys) {
2674
      var disallowed = _unboundConstraints[rule];
2684
      var otherDisallowed = other._unboundConstraints[rule];
269
2706
      if (disallowed.length != otherDisallowed.length) return false;
2714
      for (var value in disallowed) {
2722
        if (!otherDisallowed.contains(value)) return false;
273
      }
274
    }
275
276
    return true;
277
  }
278
279
  /// Calculates the [SplitSet] for this solve state, assuming any unbound
280
  /// rules are set to zero.
2813
  void _calculateSplits() {
282
    // Figure out which expression nesting levels got split and need to be
283
    // assigned columns.
2843
    var usedNestingLevels = new Set<NestingLevel>();
28518
    for (var i = 0; i < _splitter.chunks.length - 1; i++) {
2869
      var chunk = _splitter.chunks[i];
28712
      if (chunk.rule.isSplit(getValue(chunk.rule), chunk)) {
2884
        usedNestingLevels.add(chunk.nesting);
2894
        chunk.nesting.clearTotalUsedIndent();
290
      }
291
    }
292
2935
    for (var nesting in usedNestingLevels) {
2942
      nesting.refreshTotalUsedIndent(usedNestingLevels);
295
    }
296
29715
    _splits = new SplitSet(_splitter.chunks.length);
29818
    for (var i = 0; i < _splitter.chunks.length - 1; i++) {
2999
      var chunk = _splitter.chunks[i];
30012
      if (chunk.rule.isSplit(getValue(chunk.rule), chunk)) {
301
        var indent = 0;
3022
        if (!chunk.flushLeftAfter) {
303
          // Add in the chunk's indent.
3048
          indent = _splitter.blockIndentation + chunk.indent;
305
306
          // And any expression nesting.
3076
          indent += chunk.nesting.totalUsedIndent;
308
3094
          if (chunk.indentBlock(getValue)) indent += Indent.expression;
310
        }
311
3124
        _splits.add(i, indent);
313
      }
314
    }
315
  }
316
317
  /// Evaluates the cost (i.e. the relative "badness") of splitting the line
318
  /// into [lines] physical lines based on the current set of rules.
3193
  void _calculateCost() {
3203
    assert(_splits != null);
321
322
    // Calculate the length of each line and apply the cost of any spans that
323
    // get split.
324
    var cost = 0;
3253
    _overflowChars = 0;
326
3276
    var length = _splitter.firstLineIndent;
328
329
    // The unbound rules in use by the current line. This will be null after
330
    // the first long line has completed.
331
    var foundOverflowRules = false;
332
    var start = 0;
333
3343
    endLine(int end) {
335
      // Track lines that went over the length. It is only rules contained in
336
      // long lines that we may want to split.
33712
      if (length > _splitter.writer.pageWidth) {
33812
        _overflowChars += length - _splitter.writer.pageWidth;
339
340
        // Only try rules that are in the first long line, since we know at
341
        // least one of them *will* be split.
342
        if (!foundOverflowRules) {
3434
          for (var i = start; i < end; i++) {
34410
            if (_addLiveRules(_splitter.chunks[i].rule)) {
345
              foundOverflowRules = true;
346
            }
347
          }
348
        }
349
      }
350
351
      start = end;
352
    }
353
354
    // The set of spans that contain chunks that ended up splitting. We store
355
    // these in a set so a span's cost doesn't get double-counted if more than
356
    // one split occurs in it.
3573
    var splitSpans = new Set();
358
359
    // The nesting level of the chunk that ended the previous line.
360
    var previousNesting;
361
36215
    for (var i = 0; i < _splitter.chunks.length; i++) {
3639
      var chunk = _splitter.chunks[i];
364
3659
      length += chunk.text.length;
366
367
      // Ignore the split after the last chunk.
36815
      if (i == _splitter.chunks.length - 1) break;
369
3706
      if (_splits.shouldSplitAt(i)) {
3712
        endLine(i);
372
3734
        splitSpans.addAll(chunk.spans);
374
375
        // Include the cost of the nested block.
3762
        if (chunk.isBlock) {
3772
          cost +=
37812
              _splitter.writer.formatBlock(chunk, _splits.getColumn(i)).cost;
379
        }
380
381
        // Do not allow sequential lines to have the same indentation but for
382
        // different reasons. In other words, don't allow different expressions
383
        // to claim the same nesting level on subsequent lines.
384
        //
385
        // A contrived example would be:
386
        //
387
        //     function(inner(
388
        //         argument), second(
389
        //         another);
390
        //
391
        // For the most part, we prevent this by the constraints on splits.
392
        // For example, the above can't happen because the split before
393
        // "argument", forces the split before "second".
394
        //
395
        // But there are a couple of squirrely cases where it's hard to prevent
396
        // by construction. Instead, this outlaws it by penalizing it very
397
        // heavily if it happens to get this far.
398
        if (previousNesting != null &&
3996
            chunk.nesting.totalUsedIndent != 0 &&
4008
            chunk.nesting.totalUsedIndent == previousNesting.totalUsedIndent &&
4012
            !identical(chunk.nesting, previousNesting)) {
4023
          _overflowChars += 10000;
403
        }
404
4052
        previousNesting = chunk.nesting;
406
407
        // Start the new line.
4084
        length = _splits.getColumn(i);
409
      } else {
4106
        if (chunk.spaceWhenUnsplit) length++;
411
412
        // Include the nested block inline, if any.
4136
        length += chunk.unsplitBlockLength;
414
      }
415
    }
416
417
    // Add the costs for the rules that have any splits.
41814
    _ruleValues.forEach(_splitter.rules, (rule, value) {
4196
      if (value != Rule.unsplit) cost += rule.cost;
420
    });
421
422
    // Add the costs for the spans containing splits.
4237
    for (var span in splitSpans) cost += span.cost;
424
425
    // Finish the last line.
42612
    endLine(_splitter.chunks.length);
427
4286
    _splits.setCost(cost);
429
  }
430
431
  /// Adds [rule] and all of the rules it constrains to the set of [_liveRules].
432
  ///
433
  /// Only does this if [rule] is a valid soft rule. Returns `true` if any new
434
  /// live rules were added.
4352
  bool _addLiveRules(Rule rule) {
436
    if (rule == null) return false;
437
438
    var added = false;
4392
    for (var constrained in rule.allConstrainedRules) {
4404
      if (_ruleValues.contains(constrained)) continue;
441
4424
      _liveRules.add(constrained);
443
      added = true;
444
    }
445
446
    return added;
447
  }
448
449
  /// Lazily initializes the [_boundInUnboundLines], which is needed to compare
450
  /// two states for overlap.
451
  ///
452
  /// We do this lazily because the calculation is a bit slow, and is only
453
  /// needed when we have two states with the same score.
4542
  void _ensureBoundRulesInUnboundLines() {
4552
    if (_boundRulesInUnboundLines != null) return;
456
4574
    _boundRulesInUnboundLines = new Set<Rule>();
458
4592
    var boundInLine = new Set<Rule>();
460
    var hasUnbound = false;
461
46212
    for (var i = 0; i < _splitter.chunks.length - 1; i++) {
4634
      if (splits.shouldSplitAt(i)) {
4644
        if (hasUnbound) _boundRulesInUnboundLines.addAll(boundInLine);
465
4662
        boundInLine.clear();
467
        hasUnbound = false;
468
      }
469
4708
      var rule = _splitter.chunks[i].rule;
471
      if (rule != null) {
4724
        if (_ruleValues.contains(rule)) {
4732
          boundInLine.add(rule);
474
        } else {
475
          hasUnbound = true;
476
        }
477
      }
478
    }
479
4804
    if (hasUnbound) _boundRulesInUnboundLines.addAll(boundInLine);
481
  }
482
483
  /// Lazily initializes the [_constraints], which is needed to compare two
484
  /// states for overlap.
485
  ///
486
  /// We do this lazily because the calculation is a bit slow, and is only
487
  /// needed when we have two states with the same score.
4882
  void _ensureConstraints() {
4892
    if (_constraints != null) return;
490
4914
    _unboundRules = new Set();
4924
    _boundRules = new Set();
493
4946
    for (var rule in _splitter.rules) {
4954
      if (_ruleValues.contains(rule)) {
4964
        _boundRules.add(rule);
497
      } else {
4984
        _unboundRules.add(rule);
499
      }
500
    }
501
5024
    _constraints = {};
503
5044
    for (var bound in _boundRules) {
5052
      for (var unbound in bound.constrainedRules) {
5064
        if (!_unboundRules.contains(unbound)) continue;
507
5082
        var value = _ruleValues.getValue(bound);
5091
        var constraint = bound.constrain(value, unbound);
510
        if (constraint != null) {
5112
          _constraints[unbound] = constraint;
512
        }
513
      }
514
    }
515
  }
516
517
  /// Lazily initializes the [_unboundConstraints], which is needed to compare
518
  /// two states for overlap.
519
  ///
520
  /// We do this lazily because the calculation is a bit slow, and is only
521
  /// needed when we have two states with the same score.
5222
  void _ensureUnboundConstraints() {
5232
    if (_unboundConstraints != null) return;
524
525
    // _ensureConstraints should be called first which initializes these.
5262
    assert(_boundRules != null);
5272
    assert(_unboundRules != null);
528
5294
    _unboundConstraints = {};
530
5314
    for (var unbound in _unboundRules) {
532
      Set<int> disallowedValues;
533
5342
      for (var bound in unbound.constrainedRules) {
5354
        if (!_boundRules.contains(bound)) continue;
536
5374
        var boundValue = _ruleValues.getValue(bound);
538
5396
        for (var value = 0; value < unbound.numValues; value++) {
5402
          var constraint = unbound.constrain(value, bound);
541
542
          // If the unbound rule doesn't place any constraint on this bound
543
          // rule, we're fine.
544
          if (constraint == null) continue;
545
546
          // If the bound rule's value already meets the constraint it applies,
547
          // we don't need to track it. This way, two states that have the
548
          // same bound value, one of which has a satisfied constraint, are
549
          // still allowed to overlap.
5502
          if (constraint == boundValue) continue;
5512
          if (constraint == Rule.mustSplit && boundValue != Rule.unsplit) {
552
            continue;
553
          }
554
555
          if (disallowedValues == null) {
5562
            disallowedValues = new Set<int>();
5574
            _unboundConstraints[unbound] = disallowedValues;
558
          }
559
5602
          disallowedValues.add(value);
561
        }
562
      }
563
    }
564
  }
565
5660
  String toString() {
5670
    var buffer = new StringBuffer();
568
5690
    buffer.writeAll(_splitter.rules.map((rule) {
5700
      var valueLength = "${rule.fullySplitValue}".length;
571
572
      var value = "?";
5730
      if (_ruleValues.contains(rule)) {
5740
        value = "${_ruleValues.getValue(rule)}";
575
      }
576
5770
      value = value.padLeft(valueLength);
5780
      if (_liveRules.contains(rule)) {
5790
        value = debug.bold(value);
580
      } else {
5810
        value = debug.gray(value);
582
      }
583
584
      return value;
585
    }), " ");
586
5870
    buffer.write("   \$${splits.cost}");
588
5890
    if (overflowChars > 0) buffer.write(" (${overflowChars} over)");
5900
    if (!_isComplete) buffer.write(" (incomplete)");
5910
    if (splits == null) buffer.write(" invalid");
592
5930
    return buffer.toString();
594
  }
595
}