@@ -114,13 +114,54 @@ internal sealed class SymbolicRegexBuilder<TSet> where TSet : IComparable<TSet>,
114114 /// </summary>
115115 internal DfaMatchingState < TSet > [ ] ? _stateArray ;
116116 internal DfaMatchingState < TSet > [ ] ? _capturingStateArray ;
117+
118+ /// <summary>
119+ /// Maps state IDs to context-independent information for all states in <see cref="_stateArray"/>. Each byte packs the four flags written by <see cref="SetStateInfo"/> and decoded by <see cref="GetStateInfo"/>.
120+ /// </summary>
121+ internal byte [ ] ? _stateInfo ; // indexed by state ID; resized together with _stateArray in MakeNewState
122+
123+ // Bit masks for decoding elements of _stateInfo; each mask selects one flag bit of a state's info byte
124+ private const int isInitialMask = 0b0001 ; // set when the state was created with isInitialState = true
125+ private const int isDeadendMask = 0b0010 ; // mirrors DfaMatchingState.IsDeadend as passed to SetStateInfo
126+ private const int isNullableMask = 0b0100 ; // mirrors state.Node.IsNullable as passed to SetStateInfo
127+ private const int canBeNullableMask = 0b1000 ; // mirrors state.Node.CanBeNullable as passed to SetStateInfo
128+
/// <summary>Stores the context-independent flags of the given state in <see cref="_stateInfo"/>.</summary>
/// <param name="stateId">ID of the state; used as the index into <see cref="_stateInfo"/>.</param>
/// <param name="isInitial">Whether the state is flagged as an initial state.</param>
/// <param name="isDeadend">Whether the state is flagged as a deadend.</param>
/// <param name="isNullable">Whether the state is flagged as nullable.</param>
/// <param name="canBeNullable">Whether the state is flagged as possibly nullable.</param>
internal void SetStateInfo(int stateId, bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable)
{
    Debug.Assert(_stateInfo is not null);

    // Combine the four flags into one value; every mask occupies its own bit,
    // so the result always fits in the low four bits of a byte.
    int packedFlags =
        (isInitial ? isInitialMask : 0) |
        (isDeadend ? isDeadendMask : 0) |
        (isNullable ? isNullableMask : 0) |
        (canBeNullable ? canBeNullableMask : 0);

    _stateInfo[stateId] = (byte)packedFlags;
}
144+
/// <summary>Decodes the context-independent flags stored in <see cref="_stateInfo"/> for the given state.</summary>
/// <param name="stateId">ID of the state; used as the index into <see cref="_stateInfo"/>.</param>
/// <returns>The four flags previously packed for this state, as a named tuple.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable) GetStateInfo(int stateId)
{
    Debug.Assert(_stateInfo is not null);

    int packedFlags = _stateInfo[stateId];

    // Test each flag bit individually against its mask.
    bool initial = (packedFlags & isInitialMask) != 0;
    bool deadend = (packedFlags & isDeadendMask) != 0;
    bool nullable = (packedFlags & isNullableMask) != 0;
    bool maybeNullable = (packedFlags & canBeNullableMask) != 0;

    return (initial, deadend, nullable, maybeNullable);
}
157+
117158 /// <remarks>
118159 /// For these "delta" arrays, technically Volatile.Read should be used to read out an element,
119160 /// but in practice that's not needed on the runtimes in use (though that needs to be documented
120161 /// via https://github.com/dotnet/runtime/issues/63474), and use of Volatile.Read is
121162 /// contributing non-trivial overhead (https://github.com/dotnet/runtime/issues/65789).
122163 /// </remarks>
123- internal DfaMatchingState < TSet > ? [ ] ? _delta ;
164+ internal int [ ] ? _delta ; // holds target state IDs at offset (stateId << _mintermsLog) | mintermId; an entry of 0 is treated as "transition not created yet" (see the > 0 check in CreateNewNfaTransition)
124165 internal List < ( DfaMatchingState < TSet > , DerivativeEffect [ ] ) > ? [ ] ? _capturingDelta ; // allocated with the same length as _delta in the constructor
125166 private const int InitialStateLimit = 1024 ; // initial number of slots allocated for _stateArray, _capturingStateArray and _stateInfo
126167
@@ -170,10 +211,11 @@ internal SymbolicRegexBuilder(ISolver<TSet> solver, CharSetSolver charSetSolver)
170211 {
171212 _stateArray = new DfaMatchingState < TSet > [ InitialStateLimit ] ;
172213 _capturingStateArray = new DfaMatchingState < TSet > [ InitialStateLimit ] ;
214+ _stateInfo = new byte [ InitialStateLimit ] ;
173215
174216 // the extra +1 slot with id minterms.Length is reserved for \Z (last occurrence of \n)
175217 _mintermsLog = BitOperations . Log2 ( ( uint ) _minterms . Length ) + 1 ;
176- _delta = new DfaMatchingState < TSet > [ InitialStateLimit << _mintermsLog ] ;
218+ _delta = new int [ InitialStateLimit << _mintermsLog ] ;
177219 _capturingDelta = new List < ( DfaMatchingState < TSet > , DerivativeEffect [ ] ) > [ InitialStateLimit << _mintermsLog ] ;
178220 }
179221
@@ -208,10 +250,10 @@ internal TSet GetMinterm(int mintermId)
208250 }
209251
210252 /// <summary>Returns the span from <see cref="_delta"/> that may contain transitions for the given state; the entries are target state IDs, and the span is empty when <see cref="_delta"/> or the minterms are not available.</summary>
211- internal Span < DfaMatchingState < TSet > ? > GetDeltasFor ( DfaMatchingState < TSet > state )
253+ internal Span < int > GetDeltasFor ( DfaMatchingState < TSet > state )
212254 {
213255 if ( _delta is null || _minterms is null )
214- return Span < DfaMatchingState < TSet > ? > . Empty ;
256+ return Span < int > . Empty ;
215257 int numMinterms = state . StartsWithLineAnchor ? _minterms . Length + 1 : _minterms . Length ; // one extra slot is included when the state starts with a line anchor
216258 return _delta . AsSpan ( state . Id << _mintermsLog , numMinterms ) ; // the state's transitions begin at offset Id << _mintermsLog
217259 }
@@ -453,8 +495,9 @@ internal SymbolicRegexNode<TNewSet> Transform<TNewSet>(SymbolicRegexNode<TSet> n
453495 /// <param name="node">the pattern that this state will represent</param>
454496 /// <param name="prevCharKind">the kind of the character that led to this state</param>
455497 /// <param name="capturing">whether to use the separate space of states with capturing transitions or not</param>
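498+ /// <param name="isInitialState">whether to mark the state as an initial state or not; the flag is only applied when a new state is created, a state already present in the cache is returned unchanged</param>
456499 /// <returns>the cached state for the pruned node and previous character kind, or a newly created state if none was cached</returns>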
457- public DfaMatchingState < TSet > CreateState ( SymbolicRegexNode < TSet > node , uint prevCharKind , bool capturing = false )
500+ public DfaMatchingState < TSet > CreateState ( SymbolicRegexNode < TSet > node , uint prevCharKind , bool capturing = false , bool isInitialState = false )
458501 {
459502 //first prune the anchors in the node
460503 TSet wlbSet = _wordLetterForBoundariesSet ;
@@ -469,21 +512,21 @@ public DfaMatchingState<TSet> CreateState(SymbolicRegexNode<TSet> node, uint pre
469512 var s = new DfaMatchingState < TSet > ( pruned_node , prevCharKind ) ;
470513 if ( ! ( capturing ? _capturingStateCache : _stateCache ) . TryGetValue ( s , out DfaMatchingState < TSet > ? state ) )
471514 {
472- state = MakeNewState ( s , capturing ) ;
515+ state = MakeNewState ( s , capturing , isInitialState ) ;
473516 }
474517
475518 return state ;
476519 }
477520
478- private DfaMatchingState < TSet > MakeNewState ( DfaMatchingState < TSet > state , bool capturing )
521+ private DfaMatchingState < TSet > MakeNewState ( DfaMatchingState < TSet > state , bool capturing , bool isInitialState )
479522 {
480523 lock ( this )
481524 {
482525 HashSet < DfaMatchingState < TSet > > cache = capturing ? _capturingStateCache : _stateCache ;
526+ cache . Add ( state ) ; // Add to cache first to make 1 the first state ID
483527 state . Id = cache . Count ;
484- cache . Add ( state ) ;
485528
486- Debug . Assert ( _stateArray is not null && _capturingStateArray is not null ) ;
529+ Debug . Assert ( _stateArray is not null && _capturingStateArray is not null && _stateInfo is not null ) ;
487530
488531 const int GrowthSize = 1024 ;
489532 if ( capturing )
@@ -503,8 +546,10 @@ private DfaMatchingState<TSet> MakeNewState(DfaMatchingState<TSet> state, bool c
503546 int newsize = _stateArray . Length + GrowthSize ;
504547 Array . Resize ( ref _stateArray , newsize ) ;
505548 Array . Resize ( ref _delta , newsize << _mintermsLog ) ;
549+ Array . Resize ( ref _stateInfo , newsize ) ;
506550 }
507551 _stateArray [ state . Id ] = state ;
552+ SetStateInfo ( state . Id , isInitialState , state . IsDeadend , state . Node . IsNullable , state . Node . CanBeNullable ) ;
508553 }
509554 return state ;
510555 }
@@ -549,13 +594,20 @@ private int MakeNewNfaState(int coreStateId)
549594 }
550595 }
551596
552- /// <summary>Gets the core state corresponding to the NFA state</summary>
553- public DfaMatchingState < TSet > GetCoreState ( int nfaStateId )
597+ /// <summary>Gets the core state Id corresponding to the NFA state, as recorded in <see cref="_nfaStateArray"/>; no state object is dereferenced.</summary>
598+ public int GetCoreStateId ( int nfaStateId )
554599 {
555600 Debug . Assert ( _stateArray is not null ) ;
556601 Debug . Assert ( nfaStateId < _nfaStateArray . Length ) ; // debug-only bounds check on the NFA state ID
557602 Debug . Assert ( _nfaStateArray [ nfaStateId ] < _stateArray . Length ) ; // the stored core ID must be a valid index into _stateArray
558- return _stateArray [ _nfaStateArray [ nfaStateId ] ] ;
603+ return _nfaStateArray [ nfaStateId ] ;
604+ }
605+
/// <summary>Looks up the core state object that corresponds to the given NFA state.</summary>
/// <param name="nfaStateId">ID of the NFA state whose core state is requested.</param>
/// <returns>The entry of <see cref="_stateArray"/> at the core state ID of the NFA state.</returns>
public DfaMatchingState<TSet> GetCoreState(int nfaStateId)
{
    Debug.Assert(_stateArray is not null);

    // Resolve the NFA state to its core state ID first, then fetch the state object.
    int coreStateId = GetCoreStateId(nfaStateId);
    return _stateArray[coreStateId];
}
560612
561613 /// <summary>Critical region for defining a new core transition</summary>
@@ -570,13 +622,13 @@ public DfaMatchingState<TSet> CreateNewTransition(DfaMatchingState<TSet> sourceS
570622 public bool TryCreateNewTransition (
571623 DfaMatchingState < TSet > sourceState , int mintermId , int offset , bool checkThreshold , [ NotNullWhen ( true ) ] out DfaMatchingState < TSet > ? nextState )
572624 {
573- Debug . Assert ( _delta is not null ) ;
625+ Debug . Assert ( _delta is not null && _stateArray is not null ) ;
574626 lock ( this )
575627 {
576628 Debug . Assert ( offset < _delta . Length ) ;
577629
578630 // check if meanwhile delta[offset] has become defined possibly by another thread
579- DfaMatchingState < TSet > ? targetState = _delta [ offset ] ;
631+ DfaMatchingState < TSet > ? targetState = _stateArray [ _delta [ offset ] ] ;
580632 if ( targetState is null )
581633 {
582634 if ( checkThreshold && _stateCache . Count >= SymbolicRegexThresholds . NfaThreshold )
@@ -586,7 +638,7 @@ public bool TryCreateNewTransition(
586638 }
587639
588640 targetState = sourceState . Next ( GetMinterm ( mintermId ) ) ;
589- Volatile . Write ( ref _delta [ offset ] , targetState ) ;
641+ Volatile . Write ( ref _delta [ offset ] , targetState . Id ) ;
590642 }
591643
592644 nextState = targetState ;
@@ -597,7 +649,7 @@ public bool TryCreateNewTransition(
597649 /// <summary>Gets or creates a new NFA transition.</summary>
598650 public int [ ] CreateNewNfaTransition ( int nfaStateId , int mintermId , int nfaOffset )
599651 {
600- Debug . Assert ( _delta is not null ) ;
652+ Debug . Assert ( _delta is not null && _stateArray is not null ) ;
601653 lock ( this )
602654 {
603655 Debug . Assert ( nfaOffset < _nfaDelta . Length ) ;
@@ -609,7 +661,9 @@ public int[] CreateNewNfaTransition(int nfaStateId, int mintermId, int nfaOffset
609661 // Create the underlying transition from the core state corresponding to the nfa state
610662 DfaMatchingState < TSet > coreState = GetCoreState ( nfaStateId ) ;
611663 int coreOffset = ( coreState . Id << _mintermsLog ) | mintermId ;
612- DfaMatchingState < TSet > ? coreTarget = _delta [ coreOffset ] ?? CreateNewTransition ( coreState , mintermId , coreOffset ) ;
664+ int coreTargetId = _delta [ coreOffset ] ;
665+ DfaMatchingState < TSet > ? coreTarget = coreTargetId > 0 ?
666+ _stateArray [ coreTargetId ] : CreateNewTransition ( coreState , mintermId , coreOffset ) ;
613667
614668 SymbolicRegexNode < TSet > node = coreTarget . Node . Kind == SymbolicRegexNodeKind . DisableBacktrackingSimulation ?
615669 coreTarget . Node . _left ! : coreTarget . Node ;
0 commit comments