V8 Project
v8::internal::TextNode Class Reference

#include <jsregexp.h>

+ Inheritance diagram for v8::internal::TextNode:
+ Collaboration diagram for v8::internal::TextNode:

Public Member Functions

 TextNode (ZoneList< TextElement > *elms, RegExpNode *on_success)
 
 TextNode (RegExpCharacterClass *that, RegExpNode *on_success)
 
virtual void Accept (NodeVisitor *visitor)
 
virtual void Emit (RegExpCompiler *compiler, Trace *trace)
 
virtual int EatsAtLeast (int still_to_find, int budget, bool not_at_start)
 
virtual void GetQuickCheckDetails (QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
 
ZoneList< TextElement > * elements ()
 
void MakeCaseIndependent (bool is_one_byte)
 
virtual int GreedyLoopTextLength ()
 
virtual RegExpNode * GetSuccessorOfOmnivorousTextNode (RegExpCompiler *compiler)
 
virtual void FillInBMInfo (int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
 
void CalculateOffsets ()
 
virtual RegExpNode * FilterOneByte (int depth, bool ignore_case)
 
- Public Member Functions inherited from v8::internal::SeqRegExpNode
 SeqRegExpNode (RegExpNode *on_success)
 
RegExpNode * on_success ()
 
void set_on_success (RegExpNode *node)
 
- Public Member Functions inherited from v8::internal::RegExpNode
 RegExpNode (Zone *zone)
 
virtual ~RegExpNode ()
 
bool EmitQuickCheck (RegExpCompiler *compiler, Trace *bounds_check_trace, Trace *trace, bool preload_has_checked_bounds, Label *on_possible_success, QuickCheckDetails *details_return, bool fall_through_on_failure)
 
RegExpNode * replacement ()
 
RegExpNode * set_replacement (RegExpNode *replacement)
 
void SaveBMInfo (BoyerMooreLookahead *bm, bool not_at_start, int offset)
 
Label * label ()
 
NodeInfo * info ()
 
BoyerMooreLookahead * bm_info (bool not_at_start)
 
Zone * zone () const
 
- Public Member Functions inherited from v8::internal::ZoneObject
 INLINE (void *operator new(size_t size, Zone *zone))
 
void operator delete (void *, size_t)
 
void operator delete (void *pointer, Zone *zone)
 

Private Types

enum  TextEmitPassType {
  NON_LATIN1_MATCH , SIMPLE_CHARACTER_MATCH , NON_LETTER_CHARACTER_MATCH , CASE_CHARACTER_MATCH ,
  CHARACTER_CLASS_MATCH
}
 

Private Member Functions

void TextEmitPass (RegExpCompiler *compiler, TextEmitPassType pass, bool preloaded, Trace *trace, bool first_element_checked, int *checked_up_to)
 
int Length ()
 

Static Private Member Functions

static bool SkipPass (int pass, bool ignore_case)
 

Private Attributes

ZoneList< TextElement > * elms_
 

Static Private Attributes

static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH
 
static const int kLastPass = CHARACTER_CLASS_MATCH
 

Additional Inherited Members

- Static Public Attributes inherited from v8::internal::RegExpNode
static const int kNodeIsTooComplexForGreedyLoops = -1
 
static const int kRecursionBudget = 200
 
static const int kMaxCopiesCodeGenerated = 10
 
- Protected Types inherited from v8::internal::RegExpNode
enum  LimitResult { DONE , CONTINUE }
 
- Protected Member Functions inherited from v8::internal::SeqRegExpNode
RegExpNode * FilterSuccessor (int depth, bool ignore_case)
 
- Protected Member Functions inherited from v8::internal::RegExpNode
LimitResult LimitVersions (RegExpCompiler *compiler, Trace *trace)
 
void set_bm_info (bool not_at_start, BoyerMooreLookahead *bm)
 
- Protected Attributes inherited from v8::internal::RegExpNode
RegExpNode * replacement_
 

Detailed Description

Definition at line 830 of file jsregexp.h.

Member Enumeration Documentation

◆ TextEmitPassType

Enumerator
NON_LATIN1_MATCH 
SIMPLE_CHARACTER_MATCH 
NON_LETTER_CHARACTER_MATCH 
CASE_CHARACTER_MATCH 
CHARACTER_CLASS_MATCH 

Definition at line 862 of file jsregexp.h.

862  {
863  NON_LATIN1_MATCH, // Check for characters that can't match.
864  SIMPLE_CHARACTER_MATCH, // Case-dependent single character check.
865  NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs.
866  CASE_CHARACTER_MATCH, // Case-independent single character check.
867  CHARACTER_CLASS_MATCH // Character class.
868  };

Constructor & Destructor Documentation

◆ TextNode() [1/2]

v8::internal::TextNode::TextNode ( ZoneList< TextElement > *  elms,
RegExpNode *  on_success 
)
inline

Definition at line 832 of file jsregexp.h.

835  elms_(elms) { }
RegExpNode * on_success()
Definition: jsregexp.h:727
SeqRegExpNode(RegExpNode *on_success)
Definition: jsregexp.h:725
ZoneList< TextElement > * elms_
Definition: jsregexp.h:879

◆ TextNode() [2/2]

v8::internal::TextNode::TextNode ( RegExpCharacterClass *  that,
RegExpNode *  on_success 
)
inline

Definition at line 836 of file jsregexp.h.

839  elms_(new(zone()) ZoneList<TextElement>(1, zone())) {
840  elms_->Add(TextElement::CharClass(that), zone());
841  }
void Add(const T &element, AllocationPolicy allocator=AllocationPolicy())
Definition: list-inl.h:17
Zone * zone() const
Definition: jsregexp.h:668

References v8::internal::List< T, AllocationPolicy >::Add(), elms_, and v8::internal::RegExpNode::zone().

+ Here is the call graph for this function:

Member Function Documentation

◆ Accept()

virtual void v8::internal::TextNode::Accept ( NodeVisitor *  visitor)
virtual

◆ CalculateOffsets()

void v8::internal::TextNode::CalculateOffsets ( )

Definition at line 5744 of file jsregexp.cc.

5744  {
5745  int element_count = elements()->length();
5746  // Set up the offsets of the elements relative to the start. This is a fixed
5747  // quantity since a TextNode can only contain fixed-width things.
5748  int cp_offset = 0;
5749  for (int i = 0; i < element_count; i++) {
5750  TextElement& elm = elements()->at(i);
5751  elm.set_cp_offset(cp_offset);
5752  cp_offset += elm.length();
5753  }
5754 }
T & at(int i) const
Definition: list.h:69
ZoneList< TextElement > * elements()
Definition: jsregexp.h:849

References v8::internal::List< T, AllocationPolicy >::at(), and elements().

+ Here is the call graph for this function:

◆ EatsAtLeast()

int v8::internal::TextNode::EatsAtLeast ( int  still_to_find,
int  budget,
bool  not_at_start 
)
virtual

Implements v8::internal::RegExpNode.

Definition at line 2334 of file jsregexp.cc.

2336  {
2337  int answer = Length();
2338  if (answer >= still_to_find) return answer;
2339  if (budget <= 0) return answer;
2340  // We are not at start after this node so we set the last argument to 'true'.
2341  return answer + on_success()->EatsAtLeast(still_to_find - answer,
2342  budget - 1,
2343  true);
2344 }
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start)=0

◆ elements()

ZoneList<TextElement>* v8::internal::TextNode::elements ( )
inline

Definition at line 849 of file jsregexp.h.

849 { return elms_; }

References elms_.

Referenced by CalculateOffsets(), and FillInBMInfo().

+ Here is the caller graph for this function:

◆ Emit()

void v8::internal::TextNode::Emit ( RegExpCompiler *  compiler,
Trace *  trace 
)
virtual

Implements v8::internal::RegExpNode.

Definition at line 3315 of file jsregexp.cc.

3315  {
3316  LimitResult limit_result = LimitVersions(compiler, trace);
3317  if (limit_result == DONE) return;
3318  DCHECK(limit_result == CONTINUE);
3319 
3320  if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
3321  compiler->SetRegExpTooBig();
3322  return;
3323  }
3324 
3325  if (compiler->one_byte()) {
3326  int dummy = 0;
3327  TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
3328  }
3329 
3330  bool first_elt_done = false;
3331  int bound_checked_to = trace->cp_offset() - 1;
3332  bound_checked_to += trace->bound_checked_up_to();
3333 
3334  // If a character is preloaded into the current character register then
3335  // check that now.
3336  if (trace->characters_preloaded() == 1) {
3337  for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
3338  if (!SkipPass(pass, compiler->ignore_case())) {
3339  TextEmitPass(compiler,
3340  static_cast<TextEmitPassType>(pass),
3341  true,
3342  trace,
3343  false,
3344  &bound_checked_to);
3345  }
3346  }
3347  first_elt_done = true;
3348  }
3349 
3350  for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
3351  if (!SkipPass(pass, compiler->ignore_case())) {
3352  TextEmitPass(compiler,
3353  static_cast<TextEmitPassType>(pass),
3354  false,
3355  trace,
3356  first_elt_done,
3357  &bound_checked_to);
3358  }
3359  }
3360 
3361  Trace successor_trace(*trace);
3362  successor_trace.set_at_start(false);
3363  successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
3364  RecursionCheck rc(compiler);
3365  on_success()->Emit(compiler, &successor_trace);
3366 }
LimitResult LimitVersions(RegExpCompiler *compiler, Trace *trace)
Definition: jsregexp.cc:2229
virtual void Emit(RegExpCompiler *compiler, Trace *trace)=0
static bool SkipPass(int pass, bool ignore_case)
Definition: jsregexp.cc:3299
static const int kLastPass
Definition: jsregexp.h:871
static const int kFirstRealPass
Definition: jsregexp.h:870
void TextEmitPass(RegExpCompiler *compiler, TextEmitPassType pass, bool preloaded, Trace *trace, bool first_element_checked, int *checked_up_to)
Definition: jsregexp.cc:3225
#define DCHECK(condition)
Definition: logging.h:205
static void Trace(const char *msg,...)
Definition: scheduler.cc:21

References v8::internal::Trace::AdvanceCurrentPositionInTrace(), v8::internal::Trace::bound_checked_up_to(), v8::internal::Trace::characters_preloaded(), v8::internal::Trace::cp_offset(), DCHECK, v8::internal::DONE, v8::internal::RegExpCompiler::ignore_case(), v8::internal::RegExpMacroAssembler::kMaxCPOffset, v8::internal::RegExpCompiler::one_byte(), v8::internal::Trace::set_at_start(), and v8::internal::RegExpCompiler::SetRegExpTooBig().

+ Here is the call graph for this function:

◆ FillInBMInfo()

void v8::internal::TextNode::FillInBMInfo ( int  offset,
int  budget,
BoyerMooreLookahead *  bm,
bool  not_at_start 
)
virtual

Reimplemented from v8::internal::SeqRegExpNode.

Definition at line 5855 of file jsregexp.cc.

5858  {
5859  if (initial_offset >= bm->length()) return;
5860  int offset = initial_offset;
5861  int max_char = bm->max_char();
5862  for (int i = 0; i < elements()->length(); i++) {
5863  if (offset >= bm->length()) {
5864  if (initial_offset == 0) set_bm_info(not_at_start, bm);
5865  return;
5866  }
5867  TextElement text = elements()->at(i);
5868  if (text.text_type() == TextElement::ATOM) {
5869  RegExpAtom* atom = text.atom();
5870  for (int j = 0; j < atom->length(); j++, offset++) {
5871  if (offset >= bm->length()) {
5872  if (initial_offset == 0) set_bm_info(not_at_start, bm);
5873  return;
5874  }
5875  uc16 character = atom->data()[j];
5876  if (bm->compiler()->ignore_case()) {
5877  unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
5878  int length = GetCaseIndependentLetters(
5879  Isolate::Current(),
5880  character,
5881  bm->max_char() == String::kMaxOneByteCharCode,
5882  chars);
5883  for (int j = 0; j < length; j++) {
5884  bm->Set(offset, chars[j]);
5885  }
5886  } else {
5887  if (character <= max_char) bm->Set(offset, character);
5888  }
5889  }
5890  } else {
5891  DCHECK_EQ(TextElement::CHAR_CLASS, text.text_type());
5892  RegExpCharacterClass* char_class = text.char_class();
5893  ZoneList<CharacterRange>* ranges = char_class->ranges(zone());
5894  if (char_class->is_negated()) {
5895  bm->SetAll(offset);
5896  } else {
5897  for (int k = 0; k < ranges->length(); k++) {
5898  CharacterRange& range = ranges->at(k);
5899  if (range.from() > max_char) continue;
5900  int to = Min(max_char, static_cast<int>(range.to()));
5901  bm->SetInterval(offset, Interval(range.from(), to));
5902  }
5903  }
5904  offset++;
5905  }
5906  }
5907  if (offset >= bm->length()) {
5908  if (initial_offset == 0) set_bm_info(not_at_start, bm);
5909  return;
5910  }
5911  on_success()->FillInBMInfo(offset,
5912  budget - 1,
5913  bm,
5914  true); // Not at start after a text node.
5915  if (initial_offset == 0) set_bm_info(not_at_start, bm);
5916 }
void set_bm_info(bool not_at_start, BoyerMooreLookahead *bm)
Definition: jsregexp.h:676
virtual void FillInBMInfo(int offset, int budget, BoyerMooreLookahead *bm, bool not_at_start)
Definition: jsregexp.h:622
static const int32_t kMaxOneByteCharCode
Definition: objects.h:8811
enable harmony numeric enable harmony object literal extensions Optimize object Array DOM strings and string trace pretenuring decisions of HAllocate instructions Enables optimizations which favor memory size over execution speed maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining trace the tracking of allocation sites deoptimize every n garbage collections perform array bounds checks elimination analyze liveness of environment slots and zap dead values flushes the cache of optimized code for closures on every GC allow uint32 values on optimize frames if they are used only in safe operations track concurrent recompilation artificial compilation delay in ms do not emit check maps for constant values that have a leaf deoptimize the optimized code if the layout of the maps changes enable context specialization in TurboFan execution budget before interrupt is triggered max percentage of megamorphic generic ICs to allow optimization enable use of SAHF instruction if enable use of VFP3 instructions if available enable use of NEON instructions if enable use of SDIV and UDIV instructions if enable use of MLS instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of d16 d31 registers on ARM this requires VFP3 force all emitted branches to be in long enable alignment of csp to bytes on platforms which prefer the register to always be expose gc extension under the specified name show built in functions in stack traces use random jit cookie to mask large constants minimum length for automatic enable preparsing CPU profiler sampling interval in microseconds trace out of bounds accesses to external arrays default size of stack region v8 is allowed to maximum length of function source code printed in a stack trace min size of a semi the new space consists of two semi spaces print one trace line following each garbage collection do not print 
trace line after scavenger collection print cumulative GC statistics in only print modified registers Trace simulator debug messages Implied by trace sim abort randomize hashes to avoid predictable hash Fixed seed to use to hash property Print the time it takes to deserialize the snapshot A filename with extra code to be included in the A file to write the raw snapshot bytes to(mksnapshot only)") DEFINE_STRING(raw_context_file
#define DCHECK_EQ(v1, v2)
Definition: logging.h:206
unsigned int uchar
Definition: unicode.h:17
static LifetimePosition Min(LifetimePosition a, LifetimePosition b)
static int GetCaseIndependentLetters(Isolate *isolate, uc16 character, bool one_byte_subject, unibrow::uchar *letters)
Definition: jsregexp.cc:1590
uint16_t uc16
Definition: globals.h:184
static const int kMaxWidth
Definition: unicode.h:245

References v8::internal::List< T, AllocationPolicy >::at(), v8::internal::BoyerMooreLookahead::compiler(), DCHECK_EQ, elements(), v8::internal::RegExpNode::FillInBMInfo(), v8::internal::CharacterRange::from(), v8::internal::GetCaseIndependentLetters(), v8::internal::RegExpCompiler::ignore_case(), v8::internal::String::kMaxOneByteCharCode, unibrow::Ecma262UnCanonicalize::kMaxWidth, v8::internal::BoyerMooreLookahead::length(), v8::internal::BoyerMooreLookahead::max_char(), v8::internal::Min(), v8::internal::SeqRegExpNode::on_success(), v8::internal::BoyerMooreLookahead::Set(), v8::internal::RegExpNode::set_bm_info(), v8::internal::BoyerMooreLookahead::SetAll(), v8::internal::BoyerMooreLookahead::SetInterval(), v8::internal::CharacterRange::to(), to(), and v8::internal::RegExpNode::zone().

+ Here is the call graph for this function:

◆ FilterOneByte()

RegExpNode * v8::internal::TextNode::FilterOneByte ( int  depth,
bool  ignore_case 
)
virtual

Reimplemented from v8::internal::SeqRegExpNode.

Definition at line 2789 of file jsregexp.cc.

2789  {
2790  if (info()->replacement_calculated) return replacement();
2791  if (depth < 0) return this;
2792  DCHECK(!info()->visited);
2793  VisitMarker marker(info());
2794  int element_count = elms_->length();
2795  for (int i = 0; i < element_count; i++) {
2796  TextElement elm = elms_->at(i);
2797  if (elm.text_type() == TextElement::ATOM) {
2798  Vector<const uc16> quarks = elm.atom()->data();
2799  for (int j = 0; j < quarks.length(); j++) {
2800  uint16_t c = quarks[j];
2801  if (c <= String::kMaxOneByteCharCode) continue;
2802  if (!ignore_case) return set_replacement(NULL);
2803  // Here, we need to check for characters whose upper and lower cases
2804  // are outside the Latin-1 range.
2805  uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);
2806  // Character is outside Latin-1 completely
2807  if (converted == 0) return set_replacement(NULL);
2808  // Convert quark to Latin-1 in place.
2809  uint16_t* copy = const_cast<uint16_t*>(quarks.start());
2810  copy[j] = converted;
2811  }
2812  } else {
2813  DCHECK(elm.text_type() == TextElement::CHAR_CLASS);
2814  RegExpCharacterClass* cc = elm.char_class();
2815  ZoneList<CharacterRange>* ranges = cc->ranges(zone());
2816  if (!CharacterRange::IsCanonical(ranges)) {
2817  CharacterRange::Canonicalize(ranges);
2818  }
2819  // Now they are in order so we only need to look at the first.
2820  int range_count = ranges->length();
2821  if (cc->is_negated()) {
2822  if (range_count != 0 &&
2823  ranges->at(0).from() == 0 &&
2824  ranges->at(0).to() >= String::kMaxOneByteCharCode) {
2825  // This will be handled in a later filter.
2826  if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
2827  return set_replacement(NULL);
2828  }
2829  } else {
2830  if (range_count == 0 ||
2831  ranges->at(0).from() > String::kMaxOneByteCharCode) {
2832  // This will be handled in a later filter.
2833  if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
2834  return set_replacement(NULL);
2835  }
2836  }
2837  }
2838  }
2839  return FilterSuccessor(depth - 1, ignore_case);
2840 }
static uint16_t ConvertNonLatin1ToLatin1(uint16_t)
Definition: unicode-inl.h:60
static void Canonicalize(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5508
static bool IsCanonical(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:5400
RegExpNode * replacement()
Definition: jsregexp.h:636
RegExpNode * set_replacement(RegExpNode *replacement)
Definition: jsregexp.h:640
RegExpNode * FilterSuccessor(int depth, bool ignore_case)
Definition: jsregexp.cc:2764
enable harmony numeric enable harmony object literal extensions Optimize object Array DOM strings and string trace pretenuring decisions of HAllocate instructions Enables optimizations which favor memory size over execution speed maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining trace the tracking of allocation sites deoptimize every n garbage collections perform array bounds checks elimination analyze liveness of environment slots and zap dead values flushes the cache of optimized code for closures on every GC allow uint32 values on optimize frames if they are used only in safe operations track concurrent recompilation artificial compilation delay in ms do not emit check maps for constant values that have a leaf deoptimize the optimized code if the layout of the maps changes enable context specialization in TurboFan execution budget before interrupt is triggered max percentage of megamorphic generic ICs to allow optimization enable use of SAHF instruction if enable use of VFP3 instructions if available enable use of NEON instructions if enable use of SDIV and UDIV instructions if enable use of MLS instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of d16 d31 registers on ARM this requires VFP3 force all emitted branches to be in long enable alignment of csp to bytes on platforms which prefer the register to always be NULL
unsigned short uint16_t
Definition: unicode.cc:23
static bool RangesContainLatin1Equivalents(ZoneList< CharacterRange > *ranges)
Definition: jsregexp.cc:2780

References v8::internal::List< T, AllocationPolicy >::at(), v8::internal::CharacterRange::Canonicalize(), v8::internal::cc, unibrow::Latin1::ConvertNonLatin1ToLatin1(), DCHECK, v8::internal::CharacterRange::IsCanonical(), v8::internal::String::kMaxOneByteCharCode, v8::internal::Vector< T >::length(), NULL, v8::internal::RangesContainLatin1Equivalents(), and v8::internal::Vector< T >::start().

+ Here is the call graph for this function:

◆ GetQuickCheckDetails()

void v8::internal::TextNode::GetQuickCheckDetails ( QuickCheckDetails *  details,
RegExpCompiler *  compiler,
int  characters_filled_in,
bool  not_at_start 
)
virtual

Implements v8::internal::RegExpNode.

Definition at line 2527 of file jsregexp.cc.

2530  {
2531  Isolate* isolate = compiler->macro_assembler()->zone()->isolate();
2532  DCHECK(characters_filled_in < details->characters());
2533  int characters = details->characters();
2534  int char_mask;
2535  if (compiler->one_byte()) {
2536  char_mask = String::kMaxOneByteCharCode;
2537  } else {
2538  char_mask = String::kMaxUtf16CodeUnit;
2539  }
2540  for (int k = 0; k < elms_->length(); k++) {
2541  TextElement elm = elms_->at(k);
2542  if (elm.text_type() == TextElement::ATOM) {
2543  Vector<const uc16> quarks = elm.atom()->data();
2544  for (int i = 0; i < characters && i < quarks.length(); i++) {
2545  QuickCheckDetails::Position* pos =
2546  details->positions(characters_filled_in);
2547  uc16 c = quarks[i];
2548  if (c > char_mask) {
2549  // If we expect a non-Latin1 character from an one-byte string,
2550  // there is no way we can match. Not even case-independent
2551  // matching can turn an Latin1 character into non-Latin1 or
2552  // vice versa.
2553  // TODO(dcarney): issue 3550. Verify that this works as expected.
2554  // For example, \u0178 is uppercase of \u00ff (y-umlaut).
2555  details->set_cannot_match();
2556  pos->determines_perfectly = false;
2557  return;
2558  }
2559  if (compiler->ignore_case()) {
2560  unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
2561  int length = GetCaseIndependentLetters(isolate, c,
2562  compiler->one_byte(), chars);
2563  DCHECK(length != 0); // Can only happen if c > char_mask (see above).
2564  if (length == 1) {
2565  // This letter has no case equivalents, so it's nice and simple
2566  // and the mask-compare will determine definitely whether we have
2567  // a match at this character position.
2568  pos->mask = char_mask;
2569  pos->value = c;
2570  pos->determines_perfectly = true;
2571  } else {
2572  uint32_t common_bits = char_mask;
2573  uint32_t bits = chars[0];
2574  for (int j = 1; j < length; j++) {
2575  uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
2576  common_bits ^= differing_bits;
2577  bits &= common_bits;
2578  }
2579  // If length is 2 and common bits has only one zero in it then
2580  // our mask and compare instruction will determine definitely
2581  // whether we have a match at this character position. Otherwise
2582  // it can only be an approximate check.
2583  uint32_t one_zero = (common_bits | ~char_mask);
2584  if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
2585  pos->determines_perfectly = true;
2586  }
2587  pos->mask = common_bits;
2588  pos->value = bits;
2589  }
2590  } else {
2591  // Don't ignore case. Nice simple case where the mask-compare will
2592  // determine definitely whether we have a match at this character
2593  // position.
2594  pos->mask = char_mask;
2595  pos->value = c;
2596  pos->determines_perfectly = true;
2597  }
2598  characters_filled_in++;
2599  DCHECK(characters_filled_in <= details->characters());
2600  if (characters_filled_in == details->characters()) {
2601  return;
2602  }
2603  }
2604  } else {
2605  QuickCheckDetails::Position* pos =
2606  details->positions(characters_filled_in);
2607  RegExpCharacterClass* tree = elm.char_class();
2608  ZoneList<CharacterRange>* ranges = tree->ranges(zone());
2609  if (tree->is_negated()) {
2610  // A quick check uses multi-character mask and compare. There is no
2611  // useful way to incorporate a negative char class into this scheme
2612  // so we just conservatively create a mask and value that will always
2613  // succeed.
2614  pos->mask = 0;
2615  pos->value = 0;
2616  } else {
2617  int first_range = 0;
2618  while (ranges->at(first_range).from() > char_mask) {
2619  first_range++;
2620  if (first_range == ranges->length()) {
2621  details->set_cannot_match();
2622  pos->determines_perfectly = false;
2623  return;
2624  }
2625  }
2626  CharacterRange range = ranges->at(first_range);
2627  uc16 from = range.from();
2628  uc16 to = range.to();
2629  if (to > char_mask) {
2630  to = char_mask;
2631  }
2632  uint32_t differing_bits = (from ^ to);
2633  // A mask and compare is only perfect if the differing bits form a
2634  // number like 00011111 with one single block of trailing 1s.
2635  if ((differing_bits & (differing_bits + 1)) == 0 &&
2636  from + differing_bits == to) {
2637  pos->determines_perfectly = true;
2638  }
2639  uint32_t common_bits = ~SmearBitsRight(differing_bits);
2640  uint32_t bits = (from & common_bits);
2641  for (int i = first_range + 1; i < ranges->length(); i++) {
2642  CharacterRange range = ranges->at(i);
2643  uc16 from = range.from();
2644  uc16 to = range.to();
2645  if (from > char_mask) continue;
2646  if (to > char_mask) to = char_mask;
2647  // Here we are combining more ranges into the mask and compare
2648  // value. With each new range the mask becomes more sparse and
2649  // so the chances of a false positive rise. A character class
2650  // with multiple ranges is assumed never to be equivalent to a
2651  // mask and compare operation.
2652  pos->determines_perfectly = false;
2653  uint32_t new_common_bits = (from ^ to);
2654  new_common_bits = ~SmearBitsRight(new_common_bits);
2655  common_bits &= new_common_bits;
2656  bits &= new_common_bits;
2657  uint32_t differing_bits = (from & common_bits) ^ bits;
2658  common_bits ^= differing_bits;
2659  bits &= common_bits;
2660  }
2661  pos->mask = common_bits;
2662  pos->value = bits;
2663  }
2664  characters_filled_in++;
2665  DCHECK(characters_filled_in <= details->characters());
2666  if (characters_filled_in == details->characters()) {
2667  return;
2668  }
2669  }
2670  }
2671  DCHECK(characters_filled_in != details->characters());
2672  if (!details->cannot_match()) {
2673  on_success()-> GetQuickCheckDetails(details,
2674  compiler,
2675  characters_filled_in,
2676  true);
2677  }
2678 }
static const int kMaxUtf16CodeUnit
Definition: objects.h:8813
virtual void GetQuickCheckDetails(QuickCheckDetails *details, RegExpCompiler *compiler, int characters_filled_in, bool not_at_start)
Definition: jsregexp.cc:2527
static uint32_t SmearBitsRight(uint32_t v)
Definition: jsregexp.cc:2411

References v8::internal::List< T, AllocationPolicy >::at(), v8::internal::QuickCheckDetails::cannot_match(), v8::internal::QuickCheckDetails::characters(), DCHECK, v8::internal::QuickCheckDetails::Position::determines_perfectly, v8::internal::CharacterRange::from(), v8::internal::GetCaseIndependentLetters(), v8::internal::RegExpCompiler::ignore_case(), v8::internal::Zone::isolate(), v8::internal::String::kMaxOneByteCharCode, v8::internal::String::kMaxUtf16CodeUnit, unibrow::Ecma262UnCanonicalize::kMaxWidth, v8::internal::Vector< T >::length(), v8::internal::RegExpCompiler::macro_assembler(), v8::internal::QuickCheckDetails::Position::mask, v8::internal::RegExpCompiler::one_byte(), v8::internal::QuickCheckDetails::positions(), v8::internal::QuickCheckDetails::set_cannot_match(), v8::internal::SmearBitsRight(), v8::internal::CharacterRange::to(), to(), v8::internal::QuickCheckDetails::Position::value, and v8::internal::RegExpMacroAssembler::zone().

+ Here is the call graph for this function:

◆ GetSuccessorOfOmnivorousTextNode()

RegExpNode * v8::internal::TextNode::GetSuccessorOfOmnivorousTextNode ( RegExpCompiler *  compiler)
virtual

Reimplemented from v8::internal::RegExpNode.

Definition at line 3418 of file jsregexp.cc.

3419  {
3420  if (elms_->length() != 1) return NULL;
3421  TextElement elm = elms_->at(0);
3422  if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;
3423  RegExpCharacterClass* node = elm.char_class();
3424  ZoneList<CharacterRange>* ranges = node->ranges(zone());
3425  if (!CharacterRange::IsCanonical(ranges)) {
3426  CharacterRange::Canonicalize(ranges);
3427  }
3428  if (node->is_negated()) {
3429  return ranges->length() == 0 ? on_success() : NULL;
3430  }
3431  if (ranges->length() != 1) return NULL;
3432  uint32_t max_char;
3433  if (compiler->one_byte()) {
3434  max_char = String::kMaxOneByteCharCode;
3435  } else {
3436  max_char = String::kMaxUtf16CodeUnit;
3437  }
3438  return ranges->at(0).IsEverything(max_char) ? on_success() : NULL;
3439 }

References v8::internal::List< T, AllocationPolicy >::at(), v8::internal::CharacterRange::Canonicalize(), v8::internal::CharacterRange::IsCanonical(), v8::internal::String::kMaxOneByteCharCode, v8::internal::String::kMaxUtf16CodeUnit, NULL, and v8::internal::RegExpCompiler::one_byte().

+ Here is the call graph for this function:

◆ GreedyLoopTextLength()

int v8::internal::TextNode::GreedyLoopTextLength ( )
virtual

Reimplemented from v8::internal::RegExpNode.

Definition at line 3412 of file jsregexp.cc.

3412  {
3413  TextElement elm = elms_->at(elms_->length() - 1);
3414  return elm.cp_offset() + elm.length();
3415 }

◆ Length()

int v8::internal::TextNode::Length ( )
private

Definition at line 3292 of file jsregexp.cc.

3292  {
3293  TextElement elm = elms_->last();
3294  DCHECK(elm.cp_offset() >= 0);
3295  return elm.cp_offset() + elm.length();
3296 }
T & last() const
Definition: list.h:70

References DCHECK.

◆ MakeCaseIndependent()

void v8::internal::TextNode::MakeCaseIndependent ( bool  is_one_byte)

Definition at line 3393 of file jsregexp.cc.

3393  {
3394  int element_count = elms_->length();
3395  for (int i = 0; i < element_count; i++) {
3396  TextElement elm = elms_->at(i);
3397  if (elm.text_type() == TextElement::CHAR_CLASS) {
3398  RegExpCharacterClass* cc = elm.char_class();
3399  // None of the standard character classes is different in the case
3400  // independent case and it slows us down if we don't know that.
3401  if (cc->is_standard(zone())) continue;
3402  ZoneList<CharacterRange>* ranges = cc->ranges(zone());
3403  int range_count = ranges->length();
3404  for (int j = 0; j < range_count; j++) {
3405  ranges->at(j).AddCaseEquivalents(ranges, is_one_byte, zone());
3406  }
3407  }
3408  }
3409 }

References v8::internal::List< T, AllocationPolicy >::at(), and v8::internal::cc.

+ Here is the call graph for this function:

◆ SkipPass()

bool v8::internal::TextNode::SkipPass ( int  pass,
bool  ignore_case 
)
staticprivate

Definition at line 3299 of file jsregexp.cc.

3299  {
3300  TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);
3301  if (ignore_case) {
3302  return pass == SIMPLE_CHARACTER_MATCH;
3303  } else {
3304  return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;
3305  }
3306 }

◆ TextEmitPass()

void v8::internal::TextNode::TextEmitPass ( RegExpCompiler *  compiler,
TextEmitPassType  pass,
bool  preloaded,
Trace *  trace,
bool  first_element_checked,
int *  checked_up_to 
)
private

Definition at line 3225 of file jsregexp.cc.

3230  {
3231  RegExpMacroAssembler* assembler = compiler->macro_assembler();
3232  Isolate* isolate = assembler->zone()->isolate();
3233  bool one_byte = compiler->one_byte();
3234  Label* backtrack = trace->backtrack();
3235  QuickCheckDetails* quick_check = trace->quick_check_performed();
3236  int element_count = elms_->length();
3237  for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
3238  TextElement elm = elms_->at(i);
3239  int cp_offset = trace->cp_offset() + elm.cp_offset();
3240  if (elm.text_type() == TextElement::ATOM) {
3241  Vector<const uc16> quarks = elm.atom()->data();
3242  for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
3243  if (first_element_checked && i == 0 && j == 0) continue;
3244  if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
3245  EmitCharacterFunction* emit_function = NULL;
3246  switch (pass) {
3247  case NON_LATIN1_MATCH:
3248  DCHECK(one_byte);
3249  if (quarks[j] > String::kMaxOneByteCharCode) {
3250  assembler->GoTo(backtrack);
3251  return;
3252  }
3253  break;
3254  case NON_LETTER_CHARACTER_MATCH:
3255  emit_function = &EmitAtomNonLetter;
3256  break;
3257  case SIMPLE_CHARACTER_MATCH:
3258  emit_function = &EmitSimpleCharacter;
3259  break;
3260  case CASE_CHARACTER_MATCH:
3261  emit_function = &EmitAtomLetter;
3262  break;
3263  default:
3264  break;
3265  }
3266  if (emit_function != NULL) {
3267  bool bound_checked = emit_function(isolate,
3268  compiler,
3269  quarks[j],
3270  backtrack,
3271  cp_offset + j,
3272  *checked_up_to < cp_offset + j,
3273  preloaded);
3274  if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
3275  }
3276  }
3277  } else {
3278  DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type());
3279  if (pass == CHARACTER_CLASS_MATCH) {
3280  if (first_element_checked && i == 0) continue;
3281  if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
3282  RegExpCharacterClass* cc = elm.char_class();
3283  EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
3284  *checked_up_to < cp_offset, preloaded, zone());
3285  UpdateBoundsCheck(cp_offset, checked_up_to);
3286  }
3287  }
3288  }
3289 }
static void EmitCharClass(RegExpMacroAssembler *macro_assembler, RegExpCharacterClass *cc, bool one_byte, Label *on_failure, int cp_offset, bool check_offset, bool preloaded, Zone *zone)
Definition: jsregexp.cc:2115
static bool EmitAtomLetter(Isolate *isolate, RegExpCompiler *compiler, uc16 c, Label *on_failure, int cp_offset, bool check, bool preloaded)
Definition: jsregexp.cc:1717
static bool DeterminedAlready(QuickCheckDetails *quick_check, int offset)
Definition: jsregexp.cc:3182
static void UpdateBoundsCheck(int index, int *checked_up_to)
Definition: jsregexp.cc:3189
static bool EmitAtomNonLetter(Isolate *isolate, RegExpCompiler *compiler, uc16 c, Label *on_failure, int cp_offset, bool check, bool preloaded)
Definition: jsregexp.cc:1636
static bool EmitSimpleCharacter(Isolate *isolate, RegExpCompiler *compiler, uc16 c, Label *on_failure, int cp_offset, bool check, bool preloaded)
Definition: jsregexp.cc:1613
bool EmitCharacterFunction(Isolate *isolate, RegExpCompiler *compiler, uc16 c, Label *on_failure, int cp_offset, bool check, bool preloaded)
Definition: jsregexp.cc:1707

References v8::internal::Trace::backtrack(), v8::internal::cc, v8::internal::Trace::cp_offset(), DCHECK, DCHECK_EQ, v8::internal::DeterminedAlready(), v8::internal::EmitAtomLetter(), v8::internal::EmitAtomNonLetter(), v8::internal::EmitCharClass(), v8::internal::EmitSimpleCharacter(), v8::internal::RegExpMacroAssembler::GoTo(), v8::internal::Zone::isolate(), v8::internal::String::kMaxOneByteCharCode, v8::internal::Vector< T >::length(), v8::internal::RegExpCompiler::macro_assembler(), NULL, v8::internal::RegExpCompiler::one_byte(), v8::internal::Trace::quick_check_performed(), v8::internal::UpdateBoundsCheck(), and v8::internal::RegExpMacroAssembler::zone().

+ Here is the call graph for this function:

Member Data Documentation

◆ elms_

ZoneList<TextElement>* v8::internal::TextNode::elms_
private

Definition at line 879 of file jsregexp.h.

Referenced by elements(), and TextNode().

◆ kFirstRealPass

const int v8::internal::TextNode::kFirstRealPass = SIMPLE_CHARACTER_MATCH
staticprivate

Definition at line 870 of file jsregexp.h.

◆ kLastPass

const int v8::internal::TextNode::kLastPass = CHARACTER_CLASS_MATCH
staticprivate

Definition at line 871 of file jsregexp.h.


The documentation for this class was generated from the following files: