V8 Project
v8::internal::RegExpEngine Class Reference

#include <jsregexp.h>

+ Inheritance diagram for v8::internal::RegExpEngine:
+ Collaboration diagram for v8::internal::RegExpEngine:

Classes

struct  CompilationResult
 

Static Public Member Functions

static CompilationResult Compile (RegExpCompileData *input, bool ignore_case, bool global, bool multiline, bool sticky, Handle< String > pattern, Handle< String > sample_subject, bool is_one_byte, Zone *zone)
 
static void DotPrint (const char *label, RegExpNode *node, bool ignore_case)
 

Detailed Description

Definition at line 1647 of file jsregexp.h.

Member Function Documentation

◆ Compile()

RegExpEngine::CompilationResult v8::internal::RegExpEngine::Compile ( RegExpCompileData *  input,
bool  ignore_case,
bool  global,
bool  multiline,
bool  sticky,
Handle< String >  pattern,
Handle< String >  sample_subject,
bool  is_one_byte,
Zone *  zone 
)
static

Definition at line 6034 of file jsregexp.cc.

6037  {
6038  if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
6039  return IrregexpRegExpTooBig(zone->isolate());
6040  }
6041  RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte, zone);
6042 
6043  // Sample some characters from the middle of the string.
6044  static const int kSampleSize = 128;
6045 
6046  sample_subject = String::Flatten(sample_subject);
6047  int chars_sampled = 0;
6048  int half_way = (sample_subject->length() - kSampleSize) / 2;
6049  for (int i = Max(0, half_way);
6050  i < sample_subject->length() && chars_sampled < kSampleSize;
6051  i++, chars_sampled++) {
6052  compiler.frequency_collator()->CountCharacter(sample_subject->Get(i));
6053  }
6054 
6055  // Wrap the body of the regexp in capture #0.
6056  RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
6057  0,
6058  &compiler,
6059  compiler.accept());
6060  RegExpNode* node = captured_body;
6061  bool is_end_anchored = data->tree->IsAnchoredAtEnd();
6062  bool is_start_anchored = data->tree->IsAnchoredAtStart();
6063  int max_length = data->tree->max_match();
6064  if (!is_start_anchored && !is_sticky) {
6065  // Add a .*? at the beginning, outside the body capture, unless
6066  // this expression is anchored at the beginning or sticky.
6067  RegExpNode* loop_node =
6068  RegExpQuantifier::ToNode(0,
6069  RegExpTree::kInfinity,
6070  false,
6071  new(zone) RegExpCharacterClass('*'),
6072  &compiler,
6073  captured_body,
6074  data->contains_anchor);
6075 
6076  if (data->contains_anchor) {
6077  // Unroll loop once, to take care of the case that might start
6078  // at the start of input.
6079  ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);
6080  first_step_node->AddAlternative(GuardedAlternative(captured_body));
6081  first_step_node->AddAlternative(GuardedAlternative(
6082  new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node)));
6083  node = first_step_node;
6084  } else {
6085  node = loop_node;
6086  }
6087  }
6088  if (is_one_byte) {
6089  node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
6090  // Do it again to propagate the new nodes to places where they were not
6091  // put because they had not been calculated yet.
6092  if (node != NULL) {
6093  node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
6094  }
6095  }
6096 
6097  if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);
6098  data->node = node;
6099  Analysis analysis(ignore_case, is_one_byte);
6100  analysis.EnsureAnalyzed(node);
6101  if (analysis.has_failed()) {
6102  const char* error_message = analysis.error_message();
6103  return CompilationResult(zone->isolate(), error_message);
6104  }
6105 
6106  // Create the correct assembler for the architecture.
6107 #ifndef V8_INTERPRETED_REGEXP
6108  // Native regexp implementation.
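6109 
6110  NativeRegExpMacroAssembler::Mode mode =
6111  is_one_byte ? NativeRegExpMacroAssembler::LATIN1
6112  : NativeRegExpMacroAssembler::UC16;
6113 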
6114 #if V8_TARGET_ARCH_IA32
6115  RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2,
6116  zone);
6117 #elif V8_TARGET_ARCH_X64
6118  RegExpMacroAssemblerX64 macro_assembler(mode, (data->capture_count + 1) * 2,
6119  zone);
6120 #elif V8_TARGET_ARCH_ARM
6121  RegExpMacroAssemblerARM macro_assembler(mode, (data->capture_count + 1) * 2,
6122  zone);
6123 #elif V8_TARGET_ARCH_ARM64
6124  RegExpMacroAssemblerARM64 macro_assembler(mode, (data->capture_count + 1) * 2,
6125  zone);
6126 #elif V8_TARGET_ARCH_MIPS
6127  RegExpMacroAssemblerMIPS macro_assembler(mode, (data->capture_count + 1) * 2,
6128  zone);
6129 #elif V8_TARGET_ARCH_MIPS64
6130  RegExpMacroAssemblerMIPS macro_assembler(mode, (data->capture_count + 1) * 2,
6131  zone);
6132 #elif V8_TARGET_ARCH_X87
6133  RegExpMacroAssemblerX87 macro_assembler(mode, (data->capture_count + 1) * 2,
6134  zone);
6135 #else
6136 #error "Unsupported architecture"
6137 #endif
6138 
6139 #else // V8_INTERPRETED_REGEXP
6140  // Interpreted regexp implementation.
6141  EmbeddedVector<byte, 1024> codes;
6142  RegExpMacroAssemblerIrregexp macro_assembler(codes, zone);
6143 #endif // V8_INTERPRETED_REGEXP
6144 
6145  // Inserted here, instead of in Assembler, because it depends on information
6146  // in the AST that isn't replicated in the Node structure.
6147  static const int kMaxBacksearchLimit = 1024;
6148  if (is_end_anchored &&
6149  !is_start_anchored &&
6150  max_length < kMaxBacksearchLimit) {
6151  macro_assembler.SetCurrentPositionFromEnd(max_length);
6152  }
6153 
6154  if (is_global) {
6155  macro_assembler.set_global_mode(
6156  (data->tree->min_match() > 0)
6157  ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK
6158  : RegExpMacroAssembler::GLOBAL);
6159  }
6160 
6161  return compiler.Assemble(&macro_assembler,
6162  node,
6163  data->capture_count,
6164  pattern);
6165 }
static const int kMaxRecursion
Definition: jsregexp.cc:1020
static const int kInfinity
Definition: ast.h:2597
static Handle< String > Flatten(Handle< String > string, PretenureFlag pretenure=NOT_TENURED)
Definition: objects-inl.h:3354
enable harmony numeric enable harmony object literal extensions Optimize object Array DOM strings and string trace pretenuring decisions of HAllocate instructions Enables optimizations which favor memory size over execution speed maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining trace the tracking of allocation sites deoptimize every n garbage collections perform array bounds checks elimination analyze liveness of environment slots and zap dead values flushes the cache of optimized code for closures on every GC allow uint32 values on optimize frames if they are used only in safe operations track concurrent recompilation artificial compilation delay in ms do not emit check maps for constant values that have a leaf deoptimize the optimized code if the layout of the maps changes enable context specialization in TurboFan execution budget before interrupt is triggered max percentage of megamorphic generic ICs to allow optimization enable use of SAHF instruction if enable use of VFP3 instructions if available enable use of NEON instructions if enable use of SDIV and UDIV instructions if enable use of MLS instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of d16 d31 registers on ARM this requires VFP3 force all emitted branches to be in long mode(MIPS only)") DEFINE_BOOL(enable_always_align_csp
enable harmony numeric enable harmony object literal extensions Optimize object Array DOM strings and string trace pretenuring decisions of HAllocate instructions Enables optimizations which favor memory size over execution speed maximum source size in bytes considered for a single inlining maximum cumulative number of AST nodes considered for inlining trace the tracking of allocation sites deoptimize every n garbage collections perform array bounds checks elimination analyze liveness of environment slots and zap dead values flushes the cache of optimized code for closures on every GC allow uint32 values on optimize frames if they are used only in safe operations track concurrent recompilation artificial compilation delay in ms do not emit check maps for constant values that have a leaf deoptimize the optimized code if the layout of the maps changes enable context specialization in TurboFan execution budget before interrupt is triggered max percentage of megamorphic generic ICs to allow optimization enable use of SAHF instruction if enable use of VFP3 instructions if available enable use of NEON instructions if enable use of SDIV and UDIV instructions if enable use of MLS instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of d16 d31 registers on ARM this requires VFP3 force all emitted branches to be in long enable alignment of csp to bytes on platforms which prefer the register to always be NULL
static LifetimePosition Max(LifetimePosition a, LifetimePosition b)
static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate *isolate)
Definition: jsregexp.cc:1066

References v8::internal::RegExpCompiler::accept(), v8::internal::ChoiceNode::AddAlternative(), v8::internal::RegExpCompiler::Assemble(), v8::internal::EndNode::BACKTRACK, v8::internal::RegExpCompileData::capture_count, v8::internal::RegExpCompileData::contains_anchor, v8::internal::FrequencyCollator::CountCharacter(), v8::internal::Analysis::EnsureAnalyzed(), v8::internal::Analysis::error_message(), v8::internal::RegExpNode::FilterOneByte(), v8::internal::String::Flatten(), v8::internal::RegExpCompiler::frequency_collator(), v8::internal::RegExpMacroAssembler::GLOBAL, v8::internal::RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK, v8::internal::Analysis::has_failed(), v8::internal::IrregexpRegExpTooBig(), v8::internal::RegExpTree::IsAnchoredAtEnd(), v8::internal::RegExpTree::IsAnchoredAtStart(), v8::internal::Zone::isolate(), v8::internal::RegExpTree::kInfinity, v8::internal::RegExpCompiler::kMaxRecursion, v8::internal::RegExpMacroAssembler::kMaxRegister, v8::internal::NativeRegExpMacroAssembler::LATIN1, v8::internal::Max(), v8::internal::RegExpTree::max_match(), v8::internal::RegExpTree::min_match(), mode(), v8::internal::RegExpCompileData::node, NULL, v8::internal::RegExpMacroAssembler::set_global_mode(), v8::internal::RegExpMacroAssemblerIA32::SetCurrentPositionFromEnd(), v8::internal::RegExpCompileData::tree, and v8::internal::NativeRegExpMacroAssembler::UC16.

Referenced by v8::internal::RegExpImpl::CompileIrregexp().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ DotPrint()

static void v8::internal::RegExpEngine::DotPrint ( const char *  label,
RegExpNode *  node,
bool  ignore_case 
)
static

The documentation for this class was generated from the following files: