V8 Project
unibrow::Utf8DecoderBase Class Reference

#include <unicode.h>

+ Inheritance diagram for unibrow::Utf8DecoderBase:
+ Collaboration diagram for unibrow::Utf8DecoderBase:

Public Member Functions

 Utf8DecoderBase ()
 
 Utf8DecoderBase (uint16_t *buffer, unsigned buffer_length, const uint8_t *stream, unsigned stream_length)
 
unsigned Utf16Length () const
 

Protected Member Functions

void Reset (uint16_t *buffer, unsigned buffer_length, const uint8_t *stream, unsigned stream_length)
 

Static Protected Member Functions

static void WriteUtf16Slow (const uint8_t *stream, uint16_t *data, unsigned length)
 

Protected Attributes

const uint8_t * unbuffered_start_
 
unsigned utf16_length_
 
bool last_byte_of_buffer_unused_
 

Private Member Functions

 DISALLOW_COPY_AND_ASSIGN (Utf8DecoderBase)
 

Detailed Description

Definition at line 159 of file unicode.h.

Constructor & Destructor Documentation

◆ Utf8DecoderBase() [1/2]

unibrow::Utf8DecoderBase::Utf8DecoderBase ( )
inline

Definition at line 156 of file unicode-inl.h.

157  : unbuffered_start_(NULL),
158  utf16_length_(0),
159  last_byte_of_buffer_unused_(false) {}
bool last_byte_of_buffer_unused_
Definition: unicode.h:180
const uint8_t * unbuffered_start_
Definition: unicode.h:178
NULL (cross-reference tooltip; the description attached by the documentation generator was unrelated, concatenated V8 command-line flag help text)

◆ Utf8DecoderBase() [2/2]

unibrow::Utf8DecoderBase::Utf8DecoderBase ( uint16_t *  buffer,
unsigned  buffer_length,
const uint8_t *  stream,
unsigned  stream_length 
)
inline

Definition at line 161 of file unicode-inl.h.

164  {
165  Reset(buffer, buffer_length, stream, stream_length);
166 }
void Reset(uint16_t *buffer, unsigned buffer_length, const uint8_t *stream, unsigned stream_length)
Definition: unicode.cc:261

References Reset().

+ Here is the call graph for this function:

Member Function Documentation

◆ DISALLOW_COPY_AND_ASSIGN()

unibrow::Utf8DecoderBase::DISALLOW_COPY_AND_ASSIGN ( Utf8DecoderBase  )
private

◆ Reset()

void unibrow::Utf8DecoderBase::Reset ( uint16_t *  buffer,
unsigned  buffer_length,
const uint8_t *  stream,
unsigned  stream_length 
)
protected

Definition at line 261 of file unicode.cc.

264  {
265  // Assume everything will fit in the buffer and stream won't be needed.
266  last_byte_of_buffer_unused_ = false;
267  unbuffered_start_ = NULL;
268  bool writing_to_buffer = true;
269  // Loop until stream is read, writing to buffer as long as buffer has space.
270  unsigned utf16_length = 0;
271  while (stream_length != 0) {
272  unsigned cursor = 0;
273  uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
274  DCHECK(cursor > 0 && cursor <= stream_length);
275  stream += cursor;
276  stream_length -= cursor;
277  bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode;
278  utf16_length += is_two_characters ? 2 : 1;
279  // Don't need to write to the buffer, but still need utf16_length.
280  if (!writing_to_buffer) continue;
281  // Write out the characters to the buffer.
282  // Must check for equality with buffer_length as we've already updated it.
283  if (utf16_length <= buffer_length) {
284  if (is_two_characters) {
285  *buffer++ = Utf16::LeadSurrogate(character);
286  *buffer++ = Utf16::TrailSurrogate(character);
287  } else {
288  *buffer++ = character;
289  }
290  if (utf16_length == buffer_length) {
291  // Just wrote last character of buffer
292  writing_to_buffer = false;
293  unbuffered_start_ = stream;
294  }
295  continue;
296  }
297  // Have gone over buffer.
298  // Last char of buffer is unused, set cursor back.
299  DCHECK(is_two_characters);
300  writing_to_buffer = false;
301  last_byte_of_buffer_unused_ = true;
302  unbuffered_start_ = stream - cursor;
303  }
304  utf16_length_ = utf16_length;
305 }
static uint16_t LeadSurrogate(uint32_t char_code)
Definition: unicode.h:108
static const uchar kMaxNonSurrogateCharCode
Definition: unicode.h:98
static uint16_t TrailSurrogate(uint32_t char_code)
Definition: unicode.h:111
static uchar ValueOf(const byte *str, unsigned length, unsigned *cursor)
Definition: unicode-inl.h:129
#define DCHECK(condition)
Definition: logging.h:205

References DCHECK, unibrow::Utf16::kMaxNonSurrogateCharCode, last_byte_of_buffer_unused_, unibrow::Utf16::LeadSurrogate(), NULL, unibrow::Utf16::TrailSurrogate(), unbuffered_start_, utf16_length_, and unibrow::Utf8::ValueOf().

Referenced by unibrow::Utf8Decoder< kBufferSize >::Reset(), and Utf8DecoderBase().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ Utf16Length()

unsigned unibrow::Utf8DecoderBase::Utf16Length ( ) const
inline

Definition at line 167 of file unicode.h.

167 { return utf16_length_; }

References utf16_length_.

◆ WriteUtf16Slow()

void unibrow::Utf8DecoderBase::WriteUtf16Slow ( const uint8_t *  stream,
uint16_t *  data,
unsigned  length 
)
staticprotected

Definition at line 308 of file unicode.cc.

310  {
311  while (data_length != 0) {
312  unsigned cursor = 0;
313  uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor);
314  // There's a total lack of bounds checking for stream
315  // as it was already done in Reset.
316  stream += cursor;
317  if (character > Utf16::kMaxNonSurrogateCharCode) {
318  *data++ = Utf16::LeadSurrogate(character);
319  *data++ = Utf16::TrailSurrogate(character);
320  DCHECK(data_length > 1);
321  data_length -= 2;
322  } else {
323  *data++ = character;
324  data_length -= 1;
325  }
326  }
327 }
static const unsigned kMaxEncodedSize
Definition: unicode.h:140

References DCHECK, unibrow::Utf8::kMaxEncodedSize, unibrow::Utf16::kMaxNonSurrogateCharCode, unibrow::Utf16::LeadSurrogate(), unibrow::Utf16::TrailSurrogate(), and unibrow::Utf8::ValueOf().

+ Here is the call graph for this function:

Member Data Documentation

◆ last_byte_of_buffer_unused_

bool unibrow::Utf8DecoderBase::last_byte_of_buffer_unused_
protected

Definition at line 180 of file unicode.h.

Referenced by Reset().

◆ unbuffered_start_

const uint8_t* unibrow::Utf8DecoderBase::unbuffered_start_
protected

Definition at line 178 of file unicode.h.

Referenced by Reset().

◆ utf16_length_

unsigned unibrow::Utf8DecoderBase::utf16_length_
protected

Definition at line 179 of file unicode.h.

Referenced by Reset(), and Utf16Length().


The documentation for this class was generated from the following files: