V8 Project
dateparser.h
Go to the documentation of this file.
1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_DATEPARSER_H_
6 #define V8_DATEPARSER_H_
7 
8 #include "src/allocation.h"
10 
11 namespace v8 {
12 namespace internal {
13 
14 class DateParser : public AllStatic {
15  public:
16  // Parse the string as a date. If parsing succeeds, return true after
17  // filling out the output array as follows (all integers are Smis):
18  // [0]: year
19  // [1]: month (0 = Jan, 1 = Feb, ...)
20  // [2]: day
21  // [3]: hour
22  // [4]: minute
23  // [5]: second
24  // [6]: millisecond
25  // [7]: UTC offset in seconds, or null value if no timezone specified
26  // If parsing fails, return false (content of output array is not defined).
27  template <typename Char>
28  static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache);
29 
30  enum {
32  };
33 
34  private:
35  // Range testing
// Returns true iff lo <= x <= hi (assuming lo <= hi), using one unsigned
// comparison. All operands are converted to unsigned *before* subtracting,
// so extreme inputs wrap modulo 2^N instead of overflowing a signed int
// (which would be undefined behavior).
static inline bool Between(int x, int lo, int hi) {
  const unsigned offset = static_cast<unsigned>(x) - static_cast<unsigned>(lo);
  const unsigned span = static_cast<unsigned>(hi) - static_cast<unsigned>(lo);
  // A value below lo wraps around to a huge offset and is rejected here too.
  return offset <= span;
}
39 
40  // Indicates a missing value.
41  static const int kNone = kMaxInt;
42 
43  // Maximal number of digits used to build the value of a numeral.
44  // Remaining digits are ignored.
45  static const int kMaxSignificantDigits = 9;
46 
47  // InputReader provides basic string parsing and character classification.
48  template <typename Char>
49  class InputReader BASE_EMBEDDED {
50  public:
52  : index_(0),
53  buffer_(s),
54  unicode_cache_(unicode_cache) {
55  Next();
56  }
57 
58  int position() { return index_; }
59 
60  // Advance to the next character of the string.
61  void Next() {
62  ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
63  index_++;
64  }
65 
66  // Read a string of digits as an unsigned number. Only the first
67  // kMaxSignificantDigits digits contribute to the value; any remaining
68  // digits of a longer numeral are consumed but ignored.
70  int n = 0;
71  int i = 0;
72  while (IsAsciiDigit()) {
73  if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
74  i++;
75  Next();
76  }
77  return n;
78  }
79 
80  // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
81  // lower-case prefix, and pad any remainder of the buffer with zeroes.
82  // Return word length.
83  int ReadWord(uint32_t* prefix, int prefix_size) {
84  int len;
85  for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
86  if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
87  }
88  for (int i = len; i < prefix_size; i++) prefix[i] = 0;
89  return len;
90  }
91 
92  // The skip methods return whether they actually skipped something.
93  bool Skip(uint32_t c) {
94  if (ch_ == c) {
95  Next();
96  return true;
97  }
98  return false;
99  }
100 
101  bool SkipWhiteSpace() {
102  if (unicode_cache_->IsWhiteSpaceOrLineTerminator(ch_)) {
103  Next();
104  return true;
105  }
106  return false;
107  }
108 
110  if (ch_ != '(') return false;
111  int balance = 0;
112  do {
113  if (ch_ == ')') --balance;
114  else if (ch_ == '(') ++balance;
115  Next();
116  } while (balance > 0 && ch_);
117  return true;
118  }
119 
120  // Character testing/classification. Non-ASCII digits are not supported.
121  bool Is(uint32_t c) const { return ch_ == c; }
122  bool IsEnd() const { return ch_ == 0; }
123  bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
124  bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
125  bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
126 
127  // Return 1 for '+' and -1 for '-'.
128  int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
129 
130  private:
131  int index_;
135  };
136 
137  enum KeywordType {
139  };
140 
141  struct DateToken {
142  public:
143  bool IsInvalid() { return tag_ == kInvalidTokenTag; }
144  bool IsUnknown() { return tag_ == kUnknownTokenTag; }
145  bool IsNumber() { return tag_ == kNumberTag; }
146  bool IsSymbol() { return tag_ == kSymbolTag; }
147  bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
148  bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
149  bool IsKeyword() { return tag_ >= kKeywordTagStart; }
150 
151  int length() { return length_; }
152 
153  int number() {
154  DCHECK(IsNumber());
155  return value_;
156  }
158  DCHECK(IsKeyword());
159  return static_cast<KeywordType>(tag_);
160  }
162  DCHECK(IsKeyword());
163  return value_;
164  }
165  char symbol() {
166  DCHECK(IsSymbol());
167  return static_cast<char>(value_);
168  }
169  bool IsSymbol(char symbol) {
170  return IsSymbol() && this->symbol() == symbol;
171  }
173  return tag_ == tag;
174  }
176  return IsNumber() && length_ == length;
177  }
178  bool IsAsciiSign() {
179  return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
180  }
181  int ascii_sign() {
182  DCHECK(IsAsciiSign());
183  return 44 - value_;
184  }
185  bool IsKeywordZ() {
186  return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
187  }
188  bool IsUnknown(int character) {
189  return IsUnknown() && value_ == character;
190  }
191  // Factory functions.
192  static DateToken Keyword(KeywordType tag, int value, int length) {
193  return DateToken(tag, length, value);
194  }
195  static DateToken Number(int value, int length) {
196  return DateToken(kNumberTag, length, value);
197  }
198  static DateToken Symbol(char symbol) {
199  return DateToken(kSymbolTag, 1, symbol);
200  }
202  return DateToken(kEndOfInputTag, 0, -1);
203  }
205  return DateToken(kWhiteSpaceTag, length, -1);
206  }
207  static DateToken Unknown() {
208  return DateToken(kUnknownTokenTag, 1, -1);
209  }
210  static DateToken Invalid() {
211  return DateToken(kInvalidTokenTag, 0, -1);
212  }
213 
214  private:
215  enum TagType {
222  kKeywordTagStart = 0
223  };
224  DateToken(int tag, int length, int value)
225  : tag_(tag),
226  length_(length),
227  value_(value) { }
228 
229  int tag_;
230  int length_; // Number of characters.
231  int value_;
232  };
233 
234  template <typename Char>
236  public:
237  explicit DateStringTokenizer(InputReader<Char>* in)
238  : in_(in), next_(Scan()) { }
240  DateToken result = next_;
241  next_ = Scan();
242  return result;
243  }
244 
246  return next_;
247  }
248  bool SkipSymbol(char symbol) {
249  if (next_.IsSymbol(symbol)) {
250  next_ = Scan();
251  return true;
252  }
253  return false;
254  }
255 
256  private:
257  DateToken Scan();
258 
259  InputReader<Char>* in_;
261  };
262 
263  static int ReadMilliseconds(DateToken number);
264 
265  // KeywordTable maps names of months, time zones, am/pm to numbers.
266  class KeywordTable : public AllStatic {
267  public:
268  // Look up a word in the keyword table and return an index.
269  // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
270  // and 'len' is the word length.
271  static int Lookup(const uint32_t* pre, int len);
272  // Get the type of the keyword at index i.
273  static KeywordType GetType(int i) {
274  return static_cast<KeywordType>(array[i][kTypeOffset]);
275  }
276  // Get the value of the keyword at index i.
277  static int GetValue(int i) { return array[i][kValueOffset]; }
278 
279  static const int kPrefixLength = 3;
280  static const int kTypeOffset = kPrefixLength;
281  static const int kValueOffset = kTypeOffset + 1;
282  static const int kEntrySize = kValueOffset + 1;
283  static const int8_t array[][kEntrySize];
284  };
285 
286  class TimeZoneComposer BASE_EMBEDDED {
287  public:
288  TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
289  void Set(int offset_in_hours) {
290  sign_ = offset_in_hours < 0 ? -1 : 1;
291  hour_ = offset_in_hours * sign_;
292  minute_ = 0;
293  }
294  void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
295  void SetAbsoluteHour(int hour) { hour_ = hour; }
296  void SetAbsoluteMinute(int minute) { minute_ = minute; }
297  bool IsExpecting(int n) const {
298  return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
299  }
300  bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
301  bool Write(FixedArray* output);
302  bool IsEmpty() { return hour_ == kNone; }
303  private:
304  int sign_;
305  int hour_;
306  int minute_;
307  };
308 
309  class TimeComposer BASE_EMBEDDED {
310  public:
311  TimeComposer() : index_(0), hour_offset_(kNone) {}
312  bool IsEmpty() const { return index_ == 0; }
313  bool IsExpecting(int n) const {
314  return (index_ == 1 && IsMinute(n)) ||
315  (index_ == 2 && IsSecond(n)) ||
316  (index_ == 3 && IsMillisecond(n));
317  }
318  bool Add(int n) {
319  return index_ < kSize ? (comp_[index_++] = n, true) : false;
320  }
321  bool AddFinal(int n) {
322  if (!Add(n)) return false;
323  while (index_ < kSize) comp_[index_++] = 0;
324  return true;
325  }
326  void SetHourOffset(int n) { hour_offset_ = n; }
327  bool Write(FixedArray* output);
328 
329  static bool IsMinute(int x) { return Between(x, 0, 59); }
330  static bool IsHour(int x) { return Between(x, 0, 23); }
331  static bool IsSecond(int x) { return Between(x, 0, 59); }
332 
333  private:
334  static bool IsHour12(int x) { return Between(x, 0, 12); }
335  static bool IsMillisecond(int x) { return Between(x, 0, 999); }
336 
337  static const int kSize = 4;
338  int comp_[kSize];
339  int index_;
341  };
342 
343  class DayComposer BASE_EMBEDDED {
344  public:
345  DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
346  bool IsEmpty() const { return index_ == 0; }
347  bool Add(int n) {
348  if (index_ < kSize) {
349  comp_[index_] = n;
350  index_++;
351  return true;
352  }
353  return false;
354  }
355  void SetNamedMonth(int n) { named_month_ = n; }
356  bool Write(FixedArray* output);
357  void set_iso_date() { is_iso_date_ = true; }
358  static bool IsMonth(int x) { return Between(x, 1, 12); }
359  static bool IsDay(int x) { return Between(x, 1, 31); }
360 
361  private:
362  static const int kSize = 3;
363  int comp_[kSize];
364  int index_;
366  // If set, ensures that data is always parsed in year-month-date order.
368  };
369 
370  // Tries to parse an ES5 Date Time String. Returns the next token
371  // to continue with in the legacy date string parser. If parsing is
372  // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
373  // returns DateToken::Invalid(). Otherwise parsing continues in the
374  // legacy parser.
375  template <typename Char>
377  DateStringTokenizer<Char>* scanner,
378  DayComposer* day,
379  TimeComposer* time,
380  TimeZoneComposer* tz);
381 };
382 
383 
384 } } // namespace v8::internal
385 
386 #endif // V8_DATEPARSER_H_
void Set(int offset_in_hours)
Definition: dateparser.h:289
int ReadWord(uint32_t *prefix, int prefix_size)
Definition: dateparser.h:83
InputReader(UnicodeCache *unicode_cache, Vector< Char > s)
Definition: dateparser.h:51
DateStringTokenizer(InputReader< Char > *in)
Definition: dateparser.h:237
static int Lookup(const uint32_t *pre, int len)
Definition: dateparser.cc:140
static KeywordType GetType(int i)
Definition: dateparser.h:273
static const int8_t array[][kEntrySize]
Definition: dateparser.h:283
static bool Parse(Vector< Char > str, FixedArray *output, UnicodeCache *cache)
static const int kMaxSignificantDigits
Definition: dateparser.h:45
static DateParser::DateToken ParseES5DateTime(DateStringTokenizer< Char > *scanner, DayComposer *day, TimeComposer *time, TimeZoneComposer *tz)
static const int kNone
Definition: dateparser.h:41
static bool Between(int x, int lo, int hi)
Definition: dateparser.h:36
static int ReadMilliseconds(DateToken number)
Definition: dateparser.cc:159
#define DCHECK(condition)
Definition: logging.h:205
bool IsDecimalDigit(uc32 c)
const int kMaxInt
Definition: globals.h:109
int AsciiAlphaToLower(uc32 c)
Debugger support for the V8 JavaScript engine.
Definition: accessors.cc:20
bool IsFixedLengthNumber(int length)
Definition: dateparser.h:175
static DateToken Symbol(char symbol)
Definition: dateparser.h:198
static DateToken Number(int value, int length)
Definition: dateparser.h:195
bool IsKeywordType(KeywordType tag)
Definition: dateparser.h:172
static DateToken Keyword(KeywordType tag, int value, int length)
Definition: dateparser.h:192
DateToken(int tag, int length, int value)
Definition: dateparser.h:224
static DateToken WhiteSpace(int length)
Definition: dateparser.h:204