| |
| #ifndef DATE_TIME_FORMAT_DATE_PARSER_HPP__ |
| #define DATE_TIME_FORMAT_DATE_PARSER_HPP__ |
| |
| /* Copyright (c) 2004-2005 CrystalClear Software, Inc. |
| * Use, modification and distribution is subject to the |
| * Boost Software License, Version 1.0. (See accompanying |
| * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) |
| * Author: Jeff Garland, Bart Garst |
| * $Date: 2009-06-04 04:24:49 -0400 (Thu, 04 Jun 2009) $ |
| */ |
| |
| |
| #include "boost/lexical_cast.hpp" |
| #include "boost/date_time/string_parse_tree.hpp" |
| #include "boost/date_time/strings_from_facet.hpp" |
| #include "boost/date_time/special_values_parser.hpp" |
| #include <string> |
| #include <vector> |
| #include <sstream> |
| #include <iterator> |
| #ifndef BOOST_NO_STDC_NAMESPACE |
| # include <cctype> |
| #else |
| # include <ctype.h> |
| #endif |
| |
| #ifdef BOOST_NO_STDC_NAMESPACE |
| namespace std { |
| using ::isspace; |
| using ::isdigit; |
| } |
| #endif |
| namespace boost { namespace date_time { |
| |
| //! Helper function for parsing fixed length strings into integers |
| /*! Will consume 'length' number of characters from stream. Consumed |
| * character are transfered to parse_match_result struct. |
| * Returns '-1' if no number can be parsed or incorrect number of |
| * digits in stream. */ |
| template<typename int_type, typename charT> |
| inline |
| int_type |
| fixed_string_to_int(std::istreambuf_iterator<charT>& itr, |
| std::istreambuf_iterator<charT>& stream_end, |
| parse_match_result<charT>& mr, |
| unsigned int length, |
| const charT& fill_char) |
| { |
| //typedef std::basic_string<charT> string_type; |
| unsigned int j = 0; |
| //string_type s; |
| while (j < length && itr != stream_end && |
| (std::isdigit(*itr) || *itr == fill_char)) { |
| if(*itr == fill_char) { |
| /* Since a fill_char can be anything, we convert it to a zero. |
| * lexical_cast will behave predictably when zero is used as fill. */ |
| mr.cache += ('0'); |
| } |
| else { |
| mr.cache += (*itr); |
| } |
| itr++; |
| j++; |
| } |
| int_type i = -1; |
| // mr.cache will hold leading zeros. size() tells us when input is too short. |
| if(mr.cache.size() < length) { |
| return i; |
| } |
| try { |
| i = boost::lexical_cast<int_type>(mr.cache); |
| }catch(bad_lexical_cast&){ |
| // we want to return -1 if the cast fails so nothing to do here |
| } |
| return i; |
| } |
| |
| //! Helper function for parsing fixed length strings into integers |
| /*! Will consume 'length' number of characters from stream. Consumed |
| * character are transfered to parse_match_result struct. |
| * Returns '-1' if no number can be parsed or incorrect number of |
| * digits in stream. */ |
| template<typename int_type, typename charT> |
| inline |
| int_type |
| fixed_string_to_int(std::istreambuf_iterator<charT>& itr, |
| std::istreambuf_iterator<charT>& stream_end, |
| parse_match_result<charT>& mr, |
| unsigned int length) |
| { |
| return fixed_string_to_int<int_type, charT>(itr, stream_end, mr, length, '0'); |
| } |
| |
| //! Helper function for parsing varied length strings into integers |
| /*! Will consume 'max_length' characters from stream only if those |
| * characters are digits. Returns '-1' if no number can be parsed. |
| * Will not parse a number preceeded by a '+' or '-'. */ |
| template<typename int_type, typename charT> |
| inline |
| int_type |
| var_string_to_int(std::istreambuf_iterator<charT>& itr, |
| const std::istreambuf_iterator<charT>& stream_end, |
| unsigned int max_length) |
| { |
| typedef std::basic_string<charT> string_type; |
| unsigned int j = 0; |
| string_type s; |
| while (itr != stream_end && (j < max_length) && std::isdigit(*itr)) { |
| s += (*itr); |
| ++itr; |
| ++j; |
| } |
| int_type i = -1; |
| if(!s.empty()) { |
| i = boost::lexical_cast<int_type>(s); |
| } |
| return i; |
| } |
| |
| |
| //! Class with generic date parsing using a format string |
| /*! The following is the set of recognized format specifiers |
| - %a - Short weekday name |
| - %A - Long weekday name |
| - %b - Abbreviated month name |
| - %B - Full month name |
| - %d - Day of the month as decimal 01 to 31 |
| - %j - Day of year as decimal from 001 to 366 |
| - %m - Month name as a decimal 01 to 12 |
| - %U - Week number 00 to 53 with first Sunday as the first day of week 1? |
| - %w - Weekday as decimal number 0 to 6 where Sunday == 0 |
| - %W - Week number 00 to 53 where Monday is first day of week 1 |
| - %x - facet default date representation |
| - %y - Year without the century - eg: 04 for 2004 |
| - %Y - Year with century |
| |
| The weekday specifiers (%a and %A) do not add to the date construction, |
| but they provide a way to skip over the weekday names for formats that |
| provide them. |
| |
| todo -- Another interesting feature that this approach could provide is |
| an option to fill in any missing fields with the current values |
| from the clock. So if you have %m-%d the parser would detect |
| the missing year value and fill it in using the clock. |
| |
| todo -- What to do with the %x. %x in the classic facet is just bad... |
| |
| */ |
| template<class date_type, typename charT> |
| class format_date_parser |
| { |
| public: |
| typedef std::basic_string<charT> string_type; |
| typedef std::basic_istringstream<charT> stringstream_type; |
| typedef std::istreambuf_iterator<charT> stream_itr_type; |
| typedef typename string_type::const_iterator const_itr; |
| typedef typename date_type::year_type year_type; |
| typedef typename date_type::month_type month_type; |
| typedef typename date_type::day_type day_type; |
| typedef typename date_type::duration_type duration_type; |
| typedef typename date_type::day_of_week_type day_of_week_type; |
| typedef typename date_type::day_of_year_type day_of_year_type; |
| typedef string_parse_tree<charT> parse_tree_type; |
| typedef typename parse_tree_type::parse_match_result_type match_results; |
| typedef std::vector<std::basic_string<charT> > input_collection_type; |
| |
| // TODO sv_parser uses its default constructor - write the others |
| |
| format_date_parser(const string_type& format_str, |
| const input_collection_type& month_short_names, |
| const input_collection_type& month_long_names, |
| const input_collection_type& weekday_short_names, |
| const input_collection_type& weekday_long_names) : |
| m_format(format_str), |
| m_month_short_names(month_short_names, 1), |
| m_month_long_names(month_long_names, 1), |
| m_weekday_short_names(weekday_short_names), |
| m_weekday_long_names(weekday_long_names) |
| {} |
| |
| format_date_parser(const string_type& format_str, |
| const std::locale& locale) : |
| m_format(format_str), |
| m_month_short_names(gather_month_strings<charT>(locale), 1), |
| m_month_long_names(gather_month_strings<charT>(locale, false), 1), |
| m_weekday_short_names(gather_weekday_strings<charT>(locale)), |
| m_weekday_long_names(gather_weekday_strings<charT>(locale, false)) |
| {} |
| |
| format_date_parser(const format_date_parser<date_type,charT>& fdp) |
| { |
| this->m_format = fdp.m_format; |
| this->m_month_short_names = fdp.m_month_short_names; |
| this->m_month_long_names = fdp.m_month_long_names; |
| this->m_weekday_short_names = fdp.m_weekday_short_names; |
| this->m_weekday_long_names = fdp.m_weekday_long_names; |
| } |
| |
| string_type format() const |
| { |
| return m_format; |
| } |
| |
| void format(string_type format_str) |
| { |
| m_format = format_str; |
| } |
| |
| void short_month_names(const input_collection_type& month_names) |
| { |
| m_month_short_names = parse_tree_type(month_names, 1); |
| } |
| void long_month_names(const input_collection_type& month_names) |
| { |
| m_month_long_names = parse_tree_type(month_names, 1); |
| } |
| void short_weekday_names(const input_collection_type& weekday_names) |
| { |
| m_weekday_short_names = parse_tree_type(weekday_names); |
| } |
| void long_weekday_names(const input_collection_type& weekday_names) |
| { |
| m_weekday_long_names = parse_tree_type(weekday_names); |
| } |
| |
| date_type |
| parse_date(const string_type& value, |
| const string_type& format_str, |
| const special_values_parser<date_type,charT>& sv_parser) const |
| { |
| stringstream_type ss(value); |
| stream_itr_type sitr(ss); |
| stream_itr_type stream_end; |
| return parse_date(sitr, stream_end, format_str, sv_parser); |
| } |
| |
| date_type |
| parse_date(std::istreambuf_iterator<charT>& sitr, |
| std::istreambuf_iterator<charT>& stream_end, |
| const special_values_parser<date_type,charT>& sv_parser) const |
| { |
| return parse_date(sitr, stream_end, m_format, sv_parser); |
| } |
| |
| /*! Of all the objects that the format_date_parser can parse, only a |
| * date can be a special value. Therefore, only parse_date checks |
| * for special_values. */ |
| date_type |
| parse_date(std::istreambuf_iterator<charT>& sitr, |
| std::istreambuf_iterator<charT>& stream_end, |
| string_type format_str, |
| const special_values_parser<date_type,charT>& sv_parser) const |
| { |
| bool use_current_char = false; |
| |
| // skip leading whitespace |
| while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } |
| charT current_char = *sitr; |
| |
| short year(0), month(0), day(0), day_of_year(0);// wkday(0); |
| /* Initialized the following to their minimum values. These intermediate |
| * objects are used so we get specific exceptions when part of the input |
| * is unparsable. |
| * Ex: "205-Jan-15" will throw a bad_year, "2005-Jsn-15"- bad_month, etc.*/ |
| year_type t_year(1400); |
| month_type t_month(1); |
| day_type t_day(1); |
| day_of_week_type wkday(0); |
| |
| |
| const_itr itr(format_str.begin()); |
| while (itr != format_str.end() && (sitr != stream_end)) { |
| if (*itr == '%') { |
| itr++; |
| if (*itr != '%') { |
| switch(*itr) { |
| case 'a': |
| { |
| //this value is just throw away. It could be used for |
| //error checking potentially, but it isn't helpful in |
| //actually constructing the date - we just need to get it |
| //out of the stream |
| match_results mr = m_weekday_short_names.match(sitr, stream_end); |
| if(mr.current_match == match_results::PARSE_ERROR) { |
| // check special_values |
| if(sv_parser.match(sitr, stream_end, mr)) { |
| return date_type(static_cast<special_values>(mr.current_match)); |
| } |
| } |
| wkday = mr.current_match; |
| if (mr.has_remaining()) { |
| current_char = mr.last_char(); |
| use_current_char = true; |
| } |
| break; |
| } |
| case 'A': |
| { |
| //this value is just throw away. It could be used for |
| //error checking potentially, but it isn't helpful in |
| //actually constructing the date - we just need to get it |
| //out of the stream |
| match_results mr = m_weekday_long_names.match(sitr, stream_end); |
| if(mr.current_match == match_results::PARSE_ERROR) { |
| // check special_values |
| if(sv_parser.match(sitr, stream_end, mr)) { |
| return date_type(static_cast<special_values>(mr.current_match)); |
| } |
| } |
| wkday = mr.current_match; |
| if (mr.has_remaining()) { |
| current_char = mr.last_char(); |
| use_current_char = true; |
| } |
| break; |
| } |
| case 'b': |
| { |
| match_results mr = m_month_short_names.match(sitr, stream_end); |
| if(mr.current_match == match_results::PARSE_ERROR) { |
| // check special_values |
| if(sv_parser.match(sitr, stream_end, mr)) { |
| return date_type(static_cast<special_values>(mr.current_match)); |
| } |
| } |
| t_month = month_type(mr.current_match); |
| if (mr.has_remaining()) { |
| current_char = mr.last_char(); |
| use_current_char = true; |
| } |
| break; |
| } |
| case 'B': |
| { |
| match_results mr = m_month_long_names.match(sitr, stream_end); |
| if(mr.current_match == match_results::PARSE_ERROR) { |
| // check special_values |
| if(sv_parser.match(sitr, stream_end, mr)) { |
| return date_type(static_cast<special_values>(mr.current_match)); |
| } |
| } |
| t_month = month_type(mr.current_match); |
| if (mr.has_remaining()) { |
| current_char = mr.last_char(); |
| use_current_char = true; |
| } |
| break; |
| } |
| case 'd': |
| { |
| match_results mr; |
| day = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2); |
| if(day == -1) { |
| if(sv_parser.match(sitr, stream_end, mr)) { |
| return date_type(static_cast<special_values>(mr.current_match)); |
| } |
| } |
| t_day = day_type(day); |
| break; |
| } |
| case 'e': |
| { |
| match_results mr; |
| day = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2, ' '); |
| if(day == -1) { |
| if(sv_parser.match(sitr, stream_end, mr)) { |
| return date_type(static_cast<special_values>(mr.current_match)); |
| } |
| } |
| t_day = day_type(day); |
| break; |
| } |
| case 'j': |
| { |
| match_results mr; |
| day_of_year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 3); |
| if(day_of_year == -1) { |
| if(sv_parser.match(sitr, stream_end, mr)) { |
| return date_type(static_cast<special_values>(mr.current_match)); |
| } |
| } |
| // these next two lines are so we get an exception with bad input |
| day_of_year_type t_day_of_year(1); |
| t_day_of_year = day_of_year_type(day_of_year); |
| break; |
| } |
| case 'm': |
| { |
| match_results mr; |
| month = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2); |
| if(month == -1) { |
| if(sv_parser.match(sitr, stream_end, mr)) { |
| return date_type(static_cast<special_values>(mr.current_match)); |
| } |
| } |
| t_month = month_type(month); |
| break; |
| } |
| case 'Y': |
| { |
| match_results mr; |
| year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 4); |
| if(year == -1) { |
| if(sv_parser.match(sitr, stream_end, mr)) { |
| return date_type(static_cast<special_values>(mr.current_match)); |
| } |
| } |
| t_year = year_type(year); |
| break; |
| } |
| case 'y': |
| { |
| match_results mr; |
| year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2); |
| if(year == -1) { |
| if(sv_parser.match(sitr, stream_end, mr)) { |
| return date_type(static_cast<special_values>(mr.current_match)); |
| } |
| } |
| year += 2000; //make 2 digit years in this century |
| t_year = year_type(year); |
| break; |
| } |
| default: |
| {} //ignore those we don't understand |
| |
| }//switch |
| |
| } |
| else { // itr == '%', second consecutive |
| sitr++; |
| } |
| |
| itr++; //advance past format specifier |
| } |
| else { //skip past chars in format and in buffer |
| itr++; |
| if (use_current_char) { |
| use_current_char = false; |
| current_char = *sitr; |
| } |
| else { |
| sitr++; |
| } |
| } |
| } |
| |
| if (day_of_year > 0) { |
| date_type d(static_cast<unsigned short>(year-1),12,31); //end of prior year |
| return d + duration_type(day_of_year); |
| } |
| |
| return date_type(t_year, t_month, t_day); // exceptions were thrown earlier |
| // if input was no good |
| } |
| |
| //! Throws bad_month if unable to parse |
| month_type |
| parse_month(std::istreambuf_iterator<charT>& sitr, |
| std::istreambuf_iterator<charT>& stream_end, |
| string_type format_str) const |
| { |
| match_results mr; |
| return parse_month(sitr, stream_end, format_str, mr); |
| } |
| |
| //! Throws bad_month if unable to parse |
| month_type |
| parse_month(std::istreambuf_iterator<charT>& sitr, |
| std::istreambuf_iterator<charT>& stream_end, |
| string_type format_str, |
| match_results& mr) const |
| { |
| bool use_current_char = false; |
| |
| // skip leading whitespace |
| while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } |
| charT current_char = *sitr; |
| |
| short month(0); |
| |
| const_itr itr(format_str.begin()); |
| while (itr != format_str.end() && (sitr != stream_end)) { |
| if (*itr == '%') { |
| itr++; |
| if (*itr != '%') { |
| switch(*itr) { |
| case 'b': |
| { |
| mr = m_month_short_names.match(sitr, stream_end); |
| month = mr.current_match; |
| if (mr.has_remaining()) { |
| current_char = mr.last_char(); |
| use_current_char = true; |
| } |
| break; |
| } |
| case 'B': |
| { |
| mr = m_month_long_names.match(sitr, stream_end); |
| month = mr.current_match; |
| if (mr.has_remaining()) { |
| current_char = mr.last_char(); |
| use_current_char = true; |
| } |
| break; |
| } |
| case 'm': |
| { |
| month = var_string_to_int<short, charT>(sitr, stream_end, 2); |
| // var_string_to_int returns -1 if parse failed. That will |
| // cause a bad_month exception to be thrown so we do nothing here |
| break; |
| } |
| default: |
| {} //ignore those we don't understand |
| |
| }//switch |
| |
| } |
| else { // itr == '%', second consecutive |
| sitr++; |
| } |
| |
| itr++; //advance past format specifier |
| } |
| else { //skip past chars in format and in buffer |
| itr++; |
| if (use_current_char) { |
| use_current_char = false; |
| current_char = *sitr; |
| } |
| else { |
| sitr++; |
| } |
| } |
| } |
| |
| return month_type(month); // throws bad_month exception when values are zero |
| } |
| |
| //! Expects 1 or 2 digits 1-31. Throws bad_day_of_month if unable to parse |
| day_type |
| parse_var_day_of_month(std::istreambuf_iterator<charT>& sitr, |
| std::istreambuf_iterator<charT>& stream_end) const |
| { |
| // skip leading whitespace |
| while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } |
| |
| return day_type(var_string_to_int<short, charT>(sitr, stream_end, 2)); |
| } |
| //! Expects 2 digits 01-31. Throws bad_day_of_month if unable to parse |
| day_type |
| parse_day_of_month(std::istreambuf_iterator<charT>& sitr, |
| std::istreambuf_iterator<charT>& stream_end) const |
| { |
| // skip leading whitespace |
| while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } |
| |
| //return day_type(var_string_to_int<short, charT>(sitr, stream_end, 2)); |
| match_results mr; |
| return day_type(fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2)); |
| } |
| |
| day_of_week_type |
| parse_weekday(std::istreambuf_iterator<charT>& sitr, |
| std::istreambuf_iterator<charT>& stream_end, |
| string_type format_str) const |
| { |
| match_results mr; |
| return parse_weekday(sitr, stream_end, format_str, mr); |
| } |
| day_of_week_type |
| parse_weekday(std::istreambuf_iterator<charT>& sitr, |
| std::istreambuf_iterator<charT>& stream_end, |
| string_type format_str, |
| match_results& mr) const |
| { |
| bool use_current_char = false; |
| |
| // skip leading whitespace |
| while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } |
| charT current_char = *sitr; |
| |
| short wkday(0); |
| |
| const_itr itr(format_str.begin()); |
| while (itr != format_str.end() && (sitr != stream_end)) { |
| if (*itr == '%') { |
| itr++; |
| if (*itr != '%') { |
| switch(*itr) { |
| case 'a': |
| { |
| //this value is just throw away. It could be used for |
| //error checking potentially, but it isn't helpful in |
| //actually constructing the date - we just need to get it |
| //out of the stream |
| mr = m_weekday_short_names.match(sitr, stream_end); |
| wkday = mr.current_match; |
| if (mr.has_remaining()) { |
| current_char = mr.last_char(); |
| use_current_char = true; |
| } |
| break; |
| } |
| case 'A': |
| { |
| //this value is just throw away. It could be used for |
| //error checking potentially, but it isn't helpful in |
| //actually constructing the date - we just need to get it |
| //out of the stream |
| mr = m_weekday_long_names.match(sitr, stream_end); |
| wkday = mr.current_match; |
| if (mr.has_remaining()) { |
| current_char = mr.last_char(); |
| use_current_char = true; |
| } |
| break; |
| } |
| case 'w': |
| { |
| // weekday as number 0-6, Sunday == 0 |
| wkday = var_string_to_int<short, charT>(sitr, stream_end, 2); |
| break; |
| } |
| default: |
| {} //ignore those we don't understand |
| |
| }//switch |
| |
| } |
| else { // itr == '%', second consecutive |
| sitr++; |
| } |
| |
| itr++; //advance past format specifier |
| } |
| else { //skip past chars in format and in buffer |
| itr++; |
| if (use_current_char) { |
| use_current_char = false; |
| current_char = *sitr; |
| } |
| else { |
| sitr++; |
| } |
| } |
| } |
| |
| return day_of_week_type(wkday); // throws bad_day_of_month exception |
| // when values are zero |
| } |
| |
| //! throws bad_year if unable to parse |
| year_type |
| parse_year(std::istreambuf_iterator<charT>& sitr, |
| std::istreambuf_iterator<charT>& stream_end, |
| string_type format_str) const |
| { |
| match_results mr; |
| return parse_year(sitr, stream_end, format_str, mr); |
| } |
| |
| //! throws bad_year if unable to parse |
| year_type |
| parse_year(std::istreambuf_iterator<charT>& sitr, |
| std::istreambuf_iterator<charT>& stream_end, |
| string_type format_str, |
| match_results& mr) const |
| { |
| bool use_current_char = false; |
| |
| // skip leading whitespace |
| while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } |
| charT current_char = *sitr; |
| |
| unsigned short year(0); |
| |
| const_itr itr(format_str.begin()); |
| while (itr != format_str.end() && (sitr != stream_end)) { |
| if (*itr == '%') { |
| itr++; |
| if (*itr != '%') { |
| //match_results mr; |
| switch(*itr) { |
| case 'Y': |
| { |
| // year from 4 digit string |
| year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 4); |
| break; |
| } |
| case 'y': |
| { |
| // year from 2 digit string (no century) |
| year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2); |
| year += 2000; //make 2 digit years in this century |
| break; |
| } |
| default: |
| {} //ignore those we don't understand |
| |
| }//switch |
| |
| } |
| else { // itr == '%', second consecutive |
| sitr++; |
| } |
| |
| itr++; //advance past format specifier |
| } |
| else { //skip past chars in format and in buffer |
| itr++; |
| if (use_current_char) { |
| use_current_char = false; |
| current_char = *sitr; |
| } |
| else { |
| sitr++; |
| } |
| } |
| } |
| |
| return year_type(year); // throws bad_year exception when values are zero |
| } |
| |
| |
| private: |
| string_type m_format; |
| parse_tree_type m_month_short_names; |
| parse_tree_type m_month_long_names; |
| parse_tree_type m_weekday_short_names; |
| parse_tree_type m_weekday_long_names; |
| |
| }; |
| |
| } } //namespace |
| |
| #endif |
| |
| |
| |