Program Listing for File data_types.h

Return to documentation for file (include/shad/extensions/data_types/data_types.h)

//===------------------------------------------------------------*- C++ -*-===//
//
//                                     SHAD
//
//      The Scalable High-performance Algorithms and Data Structure Library
//
//===----------------------------------------------------------------------===//
//
// Copyright 2018 Battelle Memorial Institute
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
//
//===----------------------------------------------------------------------===//

#ifndef INCLUDE_SHAD_EXTENSIONS_DATA_TYPES_DATA_TYPES_H_
#define INCLUDE_SHAD_EXTENSIONS_DATA_TYPES_DATA_TYPES_H_

#include <ctime>

#include <algorithm>
#include <iomanip>
#include <iostream>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>


namespace shad {

namespace data_types {

  enum data_t {
    STRING = 0,  // string support is currenlty limited
    CHARS,       // sequence of characters
    UINT,        // unsigned, binds by default to uint64_t
    INT,         // int, binds by default to int64_t
    FLOAT,       // float, binds by default to float
    DOUBLE,      // double, binds by default to double
    BOOL,        // bool, binds by default to bool
    DATE,        // date in "%y-%m-%d" format, binds by default to time_t
    USDATE,      // date in "%m/%d/%y" format, binds by default to time_t
    DATE_TIME,   // date in "%y-%m-%dT%H:%M:%S" format,
                 // binds by default to time_t
    IP_ADDRESS,  // IPv4, binds by default to data_types::ipv4_t
    LIST_UINT,   // Sequence of unsigneds, support currently limited
    LIST_INT,    // Sequence of integers, support currently limited
    LIST_DOUBLE, // Sequence of doubles, support currently limited
    NONE
  };

  using schema_t = std::vector<std::pair<std::string, data_t>>;

  template <typename ENC_t>
  constexpr ENC_t kNullValue = ENC_t();

  template <>
  constexpr uint64_t kNullValue<uint64_t> = std::numeric_limits<int64_t>::max();

  template <typename ENC_t, typename IN_t, data_t DT>
  ENC_t encode(IN_t &in);

  template <typename ENC_t, typename IN_t>
  ENC_t encode(IN_t &in);

  template <typename ENC_t, typename IN_t>
  ENC_t encode(IN_t &in, data_t dt);

  template <typename ENC_t, size_t MAX_s, data_t ST>
  std::array<ENC_t, MAX_s> encode(std::string &str) {
    std::array<ENC_t, MAX_s> res;
    if (str.size() > 0) {
      memcpy(res.data(), str.data(), sizeof(ENC_t)*MAX_s);
    } else {
      res.fill('\0');
    }
    return res;
  }

  template <typename ENC_t, typename DEC_t>
  typename std::enable_if<(std::is_arithmetic<DEC_t>::value or (sizeof(DEC_t) == sizeof(ENC_t))), DEC_t>::type
  decode(ENC_t encvalue) {
    DEC_t val;
    memcpy(&val, &encvalue, sizeof(DEC_t));
    return val;
  }

  template <typename ENC_t, typename DEC_t, data_t ST>
  DEC_t decode(ENC_t value);

  template <typename ENC_t, data_t ST>
  typename std::enable_if<(ST==data_t::INT), int64_t>::type
  decode(ENC_t encvalue) {
    return decode<ENC_t, int64_t>(encvalue);
  }

  template <typename ENC_t, data_t ST>
  typename std::enable_if<(ST==data_t::UINT), uint64_t>::type
  decode(ENC_t encvalue) {
    return decode<ENC_t, uint64_t>(encvalue);
  }

  template <typename ENC_t, data_t ST>
  typename std::enable_if<(ST==data_t::FLOAT), float>::type
  decode(ENC_t encvalue) {
    return decode<ENC_t, float>(encvalue);
  }

  template <typename ENC_t, data_t ST>
  typename std::enable_if<(ST==data_t::DOUBLE), double>::type
  decode(ENC_t encvalue) {
    return decode<ENC_t, double>(encvalue);
  }

  template <typename ENC_t, data_t ST>
  typename std::enable_if<(ST==data_t::BOOL), bool>::type
  decode(ENC_t encvalue) {
    return decode<ENC_t, bool>(encvalue);
  }

  template <typename ENC_t, data_t ST>
  typename std::enable_if<(ST==data_t::DATE), std::time_t>::type
  decode(ENC_t encvalue) {
    return decode<ENC_t, std::time_t>(encvalue);
  }

  template <typename ENC_t, size_t MAX_s, data_t ST>
  std::string decode(std::array<ENC_t, MAX_s> &val) {
    return std::string(reinterpret_cast<const char*>(val.data()));
  }
}  // namespace data_types


// METHODS SPECIALIZATION FOR UINT64 ENC_t
template<>
uint64_t data_types::encode<uint64_t,
                            std::string,
                            data_types::UINT>(std::string &str) {
  uint64_t value;
  try { value = std::stoull(str); }
  catch(...) { value = kNullValue<uint64_t>; }
  return value;
}

template<>
uint64_t data_types::encode<uint64_t,
                            std::string,
                            data_types::INT>(std::string &str) {
  uint64_t encval;
  int64_t value;
  try { value = stoll(str); }
  catch(...) { return kNullValue<uint64_t>; }
  memcpy(&encval, &value, sizeof(value));
  return encval;
}

template<>
uint64_t data_types::encode<uint64_t,
                            std::string,
                            data_types::FLOAT>(std::string &str) {
  uint64_t encval;
  float value;
  try { value = stof(str); }
  catch(...) { return kNullValue<uint64_t>; }
  memcpy(&encval, &value, sizeof(value));
  return encval;
}

template<>
uint64_t data_types::encode<uint64_t,
                            std::string,
                            data_types::DOUBLE>(std::string &str) {
  uint64_t encval;
  double value;
  try { value = stod(str); }
  catch(...) { return kNullValue<uint64_t>; }
  memcpy(&encval, &value, sizeof(value));
  return encval;
}

template<>
uint64_t data_types::encode<uint64_t,
                            std::string,
                            data_types::BOOL>(std::string &str) {
  if (str.size() == 0) return kNullValue<uint64_t>;
  uint64_t encval = 1;
  if ((str == "F") || (str == "f") || (str == "FALSE")
                   || (str == "false") || (str == "0")) encval = 0;
  return encval;
}


template<>
uint64_t data_types::encode<uint64_t,
                            std::string,
                            data_types::CHARS>(std::string &str) {
  uint64_t encval = 0;
  memset(&encval, '\0', sizeof(encval));
  memcpy(&encval, str.c_str(), sizeof(encval)-1);
  return encval;
}

template<>
uint64_t data_types::encode<uint64_t,
                            std::string,
                            data_types::IP_ADDRESS>(std::string &str) {
  uint64_t val, value = 0;
  std::string::iterator start = str.begin();
  for (unsigned i = 0; i < 4; i ++) {
    std::string::iterator end = std::find(start, str.end(), '.');
    try {
      val = std::stoull(std::string(start, end));
    } catch(...) {
      return kNullValue<uint64_t>;
    }
    if (val < 256) {
      value = (value << 8) + val; start = end + 1;
    } else {
      return kNullValue<uint64_t>;
    }
  }
  return value;
}

template<>
uint64_t data_types::encode<uint64_t,
                            std::string,
                            data_types::DATE>(std::string &str) {
  uint64_t value = 0;
  struct tm date{};
  date.tm_isdst = -1;
  strptime(str.c_str(), "%Y-%m-%d", &date);
  time_t t;
  try {
    t = mktime(&date);
  }
  catch(...) {
    return kNullValue<uint64_t>;
  }
  memcpy(&value, &t, sizeof(value));
  return value;
}

template<>
uint64_t data_types::encode<uint64_t,
                            std::string,
                            data_types::USDATE>(std::string &str) {
  uint64_t value = 0;
  struct tm date{};
  date.tm_isdst = -1;
  strptime(str.c_str(), "%m/%d/%y", &date);
  time_t t;
  try {
    t = mktime(&date);
  }
  catch(...) {
    return kNullValue<uint64_t>;
  }
  memcpy(&value, &t, sizeof(value));
  return value;
}

template<>
uint64_t data_types::encode<uint64_t,
                            std::string,
                            data_types::DATE_TIME>(std::string &str) {
  uint64_t value = 0;
  struct tm date{};
  date.tm_isdst = -1;
  strptime(str.c_str(), "%Y-%m-%dT%H:%M:%S", &date);
  time_t t;
  try {
    t = mktime(&date);
  }
  catch(...) {
    return kNullValue<uint64_t>;
  }
  memcpy(&value, &t, sizeof(value));
  return value;
}

template <typename ENC_t, typename IN_t>
ENC_t data_types::encode(IN_t &in, data_types::data_t dt) {
  switch (dt) {
//     case data_types::STRING :
//       return data_types::encode<ENC_t, IN_t, data_types::STRING>(in);
//     case data_types::CHARS :
//       return data_types::encode<ENC_t, IN_t, data_types::CHARS>(in);
    case data_types::UINT :
      return data_types::encode<ENC_t, IN_t, data_types::UINT>(in);
    case data_types::INT :
      return data_types::encode<ENC_t, IN_t, data_types::INT>(in);
    case data_types::FLOAT :
      return data_types::encode<ENC_t, IN_t, data_types::FLOAT>(in);
    case data_types::DOUBLE :
      return data_types::encode<ENC_t, IN_t, data_types::DOUBLE>(in);
    case data_types::BOOL :
      return data_types::encode<ENC_t, IN_t, data_types::BOOL>(in);
    case data_types::DATE :
      return data_types::encode<ENC_t, IN_t, data_types::DATE>(in);
    case data_types::USDATE :
      return data_types::encode<ENC_t, IN_t, data_types::USDATE>(in);
    case data_types::DATE_TIME :
      return data_types::encode<ENC_t, IN_t, data_types::DATE_TIME>(in);
    case data_types::IP_ADDRESS :
      return data_types::encode<ENC_t, IN_t, data_types::IP_ADDRESS>(in);
  }
  return data_types::kNullValue<ENC_t>;
}

template<>
std::string data_types::decode<uint64_t,
                               std::string,
                               data_types::UINT>(uint64_t value) {
  if (value == kNullValue<uint64_t>) return "";
  return std::to_string(value);
}

template<>
std::string data_types::decode<uint64_t,
                               std::string,
                               data_types::INT>(uint64_t value) {
  if (value == kNullValue<uint64_t>) return "";
  int64_t v;
  memcpy(&v, &value, sizeof(v));
  return std::to_string(v);
}

template<>
std::string data_types::decode<uint64_t,
                               std::string,
                               data_types::FLOAT>(uint64_t value) {
  if (value == kNullValue<uint64_t>) return "";
  float v;
  memcpy(&v, &value, sizeof(v));
  return std::to_string(v);
}

template<>
std::string data_types::decode<uint64_t,
                               std::string,
                               data_types::DOUBLE>(uint64_t value) {
  if (value == kNullValue<uint64_t>) return "";
  double v;
  memcpy(&v, &value, sizeof(v));
  return std::to_string(v);
}

template<>
std::string data_types::decode<uint64_t,
                               std::string,
                               data_types::IP_ADDRESS>(uint64_t value) {
  std::string ipAddr = "";
  uint64_t octets[4];
  for (uint64_t k = 0; k < 4; k ++) {octets[k] = value & 255; value = value >> 8;}
  for (uint64_t k = 3; k >= 1; k --) ipAddr += std::to_string(octets[k]) + '.';
  return ipAddr + std::to_string(octets[0]);
}

template<>
std::string data_types::decode<uint64_t,
                               std::string,
                               data_types::BOOL>(uint64_t value) {
  if (value == kNullValue<uint64_t>) return "";
  return std::to_string(value);
}

template<>
std::string data_types::decode<uint64_t,
                               std::string,
                               data_types::DATE>(uint64_t value) {
  time_t t = data_types::decode<uint64_t, data_types::DATE>(value);
  char dateString[11];
  strftime(dateString, 11, "%Y-%m-%d", std::localtime(&t));
  return std::string(dateString);
}

template<>
std::string data_types::decode<uint64_t,
                               std::string,
                               data_types::CHARS>(uint64_t value) {
  const char* c = reinterpret_cast<const char*>(&value);
  return std::string(c);
}

template <>
uint64_t data_types::decode<uint64_t, uint64_t>(uint64_t encvalue) {
  return encvalue;
}




}  // namespace shad

#endif  // INCLUDE_SHAD_EXTENSIONS_DATA_TYPES_DATA_TYPES_H_