OnlineGDB

online compiler and debugger for c/c++

code. compile. run. debug. share.

Language

main.cpp

#include <iostream> #include <vector> #include <cstring> #include <algorithm> #include <numeric> #include <iomanip> #include <functional> class Dataframe { public: enum class Base : char { SIGNED = 'S', UNSIGNED = 'U', CHAR = 'A', // and other types like floats, date, timestamp, etc. }; class Dtype { public: Dtype(Base base_dtype, std::size_t size) : m_base_dtype(base_dtype), m_size(size) {} auto base_dtype() const { return m_base_dtype; } auto size() const { return m_size; } private: Base m_base_dtype; std::size_t m_size; }; class Comparator { public: using comparator_fn = std::function<std::int8_t(const char*, const char*)>; // create a function that returns a comparator functions for each type static comparator_fn make_comparator(Dataframe::Base base_dtype, std::size_t size, std::size_t offset) { switch (base_dtype) { case Dataframe::Base::CHAR: return [offset](const char *a, const char *b) { return std::strcmp(a + offset, b + offset); }; case Dataframe::Base::SIGNED: return [offset](const char *a, const char *b) { const auto v1 = *reinterpret_cast<const int*>(a + offset); const auto v2 = *reinterpret_cast<const int*>(b + offset); return (v1 < v2) ? -1 : (v1 > v2) ? 1 : 0; }; case Dataframe::Base::UNSIGNED: return [offset](const char *a, const char *b) { const auto v1 = *reinterpret_cast<const unsigned int*>(a + offset); const auto v2 = *reinterpret_cast<const unsigned int*>(b + offset); return (v1 < v2) ? -1 : (v1 > v2) ? 1 : 0; }; default: throw std::runtime_error("Unknown base dtype"); } } // create a function that static comparator_fn make_composite_comparator(const std::vector<Dataframe::Dtype> &dtypes) { std::vector<comparator_fn> F; std::size_t offset = 0; for (auto &dtype : dtypes) { F.push_back(make_comparator(dtype.base_dtype(), dtype.size(), offset)); offset += dtype.size(); } return [F](const char *a, const char *b) { for (auto &f : F) { if (const int8_t result = f(a, b); result != 0) return result < 0; } return false; }; } }; Dataframe(const std::vector<char> &raw_data, const std::vector<Dtype> &dtypes) : m_raw_data(raw_data), m_dtypes(dtypes) { cols_count = m_dtypes.size(); m_record_size = std::accumulate(m_dtypes.begin(), m_dtypes.end(), 0, [](std::size_t acc, const Dtype &dtype) { return acc + dtype.size(); }); m_records_count = m_raw_data.size() / m_record_size; std::cout << "Info:" << std::endl; std::cout << " - cols count: " << cols_count << std::endl; std::cout << " - record size: " << m_record_size << std::endl; std::cout << " - records count: " << m_records_count << std::endl; std::cout << std::endl; } void print() { // cycle over the records std::cout << "Dataframe:" << std::endl; for (std::size_t i = 0; i < m_records_count; i++) { // cycle over the dtypes (e.g. columns) auto current_field = m_raw_data.data() + i * m_record_size; for (std::size_t j = 0; j < m_dtypes.size(); j++) { auto dtype = m_dtypes[j]; auto base_dtype = dtype.base_dtype(); auto size = dtype.size(); current_field += j > 0 ? m_dtypes[j - 1].size() : 0; // print the data switch (base_dtype) { case Base::CHAR: std::cout << std::setw(1) << *current_field << ";"; break; case Base::SIGNED: std::cout << std::setw(4) << *reinterpret_cast<int*>(current_field) << ";"; break; case Base::UNSIGNED: std::cout << std::setw(4) << *reinterpret_cast<unsigned int*>(current_field) << ";"; break; } } std::cout << std::endl; } std::cout << std::endl; } void sort() { if (cols_count == 1) { auto dtype = m_dtypes[0]; auto base_dtype = dtype.base_dtype(); auto size = dtype.size(); auto data = m_raw_data.data(); // print the data switch (base_dtype) { case Base::CHAR: std::sort(data, data + m_raw_data.size()); break; case Base::SIGNED: std::sort((int*)data, (int*)data + m_raw_data.size() / size); break; case Base::UNSIGNED: std::sort((unsigned int*)data, (unsigned int*)data + m_raw_data.size() / size); break; } } else { // This is a naive implementation, it works but it's not efficient // use the ptr, compare and then swap the records auto comparator = Comparator::make_composite_comparator(m_dtypes); // create a set of pointers to the records std::vector<char*> ptrs; for (std::size_t i = 0; i < m_records_count; i++) { ptrs.push_back(m_raw_data.data() + i * m_record_size); } // sort the pointers std::sort(ptrs.begin(), ptrs.end(), comparator); // apply pointers permutation std::vector<char> tmp(m_raw_data.size()); for (std::size_t i = 0; i < m_records_count; i++) { std::copy(ptrs[i], ptrs[i] + m_record_size, tmp.data() + i * m_record_size); } m_raw_data = tmp; } } private: std::vector<char> m_raw_data; std::vector<Dtype> m_dtypes; std::size_t m_records_count; std::size_t m_record_size; std::size_t cols_count; }; void trivial_case_with_1_column() { std::vector<char> raw; int data[] = {8, 30, 8, 19, 7, 6, 10, 5, 3, 2, 1, 5}; raw.insert(raw.end(), (char*)data, (char*)data + sizeof(data)); Dataframe df(raw, {Dataframe::Dtype(Dataframe::Base::SIGNED, sizeof(int))}); df.print(); df.sort(); std::cout << "AFTER SORTING" << std::endl; df.print(); } void problematic_case_with_2_or_more_columns() { std::vector<char> raw; int column1[] = {8, 30, 8, 19, 7, 6, 10, 5, 3, 2, 1, 5}; char column2[] = {'z', 'a', 'a', 'b', 'c', 'd', 'e', 'x', 'g', 'h', 'i', 'a'}; for (std::size_t i = 0; i < sizeof(column1) / sizeof(int); i++) { raw.insert(raw.end(), (char*)&column1[i], (char*)&column1[i] + sizeof(int)); raw.insert(raw.end(), (char*)&column2[i], (char*)&column2[i] + sizeof(char)); } const auto dtypes = {Dataframe::Dtype(Dataframe::Base::SIGNED, sizeof(int)), Dataframe::Dtype(Dataframe::Base::CHAR, sizeof(char))}; Dataframe df(raw, dtypes); df.print(); df.sort(); std::cout << "AFTER SORTING" << std::endl; df.print(); } int main(int argc, char const *argv[]) { std::cout << "This case can be sorted by using a std::sort that casts the pointer to the correct type and it's efficient" << std::endl; trivial_case_with_1_column(); std::cout << "----------------------------------------------------------------\n" << std::endl; std::cout << "I don't know how to manage effectively this case" << std::endl; problematic_case_with_2_or_more_columns(); return 0; }

close ad [x]

Compiling Program...

Command line arguments:

Standard Input: Interactive Console Text

Program is not being debugged. Click "Debug" button to start program in debug mode.

Call Stack

#	Function	File:Line

Local Variables

Variable	Value

Registers

Register	Value

Display Expressions

Expression	Value

Breakpoints and Watchpoints

	#	Description