17 lines
348 B
C++
17 lines
348 B
C++
#ifndef HUMANUS_TOKENIZER_BASE_H
|
|
#define HUMANUS_TOKENIZER_BASE_H
|
|
|
|
#include <vector>
|
|
#include <string>
|
|
|
|
namespace humanus {
|
|
|
|
/// @brief Abstract interface for tokenizers.
///
/// Concrete tokenizers derive from this class and implement both pure virtual
/// methods. The interface holds no state of its own.
class BaseTokenizer {
public:
    /// Virtual destructor so that a derived tokenizer can be safely destroyed
    /// through a `BaseTokenizer*` (e.g. when owned by a
    /// `std::unique_ptr<BaseTokenizer>`); without it such a delete is
    /// undefined behaviour.
    virtual ~BaseTokenizer() = default;

    /// @brief Convert text into a sequence of tokens.
    /// @param text Input string to tokenize.
    /// @return The tokens produced for @p text, as `size_t` values.
    virtual std::vector<size_t> encode(const std::string& text) const = 0;

    /// @brief Convert a sequence of tokens into a string.
    /// @param tokens Tokens to convert (presumably values produced by
    ///               encode(); the exact contract is up to the implementation).
    /// @return The string built from @p tokens.
    virtual std::string decode(const std::vector<size_t>& tokens) const = 0;
};
|
|
|
|
}
|
|
|
|
#endif // HUMANUS_TOKENIZER_BASE_H
|