Thursday, May 5, 2016

libraryless c++ neural nets

UPDATE: DONT USE THIS

I have noticed that i messed up the backprop.. seems Matrics are not my strong point and do/dw is a killer.. even the wiki page on it gives up https://en.wikipedia.org/wiki/Matrix_calculus#Layout_conventions ... check the "Result of differentiating various kinds of aggregates with other kinds of aggregates" table it has "??" in the vector y to matrix X column



I like every other com-sci guy on the planet is studying Machine learning... But to do this tools are needed..

Libraries, toolkits and frameworks with their sundry list of requirements have always annoyed the hell out of me. These things are never cost-less.. We constantly have burn time to setup a system that is compatible with the megabytes of dependencies that these things require to be installed, then deal with the portability issues that stop them from installing and then over the long term maintain and upgrade them and deal with package drift between the various components to keep it all running and then hope that the package maintainers dont quit updating the entire mess.

Library free raw language implementation have some big plus going for them:
* Firstly any of todays smartphones have enough processing power to do the job of training and working with neural networks.. but the tool chain makes it next to impossible actually do the job..
* Secondly companies are often paranoid about what you can install in workstations.

So in short I have tossed out the ML tool chain and give a shoot at doing it in plain simple c++.

ML at its core is just Matrix math so I have hacked together a Matrix class that can be compiled with c4driod, g++, and clang. Then using this Matrix class build a simple xor model and trained using hand worked out back prop. This is proof of concept that with less than 1000 lines of normal c++ code and a standard c++ compiler your good to go for ML projects.

The whole mess is 728 lines of c++11 code (it needs a fair bit clean up but hey whatever)

Heres the end result of the build and run on my smartphone (a nexus 5x using c4driod)



And the code:
#include <iostream>
#include <vector>
#include <functional>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <cmath>
#include <cstdlib>

// ****************************************************************
// *************************** MATRIX *****************************
// ****************************************************************

template <typename Type>
struct Matrix
{
    //private:
    typedef std::vector<Type> Data;

    std::shared_ptr<Data>    data_;
    std::vector<std::size_t> shape_;

    // friend class MatrixUtils<Type>;
    //public:

    Matrix()
    {}

    Matrix(std::size_t cols,
           std::size_t rows)
    {
        data_.reset(new Data);
        data_->resize(cols*rows);
        shape_.push_back(cols);
        shape_.push_back(rows);
    }

    template <std::size_t N>
    Matrix(Type (&raw)[N],
           std::size_t cols,
           std::size_t rows)
    {
        data_.reset(new Data);
        data_->resize(N);
        data_->assign(raw,raw+N);
        shape_.push_back(cols);
        shape_.push_back(rows);
    }

    Matrix(Type* begin, Type* end,
           std::size_t cols,
           std::size_t rows)
    {
        data_.reset(new Data);
        data_->resize(cols*rows);
        data_->assign(begin,end);
        shape_.push_back(cols);
        shape_.push_back(rows);
    }

    Matrix(std::shared_ptr<Data> data,
           std::size_t cols,
           std::size_t rows) :
        data_(data)
    {
        shape_.push_back(cols);
        shape_.push_back(rows);
    }

    Type& at(std::size_t x, std::size_t y)
    {
        return (*data_)[y*shape_[0] + x];
    }

    Type at(std::size_t x, std::size_t y) const
    {
        return (*data_)[y*shape_[0] + x];
    }
};

// ****************************************************************
// ************************ MATRIX UTILS **************************
// ****************************************************************

template <typename Type>
struct MatrixUtils
{
    static void print(std::ostream& os, const Matrix<Type>& a)
    {
       for (std::size_t y = 0; y < a.shape_[1]; ++y)
        {
            for (std::size_t x = 0; x < a.shape_[0]; ++x)
            {
                os << a.at(x,y) << " ";
            }
            os << "\n";
        }
    }

    static Matrix<Type> matmul(const Matrix<Type>& a,
                               const Matrix<Type>& b)
    {
        if (a.shape_[0] != b.shape_[1])
        {
            std::stringstream ss;
            ss << "Matrix shapes wrong for matmul"
               << " a: " << a.shape_[0] << "x" << a.shape_[1]
               << " b: " << b.shape_[0] << "x" << b.shape_[1];
            throw std::runtime_error(ss.str());
        }

        Matrix<Type> res(b.shape_[0], a.shape_[1]);

        for (std::size_t y = 0; y < res.shape_[1]; ++y)
        {
            for (std::size_t x = 0; x < res.shape_[0]; ++x)
            {
                Type sum = 0;
                for (std::size_t i = 0; i < a.shape_[0]; ++i)
                {
                    sum += a.at(i,y) * b.at(x,i);
                }
                res.at(x,y) = sum;
            }
        }

        return res;
    }

    static Matrix<Type> selrow(std::size_t row,
                               const Matrix<Type>& a)
    {
        // TODO reimpl as an iterator!
        Matrix<Type> r(a.shape_[0],1);

        for (std::size_t x = 0; x < a.shape_[0]; ++x)
        {
            r.at(x,0) = a.at(x,row);
        }

        return r;
    }

    static Matrix<Type> selcol(std::size_t col,
                               const Matrix<Type>& a)
    {
        Matrix<Type> r(1,a.shape_[1]);

        for (std::size_t y = 0; y < a.shape_[1]; ++y)
        {
            r.at(0,y) = a.at(col,y);
        }

        return r;
    }

    static Matrix<Type> transpose(const Matrix<Type>& a)
    {
        Matrix<Type> r(a.shape_[1],a.shape_[0]);

        for (std::size_t y = 0; y < a.shape_[1]; ++y)
        {
            for (std::size_t x = 0; x < a.shape_[0]; ++x)
            {
                r.at(y,x) = a.at(x,y);
            }
        }

        return r;
    }

    static void unifunctor_inplace(std::function<Type (Type)> func,
                                   Matrix<Type>& a)

    {
        typedef typename Matrix<Type>::Data::iterator iterator;
        // TODO this feels llike there should be an std:algo for it
        //  maybe generate ??

        for(iterator it = a.data_->begin();
            it != a.data_->end();
            ++it)
        {
            *it = func(*it);
        }
    }

    static Matrix<Type> unifunctor(std::function<Type (Type)> func,
                                   const Matrix<Type>& a)

    {
        typedef typename Matrix<Type>::Data::iterator iterator;
        // TODO this feels llike there should be an std:algo for it
        //  maybe generate ??

        Matrix<Type> r(a.shape_[0],a.shape_[1]);

        iterator ait = a.data_->begin();
        for(iterator rit = r.data_->begin();
            rit != r.data_->end();
            ++rit)
        {
            *rit = func(*ait);
            ++ait;
        }

        return r;
    }

    static Matrix<Type> bifunctor(std::function<Type (Type,Type)> func,
                                  const Matrix<Type>& a,
                                  const Matrix<Type>& b)

    {
        typedef typename Matrix<Type>::Data::iterator iterator;

        if (a.shape_[0] != b.shape_[0] or
            a.shape_[1] != b.shape_[1])
        {
            std::stringstream ss;
            ss << "Matrix shapes mismatch for bifunctor"
               << " a: " << a.shape_[0] << "x" << a.shape_[1]
               << " b: " << b.shape_[0] << "x" << b.shape_[1];
            throw std::runtime_error(ss.str());
        }

        // TODO this feels llike there should be an std:algo for it
        //  maybe generate ??
        Matrix<Type> r(a.shape_[0], a.shape_[1]);

        iterator ait = a.data_->begin();
        iterator bit = b.data_->begin();
        iterator rit = r.data_->begin();

        while (rit != r.data_->end())
        {
            *rit = func(*ait, *bit);
            ++ait;
            ++bit;
            ++rit;
        }

        return r;
    }

    static void bifunctor_inplace(std::function<Type (Type,Type)> func,
                                  Matrix<Type>& a,
                                  const Matrix<Type>& b)

    {
        typedef typename Matrix<Type>::Data::iterator iterator;

        if (a.shape_[0] != b.shape_[0] or
            a.shape_[1] != b.shape_[1])
        {
            std::stringstream ss;
            ss << "Matrix shapes mismatch for bifunctor"
               << " a: " << a.shape_[0] << "x" << a.shape_[1]
               << " b: " << b.shape_[0] << "x" << b.shape_[1];
            throw std::runtime_error(ss.str());
        }

        // TODO this feels llike there should be an std:algo for it
        //  maybe generate ??
        Matrix<Type> r(a.shape_[0], a.shape_[1]);

        iterator bit = b.data_->begin();
        for(iterator ait = a.data_->begin();
            ait != a.data_->end();
            ++ait)
        {
            *ait = func(*ait, *bit);
            ++bit;
        }
    }

    static Matrix<Type> bifunctor_row(std::function<Type (Type,Type)> func,
                                      const Matrix<Type>& a,
                                      const Matrix<Type>& b)
    {
        typedef typename Matrix<Type>::Data::iterator iterator;

        if (a.shape_[0] != b.shape_[0] or
            b.shape_[1] != 1)
        {
            std::stringstream ss;
            ss << "Matrix shapes mismatch for bifunctor_row"
               << " a: " << a.shape_[0] << "x" << a.shape_[1]
               << " b: " << b.shape_[0] << "x" << b.shape_[1];
            throw std::runtime_error(ss.str());
        }

        // TODO this feels llike there should be an std:algo for it
        //  maybe generate ??
        Matrix<Type> r(a.shape_[0], a.shape_[1]);

        iterator ait = a.data_->begin();
        iterator bit = b.data_->begin();
        iterator rit = r.data_->begin();

        while (rit != r.data_->end())
        {
            if (bit == b.data_->end()) bit = b.data_->begin();

            *rit = func(*ait, *bit);
            ++ait;
            ++bit;
            ++rit;
        }

        return r;
    }

    static Matrix<Type> bifunctor_scaler(std::function<Type (Type,Type)> func,
                                        const Type a,
                                        const Matrix<Type>& b)

    {
        typedef typename Matrix<Type>::Data::iterator iterator;

        // TODO this feels llike there should be an std:algo for it
        //  maybe generate ??
        Matrix<Type> r(b.shape_[0], b.shape_[1]);

        iterator bit = b.data_->begin();
        iterator rit = r.data_->begin();

        while (rit != r.data_->end())
        {
            *rit = func(a,*bit);
            ++bit;
            ++rit;
        }

        return r;
    }

    static Matrix<Type> bifunctor_scaler(std::function<Type (Type,Type)> func,
                                        const Matrix<Type>& a,
                                        const Type b)
    {
        typedef typename Matrix<Type>::Data::iterator iterator;

        // TODO this feels llike there should be an std:algo for it
        //  maybe generate ??
        Matrix<Type> r(a.shape_[0], a.shape_[1]);

        iterator ait = a.data_->begin();
        iterator rit = r.data_->begin();

        while (rit != r.data_->end())
        {
            *rit = func(*ait,b);
            ++ait;
            ++rit;
        }

        return r;
    }

    struct Helpers
    {
        static Type add(Type a, Type b) { return a+b; }
        static Type sub(Type a, Type b) { return a-b; }
        static Type mul(Type a, Type b) { return a*b; }
        static Type div(Type a, Type b) { return a/b; }

        static Type relu(Type a) { return (a < 0) ? 0 : a; }
        static Type tanh(Type a) { return std::tanh(a); }
        static Type rand(Type a) { return static_cast<Type>(std::rand() % 100)/100.0 - 0.5; }
        static Type ones(Type a) { return 1.0; }

        static Type xor_f(Type a, Type b) { return (a*b < 0) ? -1.0 : 1.0; }
    };
};

// ****************************************************************
// ************************ MATRIX OPERATORS **********************
// ****************************************************************

template <typename Type>
std::ostream& operator<<(std::ostream& os, const Matrix<Type>& a)
{
    MatrixUtils<Type>::print(os, a);
    return os;
}

template <typename Type>
Matrix<Type> operator+(const Matrix<Type>& a,
                       const Matrix<Type>& b)
{
    return MatrixUtils<Type>::bifunctor(&MatrixUtils<Type>::Helpers::add,a,b);
}

template <typename Type>
Matrix<Type> operator-(const Matrix<Type>& a,
                       const Matrix<Type>& b)
{
    return MatrixUtils<Type>::bifunctor(&MatrixUtils<Type>::Helpers::sub,a,b);
}

template <typename Type>
Matrix<Type> operator*(const Matrix<Type>& a,
                       const Matrix<Type>& b)
{
    return MatrixUtils<Type>::matmul(a,b);
}

template <typename Type>
Matrix<Type> operator*(Type                a,
                       const Matrix<Type>& b)
{
    return MatrixUtils<Type>::bifunctor_scaler(&MatrixUtils<Type>::Helpers::mul,a,b);
}

template <typename Type>
Matrix<Type> operator*(const Matrix<Type>& a,
                       Type                b)
{
    return MatrixUtils<Type>::bifunctor_scaler(&MatrixUtils<Type>::Helpers::mul,a,b);
}

template <typename Type>
Matrix<Type> operator/(const Matrix<Type>& a,
                       const Matrix<Type>& b)
{
    return MatrixUtils<Type>::bifunctor(&MatrixUtils<Type>::Helpers::div,a,b);
}

template <typename Type>
Matrix<Type> operator+=(Matrix<Type>&       a,
                        const Matrix<Type>& b)
{
    MatrixUtils<Type>::bifunctor_inplace(&MatrixUtils<Type>::Helpers::add,a,b);
    return a;
}

template <typename Type>
Matrix<Type> product(const Matrix<Type>& a,
                     const Matrix<Type>& b)
{
    return MatrixUtils<Type>::bifunctor(&MatrixUtils<Type>::Helpers::mul,a,b);
}

template <typename Type>
Matrix<Type> rowadd(const Matrix<Type>& a,
                    const Matrix<Type>& b)
{
    return MatrixUtils<Type>::bifunctor_row(&MatrixUtils<Type>::Helpers::add,a,b);
}

template <typename Type>
Matrix<Type> transpose(const Matrix<Type>& a)
{
    return MatrixUtils<Type>::transpose(a);
}

template <typename Type>
Matrix<Type> tanh(const Matrix<Type>& a)
{
    return MatrixUtils<Type>::unifunctor(&MatrixUtils<Type>::Helpers::tanh,a);
}

template <typename Type>
Matrix<Type> tanh_derivate(const Matrix<Type>& a)
{
    // dtanh/dx = 1 - (tanh(x)) ^ 2
    Matrix<Type> th = tanh(a);

    return MatrixUtils<Type>::bifunctor_scaler(&MatrixUtils<Type>::Helpers::sub, 1, product(th,th));
}

template <typename Type>
void rand(Matrix<Type>& a)
{
    MatrixUtils<Type>::unifunctor_inplace(&MatrixUtils<Type>::Helpers::rand,a);
}

template <typename Type>
void ones(Matrix<Type>& a)
{
    MatrixUtils<Type>::unifunctor_inplace(&MatrixUtils<Type>::Helpers::ones,a);
}

// ****************************************************************
// ****************************************************************
// ****************************************************************

struct Model
{
    // input   2
    // hidden1 4
    // hidden2 2
    // output  1
    // function tanh
    // learn rate: 0.03
    // batch size 10
    // xor data [-1, 1]
    //  result in less than 140 iterators
    typedef float Type;

    struct Network
    {
        Matrix<Type> x_;
        Matrix<Type> o1_;
        Matrix<Type> h1_;
        Matrix<Type> o2_;
        Matrix<Type> h2_;
        Matrix<Type> o3_;
        Matrix<Type> y_ ;
    };

    struct Params
    {
        Matrix<float> Wh1_;
        Matrix<float> Bh1_;
        Matrix<float> Wh2_;
        Matrix<float> Bh2_;
        Matrix<float> Wo_;
        Matrix<float> Bo_;

        Params() :
            Wh1_(4,2),
            Bh1_(4,1),
            Wh2_(2,4),
            Bh2_(2,1),
            Wo_ (1,2),
            Bo_ (1,1)
        {
            // init wieghts
            rand(Wh1_);
            rand(Bh1_);
            rand(Wh2_);
            rand(Bh2_);
            rand(Wo_ );
            rand(Bo_ );
        }
    };

    Params p_;

    // init weights
    Model() :
        p_()
    {}

    void forward(Network& net, const Matrix<Type>& inputs)
    {
        // forward network params
        // h1 = tanh(Wh1*x  + Bh1)
        // h2 = tanh(Wh2*h1 + Bh2)
        // y  = tanh(Wo *h2 + Bo )
        net.x_ = inputs;

        net.o1_ = rowadd(net.x_  * p_.Wh1_, p_.Bh1_);  net.h1_ = tanh(net.o1_);
        net.o2_ = rowadd(net.h1_ * p_.Wh2_, p_.Bh2_);  net.h2_ = tanh(net.o2_);
        net.o3_ = rowadd(net.h2_ * p_.Wo_ , p_.Bo_ );  net.y_  = tanh(net.o3_);
    }

    Matrix<Type> loss(const Matrix<Type>& expected,
                      const Network& net)
    {
        // you dont have to run this unless your printing stuff.. the back prop needs the "dE/dy" not the "E"
        Matrix<Type> delta = net.y_ - expected;
        return product(delta, delta);
    }

    void backward(const Matrix<Type>& expected,
                  const Network& net,
                  const Type alpha)
    {
        // note tanh(x) = (exp(?x)-?exp(?-x))/?(exp(?x)+?exp(?-x))
        // dtanh/dx = 1 - (tanh(x)) ^ 2

        // forward network params
        // o1 = Wh1*x  + Bh1 ; h1 = tanh(o1)
        // o2 = Wh2*h1 + Bh2 ; h2 = tanh(o2)
        // o3 = Wo *h2 + Bo  ; y  = tanh(o3)
        // E = (e - y)^2

        // dE/dy =  2 * (y - e)
        //       =~     (y - e)    // toss the 2  out as alpha will take it

        // sigma3 = dE/do3
        //        = dE/dy  * dy/do3
        //        = (y - e) * (1 - (tanh(o3))^2)
        // sigma2 = dE/do2
        //        = dE/dy  * dy/do3 * do3/dh2 * dh2/do2
        //        = sigma3          * do3/dh2 * dh2/do2
        //        = sigma3          * Wo      * (1 - (tanh(o2))^2)
        // sigma1 = dE/do1
        //        = dE/dy  * dy/do3 * do3/dh2 * dh2/do2 * do2/dh1 * dh1/do1
        //        = sigma2                              * do2/dh1 * dh1/do1
        //        = sigma2                              * Wh2     * (1 - (tanh(o1))^2)

        // dWo    = -alpha * dE/dWo
        //        = -alpha * dE/dy * dy/do3 * do3/dWo
        //        = -alpha * sigma3         * do3/dWo
        //        = -alpha * sigma3         * h2
        // dWh2   = -alpha * dE/dWh2
        //        = -alpha * dE/dy * dy/do3 * do3/dh2 * dh2/do2 * do2/dWh2
        //        = -alpha * sigma2                             * do2/dWh2
        //        = -alpha * sigma2                             * h1
        // dWh1   = -alpha * dE/dWh1
        //        = -alpha * dE/dy * dy/do3 * do3/dh2 * dh2/do2 * do2/dh1 * dh1/do1 * do1/dWh1
        //        = -alpha * sigma1                                                 * do1/dWh1
        //        = -alpha * sigma1                                                 * x

        // dBo    = -alpha * dE/dBo
        //        = -alpha * dE/dy * dy/do3 * do3/dBo
        //        = -alpha * sigma3         * do3/dBo
        //        = -alpha * sigma3         * 1
        // dBh2   = -alpha * dE/dBh2
        //        = -alpha * dE/dy * dy/do3 * do3/dh2 * dh2/do2 * do2/dBh2
        //        = -alpha * sigma2                             * do2/dBh2
        //        = -alpha * sigma2                             * 1
        // dBh1   = -alpha * dE/dBh1
        //        = -alpha * dE/dy * dy/do3 * do3/dh2 * dh2/do2 * do2/dh1 * dh1/do1 * do1/dBh1
        //        = -alpha * sigma1                                                 * do1/dBh1
        //        = -alpha * sigma1                                                 * 1

        Matrix<Type> sigma3 = product((net.y_ - expected)        , tanh_derivate(net.o3_));
        Matrix<Type> sigma2 = product(sigma3 * transpose(p_.Wo_ ), tanh_derivate(net.o2_));
        Matrix<Type> sigma1 = product(sigma2 * transpose(p_.Wh2_), tanh_derivate(net.o1_));

        Matrix<Type> dWo  = -alpha * (transpose(net.h2_) * sigma3);
        Matrix<Type> dWh2 = -alpha * (transpose(net.h1_) * sigma2);
        Matrix<Type> dWh1 = -alpha * (transpose(net.x_ ) * sigma1);

        Matrix<Type> transposed_unit(sigma3.shape_[1], 1);
        ones(transposed_unit);

        Matrix<Type> dBo  = -alpha * (transposed_unit * sigma3);
        Matrix<Type> dBh2 = -alpha * (transposed_unit * sigma2);
        Matrix<Type> dBh1 = -alpha * (transposed_unit * sigma1);

        p_.Wo_  += dWo ;
        p_.Wh2_ += dWh2;
        p_.Wh1_ += dWh1;

        p_.Bo_  += dBo ;
        p_.Bh2_ += dBh2;
        p_.Bh1_ += dBh1;
    }

    void train(const Matrix<Type>& inputs,
               const Matrix<Type>& expected,
               const Type alpha,
               const int  cycles)
    {
        //gradient descent .. the hard way?
        for (int i = 0; i < cycles; ++i)
        {
            // batching ??
            Network net;

            forward(net, inputs);
            backward(expected, net, alpha);

            if ((i % 10) == 0)
            {
                Matrix<Type> err = loss(expected,net);
                Matrix<Type> transposed_unit(err.shape_[1], 1);
                ones(transposed_unit);

                Matrix<Type>  netErr = transposed_unit * err;
                std::cout << "**********************************************\n";
                std::cout << "step: " << i << " err:" << netErr;

                std::cout << "       Wo_ :\n" << p_.Wo_ ;
                std::cout << "       Wh2_:\n" << p_.Wh2_;
                std::cout << "       Wh1_:\n" << p_.Wh1_;
                std::cout << "       Bo_ :\n" << p_.Bo_ ;
                std::cout << "       Bh2_:\n" << p_.Bh2_;
                std::cout << "       Bh1_:\n" << p_.Bh1_;

            }
        }
    }
};

void basicTest()
{
    int rawA[] = {1,2,3,4};
    int rawB[] = {2,3,4,5};
    int rawC[] = {2,3,4,5,6,7};

    Matrix<int> a(rawA, 2, 2);
    Matrix<int> b(rawB, 2, 2);
    Matrix<int> c(rawC, 3, 2);

    std::cout << a << "\n";
    std::cout << b << "\n";
    std::cout << c << "\n";

    std::cout << (a + b) << "\n";
    try { std::cout << (a + c) << "\n"; }
    catch (std::exception& e) { std::cout << e.what() << "\n"; }

    std::cout << MatrixUtils<int>::matmul(a,c) << "\n";
    try { std::cout << MatrixUtils<int>::matmul(c,a) << "\n"; }
    catch (std::exception& e) { std::cout << e.what() << "\n"; }

    std::cout << "SUCCESS\n";
}

void xorNetTest()
{
    // generate some train data
    Matrix<float> inputs(2,100);
    rand(inputs);

    // compute xor for input data
    Matrix<float> expected
        = MatrixUtils<float>::bifunctor(&MatrixUtils<float>::Helpers::xor_f,
                                        MatrixUtils<float>::selcol(0, inputs),
                                        MatrixUtils<float>::selcol(1, inputs));

    Model model;
    model.train(inputs,expected,0.03,10001);
}

int main()
{
    basicTest();
    xorNetTest();

}

No comments:

Post a Comment