Tuesday, June 7, 2016

c++ like template AutoGrad - How to compute the Derivative Of a function using templates

You may have heard of a system called AutoGrad. It is a system that when given an equation computes the derivate of it automatically. Here is an example of how to build the same system using c++ templates.

I was inspired to write this after i realized i messed up the differentiation of the for the neural networks back prop path in my last post.

Clearly you see where im headed with this.. a few more mods to make it work with tensors and ill have full autograd system for c++ that can automatically compute the backdrop of a neural network.. and will also compile in c4driod.. and then toss in some thrust code and it will be CUDA capable able to use PC with GPUs

from smartphone to GPU in a few short steps...

Anyway here is the proof of concept code:

output looks like
x:2 m:3 c:4 p:6 y:10 z:16 q:100
 dm/dx:0 dx/dx:1 dp/dx:1 dy/dx:3 dz/dx:12 dq/dx:60

// compile with "g++ std=c++11 ..."
// or in c4driod on an andriod phone..

#include <tuple>
#include <iostream>
#include <cmath>

template <int VALUE>
struct Const
{
    template< typename... Types >
    static double op(std::tuple<Types...>& params )
    {
        return VALUE;
    }
};

template <int ID>
struct Var
{
    template< typename... Types >
    static double op(std::tuple<Types...>& params )
    {
        return std::get<ID>(params);
    }
};

template <typename L, typename R>
struct OpAdd
{
    template< typename... Types >
    static double op(std::tuple<Types...>& params)
    {
        return L::op(params) + R::op(params);
    }
};

template <typename L, typename R>
struct OpMult
{
    template< typename... Types >
    static double op(std::tuple<Types...>& params)
    {
        return L::op(params) * R::op(params);
    }
};

template <typename L, int POW>
struct OpPower
{
    template< typename... Types >
    static double op(std::tuple<Types...>& params)
    {
        return std::pow(L::op(params), POW);
    }
};

template <typename Num, typename Denum>
struct DerivativeOf {};

template <int V, int ID_D>
struct DerivativeOf<Const<V>, Var<ID_D> >
{
    typedef Const<0> Type;
};

template <int ID_N, int ID_D>
struct DerivativeOf<Var<ID_N>, Var<ID_D> >
{
    typedef Const<0> Type;
};

template <int ID_N>
struct DerivativeOf<Var<ID_N>, Var<ID_N> >
{
    typedef Const<1> Type;
};

template <typename L, typename R, typename D>
struct DerivativeOf<OpAdd<L,R>, D >
{
    typedef OpAdd<typename DerivativeOf<L,D>::Type,
                  typename DerivativeOf<R,D>::Type> Type;
};

template <typename L, typename R, typename D>
struct DerivativeOf<OpMult<L,R>, D >
{
    typedef OpAdd<OpMult<typename DerivativeOf<L, D>::Type, R>,
                  OpMult<L, typename DerivativeOf<R, D>::Type> > Type;
};

template <typename L, int POW>
struct DerivativeOf<OpPower<L,POW>, L >
{
    typedef OpMult<Const<POW>, OpPower<L, POW-1> > Type;
};

template <typename L, int POW, typename D>
struct DerivativeOf<OpPower<L,POW>, D >
{
    typedef OpMult<OpMult<Const<POW>, OpPower<L, POW-1> >,
                   typename DerivativeOf<L, D>::Type>  Type;
};

int main()
{
    typedef Var<0> X;
    typedef Var<1> M;
    typedef Var<2> C;
    typedef OpAdd<X,C> P;                      // p = x + c
    typedef OpAdd<OpMult<M,X>,C> Y;            // y = m*x + c
    typedef OpAdd<OpMult<M,OpPower<X,2>>,C> Z; // z = m*x^2 + c
    typedef OpPower<OpAdd<OpMult<M,X>,C>,2> Q; // q = = y^2 = (m*x + c)^2

    typedef DerivativeOf<M, X>::Type  dM_dX;   // dm/dx = 0
    typedef DerivativeOf<X, X>::Type  dX_dX;   // dx/dx = 1
    typedef DerivativeOf<P, X>::Type  dP_dX;   // dP/dx = 1
    typedef DerivativeOf<Y, X>::Type  dY_dX;   // dY/Dx = m
    typedef DerivativeOf<Z, X>::Type  dZ_dX;   // dZ/dx = 2x
    typedef DerivativeOf<Q, X>::Type  dQ_dX;   // dQ/dx = dq/dy + dy/dx = 2*(m*x + c)*m

    X x;
    M m;
    C c;
    P p;
    Y y;
    Z z;
    Q q;

    dX_dX  dx_dx;
    dM_dX  dm_dx;
    dP_dX  dp_dx;
    dY_dX  dy_dx;
    dZ_dX  dz_dx;
    dQ_dX  dq_dx;

    std::tuple<double,double,double> params = std::make_tuple(2.0,3.0,4.0);

    std::cout << " x:" << x.op(params)
              << " m:" << m.op(params)
              << " c:" << c.op(params)
              << " p:" << p.op(params)
              << " y:" << y.op(params)
              << " z:" << z.op(params)
              << " q:" << q.op(params)
              << "\n";

    std::cout << " dm/dx:"  << dm_dx.op(params)
              << " dx/dx:"  << dx_dx.op(params)
              << " dp/dx:"  << dp_dx.op(params)
              << " dy/dx:"  << dy_dx.op(params)
              << " dz/dx:"  << dz_dx.op(params)
              << " dq/dx:"  << dq_dx.op(params)
              << "\n";
}