%% Dynamic programming using radial basis functions %% an inverted pendulum on a cart M = 8; m = 2; mr = m/(m+M); l = 0.5; c = 9.8; h = 0.1; f = @(x,u) kron(x,ones(length(u),1))+h* ... [reshape(ones(length(u),1)*x(:,2)', length(u)*size(x,1),1), ... reshape((c/l*ones(length(u),1)*sin(x(:,1)') ... -1/2*mr*ones(length(u),1)*(x(:,2)'.^2.*sin(2*x(:,1)')) ... -mr/m/l*u*cos(x(:,1)'))./(4/3-mr*ones(length(u),1)*cos(x(:,1)').^2), ... length(u)*size(x,1),1)]; c = @(x,u) exp(-h*(1/2*reshape(0.1*ones(length(u),1)*x(:,1)'.^2 + ... 0.05*ones(length(u),1)*x(:,2)'.^2 + 0.01*u.^2*ones(1,size(x,1)), ... length(u)*size(x,1),1))); phi = @(r) max(spones(r)-r,0).^4.*(4*r+spones(r)); % Wendland function phi = @(r) max(spones(r)-r,0).^10.*(429*r.^4+450*r.^3+210*r.^2+50*r+5*spones(r)); T = [0 0]; v_T = 1; % boundary cond. shepard = @(A) spdiags(1./sum(A')',0,size(A,1),size(A,1))*A; % Shepard op. S = [8,10]; % radius of domain L = 33; U = linspace(-128,128,L)'; % control values N = 50; X1 = linspace(-1,1,N); [XX,YY] = meshgrid(X1*S(1),X1*S(2)); X = [XX(:) YY(:)]; % centers ep = 1/sqrt((4*prod(S)*20/N^2)/pi); % shape parameter fXU = f(X,U); C = c(X,U); A = shepard(phi(ep*sdistm(fXU,[T;X],1/ep))); % Shepard matrix %C = exp(-cXU); % one step costs %% value iteration v = zeros(N^2+1,1); v0 = ones(size(v)); TOL = 1e-12; while norm(v-v0,inf)/norm(v,inf) > TOL v0 = v; v = [v_T; max(reshape(C.*(A*v),L,N^2))']; % Bellman operator end %% plot of the value function clf; Mr = 500; X1r = linspace(-1,1,Mr); [XXr,YYr] = meshgrid(X1r*S(1),X1r*S(2)); Xr = [XXr(:) YYr(:)]; R = shepard(phi(ep*sdistm(Xr,[T;X],1/ep))); surf(XXr,YYr,reshape(min(-log(abs(R*v)),10),Mr,Mr)); axis tight; shading flat; view(0,90); colorbar; xlabel('angle'); ylabel('angular velocity');