
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%	
%			ASSG/RASSG: Accelerated Stochastic SubGradient descent methods
%											
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  This is a short script for ASSG/RASSG implementing the algorithms in the following paper:
%
%  [1] Yi Xu, Qihang Lin, and Tianbao Yang. Stochastic convex optimization: Faster local growth implies faster global
%   convergence. In Proceedings of the 34th International Conference on Machine Learning (ICML), pages 3821-3830, 2017.
%
%  Written by: Yi Xu, The University of Iowa
%  Email: yi-xu@uiowa.edu
%  Created: April 2018
%  ASSG Version 1.0
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  ASSG/RASSG solves
%
%	min_{w \in R^d} (1/n) * sum_{i=1}^{n} loss(w * x_i, y_i) + (gamm/2) * ||w||_1
%
%	where x_i is the i-th column of X, and
%  	gamm : parameter of the l1-norm regularizer
%
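%  For concreteness, a minimal sketch of evaluating this objective in MATLAB for the
%  hinge loss (lss = 1); the helper name evalObj is an illustrative assumption and is
%  not part of ASSG.m:
%
%	function obj = evalObj(X, y, w, gamm)
%	% X: d-by-n features, y: 1-by-n labels in {-1,+1}, w: 1-by-d solution
%	    margins = y .* (w * X);              % 1-by-n: label times prediction
%	    obj = mean(max(0, 1 - margins)) ...  % average hinge loss over n samples
%	        + (gamm/2) * norm(w, 1);         % l1-norm regularizer
%	end
%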
%  Parameters for ASSG.m (Algorithm 1 [1]): 
%	X : input feature data, (d-by-n). d is the dimensionality, n is sample size. 
%	y : input label data, (1-by-n).
%	lss :  loss function, 1 for hinge; 2 for logistic; 3 for least square; 4 for huber; 5 for squared hinge.
%	eta0 : initial stepsize, for example 0.1.
%	w0 : initial solution (1-by-d); if [] is given, the default is the zero vector.
%	D0 : initial radius of domain (i.e., D_1 in Algorithm 1).
%	t : number of iterations for inner loop.
%	K : number of stages for outer loop.
% 	b : scale factor for reducing 'D' and 'eta'; must satisfy b > 1, e.g., b = 2 in the paper [1].
%	gamm : parameter of l1-norm regularizer, (i.e., lambda in the problem).
%	dlta : parameter for Huber loss ONLY, (i.e., gamma in the Huber function from the paper [1]).
%	epsilon : accuracy level.
% 	dense : 1 if the input data is dense; 0 if it is sparse.
%	prox : 0 for ASSG, 1 for proximal ASSG.
%	idx : sampled data indices for the stochastic (sub)gradient steps.
% 	evalk : window size for printing results on screen; must satisfy evalk < t*K.
% 
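%  A hedged usage sketch for ASSG.m, given X (d-by-n) and y (1-by-n); the argument
%  order below is an assumption inferred from the list above, so check the actual
%  signature in ASSG.m:
%
%	lss = 1; eta0 = 0.1; w0 = [];          % hinge loss, initial stepsize, zero start
%	D0 = 100; t = 1000; K = 10; b = 2;     % initial radius, inner iterations, stages, shrink factor
%	gamm = 1e-4; dlta = 0; epsilon = 1e-4; % regularizer, Huber parameter (unused for hinge), accuracy
%	dense = 1; prox = 0;                   % dense data, plain (non-proximal) ASSG
%	idx = randi(size(X,2), 1, t*K);        % uniform sample indices for t*K stochastic steps
%	evalk = 100;                           % print every 100 iterations
%	[Obj, it, avgw] = ASSG(X, y, lss, eta0, w0, D0, t, K, b, gamm, dlta, ...
%	                       epsilon, dense, prox, idx, evalk);
%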
%  Parameters for RASSG.m (Algorithm 3 [1]): 
%	Same as ASSG.m, plus:
%	maxt : maximum number of iterations for each inner loop of ASSG.
%	Tmax : maximum number of total iterations.
%	incr : the increase ratio of t, (i.e., 2^{2(1-theta)} in Algorithm 3).
%	reNum : number of restarts of ASSG, (i.e., S in Algorithm 3).
%	reseta : "omega" in Algorithm 3; must satisfy 0 < reseta <= 1.
%      
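%  Similarly, a hedged usage sketch for RASSG.m, reusing the ASSG parameters above
%  (argument order again assumed; see RASSG.m for the actual signature):
%
%	maxt = 1e5; Tmax = 1e6;                % caps on each inner loop and on total iterations
%	incr = 2; reNum = 5; reseta = 0.5;     % growth ratio of t, restarts S, omega
%	[Obj, it, avgw] = RASSG(X, y, lss, eta0, w0, D0, t, K, b, gamm, dlta, epsilon, ...
%	                        dense, prox, idx, evalk, maxt, Tmax, incr, reNum, reseta);
%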
%  Outputs:
%	Obj : objective values recorded at the iteration counts in it(:,3).
%	it : the first column is CPU time, the second column is running time, the third column is the number of iterations.
%	avgw : final solution.
%     
%      
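%  For example, a convergence curve can be plotted directly from the outputs (a sketch,
%  assuming Obj and it as described above):
%
%	semilogy(it(:,3), Obj);                % objective vs. iteration count, log scale
%	xlabel('number of iterations'); ylabel('objective value');
%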
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% If you find ASSG helpful, please cite it as

	@InProceedings{pmlr-v70-xu17a,
  		title = {Stochastic Convex Optimization: Faster Local Growth Implies Faster Global Convergence},
  		author = {Yi Xu and Qihang Lin and Tianbao Yang},
  		booktitle = {Proceedings of the 34th International Conference on Machine Learning (ICML)},
  		pages = {3821--3830},
  		year = {2017}
  	}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%