labeling.R - labeling - Axis labeling algorithms for R, used in ggplot

labeling.R (22036B)
      1 #' Functions for positioning tick labels on axes
      2 #'
      3 #' \tabular{ll}{
      4 #' Package: \tab labeling\cr
      5 #' Type: \tab Package\cr
      6 #' Version: \tab 0.2\cr
      7 #' Date: \tab 2011-04-01\cr
      8 #' License: \tab Unlimited\cr
      9 #' LazyLoad: \tab yes\cr
     10 #' }
     11 #' 
     12 #' Implements a number of axis labeling schemes, including those 
     13 #' compared in An Extension of Wilkinson's Algorithm for Positioning Tick Labels on Axes
     14 #' by Talbot, Lin, and Hanrahan, InfoVis 2010.
     15 #'
     16 #' @name labeling-package
     17 #' @aliases labeling
     18 #' @docType package
     19 #' @title Axis labeling
     20 #' @author Justin Talbot \email{jtalbot@@stanford.edu}
     21 #' @references
     22 #' Heckbert, P. S. (1990) Nice numbers for graph labels, Graphics Gems I, Academic Press Professional, Inc.
     23 #' Wilkinson, L. (2005) The Grammar of Graphics, Springer-Verlag New York, Inc.
     24 #' Talbot, J., Lin, S., Hanrahan, P. (2010) An Extension of Wilkinson's Algorithm for Positioning Tick Labels on Axes, InfoVis 2010.
     25 #' @keywords dplot
     26 #' @seealso \code{\link{extended}}, \code{\link{wilkinson}}, \code{\link{heckbert}}, \code{\link{rpretty}}, \code{\link{gnuplot}}, \code{\link{matplotlib}}, \code{\link{nelder}}, \code{\link{sparks}}, \code{\link{thayer}}, \code{\link{pretty}}
     27 #' @examples
     28 #' heckbert(8.1, 14.1, 4)	# 5 10 15
     29 #' wilkinson(8.1, 14.1, 4)	# 8 9 10 11 12 13 14 15
     30 #' extended(8.1, 14.1, 4)	# 8 10 12 14
     31 
     32 #' # When plotting, extend the plot range to include the labeling
     33 #' # Should probably have a helper function to make this easier
     34 #' data(iris)
     35 #' x <- iris$Sepal.Width
     36 #' y <- iris$Sepal.Length
     37 #' xl <- extended(min(x), max(x), 6)
     38 #' yl <- extended(min(y), max(y), 6)
     39 #' plot(x, y, 
     40 #'     xlim=c(min(x,xl),max(x,xl)), 
     41 #'     ylim=c(min(y,yl),max(y,yl)), 
     42 #'     axes=FALSE, main="Extended labeling")
     43 #' axis(1, at=xl)
     44 #' axis(2, at=yl)
     45 c()
     46 
     47 
     48 
     49 #' Heckbert's labeling algorithm
     50 #'
     51 #' @param dmin minimum of the data range
     52 #' @param dmax maximum of the data range
     53 #' @param m number of axis labels
     54 #' @return vector of axis label locations
     55 #' @references
     56 #' Heckbert, P. S. (1990) Nice numbers for graph labels, Graphics Gems I, Academic Press Professional, Inc.
     57 #' @author Justin Talbot \email{jtalbot@@stanford.edu}
     58 #' @export
     59 heckbert <- function(dmin, dmax, m)
     60 {
     61     range <- .heckbert.nicenum((dmax-dmin), FALSE)
     62     lstep <- .heckbert.nicenum(range/(m-1), TRUE)
     63     lmin <- floor(dmin/lstep)*lstep
     64     lmax <- ceiling(dmax/lstep)*lstep
     65     seq(lmin, lmax, by=lstep)
     66 }
     67     
     68 .heckbert.nicenum <- function(x, round)
     69 {
     70 	e <- floor(log10(x))
     71 	f <- x / (10^e)
     72 	if(round)
     73 	{
     74 		if(f < 1.5) nf <- 1
     75 		else if(f < 3) nf <- 2
     76 		else if(f < 7) nf <- 5
     77 		else nf <- 10
     78 	}
     79 	else
     80 	{
     81 		if(f <= 1) nf <- 1
     82 		else if(f <= 2) nf <- 2
     83 		else if(f <= 5) nf <- 5
     84 		else nf <- 10
     85 	}
     86 	nf * (10^e)
     87 }
     88 
     89 
     90 
     91 #' Wilkinson's labeling algorithm
     92 #'
     93 #' @param dmin minimum of the data range
     94 #' @param dmax maximum of the data range
     95 #' @param m number of axis labels
     96 #' @param Q set of nice numbers
     97 #' @param mincoverage minimum ratio between the the data range and the labeling range, controlling the whitespace around the labeling (default = 0.8)
     98 #' @param mrange range of \code{m}, the number of tick marks, that should be considered in the optimization search
     99 #' @return vector of axis label locations
    100 #' @note Ported from Wilkinson's Java implementation with some changes.
    101 #'	Changes: 	1) m (the target number of ticks) is hard coded in Wilkinson's implementation as 5. 
    102 #'						Here we allow it to vary as a parameter. Since m is fixed, 
    103 #'						Wilkinson only searches over a fixed range 4-13 of possible resulting ticks.
    104 #'						We broadened the search range to max(floor(m/2),2) to ceiling(6*m), 
    105 #'						which is a larger range than Wilkinson considers for 5 and allows us to vary m,
    106 #'						including using non-integer values of m.
    107 #'				2) Wilkinson's implementation assumes that the scores are non-negative. But, his revised
    108 #'						granularity function can be extremely negative. We tweaked the code to allow negative scores.
    109 #'						We found that this produced better labelings.
    110 #'				3) We added 10 to Q. This seemed to be necessary to get steps of size 1.
    111 #'	It is possible for this algorithm to find no solution.
    112 #'					In Wilkinson's implementation, instead of failing, he returns the non-nice labels spaced evenly from min to max.
    113 #'					We want to detect this case, so we return NULL. If this happens, the search range, mrange, needs to be increased.
    114 #' @references
    115 #' Wilkinson, L. (2005) The Grammar of Graphics, Springer-Verlag New York, Inc.
    116 #' @author Justin Talbot \email{jtalbot@@stanford.edu}
    117 #' @export
    118 wilkinson <-function(dmin, dmax, m, Q = c(1,5,2,2.5,3,4,1.5,7,6,8,9), mincoverage = 0.8, mrange=max(floor(m/2),2):ceiling(6*m))
    119 {
    120 	best <- NULL
    121 	for(k in mrange)
    122 	{
    123 		result <- .wilkinson.nice.scale(dmin, dmax, k, Q, mincoverage, mrange, m)
    124 		if(!is.null(result) && (is.null(best) || result$score > best$score))
    125 		{
    126 			best <- result
    127 		}
    128 	}
    129 	seq(best$lmin, best$lmax, by=best$lstep)
    130 }
    131 
    132 .wilkinson.nice.scale <- function(min, max, k, Q = c(1,5,2,2.5,3,4,1.5,7,6,8,9), mincoverage = 0.8, mrange=c(), m=k)
    133 {
    134 	Q <- c(10, Q)
    135 
    136 	range <- max-min
    137 	intervals <- k-1
    138 	granularity <- 1 - abs(k-m)/m
    139 
    140 	delta <- range / intervals
    141 	base <- floor(log10(delta))
    142 	dbase <- 10^base
    143 
    144 	best <- NULL
    145 	for(i in 1:length(Q))
    146 	{
    147 		tdelta <- Q[i] * dbase
    148 		tmin <- floor(min/tdelta) * tdelta
    149 		tmax <- tmin + intervals * tdelta
    150 
    151 		if(tmin <= min && tmax >= max)
    152 		{
    153 			roundness <- 1 - ((i-1) - ifelse(tmin <= 0 && tmax >= 0, 1, 0)) / length(Q)
    154 			coverage <- (max-min)/(tmax-tmin)
    155 			if(coverage > mincoverage)
    156 			{
    157 				tnice <- granularity + roundness + coverage
    158 
    159 				## Wilkinson's implementation contains code to favor certain ranges of labels
    160 				## e.g. those balanced around or anchored at 0, etc.
    161 				## We did not evaluate this type of optimization in the paper, so did not include it.
    162 				## Obviously this optimization component could also be added to our function.
    163 				#if(tmin == -tmax || tmin == 0 || tmax == 1 || tmax == 100)
    164 				#	tnice <- tnice + 1
    165 				#if(tmin == 0 && tmax == 1 || tmin == 0 && tmax == 100)
    166 				#	tnice <- tnice + 1
    167 
    168 				if(is.null(best) || tnice > best$score)
    169 				{
    170 					best <- list(lmin=tmin,
    171 							 lmax=tmax,
    172 							 lstep=tdelta,
    173 					      	 score=tnice
    174 						)
    175 				}
    176 			}
    177 		}
    178 	}
    179 	best
    180 }
    181 
    182 
    183 
    184 
    185 ## The Extended-Wilkinson algorithm described in the paper.
    186 
    187 ## Our scoring functions, including the approximations for limiting the search
    188 .simplicity <- function(q, Q, j, lmin, lmax, lstep)
    189 {
    190 	eps <- .Machine$double.eps * 100
    191 
    192 	n <- length(Q)
    193 	i <- match(q, Q)[1]
    194 	v <- ifelse( (lmin %% lstep < eps || lstep - (lmin %% lstep) < eps) && lmin <= 0 && lmax >=0, 1, 0)
    195 
    196 	1 - (i-1)/(n-1) - j + v
    197 }
    198 
    199 .simplicity.max <- function(q, Q, j)
    200 {
    201 	n <- length(Q)
    202 	i <- match(q, Q)[1]
    203 	v <- 1
    204 
    205 	1 - (i-1)/(n-1) - j + v
    206 }
    207 
    208 .coverage <- function(dmin, dmax, lmin, lmax)
    209 {
    210 	range <- dmax-dmin
    211 	1 - 0.5 * ((dmax-lmax)^2+(dmin-lmin)^2) / ((0.1*range)^2)
    212 }
    213 
    214 .coverage.max <- function(dmin, dmax, span)
    215 {
    216 	range <- dmax-dmin
    217 	if(span > range)
    218 	{
    219 		half <- (span-range)/2
    220 		1 - 0.5 * (half^2 + half^2) / ((0.1 * range)^2)
    221 	}
    222 	else
    223 	{
    224 		1
    225 	}
    226 }
    227 
    228 .density <- function(k, m, dmin, dmax, lmin, lmax)
    229 {
    230 	r <- (k-1) / (lmax-lmin)
    231 	rt <- (m-1) / (max(lmax,dmax)-min(dmin,lmin))
    232 	2 - max( r/rt, rt/r )
    233 }
    234 
    235 .density.max <- function(k, m)
    236 {
    237 	if(k >= m)
    238 		2 - (k-1)/(m-1)
    239 	else
    240 		1
    241 }
    242 
    243 .legibility <- function(lmin, lmax, lstep)
    244 {
    245 	1			## did all the legibility tests in C#, not in R.
    246 }
    247 
    248 
    249 #' An Extension of Wilkinson's Algorithm for Position Tick Labels on Axes
    250 #'
    251 #' \code{extended} is an enhanced version of Wilkinson's optimization-based axis labeling approach. It is described in detail in our paper. See the references.
    252 #' 
    253 #' @param dmin minimum of the data range
    254 #' @param dmax maximum of the data range
    255 #' @param m number of axis labels
    256 #' @param Q set of nice numbers
    257 #' @param only.loose if true, the extreme labels will be outside the data range
    258 #' @param w weights applied to the four optimization components (simplicity, coverage, density, and legibility)
    259 #' @return vector of axis label locations
    260 #' @references
    261 #' Talbot, J., Lin, S., Hanrahan, P. (2010) An Extension of Wilkinson's Algorithm for Positioning Tick Labels on Axes, InfoVis 2010.
    262 #' @author Justin Talbot \email{jtalbot@@stanford.edu}
    263 #' @export
    264 extended <- function(dmin, dmax, m, Q=c(1,5,2,2.5,4,3), only.loose=FALSE, w=c(0.25,0.2,0.5,0.05))
    265 {
    266 	eps <- .Machine$double.eps * 100
    267 	
    268 	if(dmin > dmax) {
    269 		temp <- dmin
    270 		dmin <- dmax
    271 		dmax <- temp
    272 	}
    273 
    274 	if(dmax - dmin < eps) {
    275 		#if the range is near the floating point limit,
    276 		#let seq generate some equally spaced steps.
    277 		return(seq(from=dmin, to=dmax, length.out=m))
    278 	}
    279 	
    280 	if((dmax - dmin) > sqrt(.Machine$double.xmax)) {
    281     #if the range is too large
    282     #let seq generate some equally spaced steps.
    283     return(seq(from=dmin, to=dmax, length.out=m))
    284   }
    285 
    286 	n <- length(Q)
    287 
    288 	best <- list()
    289 	best$score <- -2
    290 	
    291 	j <- 1
    292 	while(j < Inf)
    293 	{
    294 		for(q in Q)
    295 		{
    296 			sm <- .simplicity.max(q, Q, j)
    297 
    298 			if((w[1]*sm+w[2]+w[3]+w[4]) < best$score)
    299 			{
    300 				j <- Inf
    301 				break
    302 			}
    303 		
    304 			k <- 2
    305 			while(k < Inf)													# loop over tick counts
    306 			{		
    307 				dm <- .density.max(k, m)
    308 
    309 				if((w[1]*sm+w[2]+w[3]*dm+w[4]) < best$score)
    310 					break
    311 			
    312 				delta <- (dmax-dmin)/(k+1)/j/q
    313 				z <- ceiling(log(delta, base=10))
    314 
    315 				while(z < Inf)
    316 				{			
    317 					step <- j*q*10^z
    318 
    319 					cm <- .coverage.max(dmin, dmax, step*(k-1))
    320 
    321 					if((w[1]*sm+w[2]*cm+w[3]*dm+w[4]) < best$score)
    322 						break
    323 					
    324 					min_start <- floor(dmax/(step))*j - (k - 1)*j
    325 					max_start <- ceiling(dmin/(step))*j
    326 
    327 					if(min_start > max_start)
    328 					{
    329 						z <- z+1
    330 						next
    331 					}
    332 
    333 					for(start in min_start:max_start)
    334 					{
    335 						lmin <- start * (step/j)
    336 						lmax <- lmin + step*(k-1)
    337 						lstep <- step
    338 
    339 						s <- .simplicity(q, Q, j, lmin, lmax, lstep)
    340 						c <- .coverage(dmin, dmax, lmin, lmax)						
    341 						g <- .density(k, m, dmin, dmax, lmin, lmax)
    342 						l <- .legibility(lmin, lmax, lstep)						
    343 
    344 						score <- w[1]*s + w[2]*c + w[3]*g + w[4]*l
    345 
    346 						if(score > best$score && (!only.loose || (lmin <= dmin && lmax >= dmax)))
    347 						{
    348 							best <- list(lmin=lmin,
    349 								 lmax=lmax,
    350 								 lstep=lstep,
    351 						         score=score)
    352 						}
    353 					}
    354 					z <- z+1
    355 				}				
    356 				k <- k+1
    357 			}
    358 		}
    359 		j <- j + 1		
    360 	}
    361 
    362 	seq(from=best$lmin, to=best$lmax, by=best$lstep)
    363 }
    364 
    365 
    366 
    367 ## Quantitative evaluation plots (Figures 2 and 3 in the paper)
    368 
    369 
    370 #' Generate figures from An Extension of Wilkinson's Algorithm for Position Tick Labels on Axes
    371 #'
    372 #' Generates Figures 2 and 3 from our paper.
    373 #' 
    374 #' @param samples number of samples to use (in the paper we used 10000, but that takes awhile to run).
    375 #' @return produces plots as a side effect
    376 #' @references
    377 #' Talbot, J., Lin, S., Hanrahan, P. (2010) An Extension of Wilkinson's Algorithm for Positioning Tick Labels on Axes, InfoVis 2010.
    378 #' @author Justin Talbot \email{jtalbot@@stanford.edu}
    379 #' @export
    380 extended.figures <- function(samples = 100)
    381 {
    382 	oldpar <- par()
    383 	par(ask=TRUE)
    384 	
    385 	a <- runif(samples, -100, 400)
    386 	b <- runif(samples, -100, 400)
    387 	low <- pmin(a,b)
    388 	high <- pmax(a,b)
    389 	ticks <- runif(samples, 2, 10)
    390 
    391 	generate.labelings <- function(labeler, dmin, dmax, ticks, ...)
    392 	{
    393 		mapply(labeler, dmin, dmax, ticks, SIMPLIFY=FALSE, MoreArgs=list(...))
    394 	}
    395 	
    396 	h1 <- generate.labelings(heckbert, low, high, ticks)
    397 	w1 <- generate.labelings(wilkinson, low, high, ticks, mincoverage=0.8)
    398 	f1 <- generate.labelings(extended, low, high, ticks, only.loose=TRUE)
    399 	e1 <- generate.labelings(extended, low, high, ticks)
    400 	
    401 	figure2 <- function(r, names)
    402 	{
    403 		for(i in 1:length(r))
    404 		{
    405 			d <- r[[i]]
    406 			
    407 			#plot coverage
    408 			cover <- sapply(d, function(x) {max(x)-min(x)})/(high-low)
    409 			hist(cover, breaks=seq(from=-0.01,to=1000,by=0.02), xlab="", ylab=names[i], main=ifelse(i==1, "Density", ""), col="darkgray", lab=c(3,3,3), xlim=c(0.5,3.5), ylim=c(0,0.12*samples), axes=FALSE, border=FALSE)
    410 			#hist(cover)
    411 			axis(side=1, at=c(0,1,2,3,4), xlab="hello", line=-0.1, lwd=0.5)
    412 			
    413 			# plot density
    414 			dens <- sapply(d, length) / ticks
    415 			hist(dens, breaks=seq(from=-0.01,to=10,by=0.02), xlab="", ylab=names[i], main=ifelse(i==1, "Density", ""), col="darkgray", lab=c(3,3,3), xlim=c(0.5,3.5), ylim=c(0,0.06*samples), axes=FALSE, border=FALSE)
    416 			axis(side=1, at=c(0,1,2,3,4), xlab="hello", line=-0.1, lwd=0.5)
    417 		}
    418 	}
    419 
    420 	par(mfrow=c(4, 2), mar=c(0.5,1.85,1,0), oma=c(1,0,1,0), mgp=c(0,0.5,-0.3), font.main=1, font.lab=1, cex.lab=1, cex.main=1, tcl=-0.2)
    421 	figure2(list(h1,w1, f1, e1), names=c("Heckbert", "Wilkinson", "Extended\n(loose)", "Extended\n(flexible)"))
    422 
    423 	figure3 <- function(r, names)
    424 	{
    425 		for(i in 1:length(r))
    426 		{
    427 			d <- r[[i]]
    428 			steps <- sapply(d, function(x) round(median(diff(x)), 2))
    429 			steps <- steps / (10^floor(log10(steps)))
    430 			tab <- table(steps)
    431 			barplot(rev(tab), xlim=c(0,0.4*samples), horiz=TRUE, xlab=ifelse(i==1,"Frequency",""), xaxt='n', yaxt='s', las=1, main=names[i], border=NA, col="gray")
    432 		}
    433 	}
    434 	
    435 	par(mfrow=c(1,4), mar=c(0.5, 0.75, 2, 0.5), oma=c(0,2,1,1), mgp=c(0,0.75,-0.3), cex.lab=1, cex.main=1)
    436 	figure3(list(h1,w1, f1, e1), names=c("Heckbert", "Wilkinson", "Extended\n(loose)", "Extended\n(flexible)"))
    437 	par(oldpar)
    438 }
    439 
    440 
    441 
    442 #' Nelder's labeling algorithm
    443 #'
    444 #' @param dmin minimum of the data range
    445 #' @param dmax maximum of the data range
    446 #' @param m number of axis labels
    447 #' @param Q set of nice numbers
    448 #' @return vector of axis label locations
    449 #' @references
    450 #' Nelder, J. A. (1976) AS 96. A Simple Algorithm for Scaling Graphs, Journal of the Royal Statistical Society. Series C., pp. 94-96.
    451 #' @author Justin Talbot \email{jtalbot@@stanford.edu}
    452 #' @export
    453 nelder <- function(dmin, dmax, m, Q = c(1,1.2,1.6,2,2.5,3,4,5,6,8,10))
    454 {
    455 	ntick <- floor(m)
    456 	tol <- 5e-6
    457 	bias <- 1e-4
    458 
    459 	intervals <- m-1
    460 	x <- abs(dmax)
    461 	if(x == 0) x <- 1
    462 	if(!((dmax-dmin)/x > tol))
    463 	{
    464 		## special case handling for very small ranges. Not implemented yet.
    465 	}
    466 
    467 	step <- (dmax-dmin)/intervals
    468 	s <- step
    469 
    470 	while(s <= 1)
    471 		s <- s*10
    472 	while(s > 10)
    473 		s <- s/10
    474 
    475 	x <- s-bias
    476 	unit <- 1
    477 	for(i in 1:length(Q))
    478 	{
    479 		if(x < Q[i])
    480 		{
    481 			unit <- i
    482 			break
    483 		}
    484 	}
    485 	step <- step * Q[unit] / s
    486 	range <- step*intervals
    487 
    488 	x <- 0.5 * (1+ (dmin+dmax-range) / step)
    489 	j <- floor(x-bias)
    490 	valmin <- step * j
    491 
    492 	if(dmin > 0 && range >= dmax)
    493 		valmin <- 0
    494 	valmax <- valmin + range
    495 
    496 	if(!(dmax > 0 || range < -dmin))
    497 	{
    498 		valmax <- 0
    499 		valmin <- -range
    500 	}
    501 
    502 	seq(from=valmin, to=valmax, by=step)
    503 }
    504 
    505 
    506 #' R's pretty algorithm implemented in R
    507 #'
    508 #' @param dmin minimum of the data range
    509 #' @param dmax maximum of the data range
    510 #' @param m number of axis labels
    511 #' @param n number of axis intervals (specify one of \code{m} or \code{n})
    512 #' @param min.n nonnegative integer giving the \emph{minimal} number of intervals. If \code{min.n == 0}, \code{pretty(.)} may return a single value.
    513 #' @param shrink.sml positive numeric by a which a default scale is shrunk in the case when \code{range(x)} is very small (usually 0).
    514 #' @param high.u.bias non-negative numeric, typically \code{> 1}. The interval unit is determined as \code{\{1,2,5,10\}} times \code{b}, a power of 10. Larger \code{high.u.bias} values favor larger units.
    515 #' @param u5.bias non-negative numeric multiplier favoring factor 5 over 2. Default and 'optimal': \code{u5.bias = .5 + 1.5*high.u.bias}.
    516 #' @return vector of axis label locations
    517 #' @references
    518 #' Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) \emph{The New S Language}. Wadsworth & Brooks/Cole.
    519 #' @author Justin Talbot \email{jtalbot@@stanford.edu}
    520 #' @export
    521 rpretty <- function(dmin, dmax, m=6, n=floor(m)-1, min.n=n%/%3, shrink.sml = 0.75, high.u.bias=1.5, u5.bias=0.5 + 1.5*high.u.bias)
    522 {
    523 	ndiv <- n
    524 	h <- high.u.bias
    525 	h5 <- u5.bias
    526 
    527 	dx <- dmax-dmin
    528 	if(dx==0 && dmax==0)
    529 	{
    530 		cell <- 1
    531 		i_small <- TRUE
    532 		U <- 1
    533 	}
    534 	else
    535 	{
    536 		cell <- max(abs(dmin), abs(dmax))
    537 		U <- 1 + ifelse(h5 >= 1.5*h+0.5, 1/(1+h), 1.5/(1+h5))
    538 		i_small = dx < (cell * U * max(1, ndiv) * 1e-07 * 3)
    539 	}
    540 
    541 	if(i_small)
    542 	{
    543 		if(cell > 10)
    544 		{
    545 			cell <- 9+cell/10
    546 		}
    547     	cell <- cell * shrink.sml
    548 		if(min.n > 1) cell <- cell/min.n
    549 	}	
    550 	else
    551 	{
    552 		cell <- dx
    553 		if(ndiv > 1) cell <- cell/ndiv
    554 	}
    555 
    556 	if(cell < 20 * 1e-07)
    557 		cell <- 20 * 1e-07
    558 	
    559 	base <- 10^floor(log10(cell))
    560 
    561 	unit <- base
    562 
    563 	if((2*base)-cell < h*(cell-unit))
    564 	{
    565 		unit <- 2*base
    566 		if((5*base)-cell < h5*(cell-unit))
    567 		{
    568 			unit <- 5*base
    569 			if((10*base)-cell < h*(cell-unit))
    570 				unit <- 10*base
    571 		}
    572 	}
    573 
    574 	# track down lattice labelings...
    575 
    576 	## Maybe used to correct for the epsilon here??
    577 	ns <- floor(dmin/unit + 1e-07)
    578 	nu <- ceiling(dmax/unit - 1e-07)
    579 
    580 	## Extend the range out beyond the data. Does this ever happen??
    581 	while(ns*unit > dmin+(1e-07*unit)) ns <- ns-1
    582 	while(nu*unit < dmax-(1e-07*unit)) nu <- nu+1
    583 
    584 
    585 	## If we don't have quite enough labels, extend the range out to make more (these labels are beyond the data :( )
    586 	k <- floor(0.5 + nu-ns)
    587 	if(k < min.n)
    588 	{
    589 		k <- min.n - k
    590 		if(ns >=0)
    591 		{
    592 			nu <- nu + k/2
    593 			ns <- ns - k/2 + k%%2
    594 		}
    595 		else
    596 		{
    597 			ns <- ns - k/2
    598 			nu <- nu + k/2 + k%%2
    599 		}
    600 		ndiv <- min.n
    601 	}
    602 	else
    603 	{
    604 		ndiv <- k
    605 	}
    606 
    607 	graphmin <- ns*unit
    608 	graphmax <- nu*unit
    609 
    610 	seq(from=graphmin, to=graphmax, by=unit)
    611 }
    612 
    613 #' Matplotlib's labeling algorithm
    614 #'
    615 #' @param dmin minimum of the data range
    616 #' @param dmax maximum of the data range
    617 #' @param m number of axis labels
    618 #' @return vector of axis label locations
    619 #' @references
    620 #' \url{http://matplotlib.sourceforge.net/}
    621 #' @author Justin Talbot \email{jtalbot@@stanford.edu}
    622 #' @export
    623 matplotlib <- function(dmin, dmax, m)
    624 {
    625 	steps <- c(1,2,5,10)
    626 	nbins <- m
    627 	trim <- TRUE
    628 
    629 	vmin <- dmin
    630 	vmax <- dmax
    631 	params <- .matplotlib.scale.range(vmin, vmax, nbins)
    632 	scale <- params[1]
    633 	offset <- params[2]
    634 
    635 	vmin <- vmin-offset
    636 	vmax <- vmax-offset
    637 
    638 	rawStep <- (vmax-vmin)/nbins
    639 	scaledRawStep <- rawStep/scale
    640 
    641 	bestMax <- vmax
    642 	bestMin <- vmin
    643 
    644 	scaledStep <- 1
    645 	chosenFactor <- 1
    646 
    647 	for (step in steps)
    648 	{
    649 		if (step >= scaledRawStep)
    650 		{
    651 			scaledStep <- step*scale
    652 			chosenFactor <- step
    653 			bestMin <- scaledStep * floor(vmin/scaledStep)
    654 			bestMax <- bestMin + scaledStep*nbins
    655 			if (bestMax >= vmax)
    656 				break
    657 		}
    658 	}
    659 	if (trim)
    660 	{
    661 		extraBins <- floor((bestMax-vmax)/scaledStep)
    662 		nbins <- nbins-extraBins
    663 	}
    664 	graphMin <- bestMin+offset
    665 	graphMax <- graphMin+nbins*scaledStep
    666 
    667 	seq(from=graphMin, to=graphMax, by=scaledStep)
    668 }
    669 
    670 .matplotlib.scale.range <- function(min, max, bins)
    671 {
    672 	threshold <- 100
    673 	dv <- abs(max-min)
    674 	maxabsv<-max(abs(min), abs(max))
    675 	if (maxabsv == 0 || dv/maxabsv<10^-12)
    676 		return(c(1, 0))
    677 
    678 	meanv <- 0.5*(min+max)
    679 
    680 	if ((abs(meanv)/dv) < threshold)
    681 		offset<- 0
    682 	else if (meanv>0)
    683 	{
    684 		exp<-floor(log10(meanv))
    685 		offset = 10.0^exp
    686 	} else
    687 	{
    688 		exp <- floor(log10(-1*meanv))
    689 		offset <- -10.0^exp
    690 	}
    691 	exp <- floor(log10(dv/bins))
    692 	scale = 10.0^exp
    693 	c(scale, offset)
    694 }
    695 
    696 
    697 
    698 #' gnuplot's labeling algorithm
    699 #'
    700 #' @param dmin minimum of the data range
    701 #' @param dmax maximum of the data range
    702 #' @param m number of axis labels
    703 #' @return vector of axis label locations
    704 #' @references
    705 #' \url{http://www.gnuplot.info/}
    706 #' @author Justin Talbot \email{jtalbot@@stanford.edu}
    707 #' @export
    708 gnuplot <- function(dmin, dmax, m)
    709 {
    710 	ntick <- floor(m)
    711 	power <- 10^floor(log10(dmax-dmin))
    712 	norm_range <- (dmax-dmin)/power
    713 	p <- (ntick-1) / norm_range
    714 
    715 	if(p > 40)
    716 		t <- 0.05
    717 	else if(p > 20)
    718 		t <- 0.1
    719 	else if(p > 10)
    720 		t <- 0.2
    721 	else if(p > 4)
    722 		t <- 0.5
    723 	else if(p > 2)
    724 		t <- 1
    725 	else if(p > 0.5)
    726 		t <- 2
    727 	else
    728 		t <- ceiling(norm_range)
    729 
    730 	d <- t*power
    731 	graphmin <- floor(dmin/d) * d
    732 	graphmax <- ceiling(dmax/d) * d
    733 
    734 	seq(from=graphmin, to=graphmax, by=d)
    735 }
    736 
    737 
    738 
    739 #' Sparks' labeling algorithm
    740 #'
    741 #' @param dmin minimum of the data range
    742 #' @param dmax maximum of the data range
    743 #' @param m number of axis labels
    744 #' @return vector of axis label locations
    745 #' @references
    746 #' Sparks, D. N. (1971) AS 44. Scatter Diagram Plotting, Journal of the Royal Statistical Society. Series C., pp. 327-331.
    747 #' @author Justin Talbot \email{jtalbot@@stanford.edu}
    748 #' @export
    749 sparks <- function(dmin, dmax, m)
    750 {
    751 	fm <- m-1
    752 	ratio <- 0
    753 	key <- 1
    754 	kount <- 0
    755 	r <- dmax-dmin
    756 	b <- dmin
    757 	
    758 	while(ratio <= 0.8)
    759 	{
    760 		while(key <= 2)
    761 		{
    762 			while(r <= 1)
    763 			{
    764 				kount <- kount + 1
    765 				r <- r*10
    766 			}
    767 			while(r > 10)
    768 			{
    769 				kount <- kount - 1
    770 				r <- r/10
    771 			}
    772 
    773 			b <- b*(10^kount)
    774 			if( b < 0 && b != trunc(b)) b <- b-1
    775 			b <- trunc(b)/(10^kount)
    776 			r <- (dmax-b)/fm
    777 			kount <- 0
    778 			key <- key+2
    779 		}
    780 	
    781 		fstep <- trunc(r)
    782 		if(fstep != r) fstep <- fstep+1
    783 		if(r < 1.5) fstep <- fstep-0.5
    784 		fstep <- fstep/(10^kount)
    785 		ratio <- (dmax - dmin)*(fm*fstep)
    786 		kount <- 1
    787 		key <- 2
    788 	}
    789 	fmin <- b
    790 	c <- fstep*trunc(b/fstep)
    791 	if(c < 0 && c != b) c <- c-fstep
    792 	if((c+fm*fstep) > dmax) fmin <- c
    793 	
    794 	seq(from=fmin, to=fstep*(m-1), by=fstep)
    795 }
    796 
    797 
    798 #' Thayer and Storer's labeling algorithm
    799 #'
    800 #' @param dmin minimum of the data range
    801 #' @param dmax maximum of the data range
    802 #' @param m number of axis labels
    803 #' @return vector of axis label locations
    804 #' @references
    805 #' Thayer, R. P. and Storer, R. F. (1969) AS 21. Scale Selection for Computer Plots, Journal of the Royal Statistical Society. Series C., pp. 206-208.
    806 #' @author Justin Talbot \email{jtalbot@@stanford.edu}
    807 #' @export
    808 thayer <- function(dmin, dmax, m)
    809 {
    810 	r <- dmax-dmin
    811 	b <- dmin
    812 	kount <- 0
    813 	kod <- 0
    814 
    815 	while(kod < 2)
    816 	{
    817 		while(r <= 1)
    818 		{
    819 			kount <- kount+1
    820 			r <- r*10
    821 		}
    822 		while(r > 10)
    823 		{
    824 			kount <- kount-1
    825 			r <- r/10
    826 		}
    827 		b <- b*(10^kount)
    828 		if(b < 0)
    829 			b <- b-1
    830 		ib <- trunc(b)
    831 		b <- ib
    832 		b <- b/(10^kount)
    833 		r <- dmax-b
    834 		a <- r/(m-1)
    835 		kount <- 0
    836 		while(a <= 1)
    837 		{
    838 			kount <- kount+1
    839 			a <- a*10
    840 		}
    841 		while(a > 10)
    842 		{
    843 			kount <- kount-1
    844 			a <- a/10
    845 		}
    846 		ia <- trunc(a)
    847 		if(ia == 6) ia <- 7
    848 		if(ia == 8) ia <- 9
    849 
    850 		aa <- 0
    851 		if(a < 1.5) aa <- -0.5
    852 		a <- aa + 1 + ia
    853 		a <- a/(10^kount)
    854 				
    855 		test <- (m-1) * a
    856 		test1 <- (dmax-dmin)/test
    857 		if(test1 > 0.8)
    858 			kod <- 2
    859 
    860 		if(kod < 2)
    861 		{
    862 			kount <- 1
    863 			r <- dmax-dmin
    864 			b <- dmin
    865 			kod <- kod + 1
    866 		}
    867 	}
    868 
    869 	iab <- trunc(b/a)
    870 	if(iab < 0) iab <- iab-1
    871 	c <- a * iab
    872 	d <- c + (m-1)*a
    873 	if(d >= dmax)
    874 		b <- c
    875 
    876 	valmin <- b
    877 	valmax <- b + a*(m-1)	
    878 
    879 	seq(from=valmin, to=valmax, by=a)
    880 }
    881
	labeling Axis labeling algorithms for R, used in ggplot
	Log \| Files \| Refs \| README