Commit message:
Uploaded
added:
Hmisc/COPYING Hmisc/DESCRIPTION Hmisc/INDEX Hmisc/MD5 Hmisc/NAMESPACE Hmisc/NEWS Hmisc/R/AFirst.lib.s Hmisc/R/Cs.s Hmisc/R/Key.s Hmisc/R/Misc.s Hmisc/R/abs.error.pred.s Hmisc/R/areg.s Hmisc/R/aregImpute.s Hmisc/R/biVar.s Hmisc/R/binconf.s Hmisc/R/bootkm.s Hmisc/R/bpower.s Hmisc/R/bpplot.s Hmisc/R/bystats.s Hmisc/R/capitalize.s Hmisc/R/ciapower.s Hmisc/R/cnvrt.coords.s Hmisc/R/confbar.s Hmisc/R/consolidate.s Hmisc/R/cpower.s Hmisc/R/curveRep.s Hmisc/R/cut2.s Hmisc/R/data.frame.labelled.s Hmisc/R/dataRep.s Hmisc/R/dates.s Hmisc/R/deff.s Hmisc/R/describe.s Hmisc/R/discrete.s Hmisc/R/dotchart3.s Hmisc/R/ecdf.s Hmisc/R/epi.s Hmisc/R/errbar.s Hmisc/R/event.chart.s Hmisc/R/event.history.s Hmisc/R/find.matches.s Hmisc/R/format.pval.s Hmisc/R/ftu.s Hmisc/R/gbayes.s Hmisc/R/gettext.s Hmisc/R/groupn.s Hmisc/R/hist.data.frame.s Hmisc/R/histbackback.s Hmisc/R/hoeffd.s Hmisc/R/impute.s Hmisc/R/in.operator.s Hmisc/R/inc-dec.s Hmisc/R/is.present.s Hmisc/R/james.stein.s Hmisc/R/labcurve.s Hmisc/R/label.s Hmisc/R/latex.s Hmisc/R/latexDotchart.s Hmisc/R/latexTabular.s Hmisc/R/latexTherm.s Hmisc/R/list.tree.s Hmisc/R/mApply.s Hmisc/R/mChoice.s Hmisc/R/makeNstr.s Hmisc/R/mask.s Hmisc/R/matxv.s Hmisc/R/mdb.get.s Hmisc/R/minor.tick.s Hmisc/R/model.frame.default.s Hmisc/R/mtitle.s Hmisc/R/multLines.s Hmisc/R/na.delete.s Hmisc/R/na.detail.response.s Hmisc/R/na.keep.s Hmisc/R/na.pattern.s Hmisc/R/nobsY.s Hmisc/R/nstr.s Hmisc/R/num.intercepts.s Hmisc/R/panel.abwplot.s Hmisc/R/panel.bpplot.s Hmisc/R/pc1.s Hmisc/R/plsmo.s Hmisc/R/popower.s Hmisc/R/print.char.list.s Hmisc/R/pstamp.s Hmisc/R/rcorr.cens.s Hmisc/R/rcorr.s Hmisc/R/rcorrp.cens.s Hmisc/R/rcspline.eval.s Hmisc/R/rcspline.plot.s Hmisc/R/rcspline.restate.s Hmisc/R/reShape.s Hmisc/R/redun.s Hmisc/R/regexpEscape.s Hmisc/R/responseSummary.s Hmisc/R/rm.boot.s Hmisc/R/samplesize.bin.s Hmisc/R/sas.get.s Hmisc/R/scat1d.s Hmisc/R/score.binary.s Hmisc/R/sedit.s Hmisc/R/show.pch.s Hmisc/R/showPsfrag.s Hmisc/R/solvet.s Hmisc/R/somers2.s Hmisc/R/spearman.test.s Hmisc/R/spower.s Hmisc/R/src.s Hmisc/R/stata.get.s Hmisc/R/strgraphwrap.s Hmisc/R/string.break.line.s Hmisc/R/strwrap.s Hmisc/R/subplot.s Hmisc/R/substi.s Hmisc/R/summary.formula.s Hmisc/R/summaryM.s Hmisc/R/summaryP.s Hmisc/R/summaryRc.s Hmisc/R/summaryS.s Hmisc/R/symbol.freq.s Hmisc/R/sys.s Hmisc/R/t.test.cluster.s Hmisc/R/tabulr.s Hmisc/R/tex.s Hmisc/R/transace.s Hmisc/R/transcan.s Hmisc/R/translate.s Hmisc/R/units.s Hmisc/R/valueTags.s Hmisc/R/varclus.s Hmisc/R/wtd.stats.s Hmisc/R/xYplot.s Hmisc/R/xtfrm.labelled.s Hmisc/R/xy.group.s Hmisc/R/ynbind.s Hmisc/R/zoom.s Hmisc/README.md Hmisc/inst/CHANGELOG Hmisc/inst/NEWS Hmisc/inst/THANKS Hmisc/inst/WISHLIST Hmisc/inst/todo Hmisc/man/Cs.Rd Hmisc/man/Ecdf.Rd Hmisc/man/Hmisc-internal.Rd Hmisc/man/Lag.Rd Hmisc/man/Misc.Rd Hmisc/man/Overview.Rd Hmisc/man/Save.Rd Hmisc/man/abs.error.pred.Rd Hmisc/man/addMarginal.Rd Hmisc/man/all.is.numeric.Rd Hmisc/man/approxExtrap.Rd Hmisc/man/areg.Rd Hmisc/man/aregImpute.Rd Hmisc/man/biVar.Rd Hmisc/man/binconf.Rd Hmisc/man/bootkm.Rd Hmisc/man/bpower.Rd Hmisc/man/bpplot.Rd Hmisc/man/bystats.Rd Hmisc/man/capitalize.Rd Hmisc/man/ciapower.Rd Hmisc/man/cnvrt.coords.Rd Hmisc/man/consolidate.Rd Hmisc/man/contents.Rd Hmisc/man/cpower.Rd Hmisc/man/csv.get.Rd Hmisc/man/curveRep.Rd Hmisc/man/cut2.Rd Hmisc/man/data.frame.create.modify.check.Rd Hmisc/man/dataRep.Rd Hmisc/man/deff.Rd Hmisc/man/describe.Rd Hmisc/man/discrete.Rd Hmisc/man/dotchart2.Rd Hmisc/man/dotchart3.Rd Hmisc/man/epi.Rd Hmisc/man/equalBins.Rd Hmisc/man/errbar.Rd 
Hmisc/man/escapeRegex.Rd Hmisc/man/event.chart.Rd Hmisc/man/event.convert.Rd Hmisc/man/event.history.Rd Hmisc/man/find.matches.Rd Hmisc/man/first.word.Rd Hmisc/man/format.df.Rd Hmisc/man/format.pval.Rd Hmisc/man/gbayes.Rd Hmisc/man/getHdata.Rd Hmisc/man/getZip.Rd Hmisc/man/hdquantile.Rd Hmisc/man/hist.data.frame.Rd Hmisc/man/histbackback.Rd Hmisc/man/hoeffd.Rd Hmisc/man/html.Rd Hmisc/man/impute.Rd Hmisc/man/inc-dec.Rd Hmisc/man/labcurve.Rd Hmisc/man/label.Rd Hmisc/man/latex.Rd Hmisc/man/latexDotchart.Rd Hmisc/man/latexTabular.Rd Hmisc/man/latexTherm.Rd Hmisc/man/legendfunctions.Rd Hmisc/man/list.tree.Rd Hmisc/man/mApply.Rd Hmisc/man/mChoice.Rd Hmisc/man/makeNstr.Rd Hmisc/man/mdb.get.Rd Hmisc/man/mgp.axis.Rd Hmisc/man/minor.tick.Rd Hmisc/man/mtitle.Rd Hmisc/man/multLines.Rd Hmisc/man/na.delete.Rd Hmisc/man/na.detail.response.Rd Hmisc/man/na.keep.Rd Hmisc/man/nin.Rd Hmisc/man/nobsY.Rd Hmisc/man/nstr.Rd Hmisc/man/num.intercepts.Rd Hmisc/man/panel.bpplot.Rd Hmisc/man/partition.Rd Hmisc/man/pc1.Rd Hmisc/man/plotCorrPrecision.Rd Hmisc/man/plsmo.Rd Hmisc/man/popower.Rd Hmisc/man/print.char.list.Rd Hmisc/man/print.char.matrix.Rd Hmisc/man/prnz.Rd Hmisc/man/prselect.Rd Hmisc/man/pstamp.Rd Hmisc/man/rMultinom.Rd Hmisc/man/rcorr.Rd Hmisc/man/rcorr.cens.Rd Hmisc/man/rcorrp.cens.Rd Hmisc/man/rcspline.eval.Rd Hmisc/man/rcspline.plot.Rd Hmisc/man/rcspline.restate.Rd Hmisc/man/reShape.Rd Hmisc/man/redun.Rd Hmisc/man/rlegend.Rd Hmisc/man/rm.boot.Rd Hmisc/man/samplesize.bin.Rd Hmisc/man/sasxport.get.Rd Hmisc/man/scat1d.Rd Hmisc/man/score.binary.Rd Hmisc/man/sedit.Rd Hmisc/man/show.pch.Rd Hmisc/man/showPsfrag.Rd Hmisc/man/simplifyDims.Rd Hmisc/man/smean.sd.Rd Hmisc/man/solvet.Rd Hmisc/man/somers2.Rd Hmisc/man/spower.Rd Hmisc/man/spss.get.Rd Hmisc/man/src.Rd Hmisc/man/stata.get.Rd Hmisc/man/string.bounding.box.Rd Hmisc/man/string.break.line.Rd Hmisc/man/stringDims.Rd Hmisc/man/subplot.Rd Hmisc/man/summarize.Rd Hmisc/man/summary.formula.Rd Hmisc/man/summaryM.Rd Hmisc/man/summaryP.Rd Hmisc/man/summaryRc.Rd Hmisc/man/summaryS.Rd Hmisc/man/symbol.freq.Rd Hmisc/man/sys.Rd Hmisc/man/t.test.cluster.Rd Hmisc/man/tabulr.Rd Hmisc/man/tex.Rd Hmisc/man/transace.Rd Hmisc/man/transcan.Rd Hmisc/man/translate.Rd Hmisc/man/trunc.POSIXt.Rd Hmisc/man/units.Rd Hmisc/man/unix/sas.get.Rd Hmisc/man/upData.Rd Hmisc/man/valueTags.Rd Hmisc/man/varclus.Rd Hmisc/man/windows/sas.get.Rd Hmisc/man/wtd.stats.Rd Hmisc/man/xYplot.Rd Hmisc/man/xtfrm.labelled.Rd Hmisc/man/xy.group.Rd Hmisc/man/yearDays.Rd Hmisc/man/ynbind.Rd Hmisc/src/Hmisc.c Hmisc/src/Hmisc.h Hmisc/src/cidxcn.f Hmisc/src/cidxcp.f Hmisc/src/hoeffd.f Hmisc/src/jacklins.f Hmisc/src/largrec.f Hmisc/src/mChoice.c Hmisc/src/maxempr.f Hmisc/src/nstr.c Hmisc/src/ranksort.c Hmisc/src/ratfor/cidxcn.r Hmisc/src/ratfor/cidxcp.r Hmisc/src/ratfor/hoeffd.r Hmisc/src/ratfor/jacklins.r Hmisc/src/ratfor/maxempr.r Hmisc/src/ratfor/rcorr.r Hmisc/src/ratfor/wclosest.r Hmisc/src/rcorr.f Hmisc/src/sas/exportlib.sas Hmisc/src/string_box.c Hmisc/src/wclosest.f Hmisc/tests/Ecdf.r Hmisc/tests/ace.s Hmisc/tests/areg.s Hmisc/tests/aregImpute.r Hmisc/tests/aregImpute2.r Hmisc/tests/aregImpute3.r Hmisc/tests/consolidate.R Hmisc/tests/csv/FORMAT.csv Hmisc/tests/csv/TEST.csv Hmisc/tests/csv/_contents_.csv Hmisc/tests/dataframeReduce.r Hmisc/tests/dataload.r Hmisc/tests/fit.mult.impute.bootstrap.r Hmisc/tests/hoeff.r Hmisc/tests/howto.html Hmisc/tests/inverseFunction.r Hmisc/tests/largest.empty.r Hmisc/tests/latex.s Hmisc/tests/latex.summaryM.Rnw Hmisc/tests/latexTherm.Rnw Hmisc/tests/latexTherm.r 
Hmisc/tests/minor.tick.r Hmisc/tests/panelbp.r Hmisc/tests/procmeans.txt Hmisc/tests/readsasxml.r Hmisc/tests/redun.r Hmisc/tests/summary.formula.r Hmisc/tests/summary.formula.response.stratify.r Hmisc/tests/summaryD.r Hmisc/tests/summaryP.r Hmisc/tests/summaryRc.r Hmisc/tests/summaryS.r Hmisc/tests/test.r Hmisc/tests/test.rda Hmisc/tests/test.sas Hmisc/tests/test.xml Hmisc/tests/test.xpt Hmisc/tests/test2.xpt Hmisc/tests/testexportlib.r Hmisc/tests/testexportlib.sas Hmisc/tests/wtd.r Hmisc/tests/xYplotFilledBands.r
diff -r 000000000000 -r c9dc7254a2ac Hmisc/COPYING
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/COPYING	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,16 @@
+## Copyright (C) 2001 Frank E Harrell Jr
+##
+## This program is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by the
+## Free Software Foundation; either version 2, or (at your option) any
+## later version.
+##
+## These functions are distributed in the hope that they will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+##
+## The text of the GNU General Public License, version 2, is available
+## as http://www.gnu.org/copyleft or by writing to the Free Software
+## Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+##
diff -r 000000000000 -r c9dc7254a2ac Hmisc/DESCRIPTION
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/DESCRIPTION	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,24 @@
+Package: Hmisc
+Version: 3.14-6
+Date: 2014-11-16
+Title: Harrell Miscellaneous
+Author: Frank E Harrell Jr <f.harrell@vanderbilt.edu>, with
+        contributions from Charles Dupont and many others.
+Maintainer: Frank E Harrell Jr <f.harrell@vanderbilt.edu>
+Depends: methods, grid, lattice, survival (>= 2.37-6), Formula
+Imports: latticeExtra, cluster, rpart, nnet, acepack, foreign
+Suggests: chron, rms, mice, tables
+Description: The Hmisc package contains many functions useful for data
+        analysis, high-level graphics, utility operations, functions for
+        computing sample size and power, importing datasets,
+        imputing missing values, advanced table making, variable clustering,
+        character string manipulation, conversion of R objects to LaTeX code,
+        and recoding variables.
+License: GPL (>= 2)
+LazyLoad: Yes
+URL: http://biostat.mc.vanderbilt.edu/Hmisc,
+        https://github.com/harrelfe/Hmisc
+Packaged: 2014-11-21 18:05:11 UTC; harrelfe
+NeedsCompilation: yes
+Repository: CRAN
+Date/Publication: 2014-11-22 00:42:24
diff -r 000000000000 -r c9dc7254a2ac Hmisc/INDEX
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/INDEX	Wed Jun 28 20:28:48 2017 -0400
| b"@@ -0,0 +1,191 @@\n+abs.error.pred Indexes of Absolute Prediction Error for Linear\n+ Models\n+addMarginal Add Marginal Observations\n+all.is.numeric Check if All Elements in Character Vector are\n+ Numeric\n+approxExtrap Linear Extrapolation\n+areg Additive Regression with Optimal\n+ Transformations on Both Sides using Canonical\n+ Variates\n+aregImpute Multiple Imputation using Additive Regression,\n+ Bootstrapping, and Predictive Mean Matching\n+binconf Confidence Intervals for Binomial Probabilities\n+biVar Bivariate Summaries Computed Separately by a\n+ Series of Predictors\n+bootkm Bootstrap Kaplan-Meier Estimates\n+bpower Power and Sample Size for Two-Sample Binomial\n+ Test\n+bpplot Box-percentile plots\n+bystats Statistics by Categories\n+capitalize capitalize the first letter of a string\n+ciapower Power of Interaction Test for Exponential\n+ Survival\n+clowess Miscellaneous Functions\n+cnvrt.coords Convert between the 5 different coordinate\n+ sytems on a graphical device\n+consolidate Element Merging\n+contents Metadata for a Data Frame\n+cpower Power of Cox/log-rank Two-Sample Test\n+Cs Character strings from unquoted names\n+csv.get Read Comma-Separated Text Data Files\n+curveRep Representative Curves\n+cut2 Cut a Numeric Variable into Intervals\n+data.frame.create.modify.check\n+ Tips for Creating, Modifying, and Checking Data\n+ Frames\n+dataRep Representativeness of Observations in a Data\n+ Set\n+deff Design Effect and Intra-cluster Correlation\n+describe Concise Statistical Description of a Vector,\n+ Matrix, Data Frame, or Formula\n+discrete Discrete Vector tools\n+dotchart2 Enhanced Dot Chart\n+dotchart3 Enhanced Version of dotchart Function\n+Ecdf Empirical Cumulative Distribution Plot\n+equalBins Multicolumn Formating\n+errbar Plot Error Bars\n+escapeRegex Escapes any characters that would have special\n+ meaning in a reqular expression.\n+event.chart Flexible Event Chart for Time-to-Event Data\n+event.convert Event Conversion for Time-to-Event Data\n+event.history Produces event.history graph for survival data\n+find.matches Find Close Matches\n+first.word First Word in a String or Expression\n+format.df Format a Data Frame or Matrix for LaTeX or HTML\n+format.pval Format P Values\n+gbayes Gaussian Bayesian Posterior and Predictive\n+ Distributions\n+getHdata Download and Install Datasets for 'Hmisc',\n+ 'rms', and Statistical Modeling\n+getZip Open a Zip File From a URL.\n+hdquantile Harrell-Davis Distribution-Free Quantile\n+ Estimator\n+histbackback Back to Back Histograms\n+hist.data.frame Histograms for Variables in a Data Frame\n+HmiscOverview Overview of Hmisc Library\n+hoeffd Matrix of Hoeffding's D Statistics\n+html Convert an S object to HTML\n+impute Generic Functions and Methods for Imputation\n+inc<- Increment and Decrement\n+labcurve Label Curves, Make Keys, and Interactively Draw\n+ Points and Curves\n+label "..b" Spline Function\n+redun Redundancy Analysis\n+reShape Reshape Matrices and Serial Data\n+rlegend Special Version of legend for R\n+rm.boot Bootstrap Repeated Measurements Model\n+rMultinom Generate Multinomial Random Variables with\n+ Varying Probabilities\n+samplesize.bin Sample Size for 2-sample Binomial\n+sas.get Convert a SAS Dataset to an S Data Frame\n+sasxport.get Enhanced Importing of SAS Transport Files using\n+ read.xport\n+Save Faciliate Use of save and load to Remote\n+ Directories\n+scat1d One-Dimensional Scatter Diagram, Spike\n+ Histogram, or Density\n+score.binary Score a Series of Binary Variables\n+sedit Character 
String Editing and Miscellaneous\n+ Character Handling Functions\n+show.pch Display Colors, Plotting Symbols, and Symbol\n+ Numeric Equivalents\n+showPsfrag Display image from psfrag LaTeX strings\n+simplifyDims List Simplification\n+smean.sd Compute Summary Statistics on a Vector\n+solvet solve Function with tol argument\n+somers2 Somers' Dxy Rank Correlation\n+spower Simulate Power of 2-Sample Test for Survival\n+ under Complex Conditions\n+spss.get Enhanced Importing of SPSS Files\n+src Source a File from the Current Working\n+ Directory\n+stata.get Enhanced Importing of STATA Files\n+string.bounding.box Determine Diamentions of Strings\n+string.break.line Break a String into Many Lines at Newlines\n+stringDims String Dimentions\n+subplot Embed a new plot within an existing plot\n+summarize Summarize Scalars or Matrices by\n+ Cross-Classification\n+summary.formula Summarize Data for Making Tables and Plots\n+summaryM Summarize Mixed Data Types vs. Groups\n+summaryP Multi-way Summary of Proportions\n+summaryRc Graphical Summarization of Continuous Variables\n+ Against a Response\n+summaryS Summarize Multiple Response Variables and Make\n+ Multipanel Scatter or Dot Plot\n+symbol.freq Graphic Representation of a Frequency Table\n+sys Run Unix or Dos Depending on System\n+tabulr Interface to Tabular Function\n+tex function for use in graphs that are used with\n+ the psfrag package in LaTeX\n+transace Additive Regression and Transformations using\n+ ace or avas\n+transcan Transformations/Imputations using Canonical\n+ Variates\n+translate Translate Vector or Matrix of Text Strings\n+trunc.POSIXt return the floor, ceiling, or rounded value of\n+ date or time to specified unit.\n+t.test.cluster t-test for Clustered Data\n+units Units Attribute of a Vector\n+upData Update a Data Frame or Cleanup a Data Frame\n+ after Importing\n+valueTags Store Discriptive Information About an Object\n+varclus Variable Clustering\n+wtd.mean Weighted Statistical Estimates\n+xtfrm.labelled Auxiliary Function Method for Sorting and\n+ Ranking\n+xy.group Mean x vs. function of y in groups of x\n+xYplot xyplot and dotplot with Matrix Variables to\n+ Plot Error Bars and Bands\n+yearDays Get Number of Days in Year or Month\n+ynbind Combine Variables in a Matrix\n" |
diff -r 000000000000 -r c9dc7254a2ac Hmisc/MD5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/MD5	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,351 @@
+16f8ea43efba90112351c5ecb33021c4 *COPYING
+3c38884e849267c86148b87c6e843631 *DESCRIPTION
+17da8e77284bb353c8e685a3c47a669d *INDEX
+a8b838ef66b469e2b6ae849e3f96f74a *NAMESPACE
+57f1318c607746fb3bd8c6ec3c4a9c11 *NEWS
+76f90acbe9bf8ad2eaaa6b8813eb7c82 *R/AFirst.lib.s
+62ef16fc333551d6c65f15dedfb5a697 *R/Cs.s
+387ca68eeb2830ad1d19cdbd979cef6e *R/Key.s
+a957c90f4bffbb5af2cdef5ca6de310f *R/Misc.s
+9043da0ab6b8288636bb399cdb0f89bf *R/abs.error.pred.s
+bdf1cbb1952b1cdf53a296a6d29ecb08 *R/areg.s
+a958ac987c2e30b6881a474ae67f6ae9 *R/aregImpute.s
+23d0d26c581c89f3ceb3ed09e811c31a *R/biVar.s
+b4b1cd12b90021d097455110506b80ff *R/binconf.s
+4a8bfc99ba3dbc3823adb49523d6b070 *R/bootkm.s
+12bf07383dcdc63a08469cf4311e50ff *R/bpower.s
+93a72b437b091f96a6188c755862b352 *R/bpplot.s
+bb0f5e114bad79ac6c8033ac0c8b74a6 *R/bystats.s
+1955872ba2ca7c1026bbfeca83a66c23 *R/capitalize.s
+b9862f09ba4c92cd873521799fcf9d5f *R/ciapower.s
+f77dbf71580e9bf653fadd0c2f491f85 *R/cnvrt.coords.s
+8c002470a851e0d14571cfda085e6c42 *R/confbar.s
+1274fe9b36deb504e596de1fc99cdbcb *R/consolidate.s
+c04e31869fdc5107eb6af6886eadb566 *R/cpower.s
+45de10ab5edf3d2a193a8617fe6128f4 *R/curveRep.s
+5da6635f342086cedeee652e296ce9e1 *R/cut2.s
+25c501e1db065dacd0b7ebbc9918254e *R/data.frame.labelled.s
+97dc5dcc48cb719088b817833067352c *R/dataRep.s
+8ef7d2efc9aa8c82dcdb3073d48477ae *R/dates.s
+3f02d2486d14e095a96fe5ee10e483c7 *R/deff.s
+c836746571a4ea73ad482de3c552debe *R/describe.s
+6cb5b3a77860b605f102bb528e84a071 *R/discrete.s
+a96e8a7c59626e7cb975f79eb869b025 *R/dotchart3.s
+01ac1a5622f257498eefc940f11c8615 *R/ecdf.s
+c1e489a07ca933fb02e004489fd3ee8e *R/epi.s
+f1353c18475724f1c211adc872483648 *R/errbar.s
+048a26c156cbf7588ef95b2d52418cc9 *R/event.chart.s
+f6f60ab400ea2df181948ccb799eccba *R/event.history.s
+8f6279b9164a57be894cb2f12abb6ac6 *R/find.matches.s
+c238614fb72943362407d74442bf236a *R/format.pval.s
+aace929824aad6ebdfba910950b6cc2b *R/ftu.s
+1adfa3a976732cd315829ae8354418fa *R/gbayes.s
+f76f66eae7faef0e445143d9e367619d *R/gettext.s
+b70800bb3730a5b84500d43e97d951f4 *R/groupn.s
+29cf6d5ee0c8444cb58ed0afb85ac57b *R/hist.data.frame.s
+9e9a40e628e1d9eb30fae1e7bb0156e0 *R/histbackback.s
+32c00da8341fad17652a3f5037932efd *R/hoeffd.s
+0e011b03695a9881e56f90e313914014 *R/impute.s
+2d992948ec2ad38aa2c778882fd65e72 *R/in.operator.s
+f681f31c073f4cce7f950b6b6610669b *R/inc-dec.s
+e1bbabaa533c22a5378d1a4b645a80d2 *R/is.present.s
+8cf9e0a10997c59a5be3f19a39c73684 *R/james.stein.s
+88c72d01c0b5a4f5d40f919918bfbd00 *R/labcurve.s
+df6cc0711c0f32bd8524560fc403dbff *R/label.s
+f20844d838120469e5167a1933e90071 *R/latex.s
+15f9f1d9ce01c4624523182f7a514244 *R/latexDotchart.s
+9fafa76ce37fa2f42c27236c9bbcfb9d *R/latexTabular.s
+330de4a4106a11330d54434e2d7a8c32 *R/latexTherm.s
+27e713e2c004dc564b862e3e5a89570e *R/list.tree.s
+ce608a6c653ec8c6fe828448f9a78eb6 *R/mApply.s
+2d052be3962a7b8042d0cef39231d48d *R/mChoice.s
+e21c3cbb9b9c17b07223d4f805433395 *R/makeNstr.s
+e5f0eef89d954522d56fb5399066a6d3 *R/mask.s
+f4cc097babcda3c1edcd60f516713ff2 *R/matxv.s
+c43b31317d74ac389aff80c7adf7efa5 *R/mdb.get.s
+1c430f0b7f3caed1b15359483136509d *R/minor.tick.s
+32a369e847b95005134970ac15b4ba73 *R/model.frame.default.s
+fc71dbaeeb57983dcfa3ac0497ed837b *R/mtitle.s
+1f7cc9e23302fa42a853d794880b6140 *R/multLines.s
+e10cfeb828fa87c42b5e4a9a1a4450b7 *R/na.delete.s
+1f3488410c5f3721d93a23bf22e9a479 *R/na.detail.response.s
+dd4806066d614756cd8be2bef6bad3dd *R/na.keep.s
+3a51c2e1ac1f7676347a3ac306a607bf *R/na.pattern.s
+12307329c016d97d38231b3d88ed3242 *R/nobsY.s
+27019e9e90730ac0b25014b51d68ec66 *R/nstr.s
+cf2866606281d01f39e99d3205f50ae3 *R/num.intercepts.s
+215f9a06ffc6df02fb67576ad0da83b9 *R/panel.abwplot.s
+35e90e268046316b9369585f790bdd9a *R/panel.bpplot.s
+1630c1be85251dac0e8cd0243bedd201 *R/pc1.s
+b4c352cd36449e5ec442b9badb45c9d2 *R/plsmo.s
+ce22555a4a09e120b39f9eec3320b37c *R/popower.s
+9a1119c7949407c39968645e921a87a6 *R/print.char.list.s
+afe3fb9a3b0fa665e0811fd2b1ace401 *R/pstamp.s
+e22[...]
[...]
+[...]0f9dbd *man/units.Rd
+e14aa1714cbb8369099eaa0d38b4ea09 *man/unix/sas.get.Rd
+c853b40641b5abfc91323c101f012ef5 *man/upData.Rd
+05327becfe12abbbc92e8f5c342197a5 *man/valueTags.Rd
+5b38e2511ce2b1f47cbb217211b5a803 *man/varclus.Rd
+bc5f53ee64bd18f64dd31271902608ca *man/windows/sas.get.Rd
+bb8aba91aba23e8359f046a3cf7854ad *man/wtd.stats.Rd
+7bcbae4f32d3a80d4bcd721f45aa9ede *man/xYplot.Rd
+13c3f8a542f16076e346f9d6cc7904c3 *man/xtfrm.labelled.Rd
+83b8f2732a6c378785187242ff93b26a *man/xy.group.Rd
+a376e82806f5056eb2734193039f2193 *man/yearDays.Rd
+c87ff598535f37eaf4c78f044e722b74 *man/ynbind.Rd
+edcf05627a85e73dea9b7ea4bafd4f51 *src/Hmisc.c
+49cd1f68a9ee96ef99b5ce5e0962d2e3 *src/Hmisc.h
+06d9b6e7e6a7e22898fc0134691889b3 *src/cidxcn.f
+ac182f4144cfd7853f252be1e4b3d75b *src/cidxcp.f
+4e562a4570979794f333a4b0e2f70a11 *src/hoeffd.f
+f1af3dfa93855e177f438e9c0989d028 *src/jacklins.f
+bffae56a2552fd897a70ed573edf0620 *src/largrec.f
+4ba1160280bdb841ff84c72545d527ce *src/mChoice.c
+703f73252fe6efaf8244b0713aaabe6b *src/maxempr.f
+69d3a08b2cd911161ba22af68f97f611 *src/nstr.c
+f698976ca9ef8a852edd2c37939b66a2 *src/ranksort.c
+df53032c14b9c8a41bfe9529411c7729 *src/ratfor/cidxcn.r
+ebb3e56a96c8678a2e3184d176af9926 *src/ratfor/cidxcp.r
+c8e8ca03d6d6f3079d87c795160c2246 *src/ratfor/hoeffd.r
+025b1b76e7f0fed7ae06434ba402a515 *src/ratfor/jacklins.r
+daae316a69a5b013bdf334c4302725cd *src/ratfor/maxempr.r
+3217ed8c746472e66790f62ce944cb8f *src/ratfor/rcorr.r
+aa3a41104a7731568ce59a05ce65d235 *src/ratfor/wclosest.r
+7f55a368337c86c4852368304c9ee9e7 *src/rcorr.f
+32839fee8ce62db43ef9b527256a8737 *src/sas/exportlib.sas
+6391fc48464e9b488b3626eaacbe967d *src/string_box.c
+4966ee836b899392c07d690265906264 *src/wclosest.f
+e041077a4b6e1be43bde49623aee2977 *tests/Ecdf.r
+85b5b81d790bbc9bc28d1a4ba1d0d461 *tests/ace.s
+d3ccd55b47e92d78445f1880c69aa380 *tests/areg.s
+2bedbe9ed72ce0734d19af8e11731d45 *tests/aregImpute.r
+274528fcb5fbf02b5cd24b4ba6879d15 *tests/aregImpute2.r
+0b73b76245a462191695af917f104d7d *tests/aregImpute3.r
+8c71471c170cd4b7f0bee73fca3055fe *tests/consolidate.R
+40313999a2d82fb8ffac5f8c2306ceaa *tests/csv/FORMAT.csv
+5fee907037836b5e42e0f163e608325f *tests/csv/TEST.csv
+0f4e7bbe74214f3fb0c6faca2ca4d423 *tests/csv/_contents_.csv
+83ef2f75fdba346104cf289db2f49c88 *tests/dataframeReduce.r
+fe59102477004bfd8dad6e2ca452082e *tests/dataload.r
+cfd5ed703e86daf89c13c7c144666c2f *tests/fit.mult.impute.bootstrap.r
+9a2aa1aec08464f6cf8eeeb2bfab8993 *tests/hoeff.r
+fb5a32fba03003505f33815e5152dce6 *tests/howto.html
+bc723e6e82c91373f2c3f142bbe0be59 *tests/inverseFunction.r
+573465e16901912adb87c138b62056c6 *tests/largest.empty.r
+a1e0437dadc110e13e67c1f72235bfb5 *tests/latex.s
+969b3b3132b54e2b4a174e8c251338b0 *tests/latex.summaryM.Rnw
+86bb29e3df2c4cc1dab74549971ca150 *tests/latexTherm.Rnw
+2a119079fd599b3fd6f64b6f579de087 *tests/latexTherm.r
+c0668a4323dcc6d83736668c00456d23 *tests/minor.tick.r
+dc25c71a732941ce1d970f8585068a6b *tests/panelbp.r
+badcf6388bfae0071da3c10a2b77bac6 *tests/procmeans.txt
+cc8253dbe7c1739dbec34b7ad9779d33 *tests/readsasxml.r
+9cc3fb4ab2821804ac365c5573a09af9 *tests/redun.r
+c17621d7759320c355a948272a8f587d *tests/summary.formula.r
+93d6cc6040ff9451c6559b41b539a76e *tests/summary.formula.response.stratify.r
+9509c473975c8c6aa68e7efe9312111a *tests/summaryD.r
+40b10330bfb0045056717ca222c0a82f *tests/summaryP.r
+1c451c58093a60464ddab77a55f034dd *tests/summaryRc.r
+957f37b87db1198e703acdd5b547e80b *tests/summaryS.r
+15ed256963dddc8577f87dd733ea0613 *tests/test.r
+b36692d5996f652cf69c36533bda7848 *tests/test.rda
+16fb6f4d0b48d6ef354c2f493170239d *tests/test.sas
+be545aff97891d7a2d7a89060601d639 *tests/test.xml
+907f2fb5b395929aa67db43f7d3488de *tests/test.xpt
+f7ecdf35cbda3600541a5a24b2fef03c *tests/test2.xpt
+7d8799b50e0922e62f230d1d9d6f619e *tests/testexportlib.r
+52c79beb3d98d36025262007f39d5884 *tests/testexportlib.sas
+58ee64af0de3dd6d0cb23e7607cf626c *tests/wtd.r
+494a68c264841676707438c6c699bc57 *tests/xYplotFilledBands.r
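The MD5 manifest pairs each file's checksum with its path (the leading * is md5sum's binary-mode marker). An unpacked copy of this tree could be verified from R with tools::md5sum; a minimal sketch, assuming the working directory is the unpacked Hmisc/ root:

    ## read the manifest: checksum, then path prefixed with '*'
    m <- read.table('MD5', header=FALSE, col.names=c('md5','file'),
                    stringsAsFactors=FALSE)
    m$file <- sub('^\\*', '', m$file)       # drop the binary-mode marker
    ok <- tools::md5sum(m$file) == m$md5    # recompute and compare
    all(ok, na.rm=TRUE)                     # TRUE if the tree is intact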
diff -r 000000000000 -r c9dc7254a2ac Hmisc/NAMESPACE
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/NAMESPACE	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,32 @@
+exportPattern("^([^.]|\\..+\\.)")
+useDynLib(Hmisc)
+
+import(methods)
+import(stats)
+import(survival)
+import(Formula)
+import(grid)
+import(lattice)
+
+importFrom(acepack, ace, avas)
+importFrom(latticeExtra, useOuterStrips, resizePanels)
+importFrom(cluster, clara)
+importFrom(foreign, read.dta, lookup.xport, read.spss, read.xport)
+importFrom(nnet, multinom)
+importFrom(rpart, rpart, rpart.control)
+# importFrom(tables, tabular, table_options)
+
+S3method(print, spower)
+S3method(print, Quantile2)
+S3method(plot, Quantile2)
+S3method(plot, summaryP)
+S3method(plot, summaryS)
+S3method(label, default)
+S3method(label, data.frame)
+S3method('label<-', default)
+S3method('label<-', data.frame)
+S3method('[', labelled)
+S3method(print, labelled)
+S3method(label, Surv)
+S3method(units, default)
+S3method(units, Surv)
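The exportPattern regular expression exports every name whose first character is not a period, plus dotted names containing a second interior period, so internal helpers such as .onAttach stay private. A quick illustration using names from this changeset:

    nms <- c('describe', 'summary.formula', '.onAttach', '.noGenerics', '.setKey')
    grepl('^([^.]|\\..+\\.)', nms)
    ## TRUE TRUE FALSE FALSE FALSE: only the first two would be exported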
diff -r 000000000000 -r c9dc7254a2ac Hmisc/NEWS
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/NEWS	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,128 @@
+Changes in version 3.14-6 (2014-11-16)
+ * wtd.rank: fixed bug in no weights case (thanks: DPlat)
+ * latex.summaryM, latex.summary.formula.reverse: added npct='slash' to present numerators and denominators horizontally instead of vertically
+ * plsmo: put derivation of ylab earlier so will use original attributes
+ * NAMESPACE: exported more S3 methods for label
+ * contents: added arguments id, range, values
+ * print.contents.data.frame, html.contents.data.frame: added maxlevels argument
+ * curveRep: added option to have the color of frequencies displayed match line colors.  Thanks: jstat10
+ * curveRep: changed call to strwidth to use units instead of unit argument
+ * contents.data.frame: corrected omission in help file
+ * wtd.var: corrected denominator.  Thanks: Shan Huang
+ * latex.default: changed to use colheads=FALSE to suppress column headings.  Thanks: Michael Rose
+ * mdb.get: added system option -b strip to mdb-export to skip binary output
+
+Changes in version 3.14-5 (2014-09-11)
+ * latex.summaryM: fixed bug in caption with test=TRUE.  Thanks: Yonghao Pua
+ * csv.get: uses data.table package's fread in place of read.csv if data.table is in effect
+ * combined.levels: sensed all NA vector, now return non-factor numeric instead
+ * dataframeReduce: handle all-NA factor variable
+ * subplot: replaced with latest version from TeachingDemos package by Greg Snow
+ * latexTabular: fixed error in example in help file; result is not a file
+ * latex: another test added in tests/latex.s
+ * summaryP: removed observations with a right-hand-side variable missing
+ * latex.summaryP: fixed bug with wrong column labels due to reshape reordering columns coming from factor levels alphabetically instead of by original levels
+ * format.df: added % & <= >= to list of characters handled, the last two by going into math mode
+ * latex.summaryP: use blank if denominator 0, instead of NaN
+ * summary.formula: fixed problem with deparse formula.  Thanks: Andreas Kiermeier
+ * describe: added relative information measure for numeric variables - a measure of how continuous the variable is
+ * wtd.table: detect duplications using duplicated() instead of diff(x) to handle Inf.  Thanks: Benjamin Tyner
+ * DESCRIPTION, NAMESPACE: multiple function changes to work in R-devel
+
+Changes in version 3.14-4 (2014-04-13)
+ * rcspline.eval: stop new logic for ignoring outer values when there are many ties when there are also many ties on interior values.  Added new logic to use interior unique values of x when the number of unique x is small.
+ * latexBuild: generalized with insert argument
+ * latex.default: modified to use mods of latexBuild, fixed bug with caption.loc='bottom' (thanks: YacineH)
+ * latex.default: fixed bug where comma did not appear after caption={} for ctable (thanks: Johannes Hofrichter)
+ * tests: fit.mult.impute.bootstrap.r: added new example (thanks: Jane Cook)
+ * fit.mult.impute: added call for fit.mult.impute in returned object, replacing call from fitter; makes update work for fit.mult.impute
+ * summary.formula: fixed recognition of multiple left-hand-side variables to trigger call to summaryM (thanks: Kevin Thorpe)
+ * summaryM: changed group label to '' instead of 0 for formulas like age + sex ~ 1
+ * Ecdf: added what argument to all functions
+ * nobsY: return constructed id vector
+ * addMarginal: instead of .marginal. being logical, make it contain names of variables being marginalized over
+ * mChoice.c: fixed some inconsistencies
+
+Changes in version 3.14-3 (2014-03-02)
+ * format.df: clarified in help file that col.just can contain specifications other than l,r,c, e.g., "p{3in}" to get paragraph formatting in a column.  Thanks: Ben Bolker
+ * latex.default: added example for the above in tests
+ * label.Surv: got units from inputAttributes in addition, and added type argument
+
+Changes in version 3.14-2 [...]
[...]
+ * [...] for character var
+ * latex: removed newlines when ending environments, added hyperref argument
+ * latex: added center='centerline', fixed 'centering'
+ * upData, cleanup.import, dataframeReduce: changed argument pr to print
+ * rcspline.eval: added more evasive action in case of extreme ties
+
+Changes in version 3.14-0 (2014-01-22)
+ * Added trans argument to varclus
+ * Removed recode, existsFunction functions, under.unix object, survfitKM, functions used only by S-Plus: comment, mem, mulbar.chart, p.sunflowers
+ * as.category, is.category, untangle.special: removed
+ * Removed reference to .R. from many functions
+ * Remove oldClass, oldUnclass, getFunction
+ * latex.default: changed 'rotate' to 'sideways' for ctable mode.  Thanks: Simon Zehnder <szehnder@uni-bonn.de>
+ * gView: removed
+ * ldBands: removed
+ * summaryP: new function - graphical replacement for tables of proportions
+ * ynbind: new function for combining related yes/no variables into a matrix with a label
+ * added file argument to prn
+ * summaryP: added autoarrange
+ * added addMarginal and nobsY functions
+ * pBlock: new function for blocking variables for summaryP
+ * summaryP: changed text positioning to grid absolutes, added text.at argument
+ * scat1d, histSpike: if grid used and y has grid units, fixed logic for frac
+ * plsmo, panel.plsmo: added scat1d.opts argument
+ * label.Surv, units.Surv: added, removed ::: in survival calls
+ * summarize: added keepcolnames argument
+ * Suppressed startup message unless options(Hverbose=TRUE) is set
+ * summaryS: new function - multi-panel lattice xy and dot plots
+ * summaryD: added ylab argument
+ * dotchart3: quit letting left margin be less than pre-existing one
+ * multLines: new function
+ * Improved nobsY to respect subject IDs when counting number of subjects, and to return an attribute 'formula' without id variable; changed bpplotM, summaryP, summaryS to use this
+ * Removed nobsY calculations from bpplotM, summaryP, summaryS, enhanced nobsY to allow stratification by treatment
+ * panel.bpplot: added violin and violin.opts arguments
+ * summaryS: added medvPanel support during-plot vertical violin plots
+ * plot.summaryP: padded x-axis limits
+ * latexTabular: added translate and hline arguments; moved to its own file and help page
+ * latexTherm: added tooltip using LaTeX ocgtools package
+ * summaryP: stopped reversing order of panels
+ * summaryM: added table.env argument, changed how model.frame built
+ * latex.summaryM: changed to print proportions by default, added round='auto'
+ * character.table: added xpd=NA; thanks: Dale
+ * summaryP: added latex method
+ * latex.default: added insert.top argument
+ * summaryM: added stratification (multiple tables)
+
+Changes in version 3.13-0 (2013-11-18)
+ * Changed n location (nloc argument) in bpplotM
+ * Improved dotchart3 to better compute string widths when there is a mixture of expressions and regular strings for auxdata/auxtitle
+ * Changed rlegend to not take logs if log axes are in effect.  Fixes Ecdf(..., log='x', label.curves=list(keys=1:3)).  Thanks: Bayazid Sarker <sarkarbayazid@gmail.com>
+ * Extended non-panel (regular) version of plsmo to handle matrix y
+ * Likewise for summaryRc
+ * Added xlim to bpplotM
+ * Added latexTherm function to create LaTeX picture environments to add a series of thermometers to LaTeX text
+ * Fixed deff to handle the case where R^2 = 1.  Thanks: Matthieu Stigler <matthieu.stigler@gmail.com>
+ * Added new test file for wtd.mean, wtd.quantile
+ * New test aregImpute3.r for glm Poisson regression
+ * Improved describe.vector to format single unique values
+ * Took away warning about var, s.e., t, p in fit.mult.impute
+ * Switched from subversion to github repository
+ * Changed maintainer from Charles Dupont to Frank Harrell
+ * Changed wtd.loess.noiter to use loess instead of stats:::simpleLoess
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/AFirst.lib.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/AFirst.lib.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,12 @@
+## $Id$
+
+.noGenerics <- TRUE  # faster loading as new methods not used
+
+.onAttach <- function(libname, pkgname, ...) {
+  verbose <- .Options$Hverbose
+  if(length(verbose) && verbose)
+    packageStartupMessage("Hmisc library by Frank E Harrell Jr\n\n",
+                          "Type library(help='Hmisc'), ?Overview, or ?Hmisc.Overview\n",
+                          "to see overall documentation.\n")
+  invisible()
+}
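Per the NEWS entry for 3.14-0, this banner is suppressed unless the Hverbose option is set before the package is attached:

    options(Hverbose=TRUE)   # must be set before library() for .onAttach to print
    library(Hmisc)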
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/Cs.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/Cs.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,1 @@
+Cs <- function(...) as.character(sys.call())[-1]
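Cs deparses its own call, turning unquoted names into a character vector, which is handy for subsetting without quoting:

    Cs(age, sex, sbp)              # c("age", "sex", "sbp")
    d <- data.frame(age=1:3, sex='m', sbp=120)
    d[Cs(age, sbp)]                # select columns without quoted names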
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/Key.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/Key.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,46 @@
+.tmpfunction <- function(...)
+  stop("This function callback has not been defined yet")
+
+Key <- function(...) {
+  .tmpfunction(...)
+}
+environment(Key) <- new.env()
+
+.setKey <- function(x) {
+  environment(Key)$.tmpfunction <- x
+}
+
+Key2 <- function(...)
+  .tmpfunction(...)
+environment(Key2) <- new.env()
+
+.setKey2 <- function(x)
+  environment(Key2)$.tmpfunction <- x
+
+sKey <- function(...)
+  .tmpfunction(...)
+environment(sKey) <- new.env()
+
+.setsKey <- function(x)
+  environment(sKey)$.tmpfunction <- x
+
+Points <- function(...)
+  .tmpfunction(...)
+environment(Points) <- new.env()
+
+.setPoints <- function(x)
+  environment(Points)$.tmpfunction <- x
+
+Abline <- function(...)
+  .tmpfunction(...)
+environment(Abline) <- new.env()
+
+.setAbline <- function(x)
+  environment(Abline)$.tmpfunction <- x
+
+Curve <- function(...)
+  .tmpfunction(...)
+environment(Curve) <- new.env()
+
+.setCurve <- function(x)
+  environment(Curve)$.tmpfunction <- x
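The pattern here is a settable callback: each placeholder looks up .tmpfunction in its own replaceable environment, and a plotting routine installs the real drawing function later via the .set* helpers. A minimal sketch with a made-up callback (the cat() body is purely illustrative, not part of the package):

    .setKey(function(x=0.5, y=0.5, ...) {
      cat('Key requested at', x, y, '\n')  # hypothetical stand-in for a key drawer
      invisible()
    })
    Key(0.2, 0.8)   # now dispatches to the installed callback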
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/Misc.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/Misc.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,1534 @@
+## $Id$
+
+if(!exists("NROW", mode='function')) {
+  NROW <- function(x)
+    if (is.array(x) || is.data.frame(x)) nrow(x) else length(x)
+}
+
+if(!exists("NCOL", mode='function')) {
+  NCOL <- function(x)
+    if (is.array(x) && length(dim(x)) > 1 || is.data.frame(x)) ncol(x) else as.integer(1)
+}
+
+prn <- function(x, txt, file='')
+{
+  calltext <- as.character(sys.call())[2]
+  if(file != '') sink(file, append=TRUE)
+
+  if(!missing(txt)) {
+    if(nchar(txt) + nchar(calltext) + 3 > .Options$width)
+      calltext <- paste('\n\n  ', calltext, sep='')
+    else
+      txt <- paste(txt, ' ', sep='')
+    cat('\n', txt, calltext, '\n\n', sep='')
+  }
+  else cat('\n', calltext, '\n\n', sep='')
+  print(x)
+  if(file != '') sink()
+  invisible()
+}
+
+format.sep <- function(x, digits, ...)
+{
+  y <- character(length(x))
+  for(i in 1:length(x))
+    y[i] <- if(missing(digits)) format(x[i], ...)
+            else format(x[i], digits=digits, ...)  ## 17Apr02
+
+  names(y) <- names(x)  ## 17Apr02
+  y
+}
+
+nomiss <- function(x)
+{
+  if(is.data.frame(x)) na.exclude(x)
+  else if(is.matrix(x))
+    x[!is.na(x %*% rep(1,ncol(x))),]
+  else x[!is.na(x)]
+}
+
+fillin <- function(v, p)
+{
+  v.f <- ifelse(is.na(v), p, v)
+  if(length(p)==1)
+    label(v.f) <- paste(label(v), "with", sum(is.na(v)),
+                        "NAs replaced with", format(p))
+  else
+    label(v.f) <- paste(label(v), "with", sum(is.na(v)), "NAs replaced")
+  v.f
+}
+
+spearman <- function(x, y)
+{
+  x <- as.numeric(x)
+  y <- as.numeric(y)  ## 17Jul97
+
+  notna <- !is.na(x+y)  ## exclude NAs
+  if(sum(notna) < 3)
+    c(rho=NA)
+  else
+    c(rho=cor(rank(x[notna]), rank(y[notna])))
+}
+
+plotCorrPrecision <- function(rho=c(0,0.5), n=seq(10,400,length=100),
+                              conf.int=0.95, offset=.025, ...)
+{
+  ## Thanks to Xin Wang for computations
+  curves <- vector('list', length(rho))
+  names(curves) <- paste('r', format(rho), sep='=')
+  zcrit <- qnorm(1-(1-conf.int)/2)
+  for(i in 1:length(rho)) {
+    r <- rho[i]
+    z <- .5*log((1+r)/(1-r))
+    lo <- z - zcrit/sqrt(n-3)
+    hi <- z + zcrit/sqrt(n-3)
+    rlo <- (exp(2*lo)-1)/(exp(2*lo)+1)
+    rhi <- (exp(2*hi)-1)/(exp(2*hi)+1)
+    precision <- pmax(rhi-r, r-rlo)
+    curves[[i]] <- list(N=n, Precision=precision)
+  }
+  labcurve(curves, pl=TRUE, xrestrict=quantile(n,c(.25,1)), offset=offset, ...)
+  invisible()
+}
+
+trap.rule <- function(x,y) sum(diff(x)*(y[-1]+y[-length(y)]))/2
+
+uncbind <- function(x, prefix="", suffix="")
+{
+  nn <- dimnames(x)[[2]]
+  warning("You are using uncbind.  That was a really bad idea.  If you had any variables in the global environment named ", paste(prefix, nn, suffix, sep="", collapse=", "), " they are now overwritten.\n\nYou are now warned.", immediate.=TRUE)
+  for(i in 1:ncol(x))
+    assign(paste(prefix, nn[i], suffix, sep=""), x[,i], pos=parent.env())
+  invisible()
+}
+
+## Function to pick off ordinates of a step-function at user-chosen abscissas
+
+stepfun.eval <- function(x, y, xout, type=c("left","right"))
+{
+  s <- !is.na(x+y)
+  type <- match.arg(type)
+  approx(x[s], y[s], xout=xout, method="constant", f=if(type=="left")0 else 1)$y
+}
+
+
+km.quick <- function(S, times, q)
+{
+  S <- S[!is.na(S),]
+  n <- nrow(S)
+  stratvar <- factor(rep(1,nrow(S)))
+  f <- survfitKM(stratvar, S, se.fit=FALSE, conf.type='none')
+  tt <- c(0, f$time)
+  ss <- c(1, f$surv)
+  if(missing(times))
+    min(tt[ss <= q])
+  else
+    approx(tt, ss, xout=times, method='constant', f=0)$y
+}
+
+oPar <- function()
+{
+  ## Saves existing state of par() and makes changes suitable
+  ## for restoring at the end of a high-level graphics functions
+  oldpar <- par()
+  oldpar$fin <- NULL
+  oldpar$new <- FALSE
+  invisible(oldpar)
+}
+
+setParNro <- function(pars)
+{
+  ## Sets non-read-only par parameters from the input list
+  i <- names(pars) %nin%
+       c('cin','cra','csi','cxy','din','xlog','ylog','gamma','page')
+  invisible(par(pars[i]))
[...]
+[...]='".*/trunk/R/(.*)\\?.*','\\1', w))
+  files <- sub('\\.s$', '', files)
+  ver <- switch(type,
+                cvs=if(length(recent))
+                  sub('^.*rev=(.*);.*', '\\1', w) else
+                  sub('"$', '', sub('^.*rev=', '', w)),
+                svn=if(length(recent))
+                  sub('^.*rev=(.*)&.*', '\\1', w) else
+                  sub('^.*rev=(.*)"', '\\1', w))
+
+  if(avail) return(data.frame(file=files, version=ver))
+
+  if(length(recent)) x <- files[1:recent]
+  if(length(x)==1 && x=='all') x <- files
+  for(fun in x) {
+    i <- which(files==fun)
+    if(!length(i)) stop(paste('no file ', fun, ' in ', package, sep=''))
+    cat('Fetching', fun, 'version', ver[i], '\n')
+    url <- switch(type,
+                  cvs=paste('http://biostat.mc.vanderbilt.edu/cgi-bin/cvsweb.cgi/~checkout~/', package, '/R/', fun, '.s?rev=', ver[i], ';content-type=text%2Fplain', sep=''),
+                  svn=paste('http://biostat.mc.vanderbilt.edu/svn/R/',
+                            package, '/trunk/R/', fun, '.s', sep=''))
+    source(url)
+  }
+}
+
+clowess <- function(x, y=NULL, iter=3, ...)
+{
+  ## to get around bug in lowess with occasional wild values with iter>0
+  r <- range(if(length(y)) y else x$y)
+  f <- lowess(x, y, iter=iter, ...)
+  if(iter != 0 && any(f$y < r[1] | f$y > r[2]))
+    f <- lowess(x, y, iter=0)
+  f
+}
+
+prselect <- function(x, start=NULL, stop=NULL, i=0, j=0, pr=TRUE)
+{
+  f <- function(pattern, x)
+  {
+    y <- grep(pattern, x)
+    if(length(y) > 1) y <- y[1]
+    y
+  }
+  lx <- length(x)
+  k <- if(length(start)) f(start, x) else 1
+  if(length(k))
+  {
+    k <- k + i
+    m <- if(length(stop))
+    {
+      w <- f(stop, x[k:lx])
+      if(length(w)) w + k - 1 + j else -1
+    }
+    else lx
+    if(m > 0) x <- if(k==1) (if(m==lx) '...' else c('...', x[-(k:m)]))
+    else
+    {
+      if(m==lx) c(x[-(k:m)], '...')
+      else c(x[1:(k-1)], '...', x[(m+1):lx])
+    }
+  }
+  else  # no start specified; keep lines after stop
+  {
+    m <- f(stop, x)
+    if(length(m) > 0)
+    {
+      m <- if(length(m)) m + j - 1 else lx
+      x <- if(m==lx) '...' else c('...', x[-(1:m)])
+    }
+  }
+  if(pr) cat(x, sep='\n')
+  invisible(x)
+}
+
+## The following is taken from survival:::plot.survfit internal dostep function
+## Remove code to remove duplicates in y
+
+makeSteps <- function(x, y)
+{
+  if (is.na(x[1] + y[1]))
+  {
+    x <- x[-1]
+    y <- y[-1]
+  }
+  n <- length(x)
+  if (n > 2)
+  {
+    xrep <- rep(x, c(1, rep(2, n - 1)))
+    yrep <- rep(y, c(rep(2, n - 1), 1))
+    list(x = xrep, y = yrep)
+  }
+  else if (n == 1)
+    list(x = x, y = y)
+  else list(x = x[c(1, 2, 2)], y = y[c(1, 1, 2)])
+}
+
+#latexBuild <- function(..., afterEndtabular=NULL, beforeEndtable=NULL, sep='') {
+latexBuild <- function(..., insert=NULL, sep='') {
+  w <- list(...)
+  l <- length(w)
+  if(l %% 2 != 0) stop('# arguments must be multiple of 2')
+  k <- l / 2
+  j <- 1
+  txt <- op <- character(0)
+  for(i in 1 : k) {
+    a <- w[[j]]
+    if(length(a)) {
+      txt <- c(txt, a)
+      if(w[[j + 1]] != '') op <- c(op, w[[j + 1]])
+    }
+    j <- j + 2
+  }
+  txt <- paste(txt, collapse=sep)
+  w <- character(0)
+  close <- if(length(op)) {
+    for(y in rev(op)) {
+      if(length(insert))
+        for(ins in insert)
+          if(length(ins) &&
+             ins[[1]] == y && ins[[2]] == 'before')
+            w <- c(w, '\n\n', ins[[3]])
+      w <- c(w,
+             if(y == '(') ')'
+             else if(y == '{') '}'
+             else if(y == '[') ']'
+             else sprintf('\\end{%s}', y))
+      if(length(insert))
+        for(ins in insert)
+          if(length(ins) &&
+             ins[[1]] == y && ins[[2]] == 'after')
+            w <- c(w, '\n\n', ins[[3]])
+    }
+    paste(w, collapse=sep)
+  }
+  structure(txt, close=close)
+}
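Among the utilities above, trap.rule integrates tabulated (x, y) values by the trapezoidal rule; a quick sanity check against a known integral:

    x <- seq(0, 1, length=101)
    trap.rule(x, x^2)   # ~0.33335, close to the exact integral 1/3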
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/abs.error.pred.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/abs.error.pred.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,48 @@
+abs.error.pred <- function(fit, lp=NULL, y=NULL)
+{
+  if(!length(y))  y  <- fit$y
+  if(!length(lp)) lp <- fit$fitted.values
+  if(!length(lp)) lp <- fit$linear.predictors
+  if(!(length(y) && length(lp)))
+    stop('must specify lp and y or specify y=T in the fit')
+
+  s <- is.na(y + lp)
+  if(any(s)) {
+    y  <- y[!s]
+    lp <- lp[!s]
+  }
+
+  my  <- median(y)
+  mlp <- median(lp)
+  meanr <- mean(  abs( lp - mlp ))
+  meant <- mean(  abs( y  - my  ))
+  meane <- mean(  abs( lp - y   ))
+  medr  <- median(abs( lp - mlp ))
+  medt  <- median(abs( y  - my  ))
+  mede  <- median(abs( lp - y   ))
+
+  differences <- cbind(c(meanr, meane, meant),
+                       c(medr,  mede,  medt))
+  dimnames(differences) <- list(c('|Yi hat - median(Y hat)|',
+                                  '|Yi hat - Yi|',
+                                  '|Yi - median(Y)|'),
+                                c('Mean','Median'))
+
+  ratios <- cbind(c(meanr/meant, meane/meant),
+                  c(medr/medt,   mede/medt))
+  dimnames(ratios) <- list(c('|Yi hat - median(Y hat)|/|Yi - median(Y)|',
+                             '|Yi hat - Yi|/|Yi - median(Y)|'),
+                           c('Mean','Median'))
+  structure(list(differences=differences, ratios=ratios),
+            class='abs.error.pred')
+}
+
+print.abs.error.pred <- function(x, ...)
+{
+  cat('\nMean/Median |Differences|\n\n')
+  print(x$differences)
+  cat('\n\nRatios of Mean/Median |Differences|\n\n')
+  print(x$ratios)
+  invisible()
+}
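A usage sketch on simulated data (abs.error.pred pulls lp from the fit when not supplied):

    set.seed(1)
    x <- runif(100); y <- 2*x + rnorm(100)
    f <- lm(y ~ x)
    abs.error.pred(f, y=y)   # mean/median |errors| and their ratios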
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/areg.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/areg.s	Wed Jun 28 20:28:48 2017 -0400
| b"@@ -0,0 +1,442 @@\n+# $Id$\n+areg <- function(x, y, xtype=NULL, ytype=NULL, nk=4,\n+ B=0, na.rm=TRUE,\n+ tolerance=NULL, crossval=NULL) {\n+ yname <- deparse(substitute(y))\n+ xname <- deparse(substitute(x))\n+ ism <- is.matrix(x)\n+ if(!ism) {\n+ x <- as.matrix(x)\n+ if(!length(colnames(x))) colnames(x) <- xname\n+ }\n+ if(na.rm) {\n+ omit <- is.na(x %*% rep(1,ncol(x))) | is.na(y)\n+ nmiss <- sum(omit)\n+ if(nmiss) {\n+ x <- x[!omit,,drop=FALSE]\n+ y <- y[!omit]\n+ }\n+ } else nmiss <- 0\n+ \n+ d <- dim(x)\n+ n <- d[1]; p <- d[2]\n+ xnam <- colnames(x)\n+ if(!length(xnam)) xnam <- paste('x', 1:p, sep='')\n+\n+ nuy <- length(unique(y))\n+ \n+ if(!length(ytype)) ytype <- \n+ if(is.factor(y) || is.character(y)) 'c' else\n+ if(nk==0 || (nuy < 3)) 'l' else 's'\n+ \n+ if(nk==0 && ytype=='s') ytype <- 'l'\n+ if(ytype=='s' && nk > 0 && nuy < 3) {\n+ warning('requested smooth transformation of y but less than 3 unique values; forced linear')\n+ ytype <- 'l'\n+ }\n+\n+ if(!length(xtype)) xtype <- rep(if(nk==0)'l' else 's', p)\n+ xtype[nk==0 & xtype=='s'] <- 'l'\n+ names(xtype) <- xnam\n+\n+ nux <- apply(x, 2, function(z) length(unique(z)))\n+ tooTied <- xtype=='s' & nux < nk\n+ if(any(tooTied)) {\n+ warning(paste('the following x variables have too few unique values for the value of nk;\\nlinearity forced:', paste(xnam[tooTied], collapse=',')))\n+ xtype[nux] <- 'l'\n+ }\n+\n+ fcancor <- function(X, Y) {\n+ ## colnames must exist for correct insertion of zeros for singular\n+ ## elements\n+ colnames(X) <- paste('x', 1:ncol(X), sep='')\n+ colnames(Y) <- paste('y', 1:ncol(Y), sep='')\n+ ## If canonical variate transformation of Y is descending in Y,\n+ ## negate all parameters\n+ f <- cancor(X, Y)\n+ f$r2 <- f$cor[1]^2\n+ n <- nrow(Y); if(!length(n)) n <- length(y)\n+ varconst <- sqrt(n-1)\n+ ## cancor returns rows only for non-singular variables\n+ ## For first canonical variate insert zeros for singular variables\n+ xcoef <- 0. * X[1, ]\n+ b <- f$xcoef[, 1]\n+ xdf <- length(b)\n+ xcoef[names(b)] <- b\n+ xcoef <- c(intercept = -sum(xcoef * f$xcenter), xcoef) * varconst\n+ ycoef <- 0. * Y[1, ]\n+ b <- f$ycoef[, 1]\n+ ydf <- length(b)\n+ ycoef[names(b)] <- b\n+ ycoef <- c(intercept = -sum(ycoef * f$ycenter), ycoef) * varconst\n+ ty <- matxv(Y, ycoef)\n+ g <- lm.fit.qr.bare(Y, ty, singzero=TRUE)\n+ \n+ if(g$coefficients[2] < 0) {\n+ xcoef <- -xcoef\n+ ycoef <- -ycoef\n+ ty <- -ty\n+ }\n+ f$xcoef <- xcoef\n+ f$ycoef <- ycoef\n+ f$ty <- ty\n+ f$xdf <- xdf\n+ f$ydf <- ydf\n+ f\n+ }\n+\n+ need2getinv <- FALSE\n+ xpxi <- NULL\n+ \n+ Y <- aregTran(y, ytype, nk, functions=TRUE)\n+ at <- attributes(Y)\n+ ytrans <- at$fun\n+ yinv <- at$inversefun ## NULL if type='s'; need coef\n+ yparms <- at$parms\n+ \n+ xdf <- ifelse(xtype=='l', 1, nk - 1)\n+ j <- xtype == 'c'\n+ if(any(j)) xdf[j] <- nux[j] - 1\n+ names(xdf) <- xnam\n+\n+ X <- matrix(NA, nrow=n, ncol=sum(xdf))\n+ xparms <- list()\n+ j <- 0\n+ xn <- character(0)\n+ for(i in 1 : p) {\n+ w <- aregTran(x[,i], xtype[i], nk)\n+ xparms[[xnam[i]]] <- attr(w, 'parms')\n+ m <- ncol(w)\n+ xdf[i] <- m\n+ X[, (j + 1) : (j + m)] <- w\n+ j <- j + m\n+ xn <- c(xn, paste(xnam[i], 1:m, sep=''))\n+ }\n+ ## See if rcpsline.eval could not get desired no. 
of knots due to ties\n+ if(ncol(X) > sum(xdf)) X <- X[, 1 : sum(xdf), drop=FALSE]\n+ \n+ covx <- covy <- r2opt <- r2boot <-\n+ madopt <- madboot <- medopt <- medboot <- NULL\n+ if(B > 0L) {\n+ r <- 1L + sum(xdf)\n+ barx <- rep(0., r)\n+ vname <- c('Intercept', xn)\n+ covx <- matrix(0, nrow=r, ncol=r, dimnames=list(vname,vname))\n+ if(ytype != 'l') {\n+ r <- ncol(Y) + 1L\n+ bary <- rep(0., r)\n+ vname <- c('Intercept', paste(yname, 1 : (r - 1), sep=''))\n+ covy <- matrix(0, nrow=r, ncol=r, dimnames=list(vname, vname))\n+ }\n+ }\n+ if(ytype == 'l') {\n+ f <- lm.fit"..b"x(z))\n+\n+ if(type=='c') {\n+ n <- length(z)\n+ lp <- length(parms)\n+ ## Assume z is integer code if parms is given\n+ w <- if(lp) z else factor(z)\n+ x <- as.integer(w)\n+ if(!lp) parms <- 1 : max(x, na.rm=TRUE)\n+ z <- matrix(0, nrow=n, ncol=length(parms) - 1)\n+ z[cbind(1 : n, x - 1)] <- 1\n+ attr(z, 'parms') <- if(lp)parms else levels(w)\n+ if(functions) {\n+ attr(z, 'fun') <- function(x, parms, coef) {\n+ if(length(parms) > length(coef)) coef <- c(0,coef)\n+ coef[-1] <- coef[-1] + coef[1]\n+ names(coef) <- parms\n+ coef[x]\n+ }\n+ formals(attr(z, 'fun')) <-\n+ list(x=integer(0), parms=parms, coef=numeric(0))\n+ \n+ ## what is ignored; for compatibility with inverseFunction in Misc.s\n+ attr(z, 'inversefun') <- function(y, parms, coef, what=character(0)) {\n+ if(length(parms) > length(coef)) coef <- c(0, coef)\n+ isna <- is.na(y)\n+ y[isna] <- 0\n+ x <- parms[whichClosest(c(coef[1], coef[1] + coef[-1]), y)]\n+ x[isna] <- NA\n+ x\n+ }\n+ formals(attr(z, 'inversefun')) <-\n+ list(y=numeric(0), parms=parms,\n+ coef=numeric(0), what=character(0))\n+ \n+ }\n+ z\n+ }\n+ else {\n+ z <- rcspline.eval(z, knots=parms, nk=nk, inclx=TRUE)\n+ knots <- attr(z, 'knots')\n+ attr(z,'parms') <- knots\n+ if(functions) attr(z, 'fun') <- rcsplineFunction(knots)\n+ ## inverse function created later when coefficients available\n+ z\n+ }\n+}\n+\n+predict.areg <- function(object, x, type=c('lp','fitted','x'),\n+ what=c('all','sample'), ...) {\n+ type <- match.arg(type)\n+ what <- match.arg(what)\n+ beta <- object$xcoefficients\n+ xparms <- object$xparms\n+ xtype <- object$xtype\n+ xdf <- object$xdf\n+ ybeta <- object$ycoefficients\n+ yinv <- object$yinv\n+ x <- as.matrix(x)\n+ p <- length(xdf)\n+ X <- matrix(NA, nrow=nrow(x), ncol=sum(xdf))\n+ j <- 0\n+ xnam <- names(xtype)\n+ for(i in 1:p) {\n+ w <- aregTran(x[,i], xtype[i], parms=xparms[[xnam[i]]])\n+ m <- ncol(w)\n+ X[, (j + 1) : (j + m)] <- w\n+ j <- j + m\n+ }\n+ if(type == 'x') return(cbind(1, X))\n+ xb <- matxv(X, beta)\n+ if(type=='fitted') yinv(xb, what=what, coef=ybeta) else xb\n+}\n+\n+print.areg <- function(x, digits=4, ...) 
{\n+ xdata <- x[c('n', 'm', 'nk', 'rsquared', 'xtype', 'xdf', 'ytype', 'ydf')]\n+ xinfo <- data.frame(type=xdata$xtype, d.f.=xdata$xdf,\n+ row.names=names(xdata$xtype))\n+ cat('\\nN:', xdata$n, '\\t', xdata$m,\n+ ' observations with NAs deleted.\\n')\n+ cat('R^2: ', round(xdata$rsquared, 3), '\\tnk: ', xdata$nk,\n+ '\\tMean and Median |error|: ', format(x$mad, digits=digits), ', ',\n+ format(x$med, digits=digits), '\\n\\n', sep='')\n+ if(length(x$r2boot)) {\n+ x1 <- format(c(x$r2opt, x$madopt, x$medopt), digits=digits)\n+ x2 <- format(c(x$r2boot, x$madboot, x$medboot), digits=digits)\n+ n <- c('R^2', 'Mean |error|', 'Median |error|')\n+ d <- cbind('Bootstrap Estimates'=n, Optimism=x1, 'Optimism-corrected'=x2)\n+ row.names(d) <- rep('', 3)\n+ print(d, quote=FALSE, right=TRUE)\n+ }\n+ if(length(x$crossval)) {\n+ x1 <- format(c(x$rsquaredcv, x$madcv, x$medcv), digits=digits)\n+ n <- c('R^2', 'Mean |error|', 'Median |error|')\n+ d <- cbind(n, x1)\n+ dimnames(d) <- list(rep('',3), \n+ c(paste(x$crossval,'-fold Cross-validation',sep=''),\n+ 'Estimate'))\n+ cat('\\n')\n+ print(d, quote=FALSE, right=TRUE)\n+ }\n+ cat('\\n')\n+ print(xinfo)\n+ cat('\\ny type:', xdata$ytype, '\\td.f.:', xdata$ydf, '\\n\\n')\n+ invisible()\n+}\n+\n+plot.areg <- function(x, whichx=1 : ncol(x$x), ...) {\n+ plot(x$y, x$ty, xlab=x$yname,\n+ ylab=paste('Transformed',x$yname))\n+ r2 <- round(x$rsquared, 3)\n+ title(sub=bquote(R^2==.(r2)), adj=0)\n+ xdata <- x$x\n+ cn <- colnames(xdata)\n+ for(i in whichx)\n+ plot(xdata[,i], x$tx[,i],\n+ xlab=cn[i], ylab=paste('Transformed', cn[i]), ...) \n+ invisible()\n+}\n" |
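An illustrative call on simulated data (argument values are arbitrary):

    set.seed(2)
    x <- cbind(age=rnorm(200, 50, 10), sbp=rnorm(200, 120, 15))
    y <- exp(x[,'age']/50) + 0.02*x[,'sbp'] + rnorm(200, sd=0.3)
    f <- areg(x, y, nk=4)   # smooth transforms of y and both predictors
    f                       # print method: R^2 and d.f. by variable
    plot(f)                 # estimated transformations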
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/aregImpute.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/aregImpute.s	Wed Jun 28 20:28:48 2017 -0400
| b'@@ -0,0 +1,375 @@\n+# $Id$\n+aregImpute <- function(formula, data, subset, n.impute=5,\n+ group=NULL, nk=3, tlinear=TRUE,\n+ type=c(\'pmm\',\'regression\',\'normpmm\'), pmmtype=1,\n+ match=c(\'weighted\',\'closest\',\'kclosest\'),\n+ kclosest=3, fweighted=0.2, curtail=TRUE,\n+ boot.method=c(\'simple\', \'approximate bayesian\'),\n+ burnin=3, x=FALSE,\n+ pr=TRUE, plotTrans=FALSE,\n+ tolerance=NULL, B=75) {\n+ \n+ acall <- match.call()\n+ type <- match.arg(type)\n+ match <- match.arg(match)\n+ boot.method <- match.arg(boot.method)\n+ if(pmmtype %nin% 1:3) stop(\'pmmtype must be 1, 2, or 3\')\n+ if(pmmtype == 3) boot.method <- \'approximate bayesian\'\n+ lgroup <- length(group)\n+ if(type == \'normpmm\' && lgroup)\n+ stop(\'group makes no sense when type="normpmm"\')\n+ if(type == \'normpmm\' && !tlinear)\n+ stop(\'type="normpmm" not implemented when tlinear=FALSE because no covariance matrix is available for right hand side beta for first canonical variate\')\n+ \n+ if(!inherits(formula,\'formula\'))\n+ stop(\'formula must be a formula\')\n+ \n+ nam <- all.vars(formula)\n+ \n+ m <- match.call(expand.dots = FALSE)\n+ Terms <- terms(formula, specials=\'I\')\n+ m$formula <- formula\n+ m$match <- m$fweighted <- m$curtail <- m$x <- m$n.impute <- m$nk <-\n+ m$tlinear <- m$burnin <- m$type <- m$pmmtype <- m$group <- m$pr <-\n+ m$plotTrans <- m$tolerance <- m$boot.method <- m$B <- NULL\n+ m$na.action <- na.retain\n+\n+ m[[1]] <- as.name("model.frame")\n+ z <- eval(m, sys.parent())\n+ p <- length(z)\n+ n <- nrow(z)\n+ rnam <- row.names(z)\n+ if(length(rnam)==0) rnam <- as.character(1:n)\n+\n+ if(lgroup) {\n+ if(boot.method == \'approximate bayesian\')\n+ stop(\'group not implemented for boot.method="approximate bayesian"\')\n+ if(lgroup != n)\n+ stop(\'group should have length equal to number of observations\')\n+ \n+ ngroup <- length(unique(group[!is.na(group)]))\n+ }\n+\n+ linear <- nam[attr(Terms,\'specials\')$I]\n+\n+ cat.levels <- vector(\'list\',p)\n+ names(cat.levels) <- nam\n+ vtype <- rep(\'s\', p); names(vtype) <- nam\n+ dof <- rep(NA, p); names(dof) <- nam\n+ na <- vector(\'list\',p)\n+ names(na) <- nam\n+ nna <- integer(p); names(nna) <- nam\n+\n+ xf <- matrix(as.double(1), nrow=n, ncol=p, dimnames=list(rnam,nam))\n+ imp <- vector(\'list\',p)\n+ names(imp) <- nam\n+ if(lgroup) group.inds <- imp\n+\n+ for(i in 1:p) {\n+ xi <- z[[i]]\n+ ni <- nam[i]\n+ nai <- is.na(xi)\n+ na[[i]] <- (1:n)[nai] \n+ nna[i] <- nnai <- sum(nai)\n+ if(nnai > 0) imp[[ni]] <- matrix(NA, nrow=nnai, ncol=n.impute,\n+ dimnames=list(rnam[nai],NULL))\n+ if(lgroup) {\n+ if(any(is.na(group[!nai])))\n+ stop(\'NAs not allowed in group\')\n+ \n+ if(length(unique(group[!nai])) != ngroup)\n+ stop(paste(\'not all\',ngroup,\n+ \'values of group are represented in\\n\',\n+ \'observations with non-missing values of\',\n+ ni))\n+ group.inds[[i]] <- split((1:n)[!nai], group[!nai])\n+ }\n+ \n+ iscat <- FALSE\n+ if(is.character(xi)) {\n+ xi <- as.factor(xi)\n+ lev <- levels(xi)\n+ iscat <- TRUE\n+ } else if(is.factor(xi)) {\n+ lev <- levels(xi)\n+ iscat <- TRUE\n+ }\n+ if(iscat) {\n+ if(length(lev) < 2) stop(paste(ni,\'is constant\'))\n+ tab <- table(xi)\n+ if(any(tab==0))\n+ stop(paste(ni,\'has the following levels with no observations:\',\n+ paste(names(tab)[tab==0],collapse=\' \')))\n+ if(any(tab < 5))\n+ warning(paste(ni,\'has the following levels with < 5 observations:\',\n+ paste(names(tab)[tab < 5],collapse=\' \'),\n+ \'\\nConsider using the group parameter to balance bootstrap samples\'))\n+ cat.levels[[ni]] <- lev\n+ xi 
<- as.integer(xi)\n+ vtype[ni] <- \'c\'\n+ }\n+ else {\n+ u'..b'==\'l\') pti <- (pti - mean(pti))/sqrt(var(pti))\n+ whichclose <-\n+ if(match==\'kclosest\') j[whichClosek(pti[j], pti[nai], k=kclosest)]\n+ else if(match==\'closest\') {\n+ ## Jitter predicted transformed values for non-NAs to randomly\n+ ## break ties in matching with predictions for NAs in xf[,i]\n+ ## Because of normalization used by fitter, pti usually ranges\n+ ## from about -4 to 4\n+ pti[j] <- pti[j] + runif(npr,-.0001,.0001)\n+ \n+ ## For each orig. missing xf[,i] impute with non-missing xf[,i]\n+ ## that has closest predicted transformed value\n+ j[whichClosest(pti[j], pti[nai])] ## see Misc.s\n+ }\n+ else\n+ j[whichClosePW(pti[j], pti[nai], f=fweighted)]\n+ impi <- xf[whichclose,i]\n+ }\n+ else { ## type=\'regression\'\n+ ## predicted transformed target var + random sample of res,\n+ ## for NAs\n+ r <- sample(res, length(nai),\n+ replace=length(nai) > length(res))\n+ ptir <- pti[nai] + r\n+ \n+ ## predicted random draws on untransformed scale\n+ impi <- f$yinv(ptir, what=\'sample\', coef=f$ycoefficients)\n+ if(curtail) impi <- pmin(pmax(impi, xrange[1,i]), xrange[2,i])\n+ }\n+ xf[nai,i] <- impi\n+ if(iter > burnin) imp[[nam[i]]][,iter-burnin] <- impi\n+ }\n+ }\n+ if(pr)\n+ cat(\'\\n\')\n+ \n+ if(!x) xf <- NULL\n+ \n+ structure(list(call=acall, formula=formula,\n+ match=match, fweighted=fweighted, pmmtype=pmmtype,\n+ n=n, p=p, na=na, nna=nna,\n+ type=vtype, tlinear=tlinear, nk=min(nk),\n+ cat.levels=cat.levels, df=dof,\n+ n.impute=n.impute, imputed=imp, x=xf, rsq=rsq,\n+ resampacc=resampacc),\n+ class=\'aregImpute\')\n+}\n+\n+print.aregImpute <- function(x, digits=3, ...) {\n+ cat("\\nMultiple Imputation using Bootstrap and PMM\\n\\n")\n+ dput(x$call)\n+ cat("\\n")\n+ cat(\'n:\',x$n,\'\\tp:\',x$p,\n+ \'\\tImputations:\',x$n.impute,\' \\tnk:\',x$nk,\'\\n\')\n+ cat(\'\\nNumber of NAs:\\n\'); print(x$nna); cat(\'\\n\')\n+ info <- data.frame(type=x$type, d.f.=x$df,\n+ row.names=names(x$type))\n+ print(info)\n+ if(x$tlinear)\n+ cat(\'\\nTransformation of Target Variables Forced to be Linear\\n\')\n+ \n+ cat(\'\\nR-squares for Predicting Non-Missing Values for Each Variable\\nUsing Last Imputations of Predictors\\n\')\n+ print(round(x$rsq, digits))\n+\n+ racc <- x$resampacc\n+ if(length(racc)) {\n+ cat(\'\\nResampling results for determining the complexity of imputation models\\n\\n\')\n+ for(i in 1:length(racc)) {\n+ cat(\'Variable being imputed:\', names(racc)[i], \'\\n\')\n+ print(racc[[i]], digits=digits)\n+ cat(\'\\n\')\n+ }\n+ cat(\'\\n\')\n+ }\n+ invisible()\n+}\n+\n+plot.aregImpute <- function(x, nclass=NULL, type=c(\'ecdf\',\'hist\'),\n+ datadensity=c("hist","none","rug","density"),\n+ diagnostics=FALSE, maxn=10, ...) 
{\n+ type <- match.arg(type)\n+ datadensity <- match.arg(datadensity)\n+ i <- x$imputed\n+ catg <- x$categorical\n+ lev <- x$cat.levels\n+ n.impute <- x$n.impute\n+ for(n in names(i)) {\n+ xi <- i[[n]]\n+ if(!length(xi))\n+ next\n+ \n+ if(diagnostics) {\n+ r <- range(xi)\n+ for(j in 1:min(maxn,nrow(xi))) {\n+ plot(1:n.impute, xi[j,], ylim=r, xlab=\'Imputation\',\n+ ylab=paste("Imputations for Obs.",j,"of",n))\n+ }\n+ }\n+ \n+ ix <- as.vector(i[[n]])\n+ lab <- paste(\'Imputed\',n)\n+ if(n %in% catg) {\n+ tab <- table(ix)\n+ dotchart3(tab, lev[[n]], auxdata=tab, xlab=\'Frequency\',\n+ ylab=lab)\n+ }\n+ else {\n+ if(type==\'ecdf\')\n+ Ecdf(ix, xlab=lab, datadensity=datadensity, subtitles=FALSE)\n+ else {\n+ if(length(nclass))\n+ hist(ix, xlab=n, nclass=nclass, main=\'\')\n+ else\n+ hist(ix, xlab=lab, main=\'\')\n+ scat1d(ix)\n+ }\n+ }\n+ }\n+ invisible()\n+}\n' |
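A usage sketch for aregImpute (illustrative only; the data frame d is simulated and not part of the committed file). It exercises the default bootstrap + predictive mean matching and the print and plot methods defined above.

    ## Impute x1 with predictive mean matching, 5 imputations
    set.seed(3)
    d <- data.frame(x1 = rnorm(100), x2 = rnorm(100))
    d$y <- d$x1 + d$x2 + rnorm(100)
    d$x1[1:10] <- NA
    a <- aregImpute(~ y + x1 + x2, data = d, n.impute = 5, nk = 3)
    a        # print.aregImpute: counts of NAs, R^2 for predicting non-missing values
    plot(a)  # ECDFs of imputed values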
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/biVar.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/biVar.s	Wed Jun 28 20:28:48 2017 -0400
| b"@@ -0,0 +1,258 @@\n+biVar <- function(formula, statinfo, data=NULL, subset=NULL,\n+ na.action=na.retain, exclude.imputed=TRUE, ...)\n+{\n+ call <- match.call()\n+ x <- do.call('model.frame',\n+ list(formula, data=data, subset=subset, na.action=na.action))\n+ nam <- names(x); yname <- nam[1]\n+ y <- x[[1]]\n+ ylabel <- label(y)\n+ x <- x[-1]\n+ xlabel <- sapply(x, label)\n+ m <- ncol(x)\n+ statnames <- statinfo$names\n+ stats <- matrix(NA, nrow=m, ncol=length(statnames),\n+ dimnames=list(names(x), statnames))\n+ nmin <- statinfo$nmin\n+ fun <- statinfo$fun\n+ \n+ N <- integer(m)\n+ yna <- if(is.matrix(y))is.na(y %*% rep(1,ncol(y))) else is.na(y)\n+ for(i in 1:m) {\n+ w <- x[[i]]\n+ j <- !(yna | is.na(w))\n+ if(exclude.imputed) j <- j & !(is.imputed(w) | is.imputed(y))\n+ yy <- if(is.matrix(y)) y[j,,drop=FALSE] else y[j]\n+ w <- w[j]\n+ N[i] <- length(w)\n+ stats[i,] <- if(N[i] >= nmin) fun(w, yy, ...) else\n+ rep(NA, length(statnames))\n+ }\n+ stats <- cbind(stats, n=N)\n+ structure(stats, class='biVar', yname=yname,\n+ ylabel=ylabel, xlabel=xlabel,\n+ statinfo=statinfo, call=call)\n+}\n+\n+print.biVar <- function(x, ...) {\n+ info <- attr(x, 'statinfo')\n+ yname <- attr(x, 'yname')\n+ cat('\\n', info$title, ' Response variable:', yname, '\\n\\n', sep='')\n+\n+ dig <- c(info$digits,0)\n+ for(i in 1:ncol(x))\n+ x[,i] <- round(x[,i],dig[i])\n+ \n+ attr(x,'yname') <- attr(x, 'statinfo') <- attr(x, 'call') <-\n+ attr(x, 'ylabel') <- attr(x, 'xlabel') <- class(x) <- NULL\n+ print(x)\n+ invisible()\n+}\n+\n+\n+plot.biVar <- function(x,\n+ what=info$defaultwhat,\n+ sort.=TRUE,\n+ main, xlab,\n+ vnames=c('names','labels'), ...) {\n+\n+ vnames <- match.arg(vnames)\n+ yname <- attr(x, 'yname')\n+ ylabel <- attr(x, 'ylabel')\n+ if(vnames=='labels' && ylabel!='') yname <- sedit(ylabel, ' ', '~')\n+ xlabel <- attr(x, 'xlabel')\n+ info <- attr(x, 'statinfo')\n+ aux <- info$aux\n+ auxlabel <- info$auxlabel\n+ if(!length(auxlabel)) auxlabel <- aux\n+ \n+ i <- match(what, info$names)\n+ if(is.na(i)) stop(paste('what must be one of',\n+ paste(info$names,collapse=' ')))\n+ if(missing(xlab))\n+ xlab <- info$rxlab[i]\n+ if(missing(main)) main <-\n+ parse(text=paste(as.character(info$rmain),'~~~~Response:',\n+ yname,sep=''))\n+ auxtitle <- 'N'; auxdata <- format(x[,'n'])\n+ if(length(aux)) {\n+ auxtitle <- paste('N', auxlabel, sep=' ')\n+ auxdata <- paste(format(x[,'n']), format(x[,aux]))\n+ }\n+ stat <- x[,what]\n+ if(vnames=='labels')\n+ names(stat) <- ifelse(xlabel=='', names(stat), xlabel)\n+ if(sort.) {\n+ i <- order(stat)\n+ stat <- stat[i]\n+ auxdata <- auxdata[i]\n+ }\n+ dotchart3(stat, auxdata=auxdata,\n+ xlab=xlab, auxtitle=auxtitle,\n+ main=main, ...)\n+ invisible()\n+}\n+\n+chiSquare <- function(formula, data=NULL, subset=NULL, na.action=na.retain,\n+ exclude.imputed=TRUE, ...) 
{\n+ \n+g <- function(x, y, minlev=0, g=3) {\n+ if(minlev) y <- combine.levels(y, minlev=minlev)\n+ if((is.character(x) || is.factor(x)) && minlev)\n+ x <- combine.levels(x, minlev=minlev)\n+ if(is.numeric(x) && length(unique(x)) > g) x <- cut2(x, g=g)\n+ ct <- chisq.test(x, y)\n+ chisq <- ct$statistic\n+ df <- ct$parameter\n+ pval <- ct$p.value\n+ c(chisq, df, chisq-df, pval)\n+}\n+\n+statinfo <- list(fun=g,\n+ title='Pearson Chi-square Tests',\n+ main='Pearson Chi-squared',\n+ rmain=expression(Pearson~chi^2),\n+ names=c('chisquare','df','chisquare-df','P'),\n+ xlab=c('Chi-square','d.f.','Chi-square - d.f.','P-value'),\n+ rxlab=expression(chi^2, d.f., chi^2 - d.f., P-value),\n+ digits=c(2,0,2,4),\n+ aux='df', nmin=2, defaultwhat='chisquare-df')\n+\n+biVar(formula, statinfo=statinfo, data=data, "..b' <- !(is.na(x) | is.na(y))\n+ if(exclude.imputed) {\n+ im <- is.imputed(x) | is.imputed(y)\n+ s <- s & !im\n+ }\n+ x <- x[s]; y <- y[s]\n+ }\n+ n <- length(x)\n+ \n+ ## If number of non-NA values is less then 3 then return a NA\n+ ## value.\n+ if(n < 3)\n+ return(c(rho2=NA,F=NA,df1=0,df2=n,P=NA,n=n,\'Adjusted rho2\'=NA))\n+\n+ ## Find the number of unique values in x\n+ u <- length(unique(x))\n+\n+ ## If is a factor and unique values are greater then 2 then find the\n+ ## lm.fit.qr.bare without an intercept.\n+ if(is.factor(x) && u > 2) {\n+ if(minlev > 0) {\n+ x <- combine.levels(x, minlev)\n+ if(length(levels(x))<2) {\n+ warning(paste(\'x did not have >= 2 categories with >=\',\n+ minlev,\'of the observations\'))\n+ return(c(rho2=NA,F=NA,df1=0,df2=n,P=NA,n=n,\'Adjusted rho2\'=NA))\n+ }\n+ }\n+ \n+ x <- model.matrix(~x, data=data.frame(x))\n+ p <- ncol(x)-1\n+ rsquare <- lm.fit.qr.bare(x, rank(y), intercept=FALSE)$rsquared\n+ } else {\n+ x <- as.numeric(x)\n+ if(u < 3)\n+ p <- 1\n+ \n+ x <- rank(x)\n+ rsquare <-\n+ if(p==1)\n+ cor(x, rank(y))^2\n+ else {\n+ x <- cbind(x, x^2)\n+ lm.fit.qr.bare(x, rank(y), intercept=TRUE)$rsquared\n+ }\n+ }\n+ \n+ df2 <- n-p-1\n+ fstat <- rsquare/p/((1-rsquare)/df2)\n+ pvalue <- 1-pf(fstat,p,df2)\n+ rsqa <- 1 - (1 - rsquare)*(n-1)/df2\n+ \n+ x <- c(rsquare,fstat,p,df2,pvalue,n,rsqa)\n+ names(x) <- c("rho2","F","df1","df2","P","n","Adjusted rho2")\n+ x\n+}\n+\n+spearman2.formula <- function(formula, data=NULL, subset=NULL,\n+ na.action=na.retain,\n+ exclude.imputed=TRUE, ...)\n+{\n+ g <- function(x, y, p=1, minlev=0)\n+ spearman2(x, y, p=p, minlev=minlev, na.rm=FALSE)[-6]\n+ \n+statinfo <- list(fun=g,\n+ title=\'Spearman rho^2\',\n+ main=\'Spearman rho^2\',\n+ rmain=expression(Spearman~rho^2),\n+ names=c(\'rho2\',\'F\',\'df1\',\'df2\',\'P\',\'Adjusted rho2\'),\n+ xlab=c(\'rho^2\',\'F\',\'df2\',\'df2\',\'P-value\',\'Adjusted rho^2\'),\n+ rxlab=expression(rho^2, F, df1, df2, P-value, Adjusted~rho^2),\n+ digits=c(3,2,0,0,4,3),\n+ aux=\'df1\', auxlabel=\'df\', nmin=2, defaultwhat=\'Adjusted rho2\')\n+\n+biVar(formula, statinfo=statinfo, data=data, subset=subset,\n+ na.action=na.action, exclude.imputed=exclude.imputed, ...)\n+}\n+\n+\n+rcorrcens <- function(x, ...) 
UseMethod("rcorrcens") \n+\n+rcorrcens.formula <- function(formula, data=NULL, subset=NULL,\n+ na.action=na.retain,\n+ exclude.imputed=TRUE, outx=FALSE, ...)\n+{\n+ g <- function(x, y, outx)\n+ {\n+ lev <- levels(x)\n+ if(is.factor(x) && length(lev)==2) x <- as.integer(x)\n+ \n+ u <- if(is.factor(x))\n+ {\n+ i <- order(-table(x))\n+ u <- rcorr.cens(1*(x==lev[i[1]]), y, outx=outx)\n+ v <- rcorr.cens(1*(x==lev[i[2]]), y, outx=outx)\n+ if(abs(v[\'Dxy\']) > abs(u[\'Dxy\'])) v else u\n+ }\n+ else rcorr.cens(x, y, outx=outx)\n+ Dxy <- u[\'Dxy\']\n+ SE <- u[\'S.D.\']\n+ aDxy <- abs(Dxy)\n+ z <- aDxy/SE\n+ P <- 2*(1-pnorm(z))\n+ c(C=u[\'C Index\'], Dxy=Dxy, aDxy=aDxy, SD=SE, Z=z, P=P)\n+ }\n+ \n+statinfo <- list(fun=g,\n+ title="Somers\' Rank Correlation for Censored Data",\n+ main="Somers\' Rank Correlation",\n+ rmain=expression(paste("Somers\' ", D[xy])),\n+ names=c(\'C\',\'Dxy\',\'aDxy\',\'SD\',\'Z\',\'P\'),\n+ xlab=c(\'C\',\'Dxy\',\'|Dxy|\',\'SD\',\'Z\',\'P-value\'),\n+ rxlab=expression(C-index, D[xy], paste(\'|\',D[xy],\'|\'), SD, Z, P-value),\n+ digits=c(3,3,3,3,2,4),\n+# aux=\'n\', auxlabel=\'N\',\n+ nmin=2, defaultwhat=\'aDxy\')\n+\n+biVar(formula, statinfo=statinfo, data=data, subset=subset,\n+ na.action=na.action, exclude.imputed=exclude.imputed, outx=outx, ...)\n+}\n' |
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/binconf.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/binconf.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,99 @@ +binconf <- function(x, n, alpha = 0.05, + method = c("wilson","exact","asymptotic","all"), + include.x = FALSE, include.n = FALSE, + return.df = FALSE) +{ + ## ..modifications for printing and the addition of a + ## method argument and the asymptotic interval + ## and to accept vector arguments were + ## made by Brad Biggerstaff on 10 June 1999 + + method <- match.arg(method) + bc <- function(x, n, alpha, method) + { + nu1 <- 2 * (n - x + 1) + nu2 <- 2 * x + ll <- if(x > 0) + x/(x + qf(1 - alpha/2, nu1, nu2) * (n - x + 1)) + else + 0 + + nu1p <- nu2 + 2 + nu2p <- nu1 - 2 + pp <- if(x < n) + qf(1 - alpha/2, nu1p, nu2p) + else + 1 + + ul <- ((x + 1) * pp)/(n - x + (x + 1) * pp) + zcrit <- - qnorm(alpha/2) + z2 <- zcrit * zcrit + p <- x/n + cl <- (p + z2/2/n + c(-1, 1) * zcrit * + sqrt((p * (1 - p) + z2/4/n)/n))/(1 + z2/n) + + if(x == 1) + cl[1] <- - log(1 - alpha)/n + + if(x == (n - 1)) + cl[2] <- 1 + log(1 - alpha)/n + + asymp.lcl <- x/n - qnorm(1 - alpha/2) * + sqrt(((x/n) * (1 - x/n))/n) + + asymp.ucl <- x/n + qnorm(1 - alpha/2) * sqrt(((x/n) * (1 - x/n) + )/n) + res <- rbind(c(ll, ul), cl, c(asymp.lcl, asymp.ucl)) + res <- cbind(rep(x/n, 3), res) + + ##dimnames(res) <- list(c("Exact", "Wilson", "Asymptotic"), c( + ## "Point Estimate", "Lower", "Upper")) + switch(method, + wilson = res[2, ], + exact = res[1, ], + asymptotic = res[3, ], + all = res, + res) + } + + if((length(x) != length(n)) & length(x) == 1) + x <- rep(x, length(n)) + if((length(x) != length(n)) & length(n) == 1) + n <- rep(n, length(x)) + if((length(x) > 1 | length(n) > 1) & method == "all") { + method <- "wilson" + warning("method=all will not work with vectors...setting method to wilson") + } + if(method == "all" & length(x) == 1 & length(n) == 1) { + mat <- bc(x, n, alpha, method) + dimnames(mat) <- list(c("Exact", "Wilson", "Asymptotic"), + c("PointEst", "Lower", "Upper")) + if(include.n) + mat <- cbind(N = n, mat) + + if(include.x) + mat <- cbind(X = x, mat) + + if(return.df) + mat <- as.data.frame(mat) + + return(mat) + } + + mat <- matrix(ncol = 3, nrow = length(x)) + for(i in 1:length(x)) + mat[i, ] <- bc(x[i], n[i], alpha = alpha, method = method) + + dimnames(mat) <- list(rep("", dim(mat)[1]), + c("PointEst", "Lower", "Upper")) + if(include.n) + mat <- cbind(N = n, mat) + + if(include.x) + mat <- cbind(X = x, mat) + + if(return.df) + mat <- as.data.frame(mat, row.names=NULL) + + mat +} |
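A usage sketch: with scalar x and n, method="all" returns a three-row matrix of exact, Wilson, and asymptotic intervals; vector inputs require a single method.

    binconf(x = 10, n = 40, alpha = 0.05, method = "all")
    binconf(x = c(2, 10), n = c(20, 40), method = "wilson",
            include.x = TRUE, include.n = TRUE, return.df = TRUE)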
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/bootkm.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/bootkm.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,41 @@ +bootkm <- function(S, q=.5, B=500, times, pr=TRUE) +{ + tthere <- !missing(times) + if(tthere && length(times)>1) + stop('presently bootkm only works for a single time') + + S <- S[!is.na(S),] + n <- nrow(S) + stratvar <- factor(rep(1,nrow(S))) + f <- survfitKM(stratvar, S) + tt <- c(0, f$time) + ss <- c(1, f$surv) + if(!tthere) { + if(ss[length(ss)] > q) + stop(paste('overall Kaplan-Meier estimate does not fall below',q)) + + } else { + if(tt[length(tt)] < times) + stop(paste('overall Kaplan-Meier estimate not defined to time',times)) + } + + ests <- double(B) + + for(i in 1:B) { + if(pr && (i %% 10)==0) + cat(i,'\r') + + f <- survfitKM(stratvar, S[sample(n,n,replace=TRUE),], + se.fit=FALSE, conf.type='none') + tt <- c(0, f$time) + ss <- c(1, f$surv) + ests[i] <- if(tthere) + approx(tt, ss, xout=times, method='constant', f=0)$y + else + min(tt[ss <= q]) #is NA if none + } + if(pr) + cat('\n') + + ests +} |
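A usage sketch, assuming a Surv object from the survival package (simulated here): bootstrap the Kaplan-Meier median and form a percentile interval. Note bootkm stops if the overall KM curve never falls below q, so the simulated data use a high event fraction.

    library(survival)
    set.seed(2)
    S <- Surv(rexp(75), sample(0:1, 75, replace = TRUE, prob = c(.3, .7)))
    meds <- bootkm(S, q = 0.5, B = 500, pr = FALSE)
    quantile(meds, c(.025, .975), na.rm = TRUE)  # 0.95 bootstrap CI for the median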
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/bpower.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/bpower.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,88 @@ +bpower <- function(p1, p2, odds.ratio, percent.reduction, n, n1, n2, + alpha=.05) +{ + if(!missing(odds.ratio)) + p2 <- p1*odds.ratio/(1-p1+p1*odds.ratio) + else if(!missing(percent.reduction)) + p2 <- p1*(1-percent.reduction/100) + + if(!missing(n)) { + n1 <- n2 <- n/2 + } + z <- qnorm(1-alpha/2) + q1 <- 1-p1 + q2 <- 1-p2 + pm <- (n1*p1+n2*p2)/(n1+n2) + ds <- z*sqrt((1/n1 + 1/n2)*pm*(1-pm)) + ex <- abs(p1-p2) + sd <- sqrt(p1*q1/n1+p2*q2/n2) + c(Power = 1-pnorm((ds-ex)/sd)+pnorm((-ds-ex)/sd) ) +} + + +bsamsize <- function(p1, p2, fraction=.5, alpha=.05, power=.8) +{ + z.alpha <- qnorm(1-alpha/2) + z.beta <- qnorm(power) + + ratio <- (1-fraction)/fraction + p <- fraction*p1+(1-fraction)*p2 + + n1 <- (z.alpha * sqrt((ratio+1) * p * (1-p)) + + z.beta * sqrt(ratio * p1 * (1-p1) + p2 * (1 - p2)) + )^2/ratio/((p1-p2)^2) + + n2 <- ratio*n1 + c(n1=n1, n2=n2) +} + +ballocation <- function(p1, p2, n, alpha=.05) +{ + q1 <- 1-p1 + q2 <- 1-p2 + + f.minvar.diff <- 1/(1+sqrt(p2*q2/(p1*q1))) + f.minvar.ratio <- 1/(1+sqrt(p1*q2/p2/q1)) + + z <- c(fraction.group1.min.var.diff=f.minvar.diff, + fraction.group1.min.var.ratio=f.minvar.ratio, + fraction.group1.min.var.logodds=1-f.minvar.diff) + + if(!missing(n)) { + possf <- seq(.001,.999,length=1000) + pow <- bpower(p1, p2, n1=n*possf, n2=n*(1-possf), alpha=alpha) + ## fun <- function(f, n, p1, p2, alpha) bpower(p1, p2, n1=f*n, n2=(1-f)*n, alpha=alpha) + ## f.maxpow <- optimize(fun, lower=.01, upper=.99, maximum=T, + ## n=n, p1=p1, p2=p2, alpha=alpha)$maximum + f <- possf[pow==max(pow)] + f <- f[abs(f-.5)==min(abs(f-.5))] + z <- c(z, fraction.group1.max.power=f[1]) + } + z +} + +bpower.sim <- function(p1, p2, odds.ratio, percent.reduction, n, n1, n2, + alpha=.05, nsim=10000) +{ + if(!missing(odds.ratio)) + p2 <- p1*odds.ratio/(1-p1+p1*odds.ratio) + else if(!missing(percent.reduction)) + p2 <- p1*(1-percent.reduction/100) + + if(!missing(n)) { + n1 <- n2 <- round(n/2) + } + n <- n1+n2 + + if(length(p1)+length(p2)+length(n1)+length(n2)+length(alpha)+length(nsim)!=6) + stop('all arguments must have length 1') + + chi2 <- qchisq(1-alpha, 1) + + d1 <- rbinom(nsim, n1, p1) + d2 <- rbinom(nsim, n2, p2) + chisq <- n*(d1*(n2-d2)-(n1-d1)*d2)^2/(d1+d2)/(n-d1-d2)/n1/n2 + power <- mean(chisq>chi2) + se <- sqrt(power*(1-power)/nsim) + c(Power=power,Lower=power-1.96*se,Upper=power+1.96*se) +} |
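A usage sketch: closed-form power for detecting a halving of a 10% event rate with 500 per group, the simulation counterpart, and the per-group sample sizes giving 90% power.

    bpower(p1 = 0.10, p2 = 0.05, n1 = 500, n2 = 500)  # normal approximation
    bpower.sim(p1 = 0.10, p2 = 0.05, n = 1000)        # power with simulation CI
    bsamsize(p1 = 0.10, p2 = 0.05, power = 0.9)       # n1, n2 required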
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/bpplot.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/bpplot.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,80 @@ +##Modified FEH 30Jun97 - delete missing data, names default to T, +## auto names for list argument, ylab default to "" instead of Percentiles +## names -> name, added srtx +bpplot <- function(..., name = TRUE, + main = "Box-Percentile Plot", + xlab = "", ylab = "", srtx=0) +{ + all.x <- list(...) ## FH 30Jun97 + nam <- character(0) ## FH + ## if(is.list(...)) { ## FH + if(is.list(all.x[[1]])) { + all.x <- all.x[[1]] + if(is.logical(name) && name) name <- names(...) ## FH + } + + n <- length(all.x) + centers <- seq(from = 0, by = 1.2, length = n) + ymax <- max(sapply(all.x, max, na.rm=TRUE)) ## na.rm=T FEH + ymin <- min(sapply(all.x, min, na.rm=TRUE)) + xmax <- max(centers) + 0.5 + xmin <- -0.5 + plot(c(xmin, xmax), c(ymin, ymax), type = "n", main = main, + xlab = '', ylab = ylab, xaxt = "n") + for(i in 1:n) { + plot.values <- bpx(all.x[[i]], centers[i]) + lines(plot.values$x1, plot.values$y1) + lines(plot.values$x2, plot.values$y2) + lines(plot.values$q1.x, plot.values$q1.y) + lines(plot.values$q3.x, plot.values$q3.y) + lines(plot.values$med.x, plot.values$med.y) + } + + if(is.logical(name)) { + if(name) + mgp.axis(1, centers, + sapply(substitute(list(...)), deparse)[2:(n + 1)], + srt=srtx, + adj=if(srtx==0).5 + else 1, + axistitle=xlab) + } + else mgp.axis(1, centers, name, srt=srtx, + adj=if(srtx==0).5 + else 1, + axistitle=xlab) + + invisible(centers) +} + +bpx <- function(y, offset) +{ + y <- y[!is.na(y)] ## FEH 30Jun97 + n <- length(y) + delta <- 1/(n + 1) + prob <- seq(delta, 1 - delta, delta) + quan <- sort(y) + med <- median(y) + q1 <- median(y[y < med]) + q3 <- median(y[y > med]) + first.half.p <- prob[quan <= med] + second.half.p <- 1 - prob[quan > med] + plotx <- c(first.half.p, second.half.p) + + ## calculating the ends of the first quartile line + + qx <- approx(quan, plotx, xout = q1)$y + q1.x <- c( - qx, qx) + offset + + ## calculating the ends of the third quartile line + + qx <- approx(quan, plotx, xout = q3)$y + q3.x <- c( - qx, qx) + offset + q1.y <- c(q1, q1) + q3.y <- c(q3, q3) + med.x <- c( - max(first.half.p), max(first.half.p)) + offset + med.y <- c(med, med) + return(list(x1 = ( - plotx) + offset, y1 = quan, x2 = plotx + offset, + y2 = quan, q1.y = q1.y, q1.x = q1.x, q3.y = q3.y, q3.x = q3.x, + med.y = med.y, med.x = med.x)) +} |
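A usage sketch: side-by-side box-percentile plots of three simulated samples, labeled via the name argument.

    set.seed(4)
    bpplot(rnorm(300), rexp(300), runif(300),
           name = c('normal', 'exponential', 'uniform'))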
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/bystats.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/bystats.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,261 @@ +bystats <- function(y, ..., fun, nmiss, subset) +{ + ## Fri, 16 Sep 2005 - Shawn@ori.org removed left argument to + ## interaction + x <- interaction(..., drop=TRUE, sep=" ") + l <- levels(x) + if(any(is.na(x))) { + l <- c(l, "NA") + attr(x,"class") <- NULL + x[is.na(x)] <- length(l) + levels(x) <- l + attr(x,'class') <- "factor" + } + + y <- as.matrix(y) + if(!missing(subset)) { + x <- x[subset] + y <- y[subset,,drop=FALSE] + } + + if(missing(fun)) { + fun <- function(y) apply(y, 2, mean) + + r <- range(y, na.rm=TRUE) + uy <- unique(y[!is.na(y)]) #fixed 1Jun95, 16Mar96 + funlab <- if(length(uy)==2 && r[1]==0 & r[2]==1) + "Fraction" + else + "Mean" + } else { + funlab <- as.character(substitute(fun)) + funlab <- funlab[length(funlab)] #handles fun=function(x)mean(x) + } + lab <- as.character(sys.call())[-1] + m <- (!missing(fun)) + (!missing(nmiss)) + (!missing(subset)) + lab <- lab[1:(length(lab)-m)] + if(length(lab)>2) + lab2 <- paste(lab[-1],collapse=", ") + else + lab2 <- lab[-1] + heading <- if(funlab=="") + paste(lab[1],"by",lab2) + else + paste(funlab,"of",lab[1],"by",lab2) + + nna <- !is.na(y %*% rep(1,ncol(y))) + N <- sum(nna) + stats <- fun(y[nna,,drop=FALSE]) + nstats <- length(stats) + name.stats <- if(length(dn <- dimnames(stats))) + as.vector(outer(dn[[1]],dn[[2]], + FUN=function(a,b)paste(b, a))) + else + names(stats) + + if(length(name.stats)) + funlab <- name.stats + if(nstats>1 && length(name.stats)==0) + funlab <- rep(" ", nstats) + s <- matrix(NA, nrow=length(l) + 1, ncol=2 + nstats, + dimnames=list(c(l, "ALL"),c("N", "Missing", funlab))) + j <- 0 + for(i in l) { + j <- j+1 + w <- y[x==i,,drop=FALSE] + nna <- !is.na(w %*% rep(1,ncol(w))) + n <- sum(nna) + s[j,] <- c(n, nrow(w)-n, + if(n) fun(w[nna,,drop=FALSE]) + else rep(NA,nstats)) + } + + s[j+1,] <- c(N, nrow(y)-N, stats) + if((!missing(nmiss) && !nmiss) || (missing(nmiss) && all(s[,"Missing"]==0))) + s <- s[,-2] + + attr(s, "heading") <- heading + attr(s, "byvarnames") <- lab2 + attr(s,'class') <- "bystats" + s +} + +print.bystats <- function(x, ...) +{ + cat("\n",attr(x,"heading"),"\n\n") + attr(x,"heading") <- NULL + attr(x,"byvarnames") <- NULL + attr(x,'class') <- NULL + invisible(print(x, ...)) +} + +latex.bystats <- function(object, + title=first.word(expr=substitute(object)), + caption=attr(object,"heading"), + rowlabel=attr(object,"byvarnames"), ...) +{ + dm <- dimnames(object) + ##inn <- c("%","<=","<",">=",">","\\[") + ##out <- c("\\\\%","$\\\\leq$","$<$","$\\\\geq$","$>$","\\\\verb|[|") + ##dm[[1]] <- translate(dm[[1]],inn,out) + ##dm[[2]] <- translate(dm[[2]],inn,out) + inn <- c("%","<=","<",">=",">","[") + out <- c("\\%","$\\leq$","$<$","$\\geq$","$>$","\\verb|[|") + dimnames(object) <- dm + caption <- sedit(caption, "cbind", "") + latex(unclass(object), title=title, caption=caption, rowlabel=rowlabel, + n.rgroup=c(nrow(object)-1,1), ...) 
+} + +bystats2 <- function(y, v, h, fun, nmiss, subset) +{ + y <- as.matrix(y) + if(!missing(subset)) { + y <- y[subset,,drop=FALSE]; + v <- v[subset]; + h <- h[subset] + } + + v <- factor(v, exclude=NULL) + h <- factor(h, exclude=NULL) + + lv <- levels(v) + lh <- levels(h) + nv <- length(lv) + nh <- length(lh) + + if(missing(fun)) { + fun <- function(y) apply(y, 2, mean) + r <- range(y, na.rm=TRUE) + funlab <- if(length(r)==2 && r[1]==0 & r[2]==1) "Fraction" + else "Mean" + } else { + funlab <- as.character(substitute(fun)) + funlab <- funlab[length(funlab)] #handles fun=function(x)mean(x) + } + lab <- as.character(sys.call())[-1] + m <- (!missing(fun)) + (!missing(nmiss)) + (!missing(subset)) + lab <- lab[1:(length(lab)-m)] + if(length(lab)>2) + lab2 <- paste(lab[-1],collapse=", ") + else + lab2 <- lab[-1] + + heading <- if(funlab=="") + paste(lab[1],"by",lab2) + else + paste(funlab,"of",lab[1],"by",lab2) + + nna <- !is.na(y %*% rep(1,ncol(y))) + N <- sum(nna) + stats <- fun(y[nna,,drop=FALSE]) + nstats <- length(stats) + name.stats <- if(length(dn <- dimnames(stats))) + as.vector(outer(dn[[1]],dn[[2]],FUN=function(a,b)paste(b,a))) + else + names(stats) + + if(length(name.stats)) + funlab <- name.stats + + if(nstats>1 && length(name.stats)==0) + funlab <- rep(" ", nstats) + + s <- array(NA,dim=c(nv+1,nh+1,2+nstats), + dimnames=list(c(lv,"ALL"), c(lh,"ALL"), c("N","Missing",funlab))) + + for(xv in c(lv,"ALL")) { + for(xh in c(lh,"ALL")) { + if(xv=="ALL" && xh=="ALL") + st <- c(N, nrow(y)-N, stats) + else { + if(xv=="ALL") + u <- h==xh + else if(xh=="ALL") + u <- v==xv + else + u <- h==xh & v==xv + + if(any(u)) { + w <- y[u,,drop=FALSE] + nna <- !is.na(w %*% rep(1,ncol(w))) + n <- sum(nna) + st <- c(n, nrow(w)-n, fun(w[nna,,drop=FALSE])) + } else st <- c(0, n, rep(NA, length(stats))) + } + s[xv,xh,] <- st + } + } + + if((!missing(nmiss) && !nmiss) || + (missing(nmiss) && all(s[,,"Missing"]==0))) + s <- s[,,-2,drop=FALSE] + + attr(s, "heading") <- heading + attr(s, "byvarnames") <- lab[-1] + attr(s,'class') <- "bystats2" + s +} + +print.bystats2 <- function(x, abbreviate.dimnames=FALSE, + prefix.width=max(nchar(dimnames(x)[[1]])),...) +{ + cat("\n",attr(x,"heading"),"\n\n") + if(!exists("print.char.matrix")) { # Vanilla S + attr(x, "heading") <- attr(x, "byvarnames") <- attr(x, "class") <- + NULL + return(invisible(print(x))) + } + + d <- dim(x) + cstats <- array("", dim=d[1:3]) + + header <- matrix(paste(dimnames(x)[[3]],collapse="\n"),1,1) + print.char.matrix(header) + + for(k in 1:d[3]) + cstats[,,k] <- format(x[,,k]) + + dimn <- dimnames(x)[1:2] + names(dimn) <- attr(x,"byvarnames") + cstats2 <- matrix("", nrow=d[1], ncol=d[2], dimnames=dimn) + for(i in 1:d[1]) { + for(j in 1:d[2]) { + cstats2[i,j] <- paste(cstats[i,j,],collapse="\n") + } + } + invisible(print.char.matrix(cstats2,...)) +} + +latex.bystats2 <- function(object, + title=first.word(expr=substitute(object)), + caption=attr(object, "heading"), + rowlabel="", ...) 
+{ + dm <- dimnames(object) + inn <- c("%", "<=", "<", ">=", ">", "[") + out <- c("\\%", "$\\leq$","$<$", "$\\geq$","$>$", "\\verb|[|") + dm[[1]] <- sedit(dm[[1]], inn, out) + dm[[2]] <- sedit(dm[[2]],inn,out) + dm[[3]] <- sedit(dm[[3]],inn,out) + dimnames(object) <- dm + caption <- sedit(caption, "cbind", "") + d <- dim(object) + dn <- rep(dimnames(object)[[3]], d[2]) + st <- matrix(NA, nrow=d[1], ncol=d[2]*d[3], + dimnames=list(dimnames(object)[[1]], dn)) + + for(i in 1:d[1]) { + l <- 0 + for(j in 1:d[2]) { + for(k in 1:d[3]) { + l <- l+1 + st[i,l] <- object[i,j,k] + } + } + } + + latex(st, title=title, caption=caption, rowlabel=rowlabel, + n.rgroup=c(nrow(st)-1,1), + cgroup=dimnames(object)[[2]], n.cgroup=rep(d[3],d[2]),...) +} |
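A usage sketch with simulated data: medians of y by two factors, via bystats (one row per combination of the ... arguments) and bystats2 (two-way layout).

    set.seed(5)
    sex <- factor(sample(c('f','m'), 200, TRUE))
    trt <- factor(sample(c('a','b'), 200, TRUE))
    y   <- rnorm(200)
    bystats(y, sex, trt, fun = median)   # one row per sex*trt combination, plus ALL
    bystats2(y, sex, trt, fun = median)  # sex x trt table with marginals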
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/capitalize.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/capitalize.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,7 @@ +capitalize <- function(string) { + capped <- grep('^[^A-Z]*$', string, perl=TRUE) + + substr(string[capped], 1,1) <- toupper(substr(string[capped], 1,1)) + return(string) +} + |
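A usage sketch: the perl regex selects only strings containing no capitals at all, so mixed-case input is left alone.

    capitalize(c('hello', 'World', 'several words'))
    ## -> "Hello" "World" "Several words"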
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/ciapower.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/ciapower.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,97 @@ +## tref time at which mortalities estimated +## n1 total sample size, stratum 1 +## n2 total sample size, stratum 2 +## m1c tref-year mortality, stratum 1 control +## m2c " " 2 " +## r1 % reduction in m1c by intervention, stratum 1 +## r2 % reduction in m2c by intervention, stratum 2 +## accrual duration of accrual period +## tmin minimum follow-up time +## alpha type I error +## pr set to T to print intermediate results + +ciapower <- function(tref, + n1, + n2, + m1c, + m2c, + r1, + r2, + accrual, + tmin, + alpha=.05, + pr=TRUE) +{ + ## Find mortality in intervention groups + if(m1c>1 | m2c>1) + stop("m1c and m2c must be fractions") + + m1i <- (1-r1/100)*m1c + m2i <- (1-r2/100)*m2c + + if(pr) { + cat("\nAccrual duration:",accrual,"y Minimum follow-up:",tmin,"y\n") + cat("\nSample size Stratum 1:",n1," Stratum 2:",n2,"\n") + cat("\nAlpha=",alpha,"\n") + d <- list(c("Stratum 1","Stratum 2"), c("Control","Intervention")) + m <- cbind(c(m1c,m2c),c(m1i,m2i)) + dimnames(m) <- d + cat("\n",tref,"-year Mortalities\n",sep=""); print(m) + } + + ## Find exponential hazards for all groups + lam1c <- -logb(1-m1c)/tref + lam2c <- -logb(1-m2c)/tref + lam1i <- -logb(1-m1i)/tref + lam2i <- -logb(1-m2i)/tref + + if(pr) { + lam <- cbind(c(lam1c,lam2c),c(lam1i,lam2i)) + dimnames(lam) <- d + cat("\nHazard Rates\n"); print(lam) + } + + ## Find probability that a subject will have her event observed during + ## the study, for all groups + tmax <- tmin+accrual + p1c <- 1-1/accrual/lam1c*(exp(-tmin*lam1c)-exp(-tmax*lam1c)) + p2c <- 1-1/accrual/lam2c*(exp(-tmin*lam2c)-exp(-tmax*lam2c)) + p1i <- 1-1/accrual/lam1i*(exp(-tmin*lam1i)-exp(-tmax*lam1i)) + p2i <- 1-1/accrual/lam2i*(exp(-tmin*lam2i)-exp(-tmax*lam2i)) + + if(pr) { + p <- cbind(c(p1c,p2c), c(p1i,p2i)) + dimnames(p) <- d + cat("\nProbabilities of an Event During Study\n") + print(p) + } + + ##Find expected number of events, all groups + m1c <- p1c*n1/2 + m2c <- p2c*n2/2 + m1i <- p1i*n1/2 + m2i <- p2i*n2/2 + + if(pr) { + m <- cbind(c(m1c,m2c), c(m1i,m2i)) + dimnames(m) <- d + cat("\nExpected Number of Events\n") + print(round(m,1)) + } + + ## Find expected value of observed log hazard ratio + delta <- logb((lam1i/lam1c)/(lam2i/lam2c)) + if(pr) + cat("\nRatio of hazard ratios:",format(exp(delta)),"\n") + + ## Find its variance + v <- 1/m1c + 1/m2c + 1/m1i + 1/m2i + sd <- sqrt(v) + if(pr) + cat("Standard deviation of log ratio of ratios:",format(sd),"\n") + + z <- -qnorm(alpha/2) + ## if(pr) cat("\nCritical value:",format(z),"\n") + + c(Power = 1 - ( pnorm(z - abs(delta)/sd) - pnorm(-z - abs(delta)/sd) ) ) +} |
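A usage sketch following the argument comments above: power to detect a treatment-by-stratum interaction when 3-year mortality is 0.20 in stratum 1 and 0.10 in stratum 2, and the intervention reduces both by 40%.

    ciapower(tref = 3, n1 = 1500, n2 = 1500, m1c = .2, m2c = .1,
             r1 = 40, r2 = 40, accrual = 1.5, tmin = 3, pr = FALSE)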
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/cnvrt.coords.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/cnvrt.coords.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,100 @@ +cnvrt.coords <- function (x, y = NULL, input = c("usr", "plt", "fig", + "dev", "tdev")) { + input <- match.arg(input) + xy <- xy.coords(x, y, recycle = TRUE) + cusr <- par("usr") + cplt <- par("plt") + cfig <- par("fig") + cdin <- par("din") + comi <- par("omi") + cdev <- c(comi[2]/cdin[1], (cdin[1] - comi[4])/cdin[1], comi[1]/cdin[2], + (cdin[2] - comi[3])/cdin[2]) + if (input == "usr") { + usr <- xy + plt <- list() + plt$x <- (xy$x - cusr[1])/(cusr[2] - cusr[1]) + plt$y <- (xy$y - cusr[3])/(cusr[4] - cusr[3]) + fig <- list() + fig$x <- plt$x * (cplt[2] - cplt[1]) + cplt[1] + fig$y <- plt$y * (cplt[4] - cplt[3]) + cplt[3] + dev <- list() + dev$x <- fig$x * (cfig[2] - cfig[1]) + cfig[1] + dev$y <- fig$y * (cfig[4] - cfig[3]) + cfig[3] + tdev <- list() + tdev$x <- dev$x * (cdev[2] - cdev[1]) + cdev[1] + tdev$y <- dev$y * (cdev[4] - cdev[3]) + cdev[3] + return(list(usr = usr, plt = plt, fig = fig, dev = dev, + tdev = tdev)) + } + if (input == "plt") { + plt <- xy + usr <- list() + usr$x <- plt$x * (cusr[2] - cusr[1]) + cusr[1] + usr$y <- plt$y * (cusr[4] - cusr[3]) + cusr[3] + fig <- list() + fig$x <- plt$x * (cplt[2] - cplt[1]) + cplt[1] + fig$y <- plt$y * (cplt[4] - cplt[3]) + cplt[3] + dev <- list() + dev$x <- fig$x * (cfig[2] - cfig[1]) + cfig[1] + dev$y <- fig$y * (cfig[4] - cfig[3]) + cfig[3] + tdev <- list() + tdev$x <- dev$x * (cdev[2] - cdev[1]) + cdev[1] + tdev$y <- dev$y * (cdev[4] - cdev[3]) + cdev[3] + return(list(usr = usr, plt = plt, fig = fig, dev = dev, + tdev = tdev)) + } + if (input == "fig") { + fig <- xy + plt <- list() + plt$x <- (fig$x - cplt[1])/(cplt[2] - cplt[1]) + plt$y <- (fig$y - cplt[3])/(cplt[4] - cplt[3]) + usr <- list() + usr$x <- plt$x * (cusr[2] - cusr[1]) + cusr[1] + usr$y <- plt$y * (cusr[4] - cusr[3]) + cusr[3] + dev <- list() + dev$x <- fig$x * (cfig[2] - cfig[1]) + cfig[1] + dev$y <- fig$y * (cfig[4] - cfig[3]) + cfig[3] + tdev <- list() + tdev$x <- dev$x * (cdev[2] - cdev[1]) + cdev[1] + tdev$y <- dev$y * (cdev[4] - cdev[3]) + cdev[3] + return(list(usr = usr, plt = plt, fig = fig, dev = dev, + tdev = tdev)) + } + if (input == "dev") { + dev <- xy + fig <- list() + fig$x <- (dev$x - cfig[1])/(cfig[2] - cfig[1]) + fig$y <- (dev$y - cfig[3])/(cfig[4] - cfig[3]) + plt <- list() + plt$x <- (fig$x - cplt[1])/(cplt[2] - cplt[1]) + plt$y <- (fig$y - cplt[3])/(cplt[4] - cplt[3]) + usr <- list() + usr$x <- plt$x * (cusr[2] - cusr[1]) + cusr[1] + usr$y <- plt$y * (cusr[4] - cusr[3]) + cusr[3] + tdev <- list() + tdev$x <- dev$x * (cdev[2] - cdev[1]) + cdev[1] + tdev$y <- dev$y * (cdev[4] - cdev[3]) + cdev[3] + return(list(usr = usr, plt = plt, fig = fig, dev = dev, + tdev = tdev)) + } + if (input == "tdev") { + tdev <- xy + dev <- list() + dev$x <- (tdev$x - cdev[1])/(cdev[2] - cdev[1]) + dev$y <- (tdev$y - cdev[3])/(cdev[4] - cdev[3]) + fig <- list() + fig$x <- (dev$x - cfig[1])/(cfig[2] - cfig[1]) + fig$y <- (dev$y - cfig[3])/(cfig[4] - cfig[3]) + plt <- list() + plt$x <- (fig$x - cplt[1])/(cplt[2] - cplt[1]) + plt$y <- (fig$y - cplt[3])/(cplt[4] - cplt[3]) + usr <- list() + usr$x <- plt$x * (cusr[2] - cusr[1]) + cusr[1] + usr$y <- plt$y * (cusr[4] - cusr[3]) + cusr[3] + tdev <- list() + tdev$x <- dev$x * (cdev[2] - cdev[1]) + cdev[1] + tdev$y <- dev$y * (cdev[4] - cdev[3]) + cdev[3] + return(list(usr = usr, plt = plt, fig = fig, dev = dev, + tdev = tdev)) + } +} |
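A usage sketch: after drawing a plot, convert the center of the plot region ('plt' coordinates) into the other four coordinate systems.

    plot(1:10)
    cnvrt.coords(0.5, 0.5, input = 'plt')$usr  # plot-region center in user coordinates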
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/confbar.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/confbar.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,104 @@ +confbar <- function(at, est, se, width, + q=c(.7,.8,.9,.95,.99), + col=gray(c(0,.25,.5,.75,1)), + type=c("v","h"), labels=TRUE, ticks=FALSE, + cex=.5, side="l", lwd=5, clip=c(-1e30, 1e30), + fun=function(x)x, + qfun=function(x) + ifelse(x==.5, qnorm(x), + ifelse(x<.5,qnorm(x/2),qnorm((1+x)/2)))) +{ + type <- match.arg(type) + iusr <- if(type=="v") + 1:2 + else + 3:4 + + if(missing(width)) + width <- diff(par("usr")[iusr])*.02 + + if(side=="b") + side <- "l" #treat bottom as left + + if(length(q)!=length(col)) + stop("q and col must have same length") + + q <- c(1-rev(q), .5, q) + ##qe <- seq(.01, .99, length=n) + ##col <- seq(.8,.01, length=n/2) + col <- c(rev(col), col) + w <- width/2 + if(type=="v") { + polyg <- function(a, b, col, clip) + { + b[b < clip[1] | b > clip[2]] <- NA + polygon(a, b, col=col) + } + + Lines <- function(a, b, lwd=1, clip) + { + b[b < clip[1] | b > clip[2]] <- NA + lines(a, b, lwd=lwd) + } + + Text <- function(a, b, clip, ...) + { + b[b < clip[1] | b > clip[2]] <- NA + text(a, b, ...) + } + + srt <- 0 + } else { + polyg <- function(a, b, col, clip) + { + b[b < clip[1] | b > clip[2]] <- NA + polygon(b, a, col=col) + } + + Lines <- function(a, b, lwd=1, clip) + { + b[b < clip[1] | b > clip[2]] <- NA + lines(b, a, lwd=lwd) + } + + Text <- function(a, b, clip, ...) + { + b[b < clip[1] | b > clip[2]] <- NA + text(b, a, ...) + } + + srt <- 45 + } + for(i in 1:(length(q)-1)) + polyg(c(at-w,at+w,at+w,at-w),fun(est+se*qfun(c(q[i],q[i],q[i+1],q[i+1]))), + col=col[i], clip=clip) + + a <- fun(est) + z <- w*.24 + Lines(c(at-w-3.5*z, at+w+3.5*z), c(a,a), lwd=lwd, clip=clip) + a <- fun(est+se*qfun(q)) + do <- TRUE + if(labels || ticks) + for(i in 1:length(q)) { + b <- c(a[i], a[i]) + if(ticks) { + Lines(c(at-w-z,at-w),b, clip=clip) + Lines(c(at+w+z,at+w),b, clip=clip) + } + + if(labels && do && q[i]!=.5) { + if(side=="l") + Text(at-w-2*z, a[i], format(max(1-q[i],q[i])), + cex=cex, adj=1, srt=srt, clip=clip) + else + Text(at+w+2*z, a[i], format(max(1-q[i],q[i])), + cex=cex, adj=0, srt=srt, clip=clip) + } + + if(q[i]!=.5) + do <- !do + } + + names(a) <- format(q) + invisible(a) +} |
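A usage sketch: a vertical multi-level confidence bar for an estimate of 0.5 with standard error 0.1, drawn onto an open plot.

    plot(c(0, 2), c(0, 1), type = 'n', xlab = '', ylab = '')
    confbar(at = 1, est = 0.5, se = 0.1)  # shades the default 0.7-0.99 coverage levels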
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/consolidate.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/consolidate.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,32 @@
+combine <- function(x, value, protect, ...) stop("combine() deprecated due to naming conflict; renamed consolidate()")
+'combine<-' <- function(x, protect, ..., value) stop("combine<-() deprecated due to naming conflict; renamed consolidate<-()")
+
+consolidate <- function(x, value, protect, ...) {
+  UseMethod("consolidate")
+}
+
+'consolidate<-' <- function(x, protect, ..., value)
+  eval.parent(replace(match.call(expand.dots=FALSE), list=1,
+                      values=list(as.name("consolidate"))))
+
+consolidate.default <- function(x, value, protect=FALSE, ...) {
+  if(missing(x) || is.null(x))
+    x <- vector()
+
+  if(missing(value) || is.null(value))
+    value <- vector()
+
+  xNames <- names(x)
+  valueNames <- names(value)
+
+  if(is.null(xNames) || is.null(valueNames) || all(valueNames == "") ||
+     all(xNames == ""))
+    return(c(x, value))
+
+  vars <- intersect(xNames, valueNames[valueNames != ""])
+  if(!protect)
+    x[vars] <- value[vars]
+
+  c(x, value[!valueNames %in% vars])
+}
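A usage sketch: named elements of value overwrite same-named elements of x unless protect=TRUE.

    consolidate(c(a = 1, b = 2), c(b = 9, c = 3))                  # a=1 b=9 c=3
    consolidate(c(a = 1, b = 2), c(b = 9, c = 3), protect = TRUE)  # a=1 b=2 c=3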
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/cpower.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/cpower.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,122 @@ +## tref time at which mortalities estimated +## n total sample size +## mc tref-year mortality, control +## r % reduction in m1c by intervention +## accrual duration of accrual period +## tmin minimum follow-up time +## noncomp.c % non-compliant in control group (drop-ins) +## noncomp.i % non-compliant in intervention group (non-adherers) +## alpha type I error +## nc Sample size for control (if not n/2) +## ni Sample size for intervention (if not n/2) +## pr set to T to print intermediate results +## +## non-compliance handled by an approximation of Eq. 5.4 of +## Lachin JM, Foulkes MA (1986): Evaluation of sample size and power for +## analyses of survival with allowance for nonuniform patient entry, +## losses to follow-up, noncompliance, and stratification. +## Here we're using log hazard ratio instead of their hazard difference + +cpower <- function(tref, + n, + mc, + r, + accrual, + tmin, + noncomp.c=0, + noncomp.i=0, + alpha=.05, + nc, ni, + pr=TRUE) +{ + if(mc>1) + stop("mc should be a fraction") + + ## Find mortality in intervention group + mi <- (1-r/100)*mc + + if(missing(nc) | missing(ni)) { + nc <- n/2; ni <- n/2 + } else n <- nc+ni + + if(pr) { + cat("\nAccrual duration:",accrual,"y Minimum follow-up:",tmin,"y\n") + cat("\nTotal sample size:",n,"\n") + cat("\nAlpha=",alpha,"\n") + d <- c("Control","Intervention") + m <- c(mc,mi) + names(m) <- d + cat("\n",tref,"-year Mortalities\n",sep=""); print(m) + } + + ## Find exponential hazards for all groups + lamc <- -logb(1-mc)/tref + lami <- -logb(1-mi)/tref + + if(pr) { + lam <- c(lamc,lami) + names(lam) <- d + cat("\nHazard Rates\n"); + print(lam) + } + + ## Find probability that a subject will have her event observed during + ## the study, for all groups + tmax <- tmin+accrual + pc <- if(accrual==0) + 1-exp(-lamc*tmin) + else + 1-1/accrual/lamc*(exp(-tmin*lamc)-exp(-tmax*lamc)) + + pi <- if(accrual==0) + 1-exp(-lami*tmin) + else + 1-1/accrual/lami*(exp(-tmin*lami)-exp(-tmax*lami)) + + if(pr) { + p <- c(pc,pi) + names(p) <- d + cat("\nProbabilities of an Event During Study\n") + print(p) + } + + ## Find expected number of events, all groups + mc <- pc*nc + mi <- pi*ni + + if(pr) { + m <- c(mc,mi) + names(m) <- d + cat("\nExpected Number of Events\n") + print(round(m,1)) + } + + ## Find expected value of observed log hazard ratio + delta <- logb(lami/lamc) + if(pr) + cat("\nHazard ratio:",format(exp(delta)),"\n") + + if(noncomp.c+noncomp.i>0) { + if(pr) + cat("\nDrop-in rate (controls):",noncomp.c, + "%\nNon-adherence rate (intervention):",noncomp.i,"%\n",sep="") + + delta <- delta * (1 - (noncomp.c+noncomp.i)/100) + if(pr) + cat("Effective hazard ratio with non-compliance:", + format(exp(delta)),"\n") + } + + ## Find its variance + v <- 1/mc + 1/mi + + ## Get same as /sasmacro/samsizc.sas if use 4/(mc+mi) + + sd <- sqrt(v) + if(pr) + cat("Standard deviation of log hazard ratio:",format(sd),"\n") + + z <- -qnorm(alpha/2) + + c(Power = 1 - (pnorm(z - abs(delta)/sd) - pnorm(-z - abs(delta)/sd))) +} |
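A usage sketch following the argument comments above: power for a 25% reduction in a 0.20 five-year mortality with 1000 subjects total, with drop-in and non-adherence diluting the log hazard ratio.

    cpower(tref = 5, n = 1000, mc = .2, r = 25, accrual = 1.5,
           tmin = 5, noncomp.c = 10, noncomp.i = 15, pr = FALSE)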
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/curveRep.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/curveRep.s	Wed Jun 28 20:28:48 2017 -0400
| b"@@ -0,0 +1,403 @@\n+curveRep <- function(x, y, id, kn=5, kxdist=5, k=5, p=5, force1=TRUE,\n+ metric=c('euclidean','manhattan'),\n+ smooth=FALSE, extrap=FALSE, pr=FALSE) {\n+ metric <- match.arg(metric)\n+ \n+ id <- as.character(id)\n+ omit <- is.na(x + y)\n+ missfreq <- NULL; nomit <- sum(omit)\n+ if(nomit) {\n+ m <- tapply(omit, id, sum)\n+ missfreq <- table(m)\n+ x <- x[!omit]; y <- y[!omit]; id <- id[!omit]\n+ }\n+ n <- length(x)\n+ ns <- table(id)\n+ nunique <- length(unique(ns))\n+\n+ if(nunique==1 || nunique <= kn) ncuts <- c(sort(unique(ns)),Inf) else {\n+ grouped.n <- cut2(ns, g=kn)\n+ ncuts <- cut2(ns, g=kn, onlycuts=TRUE)\n+ if(force1 && ncuts[2] > 1 && min(ns)==1)\n+ ncuts <- sort(unique(c(1:2, ncuts)))\n+ }\n+ nlev <- length(ncuts)-1\n+ res <- vector('list', nlev)\n+ names(res) <- as.character(ncuts[-length(ncuts)])\n+\n+ clust <- function(x, k)\n+ if(diff(range(x))==0 || NROW(x) < k+1) rep(1, NROW(x)) else\n+ clara(x, k, metric=metric)$clustering\n+\n+ interp <- if(extrap)\n+ function(x, y=NULL, xout) approxExtrap(x, y, xout=xout)$y else\n+ function(x, y=NULL, xout) approx(x, y, xout=xout, rule=2)$y\n+\n+ ## Cluster by sample size first\n+ if(pr) cat('Creating',nlev,'sample size groups\\n\\n')\n+ for(i in 1:nlev) {\n+ ## Get list of curve ids in this sample size group\n+ if(i==nlev) {\n+ below <- ns <= ncuts[i+1]\n+ brack <- ']'\n+ } else {\n+ below <- ns < ncuts[i+1]\n+ brack <- ')'\n+ }\n+ ids <- names(ns)[ns >= ncuts[i] & below]\n+ if(pr) cat('Processing sample size [',ncuts[i],',',ncuts[i+1],\n+ brack,' containing ', length(ids),' curves\\n',sep='')\n+ if(length(ids) < kxdist) res[[i]] <- list(ids) else {\n+ ## Cluster by distribution of x within sample size group\n+ ## Summarize these ids by clustering on range of x,\n+ ## plus the largest gap if minimum sample size > 2\n+ ## Use only the x position is min sample size is 1\n+ s <- id %in% ids\n+ ssize <- min(tapply(x[s], id[s], function(w) length(unique(w))))\n+ z <- tapply((1:n)[s], id[s],\n+ function(j) if(ssize==1) x[j][1] else\n+ if(ssize==2) range(x[j]) else\n+ c(range(x[j]),max(diff(sort(x[j])))))\n+ z <- matrix(unlist(z), nrow=length(z), byrow=TRUE)\n+ if(kxdist > nrow(z) - 1)\n+ stop('number of curves to cluster must be >= kxdist+1')\n+ distclusters <- clust(z, kxdist)\n+ if(pr) {\n+ cat(' Number of curves in each x-dist cluster:\\n')\n+ print(table(distclusters))\n+ }\n+ resi <- list()\n+ ## Within x distribution and within sample size interval,\n+ ## cluster on linearly interpolated y at p equally spaced x points\n+ ## unless <2 unique x-points for some curve\n+ for(clus in 1:max(distclusters)) {\n+ idc <- ids[distclusters==clus]\n+ if(pr) cat(' Processing x-distribution group', clus,\n+ 'containing', length(idc),'curves\\n')\n+ s <- id %in% idc\n+ ssize <- min(tapply(x[s], id[s], function(w) length(unique(w))))\n+ if(ssize > 1) {\n+ xrange <- range(x[s])\n+ xseq <- seq(xrange[1], xrange[2], length.out=p)\n+ }\n+ g <- if(ssize==1) function(j) c(mean(x[j]), mean(y[j])) else\n+ if(smooth && ssize > 2)\n+ function(j) interp(clowess(x[j],y[j]), xout=xseq) else\n+ function(j) interp(x[j], y[j], xout=xseq)\n+ \n+ z <- tapply((1:n)[s], id[s], g)\n+ z <- matrix(unlist(z), nrow=length(idc), byrow=TRUE)\n+ yclusters <- clust(z, min(k, max(length(idc)-2,1)))\n+ names(yclusters) <- idc\n+ resi[[clus]] <- yclusters\n+ }\n+ res[[i]] <- resi\n+ }\n+ }\n+ structure(list(res=res, ns=table(ns), nomit=nomit, missfreq=missfreq,\n+ ncuts=ncuts, kn=kn, kxdist=kxdist, k=k, p=p,\n+ smooth=smooth, x=x, y=y, id=id),\n+ 
class='curveRep')\n+}\n+\n+print.curveRep <- functi"..b"idcol))\n+ function(x, y, subscripts, groups, type, ...) {\n+ groups <- as.factor(groups)[subscripts]\n+ textfun(subscripts, groups)\n+ for(g in levels(groups)) {\n+ idx <- groups == g\n+ xx <- x[idx]; yy <- y[idx]; ccols <- idcol[g]\n+ if (any(idx)) { \n+ switch(type, \n+ p = lpoints(xx, yy, col = ccols), \n+ l = llines(xx, yy, col = ccols), \n+ b = { lpoints(xx, yy, col = ccols) \n+ llines(xx, yy, col = ccols) }) \n+ } \n+ } \n+ } else function(x, y, subscripts, groups, ...) {\n+ panel.superpose(x, y, subscripts, groups, ...)\n+ textfun(subscripts, groups)\n+ }\n+ if(is.character(m))\n+ print(xYplot(Y ~ X | distribution*cluster,\n+ method='quantiles', probs=probs, nx=nx,\n+ xlab=xlab, ylab=ylab,\n+ xlim=xlim, ylim=ylim,\n+ main=nname, as.table=TRUE,\n+ panel=function(x, y, subscripts, ...) {\n+ if(length(subscripts)) {\n+ panel.xYplot(x, y, subscripts, ...)\n+ textfun(subscripts)\n+ }\n+ })) else\n+ print(xyplot(Y ~ X | distribution*cluster, groups=curve,\n+ xlab=xlab, ylab=ylab,\n+ xlim=xlim, ylim=ylim,\n+ type=if(nres[which]=='1')'b' else 'l',\n+ main=nname, panel=pan, as.table=TRUE))\n+ return(invisible())\n+ }\n+\n+ for(jn in which) {\n+ ngroup <- res[[jn]]\n+ for(jx in 1:length(ngroup)) {\n+ xgroup <- ngroup[[jx]]\n+ ids <- names(xgroup)\n+ for(jclus in 1:max(xgroup)) {\n+ rids <- ids[xgroup==jclus]\n+ nc <- length(rids)\n+ ids.in.cluster <- samp(rids)\n+ for(curve in 1:length(ids.in.cluster)) {\n+ s <- id %in% ids.in.cluster[curve]\n+ i <- order(x[s])\n+ type <- if(length(unique(x[s]))==1)'b' else 'l'\n+ if(curve==1) {\n+ plot(x[s][i], y[s][i], xlab=xlab, ylab=ylab,\n+ type='n', xlim=xlim, ylim=ylim)\n+ brack <- if(jn==nng) ']' else ')'\n+ z <- if(is.infinite(ncuts[jn+1])) ncuts[jn] else\n+ paste('[', ncuts[jn],',',ncuts[jn+1],brack,sep='')\n+ title(paste('n ', z, ' x=',jx,\n+ ' c=',jclus,' ',nc,' curves', sep=''), cex=.5)\n+ }\n+ lines(x[s][i], y[s][i], type=type,\n+ col=if(length(idcol))\n+ idcol[ids.in.cluster[curve]] else curve)\n+ }\n+ }\n+ if(fill && max(xgroup) < k)\n+ for(i in 1:(k - max(xgroup)))\n+ plot(0, 0, type='n', axes=FALSE, xlab='', ylab='')\n+ }\n+ }\n+}\n+\n+curveSmooth <- function(x, y, id, p=NULL, pr=TRUE) {\n+ omit <- is.na(x + y)\n+ if(any(omit)) {\n+ x <- x[!omit]; y <- y[!omit]; id <- id[!omit]\n+ }\n+ uid <- unique(id)\n+ m <- length(uid)\n+ pp <- length(p)\n+ if(pp) {\n+ X <- Y <- numeric(p*m)\n+ Id <- rep(id, length.out=p*m)\n+ }\n+ st <- 1\n+ en <- 0\n+ ncurve <- 0\n+ for(j in uid) {\n+ if(pr) {\n+ ncurve <- ncurve + 1\n+ if((ncurve %% 50) == 0) cat(ncurve,'')\n+ }\n+ s <- id==j\n+ xs <- x[s]\n+ ys <- y[s]\n+ if(length(unique(xs)) < 3) {\n+ if(pp) {\n+ en <- st + length(xs) - 1\n+ X[st:en] <- xs\n+ Y[st:en] <- ys\n+ Id[st:en] <- j\n+ }\n+ } else {\n+ if(pp) {\n+ uxs <- sort(unique(xs))\n+ xseq <- if(length(uxs) < p) uxs else\n+ seq(min(uxs), max(uxs), length.out=p)\n+ ye <- approx(clowess(xs, ys), xout=xseq)$y\n+ n <- length(xseq)\n+ en <- st + n - 1\n+ X[st:en] <- xseq\n+ Y[st:en] <- ye\n+ Id[st:en] <- j\n+ } else y[s] <- approx(clowess(xs, ys), xout=xs)$y\n+ }\n+ st <- en + 1\n+ }\n+ if(pr) cat('\\n')\n+ if(pp) {\n+ X <- X[1:en]\n+ Y <- Y[1:en]\n+ Id <- Id[1:en]\n+ list(x=X, y=Y, id=Id)\n+ } else list(x=x, y=y, id=id)\n+}\n" |
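A usage sketch (requires clara from the cluster package; data simulated): 100 curves of 10 points each, clustered by sample size, x-distribution, and trajectory at p interpolated points.

    set.seed(6)
    id <- rep(1:100, each = 10)
    x  <- runif(1000)
    y  <- x * rep(runif(100, -1, 1), each = 10) + rnorm(1000, sd = .1)
    w  <- curveRep(x, y, id, kn = 3, kxdist = 2, k = 4, p = 10, pr = FALSE)
    w  # print.curveRep summary of cluster sizes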
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/cut2.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/cut2.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,135 @@ +# $Id$ +## Function like cut but left endpoints are inclusive and labels are of +## the form [lower, upper), except that last interval is [lower,upper]. +## F. Harrell 3 Dec 90, modified 7 Mar 92, mod 30May95 (more efficient digits) +## Modified 2Jun95 (preserve label attribute) +## Modified 16Jun95 (categories with 1 unique value -> label=value, not interval) +## Modified 1Jul95 - if specified cuts, mindif would cause improper +## categorization if a cut was close to but not equal an actual value + +cut2 <- function(x, cuts, m=150, g, levels.mean=FALSE, digits, minmax=TRUE, + oneval=TRUE, onlycuts=FALSE) +{ + method <- 1 ## 20may02 + x.unique <- sort(unique(c(x[!is.na(x)],if(!missing(cuts))cuts))) + min.dif <- min(diff(x.unique))/2 + min.dif.factor <- 1 + + ## Make formatted values look good + if(missing(digits)) + digits <- if(levels.mean) 5 else 3 + + oldopt <- options(digits=digits) + on.exit(options(oldopt)) + + xlab <- attr(x, 'label') + + if(missing(cuts)) { + nnm <- sum(!is.na(x)) + if(missing(g)) g <- max(1,floor(nnm/m)) + if(g < 1) + stop('g must be >=1, m must be positive') + + options(digits=15) + n <- table(x) + xx <- as.double(names(n)) + options(digits=digits) + cum <- cumsum(n) + m <- length(xx) + + y <- as.integer(ifelse(is.na(x),NA,1)) + labs <- character(g) + cuts <- approx(cum, xx, xout=(1:g)*nnm/g, + method='constant', rule=2, f=1)$y + cuts[length(cuts)] <- max(xx) + lower <- xx[1] + upper <- 1e45 + up <- low <- double(g) + i <- 0 + for(j in 1:g) { + cj <- if(method==1 || j==1) cuts[j] else { + if(i==0) + stop('program logic error') + s <- if(is.na(lower)) FALSE else xx >= lower + cum.used <- if(all(s)) 0 else max(cum[!s]) + if(j==m) max(xx) else if(sum(s)<2) max(xx) else + approx(cum[s]-cum.used, xx[s], xout=(nnm-cum.used)/(g-j+1), + method='constant', rule=2, f=1)$y + } + + if(cj==upper) next + + i <- i + 1 + upper <- cj + y[x >= (lower-min.dif.factor*min.dif)] <- i + low[i] <- lower + lower <- if(j==g) upper else min(xx[xx > upper]) + + if(is.na(lower)) lower <- upper + + up[i] <- lower + } + + low <- low[1:i] + up <- up[1:i] + variation <- logical(i) + for(ii in 1:i) { + r <- range(x[y==ii], na.rm=TRUE) + variation[ii] <- diff(r) > 0 + } + if(onlycuts) return(unique(c(low, max(xx)))) + flow <- format(low) + fup <- format(up) + bb <- c(rep(')',i-1),']') + labs <- ifelse(low==up | (oneval & !variation), flow, + paste('[',flow,',',fup,bb,sep='')) + ss <- y==0 & !is.na(y) + if(any(ss)) + stop(paste('categorization error in cut2. 
Values of x not appearing in any interval:\n', + paste(format(x[ss],digits=12),collapse=' '), + '\nLower endpoints:', + paste(format(low,digits=12), collapse=' '), + '\nUpper endpoints:', + paste(format(up,digits=12),collapse=' '))) + + y <- structure(y, class='factor', levels=labs) + } else { + if(minmax) { + r <- range(x, na.rm=TRUE) + if(r[1]<cuts[1]) cuts <- c(r[1], cuts) + if(r[2]>max(cuts)) cuts <- c(cuts, r[2]) + } + + l <- length(cuts) + k2 <- cuts-min.dif + k2[l] <- cuts[l] + y <- cut(x, k2) + + if(!levels.mean) { + brack <- rep(")",l-1) + brack[l-1] <- "]" + fmt <- format(cuts) + ## If any interval has only one unique value, set label for + ## that interval to that value and not to an interval + labs <- paste("[",fmt[1:(l-1)],",",fmt[2:l], + brack,sep="") + + if(oneval) { + nu <- table(cut(x.unique,k2)) + + if(length(nu)!=length(levels(y))) + stop('program logic error') + levels(y) <- ifelse(nu==1,c(fmt[1:(l-2)],fmt[l]),labs) + } else + levels(y) <- labs + } + } + + if(levels.mean) { + means <- tapply(x, y, function(w)mean(w,na.rm=TRUE)) + levels(y) <- format(means) + } + attr(y,'class') <- "factor" + if(length(xlab)) label(y) <- xlab + y +} |
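A usage sketch: equal-frequency quartile groups, explicit cut points, and mean-valued labels.

    set.seed(7)
    x <- runif(1000)
    table(cut2(x, g = 4))                       # ~250 per [lower,upper) group
    table(cut2(x, c(.25, .5, .75)))             # user-supplied cuts
    levels(cut2(x, g = 4, levels.mean = TRUE))  # labels are within-group means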
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/data.frame.labelled.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/data.frame.labelled.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,11 @@ +## For every object in a data frame that has a 'label' attribute, make it +## class 'labelled' + +data.frame.labelled <- function(object) +{ + for(n in names(object)) + if(length(attr(object[[n]],'label'))) + attr(object[[n]],'class') <- c('labelled',attr(object[[n]],'class')) + + object +} |
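A usage sketch: any column carrying a 'label' attribute gains class 'labelled', so Hmisc's label methods apply.

    d <- data.frame(age = 21:23)
    attr(d$age, 'label') <- 'Age in years'
    d <- data.frame.labelled(d)
    class(d$age)  # "labelled"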
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/dataRep.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/dataRep.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,212 @@ +dataRep <- function(formula, data, subset, na.action) +{ + call <- match.call() + nact <- NULL + y <- match.call(expand.dots=FALSE) + if(missing(na.action)) + y$na.action <- na.delete + + y[[1]] <- as.name("model.frame") + + X <- eval(y, sys.parent()) + nact <- attr(X,"na.action") + n <- nrow(X) + nam <- names(X) + p <- length(nam) + types <- character(p) + parms <- character(p) + pctl <- vector('list',p) + margfreq <- vector('list',p) + Xu <- vector('list',p) + for(j in 1:p) { + namj <- nam[j] + xj <- X[[j]] + if(is.character(xj)) + xj <- as.factor(xj) + + if(is.factor(xj)) { + parms[[j]] <- paste(levels(xj),collapse=' ') + types[j] <- 'exact categorical' + } else if(inherits(xj,'roundN')) { + atr <- attributes(xj) + nam[j] <- atr$name + types[j] <- 'round' + parms[j] <- paste('to nearest',format(atr$tolerance)) + if(length(w <- atr$clip)) + parms[j] <- paste(parms[j],', clipped to [', + paste(format(w),collapse=','),']',sep='') + + pctl[[j]] <- atr$percentiles + } else { + types[j] <- 'exact numeric' + parms[j] <- '' + pctl[[j]] <- quantile(xj, seq(0,1,by=.01)) + } + + margfreq[[j]] <- table(xj) + Xu[[j]] <- sort(unique(xj)) + X[[j]] <- xj + } + + names(types) <- names(parms) <- names(pctl) <- names(margfreq) <- + names(Xu) <- nam + + Xu <- expand.grid(Xu) + m <- nrow(Xu) + count <- integer(m) + for(i in 1:m) { + matches <- rep(TRUE,n) + for(j in 1:p) + matches <- matches & (as.character(X[[j]]) == + as.character(Xu[[j]][i])) + + count[i] <- sum(matches) + } + + if(any(count==0)) { + s <- count > 0 + Xu <- Xu[s,] + count <- count[s] + m <- sum(s) + } + + structure(list(call=call, formula=formula, n=n, names=nam, + types=types, parms=parms, margfreq=margfreq, + percentiles=pctl, X=Xu, count=count, na.action=nact), + class='dataRep') +} + +roundN <- function(x, tol=1, clip=NULL) +{ + pct <- quantile(x, seq(0,1,by=.01), na.rm=TRUE) + name <- deparse(substitute(x)) + lab <- attr(x, 'label') + if(!length(lab)) + lab <- name + + if(!missing(clip)) + x <- pmin(pmax(x,clip[1]),clip[2]) + + structure(as.single(tol*round(x/tol)), tolerance=tol, clip=clip, + percentiles=pct, name=name, label=lab, class='roundN') +} + +as.data.frame.roundN <- as.data.frame.vector + + +'[.roundN' <- function(x, i, ...) +{ + atr <- attributes(x) + x <- unclass(x)[i] + attributes(x) <- atr + x +} + + +print.dataRep <- function(x, long=FALSE, ...) +{ + cat("\n") + cat("Data Representativeness n=",x$n,"\n\n", sep='') + dput(x$call) + cat("\n") + if(length(z <- x$na.action)) + naprint(z) + + specs <- data.frame(Type=x$types, + Parameters=x$parms, + row.names=x$names) + + cat('Specifications for Matching\n\n') + print.data.frame(specs) + X <- x$X + if(long) { + X$Frequency <- x$count + cat('\nUnique Combinations of Descriptor Variables\n\n') + print.data.frame(X) + } else cat('\n',nrow(X), + 'unique combinations of variable values were found.\n\n') + invisible() +} + + +predict.dataRep <- function(object, newdata, ...) 
+{ + n <- object$n + count <- object$count + if(missing(newdata)) + return(count) + + pctl <- object$percentiles + margfreq <- object$margfreq + p <- length(margfreq) + m <- nrow(newdata) + nam <- object$names + types <- object$types + X <- object$X + + ##Xn <- if(length(model.frame.default$Des)) 3Aug02 + ## model.frame(object$formula, newdata, na.action=na.keep, Des=FALSE) else + Xn <- model.frame(object$formula, newdata, na.action=na.keep) + names(Xn) <- nam + + worst.margfreq <- rep(1e8, m) + pct <- matrix(NA, m, p, dimnames=list(row.names(Xn),nam)) + for(j in 1:p) { + xj <- Xn[[j]] + freq <- margfreq[[nam[j]]][as.character(xj)] + freq[is.na(freq)] <- 0 + pct[,j] <- if(types[j]=='exact categorical') + 100*freq/n + else + approx(pctl[[nam[j]]], seq(0,100,by=1), + xout=newdata[[nam[j]]], rule=2)$y + + worst.margfreq <- pmin(worst.margfreq, freq) + } + + cnt <- integer(m) + for(i in 1:m) { + matches <- rep(TRUE,nrow(X)) + for(j in 1:p) { + matches <- matches & (as.character(X[[j]]) == as.character(Xn[[j]][i])) + } + + s <- sum(matches) + if(s > 1) + warning('more than one match to original data combinations') + + cnt[i] <- if(s) + count[matches] + else + 0 + } + + if(any(cnt > worst.margfreq)) + warning('program logic error') + + structure(list(count=cnt, percentiles=pct, worst.margfreq=worst.margfreq, + newdata=newdata), class='predict.dataRep') +} + +print.predict.dataRep <- function(x, prdata=TRUE, prpct=TRUE, ...) +{ + if(prdata) { + dat <- x$newdata + dat$Frequency <- x$count + dat$Marginal.Freq <- x$worst.margfreq + cat('\nDescriptor Variable Values, Estimated Frequency in Original Dataset,\nand Minimum Marginal Frequency for any Variable\n\n') + print.data.frame(dat) + } else { + cat('\nFrequency in Original Dataset\n\n') + print(x$count) + cat('\nMinimum Marginal Frequency for any Variable\n\n') + print(x$worst.margfreq) + } + + if(prpct) { + cat('\n\nPercentiles for Continuous Descriptor Variables,\nPercentage in Category for Categorical Variables\n\n') + print(round(x$percentiles)) + } + + invisible() +} |
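A usage sketch with simulated data: summarize a dataset by age rounded to the nearest 10 crossed with sex, then ask how many original subjects match new candidate subjects.

    set.seed(8)
    d <- data.frame(age = round(runif(200, 20, 80)),
                    sex = sample(c('f','m'), 200, TRUE))
    w <- dataRep(~ roundN(age, 10) + sex, data = d)
    w  # print.dataRep: matching specifications and number of unique combinations
    predict(w, data.frame(age = c(30, 95), sex = c('f','m')))  # 95 likely matches 0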
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/dates.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/dates.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,137 @@ +yearDays <- function(time) { + time <- as.POSIXlt(time) + + time$mon[] <- time$mday[] <- time$sec[] <- time$min <- time$hour <- 0 + time$year <- time$year + 1 + + return(as.POSIXlt(as.POSIXct(time))$yday) +} + +monthDays <- function(time) { + time <- as.POSIXlt(time) + time$mday[] <- time$sec[] <- time$min <- time$hour <- 0 + time$mon <- time$mon + 1 + + return(as.POSIXlt(as.POSIXct(time))$mday) +} + +round.POSIXt <- function(x, digits=c("secs", "mins", "hours", "days", "months", "years")) + { + ## this gets the default from the generic, as that has two args. + if(is.numeric(digits) && digits == 0.0) digits <-"secs" + units <- match.arg(digits) + + month.length <- monthDays(x) + x <- as.POSIXlt(x) + + if(length(x$sec) > 0) + switch(units, + "secs" = {x$sec <- x$sec + 0.5}, + "mins" = {x$sec <- x$sec + 30}, + "hours" = {x$sec <- 0; x$min <- x$min + 30}, + "days" = {x$sec <- 0; x$min <- 0; x$hour <- x$hour + 12 + isdst <- x$isdst <- -1}, + "months" = {x$sec <- 0; x$min <- 0; x$hour <- 0; + x$mday <- x$mday + trunc(monthDays(x)/2); + isdst <- x$isdst <- -1}, + "years" = {x$sec <- 0; x$min <- 0; x$hour <- 0; + x$mday <- 0; x$mon <- x$mon + 6; + isdst <- x$isdst <- -1} + ) + + return(trunc(as.POSIXct(x), units=units)) + } + +trunc.POSIXt <- function(x, units=c("secs", "mins", "hours", "days", "months", "years"), ...) { + units <- match.arg(units) + + x <- as.POSIXlt(x) + + isdst <- x$isdst + if(length(x$sec) > 0) + switch(units, + "secs" = {x$sec <- trunc(x$sec)}, + "mins" = {x$sec <- 0}, + "hours"= {x$sec <- 0; x$min <- 0}, + "days" = {x$sec <- 0; x$min <- 0; x$hour <- 0; isdst <- x$isdst <- -1}, + "months" = { + x$sec <- 0 + x$min <- 0 + x$hour <- 0 + x$mday <- 1 + isdst <- x$isdst <- -1 + }, + "years" = { + x$sec <- 0 + x$min <- 0 + x$hour <- 0 + x$mday <- 1 + x$mon <- 0 + isdst <- x$isdst <- -1 + } + ) + + x <- as.POSIXlt(as.POSIXct(x)) + if(isdst == -1) { + x$isdst <- -1 + } + return(x) + } + +ceil <- function(x, units, ...) { + UseMethod('ceil', x) +} + +ceil.default <- function(x, units, ...) { + ceiling(x) +} + +ceil.POSIXt <- function(x, units=c("secs", "mins", "hours", "days", "months", "years"), ...) { + units <- match.arg(units) + + x <- as.POSIXlt(x) + + isdst <- x$isdst + if(length(x$sec) > 0 && x != trunc.POSIXt(x, units=units)) { + switch(units, + "secs" = { + x$sec <- ceiling(x$sec) + }, + "mins" = { + x$sec <- 0 + x$min <- x$min + 1 + }, + "hours"= {x$sec <- 0; x$min <- 0; x$hour <- x$hour + 1}, + "days" = { + x$sec <- 0 + x$min <- 0 + x$hour <- 0 + x$mday <- x$mday + 1 + isdst <- x$isdst <- -1 + }, + "months" = { + x$sec <- 0 + x$min <- 0 + x$hour <- 0 + x$mday <- 1 + x$mon <- x$mon + 1 + isdst <- x$isdst <- -1 + }, + "years" = { + x$sec <- 0 + x$min <- 0 + x$hour <- 0 + x$mday <- 1 + x$mon <- 0 + x$year <- x$year + 1 + isdst <- x$isdst <- -1 + } + ) + + x <- as.POSIXlt(as.POSIXct(x)) + if(isdst == -1) { + x$isdst <- -1 + } + } + return(x) +} |
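A usage sketch: the POSIXt methods above extend round/trunc to month and year granularity and add ceil.

    x <- as.POSIXct('2017-06-28 20:28:48')
    trunc(x, units = 'months')   # 2017-06-01
    ceil(x, units = 'months')    # 2017-07-01
    round(x, digits = 'days')    # 2017-06-29 (past noon rounds up)
    monthDays(x)                 # 30 days in June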
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/deff.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/deff.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,28 @@ +deff <- function(y, cluster) +{ + ss <- function(x) + { + n <- length(x) + xbar <- sum(x) / n + sum((x - xbar)^2) + } + + if(!is.factor(cluster)) cluster <- as.factor(cluster) + + cluster <- unclass(cluster) + s <- !is.na(cluster + y) + y <- y[s] + cluster <- as.integer(cluster[s]) + n <- length(y) + sst <- ss(y) + sses <- tapply(y,cluster,ss) + k <- length(sses) + R2 <- 1 - sum(sses) / sst + Fstat <- R2 * (n - k) / (1 - R2) / k + g <- (Fstat - 1.) * k / n + rho <- if(R2 == 1.) 1. else g / (1. + g) + ng <- table(cluster) + B <- sum(ng^2) / n + deff <- 1 + (B - 1) * rho + c(n=n, clusters=k, rho=rho, deff=deff) +} |
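Editor's note, not part of the diff: a sketch of deff() on simulated two-level data; the effect size and cluster layout are invented.

  set.seed(2)
  cluster <- rep(1:20, each = 10)              # 20 clusters of size 10
  y <- rnorm(200) + 0.5 * rnorm(20)[cluster]   # shared within-cluster shift
  deff(y, cluster)   # n, number of clusters, intracluster rho, design effect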
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/describe.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/describe.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,1047 @@
[The body of this 1047-line hunk was mangled into a truncated bytes literal during extraction, and its middle is elided in the source. The recoverable opening defines the describe() generic, describe.default(), and describe.vector(): per-variable n, missing, and unique-value counts with weights, normwt, an Info measure, and bookkeeping for special.miss codes, imputed values, and partial dates. The recoverable tail defines HTML level-listing helpers (including an evenSplit() utility), contents.list(), and print.contents.list() with sorting by names, labels, NAs, or variable counts.]
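Editor's note, not part of the diff: the canonical call is on a whole data frame, which dispatches through the data.frame method contained in the elided portion of this file; the toy data are made up.

  set.seed(3)
  d <- data.frame(age = rnorm(100, 50, 10),
                  sex = factor(sample(c('female', 'male'), 100, TRUE)))
  describe(d)   # per-variable n, missing, distinct values, quantiles, frequencies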
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/discrete.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/discrete.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,79 @@
+discrete <- function(x, levels=sort(unique.default(x), na.last=TRUE),
+                     exclude = NA) {
+  if(!is.numeric(x)) {
+    stop('x must be a numeric variable')
+  }
+
+  exclude <- as.vector(exclude, typeof(x))
+  levels <- levels[is.na(match(levels, exclude))]
+
+  f <- x[!(x %in% exclude)]
+
+  attr(f, 'levels') <- levels
+  class(f) <- "discrete"
+  f
+}
+
+as.discrete <- function(x, ...) UseMethod("as.discrete")
+
+as.discrete.default <- function(x, ...) {
+  if(is.discrete(x)) x else discrete(x)
+}
+
+is.discrete <- function(x) inherits(x, 'discrete')
+
+"[.discrete" <- function(x, ..., drop=FALSE) {
+  y <- NextMethod("[")
+
+  attr(y, 'levels') <- attr(x, 'levels')
+  class(y) <- class(x)
+  if(drop) {
+    factor(y)
+  } else {
+    y
+  }
+}
+
+"[<-.discrete" <- function(x, ..., value) {
+  lx <- levels(x)
+  cx <- class(x)
+
+  m <- match(value, lx)
+
+  if (any(is.na(m) & !is.na(value))) {
+    warning("invalid factor level, NAs generated")
+  }
+
+  class(x) <- NULL
+  x[...] <- m
+
+  attr(x, "levels") <- lx
+  class(x) <- cx
+  x
+}
+
+"[[.discrete" <- function(x, i)
+{
+  y <- NextMethod("[[")
+
+  attr(y, "levels") <- attr(x, "levels")
+  class(y) <- class(x)
+  y
+}
+
+"is.na<-.discrete" <- function(x, value)
+{
+  lx <- levels(x)
+  cx <- class(x)
+  class(x) <- NULL
+  x[value] <- NA
+  structure(x, levels = lx, class = cx)
+}
+
+"length<-.discrete" <- function(x, value)
+{
+  cl <- class(x)
+  levs <- levels(x)
+  x <- NextMethod()
+  structure(x, levels=levs, class=cl)
+}
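Editor's note, not part of the diff: a sketch showing that a 'discrete' vector keeps its level set under subsetting, unlike a plain numeric vector.

  x <- discrete(c(1, 2, 2, 3, 5))
  levels(x)    # 1 2 3 5
  x[x > 2]     # keeps class 'discrete' and the full level set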
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/dotchart3.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/dotchart3.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,192 @@ +dotchart3 <- + function (x, labels = NULL, groups = NULL, gdata = NULL, cex = par("cex"), + pch = 21, gpch = pch, bg = par("bg"), color = par("fg"), + gcolor = par("fg"), lcolor = "gray", + xlim = range(c(x, gdata), na.rm=TRUE), main = NULL, + xlab = NULL, ylab = NULL, auxdata=NULL, auxtitle=NULL, + auxgdata=NULL, axisat=NULL, axislabels=NULL, + cex.labels = cex, cex.group.labels = cex.labels*1.25, + cex.auxdata = cex, groupfont=2, ...) +{ + opar <- par("mai", "mar", "cex", "yaxs") + on.exit(par(opar)) + par(cex = cex, yaxs = "i") + if (!is.numeric(x)) + stop("'x' must be a numeric vector or matrix") + x <- as.matrix(x) + n <- nrow(x) + nc <- ncol(x) + pch <- rep(pch, length=nc) + + if(!length(labels)) labels <- rownames(x) + if(!length(labels)) stop('labels not defined') + if(length(groups)) groups <- as.factor(groups) + glabels <- levels(groups) + + plot.new() + linch <- max(strwidth(labels, "inch"), na.rm = TRUE) + if (!length(glabels)) { + ginch <- 0 + goffset <- 0 + } + else { + ginch <- max(strwidth(glabels, "inch", cex=cex.group.labels, + font=groupfont), + na.rm = TRUE) + goffset <- 0.4 + } + if(length(labels) + length(glabels) > 0) { + nmai <- par("mai") + nmai[2L] <- max(nmai[2L], nmai[4L] + max(linch + goffset, ginch) + 0.1) + ## Run strwidth separately because on of the 3 variables might + ## be an expression, in which case an overall c(...) would combine the + ## widths of character vectors + if(length(auxdata) + length(auxgdata) > 0) + nmai[4L] <- .2 + 1.1 * max(strwidth(auxtitle, 'inch', cex=cex.auxdata), + strwidth(auxdata, 'inch', cex=cex.auxdata), + strwidth(auxgdata, 'inch', cex=cex.auxdata)) + par(mai = nmai) + } + if (!length(groups)) { + o <- n:1L + y <- o + ylim <- c(.5, n + .5) + x <- x[o, , drop=FALSE] + labels <- labels[o] + if(length(auxdata)) auxdata <- auxdata[o] + } + else { + # Added: For each group reverse order of data so plotting will + # put first levels at top + o <- sort.list(as.numeric(groups), decreasing = TRUE) + groups <- groups[o] +# for(g in levels(groups)) { +# i <- groups == g +# o[i] <- rev(o[i]) +# } + x <- x[o, , drop=FALSE] # ascending within region + labels <- labels[o] + if(length(auxdata)) auxdata <- auxdata[o] + # End added + # groups <- groups[o] (put earlier) + color <- rep(color, length.out = length(groups))[o] + lcolor <- rep(lcolor, length.out = length(groups))[o] + offset <- cumsum(c(0, diff(as.numeric(groups)) != 0)) + y <- 1L:n + 2 * offset + ylim <- range(0.5, y + 1.5) # range(0, y + 2) + } + + plot.window(xlim = xlim, ylim = ylim, log = "") + lheight <- par("csi") + if(length(labels)) { + linch <- max(strwidth(labels, "inch", cex=cex.labels), na.rm = TRUE) + loffset <- (linch + 0.1) / lheight + # was line=loffset + mtext(labels, side = 2, line = .1*loffset, at = y, adj = 1, + col = color, las = 2, cex = cex.labels, ...) 
+ } + abline(h = y, lty = "dotted", col = lcolor) + if(length(auxtitle)) { + upedge <- par('usr')[4] + outerText(auxtitle, + upedge + strheight(auxtitle, cex=cex) / 2, + cex=cex) + } + gpos <- if(length(groups)) + rev(cumsum(rev(tapply(groups, groups, length)) + 2) - 1) + if(length(auxdata) + length(auxgdata) > 0) + outerText(c(auxdata, auxgdata), c(y, if(length(auxgdata)) gpos), + cex=cex.auxdata) + + for(i in 1:nc) + points(x[,i], y, pch = pch[i], col = color, bg = bg) + + if(length(groups)) { + ginch <- max(strwidth(glabels, "inch", font=groupfont, + cex=cex.group.labels), + na.rm = TRUE) + goffset <- (max(linch + 0.2, ginch, na.rm = TRUE) + 0.1)/lheight + mtext(glabels, side = 2, line = .2, at = gpos, adj = 1, # was adj=0 + col = gcolor, las = 2, cex = cex.group.labels, font=groupfont, ...) + if (length(gdata)) { + abline(h = gpos, lty = "dotted") + if(is.matrix(gdata)) + for(j in 1:ncol(gdata)) + points(gdata[, j], gpos, pch=gpch[j], col=gcolor, bg=bg, ...) + else + points(gdata, gpos, pch = gpch, col = gcolor, bg = bg, + ...) + } + } + if(length(axisat)) axis(1, at=axisat, labels=axislabels) + else + axis(1) + box() + title(main = main, xlab = xlab, ylab = ylab, ...) + invisible() +} + +summaryD <- function(formula, data=NULL, fun=mean, funm=fun, + groupsummary=TRUE, auxvar=NULL, auxtitle='', + vals=length(auxvar) > 0, fmtvals=format, + cex.auxdata=.7, xlab=v[1], ylab=NULL, + gridevery=NULL, gridcol=gray(.95), sort=TRUE, ...) { + if(!missing(fmtvals)) vals <- TRUE + if(!length(data)) data <- environment(formula) + else data <- list2env(data, parent=environment(formula)) + if(length(auxvar) && is.character(auxvar) && missing(auxtitle)) + auxtitle <- auxvar + v <- all.vars(formula) + m <- length(v) - 1 + yn <- v[1]; xn <- v[-1] + two <- length(xn) == 2 + y <- get(yn, envir=data) + x1 <- get(xn[1], envir=data) + x2 <- if(two) get(xn[2], envir=data) + + s <- summarize(y, if(two) llist(x1, x2) else llist(x1), fun, type='matrix') + if(sort) s <- s[order(if(is.matrix(s$y)) s$y[, 1, drop=FALSE] else s$y), ] + + auxd <- function(z) { + sy <- z$y + if(length(auxvar)) { + if(!is.matrix(sy)) + stop('auxvar is only used when fun returns > 1 statistic') + f <- if(vals) fmtvals(sy[, auxvar]) + sy <- if(is.numeric(auxvar)) sy[, -auxvar, drop=FALSE] + else + sy[, setdiff(colnames(sy), auxvar), drop=FALSE] + } + else + f <- if(vals) fmtvals(if(is.matrix(sy)) sy[, 1] else sy) + list(sy=sy, fval=f) # sy = remaining y, fval = formatted auxvar + } + + z <- auxd(s) + if(two) { + if(groupsummary) { + s2 <- summarize(y, llist(x1), funm, type='matrix') + z2 <- auxd(s2) + } + z <- auxd(s) + dotchart3(z$sy, s$x2, groups=s$x1, + auxdata=z$fval, auxtitle=if(vals) auxtitle, + cex.auxdata=cex.auxdata, + gdata =if(groupsummary) z2$sy, + auxgdata=if(groupsummary) z2$fval, + xlab=xlab, ylab=ylab, ...) + } + else + dotchart3(z$sy, s$x1, auxdata=z$fval, + auxtitle=if(vals) auxtitle, + cex.auxdata=cex.auxdata, xlab=xlab, ylab=ylab, ...) + + if(length(gridevery)) { + xmin <- par('usr')[1] + xmin <- ceiling(xmin/gridevery)*gridevery + xmax <- if(length(xn) == 1) max(s$y, na.rm=TRUE) + else + max(c(s$y, s2$y), na.rm=TRUE) + abline(v=seq(xmin, xmax, by=gridevery), col=gridcol) + } +} |
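Editor's note, not part of the diff: summaryD() is the higher-level entry point for dotchart3(); a sketch with made-up data.

  set.seed(4)
  d <- data.frame(y = rnorm(60),
                  region = sample(c('East', 'North', 'South'), 60, TRUE))
  ## dot chart of means by region, with the means printed in the right margin
  summaryD(y ~ region, data = d, fun = mean, auxtitle = 'Mean', vals = TRUE)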
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/ecdf.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/ecdf.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,442 @@
[The body of this 442-line hunk was mangled into a truncated bytes literal during extraction, and its middle is elided in the source. The recoverable portions define the Ecdf() generic and Ecdf.default(): weighted empirical CDFs with what = 'F', '1-F', 'f', or '1-f', optional quantile reference lines (q), per-group step curves labeled through labcurve(), and marginal data density via scat1d()/histSpike(). The tail adds lattice support: a panel function with a .Key() legend helper, and Ecdf.formula(), which dispatches to histogram() with prepanel.Ecdf and panel.Ecdf.]
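Editor's note, not part of the diff: typical use of the default method, one step curve per group with a median reference line; the data are simulated.

  set.seed(5)
  chol <- rnorm(1000, 200, 40)
  sex  <- factor(sample(c('female', 'male'), 1000, TRUE))
  Ecdf(chol, group = sex, q = .5, xlab = 'Cholesterol')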
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/epi.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/epi.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,111 @@ +## $Id$ +## Relative risk estimation from binary responses +## See http://www.csm.ornl.gov/~frome/ES/RRMHex/MHanalysis.txt and +## http://www.csm.ornl.gov/~frome/ES/RRMHex for related code + +mhgr <- function(y, group, strata, conf.int=.95) + { + group <- as.factor(group) + i <- is.na(y) | is.na(group) | is.na(strata) + if(any(i)) + { + i <- !i + y <- y[i] + group <- group[i] + strata <- strata[i] + } + N <- tapply(y, list(group,strata), length) + if(nrow(N) != 2) stop('only works for 2 groups') + N[is.na(N)] <- 0 + s <- tapply(y, list(group,strata), sum) + s[is.na(s)] <- 0 + n <- N[1,] + m <- N[2,] + x <- s[1,] + y <- s[2,] + N <- m + n + tk<- x + y + R <- x*m/N + S <- y*n/N + D <- (m*n*tk - x*y*N)/N/N + rr <- sum(R)/sum(S) + varlog <- sum(D)/(sum(R)*sum(S)) + sigma <- sqrt(varlog) + z <- -qnorm((1-conf.int)/2) + ci <- rr*c(exp(-z*sigma), exp(z*sigma)) + structure(list(rr=rr, ci=ci, conf.int=conf.int, N=table(group)), + class='mhgr') + } +print.mhgr <- function(x, ...) + { + cat('Mantel-Haenszel Risk Ratio and', x$conf.int, 'Greenland-Robins Confidence Interval\n\n') + cat('Common Relative Risk:', x$rr, 'CI:', x$ci, '\n\n') + cat('N in Each Group\n\n') + print(x$N) + invisible() + } + + +lrcum <- function(a, b, c, d, conf.int=0.95) + { + if(any(is.na(a+b+c+d))) stop('NAs not allowed') + if(min(a,b,c,d)==0) + { + warning('A frequency of zero exists. Adding 0.5 to all frequencies.') + a <- a + .5 + b <- b + .5 + c <- c + .5 + d <- d + .5 + } + + lrpos <- a/(a+c) / (b/(b+d)) + lrneg <- c/(a+c) / (d/(b+d)) + + zcrit <- qnorm((1+conf.int)/2) + + varloglrpos <- 1/a - 1/(a+c) + 1/b - 1/(b+d) + varloglrneg <- 1/d - 1/(b+d) + 1/c - 1/(a+c) + upperlrpos <- exp(log(lrpos) + zcrit*sqrt(varloglrpos)) + lowerlrpos <- exp(log(lrpos) - zcrit*sqrt(varloglrpos)) + upperlrneg <- exp(log(lrneg) + zcrit*sqrt(varloglrneg)) + lowerlrneg <- exp(log(lrneg) - zcrit*sqrt(varloglrneg)) + + lrposcum <- cumprod(lrpos) + lrnegcum <- cumprod(lrneg) + + varloglrposcum <- cumsum(varloglrpos) + varloglrnegcum <- cumsum(varloglrneg) + + upperlrposcum <- exp(log(lrposcum) + zcrit*sqrt(varloglrposcum)) + lowerlrposcum <- exp(log(lrposcum) - zcrit*sqrt(varloglrposcum)) + upperlrnegcum <- exp(log(lrnegcum) + zcrit*sqrt(varloglrnegcum)) + lowerlrnegcum <- exp(log(lrnegcum) - zcrit*sqrt(varloglrnegcum)) + + structure(llist(lrpos, upperlrpos, lowerlrpos, + lrneg, upperlrneg, lowerlrneg, + lrposcum, upperlrposcum, lowerlrposcum, + lrnegcum, upperlrnegcum, lowerlrnegcum, conf.int), + class='lrcum') + } + +print.lrcum <- function(x, dec=3, ...) + { + ci <- x$conf.int + l <- paste('Lower', ci) + u <- paste('Upper', ci) + a <- with(x, + cbind(lrpos, lowerlrpos, upperlrpos, + lrposcum, lowerlrposcum, upperlrposcum)) + b <- with(x, + cbind(lrneg, lowerlrneg, upperlrneg, + lrnegcum, lowerlrnegcum, upperlrnegcum)) + a <- round(a, dec) + b <- round(b, dec) + colnames(a) <- c('LR+', l, u, 'Cum. LR+', l, u) + colnames(b) <- c('LR-', l, u, 'Cum. LR-', l, u) + rownames(a) <- rownames(b) <- rep('', nrow(a)) + print(a) + cat('\n') + print(b) + invisible() + } |
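Editor's note, not part of the diff: a sketch of mhgr() on simulated stratified data; the event probabilities are made up.

  set.seed(6)
  group  <- sample(0:1, 300, TRUE)
  strata <- sample(1:3, 300, TRUE)
  y <- rbinom(300, 1, ifelse(group == 1, .35, .25))
  mhgr(y, group, strata)   # common relative risk with Greenland-Robins CI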
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/errbar.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/errbar.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,99 @@ +## From: geyer@galton.uchicago.edu +## Modified 11May91 FEH - added na.rm to range() +## Modified 12Jul91 FEH - added add=T and lty=1 parameters +## Modified 12Aug91 FEH - added explicit ylim parameter +## Modified 26Aug94 FEH - added explicit lwd parameter for segments() +## FEH 2Jul02 added horizontal charts with differences on 2nd axis + +errbar <- + function(x, y, yplus, yminus, cap=.015, + main=NULL, sub=NULL, + xlab=as.character(substitute(x)), + ylab=if(is.factor(x) || is.character(x)) '' + else + as.character(substitute(y)), + add=FALSE, lty=1, type='p', ylim=NULL, lwd=1, pch=16, + errbar.col=par("fg"), + Type=rep(1,length(y)), ...) +{ + if(is.null(ylim)) + ylim <- range(y[Type==1], yplus[Type==1], yminus[Type==1], + na.rm=TRUE) + + if(is.factor(x) || is.character(x)) + { + x <- as.character(x) + n <- length(x) + t1 <- Type==1 + t2 <- Type==2 + n1 <- sum(t1) + n2 <- sum(t2) + + omai <- par('mai') + mai <- omai + mai[2] <- max(strwidth(x, 'inches')) + .25 + + par(mai=mai) + on.exit(par(mai=omai)) + + plot(NA, NA, xlab=ylab, ylab='', + xlim=ylim, ylim=c(1, n+1), + axes=FALSE, ...) + axis(1) + + w <- + if(any(t2)) n1+(1:n2)+1 + else + numeric(0) + + axis(2, at=c(seq.int(length.out=n1), w), labels=c(x[t1], x[t2]), + las=1, adj=1) + points(y[t1], seq.int(length.out=n1), pch=pch, type=type, ...) + segments(yplus[t1], seq.int(length.out=n1), yminus[t1], + seq.int(length.out=n1), lwd=lwd, lty=lty, col=errbar.col) + + if(any(Type==2)) + { + abline(h=n1+1, lty=2, ...) + offset <- mean(y[t1]) - mean(y[t2]) + + if(min(yminus[t2]) < 0 & max(yplus[t2]) > 0) + lines(c(0,0)+offset, c(n1+1,par('usr')[4]), lty=2, ...) + + + points(y[t2] + offset, w, pch=pch, type=type, ...) + segments(yminus[t2] + offset, w, yplus[t2] + offset, w, + lwd=lwd, lty=lty, col=errbar.col) + + at <- pretty(range(y[t2], yplus[t2], yminus[t2])) + axis(side=3, at=at + offset, labels=format(round(at, 6))) + } + + return(invisible()) + } + + if(add) + points(x, y, pch=pch, type=type, ...) + else + plot(x, y, ylim=ylim, xlab=xlab, ylab=ylab, pch=pch, type=type, ...) + + xcoord <- par()$usr[1:2] + smidge <- cap * ( xcoord[2] - xcoord[1] ) / 2 + + segments(x, yminus, x, yplus , lty=lty, lwd=lwd, col=errbar.col) + + if(par()$xlog) + { + xstart <- x * 10 ^ (-smidge) + xend <- x * 10 ^ (smidge) + } + else + { + xstart <- x - smidge + xend <- x + smidge + } + segments( xstart, yminus, xend, yminus, lwd=lwd, lty=lty, col=errbar.col) + segments( xstart, yplus, xend, yplus, lwd=lwd, lty=lty, col=errbar.col) + + return(invisible()) +} |
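Editor's note, not part of the diff: plotting group means with bars at plus or minus one standard error; the numbers are invented.

  m  <- c(3.1, 4.2, 3.8)
  se <- c(.30, .40, .25)
  errbar(1:3, m, yplus = m + se, yminus = m - se,
         xlab = 'Group', ylab = 'Mean')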
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/event.chart.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/event.chart.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,614 @@
[The body of this 614-line hunk was mangled into a truncated bytes literal during extraction, and its middle is elided in the source. The recoverable portions define event.chart(), an event chart plotter with extensive sorting, axis, legend, line, and point options, carrying internal month.day.year() and julian() date converters, and event.convert(), which expands a time column plus an event-code column (e.g. 1=dead, 0=censored) into one column of event times per code.]
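Editor's note, not part of the diff: a hypothetical minimal call, relying on the full implementation elided above; each row is a subject and each column an event date, with NA meaning the event did not occur.

  d <- data.frame(entry   = c(100, 130, 160),
                  relapse = c(180,  NA, 220),
                  death   = c(250, 210,  NA))
  event.chart(d, titl = 'Event chart for three subjects')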
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/event.history.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/event.history.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,253 @@
[The body of this 253-line hunk was mangled into a truncated bytes literal during extraction, and its middle is elided in the source. The recoverable portion defines event.history(), which draws event history graphs for right-censored survival data, assuming the input is appropriately pre-processed (e.g. smoothed) before the call: it validates and subsets its inputs, cuts covariates into color categories, orders observations by follow-up time, and plots per-subject polygons, with hatched extensions and optional right-margin marks for censored observations.]
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/find.matches.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/find.matches.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,178 @@ +find.matches <- function(x, y, tol=rep(0,ncol(y)), scale=tol, + maxmatch=10) +{ + ##if(length(dim(x))==0) x <- matrix(x, nrow=1) 10may02 + if(!is.matrix(x)) + x <- as.matrix(x) + + n <- nrow(x) + p <- ncol(x) + if(!is.matrix(y)) + y <- as.matrix(y) ## 10may02 + + if(p != ncol(y)) + stop("number of columns of x and y must match") + + ny <- nrow(y) + rown <- dimnames(x)[[1]] + ry <- dimnames(y)[[1]] + matches <- matrix(if(length(ry)) + "" + else + 0, + n, maxmatch, + dimnames=list(rown, + paste("Match #",1:maxmatch,sep=""))) + + distance <- matrix(NA, n, maxmatch, + dimnames=list(rown, + paste("Distance #",1:maxmatch,sep=""))) + + if(length(ry)==0) + ry <- 1:ny + + scale <- ifelse(scale==0,1,tol) + ones <- rep(1,p) + mx <- 0 + for(i in 1:n) { + dif <- abs(y - rep(x[i,], rep.int(ny,p))) + toll <- rep(tol, rep.int(nrow(dif),p)) + which <- (1:ny)[((dif > toll) %*% ones)==0] + lw <- length(which) + if(lw) { + scaled <- dif[which,,drop=FALSE]/rep(scale, rep.int(lw,p)) + dist <- (scaled^2) %*% ones + lw <- min(lw,maxmatch) + mx <- max(mx,lw) + d <- order(dist)[1:lw] + matches[i,1:lw] <- ry[which[d]] + distance[i,1:lw] <- dist[d] + } + } + + structure(list(matches=matches[,1:mx], distance=distance[,1:mx]), + class="find.matches") +} + + +print.find.matches <- function(x, digits=.Options$digits, ...) +{ + cat("\nMatches:\n\n") + print(x$matches, quote=FALSE) + cat("\nDistances:\n\n") + print(x$distance, digits=digits) + invisible() +} + + +summary.find.matches <- function(object, ...) +{ + mat <- object$matches + dist <- object$distance + cat("Frequency table of number of matches found per observation\n\n") + m <- (!is.na(dist)) %*% rep(1,ncol(mat)) + print(table(m)) + cat("\nMedian minimum distance by number of matches\n\n") + print(tapply(dist[m>0,1], m[m>0], median)) + ta <- table(mat[m>0,1]) + ta <- ta[ta>1] + if(length(ta)) { + cat("\nObservations selected first more than once (with frequencies)\n\n") + print(ta) + } else cat("\nNo observations selected first more than once\n\n") + + invisible() +} + + +matchCases <- function(xcase, ycase, idcase=names(ycase), + xcontrol, ycontrol, idcontrol=names(ycontrol), + tol=NULL, + maxobs=max(length(ycase),length(ycontrol))*10, + maxmatch=20, which=c('closest','random')) +{ + if(!length(tol)) + stop('must specify tol') + + if((length(xcase)!=length(ycase)) || (length(xcontrol)!=length(ycontrol))) + stop('lengths of xcase, ycase and of xcontrol, ycontrol must be same') + + which <- match.arg(which) + + ycase <- as.matrix(ycase) + ycontrol <- as.matrix(ycontrol) + if(!length(idcase)) + idcase <- 1:length(ycase) + + if(!length(idcontrol)) + idcontrol <- 1:length(ycontrol) + + idcase <- as.character(idcase) + idcontrol <- as.character(idcontrol) + + j <- is.na(ycase %*% rep(1,ncol(ycase))) | is.na(xcase) + if(any(j)) { + warning(paste(sum(j),'cases removed due to NAs')) + ycase <- ycase[!j,,drop=FALSE] + xcase <- xcase[!j] + idcase <- idcase[!j] + } + + j <- is.na(ycontrol %*% rep(1,ncol(ycontrol))) | is.na(xcontrol) + if(any(j)) { + warning(paste(sum(j),'controls removed due to NAs')) + ycontrol <- ycontrol[!j,,drop=FALSE] + xcontrol <- xcontrol[!j] + idcontrol <- idcontrol[!j] + } + + idCase <- id <- character(maxobs) + type <- factor(rep(NA,maxobs), c('case','control')) + x <- numeric(maxobs) + y <- matrix(NA, ncol=ncol(ycase), nrow=maxobs) + + last <- 0 + ncase <- length(ycase) + ncontrol <- length(ycontrol) + matches <- integer(ncase) + for(i in 1:ncase) { + s <- abs(xcontrol-xcase[i]) <= tol + nmatch <- sum(s) + if(nmatch > 
maxmatch) { + s <- (1:ncontrol)[s] ## next line was sample(j,...) 4jun02 + if(which=="random") + s <- sample(s, maxmatch, replace=FALSE) + else { + errors <- abs(xcontrol[s]-xcase[i]) + serrors <- order(errors) + s <- (s[serrors])[1:maxmatch] + } + + nmatch <- maxmatch + } + + matches[i] <- nmatch + if(!nmatch) + next + + end <- last + nmatch + 1 + if(end > maxobs) + stop(paste('needed maxobs >',maxobs)) + + start <- last+1 + last <- end + idCase[start:end] <- rep(idcase[i], nmatch+1) + type[start:end] <- c('case',rep('control',nmatch)) + id[start:end] <- c(idcase[i], idcontrol[s]) + x[start:end] <- c(xcase[i], xcontrol[s]) + y[start:end,] <- rbind(ycase[i,,drop=FALSE], ycontrol[s,,drop=FALSE]) + } + + cat('\nFrequencies of Number of Matched Controls per Case:\n\n') + print(table(matches)) + cat('\n') + structure(list(idcase=idCase[1:end], type=type[1:end], + id=id[1:end], x=x[1:end], y=drop(y[1:end,])), + row.names=as.character(1:end), + class='data.frame') +} |
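Editor's note, not part of the diff: matching rows of x to rows of y within per-column tolerances (here 2 years of age and 5 mmHg of systolic pressure); the data are simulated.

  set.seed(7)
  x <- cbind(age = c(40, 55), sbp = c(120, 140))
  y <- cbind(age = round(runif(50, 30, 70)), sbp = round(runif(50, 100, 160)))
  f <- find.matches(x, y, tol = c(2, 5))
  f$matches    # candidate rows of y for each row of x, closest first
  summary(f)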
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/format.pval.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/format.pval.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,45 @@ +## Use R function for S-Plus, just changed to .Options +format.pval <- function (x, pv=x, digits = max(1, .Options$digits - 2), + eps = .Machine$double.eps, + na.form = "NA", ...) { + if ((has.na <- any(ina <- is.na(pv)))) + pv <- pv[!ina] + + r <- character(length(is0 <- pv < eps)) + if (any(!is0)) { + rr <- pv <- pv[!is0] + expo <- floor(log10(ifelse(pv > 0, pv, 1e-50))) + fixp <- expo >= -3 | (expo == -4 & digits > 1) + if (any(fixp)) + rr[fixp] <- format(round(pv[fixp], digits = digits), + ...) + if (any(!fixp)) + rr[!fixp] <- format(round(pv[!fixp], digits = digits), + ...) + r[!is0] <- rr + } + + if (any(is0)) { + digits <- max(1, digits - 2) + if (any(!is0)) { + nc <- max(nchar(rr)) + if (digits > 1 && digits + 6 > nc) + digits <- max(1, nc - 7) + sep <- if (digits == 1 && nc <= 6) + "" + else " " + } + else sep <- if(digits == 1) + "" + else " " + + r[is0] <- paste("<", format(eps, digits = digits, ...), sep = sep) + } + if (has.na) { + rok <- r + r <- character(length(ina)) + r[!ina] <- rok + r[ina] <- na.form + } + r +} |
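Editor's note, not part of the diff: p-values below eps are reported as an inequality, for example:

  format.pval(c(.0234, .00001, 1e-20), digits = 3, eps = 1e-4)
  ## "0.023"  "<1e-04" "<1e-04"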
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/ftu.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/ftu.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,50 @@ +##Dan Heitjan dheitjan@biostats.hmc.psu.edu + +ftupwr <- function(p1,p2,bign,r,alpha) +{ + ## Compute the power of a two-sided level alpha test of the + ## hypothesis that pi1=pi2, when pi1=p1, pi2=p2, and there are + ## bign observations, bign/(1+r) in group 1 and r*bign/(1+r) in + ## group 2. This is based on the two-tailed test version of + ## formula (6) in Fleiss, Tytun and Ury (1980 Bcs 36, 343--346). + ## This may be used for del not too small (del>=0.1) and r not + ## too big or small (0.33<=r<=3). + ## Daniel F. Heitjan, 30 April 1991 + mstar <- bign/(r+1) + del <- abs(p2-p1) + rp1 <- r+1 + zalp <- qnorm(1-alpha/2) + pbar <- (p1+r*p2)/(1+r) + qbar <- 1-pbar + num <- (r*del^2*mstar-rp1*del)^0.5-zalp*(rp1*pbar*qbar)^0.5 + den <- (r*p1*(1-p1)+p2*(1-p2))^0.5 + zbet <- num/den + pnorm(zbet) +} + + +ftuss <- function(p1,p2,r,alpha,beta) +{ + ## Compute the approximate sample size needed to have power 1-beta + ## for detecting significance in a two-tailed level alpha test of + ## the hypothesis that pi1=pi2, when pi1=p1, pi2=p2, and there + ## are to be m in group 1 and rm in group 2. The calculation is + ## based on equations (3) and (4) of Fleiss, Tytun and Ury (1980 + ## Bcs 36, 343--346). This is accurate to within 1% for + ## moderately large values of del(p2-p1) (del>=0.1) and sample + ## sizes that are not too disproportionate (0.5<=r<=2). + ## Daniel F. Heitjan, 30 April 1991 + zalp <- qnorm(1-alpha/2) + zbet <- qnorm(1-beta) + rp1 <- (r+1) + pbar <- (p1+r*p2)/rp1 + qbar <- 1-pbar + q1 <- 1-p1 + q2 <- 1-p2 + del <- abs(p2-p1) + num <- (zalp*(rp1*pbar*qbar)^0.5+zbet*(r*p1*q1+p2*q2)^0.5)^2 + den <- r*del^2 + mp <- num/den + m <- 0.25*mp*(1+(1+2*rp1/(r*mp*del))^0.5)^2 + list(n1=floor(m+1),n2=floor(m*r+1)) +} |
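Editor's note, not part of the diff: a sketch for detecting 0.25 vs 0.40 with equal allocation; the proportions, alpha, and beta are made up.

  ftuss(p1 = .25, p2 = .40, r = 1, alpha = .05, beta = .20)  # per-group sizes n1, n2
  ftupwr(p1 = .25, p2 = .40, bign = 330, r = 1, alpha = .05) # power at total N = 330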
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/gbayes.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/gbayes.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,223 @@ +gbayes <- function(mean.prior, var.prior, m1, m2, stat, var.stat, + n1, n2, cut.prior, cut.prob.prior=.025) +{ + if(!missing(cut.prior)) + var.prior <- ((cut.prior - mean.prior)/qnorm(1 - cut.prob.prior))^2 + + if(!is.function(var.stat)) { + vs <- var.stat + if(!missing(n1)) + stop('may not specify n1,n2 when var.stat is not a function') + } else + vs <- var.stat(m1,m2) + + var.post <- 1/(1/var.prior + 1/vs) + mean.post <- (mean.prior/var.prior + stat/vs)*var.post + result <- list(mean.prior=mean.prior, var.prior=var.prior, + mean.post=mean.post, var.post=var.post) + + if(!missing(n1)) { + mean.pred <- mean.post + var.pred <- var.post + var.stat(n1,n2) + result$mean.pred <- mean.pred + result$var.pred <- var.pred + } + + structure(result, class='gbayes') +} + + +plot.gbayes <- function(x, xlim, ylim, name.stat='z', ...) +{ + obj <- x + pred <- length(obj$mean.pred)>0 + if(missing(xlim)) + xlim <- obj$mean.post + c(-6,6)*sqrt(obj$var.post) + + x <- seq(xlim[1], xlim[2], length=200) + y1 <- dnorm(x,obj$mean.prior,sqrt(obj$var.prior)) + y2 <- dnorm(x,obj$mean.post, sqrt(obj$var.post)) + plot(x, y1, xlab=name.stat, ylab='Density',type='l',lty=1, + ylim=if(missing(ylim)) + range(c(y1,y2)) + else + ylim) + + curves <- vector('list',2+pred) + names(curves) <- c('Prior','Posterior', + if(pred)' + Predictive') + + curves[[1]] <- list(x=x,y=y1) + lines(x, y2, lty=2) + curves[[2]] <- list(x=x,y=y2) + if(pred) { + y <- dnorm(x,obj$mean.pred,sqrt(obj$var.pred)) + lines(x, y, lty=3) + curves[[3]] <- list(x=x,y=y) + } + + labcurve(curves, ...) + invisible() +} + + +gbayes2 <- function(sd, prior, delta.w=0, alpha=0.05, + upper=Inf, prior.aux=NULL) +{ + if(!is.function(prior)) + stop('prior must be a function') + + z <- qnorm(1-alpha/2) + prod <- function(delta, prior, delta.w, sd, z, prior.aux) + { + (1 - pnorm((delta.w - delta)/sd + z)) * + if(length(prior.aux)) + prior(delta, prior.aux) + else + prior(delta) + } + + ww <- 'value' + + + ip <- if(length(prior.aux)) + integrate(prior, -Inf, upper, prior.aux=prior.aux)[[ww]] + else + integrate(prior, -Inf, upper)[[ww]] + + if(abs(ip-1) > .01) + warning(paste('integrate failed to obtain 1.0 for integral of prior.\nDivided posterior probability by the integral it did obtain (', + format(ip),').\nTry specifying upper=.',sep='')) + integrate(prod, delta.w, upper, + prior=prior, delta.w=delta.w, sd=sd, z=z, + prior.aux=prior.aux)[[ww]] +} + + +## v = variance of Xn after future obs. 
+gbayesMixPredNoData <- function(mix=NA, d0=NA, v0=NA, d1=NA, v1=NA, + what=c('density','cdf')) +{ + what <- match.arg(what) + g <- function(delta, v, mix, d0, v0, d1, v1, dist) + { + if(mix==1) { + pv <- 1/(1/v0 + 1/v) + dist(delta, d0, sqrt(pv)) + } else if(mix==0) { + pv <- 1/(1/v1 + 1/v) + dist(delta, d1, sqrt(pv)) + } else { + pv0 <- 1/(1/v0 + 1/v) + pv1 <- 1/(1/v1 + 1/v) + mix*dist(delta, d0, sqrt(pv0)) + + (1-mix)*dist(delta, d1, sqrt(pv1)) + } + } + + ##g$mix <- mix; g$d0 <- d0; g$v0 <- v0; g$d1 <- d1; g$v1 <- v1 10may02 + ##g$dist <- switch(what, density=dnorm, cdf=pnorm) + formals(g) <- list(delta=numeric(0), v=NA, mix=mix, d0=d0, v0=v0, + d1=d1, v1=v1, dist=NA) + g +} + + +##mp <- function(d,mix,d0,v0,d1,v1,what=c('density','cdf')) { +## what <- match.arg(what) +## f <- switch(what, density=dnorm, cdf=pnorm) +## plot(d,mix*f(d,d0,sqrt(v0))+(1-mix)*f(d,d1,sqrt(v1)), +## type='l', lwd=3) +## invisible() +##} + + +gbayesMixPost <- function(x=NA, v=NA, mix=1, d0=NA, v0=NA, d1=NA, + v1=NA, what=c('density','cdf')) +{ + what <- match.arg(what) + g <- function(delta, x, v, mix=1, + d0, v0, d1, v1, dist) + { + if(mix==1) { + pv <- 1/(1/v0 + 1/v) + dist(delta, (d0/v0 + x/v)*pv, sqrt(pv)) + } else if(mix==0) { + pv <- 1/(1/v1 + 1/v) + dist(delta, (d1/v1 + x/v)*pv, sqrt(pv)) + } else { + prior.odds <- mix/(1-mix) + pv0 <- 1/(1/v0 + 1/v); + pv1 <- 1/(1/v1 + 1/v) + likelihood.ratio <- dnorm(x, d0, sqrt(v0))/ + dnorm(x, d1, sqrt(v1)) + post.odds <- prior.odds * likelihood.ratio + mixp <- post.odds/(1+post.odds) + mixp*dist(delta, (d0/v0 + x/v)*pv0, sqrt(pv0)) + + (1-mixp)*dist(delta, (d1/v1 + x/v)*pv1, sqrt(pv1)) + } + } + + ##g$x <- x; g$v <- v; g$mix <- mix; g$d0 <- d0; g$v0 <- v0; + ##g$d1 <- d1; g$v1 <- v1 + ##g$dist <- switch(what, density=dnorm, cdf=pnorm) 10may02 + formals(g) <- list(delta=numeric(0), x=x, v=v, mix=mix, d0=d0, v0=v0, + d1=d1, v1=v1, + dist=switch(what, + density=dnorm, + cdf=pnorm)) + + g +} + + +gbayesMixPowerNP <- function(pcdf, delta, v, delta.w=0, mix, interval, + nsim=0, alpha=0.05) +{ + if(nsim==0) { + ## Solve for statistic x such that the posterior cdf at + ## (delta.w,x)=alpha/2 + g <- function(x, delta.w, v, alpha, pcdf, mix) + { + pcdf(delta.w, x, v, mix) - alpha/2 + } + + ##g$delta.w <- delta.w; g$v <- v; g$alpha <- alpha; g$pcdf <- pcdf + ##g$mix <- if(missing(mix)) pcdf$mix else mix 10may02 + formals(g) <- list(x=numeric(0), delta.w=delta.w, v=v, + alpha=alpha, pcdf=pcdf, + mix=if(missing(mix)) as.list(pcdf)$mix else mix) + + ##s <- seq(interval[1],interval[2],length=100) + ##gs <- g(s) + ##plot(s, gs, type='l') + ##interval[2] <- min(s[sign(gs)!=sign(gs[1])]) + ##interval[1] <- max(s[s < interval[2] & sign(gs)==sign(gs[1])]) + ##interval[1] <- max(s[sign(gs)!=sign(gs[100])]) + ##interval[2] <- min(s[s > interval[1] & sign(gs)==sign(gs[100])]) + ##prn(interval) + + x <- uniroot(g, interval=interval)$root + c('Critical value'=x, Power=1 - pnorm(x, delta, sqrt(v))) + } else { + x <- rnorm(nsim, delta, sqrt(v)) + probs <- + if(missing(mix)) + pcdf(delta.w, x, v) + else + pcdf(delta.w, x, v, mix=mix) + + pow <- mean(probs <= alpha/2) + se <- sqrt(pow*(1-pow)/nsim) + c(Power=pow, 'Lower 0.95'=pow-1.96*se, 'Upper 0.95'=pow+1.96*se) + } +} + + +gbayes1PowerNP <- function(d0, v0, delta, v, delta.w=0, alpha=0.05) +{ + pv <- 1/(1/v0 + 1/v) + z <- qnorm(alpha/2) + 1 - pnorm(v*( (delta.w - sqrt(pv)*z)/pv - d0/v0 ), delta, sqrt(v)) +} |
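Editor's note, not part of the diff: updating a normal prior with an observed difference whose variance is known; all numbers are invented.

  b <- gbayes(mean.prior = 0, var.prior = .25, stat = -.5, var.stat = .04)
  c(b$mean.post, b$var.post)   # approximately -0.431 and 0.0345
  plot(b)                      # prior and posterior densities via plot.gbayes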
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/gettext.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/gettext.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,63 @@
+### These functions are designed to provide S-Plus compatibility
+### for R internationalization.  They are named with a prefix of
+### "Splus".
+###
+### These functions contain representations of sprintf, gettext,
+### gettextf, and ngettext
+
+
+if(!exists("sprintf")) sprintf <- function(fmt, ...) {
+  ldots <- list(...)
+
+  text <- vector("character")
+  vars <- vector("character")
+  i <- 1; j <- 1;
+  temp <- fmt
+  while (nchar(temp)) {
+    ne <- regexpr('(?<!%)%[^%]*?[dixXfeEgGs]', temp, perl=TRUE)
+    if( ne < 0 ) {
+      text[i] <- gsub('%%', '%', temp)
+      temp <- ""
+    } else {
+      text[i] <- gsub('%%', '%', substr(temp, 1, ne-1))
+      i <- i + 1
+      vars[j] <- substr(temp, ne+1, ne+attr(ne, "match.length")-1)
+      j <- j + 1
+      temp <- substr(temp, ne+attr(ne, "match.length"), nchar(temp))
+    }
+  }
+
+  ## if fmt ended with a conversion, supply the empty trailing text
+  ## piece so every conversion has text on both sides
+  if(i > length(text)) text[i] <- ""
+
+  output <- NULL
+  j <- 1
+  if(length(text) > 1) for( i in 1:(length(text) - 1)) {
+    output <- paste(output, text[i], sep='')
+    if(regexpr('^\\d+\\$', vars[i], perl=TRUE) > 0) {
+      ## explicit argument reference, e.g. %1$s
+      val <- as.integer(sub('^(\\d+)\\$.*$', '\\1', vars[i], perl=TRUE))
+      if(is.na(val) || val < 1 || val > length(ldots))
+        stop("argument reference out of range in format string")
+    }
+    else {
+      val <- j
+      j <- j + 1
+    }
+    output <- paste(output, ldots[[val]], sep='')
+  }
+  return(paste(output, text[length(text)], sep=''))
+}
+
+if(!exists("gettext")) gettext <- function(..., domain=NULL)
+  return(unlist(list(...)))
+
+
+if(!exists("gettextf")) gettextf <- function(fmt, ..., domain=NULL) {
+  return(sprintf(fmt, ...))
+}
+
+if(!exists("ngettext")) ngettext <- function(n, msg1, msg2, domain = NULL) {
+  if(n == 1)
+    return(msg1)
+  return(msg2)
+} |
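Under R these fallback definitions are skipped because the base functions already exist; a quick sanity check of the intended behavior:

  gettextf('%s observations', 10)   # "10 observations"
  ngettext(1, 'file', 'files')      # "file"
  ngettext(3, 'file', 'files')      # "files"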
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/groupn.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/groupn.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,30 @@
+groupn <- function(x, y, m=150)
+{
+  s <- !is.na(x + y)
+  x <- x[s]
+  y <- y[s]
+  i <- order(x)
+  x <- x[i]
+  y <- y[i]
+  n <- length(x)
+  if(n < m)
+    stop("groupn requires at least m non-missing observations")
+
+  start <- 1
+  end   <- m
+  meanx <- NULL
+  meany <- NULL
+  while(end <= n) {
+    meanx <- c(meanx, mean(x[start:end]))
+    meany <- c(meany, mean(y[start:end]))
+    start <- start + m
+    end   <- end + m
+  }
+
+  if(end > n) {
+    meanx <- c(meanx, mean(x[(n-m+1):n]))
+    meany <- c(meany, mean(y[(n-m+1):n]))
+  }
+
+  return(list(x=meanx, y=meany))
+} |
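A small sketch (simulated data) of using groupn as a crude scatterplot smoother:

  set.seed(1)
  x <- runif(250); y <- 2*x + rnorm(250)
  g <- groupn(x, y, m=100)   # bins 1-100 and 101-200, then the last 100 points
  plot(g, type='b')          # within-bin means of x and y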
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/hist.data.frame.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/hist.data.frame.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,102 @@ +hist.data.frame <- function(x, n.unique=3, nclass="compute", na.big=FALSE, + rugs=FALSE, freq=TRUE, mtitl=FALSE, ...) +{ + oldmf <- par('mfrow') + oldoma <- par('oma') + oldmar <- par('mar') # resetting mfrow causes a new mar + on.exit(par(mfrow=oldmf, oma=oldoma, mar=oldmar)) + mf <- oldmf + if(length(mf)==0) mf <- c(1,1) + + automf <- FALSE + if((la <- length(x))>1 & max(mf)==1) { + mf <- + if(la<=4) c(2,2) + else if(la<=6) c(2,3) + else if(la<=9) c(3,3) + else if(la<=12) c(3,4) + else if(la<=16) c(4,4) + else c(4,5) + + automf <- TRUE + par(mfrow=mf) + } + + if(is.character(mtitl)) + par(oma=c(0,0,3,0)) + + nam <- names(x) + i <- 0 + j <- 0 + for(v in x) { + j <- j+1 + type <- + if(is.character(v) || is.factor(v)) + 'cat' + else if(inherits(v,'Date')) + 'Date' + else + 'none' + + lab <- attr(v,"label") + lab <- + if(length(lab) && nchar(lab) > 35) + nam[j] + else + label(v, units=TRUE, plot=type!='cat', default=nam[j]) + + if(type=='cat') { + tab <- -sort(-table(v)) + dotchart3(tab, xlab=paste('Frequencies for', lab)) + } else { + type <- if(inherits(v,'Date')) 'Date' else 'none' + + if(type %nin% c('none','Date')) + v <- unclass(v) + + w <- v[!is.na(v)] + n <- length(w) + if(length(unique(w)) >= n.unique) { + i <- i+1 + if(is.numeric(nclass)) + nc <- nclass else + + if(nclass=="compute") + nc <- max(2,trunc(min(n/10,25*logb(n,10))/2)) + + if(nclass == 'default') { + if(type == 'Date') + hist(v, nc, xlab=lab, freq=freq, main='') + else hist(v, xlab=lab, main='', freq=freq) + } else { + if(type == 'Date') + hist(v, nc, xlab=lab, freq=freq, main='') + else + hist(v, nclass=nc, xlab=lab, freq=freq, main='') + } + + m <- sum(is.na(v)) + pm <- paste("n:",n," m:",m,sep="") + title(sub=pm,adj=0,cex=.5) + if(na.big && m>0) + mtext(paste(m,"NAs"),line=-2,cex=1) + + if(rugs) + scat1d(v, ...) + + if(automf && interactive() && + names(dev.list())!='postscript' && + (i %% prod(mf)==0)) { + if(is.character(mtitl)) + mtitle(mtitl) + + cat("click left mouse button to proceed\n") + locator(1) + } else if(is.character(mtitl) && i %% prod(mf)==1) + mtitle(mtitl) + } + } + } + + invisible(ceiling(i / prod(mf))) +} |
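hist() is generic, so calling it on a data frame dispatches here; a minimal example:

  hist(mtcars[, c('mpg', 'hp', 'wt', 'qsec')])   # one histogram per column
  ## character or factor columns would instead get a dotchart3 frequency chart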
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/histbackback.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/histbackback.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,76 @@ +histbackback <- + function(x, y, brks = NULL, xlab = NULL, axes = TRUE, probability = FALSE, + xlim = NULL, ylab='', ...) +{ + if(length(xlab)) + xlab <- rep(xlab, length = 2) + + if(is.list(x)) + { + namx <- names(x) + y <- x[[2]] + if(!length(xlab)) + { + if(length(namx)) + xlab <- namx[1:2] + else + { + xlab <- deparse(substitute(x)) + xlab <- paste(xlab, c("x", "y"), sep = "$") + } + } + + x <- x[[1]] + } + else if(!length(xlab)) + xlab <- c(deparse(substitute(x)), deparse(substitute(y))) + + if(!length(brks)) + brks <- hist(c(x, y), plot = FALSE)$breaks + + ll <- hist(x, breaks = brks, plot = FALSE) + rr <- hist(y, breaks = brks, plot = FALSE) + + if(probability) + { + ll$counts <- ll$density + rr$counts <- rr$density + } + + if(length(xlim) == 2) + xl <- xlim + else + { + xl <- pretty(range(c( - ll$counts, rr$counts))) + xl <- c(xl[1], xl[length(xl)]) + } + + if(length(ll$counts) > 0) + { + barplot(-ll$counts, xlim=xl, space=0, + horiz=TRUE, axes=FALSE, col=0, ...) + par(new = TRUE) + } + + if(length(rr$counts) > 0) + barplot(rr$counts, xlim=xl, space=0, + horiz=TRUE, axes=FALSE, col=0, ...) + + if(axes) + { + mgp.axis(1, at=pretty(xl), labels=format(abs(pretty(xl)))) + del <- (brks[2]-brks[1] - (brks[3]-brks[2]))/2 + brks[1] <- brks[1] + del + brks[-1] <- brks[-1] - del + mgp.axis(2, at=0:(length(brks)-1), + labels=formatC(brks, format='f', digits=.Options$digits)) + + title(xlab = xlab[1], adj = (-0.5 * xl[1])/( - xl[1] + xl[2])) + title(xlab = xlab[2], adj = (-xl[1] + 0.5 * xl[2])/(-xl[1] + xl[2])) + if(ylab!='') title(ylab=ylab) + } + + abline(v = 0) + box() + invisible(list(left = ll$counts, right = rr$counts, breaks = brks)) +} |
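A minimal back-to-back sketch with simulated data:

  set.seed(2)
  out <- histbackback(rnorm(200), rnorm(200, 0.5), probability=TRUE,
                      xlab=c('Group A', 'Group B'))
  out$breaks   # shared break points; out$left and out$right are the bar heights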
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/hoeffd.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/hoeffd.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,100 @@ +## Changes since sent to statlib: improved printing N matrix in print.hoeffd +hoeffd <- function(x, y) +{ + phoeffd <- function(d, n) + { + d <- as.matrix(d); n <- as.matrix(n) + b <- d + 1/36/n + z <- .5*(pi^4)*n*b + zz <- as.vector(z) + zz[is.na(zz)] <- 1e30 # so approx won't bark + + tabvals <- c(5297,4918,4565,4236,3930, + 3648,3387,3146,2924,2719,2530,2355, + 2194,2045,1908,1781,1663,1554,1453, + 1359,1273,1192,1117,1047,0982,0921, + 0864,0812,0762,0716,0673,0633,0595, + 0560,0527,0496,0467,0440,0414,0390, + 0368,0347,0327,0308,0291,0274,0259, + 0244,0230,0217,0205,0194,0183,0173, + 0163,0154,0145,0137,0130,0123,0116, + 0110,0104,0098,0093,0087,0083,0078, + 0074,0070,0066,0063,0059,0056,0053, + 0050,0047,0045,0042,0025,0014,0008, + 0005,0003,0002,0001)/10000 + + P <- ifelse(z<1.1 | z>8.5, pmax(1e-8,pmin(1,exp(.3885037-1.164879*z))), + matrix(approx(c(seq(1.1, 5,by=.05), + seq(5.5,8.5,by=.5)), + tabvals, zz)$y, + ncol=ncol(d))) + + dimnames(P) <- dimnames(d) + P + } + + if(!missing(y)) + x <- cbind(x, y) + + x[is.na(x)] <- 1e30 + storage.mode(x) <- "double" + + p <- as.integer(ncol(x)) + if(p < 1) + stop("must have > 1 column") + + n <- as.integer(nrow(x)) + if(n<5) + stop("must have >4 observations") + + h <- + .Fortran("hoeffd", x, n, p, hmatrix=double(p*p), aad=double(p*p), + maxad=double(p*p), npair=integer(p*p), + double(n), double(n), double(n), double(n), double(n), + PACKAGE="Hmisc") + + nam <- dimnames(x)[[2]] + npair <- matrix(h$npair, ncol=p) + aad <- maxad <- NULL + aad <- matrix(h$aad, ncol=p) + maxad <- matrix(h$maxad, ncol=p) + dimnames(aad) <- dimnames(maxad) <- list(nam, nam) + + h <- matrix(h$hmatrix, ncol=p) + h[h > 1e29] <- NA + dimnames(h) <- list(nam, nam) + dimnames(npair) <- list(nam, nam) + P <- phoeffd(h, npair) + diag(P) <- NA + structure(list(D=30*h, n=npair, P=P, aad=aad, maxad=maxad), class="hoeffd") +} + + +print.hoeffd <- function(x, ...) +{ + cat("D\n") + print(round(x$D,2)) + if(length(aad <- x$aad)) { + cat('\navg|F(x,y)-G(x)H(y)|\n') + print(round(aad,4)) + } + if(length(mad <- x$maxad)) { + cat('\nmax|F(x,y)-G(x)H(y)|\n') + print(round(mad,4)) + } + n <- x$n + if(all(n==n[1,1])) + cat("\nn=",n[1,1],"\n") + else { + cat("\nn\n") + print(x$n) + } + + cat("\nP\n") + P <- x$P + P <- ifelse(P<.0001,0,P) + p <- format(round(P,4)) + p[is.na(P)] <- "" + print(p, quote=FALSE) + invisible() +} |
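A quick sketch (simulated); Hoeffding's D detects the U-shaped dependence a correlation coefficient would miss. The call needs the package's compiled Fortran routine.

  set.seed(3)
  x <- runif(200); y <- (x - 0.5)^2 + runif(200)/10
  h <- hoeffd(x, y)
  h$D   # matrix of 30*D statistics; h$P holds the approximate P-values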
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/impute.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/impute.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,164 @@ +impute <- function(x, ...) UseMethod("impute") + + +impute.default <- function(x, fun=median, ...) +{ + m <- is.na(x) + k <- sum(m) + if(k==0) + return(x) + + nam <- names(x) + if(!length(nam)) { + nam <- as.character(1:length(x)); names(x) <- nam + } + + if(!is.function(fun)) { + fill <- fun + if(is.character(fill) && length(fill)==1 && fill=="random") + fill <- sample(x[!is.na(x)], sum(is.na(x)), replace=TRUE) + } else if(is.factor(x)) { + freq <- table(x) + fill <- names(freq)[freq==max(freq)][1] #take first if not unique + } else + fill <- + if(missing(fun) && is.logical(x)) + (if(sum(x[!m]) >= sum(!m)/2) + TRUE + else + FALSE) + else + fun(x[!m]) + + ## median(logical vector) doesn't work - know trying to get median + ## if fun is omitted. Get mode. + + if(length(fill)>1 && length(fill)!=k) + stop("length of vector of imputed values != no. NAs in x") + + ## lab <- label(x) + ## if(is.null(lab) || lab=="") lab <- name + ## lab <- paste(lab,"with",sum(m),"NAs imputed to",format(fill)) + ## attr(x, "label") <- lab + if(is.factor(x)) { + newlev <- sort(unique(fill)) + if(any(!(z <- newlev %in% levels(x)))) { + xc <- as.character(x) + xc[m] <- fill + x <- factor(xc, c(levels(x), newlev[!z])) + } else x[m] <- fill + } else x[m] <- fill + + structure(x, imputed=(1:length(x))[m], + class=c('impute', attr(x, 'class'))) +} + + +print.impute <- function(x, ...) +{ + i <- attr(x,"imputed") + if(!length(i)) { + print.default(x); + return(invisible()) + } + + if(is.factor(x)) + w <- as.character(x) + else + w <- format(x) + + names(w) <- names(x) + w[i] <- paste(w[i], "*", sep="") + attr(w, "label") <- attr(w,"imputed") <- attr(w, "class") <- NULL + print.default(w, quote=FALSE) + invisible() +} + + +summary.impute <- function(object, ...) +{ + i <- attr(object, "imputed") + oi <- object + attr(oi,'class') <- attr(oi,'class')[attr(oi,'class')!="impute"] + oi <- oi[i] + if(all(oi==oi[1])) + cat("\n",length(i),"values imputed to", + if(is.numeric(oi)) + format(oi[1]) + else + as.character(oi[1]), + "\n\n") + else { + cat("\nImputed Values:\n\n") + if(length(i)<20) + print(oi) + else + print(describe(oi, descript=as.character(sys.call())[2])) + + cat("\n") + } + + NextMethod("summary") +} + + +"[.impute" <- function(x, ..., drop=FALSE) +{ + ats <- attributes(x) + ats$dimnames <- NULL + ats$dim <- NULL + ats$names <- NULL + attr(x,'class') <- NULL + y <- x[..., drop = drop] + if(length(y)==0) + return(y) + + k <- 1:length(x); + names(k) <- names(x) + k <- k[...] + attributes(y) <- c(attributes(y), ats) + imp <- attr(y, "imputed") + attr(y, "imputed") <- j <- (1:length(k))[k %in% imp] + if(length(j)==0) { + cy <- attr(y,'class')[attr(y,'class')!='impute'] + y <- structure(y, imputed=NULL, + class=if(length(cy)) + cy + else + NULL) + } + + y +} + + +is.imputed <- function(x) +{ + w <- rep(FALSE, if(is.matrix(x))nrow(x) else length(x)) + if(length(z <- attr(x,"imputed"))) + w[z] <- TRUE + + w +} + + +as.data.frame.impute <- function(x, row.names = NULL, optional = FALSE, ...) +{ + nrows <- length(x) + if(!length(row.names)) { + ## the next line is not needed for the 1993 version of data.class and is + ## included for compatibility with 1992 version + if(length(row.names <- names(x)) == nrows && + !any(duplicated(row.names))) { + } else if(optional) + row.names <- character(nrows) + else + row.names <- as.character(1:nrows) + } + + value <- list(x) + if(!optional) + names(value) <- deparse(substitute(x))[[1]] + + structure(value, row.names=row.names, class='data.frame') +} |
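A minimal sketch of single-value imputation and its bookkeeping:

  x <- c(1, 2, NA, 4, NA)
  xi <- impute(x)       # default fun=median, so both NAs become 2
  xi                    # the print method flags imputed entries with *
  is.imputed(xi)        # FALSE FALSE TRUE FALSE TRUE
  impute(x, 'random')   # alternatively, sample replacements from observed values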
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/in.operator.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/in.operator.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,1 @@ +"%nin%" <- function(x, table) match(x, table, nomatch = 0) == 0 |
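The operator is just the negation of %in%:

  c('a', 'b', 'c') %nin% c('b', 'd')   # TRUE FALSE TRUE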
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/inc-dec.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/inc-dec.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,8 @@ +"inc<-" <- function(x, value) { + x + value +} + +"dec<-" <- function(x, value) { + x - value +} + |
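These are replacement functions, so they are used in assignment form:

  x <- 10
  inc(x) <- 5   # same as x <- x + 5
  dec(x) <- 3   # same as x <- x - 3
  x             # 12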
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/is.present.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/is.present.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,7 @@ +is.present <- function(x) +{ + if(is.character(x)) + return(x!="") + else + return(!is.na(x)) +} |
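Behavior at a glance: blanks count as missing for character vectors, NA for everything else:

  is.present(c(1, NA, 3))       # TRUE FALSE TRUE
  is.present(c('a', '', 'b'))   # TRUE FALSE TRUE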
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/james.stein.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/james.stein.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,32 @@ +james.stein <- function(y, group) +{ + s <- !(is.na(y)|is.na(group)) + y <- y[s]; + group <- as.character(group[s]) + ## as.char -> unused levels OK + k <- length(unique(group)) + if(k<3) + stop("must have >=3 groups") + + stats <- function(w) { + bar <- mean(w) + ss <- sum((w-bar)^2) + n <- length(w) + ##if(n<2) + ## stop("a group has n<2") + + c(n=length(w), mean=bar, ss=ss, var=ss/n/(n-1)) + } + + Z <- stats(y) + st <- tapply(y, group, FUN=stats) + nams <- names(st) + z <- matrix(unlist(st),ncol=4,byrow=TRUE) + ssb <- stats(z[,2])["ss"] + shrink <- 1 - (k-3)*z[,4]/ssb + shrink[z[,1]==1] <- 0 + shrink <- pmin(pmax(shrink,0),1) + list(n=z[,1], mean=z[,2], + shrunk.mean=structure(Z["mean"]*(1-shrink)+shrink*z[,2], names=nams), + shrink=shrink) +} |
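A small simulated example (with exactly 3 groups the k-3 factor gives no shrinkage, so use more):

  set.seed(4)
  y <- rnorm(100, rep(c(8, 10, 12, 14, 16), each=20))
  js <- james.stein(y, rep(letters[1:5], each=20))
  js$shrunk.mean   # group means pulled toward the grand mean
  js$shrink        # weight on each group's own mean (1 = no shrinkage)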
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/labcurve.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/labcurve.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,1450 @@
+## $Id$
+
+labcurve <- function(curves, labels=names(curves),
+                     method=NULL, keys=NULL, keyloc=c('auto','none'),
+                     type='l', step.type=c('left','right'),
+                     xmethod=if(any(type=='s')) 'unique' else 'grid',
+                     offset=NULL,
+                     xlim=NULL, tilt=FALSE, window=NULL,
+                     npts=100, cex=NULL,
+                     adj='auto', angle.adj.auto=30,
+                     lty=pr$lty, lwd=pr$lwd, col.=pr$col,
+                     transparent=TRUE, arrow.factor=1,
+                     point.inc=NULL, opts=NULL, key.opts=NULL,
+                     empty.method=c('area','maxdim'),
+                     numbins=25,
+                     pl=!missing(add), add=FALSE,
+                     ylim=NULL, xlab="", ylab="",
+                     whichLabel=1:length(curves),
+                     grid=FALSE, xrestrict=NULL, ...)
+{
+  if(pl && !add) {
+    plot.new(); par(new=TRUE) # enables strwidth etc.
+  }
+
+  oxpd <- par('xpd')
+  par(xpd=NA)
+  on.exit(par(xpd=oxpd))
+
+  gfun <- ordGridFun(grid) ## see Misc.s
+  gun  <- gfun$unit
+
+  diffu <- function(v) diff(unclass(v)) # mainly for POSIXt
+  ## also look at difftime
+
+  mcurves <- missing(curves)
+
+  pr <- par(c('cex','col','lwd','lty'))
+
+  if(!mcurves) {
+    nc   <- length(curves)
+    type <- rep(type, length=nc)
+    lty  <- rep(lty,  length=nc)
+    lwd  <- rep(lwd,  length=nc)
+    col. <- rep(col., length=nc)
+    for(i in 1:nc) {
+      z <- curves[[i]]
+      if(pl && !add) {
+        if(i==1) {
+          xlm <- range(z[[1]],na.rm=TRUE)
+          ylm <- range(z[[2]],na.rm=TRUE)
+        }
+        else {
+          xlm <- range(xlm,z[[1]],na.rm=TRUE)
+          ylm <- range(ylm,z[[2]],na.rm=TRUE)
+        }
+      }
+      if(length(a <- z$type)) type[i] <- a
+      if(length(a <- z$lty))  lty[i]  <- a
+      if(length(a <- z$lwd))  lwd[i]  <- a
+      if(length(a <- z$col))  col.[i] <- a
+    }
+  }
+
+  ## Optionally bring arguments from opts as if they were listed outside opts
+  ## This is used when opts is passed through to a function calling labcurve
+  if(length(opts) && is.list(opts)) {
+    names.opts <- names(opts)
+    full.names <- c('labels','method','keys','keyloc','type','step.type',
+                    'xmethod','offset','xlim','tilt','window','npts','cex',
+                    'adj','angle.adj.auto','lty','lwd','col.','n.auto.keyloc',
+                    'transparent','arrow.factor','point.inc','key.opts',
+                    'empty.method','numbins','ylim','xlab','ylab')
+    i <- charmatch(names.opts, full.names, -1)
+    if(any(i < 1))
+      stop(paste('Illegal elements in opts:',
+                 paste(names.opts[i < 1], collapse=' ')))
+
+    for(j in 1:length(opts)) assign(full.names[i[j]],opts[[j]],immediate=TRUE)
+  }
+
+  if(mcurves) nc <- length(labels)
+  else if(!is.logical(labels) && nc != length(labels))
+    stop('length of labels is not equal to # curves')
+
+  type <- rep(type, length=nc)
+  lty  <- rep(lty,  length=nc)
+  lwd  <- rep(lwd,  length=nc)
+  col. <- rep(col., length=nc)
+
+  if(pl) {
+    if(mcurves) stop('curves must be given if pl=T')
+
+    if(!add) {
+      if(!length(xlim)) xlim <- xlm
+      if(!length(ylim)) ylim <- ylm
+
+      namcur <- names(curves[[1]])
+      if(!is.expression(xlab) && xlab=='' && length(namcur))
+        xlab <- namcur[1]
+
+      if(!is.expression(ylab) && ylab=='' && length(namcur))
+        ylab <- namcur[2]
+
+      if(grid) stop("grid=TRUE when pl=TRUE is not yet implemented")
+      else
+        plot(0, 0, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab,
+             type='n',
[... truncated in source ...]
+  structure(list(W, xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim,
+                 ticks=ticks, key=key, keyloc=keyloc, opts=opts),
+            class='drawPlot')
+}
+
+
+bezier <- function(x, y, xlim, evaluation=100)
+{
+  if(missing(y)) {
+    y <- x[[2]]
+    x <- x[[1]]
+  }
+
+  n <- length(x)
+  X <- Y <- single(evaluation)
+  Z <- seq(0, 1, length=evaluation)
+  X[1] <- x[1];
+  X[evaluation] <- x[n]
+  Y[1] <- y[1];
+  Y[evaluation] <- y[n]
+  for(i in 2:(evaluation-1)) {
+    z <- Z[i]
+    xz <- yz <- 0
+    const <- (1 - z)^(n-1)
+    for(j in 0:(n-1)) {
+      xz <- xz + const*x[j+1]
+      yz <- yz + const*y[j+1]
+      const <- const* (n-1-j)/(j+1) * z/(1-z)
+      if(is.na(const)) prn(c(i,j,z))
+    }
+
+    X[i] <- xz; Y[i] <- yz
+  }
+
+  list(x=as.single(X), y=as.single(Y))
+}
+
+
+plot.drawPlot <- function(x, xlab, ylab, ticks,
+                          key=x$key, keyloc=x$keyloc, ...)
+{
+  if(missing(xlab)) xlab <- x$xlab
+
+  if(missing(ylab)) ylab <- x$ylab
+
+  xlim <- x$xlim
+  ylim <- x$ylim
+  if(missing(ticks)) ticks <- x$ticks
+
+  plot(xlim, ylim, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab,
+       type='n', axes=ticks=='xy')
+  switch(ticks,
+         none={
+           axis(1, at=xlim, labels=FALSE)
+           axis(2, at=ylim, labels=FALSE)
+         },
+         x={
+           axis(1)
+           axis(2, at=ylim, labels=FALSE)
+         },
+         y={
+           axis(1, at=xlim, labels=FALSE)
+           axis(2)
+         },
+         xy= )
+
+  data <- x[[1]]
+  m <- length(data)
+  type <- label <- rep('', m)
+  lty <- lwd <- pch <- cex <- rep(NA, m)
+  curves <- vector('list', m)
+  i <- 0
+  for(j in 1:m) {
+    w <- data[[j]]
+    if(attr(w, 'class') == 'Abline') {
+      do.call("abline", unclass(w))
+      next
+    }
+
+    i <- i + 1
+    if(is.function(w$type))
+      w$type <- 'l'
+
+    curves[[i]] <-
+      if(!key)
+        w$points
+      else switch(w$type,
+                  step = approx(w$points,
+                                xout=seq(min(w$points$x),max(w$points$x),
+                                         length=50),
+                                method='constant', f=0),
+                  linear = approx(w$points,
+                                  xout=seq(min(w$points$x),max(w$points$x),
+                                           length=50)),
+                  w$points)
+
+    label[i] <- w$label
+    switch(attr(w, 'class'),
+           Points = {
+             type[i] <- w$type
+             pch[i]  <- w$pch
+             cex[i]  <- w$cex
+             switch(w$type,
+                    p = points(w$points, cex=w$cex, pch=w$pch),
+                    r = scat1d(w$points$x, side=1))
+             switch(w$rug,
+                    x = scat1d(w$points$x, side=1),
+                    y = scat1d(w$points$y, side=2),
+                    xy = {
+                      scat1d(w$points$x, side=1)
+                      scat1d(w$points$y, side=2)
+                    },
+                    none = )
+           },
+           Curve = {
+             type[i] <- if(w$type=='step') 's' else 'l'
+
+             lty[i] <- w$lty
+             lwd[i] <- w$lwd
+             lines(w$points, lty=w$lty, lwd=w$lwd, type=type[i])
+           })
+  }
+
+  if(i < m) {
+    curves <- curves[1:i]
+    label  <- label[1:i]
+    type   <- type[1:i]
+    pch    <- pch[1:i]
+    lty    <- lty[1:i]
+    lwd    <- lwd[1:i]
+    cex    <- cex[1:i]
+  }
+
+  if(key && !length(keyloc))
+    stop('you may not specify key=T unless key=T was specified to drawPlot or keyloc is specified to plot')
+
+  if(any(label!='')) {
+    j <- type!='r'
+    if(any(j)) {
+      if(key) putKey(keyloc, labels=label[j],
+                     type=type[j], pch=pch[j],
+                     lty=lty[j], lwd=lwd[j], cex=cex[j])
+      else
+        labcurve(curves[j], type=type[j],
+                 lty=lty[j], lwd=lwd[j], labels=label[j], opts=x$opts)
+    }
+  }
+
+  invisible()
+} |
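A minimal usage sketch following the function's calling pattern (the body above is truncated in this dump):

  x <- seq(0, 1, length=50)
  curves <- list(Low  = list(x=x, y=x^2),
                 High = list(x=x, y=sqrt(x)))
  labcurve(curves, pl=TRUE)   # draws both curves, labeling each where the curves are most separated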
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/label.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/label.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,326 @@
+##"label<-" <- function(x, value) {
+##  attr(x, "label") <- value
+##  x
+##}
+
+label <- function(x, default=NULL, ...) UseMethod("label")
+
+label.default <- function(x, default=NULL, units=FALSE, plot=FALSE,
+                          grid=FALSE, ...)
+{
+  if(length(default) > 1)
+    stop("the default string cannot be of length greater than one")
+
+  at <- attributes(x)
+  lab <- at$label
+  if(length(default) && (!length(lab) || lab==''))
+    lab <- default
+
+  un <- at$units
+  labelPlotmath(lab,
+                if(units) un else NULL,
+                plotmath=plot, grid=grid)
+}
+
+label.Surv <- function(x, default=NULL, units=FALSE,
+                       plot=FALSE, grid=FALSE,
+                       type=c('any', 'time', 'event'), ...)
+{
+  type <- match.arg(type)
+
+  if(length(default) > 1)
+    stop("the default string cannot be of length greater than one")
+
+  at <- attributes(x)
+  lab <- at$label
+  ia <- at$inputAttributes
+  if((! length(lab) || lab == '') && length(ia)) {
+    poss <- switch(type,
+                   any   = c(ia$event$label, ia$time2$label, ia$time$label),
+                   time  = c(                ia$time2$label, ia$time$label),
+                   event = ia$event$label )
+    for(lb in poss)
+      if(! length(lab) && lb != '') lab <- lb
+  }
+
+  if(length(default) && (!length(lab) || lab=='')) lab <- default
+
+  un <- NULL
+  if(units) {
+    un <- at$units
+    if(! length(un) && length(ia)) {
+      un <- ia$time2$units
+      if(! length(un)) un <- ia$time$units
+    }
+  }
+
+  labelPlotmath(lab, un,
+                plotmath=plot, grid=grid)
+}
+
+
+
+label.data.frame <- function(x, default=NULL, self=FALSE, ...) {
+  if(self) {
+    label.default(x)
+  } else {
+    if(length(default) > 0 && length(default) != length(x)) {
+      stop('length of default must be the same as x')
+    } else if(length(default) == 0) {
+      default <- list(default)
+    }
+
+    labels <- mapply(FUN=label, x=x, default=default,
+                     MoreArgs=list(self=TRUE), USE.NAMES=FALSE)
+    names(labels) <- names(x)
+    return(labels)
+  }
+}
+
+labelPlotmath <- function(label, units=NULL, plotmath=TRUE, grid=FALSE)
+{
+  if(!length(label)) label <- ''
+
+  if(!length(units) || (length(units)==1 && is.na(units))) units <- ''
+
+  g <-
+    if(plotmath) function(x, y=NULL, xstyle=NULL, ystyle=NULL)
+    {
+      h <- function(w, style=NULL)
+        if(length(style))
+          paste(style,'(',w,')',sep='')
+        else
+          w
+
+      tryparse <- function(z, original)
+      {
+        p <- try(parse(text=z), silent=TRUE)
+        if(is.character(p)) original else p
+      }
+      if(!length(y))
+        return(tryparse(h(plotmathTranslate(x), xstyle), x))
+
+      w <- paste('list(',h(plotmathTranslate(x), xstyle), ',',
+                 h(plotmathTranslate(y), ystyle), ')', sep='')
+      tryparse(w, paste(x, y))
+    } else function(x, y=NULL, ...) if(length(y)) paste(x,y) else x
+
+  if(units=='') g(label)
+  else if(label=='') g(units)
+  else if(plotmath)
+    g(label, units, ystyle='scriptstyle')
+  else paste(label,' [',units,']',sep='')
+}
+
+
+plotmathTranslate <- function(x)
+{
+  if(length(grep('paste', x))) return(x)
+
+  specials <- c(' ','%','_')
+  spec <- FALSE
+  for(s in specials)
+    if(length(grep(s,x)))
+      spec <- TRUE
+
+  if(spec) x <- paste('paste("',x,'")',sep='')
+  else if(substring(x,1,1)=='/') x <- paste('phantom()', x, sep='')
+  x
+}
+
+labelLatex <- function(x=NULL, label='', units='', size='smaller[2]',
+                       hfill=FALSE, bold=FALSE, default='', double=FALSE) {
+  if(length(x)) {
+    if(label == '') label <- label(x)
+    if(units == '') units <- units(x)
+  }
+  if(default == '' && length(x)) default <- deparse(substitute(x))
+  if(label == '') return(default)
+
+  label <- latexTranslate(label)
+  bs <- if(double) '\\\\' else '\\'
+  if(bold
[... truncated in source ...]
+  ...bel, '}', sep='')
+  if(units != '') {
+    units <- latexTranslate(units)
+    if(length(size) && size != '')
+      units <- paste('{', bs, size, ' ', units, '}', sep='')
+    if(hfill) units <- paste(bs, 'hfill ', units, sep='')
+    else
+      units <- paste(' ', units, sep='')
+    label <- paste(label, units, sep='')
+  }
+  label
+}
+
+"label<-" <- function(x, ..., value) UseMethod("label<-")
+
+##From Bill Dunlap, StatSci 15Mar95:
+"label<-.default" <- function(x, ..., value)
+{
+  if(is.list(value)) {
+    stop("cannot assign a list to be an object label")
+  }
+
+  if(length(value) != 1L) {
+    stop("value must be character vector of length 1")
+  }
+
+  attr(x, 'label') <- value
+
+  if('labelled' %nin% class(x)) {
+    class(x) <- c('labelled', class(x))
+  }
+  return(x)
+}
+## } else function(x, ..., value)
+## {
+##   ## Splus 5.x, 6.x
+##   ## oldClass(x) <- unique(c('labelled', oldClass(x),
+##   ##                         if(is.matrix(x))'matrix'))
+##   attr(x,'label') <- value
+##   return(x)
+## }
+
+"label<-.data.frame" <- function(x, self=TRUE, ..., value) {
+  if(!is.data.frame(x)) {
+    stop("x must be a data.frame")
+  }
+
+  if(missing(self) && is.list(value)) {
+    self <- FALSE
+  }
+
+  if(self) {
+    xc <- class(x)
+    xx <- unclass(x)
+    label(xx) <- value
+    class(xx) <- xc
+    return(xx)
+  } else {
+    if(length(value) != length(x)) {
+      stop("value must have the same length as x")
+    }
+
+    for (i in seq(along.with=x)) {
+      label(x[[i]]) <- value[[i]]
+    }
+  }
+
+  return(x)
+}
+
+"[.labelled"<- function(x, ...) {
+  tags <- valueTags(x)
+  x <- NextMethod("[")
+  valueTags(x) <- tags
+  x
+}
+
+"print.labelled"<- function(x, ...) {
+  x.orig <- x
+  u <- attr(x,'units')
+  if(length(u))
+    attr(x,'units') <- NULL # so won't print twice
+
+  cat(attr(x, "label"),
+      if(length(u))
+        paste('[', u, ']', sep=''),
+      "\n")
+
+  attr(x, "label") <- NULL
+  class(x) <-
+    if(length(class(x))==1 && class(x)=='labelled')
+      NULL
+    else
+      class(x)[class(x) != 'labelled']
+
+  ## next line works around print bug
+  if(!length(attr(x,'class')))
+    attr(x,'class') <- NULL
+
+  NextMethod("print")
+  invisible(x.orig)
+}
+
+
+as.data.frame.labelled <- as.data.frame.vector
+
+Label <- function(object, ...) UseMethod("Label")
+
+
+Label.data.frame <- function(object, file='', append=FALSE, ...)
+{
+  nn <- names(object)
+  for(i in 1:length(nn)) {
+    lab <- attr(object[[nn[i]]],'label')
+    lab <- if(length(lab)==0) '' else lab
+    cat("label(",nn[i],")\t<- '",lab,"'\n",
+        append=if(i==1)
+          append
+        else
+          TRUE,
+        file=file, sep='')
+  }
+
+  invisible()
+}
+
+
+reLabelled <- function(object)
+{
+  for(i in 1:length(object))
+  {
+    x <- object[[i]]
+    lab <- attr(x, 'label')
+    cl <- class(x)
+    if(length(lab) && !any(cl=='labelled')) {
+      class(x) <- c('labelled',cl)
+      object[[i]] <- x
+    }
+  }
+
+  object
+}
+
+
+llist <- function(..., labels=TRUE)
+{
+  dotlist <- list(...)
+  lname <- names(dotlist)
+  name <- vname <- as.character(sys.call())[-1]
+  for(i in 1:length(dotlist))
+  {
+    vname[i] <-
+      if(length(lname) && lname[i]!='')
+        lname[i]
+      else
+        name[i]
+
+    ## R barked at setting vname[i] to NULL
+    lab <- vname[i]
+    if(labels)
+    {
+      lab <- attr(dotlist[[i]],'label')
+      if(length(lab) == 0)
+        lab <- vname[i]
+    }
+
+    label(dotlist[[i]]) <- lab
+  }
+
+  names(dotlist) <- vname[1:length(dotlist)]
+  dotlist
+}
+
+combineLabels <- function(...)
+{
+  w <- list(...)
+  labs <- sapply(w[[1]], label)
+  lw <- length(w)
+  if(lw > 1) for(j in 2:lw)
+  {
+    lab <- sapply(w[[j]], label)
+    lab <- lab[lab != '']
+    if(length(lab)) labs[names(lab)] <- lab
+  }
+  labs[labs != '']
+} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/latex.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/latex.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,1354 @@
+first.word <- function(x, i=1, expr=substitute(x))
+{
+  words <- if(!missing(x)) as.character(x)[1]
+  else
+    as.character(unlist(expr))[1]
+
+  if(i > 1) stop('i > 1 not implemented')
+
+  chars <- substring(words, 1 : nchar(words), 1 : nchar(words))
+  legal.chars <- c(letters, LETTERS, '.',
+                   '0','1','2','3','4','5','6','7','8','9')
+  non.legal.chars <- (1:length(chars))[chars %nin% legal.chars]
+  if(!any(non.legal.chars)) return(words)
+
+  if(non.legal.chars[1] == 1) return(character(0))
+
+  substring(words, 1, non.legal.chars[1] - 1)
+}
+
+
+##1. if x is a data.frame, then do each component separately.
+##2. if x is a matrix, but not a data.frame, make it a data.frame
+##   with individual components for the columns.
+##3. if a component x$x is a matrix, then do all columns the same.
+##4. Use right justify by default for numeric columns.
+##5. Use left justify for non-numeric columns.
+
+## The following are made complicated by matrix components of data.frames:
+##6. vector cdec must have number of items equal to number of columns
+##   of input x.
+##7. matrix dec must have number of columns equal to number of columns
+##   of input x.
+##8. scalar dec is expanded to a vector cdec with number of items equal
+##   to number of columns of input x.
+##9. vector rdec must have number of items equal to number of rows of input x.
+##   rdec is expanded to matrix dec.
+##10. col.just must have number of columns equal to number of columns
+##    of output cx.
+
+## Value:
+## character matrix with character images of properly rounded x.
+## matrix components of input x are now just sets of columns of character matrix.
+## attr(,col.just) repeats input col.just when provided.
+##	Otherwise, recommended justification for columns of output.
+##	Default is "l" for characters and factors, "r" for numeric.
+##	When dcolumn==T, numerics will have ".".
+
+
+## FEH 21May96 - changed default for numeric.dollar to cdot
+## FEH 5Jun96  - re-written to not rely on as.data.frame,
+##               converted data frames to matrices the slow way
+##               added matrix.sep
+## 12Aug99 - allowed # decimal places=NA (no rounding, just use format())
+## 27May02 - added booktabs FEH
+## 13Dec02 - added ctable FEH
+## arguments included check.names=TRUE 23jan03
+
+format.df <- function(x,
+                      digits, dec=NULL, rdec=NULL, cdec=NULL,
+                      numeric.dollar=!dcolumn, na.blank=FALSE,
+                      na.dot=FALSE, blank.dot=FALSE, col.just=NULL,
+                      cdot=FALSE, dcolumn=FALSE, matrix.sep=' ',
+                      scientific=c(-4,4), math.row.names=FALSE,
+                      math.col.names=FALSE, double.slash=FALSE,
+                      format.Date='%m/%d/%Y',
+                      format.POSIXt="%m/%d/%Y %H:%M:%OS", ...)
+{
+  sl <- ifelse(double.slash, "\\\\", "\\")
+
+  cleanLatex <- function(string) {
+    if(!is.character(string))
+      string <- as.character(string)
+
+    ## Find strings not in math mode (surrounded by $)
+    s <- gsub("(^[[:space:]]+)|([[:space:]]+$)", "", string)
+    k <- !(substring(s, 1, 1) == '$' & substring(s, nchar(s)) == '$')
+    k <- k & !is.na(k)
+
+    if(!any(k)) return(string)
+
+    inn <- c('< =', '> =', '<=', '>=', '<', '>',
+             '\\\\%', '%',
+             '\\\\&', '&')
+    out <- c('<=',
+             '>=',
+             paste('$', sl, sl, 'leq$', sep=''),
+             paste('$', sl, sl, 'geq$', sep=''),
+             paste(sl, sl, 'textless', sep=''),
+             paste(sl, sl, 'textgreater', sep=''),
+             '%', paste(sl, sl, '%', sep=''),
+             '&', paste(sl, sl, '&', sep=''))
+    for(i in 1 : length(inn))
+      string[k] <- gsub(inn[i], out[i], string[k])
+    string
+  }
+
+  if(numeric.dollar && dcolumn)
+    stop('cannot have both numeric.dollar=TRUE and dcolumn=TRUE')
+
+  if(missing(digits))
+    digits <- NULL
+
+  if
[... truncated in source ...]
+  ...lse 5dec03
+  ## 2 dQuote 26jan04
+  invisible(sys(cmd))
+}
+
+dvigv.dvi <- function(object, ...)
+  invisible(sys(paste(optionsCmds('dvips'), '-f', object$file,
+                      '| gv - &')))
+
+## added ... to dvixx.dvi calls below 1dec03
+dvips.latex <- function(object, ...) invisible(dvips.dvi(dvi.latex(object),...))
+dvigv.latex <- function(object, ...) invisible(dvigv.dvi(dvi.latex(object),...))
+
+
+html <- function(object, ...) UseMethod('html')
+
+
+html.latex <- function(object, file, ...)
+{
+  fi  <- object$file
+  sty <- object$style
+
+  if(length(sty))
+    sty <- paste('\\usepackage{',sty,'}',sep='')
+
+  ## pre <- tempfile(); post <- tempfile() 1dec03
+  tmp <- tempfile()
+  tmptex <- paste(tmp,'tex',sep='.') # 5dec03
+  infi <- readLines(fi)
+  cat('\\documentclass{report}', sty, '\\begin{document}', infi,
+      '\\end{document}\n', file=tmptex, sep='\n')
+  sc <-
+    if(.Platform$OS.type == 'unix')
+      ';'
+    else
+      '&' # 7feb03
+
+  ## Create system call to hevea to convert temporary latex file to html.
+  cmd <-
+    if(missing(file)) {
+      paste(optionsCmds('hevea'), shQuote(tmptex))
+    } else {
+      paste(optionsCmds('hevea'), '-o', file, shQuote(tmptex))
+    }
+
+  ## perform system call
+  sys(cmd)
+  ## 24nov03 dQuote
+
+  ## Check to see if the .html tag exists and add it
+  ## if it does not
+  if(missing(file)) {
+    file <- paste(tmp,'html',sep='.')
+  } else {
+    if(!length(grep(".*\\.html", file))) {
+      file <- paste(file, 'html', sep='.')
+    }
+  }
+
+  structure(list(file=file), class='html')
+}
+
+
+html.data.frame <-
+  function(object,
+           file=paste(first.word(deparse(substitute(object))),
+                      'html',sep='.'),
+           append=FALSE, link=NULL, linkCol=1,
+           linkType=c('href','name'), ...)
+{
+  linkType <- match.arg(linkType)
+
+  x <- as.matrix(object)
+  for(i in 1:ncol(x))
+  {
+    xi <- x[,i]
+    if(is.numeric(object[,i]))
+      x[,i] <- paste('<div align=right>',xi,'</div>',sep='')
+  }
+  if(length(r <- dimnames(x)[[1]]))
+    x <- cbind(Name=as.character(r), x)
+
+  cat('<TABLE BORDER>\n', file=file, append=append)
+  cat('<tr>', paste('<td><h3>', dimnames(x)[[2]], '</h3></td>',sep=''), '</tr>\n',
+      sep='', file=file, append=file!='')
+
+  if(length(link)) {
+    if(is.matrix(link))
+      x[link!=''] <- paste('<a ',linkType,'="', link[link!=''],'">',
+                           x[link!=''],'</a>',sep='') else
+    x[,linkCol] <- ifelse(link == '',x[,linkCol],
+                          paste('<a ',linkType,'="',link,'">',
+                                x[,linkCol],'</a>',sep=''))
+  }
+
+  for(i in 1:nrow(x))
+    cat('<tr>',paste('<td>',x[i,],'</td>',sep=''),'</tr>\n',
+        sep='', file=file, append=file!='')
+
+  cat('</TABLE>\n', file=file, append=file!='')
+  structure(list(file=file), class='html')
+}
+
+
+html.default <- function(object,
+                         file=paste(first.word(deparse(substitute(object))),
+                                    'html',sep='.'),
+                         append=FALSE,
+                         link=NULL, linkCol=1, linkType=c('href','name'),
+                         ...)
+{
+  html.data.frame(object, file=file, append=append, link=link,
+                  linkCol=linkCol, linkType=linkType, ...)
+}
+
+show.html <- function(object)
+{
+  browser <- .Options$help.browser
+  if(!length(browser))
+    browser <- .Options$browser
+
+  if(!length(browser))
+    browser <- 'netscape'
+
+  sys(paste(browser, object, if(.Platform$OS.type == 'unix') '&'))
+  invisible()
+}
+
+print.html <- function(x, ...) show.html(x)
+
+latexSN <- function(x) {
+  x <- format(x)
+  x <- sedit(x, c('e+00','e-0*',
+                  'e-*',
+                  'e+0*',
+                  'e+*'),
+             c('',
+               '\\!\\times\\!10^{-*}','\\!\\times\\!10^{-*}',
+               '\\!\\times\\!10^{*}','\\!\\times\\!10^{*}'))
+  x
+} |
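A small sketch of format.df, the workhorse that rounds and justifies a matrix before LaTeX conversion (exact output depends on options such as numeric.dollar):

  m <- cbind(A=c(1.234, 5.678), B=c(10.1, 20.22))
  w <- format.df(m, cdec=c(2, 1))   # 2 decimals for column A, 1 for B
  attr(w, 'col.just')               # suggested justification, 'r' for numeric columns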
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/latexDotchart.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/latexDotchart.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,178 @@ +latexDotchart <- + function(data, labels, groups = NULL, gdata = NA, + xlab = "", auxdata, auxgdata=NULL, auxtitle, + w=4, h=4, margin, lines = TRUE, dotsize = .075, size='small', + size.labels = 'small', size.group.labels = 'normalsize', + ttlabels = FALSE, sort.=TRUE, xaxis=TRUE, lcolor='gray', + ...) +{ + txt <- function(x, y, s, size=NULL, just=c('c','l','r'), tt=FALSE) { + just <- match.arg(just) + n <- max(length(x), length(y), length(s)) + x <- rep(x, length.out=n) + y <- rep(y, length.out=n) + s <- rep(s, length.out=n) + z <- character(n) + if(tt) s <- paste('\\texttt{', s, '}', sep='') + if(length(size)) s <- paste('\\', size, ' ', s, sep='') + for(i in 1:n) + z[i] <- sprintf('\\put(%g,%g){\\makebox(.001,.001)[%s]{%s}}', + x[i], y[i], just, s[i]) + z + } + ln <- function(x1, y1, x2, y2, color='black') { + n <- max(length(x1), length(x2), length(y1), length(y2)) + x1 <- rep(x1, length.out=n) + y1 <- rep(y1, length.out=n) + x2 <- rep(x2, length.out=n) + y2 <- rep(y2, length.out=n) + z <- character(n) + for(i in 1:n) + z[i] <- if(x1[i]==x2[i]) + sprintf('\\put(%g,%g){\\line(0,%g){%g}}', x1[i], y1[i], + 1*(y2[i] >= y1[i]) - 1*(y2[i] < y1[i]), abs(y2[i]-y1[i])) + else if(y1[i]==y2[i]) + sprintf('\\put(%g,%g){\\line(%g,0){%g}}', x1[i], y1[i], + 1*(x2[i] >= x1[i]) - 1*(x2[i] < x1[i]), abs(x2[i]-x1[i])) + else + sprintf('\\drawline(%g,%g)(%g,%g)', + x1[i], y1[i], x2[i], y2[i]) + if(color != 'black') + z <- c(if(color=='gray') '\\color[gray]{0.8}' else + sprintf('\\color{%s}', color), + z, '\\color{black}') + z + } + ## Approximate length in inches of longest char. string + acl <- function(s) 0.09 * max(nchar(s)) + f <- sprintf + + if(size.labels == size) size.labels <- NULL + if(size.group.labels == size) size.group.labels <- NULL + + z <- c('\\setlength{\\unitlength}{1in}', + f('\\begin{picture}(%g,%g)', w, h), + f('\\%s', size)) + + ndata <- length(data) + if(missing(labels)) + { + if(length(names(data))) + labels <- names(data) + else labels <- paste("#", seq(along = ndata)) + } + else labels <- rep(as.character(labels), length = ndata) + + if(missing(groups)) { + glabels <- NULL + gdata <- NULL + if(sort.) { + ord <- order(-data) + data <- data[ord] + labels <- labels[ord] + if(!missing(auxdata)) auxdata <- auxdata[ord] + } + } else { + if(!sort.) { + ##assume data sorted in groups, but re-number groups + ##to be as if groups given in order 1,2,3,... + ug <- unique(as.character(groups)) + groups <- factor(as.character(groups), levels=ug) + } + + groups <- unclass(groups) + glabels <- levels(groups) + gdata <- rep(gdata, length = length(glabels)) + ord <- if(sort.) 
order(groups, -data) else + order(groups, seq(along = groups)) + groups <- groups[ord] + data <- data[ord] + labels <- labels[ord] + if(!missing(auxdata)) auxdata <- auxdata[ord] + } + + alldat <- c(data, gdata) + if(!missing(auxdata)) + auxdata <- format(c(auxdata, auxgdata)) + + alllab <- c(labels, glabels) + ## set up margins and user coordinates, draw box + + xl <- range(p <- pretty(alldat)) + yl <- c(1, length(alldat)) + + if(missing(margin)) + margin <- c(acl(alllab), + ifelse(xlab == '', .2, .4), + ifelse(missing(auxdata), 0, acl(auxdata)), + ifelse(missing(auxtitle), 0, .1)) + + + xt <- function(x) round((w - sum(margin[c(1,3)]))*(x - xl[1])/diff(xl) + + margin[1], 5) + yt <- function(y) round((h - sum(margin[c(2,4)]))*(y - yl[1])/diff(yl) + + margin[2], 5) + + ## \color screws up line and circle placement if first multiputlist + ## and put occur after \color + if(xaxis) { + z <- c(z, paste(f('\\multiputlist(%g,%g)(%g,%g){', + xt(xl[1]), yt(yl[1]) - .15, diff(xt(p[1:2])), 0), + paste(p, collapse=','), '}', sep='')) + z <- c(z, ln(xt(p), yt(yl[1]) - 0.05, xt(p), yt(yl[1]))) + if(xlab != '') + z <- c(z, txt(xt(xl[1] + diff(xl)/2), .1, xlab)) + } + + z <- c(z, ln(xt(xl), yt(yl[1]), xt(xl), yt(yl[2])), + ln(xt(xl[1]), yt(yl), xt(xl[2]), yt(yl))) + + den <- ndata + 2 * length(glabels) + 1 + + delt <- ( - (yl[2] - yl[1]))/den + ypos <- seq(yl[2], by = delt, length = ndata) + + if(!missing(groups)) + { + ypos1 <- ypos + 2 * delt * (if(length(groups)>1) + cumsum(c(1, diff(groups) > 0)) + else 1) + diff2 <- c(3 * delt, diff(ypos1)) + ypos2 <- ypos1[abs(diff2 - 3 * delt) < abs(0.001 * delt)] - + delt + ypos <- c(ypos1, ypos2) - delt + } + + ##put on labels and data + ypos <- ypos + delt + nongrp <- 1:ndata + + if(lines) + z <- c(z, ln(xt(xl[1]), yt(ypos[nongrp]), xt(xl[2]), yt(ypos[nongrp]), + color=lcolor)) + + for(i in seq(along = alldat)) + if(!is.na(alldat[i] + ypos[i])) + z <- c(z, f('\\put(%g,%g){\\circle*{%g}}', + xt(alldat[i]), yt(ypos[i]), dotsize)) + + if(!missing(auxdata)) + { + z <- c(z, txt(w - 0.02, yt(ypos[nongrp]), auxdata, + size=size.labels, just='r')) + if(!missing(auxtitle)) + z <- c(z, txt(w - 0.02, yt(yl[2]) + 0.1, auxtitle, + size=size.labels, just='r')) + } + labng <- alllab[nongrp] + yposng <- ypos[nongrp] + + z <- c(z, txt(margin[1] - 0.05, yt(yposng), labng, + size=size.labels, just='r', tt=ttlabels)) + if(!missing(groups)) + z <- c(z, txt(margin[1] - 0.05, yt(ypos[-nongrp]), alllab[-nongrp], + size=size.group.labels, just='r')) + + z <- c(z, '\\end{picture}') + z +} |
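The function returns LaTeX picture-environment code as a character vector, one element per line:

  z <- latexDotchart(c(0.1, 0.25, 0.5), c('A', 'B', 'C'), xlab='Proportion')
  cat(z, sep='\n')   # paste the result into a LaTeX document body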
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/latexTabular.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/latexTabular.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,26 @@
+latexTabular <- function(x, headings=colnames(x),
+                         align =paste(rep('c',ncol(x)),collapse=''),
+                         halign=paste(rep('c',ncol(x)),collapse=''),
+                         helvetica=TRUE, translate=TRUE, hline=0, ...)
+{
+  if(translate) x <- latexTranslate(x)
+  if(length(list(...))) x <- format.df(x, ...)
+  xhalign <- substring(halign, 1:nchar(halign), 1:nchar(halign))
+  w <- paste('\\begin{tabular}{', align, '}', sep='')
+  if(hline == 2) w <- paste(w, '\\hline', sep='')
+  if(helvetica) w <- paste('{\\fontfamily{phv}\\selectfont', w, sep='')
+  if(length(headings)) {
+    if(translate) headings <- latexTranslate(headings)
+    h <- if(halign != align)
+      paste(paste(paste('\\multicolumn{1}{', xhalign, '}{',
+                        headings, '}',sep=''),
+                  collapse='&'), '\\\\', sep='')
+    else paste(paste(headings, collapse='&'), '\\\\', sep='')
+    if(hline == 1) h <- paste(h, '\\hline', sep='')
+    if(hline == 2) h <- paste(h, '\\hline\\hline', sep='')
+  }
+  v <- apply(x, 1, paste, collapse='&')
+  v <- paste(paste(v, '\\\\', if(hline == 2) '\\hline'), collapse='\n')
+  if(length(headings)) v <- paste(h, v, sep='\n')
+  paste(w, v, '\\end{tabular}', if(helvetica)'}', sep='\n')
+} |
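A minimal sketch; the value is a single string holding the tabular environment:

  m <- cbind(a=1:2, b=3:4)
  cat(latexTabular(m))   # \begin{tabular}{cc} with a header row from colnames(m)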
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/latexTherm.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/latexTherm.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,84 @@ +latexTherm <- function(y, name, w=.075, h=.15, spacefactor=1/2, extra=.07, + file='', append=TRUE) { + ct <- function(..., append=TRUE) cat(..., file=file, append=append, sep='') + ct('\\def\\', name, '{\n', append=append) + tab <- attr(y, 'table') + if(length(tab)) { + ct('\\protect\\tooltipn{\n') + } + ct('\\setlength{\\unitlength}{.001in}\n') + k <- length(y) + W <- k * w + (k-1) * spacefactor * w + z <- function(a) round(a * 1000) + ct('\\begin{picture}(', z(W + extra), ',', z(h + extra), ')\n') + x <- 0 + for(i in 1 : k) { + b <- y[i] + if(! is.na(b)) { + if(b < 1) { # Draw frame if not completely filled + ct('\\linethickness{.05pt}\n') + ct('\\put(', z(x), ', 0){\\line(1, 0){', z(w), '}}\n') + ct('\\put(', z(x + w), ', 0){\\line(0, 1){', z(h), '}}\n') + ct('\\put(', z(x + w), ',', z(h), '){\\line(-1, 0){', z(w), '}}\n') + ct('\\put(', z(x), ',', z(h), '){\\line(0, -1){', z(h), '}}\n') + } + if(b > 0) { + ct('\\linethickness{', w, 'in}\n') + ct('\\put(', z(x + w / 2), ', 0){\\line(0,1){', z(h * b), '}}\n') + } + } + x <- x + w + spacefactor * w + } + + ct('\\end{picture}', + if(length(tab)) '}{\n', + tab, + if(length(tab)) '}', + '}\n') +} + +latexNeedle <- function(y, col='black', href=0.5, name, w=.05, h=.15, + extra=0, file='', append=TRUE) { + ct <- function(..., append=TRUE) cat(..., file=file, append=append, sep='') + ct('\\def\\', name, '{%\n', append=append) + tab <- attr(y, 'table') + if(length(tab)) { + ct('\\protect\\tooltipn{%\n') + } + ct('\\setlength{\\unitlength}{.001in}%\n') + k <- length(y) + col <- rep(col, length=k) + W <- max(k, 2) * w + z <- function(a) round(a * 1000) + ct('\\begin{picture}(', z(W + extra), ',', z(h), ')%\n') + + ## Draw grayscale frame + ct('\\linethickness{.05pt}\\color[gray]{0.85}%\n') + ct('\\put(0,0){\\line(1,0){', z(W), '}}%\n') + # ct('\\put(', z(W), ',0){\\line(0,1){', z(h), '}}%\n') + ct('\\put(', z(W), ',', z(h), '){\\line(-1,0){', z(W), '}}%\n') + # ct('\\put(0,', z(h), '){\\line(0,-1){', z(h), '}}%\n') + + ## Draw horizontal reference lines + if(length(href)) for(hr in href) + ct('\\put(0,', z(h * hr), '){\\line(1,0){', z(W), '}}%\n') + + ## Draw vertical needles + x <- w / 2 + ct('\\linethickness{1.55pt}%\n') + for(i in 1 : k) { + b <- y[i] + if(! is.na(b)) { + co <- paste(round(col2rgb(col[i]) / 255, 3), collapse=',') + ct('\\color[rgb]{', co, '}') + ct('\\put(', z(x), ',0){\\line(0,1){', z(h * b), '}}%\n') + } + x <- x + w + } + + ct('\\end{picture}', + if(length(tab)) '}{%\n', + tab, + if(length(tab)) '}', + '}%\n') +} |
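Each call writes a zero-argument LaTeX macro drawing one mini thermometer per value (fill proportion 0 to 1; NA is drawn empty); the file and macro names here are invented:

  latexTherm(c(1, 0.5, 0.25, NA), name='lta', file='therms.tex', append=FALSE)
  ## then in the LaTeX source: ... treatment A \lta\ ...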
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/list.tree.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/list.tree.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,123 @@ +list.tree <- function(struct,depth=-1, numbers=FALSE, maxlen=22, + maxcomp=12, attr.print=TRUE, front="", + fill=". ", name.of, size=TRUE) +{ + if(depth==0) + return() + + opts <- options(digits=5) + on.exit(options(opts)) + if (missing(name.of)) + name.of <- deparse(substitute(struct)) + + len <- length(struct) + cat(front,name.of,"=",storage.mode(struct),len) + if(size) + cat(" (",object.size(struct)," bytes)",sep="") + + if(is.array(struct)) + cat("=", + if(length(dimnames(struct))) + "named", + "array",paste(dim(struct),collapse=" X ")) + + if(is.ts(struct)) cat("= time series",tsp(struct)) + if(is.factor(struct)) + cat("= factor (",length(levels(struct))," levels)",sep="") + + if(length(attr(struct,'class'))>0) + cat("(",attr(struct,'class'),")") + + if(is.atomic(struct) && !is.character(struct)&& len>0 && maxlen>0) { + field <- "=" + for(i in 1:length(struct)) { + field <- paste(field,format(as.vector(struct[i]))) + if(nchar(field)>maxlen-6) { + field <- paste(field,"..."); + break + } + } + + cat(field,"\n",sep="") + } else if(is.character(struct) && len>0 && maxlen>0) + cat("=",substring(struct[1:(last <- max(1,(1:len) + [cumsum(nchar(struct)+1)<maxlen]))],1,maxlen), + if(last<len) + " ...","\n") + else cat("\n") + + if (mode(struct)=="list" && len>0) { + structnames <- names(struct) + if(!length(structnames)) + structnames <- rep("",len) + + noname <- structnames=="" + structnames[noname] <- + paste("[[",(1:length(structnames))[noname],"]]",sep="") + for (i in 1:min(length(structnames),maxcomp)) + if (mode(struct[[i]])=="argument" | mode(struct[[i]])=="unknown") + cat(front,fill," ",structnames[i]," = ", + as.character(struct[[i]])[1],"\n",sep="") + else + list.tree(struct[[i]],depth=depth-1,numbers,maxlen,maxcomp, + attr.print, + if(numbers) + paste(front,i,sep=".") + else paste(front,fill,sep=""), + + fill,structnames[i],size=FALSE) + + if(length(structnames)>maxcomp) + cat(front,fill," ... and ",length(structnames)-maxcomp, + " more\n",sep="") + } + + attribs <- attributes(struct) + attribnames <- names(attribs) + if(length(attribnames)>0 && attr.print) + for (i in (1:length(attribnames)) + [attribnames!="dim" & attribnames!="dimnames" & + attribnames!="levels" & attribnames!="class" & + attribnames!="tsp" & + (attribnames!="names" | mode(struct)!="list")]) + list.tree(attribs[[i]],depth-1,numbers,maxlen,maxcomp,attr.print, + if(numbers) + paste(front,i,sep="A") + else paste(front,"A ",sep=""), + + fill,attribnames[i],size=FALSE) + + invisible() +} + + +############################################################################## +expr.tree <- function(struct,front="",fill=". ",name.of,numbers=FALSE,depth=-1, + show.comment=FALSE) +{ + if (missing(name.of)) + name.of <- deparse(substitute(struct)) + else if(is.atomic(struct) | is.name(struct)) + name.of <- paste(name.of,deparse(struct)) + + cat(front,"",name.of,"=",mode(struct),length(struct),"\n") + if(depth!=0 && is.recursive(struct) ) { + structlength <- length(struct) + structnames <- names(struct) + if(length(structnames)==0) + structnames <- rep("",structlength) + if(structlength>0) + for (i in 1:length(structnames)) { + if((mode(struct[[i]])!="missing" || is.function(struct)) && + (mode(struct[[i]])!="comment" || show.comment)) + expr.tree(struct[[i]], + if(numbers) + paste(front,i,sep=".") + else paste(front,fill,sep=""), + + fill,structnames[i],numbers,"depth"=depth-1) + } + } + + invisible(character(0)) +} |
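Example of the indented outline this prints:

  list.tree(list(a=1:3, b='text', d=list(e=TRUE)))
  ## shows mode, length, size in bytes and a value preview for each component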
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/mApply.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/mApply.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,69 @@ +mApply <- function(X, INDEX, FUN, ..., simplify=TRUE, keepmatrix=FALSE) { + ## Matrix tapply + ## X: matrix with n rows; INDEX: vector or list of vectors of length n + ## FUN: function to operate on submatrices of x by INDEX + ## ...: arguments to FUN; simplify: see sapply + ## Modification of code by Tony Plate <tplate@blackmesacapital.com> 10Oct02 + ## If FUN returns more than one number, mApply returns a matrix with + ## rows corresponding to unique values of INDEX + + ## X should be either a Matrix or a Vector + if((!is.matrix(X) && is.array(X)) || is.list(X)){ + if(is.data.frame(X)) + X <- as.matrix(X) + else + stop("X must either be a vector or a matrix") + } + + km <- if(keepmatrix) function(x)x else function(x)drop(x) + + if(!is.matrix(X)) { ## X is a vector + r <- tapply(X, INDEX, FUN, ..., simplify=simplify) + + if(is.matrix(r)) + r <- km(t(r)) + + else if(simplify && is.list(r)) + r <- km(matrix(unlist(r), nrow=length(r), + dimnames=list(names(r),names(r[[1]])), byrow=TRUE)) + } + else { + idx.list <- tapply(1:NROW(X), INDEX, c) + r <- sapply(idx.list, function(idx,x,fun,...) fun(x[idx,,drop=FALSE],...), + x=X, fun=FUN, ..., simplify=simplify) + + if(simplify) + r <- km(t(r)) + } + + dn <- dimnames(r) + lengthdn <- length(dn) + if(lengthdn && !length(dn[[lengthdn]])) { + fx <- FUN(X,...) + dnl <- if(length(names(fx))) names(fx) + else dimnames(fx)[[2]] + + dn[[lengthdn]] <- dnl + dimnames(r) <- dn + } + + if(simplify && is.list(r) && is.array(r)) { + ll <- sapply(r, length) + maxl <- max(ll) + empty <- (1:length(ll))[ll==0] + for(i in empty) + r[[i]] <- rep(NA, maxl) + + ## unlist not keep place for NULL entries for nonexistent categories + first.not.empty <- ((1:length(ll))[ll > 0])[1] + nam <- names(r[[first.not.empty]]) + dr <- dim(r) + + r <- aperm(array(unlist(r), dim=c(maxl,dr), + dimnames=c(list(nam),dimnames(r))), + c(1+seq(length(dr)), 1)) + } + + r +} + |
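A sketch of the matrix case, which plain tapply does not handle:

  X <- cbind(a=1:6, b=7:12)
  g <- c(1, 1, 2, 2, 3, 3)
  mApply(X, g, colMeans)   # 3 x 2 matrix of within-group column means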
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/mChoice.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/mChoice.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,230 @@ +# $Id$ +mChoice <- function(..., label='', + sort.levels=c('original','alphabetic'), + add.none=FALSE, drop=TRUE) +{ + sort.levels <- match.arg(sort.levels) + dotlist <- list(...) + + if (drop) + lev <- unique(as.character(unlist(dotlist))) + else + lev <- unique(unlist(lapply(dotlist, function(x)levels(as.factor(x))))) + + if(sort.levels=='alphabetic') lev <- sort(lev) + + lev <- setdiff(lev,'') + + vcall <- as.character(sys.call())[-1] + + dotlist <- lapply(dotlist, FUN=match, table=lev, nomatch=0) + + g <- function(...) { + set <- c(...) + set <- set[!is.na(set)] + + if(!length(set)) return('') + + paste(sort(unique(set)), collapse=';') + } + + Y <- do.call(mapply, + c(list(FUN=g, SIMPLIFY=TRUE, USE.NAMES=FALSE, MoreArgs=NULL), + dotlist)) + + if(add.none && any(Y=='') && 'none' %nin% lev) { + lev <- c(lev, 'none') + Y[Y==''] <- as.character(length(lev)) + } + + if(label == '') + label <- attr(dotlist[[1]],'label') + + if(!length(label)) { + label <- vcall[1] + if(length(nn <- names(dotlist)[1])) + label <- nn + } + + structure(Y, label=label, levels=lev, class=c('mChoice','labelled')) +} + +Math.mChoice <- function(x, ...) { + stop(.Generic, " not meaningful for mChoice") +} + +Summary.mChoice <- function(..., na.rm) { + .NotYetImplemented() +} + +Ops.mChoice <- function(e1, e2) +{ + ok <- switch(.Generic, "=="=, "!="=TRUE, FALSE) + if(!ok) { + warning(.Generic, " not meaningful for mChoice") + return(rep.int(NA, max(length(e1), if(!missing(e2))length(e2)))) + } + nas <- is.na(e1) | is.na(e2) + if (nchar(.Method[1])) { + l1 <- levels(e1) + e1 <- l1[e1] + } + if (nchar(.Method[2])) { + l2 <- levels(e2) + e2 <- l2[e2] + } + if (all(nchar(.Method)) && (length(l1) != length(l2) || + !all(sort.int(l2) == sort.int(l1)))) + stop("level sets of factors are different") + value <- NextMethod(.Generic) + value[nas] <- NA + value +} + +format.mChoice <- function(x, minlength=NULL, sep=";", ...) +{ + lev <- attr(x, 'levels') + if(length(minlength)) lev <- abbreviate(lev, minlength) + w <- strsplit(x, ';') + sapply(w, function(x, lev, sep) + paste(lev[as.numeric(x)], collapse=sep), lev=lev, sep=sep) +} + +'[.mChoice' <- function(x, ..., drop=FALSE) { + if(drop) stop('drop=TRUE not implemented') + atr <- attributes(x) + atr$names <- NULL + x <- NextMethod('[') + consolidate(attributes(x)) <- atr + x +} + +print.mChoice <- function(x, quote=FALSE, max.levels=NULL, width = getOption("width"), + ...) { + if (length(x) <= 0) + cat("mChoice", "(0)\n", sep = "") + else { + xx <- x + class(xx) <- NULL + levels(xx) <- NULL + xx[] <- as.character(x) + print(xx, quote=quote, ...) + } + maxl <- if (is.null(max.levels)){ + TRUE + }else max.levels + + if (maxl) { + n <- length(lev <- encodeString(levels(x), + quote = ifelse(quote, "\"", ""))) + colsep <- " " + T0 <- "Levels: " + if(is.logical(maxl)) + maxl <- { + width <- width - (nchar(T0, "w") + 3 + 1 + 3) + lenl <- cumsum(nchar(lev, "w") + nchar(colsep, "w")) + if(n <= 1 || lenl[n] <= width) + n + else max(1, which(lenl > width)[1] - 1) + } + drop <- n > maxl + cat(if(drop) paste(format(n), ""), T0, + paste(if(drop) {c(lev[1:max(1, maxl - 1)], "...", if (maxl > 1) lev[n]) + }else lev, collapse = colsep), "\n", sep = "") + } + invisible(x) +} + +as.character.mChoice <- function(x, ...) { + lev <- levels(x) + sapply(strsplit(x=x, split=';'), + function(z) paste(lev[as.integer(z)], collapse=';')) +} + +as.double.mChoice <- function(x, drop=FALSE, ...) 
{ + lev <- attr(x,'levels') + X <- matrix(0, nrow=length(x), ncol=length(lev), + dimnames=list(names(x), lev)) + unused <- numeric(0) + for(i in 1:length(lev)) { + xi <- 1*inmChoice(x, i) + if(sum(xi)==0) unused <- c(unused, i) + X[,i] <- xi + } + if(drop && length(unused)) X <- X[, -unused, drop=FALSE] + X +} + +summary.mChoice <- function(object, ncombos=5, minlength=NULL, + drop=TRUE, ...) { + nunique <- length(unique(object)) + y <- gsub('[^;]', '', object) + nchoices <- nchar(y) + 1 + nchoices[object == ''] <- 0 + nchoices <- table(nchoices) + + X <- as.numeric(object, drop=drop) + if(length(minlength)) + dimnames(X)[[2]] <- abbreviate(dimnames(X)[[2]],minlength) + crosstab <- crossprod(X) + + combos <- table(format(object, minlength)) + i <- order(-combos) + combos <- combos[i[1:min(ncombos,length(combos))]] + + structure(list(nunique=nunique, nchoices=nchoices, + crosstab=crosstab, combos=combos, + label=label(object)), + class='summary.mChoice') +} + +print.summary.mChoice <- function(x, prlabel=TRUE, ...) { + if(prlabel) cat(x$label, ' ', x$nunique, ' unique combinations\n', sep='') + cat('Frequencies of Numbers of Choices Per Observation\n\n') + print(x$nchoices) + crosstab <-format(x$crosstab) + crosstab[lower.tri(crosstab)] <- '' + cat('\nPairwise Frequencies (Diagonal Contains Marginal Frequencies)\n') + print(crosstab, quote=FALSE) + s <- if(length(x$combos)==x$nunique) 'Frequencies of All Combinations' else + paste('Frequencies of Top', length(x$combos), 'Combinations') + cat('\n', s, '\n') + print(x$combos) + invisible() +} + +match.mChoice <- function(x, table, nomatch = NA, + incomparables = FALSE) { + if (!is.logical(incomparables) || incomparables) { + .NotYetUsed("incomparables != FALSE") + } + + lev <- attr(table, 'levels') + if(is.factor(x) || is.character(x)) { + x <- match(as.character(x), lev, nomatch=0) + } + return(.Call("do_mchoice_match", as.integer(x), table, as.integer(nomatch))) +} + +# inmChoice <- function(x, values) { +# match.mChoice(values, x, nomatch=0) > 0 +# } +inmChoice <- function(x, values) { + lev <- attr(x, 'levels') + if(is.character(values)) { + v <- match(values, lev) + if(any(is.na(v))) stop(paste('values not in levels:', + paste(values[is.na(v)],collapse=';'))) + values <- v + } + x <- paste(';', unclass(x), ';', sep='') + values <- paste(';', values, ';', sep='') + res <- rep(FALSE, length(x)) + for(j in 1:length(values)) { + i <- grep(values[j], x) + if(length(i)) res[i] <- TRUE + } + res +} + +is.mChoice <- function(x) inherits(x, 'mChoice') |
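A small sketch of building and querying a multiple-choice variable:

  sympt1 <- c('cough', 'fever', 'cough')
  sympt2 <- c('fever', '',      'headache')
  m <- mChoice(sympt1, sympt2, label='Symptoms')
  format(m)               # "cough;fever" "fever" "cough;headache"
  inmChoice(m, 'fever')   # TRUE TRUE FALSE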
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/makeNstr.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/makeNstr.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,11 @@ +makeNstr <- function(char, len) { + mapply(function(char, len) { + if(is.na(len)) { + '\n' + } else if(len == 0) { + '' + } else { + paste(rep.int(x=char, times=len), collapse='') + } + }, char, len, USE.NAMES=FALSE) +} |
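A quick worked example of makeNstr's per-element behavior: a count repeats the character, 0 yields an empty string, and NA yields a newline:

  makeNstr('-', c(3, 0, NA))   # "---"  ""  "\n"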
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/mask.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/mask.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,13 @@ +mask<- function(a) +{ + ##determine which bits are on in a vector of status bytes + if(a>=.Machine$integer.max) + stop("Value > integer.max") + + a <- as.integer(a) + as.logical((rep(a, 8)%/%rep(2^(0:7), rep(length(a),8)))%%2) +} + +## Rick Becker +## Improved by Peter Melewski 14Apr02 + |
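A small worked example: mask decodes a status byte into its 8 bit flags, least significant bit first. For a = 5 (binary 00000101), bits 1 and 3 are on:

  mask(5)   # TRUE FALSE TRUE FALSE FALSE FALSE FALSE FALSE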
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/matxv.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/matxv.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,77 @@ +## Multiply matrix by a vector +## vector can be same length as # columns in a, or can be longer, +## in which case b[kint] is added to a * b[s:length(b)], s=length(b)-ncol(a)+1 +## F. Harrell 17 Oct90 +## Mod 5 Jul91 - is.vector -> !is.matrix +## 16 Oct91 - as.matrix -> matrix(,nrow=1) +## 29 Oct91 - allow b to be arbitrarily longer than ncol(a), use b(1) +## 13 Nov91 - matrix(,nrow=1) -> matrix(,ncol=1) +## 14 Nov91 - changed to nrow=1 if length(b)>1, ncol=1 otherwise +## 25 Mar93 - changed to use %*% +## 13 Sep93 - added kint parameter +## 22 Jun13 - allowed null kint, matrix b (e.g. bootstrap coefs) +## 3 Jul13 - sense intercepts attribute in b which signals +## which subset of intercepts were retained in fit + +matxv <- function(a, b, kint=1, bmat=FALSE) +{ + bi <- attr(b, 'intercepts') + lbi <- length(bi) + lkint <- length(kint) + if(lkint > 1L) stop('kint must have length 0 or 1') + + if(bmat) { + if(!is.matrix(a)) stop('a must be a matrix when b is a matrix') + ca <- ncol(a); cb <- ncol(b) + if(cb < ca) stop('number of columns in b must be >= number in a') + if(cb == ca) return(a %*% t(b)) + excess <- cb - ca + xx <- matrix(0, nrow=nrow(a), ncol=excess) + if(lbi && lkint) { + if(lbi != excess) + stop('b intercepts attribute has different length from number of excess elements in b') + bi <- round(bi) + kint <- round(kint) + if(!isTRUE(all.equal(sort(bi), sort(kint)))) + stop('b intercepts attribute do not match kint') + xx[] <- 1. + } + else if(lkint) { + if(kint > excess) + stop('kint > number of excess elements in b') + xx[,kint] <- 1. + } + return(cbind(xx, a) %*% t(b)) + } + + if(!is.matrix(a)) + a <- if(length(b) == 1L) matrix(a, ncol=1L) else matrix(a, nrow=1L) + + nc <- dim(a)[2] + lb <- length(b) + if(lb < nc) + stop(paste("columns in a (", nc, ") must be <= length of b (", + length(b), ")", sep="")) + + if(nc == lb) return(drop(a %*% b)) + + excess <- lb - nc + if(lbi && lkint) { + if(lbi != excess) + stop('b intercepts attribute has different length from number of excess elements in b') + bi <- round(bi) + kint <- round(kint) + if(!isTRUE(all.equal(sort(bi), sort(kint)))) + stop('b intercepts attribute do not match kint') + bkint <- b[1] + } + else if(lkint) { + if(kint > excess) + stop('kint > number excess elements in b') + + bkint <- b[kint] + } + else + bkint <- 0. + drop(bkint + (a %*% b[(lb - nc + 1L) : lb])) +} |
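A hedged sketch of the intercept handling described in the header comments, using made-up data: when b is one element longer than ncol(a), b[kint] acts as the intercept:

  X <- cbind(x1 = 1:3, x2 = c(0, 1, 0))
  b <- c(-1, 0.5, 2)   # intercept, then slopes for x1 and x2
  matxv(X, b)          # -1 + 0.5*x1 + 2*x2  =  -0.5  2.0  0.5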
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/mdb.get.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/mdb.get.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,34 @@ +## $Id$ +mdb.get <- function(file, tables=NULL, lowernames=FALSE, allow=NULL, + dateformat='%m/%d/%y', ...) +{ + rettab <- length(tables) && is.logical(tables) + if(rettab) tables <- NULL + if(!length(tables)) + tables <- system(paste('mdb-tables -1', file), intern=TRUE) + if(rettab) return(tables) + + f <- tempfile() + D <- vector('list', length(tables)) + names(D) <- tables + + for(tab in tables) { + s <- system(paste('mdb-schema -T', shQuote(tab), file), intern=TRUE) + start <- grep('^ \\($', s) + 1 + end <- grep('^\\);$', s) - 1 + s <- s[start:end] + s <- strsplit(s, '\t') + vnames <- sapply(s, function(x)x[2]) + vnames <- makeNames(vnames, unique=TRUE, allow=allow) + if(lowernames) vnames <- casefold(vnames) + types <- sapply(s, function(x)x[length(x)]) + datetime <- vnames[grep('DateTime', s)] + system(paste('mdb-export -b strip', file, shQuote(tab), '>', f)) + d <- csv.get(f, datetimevars=datetime, + lowernames=lowernames, allow=allow, + dateformat=dateformat, ...) + if(length(tables) == 1) return(d) + else D[[tab]] <- d + } + D +} |
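A usage sketch, assuming the mdb-tools command-line utilities are installed and a hypothetical Access file mydb.mdb with a table named patients:

  mdb.get('mydb.mdb', tables=TRUE)             # just list the table names
  d <- mdb.get('mydb.mdb', tables='patients',  # import one table via csv.get
               lowernames=TRUE)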
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/minor.tick.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/minor.tick.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,34 @@ +minor.tick <- function(nx=2, ny=2, tick.ratio=.5) +{ + ax <- function(w, n, tick.ratio) { + range <- par("usr")[if(w == "x") 1 : 2 else 3 : 4] + + tick.pos <- + if(w == "x") + par("xaxp") + else par("yaxp") + + ## Solve for first and last minor tick mark positions that are on the graph + + distance.between.minor <- (tick.pos[2] - tick.pos[1]) / tick.pos[3] / n + possible.minors <- tick.pos[1] - (0 : 100) * distance.between.minor + low.minor <- min(possible.minors[possible.minors >= range[1]]) + if(is.na(low.minor)) low.minor <- tick.pos[1] + possible.minors <- tick.pos[2] + (0 : 100) * distance.between.minor + hi.minor <- max(possible.minors[possible.minors <= range[2]]) + if(is.na(hi.minor)) + hi.minor <- tick.pos[2] + + axis(if(w == "x") 1 else 2, + seq(low.minor, hi.minor, by=distance.between.minor), + labels=FALSE, tcl=par('tcl') * tick.ratio) + } + + if(nx > 1) + ax("x", nx, tick.ratio=tick.ratio) + + if(ny > 1) + ax("y", ny, tick.ratio=tick.ratio) + + invisible() +} |
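Typical use, with hypothetical data: draw the plot first, then add minor tick marks between the existing major ticks:

  plot(1:20, rnorm(20))
  minor.tick(nx=5, ny=2, tick.ratio=0.5)  # 5 x-subdivisions, 2 y-subdivisions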
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/model.frame.default.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/model.frame.default.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,109 @@
+## $Id$
+GetModelFrame <- function(formula, specials, default.na.action=NULL) {
+ if(missing(formula) || !inherits(formula, "formula"))
+ stop("GetModelFrame needs a function argument specified ",
+ "as a formula or terms object")
+
+ ## get the function call of the calling function
+ fun.call <- match.call(sys.function(sys.parent()),
+ call=sys.call(sys.parent()),
+ expand.dots=FALSE)
+
+ args.needed <- c("formula", "data", "weights", "subset", "na.action")
+ m <- structure(match(args.needed, names(fun.call), nomatch=0), names=args.needed)
+
+ ## get the environment of the formula
+ env <- environment(formula)
+ if (is.null(env))
+ env <- parent.frame()
+
+ ## If formula is not a terms object then
+ ## the formula must be turned into a terms object using the
+ ## 'terms' function
+ if(!inherits(formula, "terms")) {
+ ## Check for presence of args needed for terms call
+ has.arg <- c(formula=TRUE, data=FALSE)
+ if(m["data"])
+ has.arg["data"] <- TRUE
+
+ junk <- lapply(fun.call, print)
+ new.call <- fun.call[c(1,has.arg)]
+ new.call[[1]] <- as.name('terms')
+
+ names(new.call)[2] <- "x"
+
+ if(!missing(specials) && !is.null(specials))
+ new.call$specials=specials
+
+ ## convert the formula to a terms object
+ print(new.call)
+ formula <- eval(new.call, envir=env)
+# formula <- do.call("terms", args=list(x=formula,
+# data=if(m["data"]) fun.call[m["data"]] else NULL,
+# specials=specials)[has.arg],
+# envir=env)
+ }
+
+ new.call <- fun.call[c(1, m)]
+ new.call[[1]] <- as.name("model.frame")
+ new.call$formula <- formula
+
+ if("na.action" %nin% names(fun.call) && !is.null(default.na.action))
+ new.call$na.action <- default.na.action
+
+ return(eval(new.call, env, parent.frame()))
+}
+
+## Replaced with one more like default R 3nov02
+## With R 1.6 was getting error with ... arguments
+## if(FALSE) '[.factor' <- function (x, i, drop = TRUE)
+## {
+## y <- NextMethod("[")
+## class(y) <- class(x)
+## attr(y, "contrasts") <- attr(x, "contrasts")
+## attr(y, "levels") <- attr(x, "levels")
+## opt <- .Options$drop.factor.levels
+## if(!length(opt))
+## opt <- .Options$drop.unused.levels
+
+## if(drop && (!missing(drop) || (length(opt)==0 || opt)))
+## reFactor(y)
+## else y
+## }
+
+
+termsDrop <- function(object, drop, data)
+{
+ trm <- terms(object, data=data)
+ if(is.numeric(drop)) {
+ vars <- attr(trm, 'term.labels')
+ if(any(drop > length(vars)))
+ stop('subscript out of range')
+
+ drop <- vars[drop]
+ }
+ form <- update(trm,
+ as.formula(paste('~ . ',
+ paste('-', drop, collapse=''))))
+ terms(form, data=data)
+}
+
+
+var.inner <- function(formula)
+{
+ if(!inherits(formula,"formula"))
+ formula <- attr(formula,"formula")
+
+ if(!length(formula))
+ stop('no formula object found')
+
+ if(length(formula) > 2)
+ formula[[2]] <- NULL # remove response variable
+
+ av <- all.vars(formula, max.names=1e6)
+ ## Thanks to Thomas Lumley <tlumley@u.washington.edu> 28Jul01 :
+ unique(sapply(attr(terms(formula),"term.labels"),
+ function(term,av)
+ av[match(all.vars(parse(text=term), max.names=1e6),av)][1],
+ av=av))
+} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/mtitle.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/mtitle.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,29 @@
+## Thanks to Rick Becker for suggestions
+mtitle <-
+ function(main,ll,lc,
+ lr=format(Sys.time(),'%d%b%y'),
+ cex.m=1.75, cex.l=.5, ...)
+{
+ out <- any(par()$oma!=0)
+ g <-
+ if(out) function(...) mtext(..., outer=TRUE)
+ else function(z, adj, cex, side, ...)
+ if(missing(side))
+ title(z, adj=adj, cex=cex)
+ else
+ title(sub=z, adj=adj, cex=cex)
+
+ if(!missing(main))
+ g(main,cex=cex.m,adj=.5)
+
+ if(!missing(lc))
+ g(lc,side=1,adj=.5,cex=cex.l,...)
+
+ if(!missing(ll))
+ g(ll,side=1,adj=0,cex=cex.l,...)
+
+ if(lr!="")
+ g(lr,side=1,adj=1,cex=cex.l,...)
+
+ invisible()
+} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/multLines.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/multLines.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,39 @@ +multLines <- function(x, y, pos=c('left', 'right'), + col='gray', lwd=1, lty=1, + lwd.vert=.85, lty.vert=1, alpha=0.4, + grid=FALSE) { + pos <- match.arg(pos) + p <- ncol(y) + n <- nrow(y) + if(! is.matrix(y) || p == 1 || p %% 2 != 1) + stop('y must have 3, 5, 7, ... columns') + if(length(x) != n) + stop('length of x must match rows of y') + + vcol <- adjustcolor(col, alpha.f=alpha) + + if(grid) { + llines(x, y[, 1], col=col, lwd=lwd, lty=lty) + xdel <- unit(0.75, 'mm') + x <- unit(x, 'native') + gp <- gpar(col=vcol, lwd=lwd.vert, lty=lty.vert) + } + else { + lines(x, y[, 1], col=col, lwd=lwd, lty=lty) + xdel <- 0.005 * diff(par('usr')[1 : 2]) + } + + + half <- (p - 1) / 2 + x0 <- if(grid) unit(x, 'native') else x + for(i in 1 : half) { + i1 <- i + 1 + i2 <- p - i + 1 + x0 <- switch(pos, left = x0 - xdel, right = x0 + xdel) + if(grid) grid.segments(x0, y[, i1], x0, y[, i2], + gp=gp, default.units='native') + else segments(x0, y[, i1], x0, y[, i2], col=vcol, + lty=lty.vert, lwd=lwd.vert) + } +} + |
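A sketch with simulated data: column 1 of y is the central line and each successive pair of columns is a lower/upper band drawn as shifted vertical segments:

  x <- 1:10
  y <- cbind(med = sin(x), lo = sin(x) - .3, hi = sin(x) + .3)
  plot(x, y[, 1], type='n', ylim=range(y))
  multLines(x, y, col='blue', pos='right')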
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/na.delete.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/na.delete.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,100 @@ +## Enhancement of na.omit F. Harrell 20 Oct 91 +## Allows an element of the data frame to be another data frame +## Note: S does not invoke na.action if only a data frame variable is missing! + +na.delete <- function(frame) +{ + y.detail <- na.detail.response(frame) + n <- length(frame) + omit <- FALSE + vars <- seq(length = n) + nmiss <- rep(0,n) + storage.mode(nmiss) <- "integer" + for(j in vars) { + x <- frame[[j]] + if(is.data.frame(x)) + x <- as.matrix(x) + + class(x) <- NULL #so Surv object is.na ignored + if(!is.atomic(x)) + stop("non-atomic, non-data frame variables not allowed") + + ## variables are assumed to be either some sort of matrix, numeric or cat'y + isna <- is.na(x) #Change from T. Therneau + d <- dim(x) + if(is.null(d) || length(d) != 2) { + ##isna <- is.na(x) + nmiss[j] <- sum(isna) + omit <- omit | isna + } else { + ##isna <-is.na(x %*% rep(0,d[2])) + isna <- (isna %*% rep(1,d[2])) > 0 + nmiss[j] <- sum(isna) + omit <- omit | isna + } + } + + if(any(omit)) { + rn <- row.names(frame) + + frame <- frame[!omit,,drop=FALSE] + names(nmiss) <- names(frame) + ## a %ia% b terms are included - delete them since main effects + ## already counted (next 2 stmts reinstated 27Oct93) + + i <- grep("%ia%", names(nmiss)) + if(length(i)>0) + nmiss <- nmiss[-i] + + attr(frame,"nmiss") <- nmiss # for backward compatibility + temp <- seq(omit)[omit] + names(temp) <- rn[omit] + na.info <- list(nmiss=nmiss, omit=temp, + na.detail.response=y.detail) + + class(na.info) <- "delete" + attr(frame, "na.action") <- na.info + } + + frame +} + + +naprint.delete <- function(x, ...) +{ + if(length(g <- x$nmiss)) { + cat("Frequencies of Missing Values Due to Each Variable\n") + print(g) + cat("\n") + } + + if(length(g <- x$na.detail.response)) { + cat("\nStatistics on Response by Missing/Non-Missing Status of Predictors\n\n") + print(unclass(g)) + cat("\n") + } + + invisible() +} + +globalVariables("naresid.omit") +naresid.delete <- napredict.delete <- function(omit, x, ...) +{ + omit <- omit$omit + if(exists('naresid.omit')) naresid.omit(omit, x) + else { + if(!existsFunction('naresid.exclude')) + naresid.exclude <- getFromNamespace('naresid.exclude','stats') + naresid.exclude(omit, x) + } +} + + +nafitted.delete <- function(obj, x) +{ + omit <- obj$omit + if(exists('naresid.omit')) + naresid.omit(omit, x) + else + getFromNamespace('naresid.exclude','stats')(omit, x) +} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/na.detail.response.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/na.detail.response.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,63 @@
+na.detail.response <- function(mf)
+{
+ if(is.null(z <- .Options$na.detail.response) || !z)
+ return(NULL)
+
+ response <- model.extract(mf, response)
+ if(is.null(response))
+ return(NULL)
+
+ if(!is.matrix(response))
+ response <- as.matrix(response)
+
+ GFUN <- options()$na.fun.response
+ if(is.null(GFUN))
+ GFUN <- function(x, ...)
+ {
+ if(is.matrix(x)) x <- x[,ncol(x)]
+ x <- x[!is.na(x)]
+ c(N=length(x),Mean=mean(x))
+ }
+ else GFUN <- eval.parent(as.name(GFUN))
+
+ w <- NULL; nam <- names(mf); wnam <- NULL
+ N <- nrow(mf)
+ p <- ncol(mf)
+ omit <- rep(FALSE, N)
+ for(i in 2:p) {
+ x <- mf[,i]
+ if(is.matrix(x))
+ x <- x[,1]
+
+ isna <- is.na(x)
+ omit <- omit | isna
+ nmiss <- sum(isna)
+ if(nmiss) {
+ w <- cbind(w, GFUN(response[isna,]))
+ wnam <- c(wnam, paste(nam[i],"=NA",sep=""))
+ }
+
+ n <- N-nmiss
+ if(n) {
+ w <- cbind(w, GFUN(response[!isna,]))
+ wnam <- c(wnam, paste(nam[i],"!=NA",sep=""))
+ }
+ }
+
+ ## summarize response for ANY x missing
+ if(p>2) {
+ nmiss <- sum(omit)
+ if(nmiss) {
+ w <- cbind(w, GFUN(response[omit,]))
+ wnam <- c(wnam, "Any NA")
+ }
+
+ if(N-nmiss) {
+ w <- cbind(w, GFUN(response[!omit,]))
+ wnam <- c(wnam, "No NA")
+ }
+ }
+
+ dimnames(w)[[2]] <- wnam
+ w
+} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/na.keep.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/na.keep.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,24 @@ +na.keep <- function(mf) +{ + w <- na.detail.response(mf) + if(length(w)) + class(w) <- 'keep' + + attr(mf, "na.action") <- w + mf +} + + +naprint.keep <- function(x, ...) +{ + if(length(x)) { + cat("\nStatistics on Response by Missing/Non-Missing Status of Predictors\n\n") + print(unclass(x)) + cat("\n") + } + + invisible() +} + + +naresid.keep <- function(omit, x, ...) x |
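These na.action functions are driven by session options; a hedged sketch of typical use (the fitting call is illustrative only, assuming the rms package and a hypothetical data frame d):

  options(na.detail.response=TRUE)  # make na.delete/na.keep tabulate the response
  # f <- rms::lrm(death ~ age + chol, data=d, na.action=na.delete)
  # print(f)  # would include response statistics by missingness of predictors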
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/na.pattern.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/na.pattern.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,18 @@ +na.pattern<-function(x) +{ + if(is.list(x)) { + k <- length(x) + n <- length(x[[1]]) + x <- matrix(unlist(x), n, k) + } + + n <- dim(x)[1] + k <- dim(x)[2] + y <- matrix(as.integer(is.na(x)), n, k) + pattern <- y[, 1] + for(i in 2:k) { + pattern <- paste(pattern, y[, i], sep = "") + } + + table(pattern) +} |
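Illustration with a tiny hypothetical data frame; each pattern string has one digit per variable (1 = missing):

  d <- data.frame(age = c(21, NA, 33), chol = c(NA, NA, 190))
  na.pattern(d)   # patterns "00", "01", "11", each with frequency 1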
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/nobsY.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/nobsY.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,109 @@ +nobsY <- function(formula, group=NULL, + data=NULL, subset=NULL, na.action=na.retain, + matrixna=c('all', 'any')) { + matrixna <- match.arg(matrixna) + forig <- formula + formula <- Formula(formula) + environment(formula) <- new.env(parent = environment(formula)) + en <- environment(formula) + assign(envir = en, 'id', function(x) x) + assign(envir = en, 'pending', function(x) x) + assign(envir = en, 'randomized', function(x) x) + assign(envir = en, 'cond', + function(x, label, condition) rep(1, length(condition))) + marg <- length(data) && '.marginal.' %in% names(data) + if(marg) formula <- update(formula, .~. + .marginal.) + mf <- if(length(subset)) + model.frame(formula, data=data, subset=subset, na.action=na.action) + else + model.frame(formula, data=data, na.action=na.action) + + Y <- model.part(formula, data=mf, lhs=1) + X <- model.part(formula, data=mf, rhs=1) + ## Get id variable if present so can count unique subjects + rhs <- terms(formula, rhs=1, specials='id') + sr <- attr(rhs, 'specials') + ## specials counts from lhs variables + wid <- sr$id + if(length(wid)) { + xid <- X[[wid - ncol(Y)]] + if(length(wid) > 1) { + xid$sep <- '.' + xid <- do.call('paste', xid) + } + ## Remove id() from formula + forig <- as.character(forig) + if(ncol(X) == 1) ## id() is the only right-hand term + forig <- as.formula(paste(forig[2], ' ~ 1')) + else { + forig[3] <- sub(' \\+ id(.*)', '', forig[3]) + forig <- as.formula(paste(forig[2], forig[3], sep=' ~ ')) + } + } else xid <- 1 : nrow(Y) + idv <- xid + + group <- if(length(group) && group %in% names(X)) X[[group]] + if(marg) { + xm <- X$.marginal. + if(length(group)) group <- group[xm == ''] + Y <- Y [xm == '',, drop=FALSE] + xid <- xid[xm == ''] + } + nY <- ncol(Y) + nobs <- rep(NA, nY) + ylab <- sapply(Y, label) + ylab <- ifelse(ylab == '', names(Y), ylab) + names(nobs) <- ylab + nobsg <- if(length(group)) { + glev <- if(is.factor(group)) levels(group) + else sort(unique(group[! is.na(group)])) + matrix(NA, ncol=nY, nrow=length(glev), dimnames=list(glev, ylab)) + } + + if(nY > 0) for(i in 1 : nY) { + y <- Y[[i]] + ## is.na.Surv reduces to vector but need to keep as matrix + notna <- if(is.matrix(y)) { + numna <- rowSums(is.na(unclass(y))) + if(matrixna == 'any') numna == 0 else numna < ncol(y) + } else ! is.na(y) + nobs[i] <- length(unique(xid[notna])) + if(length(group)) + nobsg[, i] <- tapply(xid[notna], group[notna], + function(x) length(unique(x))) + } + structure(list(nobs=nobs, nobsg=nobsg, id=idv, formula=forig)) +} + +addMarginal <- function(data, ..., label='All') { + vars <- as.character(sys.call())[- (1 : 2)] + vars <- intersect(vars, names(data)) + data$.marginal. <- '' + + labs <- sapply(data, function(x) { + la <- attr(x, 'label') + if(! length(la)) la <- '' + la }) + un <- sapply(data, function(x) { + u <- attr(x, 'units') + if(! length(u)) u <- '' + u }) + + for(v in vars) { + d <- data + d$.marginal. <- ifelse(d$.marginal. == '', v, + paste(d$.marginal., v, sep=',')) + d[[v]] <- label + data <- rbind(data, d) + } + ## Restore any Hmisc attributes + if(any(labs != '') || any(un != '')) + for(i in 1 : length(data)) { + if(labs[i] != '') { + attr(data[[i]], 'label') <- labs[i] + class(data[[i]]) <- c('labelled', class(data[[i]])) + } + if(un[i] != '') attr(data[[i]], 'units') <- un[i] + } + data +} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/nstr.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/nstr.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,15 @@
+nstr <- function(string, times) {
+ if(!is.atomic(string))
+ stop("argument string must be an atomic vector")
+
+ if(!is.numeric(times))
+ stop("argument times must be a numeric vector")
+
+ if(length(string) == 0)
+ return(NULL)
+
+ if(length(times) == 0)
+ return(character(0))
+
+ return(.Call("do_nstr", as.character(string), as.integer(times)))
+} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/num.intercepts.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/num.intercepts.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,15 @@ +num.intercepts <- function(fit, type=c('fit', 'var', 'coef')) +{ + type <- match.arg(type) + nrp <- fit$non.slopes + if(!length(nrp)) { + nm1 <- names(fit$coef)[1] + nrp <- 1*(nm1=="Intercept" | nm1=="(Intercept)") + } + if(type == 'fit') return(nrp) + w <- if(type == 'var') fit$var else fit$coefficients + i <- attr(w, 'intercepts') + li <- length(i) + if(!li) return(nrp) + if(li == 1 && i == 0) 0 else li +} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/panel.abwplot.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/panel.abwplot.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,63 @@ +if(FALSE) { + panel.abwplot <- function(x, y, box.ratio = 1, means=TRUE, + font = box.dot$font, pch = box.dot$pch, + cex = box.dot$cex, + col = box.dot$col, ...) + { + ok <- !is.na(x) & !is.na(y) + x <- x[ok] + y <- y[ok] + y.unique <- sort(unique(y)) + width <- box.ratio/(1 + box.ratio) + w <- width/2 + lineopts <- trellis.par.get("box.rectangle") + for(Y in y.unique) { + X <- x[y == Y] + q <- quantile(X, c(.01,.05,.1,.25,.75,.9,.95,.99,.5)) + median.value <- list(x = q[9], y = Y) + z <- c(1, .01, + 2, .01, + 2, .05, + 3, .05, + 3, .10, + 4, .10, + 4, .25, + 5, .25, + 5, .10, + 6, .10, + 6, .05, + 7, .05, + 7, .01, + 8, .01, + 8,-.01, + 7,-.01, + 7,-.05, + 6,-.05, + 6,-.10, + 5,-.10, + 5,-.25, + 4,-.25, + 4,-.10, + 3,-.10, + 3,-.05, + 2,-.05, + 2,-.01, + 1,-.01, + 1, .01) + box.dot <- trellis.par.get("box.dot") + box.dot.par <- c(list(pch = pch, cex = cex, col = col, font = font), ...) + do.call('lines',c(list(x=q[z[seq(1,length(z),by=2)]], + y=Y + 4*w*z[seq(2,length(z),by=2)]),lineopts)) + ##do.call('segments',c(list(x1=q[c(2:7)],y1=Y+rep(-w,6), + ## x2=q[c(2:7)],y2=Y+rep(w,6)), + ## lineopts)) + + do.call("points", c(median.value, box.dot.par)) + if(means) + do.call('lines',c(list(x=rep(mean(X),2),y=Y+c(-w,w)), + lineopts, lty=2)) + } + } + + NULL +} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/panel.bpplot.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/panel.bpplot.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,323 @@
+panel.bpplot <- function(x, y, box.ratio = 1, means=TRUE,
+ qref=c(.5, .25, .75),
+ probs= c(.05, .125, .25, .375), nout=0,
+ nloc=c('right lower', 'right', 'left', 'none'),
+ cex.n=.7, datadensity=FALSE, scat1d.opts=NULL,
+ violin=FALSE, violin.opts=NULL,
+ font = box.dot$font, pch = box.dot$pch,
+ cex.means = box.dot$cex, col = box.dot$col,
+ nogrid=NULL, height=NULL, ...)
+{
+ grid <- TRUE
+ if(length(nogrid) && nogrid) grid <- FALSE
+ if(missing(nloc) && !grid) nloc <- 'none'
+ else nloc <- match.arg(nloc)
+
+ if(grid) {
+ lines <- llines
+ points <- lpoints
+ segments <- lsegments
+ }
+
+ y <- as.numeric(y)
+ y <- rep(y, length.out=length(x))
+ ok <- !is.na(x) & !is.na(y)
+ x <- x[ok]
+ y <- y[ok]
+ y.unique <- sort(unique(y))
+ width <- box.ratio / (1 + box.ratio)
+ w <- width / 2
+ if(length(height)) w <- height
+ probs2 <- sort(c(probs, 1 - probs))
+
+ if(grid) {
+ box.dot <- trellis.par.get("box.dot")
+ lineopts <- trellis.par.get("box.rectangle")
+ box.dot.par <- c(list(pch = pch, cex = cex.means, col = col,
+ font = font), ...)
+ }
+ else {
+ pr <- par()
+ box.dot <- c(pr[c('cex', 'pch', 'col', 'font')], xpd=NA)
+ lineopts <- c(pr[c('lty', 'col', 'lwd')], xpd=NA)
+ box.dot.par <- c(list(pch=pch, cex=cex.means, col=col, xpd=NA))
+ }
+
+ m <- length(probs)
+ m2 <- length(probs2)
+ j <- c(1, sort(rep(2 : m2, 2)), - sort(- rep(1 : (m2 - 1),2)))
+ z <- c(sort(rep(probs, 2)), - sort(- rep(probs[1 : (m - 1)], 2)))
+ z <- c(z, -z, probs[1])
+ k <- max(z)
+ k <- if(k > .48) .5 else k
+
+ if(length(qref)) {
+ size.qref <- pmin(qref, 1 - qref)
+ size.qref[qref == .5] <- k
+ }
+
+ for(Y in y.unique) {
+ X <- x[y == Y]
+ if(!length(X)) next
+
+ q <- quantile(X, c(probs2, qref))
+ if(length(qref))
+ do.call('segments',c(list(q[-(1 : m2)], Y - w * size.qref / k,
+ q[-(1 : m2)], Y + w * size.qref / k),
+ lineopts))
+
+ do.call('lines',c(list(x=q[j], y=Y + w * z / k), lineopts))
+ if(means) {
+ mean.value <- list(x=mean(X), y=Y)
+ do.call('points', c(mean.value, box.dot.par))
+ }
+ xlimits <- if(grid) current.panel.limits()$xlim else par('usr')[1:2]
+ switch(nloc,
+ right= ltext(xlimits[2] - .01*diff(xlimits), Y,
+ paste('n=', length(X), sep=''),
+ adj=c(1, .5), cex=cex.n),
+ left= ltext(xlimits[1] + .01*diff(xlimits), Y,
+ paste('n=', length(X), sep=''),
+ adj=c(0, .5), cex=cex.n),
+ 'right lower'= ltext(xlimits[2] - .01*diff(xlimits),
+ Y - w * min(size.qref) / k,
+ paste('n=', length(X), sep=''),
+ adj=c(1, 1), cex=cex.n))
+
+ if(datadensity)
+ do.call('scat1d',c(list(x=X, y=Y, grid=grid), scat1d.opts))
+
+ if(violin)
+ do.call('panel.violin', c(list(x=X, y=Y), violin.opts))
+
+ if(nout > 0) {
+ ii <- if(nout < 1) {
+ ## Note - bug in quantile - endless loop if probs=c(.5,.5)
+ if(nout == .5)
+ stop('instead of nout=.5 use datadensity=TRUE')
+
+ cuts <- quantile(X, c(nout, 1 - nout))
+ X < cuts[1] | X > cuts[2]
+ } else {
+ X <- sort(X)
+ nx <- length(X)
+ ll <- 1 : nx
+ (ll <= min(nout, nx / 2)) | (ll >= max(nx - nout + 1, nx / 2))
+ }
+
+ if(sum(ii))
+ do.call('scat1d',c(list(x=X[ii], y=Y, grid=grid), scat1d.opts))
+ }
+ }
+}
+
+
+# Given a matrix where rows are groups and columns have all the
+# quantiles already computed, plus the Mean, draw a panel containing
+# horizontal box-percentile plots like the default in panel.bpplot. This is
+# primarily for p[...]ref.x, y0=qref.y - qref.mod,
+ x1=qref.x, y1=qref.y + qref.mod)
+
+ polygon(x=as.vector(t(stats[, match(probs2, qq)[j]])),
+ y=rep(y, each=length(j)) + w * z / k)
+
+ if(means)
+ points(Means, y, pch=pch, cex=cex.points)
+
+ if(prototype) {
+ mar <- par('mar')
+ on.exit(par(mar=mar))
+ par(mar=rep(.5,4))
+ text(Means, 1.025+.02, 'Mean')
+ for(a in c(.5, probs2)) {
+ arrows(a, .6, a, .725, length=.1)
+ f <- format(a)
+ text(a, .575, format(a))
+ }
+
+ text(.5, .52, 'Quantiles')
+ xd <- .004
+ text(.485 - xd, 1,
+ expression(Median==Q[2]),
+ srt=90)
+
+ text(.235 - xd, 1,
+ expression(Q[1]),
+ srt=90)
+
+ text(.735 - xd, 1,
+ expression(Q[3]),
+ srt=90)
+
+ lines(c(.375, .625), rep(1.3, 2));
+ text(.635, 1.3, '1/4', adj=0, cex=.9)
+
+ lines(c(.25, .75 ), rep(1.35, 2));
+ text(.76, 1.35, '1/2', adj=0, cex=.9)
+
+ lines(c(.125, .875), rep(1.4, 2));
+ text(.885, 1.4, '3/4', adj=0, cex=.9)
+
+ lines(c(.05, .95), rep(1.45, 2));
+ text(.96, 1.45, '9/10', adj=0, cex=.9)
+
+ text(.68, 1.24, 'Fraction of Sample Covered', adj=0, srt=13, cex=.7)
+ }
+}
+
+bpplotM <- function(formula=NULL, groups=NULL, data=NULL, subset=NULL,
+ na.action=NULL, qlim=0.01, xlim=NULL,
+ nloc=c('right lower','right','left','none'),
+ vnames=c('labels', 'names'), cex.n=.7, cex.strip=1,
+ outerlabels=TRUE, ...) {
+ nloc <- match.arg(nloc)
+ vnames <- match.arg(vnames)
+
+ if(! length(formula)) {
+ g <- function(x) is.numeric(x) && length(unique(x)) > 5
+ v <- setdiff(names(data), groups)
+ z <- sapply(data[, v], g)
+ if(!any(z))
+ stop('no variable was numeric with > 5 unique levels')
+ formula <- v[z]
+ }
+
+ if(!inherits(formula, 'formula')) {
+ if(!length(groups))
+ stop('must specify group if first argument is not a formula')
+ formula <- paste(paste(formula, collapse=' + '), '~',
+ paste(groups, collapse=' + '))
+ formula <- as.formula(formula)
+ }
+ form <- Formula(formula)
+ Y <- if(length(subset)) model.frame(form, data=data, subset=subset,
+ na.action=na.action)
+ else model.frame(form, data=data, na.action=na.action)
+ X <- model.part(form, data=Y, rhs=1)
+ if(ncol(X) == 0) X <- rep('', nrow(Y))
+ Y <- model.part(form, data=Y, lhs=1)
+
+ vars <- names(Y)
+ labs <- vars
+ if(vnames == 'labels') {
+ ylabs <- sapply(Y, label)
+ labs <- ifelse(ylabs == '', labs, ylabs)
+ }
+ names(labs) <- vars
+ w <- reshape(cbind(X, Y), direction='long', v.names='x',
+ varying=vars, times=vars)
+ w$time <- factor(w$time, levels=vars)
+ lims <- lapply(Y,
+ function(x) quantile(x, c(qlim, 1 - qlim), na.rm=TRUE))
+ if(length(xlim)) lims[names(xlim)] <- xlim
+ scales <- list(x=list(relation='free', limits=lims))
+ nv <- length(vars)
+ lev <- NULL
+ for(v in levels(w$time)) {
+ un <- units(Y[[v]])
+ l <- if(labs[v] == v && un == '') v else
+ labelPlotmath(labs[v], un)
+ lev <- c(lev, l)
+ }
+
+ strip <- function(which.given, which.panel, var.name, factor.levels, ...) {
+ current.var <- var.name[which.given]
+ levs <- if(current.var == 'time') lev else factor.levels
+ strip.default(which.given, which.panel, var.name, factor.levels=levs, ...)
+ }
+
+ namx <- names(X)
+ form <- paste(namx[1], '~ x | time')
+ if(length(namx) > 1) form <- paste(form, '+',
+ paste(namx[-1], collapse= '+'))
+ form <- as.formula(form)
+ d <- bwplot(form, panel=panel.bpplot, scales=scales, data=w, xlab='',
+ nloc=nloc, cex.n=cex.n, strip=strip,
+ par.strip.text=list(cex=cex.strip), ...)
+ if(outerlabels && length(dim(d)) == 2)
+ d <- useOuterStrips(d, strip=strip, strip.left=strip)
+ d
+} |
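A hypothetical bpplotM call (illustration only; d is a made-up data frame): one box-percentile panel per continuous variable, stratified by a grouping variable:

  d <- data.frame(age=rnorm(100, 50, 10), chol=rnorm(100, 200, 30),
                  sex=sample(c('F','M'), 100, TRUE))
  bpplotM(age + chol ~ sex, data=d, cex.strip=.9)  # lattice bwplot via panel.bpplot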
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/pc1.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/pc1.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,31 @@ +pc1 <- function(x, hi) +{ + p <- ncol(x) + x <- x[!is.na(x %*% rep(1,p)),] + xo <- x + for(i in 1:p) { + y <- x[,i] + x[,i] <- (y-mean(y))/sqrt(var(y)) + } + + g <- prcomp(x) + cat("Fraction variance explained by PC1:",format(g$sdev[1]^2/sum(g$sdev^2)), + "\n\n") + pc1 <- g$x[,1] + + f <- lsfit(xo, pc1) + + if(!missing(hi)) { + if(sum(f$coef[-1]<0) >= p/2) + pc1 <- -pc1 + + r <- range(pc1) + pc1 <- hi*(pc1-r[1])/diff(r) + f <- lsfit(xo, pc1) + } + + cat("Coefficients to obtain PC1:\n\n") + print(f$coef) + attr(pc1,"coef") <- f$coef + invisible(pc1) +} |
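Usage sketch with simulated predictors; hi rescales the first principal component to run from 0 to hi:

  set.seed(1)
  x1 <- rnorm(100); x2 <- x1 + rnorm(100)/2; x3 <- rnorm(100)
  p <- pc1(cbind(x1, x2, x3), hi=100)  # prints variance explained and coefficients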
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/plsmo.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/plsmo.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,209 @@ +plsmo <- + function(x, y, method=c("lowess", "supsmu", "raw"), + xlab, ylab, add=FALSE, lty=1 : lc, col=par('col'), + lwd=par('lwd'), iter=if(length(unique(y)) > 2) 3 else 0, + bass=0, f=2 / 3, trim, fun, group=rep(1, length(x)), + prefix, xlim, ylim, + label.curves=TRUE, datadensity=FALSE, scat1d.opts=NULL, + lines.=TRUE, subset=TRUE, grid=FALSE, evaluate=NULL, ...) +{ + gfun <- ordGridFun(grid) + nam <- as.character(sys.call())[2 : 3] + method <- match.arg(method) + if(missing(ylab)) + ylab <- label(y, units=TRUE, plot=TRUE, default=nam[2]) + Y <- as.matrix(y) + p <- ncol(Y) + if(!missing(subset)) { + x <- x[subset] + Y <- Y[subset,, drop=FALSE] + group <- group[subset] + } + + group <- as.factor(group) + if(!missing(prefix)) levels(group) <- paste(prefix, levels(group)) + + group <- as.factor(group) + nna <- !(is.na(x) | (rowSums(is.na(Y)) == p) | is.na(group)) + x <- x[nna] + Y <- Y[nna,, drop=FALSE] + group <- group[nna] + + lev <- levels(group) + nlev <- length(lev) + lc <- p * nlev + curves <- list() + clev <- rep('', lc) # for each curve what is the level of group + + xmin <- ymin <- 1e30; xmax <- ymax <- -1e30 + ic <- 0 + for(k in 1:p) { + y <- Y[, k] + for(g in lev) { + ic <- ic + 1 + s <- group == g + z <- switch(method, + lowess=lowess(x[s], y[s], iter=iter, f=f), + supsmu=supsmu(x[s], y[s], bass=bass), + raw=approx(x[s], y[s], xout=sort(unique(x[s])))) + + if(missing(trim)) + trim <- if(sum(s) > 200) 10 / sum(s) else 0 + + if(trim > 0 && trim < 1) { + xq <- quantile(x[s], c(trim, 1 - trim)) + s <- z$x >= xq[1] & z$x <= xq[2] + z <- list(x=z$x[s], y=z$y[s]) + } + + if(length(evaluate)) { + rx <- range(z$x) + xseq <- seq(rx[1], rx[2], length=evaluate) + z <- approx(z, xout=xseq) + } + + if(!missing(fun)) { + yy <- fun(z$y) + s <- !is.infinite(yy) & !is.na(yy) + z <- list(x=z$x[s], y=yy[s]) + } + + clev[ic] <- g + lab <- if(p == 1) g + else if(nlev == 1 & p == 1) '1' + else if(nlev == 1 & p > 1) colnames(Y)[k] + else paste(colnames(Y)[k], g) + + curves[[lab]] <- z + xmin <- min(xmin, z$x); xmax <- max(xmax, z$x) + ymin <- min(ymin, z$y); ymax <- max(ymax, z$y) + } + } + if(add) { + if(missing(xlim)) + xlim <- if(grid) current.panel.limits()$xlim else par('usr')[1:2] + } + else { + if(missing(xlab)) + xlab <- label(x, units=TRUE, plot=TRUE, default=nam[1]) + + if(missing(xlim)) xlim <- c(xmin, xmax) + if(missing(ylim)) ylim <- c(ymin, ymax) + plot(xmin, ymin, xlim=xlim, ylim=ylim, + type='n', xlab=xlab, ylab=ylab) + } + + lty <- rep(lty, length=lc) + col <- rep(col, length=lc) + if(missing(lwd) && is.list(label.curves) && length(label.curves$lwd)) + lwd <- label.curves$lwd + + lwd <- rep(lwd, length=lc) + + for(i in 1 : lc) { + cu <- curves[[i]] + s <- cu$x >= xlim[1] & cu$x <= xlim[2] + curves[[i]] <- list(x=cu$x[s], y=cu$y[s]) + } + if(lines.) 
+ for(i in 1 : lc) + gfun$lines(curves[[i]], lty=lty[i], col=col[i], lwd=lwd[i]) + + if(datadensity) { + for(i in 1 : nlev) { + s <- group == lev[i] + x1 <- x[s] + for(ii in which(clev == lev[i])) { + y.x1 <- approx(curves[[ii]], xout=x1)$y + sopts <- c(list(x=x1, y=y.x1, col=col[ii], grid=grid), scat1d.opts) + do.call('scat1d', sopts) + } + } + } + + if((is.list(label.curves) || label.curves) && + lc > 1 && (!missing(prefix) | !add | !missing(label.curves))) + labcurve(curves, lty=lty, col.=col, opts=label.curves, grid=grid) + + invisible(curves) +} + + +panel.plsmo <- function(x, y, subscripts, groups=NULL, type='b', + label.curves=TRUE, + lwd = superpose.line$lwd, + lty = superpose.line$lty, + pch = superpose.symbol$pch, + cex = superpose.symbol$cex, + font = superpose.symbol$font, + col = NULL, scat1d.opts=NULL, ...) +{ + superpose.symbol <- trellis.par.get("superpose.symbol") + superpose.line <- trellis.par.get("superpose.line") + if(length(groups)) groups <- as.factor(groups) + + g <- unclass(groups)[subscripts] + ng <- if(length(groups)) max(g) else 1 + + lty <- rep(lty, length = ng) + lwd <- rep(lwd, length = ng) + pch <- rep(pch, length = ng) + cex <- rep(cex, length = ng) + font <- rep(font, length = ng) + if(!length(col)) + col <- if(type == 'p') superpose.symbol$col else superpose.line$col + + col <- rep(col, length = ng) + lc <- + if(is.logical(label.curves)) { + if(label.curves) + list(lwd=lwd, cex=cex[1]) + else FALSE + } else c(list(lwd=lwd, cex=cex[1]), label.curves) + + if(type != 'p') if(ng > 1) + plsmo(x, y, group=groups[subscripts, drop=FALSE], + add=TRUE, lty=lty, col=col, label.curves=lc, grid=TRUE, + scat1d.opts=scat1d.opts, ...) + else + plsmo(x, y, add=TRUE, lty=lty, col=col, label.curves=lc, grid=TRUE, + scat1d.opts=scat1d.opts, ...) + + if(type != 'l') { + if(ng > 1) + panel.superpose(x, y, subscripts, + as.integer(groups), + lwd=lwd, lty=lty, pch=pch, cex=cex, + font=font, col=col) + else + panel.xyplot(x, y, + lwd=lwd, lty=lty, pch=pch, cex=cex, + font=font, col=col) + + if(ng > 1) { + Key <- function(x=NULL, y=NULL, lev, cex, col, font, pch){ + oldpar <- par(usr=c(0, 1, 0, 1), xpd=NA) + on.exit(par(oldpar)) + if(is.list(x)) { + y <- x[[2]] + x <- x[[1]] + } + + ## Even though par('usr') shows 0,1,0,1 after lattice draws + ## its plot, it still needs resetting + if(!length(x)) + x <- 0 + + if(!length(y)) + y <- 1 ## because of formals() + + rlegend(x, y, legend=lev, cex=cex, col=col, pch=pch) + invisible() + } + + formals(Key) <- list(x=NULL,y=NULL,lev=levels(groups), cex=cex, + col=col, font=font, pch=pch) + .setKey(Key) + } + } +} |
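A hypothetical plsmo call: lowess-smoothed cholesterol against age, one curve per sex, with a rug of raw data under each curve:

  set.seed(2)
  age  <- rnorm(200, 50, 10)
  chol <- 180 + 0.8*age + rnorm(200, 0, 25)
  sex  <- sample(c('female', 'male'), 200, replace=TRUE)
  plsmo(age, chol, group=sex, datadensity=TRUE)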
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/popower.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/popower.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,56 @@ +popower <- function(p, odds.ratio, n, n1, n2, alpha=.05) +{ + if(missing(n)) + n <- n1+n2 + else { + n1 <- n2 <- n/2 + } + + p <- p[!is.na(p)] + if(abs(sum(p)-1)>.0001) + stop('probabilities in p do not add up to 1') + + z <- qnorm(1-alpha/2) + A <- n2/n1 + ps <- 1 - sum(p^3) + V <- n1*n2*n/3/((n+1)^2)*ps + power <- pnorm(abs(logb(odds.ratio))*sqrt(V) - z) + eff <- ps/(1-1/n/n) + structure(list(power=power, eff=eff), class='popower') +} + + +print.popower <- function(x, ...) +{ + cat('Power:',round(x$power,3), + '\nEfficiency of design compared with continuous response:', + round(x$eff,3),'\n\n') + invisible() +} + + +posamsize <- function(p, odds.ratio, fraction=.5, + alpha=.05, power=.8) +{ + p <- p[!is.na(p)] + if(abs(sum(p)-1)>.0001) + stop('probabilities in p do not add up to 1') + + A <- (1-fraction)/fraction + log.or <- logb(odds.ratio) + z.alpha <- qnorm(1-alpha/2) + z.beta <- qnorm(power) + ps <- 1 - sum(p^3) + n <- 3*((A+1)^2)*(z.alpha+z.beta)^2/A/(log.or^2)/ps + eff <- ps/(1-1/n/n) + structure(list(n=n,eff=eff), class='posamsize') +} + + +print.posamsize <- function(x, ...) +{ + cat('Total sample size:',round(x$n,1), + '\nEfficiency of design compared with continuous response:', + round(x$eff,3),'\n\n') + invisible() +} |
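A worked call, assuming a 3-level ordinal outcome with control-group cell probabilities p and a postulated odds ratio of 2:

  popower(p=c(.3, .5, .2), odds.ratio=2, n=180)       # power for n=180 (90 per group)
  posamsize(p=c(.3, .5, .2), odds.ratio=2, power=.9)  # total n for 90% power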
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/print.char.list.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/print.char.list.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,571 @@
+if(!exists("string.bounding.box")) {
+ string.bounding.box <- function(string, type=c("chars", "width")) {
+ thisfun <- function(x, type) {
+ height <- length(x)
+ # get rid of ':' on last string
+ x[height] <- substr(x[height], start=1, stop=nchar(x[height], type='chars') - 1)
+
+ c(height = height, width = max(nchar(x, type=type)))
+ }
+
+ mode(string) <- "character"
+
+ type <- match.arg(type)
+
+ ## Remove '\n' if it ends the string, and add a ':' so that the string split
+ ## functions the way I want it to.
+ string <- paste(string, ':', sep='')
+
+ ans <- sapply(strsplit(string, '\n', fixed=TRUE), FUN=thisfun, type=type, USE.NAMES=FALSE)
+ return(list(columns = ans[2,], rows = ans[1,]))
+ }
+}
+
+equalBins <- function(widths, subwidths) {
+ ## The length of widths and subwidths must be the same
+ if(length(widths) != length(subwidths)) {
+ stop("width and subwidth must be of the same length")
+ }
+
+ ## adjust width for column spacers
+ widths <- widths - unlist(lapply(subwidths, length)) + 1
+ unlist(mapply(function(width, subwidths) {
+ if(sum(subwidths) < width) {
+ div <- width %/% length(subwidths)
+ mod <- width %% length(subwidths)
+ c(rep.int(div + 1, mod), rep.int(div, length(subwidths) - mod))
+ } else {
+ subwidths
+ }
+ }, widths, subwidths, SIMPLIFY = FALSE))
+}
+
+stringDims <- function(string) {
+ if(is.null(string)) {
+ return(height = 0, width = 0)
+ }
+
+ dims <- dim(string)
+
+ bbox <- string.bounding.box(string)
+ height <- bbox$rows
+ width <- bbox$columns
+
+ if(any(dims)) {
+ dim(height) <- dims
+ dim(width) <- dims
+ }
+
+ list(height = height, width = width)
+}
+
+simplifyDims <- function(x) {
+ if(any(sapply(x, FUN=is.matrix)))
+ do.call(rbind, x)
+ else
+ do.call(c, x)
+}
+
+partition.vector <- function(x, sep, ...) {
+ if(missing(sep)) {
+ stop("sep is a required arg")
+ }
+
+ if(sum(sep) != length(x)) {
+ stop("sep must sum to the number of columns in x")
+ }
+
+ split(x, rep(seq(along.with=sep), times=sep))
+}
+
+
+partition.matrix <- function(x, rowsep, colsep, ...) {
+ colmissing <- missing(colsep)
+ rowmissing <- missing(rowsep)
+
+ if(rowmissing && colmissing) {
+ stop("At least one of rowsep or colsep args must be specified")
+ }
+
+ ## If length of group is equal to length of x assume that this is
+ ## a vector of group numbers
+ if(!rowmissing) {
+ if(sum(rowsep) != NROW(x)) {
+ stop("rowsep must sum to the number of rows in x")
+ }
+ if(!is.numeric(rowsep)) {
+ stop("the rowsep vector must be numeric")
+ }
+ }
+
+ if(!colmissing) {
+ if(sum(colsep) != NCOL(x)) {
+ stop("colsep must sum to the number of columns in x")
+ }
+ if(!is.numeric(colsep)) {
+ stop("the colsep vector must be numeric")
+ }
+ }
+
+ ## Separate x into row chunks
+ if(!rowmissing) {
+ set <- lapply(split(seq(NROW(x)), rep(seq(along.with=rowsep), times=rowsep)), function(index) x[index,,drop=FALSE])
+ } else {
+ set <- NULL
+ }
+
+ if(!colmissing) {
+ FUN <- function(x)
+ lapply(split(seq(NCOL(x)), rep(seq(along.with=colsep), times=colsep)), function(index) x[,index,drop=FALSE])
+
+ if(is.null(set)) {
+ FUN(x)
+ } else {
+ lapply(set, FUN)
+ }
+ } else {
+ set
+ }
+}
+
+
+print.char.list <- function(x, ..., hsep = c("|"), vsep = c("-"), csep = c("+"),
+ print.it = TRUE, rowname.halign = c("left", "centre", "right"),
+ rowname.valign = c("top", "centre", "bottom"),
+ colname.halign = c("centre", "left", "right"),
+ colname.valign = c("centre", "top", "bottom"),
+ text.halign = c("right", "centre", "left"),
+ text.valign = c("top", "centre", "bottom"), rowname.width,
+ r[...]greater
+ supercolwidth <- tapply(width, rep.int(seq(along.with=colsets), times=colsets), sum) + colsets - 1
+ supercolheight <- max(superrowDims$height)
+ colheight <- max(colDims$height)
+
+ superrowheight <- tapply(height, rep.int(seq(along.with=rowsets), times=rowsets), sum)
+
+ if(missing(prefix.width)) {
+ if(!is.null(rownames)) {
+ prefix.width <- max(max(na.rm = TRUE, as.integer(median(width)), max(rowDims$width)))
+ } else {
+ prefix.width <- 0
+ }
+ }
+
+ if(missing(superprefix.width)) {
+ if(!is.null(superrows)) {
+ superprefix.width <- max(na.rm = TRUE, as.integer(median(width)), max(superrowDims$width))
+ } else {
+ superprefix.width <- 0
+ }
+ }
+
+ header <- NULL
+ headerwidth <- NULL
+ rows <- NULL
+ entries <- list()
+ blanks <- list()
+
+
+ ## Figure out the centering of the cells.
+ rowNameHalign <- match.arg(rowname.halign)
+ rowNameValign <- match.arg(rowname.valign)
+ colNameHalign <- match.arg(colname.halign)
+ colNameValign <- match.arg(colname.valign)
+ cellHalign <- match.arg(text.halign)
+ cellValign <- match.arg(text.valign)
+
+ ## create the superrowname column
+ superrow <- if(!is.null(superrows)) {
+ superrows <- matrix(superrows, ncol=1)
+
+ header <- NA
+ headerwidth <- superprefix.width
+ ## perform vertical and horizontal centering.
+ justText(superrows, superprefix.width, superrowheight,
+ rowNameHalign, rowNameValign)
+ }
+
+ row <- if(!is.null(rownames)) {
+ header <- cbind(header, NA)
+ headerwidth <- c(headerwidth, prefix.width)
+ justText(rownames, prefix.width, height, rowNameHalign, rowNameValign)
+ }
+
+ body <- cbind(superrow, row,
+ justText(matrix, width, height, cellHalign, cellValign))
+
+ width <- c(headerwidth, width)
+
+ body <- split(body, row(body))
+
+ ## Create the super column name row and the column name row
+ if(!is.null(supercols)) {
+ supercols <- matrix(supercols, nrow=1)
+
+ supercolwidth <- c(headerwidth, supercolwidth)
+ entry <- c(header, rep(seq(along.with=colsets), colsets), 0)
+ entries <- c(entries, list(ifelse(is.na(entry), FALSE, !duplicated(entry))))
+
+ blank <- ifelse(is.na(c(header, rep(supercols, colsets))), TRUE, FALSE)
+ blanks <- c(blanks, list(blank))
+
+ rows <- printRow(justText(cbind(header, supercols), supercolwidth, supercolheight,
+ colNameHalign, colNameValign), width=supercolwidth, sep=hsep)
+ }
+
+ if(!is.null(colnames)) {
+ entry <- c(header, rep(seq(along.with=colnames), 1), 0)
+ entries <- c(entries, list(ifelse(is.na(entry), FALSE, !duplicated(entry))))
+
+ blank <- ifelse(is.na(c(header, colnames)), TRUE, FALSE)
+ blanks <- c(blanks, list(blank))
+
+ rows <- c(rows,
+ printRow(justText(cbind(header, colnames), width, colheight,
+ colNameHalign, colNameValign), width=width, sep=hsep))
+ }
+
+
+ env <- environment()
+
+ rows <- c(rows, unlist(lapply(split(body, rep(seq(along.with=rowsets), rowsets)), function(set) {
+ index <- seq(along.with=set)
+
+ mapply(FUN = function(line, index) {
+ entry <- c(ifelse(is.na(line), NA, rep(seq(along.with=line), 1)), 0)
+ entry <- ifelse(is.na(entry), FALSE, !duplicated(entry))
+
+ assign('entries', c(entries, list(entry)), env)
+
+ blank <- ifelse(is.na(line), FALSE, FALSE)
+ if(index != 1) {
+ blank[1] <- TRUE
+ }
+
+ assign('blanks', c(blanks, list(blank)), env)
+ printRow(line, width=width, sep=hsep)
+ }, set, index)
+ }), use.names=FALSE))
+
+ blanks[[1]] <- logical(length(width))
+ entries <- lapply(entries, function(entry) {entry[1] <- TRUE; entry})
+
+ bars <- printBars(entries, blanks, width, hsep=hsep, vsep=vsep, csep=csep)
+ total <- paste(bars, c(rows, ""), sep='\n', collapse='\n')
+
+ if(print.it) {
+ cat(total)
+ invisible(x)
+ } else {
+ total
+ }
+} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/pstamp.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/pstamp.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,42 @@
+pstamp <- function(txt, pwd=FALSE, time.=TRUE)
+{
+ stamp <- function(string = Sys.time(), print = TRUE, plot = TRUE)
+ {
+ opar <- par(yaxt='s',xaxt='s',xpd=NA)
+ on.exit(par(opar))
+ plt <- par('plt')
+ usr <- par('usr')
+
+ ## when a logarithmic scale is in use (i.e. par('xlog') is true),
+ ## then the x-limits would be 10^par('usr')[1:2]. Similarly for
+ ## the y axis
+ xcoord <- usr[2] + (usr[2] - usr[1])/(plt[2] - plt[1]) *
+ (1-plt[2]) - .6*strwidth('m')
+ ycoord <- usr[3] - diff(usr[3:4])/diff(plt[3:4])*(plt[3]) +
+ 0.6*strheight('m')
+
+ if(par('xlog'))
+ xcoord <- 10^(xcoord)
+ if(par('ylog'))
+ ycoord <- 10^(ycoord)
+
+ ## Print the text on the current plot
+ text(xcoord, ycoord, string, adj=1)
+ invisible(string)
+ }
+
+ date.txt <- if(time.) format(Sys.time())
+ else format(Sys.time(), '%Y-%m-%d')
+
+ if(pwd)
+ date.txt <- paste(getwd(), date.txt)
+
+ oldpar <- par(mfrow=c(1,1), cex = 0.5)
+ on.exit(par(oldpar))
+ if(!missing(txt))
+ date.txt <- paste(txt,' ',date.txt, sep='')
+
+ stamp(string=date.txt,print=FALSE,plot=TRUE)
+ invisible()
+
+} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/rcorr.cens.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/rcorr.cens.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,44 @@ +## Computes rank correlation measures between a variable X and a possibly +## censored variable Y, with event/censoring indicator EVENT +## Rank correlation is extension of Somers' Dxy = 2(Concordance Prob-.5) +## See Harrell et al JAMA 1984(?) +## Set outx=T to exclude ties in X from computations (-> Goodman-Kruskal +## gamma-type rank correlation) + +rcorr.cens <- function(x, S, outx=FALSE) { + if(is.Surv(S)) { + if(attr(S, 'type') != 'right') + stop('only handles right censored times') + } else S <- cbind(S, rep(1, length(S))) + + y <- S[,1] + event <- S[,2] + if(length(y)!=length(x)) + stop("y must have same length as x") + + miss <- is.na(x) | is.na(y) | is.na(event) + nmiss <- sum(miss) + if(nmiss>0) { + miss <- !miss + x <- x[miss] + y <- y[miss] + event <- event[miss] + } + + n <- length(x) + ne <- sum(event) + storage.mode(x) <- "double" + storage.mode(y) <- "double" + storage.mode(event) <- "logical" + + z <- + .Fortran("cidxcn",x,y,event,length(x),nrel=double(1),nconc=double(1), + nuncert=double(1), + c.index=double(1),gamma=double(1),sd=double(1),as.logical(outx), + PACKAGE="Hmisc") + r <- c(z$c.index,z$gamma,z$sd,n,nmiss,ne,z$nrel,z$nconc,z$nuncert) + names(r) <- c("C Index","Dxy","S.D.","n","missing","uncensored", + "Relevant Pairs", + "Concordant","Uncertain") + r +} |
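A sketch with simulated right-censored data (requires the survival package for Surv):

  library(survival)
  set.seed(3); n <- 100
  marker <- rnorm(n)
  futime <- rexp(n, exp(marker/2))
  death  <- rbinom(n, 1, .7)
  rcorr.cens(marker, Surv(futime, death))  # C-index, Dxy, pair counts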
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/rcorr.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/rcorr.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,57 @@ +rcorr <- function(x, y, type=c("pearson","spearman")) +{ + type <- match.arg(type) + + if(!missing(y)) + x <- cbind(x, y) + + x[is.na(x)] <- 1e30 + storage.mode(x) <- "double" + + p <- as.integer(ncol(x)) + if(p < 1) + stop("must have >1 column") + + n <- as.integer(nrow(x)) + if(n < 5) + stop("must have >4 observations") + + h <- + .Fortran("rcorr", x, n, p, itype=as.integer(1+(type=="spearman")), + hmatrix=double(p*p), npair=integer(p*p), + double(n), double(n), double(n), double(n), + double(n), integer(n), PACKAGE="Hmisc") + + npair <- matrix(h$npair, ncol=p) + h <- matrix(h$hmatrix, ncol=p) + h[h > 1e29] <- NA + nam <- dimnames(x)[[2]] + dimnames(h) <- list(nam, nam) + dimnames(npair) <- list(nam, nam) + P <- matrix(2 * (1 - pt(abs(h)*sqrt(npair - 2) / sqrt(1 - h * h), + npair - 2)), ncol=p) + P[abs(h) == 1] <- 0 + diag(P) <- NA + dimnames(P) <- list(nam, nam) + structure(list(r=h, n=npair, P=P), class="rcorr") +} + +print.rcorr <- function(x, ...) +{ + print(round(x$r,2)) + n <- x$n + if(all(n == n[1,1])) + cat("\nn=", n[1,1], "\n\n") + else { + cat("\nn\n") + print(n) + } + + cat("\nP\n") + P <- x$P + P <- ifelse(P < .0001, 0, P) + p <- format(round(P, 4)) + p[is.na(P)] <- "" + print(p, quote=FALSE) + invisible() +} |
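Example call with simulated columns (the function requires at least 5 observations):

  set.seed(4)
  x <- matrix(rnorm(100*4), ncol=4, dimnames=list(NULL, c('a','b','c','d')))
  rcorr(x, type='spearman')  # prints r, pairwise n, and P matrices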
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/rcorrp.cens.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/rcorrp.cens.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,181 @@
+## Computes rank correlation measures between a variable X and a possibly
+## censored Surv variable Y
+## Rank correlation is extension of Somers' Dxy = 2(Concordance Prob-.5)
+## See Harrell et al JAMA 1984(?)
+## Set outx=T to exclude ties in X from computations (-> Goodman-Kruskal
+## gamma-type rank correlation)
+## No. This is the version extended to paired predictions
+## method=1: concordance=delta x1 < delta x2
+## method=2: concordance=x1 concordant and x2 discordant
+
+rcorrp.cens <- function(x1, x2, S, outx=FALSE, method=1)
+{
+ if(is.Surv(S))
+ {
+ if(attr(S, 'type') != 'right')
+ stop('only handles right censored times')
+ } else S <- cbind(S, rep(1, length(S)))
+
+ y <- S[,1]
+ event <- S[,2]
+
+ if(length(x1)!=length(x2))
+ stop("x1 and x2 must have same length")
+
+ if(length(y)!=length(x1))
+ stop("y must have same length as x1")
+
+ if(method!=1 & method!=2)
+ stop("method must be 1 or 2")
+
+ miss <- is.na(x1+x2+y+event)
+ nmiss <- sum(miss)
+ if(nmiss>0) {
+ miss <- !miss
+ x1 <- x1[miss]
+ x2 <- x2[miss]
+ y <- y[miss]
+ event <- event[miss]
+ }
+
+ n <- length(x1)
+ if(n<2)
+ stop("<2 non-missing observations")
+
+ ne <- sum(event)
+ storage.mode(x1) <- "double"
+ storage.mode(x2) <- "double"
+ storage.mode(y) <- "double"
+ storage.mode(event) <- "logical"
+ storage.mode(method) <- "integer"
+ storage.mode(outx) <- "logical"
+
+ z <-
+ .Fortran("cidxcp",x1,x2,y,event,length(x1),method,outx,
+ nrel=double(1),nuncert=double(1),
+ c1=double(1),c2=double(1),gamma1=double(1),gamma2=double(1),
+ gamma=double(1),sd=double(1),c12=double(1),c21=double(1),
+ PACKAGE="Hmisc")
+
+ r <- c(z$gamma,z$sd,z$c12,z$c21,n,nmiss,ne,z$nrel,z$nuncert,z$c1,z$c2,
+ z$gamma1,z$gamma2)
+ names(r) <- c("Dxy","S.D.","x1 more concordant","x2 more concordant",
+ "n","missing","uncensored",
+ "Relevant Pairs","Uncertain","C X1","C X2","Dxy X1","Dxy X2")
+ r
+}
+
+improveProb <- function(x1, x2, y)
+ {
+ s <- is.na(x1+x2+y)
+ if(any(s))
+ {
+ s <- !s
+ x1 <- x1[s]
+ x2 <- x2[s]
+ y <- y[s]
+ }
+ n <- length(y)
+ y <- as.numeric(y)
+ u <- sort(unique(y))
+ if(length(u) != 2 || u[1] != 0 || u[2] != 1)
+ stop('y must have two values: 0 and 1')
+ r <- range(x1,x2)
+ if(r[1] < 0 || r[2] > 1)
+ stop('x1 and x2 must be in [0,1]')
+
+ a <- y==1
+ b <- y==0
+ na <- sum(a)
+ nb <- sum(b)
+ d <- x2 - x1
+
+ nup.ev <- sum(d[a] > 0); pup.ev <- nup.ev/na
+ nup.ne <- sum(d[b] > 0); pup.ne <- nup.ne/nb
+ ndown.ev <- sum(d[a] < 0); pdown.ev <- ndown.ev/na
+ ndown.ne <- sum(d[b] < 0); pdown.ne <- ndown.ne/nb
+
+ nri.ev <- pup.ev - pdown.ev
+ # se.nri.ev <- sqrt((pup.ev + pdown.ev)/na) # old est under H0
+ v.nri.ev <- (nup.ev + ndown.ev)/(na^2) - ((nup.ev - ndown.ev)^2)/(na^3)
+ se.nri.ev <- sqrt(v.nri.ev)
+ z.nri.ev <- nri.ev/se.nri.ev
+
+ nri.ne <- pdown.ne - pup.ne
+ # se.nri.ne <- sqrt((pdown.ne + pup.ne)/nb) # old est under H0
+ v.nri.ne <- (ndown.ne + nup.ne)/(nb^2) - ((ndown.ne - nup.ne)^2)/(nb^3)
+ se.nri.ne <- sqrt(v.nri.ne)
+ z.nri.ne <- nri.ne/se.nri.ne
+
+ nri <- pup.ev - pdown.ev - (pup.ne - pdown.ne)
+ # old estimate under H0:
+ # se.nri <- sqrt((pup.ev + pdown.ev)/na + (pup.ne + pdown.ne)/nb)
+ se.nri <- sqrt(v.nri.ev + v.nri.ne)
+ z.nri <- nri/se.nri
+
+
+ improveSens <- sum(d[a])/na
+ improveSpec <- -sum(d[b])/nb
+ idi <- mean(d[a]) - mean(d[b])
+ var.ev <- var(d[a])/na
+ var.ne <- var(d[b])/nb
+ se.idi <- sqrt(var.ev + var.ne)
+ z.idi <- idi/se.idi
+
+ structure(llist(n, na, nb, pup.ev, pup.ne, pdown.ev, pdown.ne,
+ nri, se.nri, z.nri,
+ nri.ev, se.nri.ev, z.nri.ev,
+ nri.ne, 
se.nri.ne, z.nri.ne, + improveSens, improveSpec, + idi, se.idi, z.idi, labels=FALSE), class='improveProb') + } + +print.improveProb <- function(x, digits=3, conf.int=.95, ...) + { + cat('\nAnalysis of Proportions of Subjects with Improvement in Predicted Probability\n\n') + cat('Number of events:', x$na,'\tNumber of non-events:', x$nb, '\n\n') + + p <- matrix(c(x$pup.ev, x$pup.ne, x$pdown.ev, x$pdown.ne), + dimnames=list(c( + 'Increase for events (1)', + 'Increase for non-events (2)', + 'Decrease for events (3)', + 'Decrease for non-events (4)'), + 'Proportion')) + cat('Proportions of Positive and Negative Changes in Probabilities\n\n') + print(p, digits=digits) + + zpci <- function(m, se, conf.int) + { + z <- qnorm((1+conf.int)/2) + cbind(m/se, (1 - pnorm(abs(m/se)))*2, m - z*se, m + z*se) + } + + p <- cbind(c(x$nri, x$nri.ev, x$nri.ne), + c(x$se.nri, x$se.nri.ev, x$se.nri.ne), + zpci(c(x$nri, x$nri.ev, x$nri.ne), + c(x$se.nri, x$se.nri.ev, x$se.nri.ne), + conf.int=conf.int)) + low <- paste('Lower', conf.int) + hi <- paste('Upper', conf.int) + dimnames(p) <- list(c('NRI (1-3+4-2)', + 'NRI for events (1-3)', + 'NRI for non-events (4-2)'), + c('Index', 'SE', 'Z', '2P', low, hi)) + cat('\n\nNet Reclassification Improvement\n\n') + print(p, digits=digits) + + cat('\n\nAnalysis of Changes in Predicted Probabilities\n\n') + + p <- matrix(c(x$improveSens, x$improveSpec), + dimnames=list(c('Increase for events (sensitivity)', + 'Decrease for non-events (specificity)'), + 'Mean Change in Probability')) + print(p, digits=digits) + + cat("\n\nIntegrated Discrimination Improvement\n (average of sensitivity and 1-specificity over [0,1];\n also is difference in Yates' discrimination slope)\n\n") + p <- c(x$idi, x$se.idi, zpci(x$idi, x$se.idi, conf.int=conf.int)) + names(p) <- c('IDI', 'SE', 'Z', '2P', low, hi) + print(p, digits=digits) + invisible() + } |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/rcspline.eval.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/rcspline.eval.s Wed Jun 28 20:28:48 2017 -0400 |
@@ -0,0 +1,160 @@
+##rcspline.eval - function to create design matrix for restricted cubic
+## spline function of Stone & Koo, given an input vector and optionally
+## a vector of knots.  If knots are not given, knots are set using
+## default algorithm.  If the number of knots is not given, 5 are used.
+## Terms are normalized by (outer-inner knot)^2.
+## Can optionally return antiderivative of spline functions if
+## type="integral".
+## norm=0 : no normalization of constructed variables
+## norm=1 : divide by cube of difference in last 2 knots
+##          makes all variables unitless
+## norm=2 : (default) divide by square of difference in outer knots
+##          makes all variables in original units of x
+##
+## Returns:
+## x - design matrix for derived spline variables
+##     (includes original x in first column if inclx=T or
+##     type="integral")
+##     attribute knots - input or derived vector of knots
+## If knots.only=T, returns instead the vector of estimated or given
+## knots.
+## If rpm is not null, replaces missing x with rpm before evaluating
+## but after estimating knots.
+##
+## F. Harrell 13 Feb 90
+## Modified   28 Mar 90 - improved default knot computation
+##            22 Aug 90 - put knots as attribute, return matrix
+##            20 Sep 90 - added knots.only argument
+##            16 Oct 90 - added rpm argument
+##            11 Dec 91 - added type argument
+##            27 Dec 91 - added norm argument
+##            26 Jun 93 - added evasive action if <3 knots
+##             1 Oct 13 - added logic to handle excessive ties at start or end x
+##             8 Mar 14 - refined that logic, added logic for low # uniques
+
+rcspline.eval <- function(x, knots=NULL, nk=5, inclx=FALSE, knots.only=FALSE,
+                          type="ordinary", norm=2, rpm=NULL, pc=FALSE,
+                          fractied=0.05)
+{
+  if(! length(knots)) {   ## knot locations unspecified
+    xx <- x[!is.na(x)]
+    n  <- length(xx)
+    if(n < 6)
+      stop('knots not specified, and < 6 non-missing observations')
+
+    if(nk < 3)
+      stop('nk must be >= 3')
+
+    xu  <- sort(unique(xx))
+    nxu <- length(xu)
+
+    if((nxu - 2) <= nk) {
+      warning(sprintf('%s knots requested with %s unique values of x.  knots set to %s interior values.', nk, nxu, nxu - 2))
+      knots <- xu[- c(1, length(xu))]
+    }
+    else {
+      outer <- if(nk > 3) .05 else .1
+      if(nk > 6) outer <- .025
+
+      knots <- numeric(nk)
+      overrideFirst <- overrideLast <- FALSE
+      nke <- nk
+      firstknot <- lastknot <- numeric(0)
+
+      if(fractied > 0 && fractied < 1) {
+        f <- table(xx) / n
+        if(max(f[- c(1, length(f))]) < fractied) {
+          if(f[1] >= fractied) {
+            firstknot <- min(xx[xx > min(xx)])
+            xx  <- xx[xx > firstknot]
+            nke <- nke - 1
+            overrideFirst <- TRUE
+          }
+          if(f[length(f)] >= fractied) {
+            lastknot <- max(xx[xx < max(xx)])
+            xx  <- xx[xx < lastknot]
+            nke <- nke - 1
+            overrideLast <- TRUE
+          }
+        }
+      }
+      if(nke == 1) knots <- median(xx)
+      else {
+        if(nxu <= nke) knots <- xu
+        else {
+          p <- if(nke == 2) seq(.5, 1.0 - outer, length=nke)
+               else
+                 seq(outer, 1.0 - outer, length=nke)
+          knots <- quantile(xx, p)
+          if(length(unique(knots)) < min(nke, 3)) {
+            knots <- quantile(xx, seq(outer, 1.0 - outer, length=2 * nke))
+            if(length(firstknot) && length(unique(knots)) < 3) {
+              midval <- if(length(firstknot) && length(lastknot))
+                          (firstknot + lastknot) / 2. else median(xx)
+              knots <- sort(c(firstknot, midval,
+                              if(length(lastknot)) lastknot
+                              else quantile(xx, 1.0 - outer) ))
+            }
+            if((nu <- length(unique(knots))) < 3) {
+              cat("Fewer than 3 unique knots.  Frequency table of variable:\n")
+              print(table(x))
+              stop()
+            }
+
+            warning(paste("could not obtain", nke,
+                          "interior knots with default algorithm.\n",
+                          "Used alternate algorithm to obtain",
+                          nu, "knots"))
+          }
+        }
+
+        if(length(xx) < 100) {
+          xx <- sort(xx)
+          if(! overrideFirst) knots[1]   <- xx[5]
+          if(! overrideLast)  knots[nke] <- xx[length(xx) - 4]
+        }
+      }
+      knots <- c(firstknot, knots, lastknot)
+    }
+  }  ## end knot locations not specified
+
+  knots <- sort(unique(knots))
+  nk <- length(knots)
+
+  if(nk < 3) {
+    cat("fewer than 3 unique knots.  Frequency table of variable:\n")
+    print(table(x))
+    stop()
+  }
+
+  if(knots.only) return(knots)
+
+  if(length(rpm)) x[is.na(x)] <- rpm
+  xx <- matrix(1.1, length(x), nk - 2)
+  knot1   <- knots[1 ]
+  knotnk  <- knots[nk]
+  knotnk1 <- knots[nk - 1]
+  kd <- if(norm == 0) 1 else if(norm == 1) knotnk - knotnk1 else
+        (knotnk - knot1) ^ (2 / 3)
+
+  power <- if(type == "integral") 4 else 3
+
+  for(j in 1 : (nk - 2)) {
+    xx[,j] <- pmax((x - knots[j]) / kd, 0) ^ power +
+      ((knotnk1 - knots[j]) * pmax((x - knotnk) / kd, 0) ^ power -
+       (knotnk  - knots[j]) * (pmax((x - knotnk1) / kd, 0) ^ power)) /
+      (knotnk - knotnk1)
+  }
+
+  if(power == 4) xx <- cbind(x, x * x / 2, xx * kd / 4) else
+  if(inclx) xx <- cbind(x, xx)
+
+  if(pc) {
+    p <- prcomp(xx, scale=TRUE, center=TRUE)
+    pcparms <- p[c('center', 'scale', 'rotation')]
+    xx <- p$x
+    attr(xx, 'pcparms') <- pcparms
+  }
+  attr(xx, 'knots') <- knots
+  xx
+}
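For orientation, a minimal usage sketch of rcspline.eval as defined above, assuming Hmisc is attached; the data and settings are illustrative only:

    library(Hmisc)
    set.seed(1)
    x <- rnorm(200)
    k <- rcspline.eval(x, nk=5, knots.only=TRUE)  # default knot locations
    X <- rcspline.eval(x, knots=k, inclx=TRUE)    # x plus nk-2 nonlinear terms
    dim(X)             # 200 x 4 when nk=5
    attr(X, 'knots')   # knots are carried along as an attribute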
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/rcspline.plot.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/rcspline.plot.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,263 @@
+##Mod rep(1,n) -> rep(1,length(xe))  1 Jul 91
+rcspline.plot <- function(x, y, model=c("logistic","cox","ols"), xrange,
+                          event, nk=5, knots=NULL, show=c("xbeta", "prob"),
+                          adj=NULL, xlab, ylab, ylim, plim=c(0,1),
+                          plotcl=TRUE, showknots=TRUE, add=FALSE, subset,
+                          lty=1, noprint=FALSE, m, smooth=FALSE, bass=1,
+                          main="auto", statloc)
+{
+  model <- match.arg(model)
+  show  <- match.arg(show)
+
+  if(!missing(event))
+    model <- "cox"
+
+  if(model == "cox" & missing(event))
+    stop('event must be given for model="cox"')
+
+  if(show == "prob" & !missing(adj))
+    stop('show="prob" cannot be used with adj')
+
+  if(show == "prob" & model != "logistic")
+    stop('show="prob" can only be used with model="logistic"')
+
+  if(length(x) != length(y))
+    stop('x and y must have the same length')
+
+  if(!missing(event) && length(event) != length(y))
+    stop('y and event must have the same length')
+
+  if(!missing(adj)) {
+    if(!is.matrix(adj)) adj <- as.matrix(adj)
+    if(dim(adj)[1] != length(x))
+      stop('x and adj must have the same length')
+  }
+
+  if(missing(xlab))
+    xlab <- label(x)
+
+  if(missing(ylab))
+    ylab <- label(y)
+
+  isna <- is.na(x) | is.na(y)
+  if(!missing(event))
+    isna <- isna | is.na(event)
+
+  nadj <- 0
+  if(!missing(adj)) {
+    nadj <- ncol(adj)
+    isna <- isna | apply(is.na(adj), 1, sum) > 0
+  }
+
+  if(!missing(subset))
+    isna <- isna | (!subset)
+
+  x <- x[!isna]
+  y <- y[!isna]
+  if(!missing(event))
+    event <- event[!isna]
+
+  if(!missing(adj))
+    adj <- adj[!isna,]
+
+  n <- length(x)
+  if(n < 6)
+    stop('fewer than 6 non-missing observations')
+
+  if(missing(xrange)) {
+    frac   <- 10. / max(n, 200)
+    xrange <- quantile(x, c(frac, 1. - frac))
+  }
+
+  if(missing(knots))
+    xx <- rcspline.eval(x, nk=nk)
+  else xx <- rcspline.eval(x, knots)
+
+  knots <- attr(xx, "knots")
+  nk    <- length(knots)
+
+  df1 <- nk - 2
+  if(model == "logistic") {
+    require(rms)
+    b <- rms::lrm.fit(cbind(x, xx, adj), y)
+    ##b <- glim(cbind(x,xx,adj), y, rep(1,n), error="binomial", link="logit")
+    ##if(!noprint) glim.print(b)
+    beta <- b$coef
+    cov  <- b$var
+    ##model.lr <- b$deviance[1] - b$deviance[2]
+    model.lr <- b$stats["Model L.R."]
+    offset <- 1   # to skip over intercept parameter
+    ylabl <-
+      if(show == "prob")
+        "Probability"
+      else "log Odds"
+
+    sampled <- paste("Logistic Regression Model, n=", n, " d=", sum(y), sep="")
+  }
+
+  if(model == "cox") {
+    if(!existsFunction('coxph.fit'))
+      coxph.fit <- getFromNamespace('coxph.fit', 'survival')
+    ## 11mar04
+
+    ## added coxph.control around iter.max, eps  11mar04
+    lllin <- coxph.fit(cbind(x, adj), cbind(y, event), strata=NULL,
+                       offset=NULL, init=NULL,
+                       control=coxph.control(iter.max=10, eps=.0001),
+                       method="efron", rownames=NULL)$loglik[2]
+    b <- coxph.fit(cbind(x, xx, adj), cbind(y, event), strata=NULL,
+                   offset=NULL, init=NULL,
+                   control=coxph.control(iter.max=10, eps=.0001),
+                   method="efron", rownames=NULL)
+    beta <- b$coef
+    if(!noprint) {
+      print(beta)
+      print(b$loglik)
+    }
+
+    beta <- b$coef
+    cov  <- b$var
+    model.lr <- 2 * (b$loglik[2] - b$loglik[1])
+    offset <- 0
+    ylabl <- "log Relative Hazard"
+    sampled <- paste("Cox Regression Model, n=", n, " events=", sum(event),
+                     sep="")
+  }
+
+  if(model == "logistic" | model == "cox") {
+    model.df  <- nk - 1 + nadj
+    model.aic <- model.lr - 2. * model.df
+    v <- solve(cov[(1+offset):(nk+offset-1), (1+offset):(nk+offset-1)])
+    assoc.chi <- beta[(1+offset):(nk+offset-1)] %*% v %*%
+      beta[(1+offset):(nk+offset-1)]
+    assoc.df <- nk - 1   # attr(v,"rank")
+    assoc.p  <- 1. - pchisq(assoc.chi, nk - 1)
+    v <- solve(cov[(2+offset):(nk+offset-1), (2+offset):(nk+offset-1)])
+    linear.chi <- beta[(2+offset):(nk+offset-1)] %*% v %*%
+      beta[(2+offset):(nk+offset-1)]
+    linear.df <- nk - 2   # attr(v,"rank")
+    linear.p  <- 1. - pchisq(linear.chi, linear.df)
+    if(nadj > 0) {
+      ntot <- offset + nk - 1 + nadj
+      v <- solve(cov[(nk+offset):ntot, (nk+offset):ntot])
+      adj.chi <- beta[(nk+offset):ntot] %*% v %*%
+        beta[(nk+offset):ntot]
+      adj.df <- ncol(v)   # attr(v,"rank")
+      adj.p  <- 1. - pchisq(adj.chi, adj.df)
+    } else {
+      adj.chi <- 0
+      adj.p   <- 0
+    }
+  }
+
+  ## Evaluate xbeta for expanded x at desired range
+  xe <- seq(xrange[1], xrange[2], length=600)
+  if(model == "cox")
+    xx <- rcspline.eval(xe, knots, inclx=TRUE)
+  else
+    xx <- cbind(rep(1, length(xe)), rcspline.eval(xe, knots, inclx=TRUE))
+
+  xbeta <- xx %*% beta[1:(nk-1+offset)]
+  var <- drop(((xx %*% cov[1:(nk-1+offset), 1:(nk-1+offset)]) * xx) %*%
+              rep(1, ncol(xx)))
+  lower <- xbeta - 1.96 * sqrt(var)
+  upper <- xbeta + 1.96 * sqrt(var)
+  if(show == "prob") {
+    xbeta <- 1. / (1. + exp(-xbeta))
+    lower <- 1. / (1. + exp(-lower))
+    upper <- 1. / (1. + exp(-upper))
+  }
+
+  xlim <- range(pretty(xe))
+  if(missing(ylim))
+    ylim <- range(pretty(xbeta))
+
+  if(main == "auto") {
+    if(show == "xbeta")
+      main <- "Estimated Spline Transformation"
+    else main <- "Spline Estimate of Prob{Y=1}"
+  }
+
+  if(!interactive() & missing(statloc))
+    statloc <- "ll"
+
+  if(!add) {
+    oldmar <- par("mar")
+    if(!missing(statloc) && statloc[1] == "ll")
+      oldmar[1] <- 11
+
+    oldpar <- par(err=-1, mar=oldmar)
+    plot(xe, xbeta, type="n", main=main, xlab=xlab, ylab=ylabl,
+         xlim=xlim, ylim=ylim)
+    lines(xe, xbeta, lty=lty)
+    ltext <- function(z, line, label, cex=.8, adj=0)
+    {
+      zz   <- z
+      zz$y <- z$y - (line-1) * 1.2 * cex * par("csi") *
+        (par("usr")[4] - par("usr")[3]) / (par("fin")[2])   # was 1.85
+      text(zz, label, cex=cex, adj=adj)
+    }
+
+    sl <- 0
+    if(missing(statloc)) {
+      cat("Click left mouse button at upper left corner for statistics\n")
+      z <- locator(1)
+      statloc <- "l"
+    } else if(statloc[1] != "none") {
+      if(statloc[1] == "ll") {
+        z  <- list(x=par("usr")[1], y=par("usr")[3])
+        sl <- 3
+      } else z <- list(x=statloc[1], y=statloc[2])
+    }
+
+    if(statloc[1] != "none" & (model == "logistic" | model == "cox")) {
+      rnd <- function(x, r=2) as.single(round(x, r))
+
+      ltext(z, 1+sl, sampled)
+      ltext(z, 2+sl, " Statistic X2 df")
+      chistats <- format(as.single(round(c(model.lr, model.aic,
+                                           assoc.chi, linear.chi, adj.chi), 2)))
+      pvals <- format(as.single(round(c(assoc.p, linear.p, adj.p), 4)))
+      ltext(z, 3+sl, paste("Model L.R. ", chistats[1], model.df,
+                           " AIC=", chistats[2]))
+      ltext(z, 4+sl, paste("Association Wald ", chistats[3], assoc.df,
+                           " p= ", pvals[1]))
+      ltext(z, 5+sl, paste("Linearity Wald ", chistats[4], linear.df,
+                           " p= ", pvals[2]))
+      if(nadj > 0) ltext(z, 6+sl, paste("Adjustment Wald ", chistats[5],
+                                        adj.df, " p= ", pvals[3]))
+    }
+  } else lines(xe, xbeta, lty=lty)
+
+  if(plotcl) {
+    lines(xe, lower, lty=2)
+    lines(xe, upper, lty=2)
+  }
+
+  if(showknots) {
+    bot.arrow <- par("usr")[3]
+    top.arrow <- bot.arrow + .05 * (par("usr")[4] - par("usr")[3])
+    for(i in 1:nk)
+      arrows(knots[i], top.arrow, knots[i], bot.arrow, length=.1)
+  }
+
+  if(model == "logistic" & nadj == 0) {
+    if(smooth) {
+      z <- supsmu(x, y, bass=bass)
+      if(show == "xbeta")
+        z$y <- logb(z$y / (1. - z$y))
+
+      points(z, cex=.4)
+    }
+
+    if(!missing(m)) {
+      z <- groupn(x, y, m=m)
+      if(show == "xbeta")
+        z$y <- logb(z$y / (1. - z$y))
+
+      points(z, pch=2, mkh=.05)
+    }
+  }
+
+  if(!add)
+    par(oldpar)
+
+  invisible(list(knots=knots, x=xe, xbeta=xbeta, lower=lower, upper=upper))
+}
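A hedged sketch of rcspline.plot on simulated data; the logistic branch calls rms::lrm.fit, so this assumes the rms package is installed. statloc='none' avoids the interactive mouse click:

    set.seed(2)
    x <- rnorm(300)
    y <- rbinom(300, 1, plogis(x))
    rcspline.plot(x, y, model='logistic', nk=4, show='prob',
                  statloc='none', smooth=TRUE)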
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/rcspline.restate.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/rcspline.restate.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,224 @@
+rcspline.restate <- function(knots, coef, type=c("ordinary","integral"),
+                             x="X", lx=nchar(x), norm=2,
+                             columns=65, before="& &", after="\\",
+                             begin="", nbegin=0,
+                             digits=max(8, .Options$digits))
+{
+  type <- match.arg(type)
+  k <- length(knots)
+  if(k < 3)
+    stop("must have >=3 knots in a restricted cubic spline")
+
+  p <- length(coef)
+  if(p == k) {
+    Intc <- coef[1]
+    coef <- coef[-1]
+    p <- p - 1
+  }
+  else Intc <- 0
+
+  if(k - 1 != p)
+    stop("coef must be of length # knots - 1")
+
+  knotnk  <- knots[k]
+  knotnk1 <- knots[k-1]
+  knot1   <- knots[1]
+
+  kd <- if(norm == 0) 1
+        else if(norm == 1) (knotnk - knotnk1)^3
+        else (knotnk - knot1)^2
+
+  coef[-1] <- coef[-1] / kd
+
+  d <- c(0, knots - knotnk)[1:p]
+  coefk <- sum(coef * d) / (knotnk - knotnk1)
+
+  d <- c(0, knots - knotnk1)[1:p]
+  coefk1 <- sum(coef * d) / (knotnk1 - knotnk)
+
+  if(!length(names(coef)))
+    names(coef) <- paste(x, 1:length(coef), sep="")
+
+  coef <- c(coef, coefk, coefk1)
+  names(coef)[k]   <- "1st restricted coef"
+  names(coef)[k+1] <- "2nd restricted coef"
+
+  if(type == "integral")
+    coef <- c(.5 * coef[1], .25 * coef[-1])
+
+  cof <- format.sep(coef, digits)
+  kn  <- format.sep(-knots, digits)
+  if(Intc != 0) {
+    txt <- txt2 <- format.sep(Intc, digits)
+    if(type == "integral") {
+      txt  <- paste(txt, "* x")
+      txt2 <- paste(txt2, '*', x)
+    }
+
+    if(coef[1] >= 0) {
+      txt  <- paste(txt, "+")
+      txt2 <- paste(txt2, '+')
+    }
+  }
+  else txt <- txt2 <- ""
+
+  if(cof[1] != 0) {
+    txt <- paste(txt, cof[1],
+                 if(type == "ordinary") "* x"
+                 else "* x^2",
+                 sep="")
+
+    txt2 <- paste(txt2, cof[1],
+                  if(type == "ordinary") paste("*", x)
+                  else paste("*", x, "^2"),
+                  sep="")
+  }
+
+  for(i in 2:(p+2)) {
+    nam <- paste("pmax(x",
+                 if(knots[i-1] < 0) "+"
+                 else NULL,
+                 if(knots[i-1] != 0) kn[i-1]
+                 else NULL,
+                 ",0)^",
+                 if(type == "ordinary") "3"
+                 else "4",
+                 sep="")
+
+    nam2 <- paste("pmax(", x,
+                  if(knots[i-1] < 0) "+"
+                  else NULL,
+                  if(knots[i-1] != 0) kn[i-1]
+                  else NULL,
+                  ",0)^",
+                  if(type == "ordinary") "3"
+                  else "4",
+                  sep="")
+
+    z <- paste(if(coef[i] > 0 & (i > 2 | coef[1] != 0 | Intc != 0)) "+"
+               else NULL,
+               cof[i], "*", nam, sep="")
+
+    z2 <- paste(if(coef[i] > 0 & (i > 2 | coef[1] != 0 | Intc != 0)) "+"
+                else NULL,
+                cof[i], "*", nam2, sep="")
+
+    txt  <- paste(txt,  z,  sep="")
+    txt2 <- paste(txt2, z2, sep="")
+  }
+
+  func <- parse(text=paste('function(x)', txt))
+
+  cof <- format.sep(coef, digits)
+  kn  <- format.sep(-knots, digits)
+
+  lcof <- nchar(cof)
+  cof  <- latexSN(cof)
+
+  cur <- begin; colcnt <- nbegin; tex <- NULL
+  if(Intc != 0) {
+    fint <- format.sep(Intc, digits)
+    if(type == "integral") {
+      fint <- paste(fint, x)
+      colcnt <- colcnt + 2
+    }
+
+    cur    <- paste(cur, fint, sep="")
+    colcnt <- colcnt + nchar(fint)
+    if(coef[1] > 0) {
+      cur    <- paste(cur, " + ", sep="")
+      colcnt <- colcnt + 3
+    }
+  }
+
+  if(coef[1] != 0) {
+    sp <- if(substring.location(cof[1], "times")$first > 0) "\\:"
+          else NULL
+
+    cur <- paste(cur, cof[1], sp, x,
+                 if(type == "integral") "^2",
+                 sep="")
+
+    ## \: = medium space in LaTeX
+    colcnt <- colcnt + lcof[1] + lx + (type == "integral")
+  }
+
+  tex.names <- character(p+2)
+  size <- lx + lcof[-1] + nchar(kn) + 3
+
+  for(i in 2:(p+2)) {
+    nam <- paste("(", x,
+                 if(knots[i-1] < 0) "+"
+                 else NULL,
+                 if(knots[i-1] != 0) kn[i-1]
+                 else NULL,
+                 ")_{+}^{",
+                 if(type == "ordinary") "3}"
+                 else "4}",
+                 sep="")
+
+    q <- paste(if(coef[i] > 0 & (i > 2 | coef[1] != 0 | Intc != 0)) "+"
+               else NULL,
+               cof[i], nam, sep="")
+
+    n <- size[i-1]
+    if(colcnt + n > columns) {
+      tex <- c(tex, cur)
+      cur <- ""
+      colcnt <- 0
+    }
+
+    cur    <- paste(cur, q, sep="")
+    colcnt <- colcnt + n
+  }
+
+  tex <- c(tex, cur)
+  tex <- paste(before, tex, after)
+
+  if(Intc != 0) coef <- c(Intercept=Intc, coef)
+
+  attr(coef, "knots")         <- knots
+  attr(coef, "function")      <- func
+  attr(coef, "function.text") <- txt2
+  attr(coef, "latex")         <- tex
+  names(colcnt) <- NULL
+  attr(coef, "columns.used")  <- colcnt
+
+  coef
+}
+
+rcsplineFunction <- function(knots, coef=numeric(0), norm=2)
+{
+  k <- length(knots)
+  kd <- if(norm == 0) 1 else if(norm == 1) knots[k] - knots[k-1] else
+        (knots[k] - knots[1])^.66666666666666666666666
+
+  f <- function(x, knots, coef, kd)
+  {
+    k       <- length(knots)
+    knotnk  <- knots[k]
+    knotnk1 <- knots[k-1]
+    knot1   <- knots[1]
+    if(length(coef) < k) coef <- c(0, coef)
+    y <- coef[1] + coef[2] * x
+    for(j in 1:(k-2))
+      y <- y + coef[j+2] *
+        (pmax((x - knots[j]) / kd, 0)^3 +
+         ((knotnk1 - knots[j]) * pmax((x - knotnk) / kd, 0)^3 -
+          (knotnk  - knots[j]) * (pmax((x - knotnk1) / kd, 0)^3)) /
+         (knotnk - knotnk1))
+    y
+  }
+  formals(f) <- list(x=numeric(0), knots=knots, coef=coef, kd=kd)
+  f
+}
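rcsplineFunction, defined at the end of this file, is the simplest way to see the spline parameterization in action. A small sketch with made-up knots and coefficients (an intercept followed by the k-1 spline coefficients):

    kn <- c(-1.5, -0.5, 0, 0.5, 1.5)
    cf <- c(0.1, 1, 0.4, -0.2, 0.1)
    h  <- rcsplineFunction(knots=kn, coef=cf)
    h(c(-1, 0, 1))   # evaluate the fitted spline at new x values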
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/reShape.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/reShape.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,96 @@
+reShape <- function(x, ..., id, colvar, base, reps,
+                    times=1:reps, timevar='seqno', constant=NULL)
+{
+  if(!missing(base)) {
+    if(!is.list(x))
+      stop('x must be a list or data frame when base is given')
+
+    repvars <- as.vector(outer(base, 1:reps, paste, sep=''))
+    nam <- names(x)
+    nonrep <- nam[nam %nin% repvars]
+    res <- vector('list', 1 + length(nonrep) + length(base))
+    names(res) <- c(timevar, nonrep, base)
+    x1 <- x[[1]]
+    n <- if(is.matrix(x1)) nrow(x1)
+         else length(x1)
+
+    res[[1]] <- rep(times[1:reps], n)
+
+    for(i in nonrep) res[[i]] <- rep(x[[i]], rep(reps, n))
+
+    ## Get indexes that will put unlist() in right order
+    k <- as.vector(matrix(1:(reps*n), nrow=reps, byrow=TRUE))
+    for(i in base) {
+      bn <- paste(i, 1:reps, sep='')
+      x1 <- x[[bn[1]]]
+      at <- attributes(x1)
+      at$names <- NULL
+      x1 <- unlist(x[bn])[k]
+      if(length(at)) attributes(x1) <- at
+      res[[i]] <- x1
+    }
+
+    if(is.data.frame(x)) {
+      rn <- attr(x, 'row.names')
+      ln <- length(rn)
+      if(ln) {
+        ## R calls data.frame even if specify structure, and R does
+        ## not have dup.row.names argument to data.frame as does S+
+        return(data.frame(res,
+                          row.names=paste(rep(rn, rep(reps, ln)),
+                                          rep(1:reps, n))))
+      }
+    }
+
+    return(res)
+  }
+
+  if(is.matrix(x)) {
+    y <- as.vector(x)
+    v1 <- all.is.numeric(dimnames(x)[[1]][row(x)], 'vector')
+    v2 <- all.is.numeric(dimnames(x)[[2]][col(x)], 'vector')
+    w <- list(v1, v2, y)
+    names(w) <- c('rowvar', 'colvar', as.character(substitute(x)))
+    if(length(nd <- names(dimnames(x))))
+      names(w)[1:2] <- nd
+
+    w
+  } else {
+    listid <- is.list(id)
+    i <- as.factor(if(listid) do.call('paste', c(id, sep='~'))
+                   else id)
+
+    colvar <- as.factor(colvar)
+    m <- matrix(NA, nrow=length(levels(i)), ncol=length(levels(colvar)),
+                dimnames=list(levels(i), levels(colvar)))
+    dotlist <- list(...)
+    if(!length(dotlist)) {
+      m[cbind(i, colvar)] <- x
+      if(listid) {
+        j <- match(as.character(dimnames(m)[[1]]), as.character(i))
+        if(length(constant))
+          data.frame(id[j,,drop=FALSE], constant[j,,drop=FALSE], m)
+        else data.frame(id[j,,drop=FALSE], m)
+      } else m
+
+    } else {
+      res <- vector('list', nx <- 1 + length(dotlist))
+      names(res) <- (as.character(sys.call())[-1])[1:nx]
+      nam2 <- names(sys.call()[-1])[1:nx]
+      if(length(nam2))
+        names(res) <- ifelse(nam2 == '', names(res), nam2)
+
+      w <- m
+      w[cbind(i, colvar)] <- x
+      res[[1]] <- w
+
+      for(j in 2:nx) {
+        w <- m
+        w[cbind(i, colvar)] <- dotlist[[j-1]]
+        res[[j]] <- w
+      }
+
+      res
+    }
+  }
+}
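A quick sketch of the id/colvar branch of reShape, turning long-format records into a subject-by-time matrix (toy data):

    y     <- c(1, 2, 3, 4)
    subj  <- c('a', 'a', 'b', 'b')
    visit <- c(1, 2, 1, 2)
    reShape(y, id=subj, colvar=visit)   # 2 x 2 matrix: rows a,b; columns 1,2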
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/redun.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/redun.s	Wed Jun 28 20:28:48 2017 -0400
| b"@@ -0,0 +1,295 @@\n+redun <- function(formula, data=NULL, subset=NULL,\n+ r2=.9, type=c('ordinary','adjusted'),\n+ nk=3, tlinear=TRUE, allcat=FALSE, minfreq=0,\n+ iterms=FALSE, pc=FALSE,\n+ pr=FALSE, ...)\n+{\n+ acall <- match.call()\n+ type <- match.arg(type)\n+\n+ if(!inherits(formula,'formula'))\n+ stop('formula must be a formula')\n+\n+ a <- as.character(formula)\n+ if(length(a)==2 && a[1]=='~' && a[2]=='.' &&\n+ length(list(...))) data <- dataframeReduce(data, ...)\n+\n+ Terms <- terms(formula, specials='I', data=data)\n+ m <- list(formula=formula, data=data, subset=subset, na.action=na.delete)\n+ data <- do.call('model.frame', m)\n+ nam <- names(data)\n+ linear <- nam[attr(Terms,'specials')$I]\n+ p <- length(data)\n+ n <- nrow(data)\n+ at <- attributes(data)\n+ na.action <- at$na.action\n+ if(pr) cat(n, 'observations used in analysis\\n')\n+\n+ cat.levels <- vector('list',p)\n+ names(cat.levels) <- nam\n+ vtype <- rep('s', p); names(vtype) <- nam\n+ enough <- rep(TRUE, p)\n+\n+ for(i in 1:p)\n+ {\n+ xi <- data[[i]]\n+ ni <- nam[i]\n+\n+ iscat <- FALSE\n+ if(is.character(xi))\n+ {\n+ xi <- as.factor(xi)\n+ lev <- levels(xi)\n+ iscat <- TRUE\n+ }\n+ else if(is.factor(xi))\n+ {\n+ lev <- levels(xi)\n+ iscat <- TRUE\n+ }\n+ if(iscat)\n+ {\n+ data[[i]] <- as.integer(xi)\n+ cat.levels[[ni]] <- lev\n+ vtype[ni] <- 'c'\n+ if(minfreq > 0 && sum(table(xi) >= minfreq) < 2) enough[i] <- FALSE\n+ }\n+ else\n+ {\n+ u <- unique(xi)\n+ if(length(u) == 1)\n+ {\n+ warning(paste(ni,'is constant'))\n+ enough[i] <- FALSE\n+ }\n+ if(minfreq > 0 && length(u)==2 && sum(table(xi) >= minfreq) < 2)\n+ enough[i] <- FALSE\n+ if(nk==0 || length(u) < 3 || ni %in% linear)\n+ vtype[ni] <- 'l'\n+ }\n+ }\n+\n+ toofew <- nam[!enough]\n+ if(length(toofew))\n+ {\n+ p <- sum(enough)\n+ nam <- nam[enough]\n+ cat.levels <- cat.levels[enough]\n+ vtype <- vtype[enough]\n+ data <- data[enough]\n+ }\n+\n+ xdf <- ifelse(vtype=='l', 1, nk-1)\n+ j <- vtype=='c'\n+ if(any(j)) for(i in which(j)) xdf[i] <- length(cat.levels[[i]]) - 1\n+ names(xdf) <- nam\n+\n+ orig.df <- sum(xdf)\n+ X <- matrix(NA, nrow=n, ncol=orig.df)\n+ st <- en <- integer(p)\n+ start <- 1\n+ for(i in 1:p)\n+ {\n+ xi <- data[[i]]\n+ x <- aregTran(xi, vtype[i], nk)\n+ st[i] <- start\n+ nc <- ncol(x)\n+ xdf[i]<- nc\n+ end <- start + nc - 1\n+ en[i] <- end\n+ if(end > orig.df) stop('program logic error')\n+ X[,start:end] <- x\n+ start <- end + 1\n+ }\n+\n+ if(end < orig.df) X <- X[, 1:end, drop=FALSE]\n+ ## if couldn't derive the requested number of knots in splines\n+\n+ fcan <- function(ix, iy, X, st, en, vtype, tlinear, type,\n+ allcat, r2, minfreq)\n+ {\n+ ## Get all subscripts for variables in the right hand side\n+ k <- rep(FALSE, ncol(X))\n+ for(i in ix) k[st[i]:en[i]] <- TRUE\n+ ytype <- if(tlinear && vtype[iy]=='s')'l' else vtype[iy]\n+ Y <- if(ytype=='l') X[,st[iy],drop=FALSE] else\n+ X[,st[iy]:en[iy],drop=FALSE]\n+ d <- dim(Y); n <- d[1]; ny <- d[2]\n+ f <- cancor(X[,k,drop=FALSE], Y)\n+ R2 <- f$cor[1]^2\n+ if(type=='adjusted')\n+ {\n+ dof <- sum(k) + ny - 1\n+ R2 <- max(0, 1 - (1 - R2)*(n-1)/(n-dof-1))\n+ }\n+ ## If variable to possibly remove is categorical with more than 2\n+ ## categories (more than one dummy variable) make sure ALL frequent\n+ ## categories are redundant (not just the linear combination of\n+ ## dummies) if allcat is TRUE. 
Do this by substituting for R^2 the\n+ ## minimum R^2 over predicting each dummy variable.\n+ if(R2 > r2 && allcat && ytype=='c' && (en[iy] > st[iy]))\n+ {\n+ f"..b' r2r <- numeric(0)\n+ r2l <- list()\n+\n+ for(i in 1:p) {\n+ if(pr) cat(\'Step\',i,\'of a maximum of\', p, \'\\r\')\n+ ## For each variable currently on the right hand side ("In")\n+ ## find out how well it can be predicted from all the other "In" variables\n+ if(length(In) < 2) break\n+ Rsq <- In*0\n+ l <- 0\n+ for(j in In)\n+ {\n+ l <- l + 1\n+ k <- setdiff(In, j)\n+ Rsq[l] <- fcan(k, j, X, st, en, vtype, tlinear, type,\n+ allcat, r2, minfreq)\n+ }\n+ if(i==1) {Rsq1 <- Rsq; names(Rsq1) <- nam[In]}\n+ if(max(Rsq) < r2) break\n+ removed <- In[which.max(Rsq)]\n+ r2removed <- max(Rsq)\n+ ## Check that all variables already removed can be predicted\n+ ## adequately if new variable \'removed\' is removed\n+ k <- setdiff(In, removed)\n+ r2later <- NULL\n+ if(length(Out))\n+ {\n+ r2later <- Out*0\n+ names(r2later) <- nam[Out]\n+ l <- 0\n+ for(j in Out)\n+ {\n+ l <- l+1\n+ r2later[l] <-\n+ fcan(k, j, X, st, en, vtype, tlinear, type, allcat, r2, minfreq)\n+ }\n+ if(min(r2later) < r2) break\n+ }\n+ Out <- c(Out, removed)\n+ In <- setdiff(In, Out)\n+ r2r <- c(r2r, r2removed)\n+ if(length(r2later)) r2l[[i]] <- r2later\n+ }\n+ if(length(r2r)) names(r2r) <- nam[Out]\n+ if(length(r2l)) names(r2l) <- nam[Out]\n+ if(pr) cat(\'\\n\')\n+ \n+ structure(list(call=acall, formula=formula,\n+ In=nam[In], Out=nam[Out], toofew=toofew,\n+ rsquared=r2r, r2later=r2l, rsq1=Rsq1,\n+ n=n, p=p, na.action=na.action,\n+ vtype=vtype, tlinear=tlinear,\n+ allcat=allcat, minfreq=minfreq, nk=nk, df=xdf,\n+ cat.levels=cat.levels,\n+ r2=r2, type=type),\n+ class=\'redun\')\n+}\n+\n+print.redun <- function(x, digits=3, long=TRUE, ...)\n+{\n+ cat("\\nRedundancy Analysis\\n\\n")\n+ dput(x$call)\n+ cat("\\n")\n+ cat(\'n:\',x$n,\'\\tp:\',x$p, \'\\tnk:\',x$nk,\'\\n\')\n+ cat(\'\\nNumber of NAs:\\t\', length(x$na.action$omit), \'\\n\')\n+ a <- x$na.action\n+ if(length(a)) naprint(a)\n+ \n+ if(x$tlinear)\n+ cat(\'\\nTransformation of target variables forced to be linear\\n\')\n+ if(x$allcat)\n+ cat(\'\\nAll levels of a categorical variable had to be redundant before the\\nvariable was declared redundant\\n\')\n+ if(x$minfreq > 0)\n+ cat(\'\\nMinimum category frequency required for retention of a binary or\\ncategorical variable:\', x$minfreq, \'\\n\')\n+ if(length(x$toofew))\n+ {\n+ cat(\'\\nBinary or categorical variables removed because of inadequate frequencies:\\n\\n\')\n+ cat(x$toofew, \'\\n\')\n+ }\n+ cat(\'\\nR-squared cutoff:\', x$r2, \'\\tType:\', x$type,\'\\n\')\n+ if(long)\n+ {\n+ cat(\'\\nR^2 with which each variable can be predicted from all other variables:\\n\\n\')\n+ print(round(x$rsq1, digits))\n+ if(x$allcat)\n+ cat(\'\\n(For categorical variables the minimum R^2 for any sufficiently\\nfrequent dummy variable is displayed)\\n\\n\')\n+ }\n+ if(!length(x$Out))\n+ {\n+ cat(\'\\nNo redundant variables\\n\\n\')\n+ return(invisible())\n+ }\n+ cat(\'\\nRendundant variables:\\n\\n\')\n+ cat(x$Out)\n+ cat(\'\\n\\nPredicted from variables:\\n\\n\')\n+ cat(x$In, \'\\n\\n\')\n+ w <- x$r2later\n+ vardel <- names(x$rsquared)\n+ if(!long)\n+ {\n+ print(data.frame(\'Variable Deleted\'=vardel,\n+ \'R^2\'=round(x$rsquared,digits),\n+ row.names=NULL, check.names=FALSE))\n+ return(invisible())\n+ }\n+ later <- rep(\'\', length(vardel))\n+ i <- 0\n+ for(v in vardel)\n+ {\n+ i <- i + 1\n+ for(z in w)\n+ {\n+ if(length(z) && v %in% names(z))\n+ later[i] <- paste(later[i], 
round(z[v], digits), sep=\' \')\n+ }\n+ }\n+ print(data.frame(\'Variable Deleted\'=vardel,\n+ \'R^2\'=round(x$rsquared,digits),\n+ \'R^2 after later deletions\'=later,\n+ row.names=NULL,\n+ check.names=FALSE))\n+ invisible()\n+}\n+\n' |
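A hedged sketch of redun on simulated data: x2 is built to be almost perfectly predictable from x1, so one of the pair should be flagged as redundant at the default r2 = 0.9 cutoff:

    set.seed(3)
    d <- data.frame(x1=rnorm(100), x3=rnorm(100))
    d$x2 <- d$x1 + rnorm(100, sd=.1)   # x2 is nearly a copy of x1
    redun(~ x1 + x2 + x3, data=d, r2=.9)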
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/regexpEscape.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/regexpEscape.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,7 @@
+escapeBS <- function(string) {
+  gsub('\\\\', '\\\\\\\\\\', string)
+}
+
+escapeRegex <- function(string) {
+  gsub('([.|()\\^{}+$*?]|\\[|\\])', '\\\\\\1', string)
+}
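escapeRegex makes a literal string safe to embed in a regular expression, for example:

    escapeRegex('value (USD)')           # "value \\(USD\\)" when printed
    grepl(escapeRegex('1+1'), '1+1=2')   # TRUE: '+' is no longer a quantifier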
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/responseSummary.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/responseSummary.s	Wed Jun 28 20:28:48 2017 -0400
| b'@@ -0,0 +1,340 @@\n+## $Id$\n+\n+responseSummary <- function(formula, data, na.action=na.pass,\n+ FUN=function(y) sapply(y, mean), fun,\n+ overall=TRUE, continuous=10, na.rm=TRUE,\n+ na.include=TRUE, g, quantile.groups=4,\n+ groups=quantile.groups, nmin=0, ...) {\n+ func.call <- match.call()\n+ \n+ ## Print warnings for obsolete function arguments\n+ if(!missing(g)) {\n+ warning("argument g is depricated; use quantile.groups instead",\n+ immediate. = TRUE)\n+ quantile.groups <- g\n+ }\n+\n+ if(!missing(fun)) {\n+ warning("argument fun is depreicated; use FUN instead", immediate. = TRUE)\n+ FUN <- fun\n+ }\n+ \n+ ## create model.frame call to create data.frame needed to use formula.\n+ m <- GetModelFrame(formula=formula,specials="stratify", default.na.action=na.action)\n+ Terms <- attr(m, "terms")\n+\n+ ## Extract response and remove from model\n+ Y <- model.extract(m, "response")\n+ \n+ if(is.null(Y))\n+ stop("must have a variable on the left hand side of the formula")\n+\n+ yname <- names(m)[1]\n+ m <- m[-1]\n+\n+ ylabel <- valueLabel(Y)\n+ yunit <- valueUnit(Y)\n+\n+ ## extract stratified variables from m or create a blank\n+ ## strat if non exists.\n+ if(!is.null(attr(Terms, \'specials\')$stratify)) {\n+ temp <- untangle.specials(Terms, \'stratify\')\n+\n+ if(length(temp$vars) == 1)\n+ stratified <- m[[temp$vars]]\n+ else {\n+ stratified <- stratify(m[,temp$vars])\n+ }\n+\n+ ## Get labels and names of stratified variables\n+ stratified.Tags <- valueTags(stratified)\n+\n+ newTerms <- drop.terms(Terms, dropx=temp$terms)\n+ }\n+ else {\n+ stratified <- factor(rep(\'\',nrow(m)))\n+ stratified.Tags <- NULL\n+\n+ newTerms <- delete.response(Terms)\n+ }\n+\n+\n+ ## Number of stratified terms\n+ nstratified <- length(levels(stratified))\n+\n+ ## Create X from m using newTerms.\n+ X <- GetModelFrame(formula=newTerms, default.na.action=na.action)\n+\n+ ## Throw warning if name overall exists in X\n+ if("Overall" %in% names(X) && overall)\n+ stop("Data Frame contains a column named \'Overall\'; Name confilcts with \'overall=TRUE\' argument in function")\n+\n+ funlab <- NULL\n+ ## Check to see if fun = "%"\n+ if(!is.function(FUN)) {\n+ if (FUN == \'%\') {\n+ FUN <- function(y) {\n+ stats <- 100 * apply(y, 2, mean)\n+ names(stats) <- paste(dimnames(y)[[2]], "%")\n+ stats\n+ }\n+\n+ funlab <- paste("% of", yname)\n+ } else \n+ FUN <- match.fun(FUN)\n+ }\n+\n+ ## Compute number of descriptive statistics per cell\n+\n+ ## find vector of rows that are NA\n+ s <- is.na(Y)\n+\n+ if(is.matrix(s))\n+ s <- as.vector(s %*% rep(1, ncol(s)), mode="logical")\n+\n+\n+ ## Run fun on non NA elements of Y\n+ if(is.matrix(Y))\n+ stats <- FUN(Y[!s,, drop=FALSE])\n+ else\n+ stats <- FUN(Y[!s, drop=FALSE])\n+\n+ nstats <- length(stats)\n+\n+ ## Create the names of the columns of summary output\n+ dn <- dimnames(stats)\n+ if(length(dn) == 2)\n+ name.stats <- as.vector(outer(dn[[1]], dn[[2]], FUN=function(a,b) paste(b,a)))\n+ else\n+ name.stats <- names(stats)\n+\n+ if(is.null(name.stats)) {\n+ if(nstats == 1)\n+ name.stats <- yname\n+ else\n+ name.stats <- paste(yname, 1:nstats, sep="")\n+ }\n+\n+ ## Figure out the funlab name\n+ if(is.null(funlab))\n+ funlab <- yname\n+\n+ ## find number of missing observations\n+ numberMissing <- sum(s)\n+\n+ if(numberMissing) {\n+ if(is.matrix(Y))\n+ Y <- Y[!s,, drop=FALSE]\n+ else\n+ Y <- Y[!s, drop=FALSE]\n+\n+ X <- X[!s,, drop=FALSE]\n+ stratified <- stratified[!s]\n+ }\n+\n+\n+ ## Compute total number of columns\n+ ncolumns <- nstratified * (1 + nstats)\n+\n+ colNames <- 
rep(c(\'N\', name.stats), nstratified)\n+\n+ ## Initialize default values\n+ n <- NROW(X)\n+\n+ subsetX <- function(x, ...) { \n+ tags <- valueTags(x)\n+\n+ if(length(x) == 0) {\n+ return(x)\n+ }\n+ \n+ if(!is.matrix(x)) {\n+ ## Find all na\'s in x\n+'..b's to column name or NA\n+ x <- ifelse(x, rep(cnames, each=n), NA)\n+ }\n+\n+ valueTags(x) <- tags\n+ return(x)\n+ }\n+\n+ ## Subset X\n+ X <- lapply(X, FUN=subsetX, ...)\n+ \n+## if(is.matrix(Y)) {\n+## Y <- split(Y, row(Y))\n+## \n+## procY <- function(y) do.call(rbind, y)\n+## } else {\n+## procY <- function(y) y\n+## }\n+ \n+\n+ comp.stats <- function(grouped.y) {\n+ ans <- c(length(grouped.y), FUN(grouped.y))\n+ names(ans) <- c(\'N\', name.stats)\n+ ans\n+ }\n+\n+ ## create stats for each element of X\n+ processX <- function(x) {\n+ if(is.mChoice(x)) {\n+ } else {\n+ xstats <- tapply(Y, list(x, stratified), FUN=comp.stats)\n+ }\n+ \n+ valueTags(xstats) <- valueTags(x)\n+ xstats\n+ }\n+\n+ Xstats <- lapply(X, FUN=processX)\n+\n+ ## if overall selected add Overall column\n+ if(overall) {\n+ overall <- tapply(Y, stratified, FUN=comp.stats)\n+ overall <- matrix(overall, ncol=dim(overall), dimnames=list(NULL, dimnames(overall)[[1]]))\n+ Xstats$Overall <- overall\n+ }\n+ \n+ \n+# str(Xstats)\n+ newAttr <- list(terms=Terms, call=match.call(), n=n, nmissing=numberMissing, yname=yname,\n+ ylabel=ylabel, ycolnames=colnames(Y), funlab=funlab,\n+ stratified.Tags=stratified.Tags, stratified.levels=levels(stratified))\n+ attributes(Xstats) <- c(attributes(Xstats), newAttr)\n+ class(Xstats)<- \'responseSummary\'\n+ return(Xstats)\n+}\n+\n+print.responseSummary <- function(x,\n+ valueNames = c(\'labels\',\'names\'),\n+ vnames, printUnits = TRUE, prUnits,\n+ abbreviate.dimnames=FALSE,\n+ prefix.width, min.colwidth, formatArgs=NULL,\n+ ...) {\n+\n+ if(missing(valueNames) && !missing(vnames)){\n+ warning("argument vnames is depricated; use valueNames instead",\n+ immediate. = TRUE)\n+ valueNames <- vnames\n+ }\n+ \n+ if(missing(printUnits) && !missing(prUnits)){\n+ warning("argument prUnits is depricated; use printUnits instead",\n+ immediate. 
= TRUE)\n+ printUnits <- prUnits\n+ }\n+\n+ x.orig <- x\n+ \n+ ## fuzy match value of varNames to default options\n+ valueNames <- match.arg(valueNames)\n+\n+ ## Get attributes of x for further use\n+ xattribs <- attributes(x)\n+ attributes(x) <- NULL\n+ \n+ ## Set useLabels flag to TRUE if user wants to use labels\n+ ## instead of names\n+ useLabel <- valueNames == \'labels\'\n+\n+ if(useLabel && !is.null(xattribs$ylabel)) {\n+ yname <- xattribs$ylabel\n+ } else {\n+ yname <- xattribs$yname\n+ }\n+ cat(yname)\n+ \n+ ## If more then one stratifed levels make by line\n+ if(length(xattribs$stratified.levels) > 1) {\n+ if(useLabel && !is.null(xattribs$stratified.Tags$label)) {\n+ strat.name <- xattribs$stratified.Tags$label\n+ } else {\n+ strat.name <- xattribs$stratifed.Tags$label\n+ }\n+\n+ cat(\' by\', strat.name)\n+ }\n+\n+ cat(\' N=\', xattribs$n, sep=\'\')\n+ \n+ if(xattribs$nmissing) {\n+ cat(\' ,\', xattribs$nmissing, \'Missing\')\n+ }\n+\n+ cat(\'\\n\\n\')\n+\n+ if(useLabel) {\n+ labels <- unlist(lapply(x, function(x) if(is.null(lab <- valueLabel(x))) NA else lab))\n+ \n+ names(x) <- ifelse(is.na(labels), xattribs$names, labels)\n+ }\n+\n+ print.char.list(x, abbreviate.dimnames=abbreviate.dimnames, print.it=TRUE, ...)\n+ invisible(x.orig)\n+}\n+\n+\n+latex.responseSummary <- function(object,\n+ title=first.word(deparse(substitute(object))),\n+ caption,\n+ trios,\n+ vnames=c(\'labels\', \'names\'),\n+ prn=TRUE,\n+ prUnits=TRUE,\n+ rowlabel=\'\',\n+ cdec=2,\n+ ncaption=TRUE,\n+ ...) {\n+ ## Fix lazy evaluation\n+ title <- title\n+ \n+ \n+}\n' |
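A usage sketch only, since the tail of this file is truncated above; d, y, sex, age.group, and treatment are hypothetical names. The formula interface summarizes the response within levels of each right-hand-side variable, optionally stratified:

    responseSummary(y ~ sex + age.group + stratify(treatment), data=d)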
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/rm.boot.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/rm.boot.s	Wed Jun 28 20:28:48 2017 -0400
| b'@@ -0,0 +1,456 @@\n+rm.boot <- function(time, y, id=seq(along=time), subset=TRUE,\n+ plot.individual=FALSE,\n+ bootstrap.type=c(\'x fixed\',\'x random\'),\n+ nk=6, knots, B=500, smoother=supsmu, \n+ xlab, xlim, ylim=range(y), \n+ times=seq(min(time),max(time),length=100),\n+ absorb.subject.effects=FALSE, rho=0,\n+ cor.pattern=c(\'independent\',\'estimate\'), ncor=10000,\n+ ...)\n+{\n+ bootstrap.type <- match.arg(bootstrap.type)\n+ absorb.subject.effects <- absorb.subject.effects & !missing(id)\n+ if(!is.function(cor.pattern))\n+ cor.pattern <- match.arg(cor.pattern)\n+ \n+ if(!(is.character(cor.pattern) && cor.pattern==\'independent\') && \n+ rho!=0)\n+ stop("can\'t specify both cor.pattern=\'estimate\' and rho")\n+ \n+ if(rho != 0)\n+ cor.pattern <- \'equal correlation\'\n+ \n+ dodep <- rho !=0 || !is.character(cor.pattern) || cor.pattern==\'estimate\'\n+\n+ ## X fixed also implies that subjects are fixed\n+\n+ id <- as.character(id)\n+ ylab <- label(y)\n+ if(ylab==\'\')\n+ ylab <- \'y\'\n+ \n+ if(missing(xlab)) {\n+ xlab <- units(time)\n+ if(xlab==\'\')\n+ xlab <- \'Time\'\n+ }\n+\n+ if(length(subset) > 1) {\n+ id <- id[subset];\n+ time <- time[subset]; y <- y[subset]\n+ }\n+\n+ s <- is.na(time + y)\n+ if(any(s)) {\n+ s <- !s\n+ id <- id[s]\n+ time <- time[s]\n+ y <- y[s]\n+ }\n+ ## Need to order data so that a subject\'s records stay together\n+ ## Otherwise, the mean residuals at each time will not vary over resamples\n+ ## when bootstrap.type=\'x fixed\'\n+\n+ s <- order(id, time)\n+ id <- id[s];\n+ time <- time[s];\n+ y <- y[s]\n+\n+ if(bootstrap.type==\'x fixed\' && diff(range(table(id))) != 0) \n+ warning(\'To work properly with bootstrap.type="x fixed" all subjects must have the same # observations\')\n+\n+ n <- length(y)\n+\n+ clusters <- unique(id)\n+\n+ if(plot.individual) {\n+ ploti <- function(time, y, id, clusters, xlim, ylim, xlab, ylab, \n+ smoother, ...)\n+ {\n+ plot(0,0,xlim=range(pretty(range(time))),ylim=ylim,\n+ xlab=xlab, ylab=ylab, type=\'n\')\n+ j <- 0\n+ for(i in clusters) {\n+ s <- id==i\n+ j <- j+1\n+ lines(smoother(time[s],y[s],...),lty=j)\n+ }\n+ }\n+ \n+ ploti(time, y, id, clusters, xlim, ylim, xlab, ylab, smoother, ...)\n+ }\n+\n+ if(nk==0) knots <- double(0)\n+ if(missing(knots) && nk>0) {\n+ knots <- rcspline.eval(time,nk=nk,knots.only=TRUE)\n+ if(length(knots) != nk) {\n+ warning(\'could not obtain requested number of knots\')\n+ nk <- length(knots) \n+ }\n+ } else nk <- length(knots)\n+ \n+ p <- if(nk==0) 1\n+ else nk-1\n+\n+ X.times <- if(nk==0) as.matrix(times)\n+ else rcspline.eval(times, knots, inclx=TRUE)\n+\n+ X.Time <- if(nk==0) as.matrix(time)\n+ else rcspline.eval(time, knots, inclx=TRUE)\n+ \n+ X <- if(missing(id)) cbind(X.Time,1)\n+ else \n+ model.matrix(~ X.Time+id-1,\n+ data=list(X.Time=X.Time,id=as.factor(id)))\n+ \n+ ## was id=id 3Apr02 Thanks: Don MacQueen, for R\n+\n+ f <- lm.fit.qr.bare(X, y, intercept=FALSE)\n+ res <- f$residuals\n+ sigma2 <- sum(res^2)/n\n+\n+ if(absorb.subject.effects) {\n+ mean.intercept <- mean(c(0,f$coef[-(1:p)]))\n+ y <- y + mean.intercept - (f$coef[-(1:p)])[paste(\'id\',id,sep=\'\')]\n+ if(plot.individual) {\n+ ploti(time, y, id, clusters, xlim, ylim, xlab, ylab, smoother, ...)\n+ title(\'Raw Data Adjusted to Have a Common Intercept\')\n+ }\n+ }\n+\n+ if(is.character(cor.pattern) && cor.pattern==\'estimate\') {\n+ timediff <- product <- single(ncor)\n+ used <- 0\n+ i <- 0\n+ meanres <- tapply(res, time, mean)\n+ names(meanres) <- as.numeric(names(meanres))\n+ sdres <- sqrt(tapply(res, time, var))\n+ 
names(sdres) <- as.numeric(names(sdres))\n+ if(any(is.na(sdres)))\n+ stop(\'one or more times occur in only one subject\')\n+\n+ for(wid in clusters) {\n+ '..b"obj$rho==0 && is.character(obj$cor.pattern))\n+ '-2 log L'\n+ else 'dep -2 log L'\n+\n+ sse <- switch(objective, \n+ sse = obj$sse,\n+ '-2 log L' = obj$loglik,\n+ 'dep -2 log L' = obj$loglik.dep)\n+\n+ B <- length(sse)\n+ Coef <- obj$Coef\n+ times <- obj$times\n+\n+ if(!missing(obj2)) {\n+ if((length(times) != length(obj2$times)) || \n+ (any(times != obj2$times, na.rm=TRUE)))\n+ stop('times vector must be identical for both rm.boot objects')\n+ \n+ times <- ifelse(is.na(times), NA, obj2$times)\n+ sse <- sse + obj2$sse\n+ if(missing(ylab))\n+ ylab <- paste(obj$ylab,'-',obj2$ylab) \n+ }\n+\n+ ## order from best -2 log likelihood or sum of squared errors to worst\n+ i <- order(sse)\n+ ## Select best confidence coefficient*B estimates\n+ conf <- if(multi) max(multi.conf)\n+ else conf.int\n+ \n+ i <- i[1:round(conf*B)]\n+ if(i[1] != 1)\n+ warning(paste('design is imbalanced enough that best log likelihood or SSE was not\\n',\n+ 'obtained from overall fit (objective=',format(sse[1]),') but from\\n',\n+ 'a bootstrap fit (objective=',format(sse[i[1]]),\n+ ')\\nThis can also happen if the objective is not -2 log L',sep=''))\n+\n+ ## Evaluate all fits on time grid and compute point by point max and min\n+\n+ curves <- cbind(1,obj$X.times) %*% t(Coef)\n+ if(!missing(obj2)) {\n+ curves <- curves - cbind(1,obj2$X.times) %*% t(obj2$Coef)\n+ if(missing(ylim))\n+ ylim <- range(curves[,i])\n+ }\t\t\t\n+ \n+ if(multi) {\n+ multi.method <- match.arg(multi.method)\n+ if(missing(xlim))\n+ plot(times, curves[,1], type='n',\n+ xlab=xlab, ylab=ylab, ylim=ylim)\n+ else\n+ plot(times, curves[,1], type='n',\n+\t xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim)\n+ \n+ title(paste('Simultaneous',min(multi.conf),'-',max(multi.conf),\n+\t\t'Confidence Regions'))\n+ high.prev <- low.prev <- curves[,1]\n+ for(j in 1:length(multi.conf)) {\n+ ii <- i[1:round(multi.conf[j]*B)]\n+ high <- apply(curves[,ii], 1, max)\n+ low <- apply(curves[,ii], 1, min)\n+ if(multi.method=='density') {\n+\tpolygon(c(times,rev(times)), c(high.prev,rev(high)), \n+ density=multi.density[j])\n+\tpolygon(c(times,rev(times)), c(low.prev, rev(low)), \n+ density=multi.density[j])\n+ } else {\n+\tpolygon(c(times,rev(times)), c(high.prev,rev(high)), \n+ col=multi.col[j])\n+\tpolygon(c(times,rev(times)), c(low.prev, rev(low)), \n+ col=multi.col[j])\n+ }\n+ \n+ high.prev <- high; low.prev <- low\n+ }\n+ \n+ lines(times, curves[,1], lwd=2, col=0) ## point estimates in white\n+ } else {\n+ if(add)\n+ lines(times, curves[,1])\n+ else {\n+ if(missing(xlim))\n+ plot(times, curves[,1], type='l',\n+ xlab=xlab, ylab=ylab, ylim=ylim)\n+ else\n+\tplot(times, curves[,1], type='l',\n+ xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim)\n+ \n+ title(paste('Simultaneous',conf.int,'Confidence Region'))\n+ }\n+\n+ high <- apply(curves[,i], 1, max)\n+ low <- apply(curves[,i], 1, min)\n+ lines(times, high, lty=2)\n+ lines(times, low, lty=2)\n+ }\n+ \n+ result <- list(times=times, fitted=curves[,1], lower=low, upper=high)\n+\n+ if(individual.boot || curves.in.simultaneous.band) {\n+ subs <- if(individual.boot) 1:B\n+ else i\n+ \n+ if(!missing(ncurves))\n+ subs <- sample(subs, ncurves)\n+ \n+ for(j in subs)\n+ lines(times, curves[,j], lty=2)\n+ }\n+\n+ if(pointwise.band) {\n+ p <- apply(curves, 1, quantile, probs=c((1-conf.int)/2,1-(1-conf.int)/2))\n+ lines(times,p[1,],col=col.pointwise.band)\n+ 
lines(times,p[2,],col=col.pointwise.band)\n+ result <- c(result, list(pointwise.lower=p[1,], pointwise.upper=p[2,]))\n+ }\n+ \n+ if(!add && subtitles) {\n+ title(sub=obj$bootstrap.type,adj=1)\n+ title(sub=paste(B-1,'bootstrap repetitions'),adj=0)\n+ }\n+\n+ invisible(result)\n+}\n" |
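A hedged sketch of rm.boot on simulated repeated-measures data, assuming the plot function shown above is dispatched for the returned object; B is kept small for speed:

    set.seed(4)
    subject <- rep(1:20, each=5)
    time    <- rep(1:5, 20)
    y       <- 2 + .5*time + rnorm(100)
    f <- rm.boot(time, y, subject, B=100, nk=3)
    plot(f)   # simultaneous confidence region for the time-response curve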
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/samplesize.bin.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/samplesize.bin.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,30 @@
+## Rick Chappell <> Asst. Professor, Depts. of Statistics and Human Oncology
+## <> University of Wisconsin at Madison <> chappell@stat.wisc.edu
+## (608) 263-5572 / 262-2733 <> take logs
+
+samplesize.bin <- function(alpha, beta, pit, pic, rho=.5)
+{
+  ## alpha is the scalar ONE-SIDED test size, or two-sided size/2
+  ## beta is a scalar or vector of powers
+  ## pit is the hypothesized treatment probability of success
+  ## pic is the hypothesized control probability of success
+  ## returns required TOTAL sample size, using arcsin transformation
+  ## rho is the proportion of the sample devoted to treated group (0 < rho < 1)
+
+  as <- function(x)
+  {
+    asin(sqrt(x))
+  }
+
+  invas <- function(x) {
+    (sin(x))**2
+  }
+
+  Zalpha <- qnorm(1-alpha)
+  Zbeta  <- qnorm(beta)
+  n <- Zalpha + Zbeta
+  n <- n/(as(pit) - as(pic))
+  n <- (n**2)/(4*rho*(1-rho))
+  round(n+.5, 0)
+}
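A worked example: with one-sided alpha = 0.025 and power 0.90 for success probabilities 0.5 versus 0.3 under equal allocation, the arcsine formula gives roughly 249 subjects in total:

    samplesize.bin(alpha=.025, beta=.90, pit=.5, pic=.3)   # approximately 249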
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/sas.get.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/sas.get.s	Wed Jun 28 20:28:48 2017 -0400
| b'@@ -0,0 +1,1602 @@\n+## $Id$\n+sas.get <- \n+ function(libraryName,\n+ member,\n+ variables = character(0), \n+ ifs = character(0), \n+ format.library = libraryName,\n+ id, \n+ dates. = c("sas","yymmdd","yearfrac","yearfrac2"), \n+ keep.log = TRUE,\n+ log.file = "_temp_.log", \n+ macro = sas.get.macro,\n+ data.frame.out = existsFunction("data.frame"), \n+ clean.up = FALSE,\n+ quiet = FALSE,\n+ temp = tempfile("SaS"), \n+ formats=TRUE,\n+ recode=formats, \n+ special.miss=FALSE,\n+ sasprog="sas",\n+ as.is=.5,\n+ check.unique.id=TRUE,\n+ force.single=FALSE,\n+ pos,\n+ uncompress=FALSE,\n+ defaultencoding="latin1")\n+{\n+ if(force.single) stop(\'force.single does not work under R\')\n+ dates. <- match.arg(dates.)\n+\n+ fexists <- function(name) {\n+ w <- file.exists(name)\n+ attr(w, \'which\') <- name[w]\n+ w\n+ }\n+\n+ file.is.dir <- function(name) {\n+ isdir <- file.info(name)$isdir\n+ isdir && !is.na(isdir)\n+ }\n+\n+ file.is.readable <- function(name) file.access(name,4)==0\n+\n+ fileShow <- function(x) file.show(x)\n+\n+ if(recode) formats <- TRUE\n+\n+ if(missing(formats) || formats) {\n+ ## ***** Next line begins mod from Mike Kattan edits 11 Sep 97\n+ ## Redone FEH 22Oct00\n+ no.format <- all(!fexists(file.path(format.library,\n+ c(\'formats.sc2\',\'formats.sct\',\'formats.sct01\',\'formats.sas7bcat\'))))\n+ if(no.format) {\n+ if((!missing(formats) && formats) || (!missing(recode) && recode))\n+ warning(paste(paste(format.library, \n+ "/formats.sc? or formats.sas7bcat",sep = ""), \n+ " not found. Formatting ignored. \\n"))\n+ formats <- recode <- FALSE\n+ }\n+ ## ***** End Mike Kattan edits 11 Sep 97\n+ }\n+ \n+ ## 5 Changes here from Claudie Berger <claudie@osteo1.ri.mgh.mcgill.ca> 19feb00\n+ ## Allows work on sas v7.\n+ sasin <- paste(temp, ".3.sas", sep = "")\n+ sasout1 <- paste(temp, ".1.sas", sep = "")\n+ sasout2 <- paste(temp, ".2.sas", sep = "")\n+ sasout3 <- paste(temp, ".4.sas", sep = "")\n+ sasout4 <- paste(temp, ".5.sas", sep = "")\n+ nvariables <- length(variables)\n+ if(nvariables>0) {\n+ if(any(jdup <- duplicated(variables)))\n+ stop(paste("duplicate variables requested: ", variables[jdup]))\n+ }\n+ \n+ varstring <- paste(variables, collapse = "\\n ")\n+ ifs <- paste("\'",paste(ifs, collapse = ";\\n "),"\'",sep="")\n+ if(length(sasin) != 1)\n+ stop("Illegal temporary file name")\n+ \n+ temp.files <- c(sasin, sasout1, sasout2, sasout3, sasout4)\n+ if(!keep.log)\n+ temp.files <- c(temp.files, log.file)\n+ \n+ if(clean.up)\n+ on.exit(unlink(temp.files))\n+ ##on.exit(sys(paste("rm -f", paste(temp.files, collapse = " "))))\n+ ## 4oct03\n+ \n+ if(missing(member))\n+ stop("SAS member name is required")\n+ \n+ if(missing(libraryName))\n+ stop("SAS library name is required")\n+\n+ ## Encoding added by Reinhold Koch 24Jan14 <reinhold.koch@roche.com>\n+ cat("%LET DEFAULTE=", defaultencoding, ";\\n", sep="", file=sasin)\n+ cat(macro, sep="\\n", file=sasin, append=TRUE)\n+\n+ sasds.suffix <- c(\'sd2\',\'sd7\',\'ssd01\',\'ssd02\',\'ssd03\',\'ssd04\',\'sas7bdat\') \n+ ## 22Oct00\n+\n+ if(libraryName == "") libraryName <- "."\n+ if(!file.is.dir(libraryName))\n+ stop(paste(sep = "", "library, \\"", libraryName, \n+ "\\", is not a directory"))\n+\n+ unix.file <- file.path(libraryName, paste(member, sasds.suffix, sep="."))\n+\n+ if(uncompress) {\n+ if(any(fe <- fexists(paste(unix.file,".gz",sep=""))))\n+ system(paste("gunzip ", attr(fe,\'which\'),\'.gz\',sep=\'\'))\n+ else if(any(fe <- fexists(paste(unix.file,".Z",sep=""))))\n+ system(paste("uncompress 
",attr(fe,\'which\'),\'.Z\',sep=\'\'))\n+ }\n+\n+ if(!any(fe <- fexists(unix.file))) {\n+ stop(paste(sep = "", "Unix file, \\"",\n+ paste(unix.file,collapse='..b's(dsinfo)))\n+ if(length(FUN))\n+ attr(dsinfo, \'FUN\') <- funout\n+\n+ invisible(dsinfo)\n+ } else if(nds > 1)\n+ res\n+ else w\n+}\n+\n+## Use dataload program to create a structure like read.xport does\n+read.xportDataload <- function(file, dsnames) {\n+ outf <- substring(tempfile(tmpdir=\'\'),2)\n+ file.copy(file, paste(tempdir(),outf,sep=\'/\'))\n+ curwd <- getwd()\n+ on.exit(setwd(curwd))\n+ setwd(tempdir())\n+ n <- length(dsnames)\n+ w <- vector(\'list\', n); names(w) <- dsnames\n+ for(a in dsnames) {\n+ status <- system(paste(\'dataload\', outf, \'zzzz.rda\', a),\n+ intern=FALSE)\n+ if(status==0) {\n+ load(\'zzzz.rda\')\n+ names(zzzz) <- makeNames(names(zzzz))\n+ w[[a]] <- zzzz\n+ }\n+ }\n+\n+ w\n+}\n+\n+utils::globalVariables(c("NOBS", "memname", "memlabel"))\n+## Read _contents_.csv and store it like lookup.xport output\n+lookupSASContents <- function(sasdir) {\n+ w <- read.csv(paste(sasdir,\'_contents_.csv\',sep=\'/\'), as.is=TRUE)\n+ z <- tapply(w$NOBS, w$MEMNAME, function(x)x[1])\n+ if(any(z == 0)) {\n+ cat(\'\\nDatasets with 0 observations ignored:\\n\')\n+ print(names(z)[z == 0], quote=FALSE)\n+ w <- subset(w, NOBS > 0)\n+ }\n+\n+ w$TYPE <- ifelse(w$TYPE==1, \'numeric\', \'character\')\n+ names(w) <- tolower(names(w))\n+ unclass(split(subset(w,select=-c(memname,memlabel)), w$memname))\n+}\n+\n+## Read all SAS csv export files and store in a list\n+readSAScsv <- function(sasdir, dsinfo, dsnames=names(dsinfo)) {\n+ sasnobs <- sapply(dsinfo, function(x)x$nobs[1])\n+ multi <- length(dsnames) > 1\n+ if(multi) {\n+ w <- vector(\'list\', length(dsnames))\n+ names(w) <- dsnames\n+ }\n+\n+ for(a in dsnames) {\n+ z <- read.csv(paste(sasdir,\'/\',a,\'.csv\', sep=\'\'),\n+ as.is=TRUE, blank.lines.skip=FALSE,\n+ comment.char="")\n+\n+ importedLength <- length(z[[1]])\n+ if(importedLength != sasnobs[a])\n+ cat(\'\\nError: NOBS reported by SAS (\',sasnobs[a],\') for dataset \',\n+ a,\' is not the same as imported length (\', importedLength,\n+ \')\\n\', sep=\'\')\n+\n+ if(multi)\n+ w[[a]] <- z\n+ }\n+\n+ if(multi)\n+ w\n+ else z\n+}\n+\n+\n+\n+csv.get <- function(file, lowernames=FALSE, datevars=NULL, datetimevars=NULL,\n+ dateformat=\'%F\', fixdates=c(\'none\',\'year\'),\n+ comment.char = "", autodates=TRUE, allow=NULL,\n+ charfactor=FALSE,\n+ sep=\',\', skip=0, vnames=NULL, labels=NULL, ...){\n+ fixdates <- match.arg(fixdates)\n+ if(length(vnames))\n+ vnames <- scan(file, what=character(0), skip=vnames-1, nlines=1,\n+ sep=sep, quiet=TRUE)\n+ if(length(labels))\n+ labels <- scan(file, what=character(0), skip=labels-1, nlines=1,\n+ sep=sep, quiet=TRUE)\n+\n+ w <- if(length(vnames))\n+ read.csv(file, check.names=FALSE, comment.char=comment.char,\n+ header=FALSE, col.names=vnames, skip=skip, sep=sep, ...)\n+ else read.csv(file, check.names=FALSE, comment.char=comment.char,\n+ sep=sep, skip=skip, ...)\n+ n <- nam <- names(w)\n+ m <- makeNames(n, unique=TRUE, allow=allow)\n+ if(length(labels)) n <- labels\n+ if(lowernames)\n+ m <- casefold(m)\n+ \n+ changed <- any(m != nam)\n+ if(changed)\n+ names(w) <- m\n+\n+ if(autodates) {\n+ tmp <- w\n+ names(tmp) <- NULL\n+\n+ for(i in 1:length(tmp)) {\n+ if(! 
is.character(tmp[[1]]))\n+ next\n+ }\n+ }\n+ cleanup.import(w,\n+ labels=if(length(labels))labels else if(changed)n else NULL,\n+ datevars=datevars, datetimevars=datetimevars,\n+ dateformat=dateformat,\n+ fixdates=fixdates, charfactor=charfactor)\n+}\n+\n+\n+sasdsLabels <- function(file)\n+{\n+ w <- scan(file, sep=\'\\n\', what=\'\', quiet=TRUE)\n+ i <- grep(\'Data Set Name:\', w)\n+ if(!length(i))\n+ return(NULL)\n+ \n+ n <- tolower(sub(\'.*\\\\.([A-Z0-9\\\\_]*)[[:space:]]+.*\',\'\\\\1\',w[i]))\n+ w <- gsub(\'\\t\',\'\',w)\n+ labs <- ifelse(nchar(w[i-1])==0,w[i-2],w[i-1])\n+ names(labs) <- n\n+ labs\n+}\n' |
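csv.get, defined near the end of this file, works without a SAS installation. A sketch with a hypothetical file name and date column; the argument values are illustrative:

    d <- csv.get('trial.csv', lowernames=TRUE,
                 datevars='visitdate', dateformat='%Y-%m-%d')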
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/scat1d.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/scat1d.s	Wed Jun 28 20:28:48 2017 -0400
| b'@@ -0,0 +1,505 @@\n+## $Id$\n+## -*-S-*- Improvements due to Martin Maechler <maechler@stat.math.ethz.ch>\n+\n+scat1d <- function(x, side=3, frac=.02, jitfrac=.008, tfrac, \n+ eps=ifelse(preserve,0,.001),\n+ lwd=0.1, col=par(\'col\'), y=NULL, curve=NULL,\n+ bottom.align=FALSE, preserve=FALSE, fill=1/3, limit=TRUE, \n+ nhistSpike=2000, nint=100, \n+ type=c(\'proportion\',\'count\',\'density\'),\n+ grid=FALSE,\n+ ...)\n+{\n+ type <- match.arg(type)\n+ if(length(x) >= nhistSpike)\n+ return(histSpike(x, side=side, type=type,\n+ frac=2.5 * frac, col=col, y=y, curve=curve,\n+ bottom.align=if(type==\'density\') TRUE else bottom.align, \n+ add=TRUE, nint=nint, grid=grid, ...))\n+\n+ gfun <- ordGridFun(grid)\n+ if(side==1 || side==3 || length(y) || length(curve)) {\n+ l <- 1:2\n+ ax <- 1\n+ } else {\n+ l <- 3:4\n+ ax <- 2\n+ }\n+ \n+ pr <- parGrid(grid)\n+ usr <- pr$usr; pin <- pr$pin; uin <- pr$uin\n+ \n+ u <- usr[ l]\n+ u.opp <- usr[- l]\n+ w <- u[2] - u[1]\n+ ## Start JOA 12.8.97 : handle xy missings parallel\n+ if (length(y) > 1) { ## length=1 special case needed for datadensity\n+ if (length(x) != length(y))\n+ stop("y must have same length as x (or length(y)=1)")\n+\n+ selector <- ! (is.na(x) | is.na(y))\n+ x <- unclass(x[selector])\n+ y <- unclass(y[selector])\n+ } else x <- unclass(x[! is.na(x)])\n+ ## Stop JOA 12.8.97\n+ \n+ if(length(curve))\n+ y <- approx(curve, xout=x, rule=2)$y\n+\n+ n <- length(x)\n+ if(missing(tfrac))\n+ tfrac <- if(n < 125) 1 else max(.1, 125 / n)\n+ else if (tfrac < 0 || tfrac > 1)\n+ stop("must have 0 <= tfrac <= 1")\n+\n+ ## Start JOA 19.8.97\n+ if(jitfrac > 0 && any(duplicated( if(eps > 0) round(x / w / eps) else x )))\n+ if (preserve)\n+ x <- jitter2(x, fill=fill, limit=limit, eps=w * eps)\n+ else\n+ ## Stop JOA 19.8.97\n+ x <- x + runif(n, -w * jitfrac, w * jitfrac)\n+ h <- min(pin) * frac / uin[- ax]\n+ h2 <- h / 2\n+ if(grid && length(y) && inherits(y, \'unit\')) {\n+ h <- unit(frac, \'npc\')\n+ h2 <- unit(frac/2, \'npc\')\n+ }\n+ if(length(y)) {\n+ a <- y - h2\n+ b <- y + h2\n+ } else {\n+ a <- if(side < 3) u.opp[1]\n+ else u.opp[2] - h\n+ b <- if(side < 3) u.opp[1] + h\n+ else u.opp[2]\n+ }\n+ if(tfrac < 1) {\n+ l <- tfrac * (b - a)\n+ a <- a + runif(n) * (b - l - a) ##runif(n, a, b-l) if frac>0\n+ b <- a + l\n+ }\n+ if(ax == 1 && bottom.align) {\n+ a <- a + h2\n+ b <- b + h2\n+ }\n+ if(ax==1)\n+ gfun$segments(x, a, x, b, lwd=lwd, xpd=frac < 0, col=col)\n+ else\n+ gfun$segments(a, x, b, x, lwd=lwd, xpd=frac < 0, col=col)\n+ invisible()\n+}\n+\n+\n+jitter2 <- function(x,...) 
UseMethod("jitter2")\n+\n+jitter2.default <- function(x, fill=1/3, limit=TRUE, eps=0,\n+ presorted=FALSE, ...)\n+{ \n+ x2 <- x[!is.na(x)]\n+ if (!presorted){\n+ o <- order(x2)\n+ x2 <- x2[o]\n+ }\n+\n+ r <- if (eps > 0) rle(round(x2 / eps) * eps) else rle(x2)\n+\n+ if ( length(r$length) < 2 || max(r$length) < 2 )\n+ return(x)\n+\n+ d <- abs(diff(r$values))\n+ d <- pmin( c(d[1], d), c(d, d[length(d)]) )\n+ who <- rep(r$lengths > 1, r$lengths)\n+ d <- d[r$lengths > 1] * fill / 2\n+ if (is.logical(limit) && limit) limit <- min(d)\n+\n+ if (limit) d <- pmin(d,limit)\n+\n+ r$values <- r$values[r$lengths > 1] - d\n+ r$lengths <- r$lengths[r$lengths > 1]\n+ d <- d * 2 / (r$lengths - 1)\n+ k <- length(r$lengths)\n+ n <- sum(who)\n+ val <- rep(r$values, r$lengths)\n+ add <- (0 : (n - 1)) - rep(c(0, cumsum(r$lengths[-k])), r$lengths)\n+ add <- add[order(rep(1 : k, r$lengths), runif(n))]\n+ add <- add * rep(d, r$lengths)\n+ val <- val + add\n+ x2[who] <- val\n+ if (!presorted)\n+ x2[o]<-x2\n+\n+ x[!is.na(x)] <- x2\n+ x\n+}\n+\n+\n+jitter2.data.frame <- function(x, ...)\n+{\n+ as.data.frame(lapply(x,\n+ function(z,...)\n+ {\n+ '..b'tangles=cbind(.02, .4 * tabg / maxfreq),\n+ inches=FALSE,\n+ col=col.group[g])\n+ else text(xx, rep(y + .1, nl), format(tabg),\n+ cex=cex.axis * sqrt(tab / maxfreq),\n+ adj=.5)\n+ }\n+ }\n+ \n+ mtext(if(length(labels))labels[i]\n+ else nams[i],\n+ 2, 0, at = y, srt = 0, cex = cex.var, adj = 1, las=1)\n+ ## las=1 for R (also 3 lines down)\n+\n+ if(show.na && nna > 0)\n+ outerText(fnumna[i], y, cex=cex.var)\n+ }\n+ \n+ invisible()\n+}\n+\n+\n+histSpike <-\n+ function(x, side=1, nint=100, frac=.05, minf=NULL, mult.width=1,\n+ type=c(\'proportion\',\'count\',\'density\'),\n+ xlim=range(x),\n+ ylim=c(0,max(f)), xlab=deparse(substitute(x)), \n+ ylab=switch(type,proportion=\'Proportion\',\n+ count =\'Frequency\',\n+ density =\'Density\'),\n+ y=NULL, curve=NULL, add=FALSE, \n+ bottom.align=type==\'density\', \n+ col=par(\'col\'), lwd=par(\'lwd\'), grid=FALSE, ...)\n+{\n+ type <- match.arg(type)\n+ if(! 
add && side != 1)\n+ stop(\'side must be 1 if add=FALSE\')\n+\n+ if(add && type==\'count\')\n+ warning(\'type="count" is ignored if add=TRUE\')\n+\n+ if(length(y) > 1) {\n+ if(length(y) != length(x))\n+ stop(\'lengths of x and y must match\')\n+ \n+ if(length(curve))\n+ warning(\'curve ignored when y specified\')\n+ \n+ i <- !is.na(x+y)\n+ curve <- list(x=x[i], y=y[i])\n+ }\n+ \n+ if(length(curve) && !missing(bottom.align) && bottom.align)\n+ warning(\'bottom.align=T specified with curve or y; ignoring bottom.align\')\n+\n+ gfun <- ordGridFun(grid)\n+ x <- x[!is.na(x)]\n+ x <- x[x >= xlim[1] & x <= xlim[2]]\n+ \n+ if(type != \'density\') {\n+ if(is.character(nint) || length(x) <= 10) {\n+ f <- table(x)\n+ x <- as.numeric(names(f))\n+ } else {\n+ ncut <- nint+1\n+ bins <- seq(xlim[1], xlim[2], length = ncut)\n+ delta <- (bins[2]-bins[1]) / 2\n+ f <- table(cut(x, c(bins[1] - delta, bins)))\n+ \n+ x <- bins\n+ j <- f > 0\n+ x <- x[j]\n+ f <- f[j]\n+ }\n+ \n+ if(type==\'proportion\') f <- f / sum(f)\n+ } else {\n+ nbar <- logb(length(x), base = 2) + 1\n+ width <- diff(range(x)) / nbar * .75 * mult.width\n+ den <- density(x, width=width, n=200, from=xlim[1], to=xlim[2])\n+ x <- den$x\n+ f <- den$y\n+ }\n+ \n+ if(!add) {\n+ if(grid)\n+ stop(\'add=FALSE not implemented for lattice\')\n+ \n+ plot(0, 0, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, type=\'n\')\n+ }\n+ \n+ if(side==1 || side==3) {\n+ l <- 1:2;\n+ ax <- 1\n+ } else {\n+ l <- 3:4;\n+ ax <- 2\n+ }\n+ \n+ f <- f / max(f)\n+ if(length(minf)) f <- pmax(f, minf)\n+\n+ pr <- parGrid(grid)\n+ usr <- pr$usr;\n+ pin <- pr$pin;\n+ uin <- pr$uin\n+ \n+ u <- usr[l]\n+ u.opp <- usr[- l]\n+ \n+ h <- min(pin) * frac / uin[- ax] * f\n+ h2 <- h / 2\n+ if(length(y) && inherits(y, \'unit\')) {\n+ h <- unit(frac, \'npc\')\n+ h2 <- unit(frac/2, \'npc\')\n+ }\n+ if(length(curve) || length(y)) {\n+ if(length(curve))\n+ y <- approx(curve, xout=x, rule=2)$y\n+\n+ a <- y - h2; b <- y + h2\n+ } else {\n+ a <- if(side < 3) u.opp[1]\n+ else u.opp[2] - h\n+ \n+ b <- if(side < 3) u.opp[1] + h\n+ else u.opp[2]\n+ }\n+ \n+ if(ax==1 && bottom.align && type!=\'density\') {\n+ a <- a + h2\n+ b <- b + h2\n+ }\n+ \n+ if(type==\'density\') {\n+ lll <- gfun$lines\n+ ## Problem in S+ getting right value of lwd\n+ if(ax==1)\n+ do.call(\'lll\',list(x,\n+ if(side==1)b\n+ else a,\n+ lwd=lwd, col=col))\n+ else\n+ do.call(\'lll\',list(if(side==2)b\n+ else a,\n+ x, lwd=lwd, col=col))\n+ } else {\n+ lll <- gfun$segments\n+ if(ax==1)\n+ do.call(\'lll\',list(x, a, x, b, lwd=lwd, xpd=frac<0, col=col))\n+ else\n+ do.call(\'lll\',list(a, x, b, x, lwd=lwd, xpd=frac<0, col=col))\n+ }\n+ \n+ invisible(xlim)\n+}\n' |
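A minimal sketch of scat1d and histSpike decorating an existing scatterplot (simulated data):

    set.seed(5)
    x <- rexp(400)
    plot(x, rnorm(400))
    scat1d(x, side=3)                         # rug of tick marks on top axis
    histSpike(x, side=1, add=TRUE, frac=.1)   # spike histogram at the bottom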
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/score.binary.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/score.binary.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,39 @@
+score.binary <- function(..., fun=max, points=1:p,
+                         na.rm=funtext == 'max', retfactor=TRUE)
+{
+  x <- list(...)
+  p <- length(x)
+  nam <- (as.character(sys.call())[-1])[1:p]
+  x <- matrix(unlist(x), ncol=p)
+  if(!missing(points)) {
+    if(length(points) == 1)
+      points <- rep(points, p)
+    if(length(points) != p)
+      stop('wrong length for points')
+  }
+
+  x <- x * rep(points, rep.int(nrow(x), p))
+  funtext <- as.character(substitute(fun))
+  if(funtext == 'max' && !missing(points) && retfactor)
+    warning('points do not matter for fun=max with retfactor=T\nas long as they are in ascending order')
+
+  if(!missing(retfactor) && retfactor && funtext != 'max')
+    stop('retfactor=T only applies to fun=max')
+
+  xna <- apply(x, 1, function(x) any(is.na(x)))
+  funargs <- as.list(args(fun))
+  funargs <- funargs[-length(funargs)]
+
+  if(any(names(funargs) == "na.rm")) {
+    x <- apply(x, 1, fun, na.rm=na.rm)
+  } else {
+    x <- apply(x, 1, fun)
+  }
+
+  if(!na.rm)
+    x[x == 0 & xna] <- NA
+
+  if(retfactor && funtext == 'max')
+    factor(x, c(0, points), c("none", nam))
+  else x
+}
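A small worked example: three binary events scored by the worst one present, returned as a factor whose levels run from none to the worst event:

    mi     <- c(0, 1, 0, 1)
    stroke <- c(0, 0, 1, 1)
    death  <- c(0, 0, 0, 1)
    score.binary(mi, stroke, death)   # levels: none, mi, stroke, death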
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/sedit.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/sedit.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,281 @@ +sedit <- function(text, from, to, test=NULL, wild.literal=FALSE) +{ + to <- rep(to, length=length(from)) + for(i in 1:length(text)) { + s <- text[i] + if(length(s)) + for(j in 1:length(from)) { + old <- from[j] + front <- back <- FALSE + if(!wild.literal) { + if(substring(old,1,1)=='^') { + front <- TRUE; + old <- substring(old,2) + } + + if(substring(old,nchar(old))=='$') { + back <- TRUE; old <- substring(old, 1, nchar(old)-1) + } + } + + new <- to[j] + + lold <- nchar(old) + if(lold > nchar(s)) + next + + ex.old <- substring(old, 1:lold, 1:lold) + if(!wild.literal && any(ex.old=='*')) + s <- replace.substring.wild(s, old, new, test=test, front=front, back=back) + else { + l.s <- nchar(s) + is <- 1:(l.s-lold+1) + if(front) + is <- 1 + + ie <- is + lold - 1 + if(back) + ie <- l.s + + ss <- substring(s, is, ie) + k <- ss==old + if(!any(k)) + next + + k <- is[k] + substring2(s, k, k+lold-1) <- new + } + } + + text[i] <- s + } + + text +} + + +substring.location <- function(text, string, restrict) +{ + if(length(text)>1) + stop('only works with a single character string') + + l.text <- nchar(text) + l.string <- nchar(string) + if(l.string > l.text) + return(list(first=0,last=0)) + + if(l.string==l.text) + return(if(text==string) + list(first=1,last=l.text) + else + list(first=0,last=0)) + + is <- 1:(l.text-l.string+1) + ss <- substring(text, is, is+l.string-1) + k <- ss==string + if(!any(k)) + return(list(first=0,last=0)) + + k <- is[k] + if(!missing(restrict)) + k <- k[k>=restrict[1] & k<=restrict[2]] + + if(length(k)==0) + return(list(first=0,last=0)) + + list(first=k, last=k+l.string-1) +} + + +## if(version$major < 5) 14Sep00 +substring2 <- function(text, first, last=100000L) + base::substring(text, first, last) + +'substring2<-' <- function(text, first, last=100000, value) +{ + if(is.character(first)) { + if(!missing(last)) + stop('wrong # arguments') + + return(sedit(text, first, value)) ## value was setto 25May01 + } + + lf <- length(first) + + if(length(text)==1 && lf > 1) { + if(missing(last)) + last <- nchar(text) + + last <- rep(last, length=lf) + for(i in 1:lf) { + text <- paste(if(first[i]>1) + substring(text, 1, first[i]-1), + value, + substring(text, last[i]+1), sep='') + + if(i < lf) { + j <- (i+1):lf + w <- nchar(value) - (last[i]-first[i]+1) + first[j] <- first[j] + w + last[j] <- last[j] + w + } + } + + return(text) + } + text <- paste(ifelse(first>1,substring(text, 1, first-1),''), value, + substring(text, last+1), sep='') + text +} + + +replace.substring.wild <- function(text, old, new, test=NULL, + front=FALSE, back=FALSE) +{ + if(length(text)>1) + stop('only works with a single character string') + + if(missing(front) && missing(back)) { + if(substring(old,1,1)=='^') { + front <- TRUE; + old <- substring(old,2) + } + + if(substring(old, nchar(old))=='$') { + back <- TRUE + old <- substring(old, 1, nchar(old)-1) + } + } + if((front || back) && old!='*') + stop('front and back (^ and $) only work when the rest of old is *') + + star.old <- substring.location(old,'*') + if(length(star.old$first)>1) + stop('does not handle > 1 * in old') + + if(sum(star.old$first)==0) + stop('no * in old') + + star.new <- substring.location(new,'*') + if(length(star.new$first)>1) + stop('cannot have > 1 * in new') + + if(old=='*' && (front | back)) { + if(front && back) + stop('may not specify both front and back (or ^ and $) with old=*') + + if(length(test)==0) + stop('must specify test= with old=^* or *$') + + et <- nchar(text) + if(front) { + st <- rep(1, et); + en 
<- et:1 + } else { + st <- 1:et; + en <- rep(et,et) + } + + qual <- test(substring(text, st, en)) + if(!any(qual)) + return(text) + + st <- (st[qual])[1] + en <- (en[qual])[1] + text.before <- if(st==1)'' + else substring(text, 1, st-1) + + text.after <- if(en==et)'' + else substring(text, en+1, et) + + text.star <- substring(text, st, en) + new.before.star <- + if(star.new$first>1) + substring(new, 1, star.new$first-1) + else '' + + new.after.star <- if(star.new$last==length(new))'' + else substring(new, star.new$last+1) + + return(paste(text.before, new.before.star, text.star, new.after.star, + text.after, sep='')) + } + + old.before.star <- if(star.old$first==1)'' + else substring(old, 1, star.old$first-1) + + old.after.star <- if(star.old$last==nchar(old))'' + else substring(old, star.old$first+1) + + if(old.before.star=='') + loc.before <- list(first=0, last=0) + else { + loc.before <- substring.location(text, old.before.star) + loc.before <- list(first=loc.before$first[1], last=loc.before$last[1]) + } + + if(sum(loc.before$first+loc.before$last)==0) + return(text) + + loc.after <- if(old.after.star=='') list(first=0, last=0) + else { + la <- substring.location(text, old.after.star, + restrict=c(loc.before$last+1,1e10)) + lastpos <- length(la$first) + la <- list(first=la$first[lastpos], last=la$last[lastpos]) + if(la$first+la$last==0) + return(text) + + la + } + + loc.star <- list(first=loc.before$last+1, + last=if(loc.after$first==0) nchar(text) + else loc.after$first-1) + + star.text <- substring(text, loc.star$first, loc.star$last) + if(length(test) && !test(star.text)) + return(text) + + if(star.new$first==0) + return(paste(if(loc.before$first>1)substring(text,1,loc.before$first-1), + new, sep='')) + + new.before.star <- if(star.new$first==1)'' + else substring(new, 1, star.new$first-1) + new.after.star <- if(star.new$last==nchar(new)) '' + else substring(new, star.new$first+1) + + paste(if(loc.before$first>1)substring(text,1,loc.before$first-1), + new.before.star, + substring(text,loc.star$first,loc.star$last), + new.after.star, + if(loc.after$last<nchar(text) && loc.after$last>0) + substring(text,loc.after$last+1), + sep='') +} + + +## Some functions useful as test= arguments to replace.substring.wild, sedit +numeric.string <- function(string) +{ + ##.Options$warn <- -1 6Aug00 + oldopt <- options(warn=-1) + on.exit(options(oldopt)) + !is.na(as.numeric(string)) +} + + +all.digits <- function(string) +{ + k <- length(string) + result <- logical(k) + for(i in 1:k) { + st <- string[i] + ls <- nchar(st) + ex <- substring(st, 1:ls, 1:ls) + result[i] <- all(match(ex,c('0','1','2','3','4','5','6','7','8','9'),nomatch=0)>0) + } + + result +} |
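A quick check of the wildcard and anchor syntax implemented above (a minimal sketch; assumes the Hmisc package defining sedit and numeric.string is attached):

library(Hmisc)
x <- c('pressure', 'pressures', 'x')
sedit(x, 'press*', 'tens*')   # "tensure" "tensures" "x"; * carries the matched tail into the replacement
sedit(x, '^x$', 'y')          # anchored: only the exact string "x" becomes "y"
numeric.string('3.14')        # TRUE; usable as the test= argument above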
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/show.pch.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/show.pch.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,74 @@ +show.pch <- function(object=par('font')) +{ + plot(0,0,xlim=c(-1,11),ylim=c(0,26),type='n',axes=FALSE,xlab='',ylab='') + j <- -1 + for(i in 0:253) { + if(i %% 25==0) { + j <- j+1; + k <- 26 + } + + k <- k-1 + points(j, k, pch=i, font=object) + text(j+.45, k, i) + } + + invisible() +} + + +character.table <- function(font=1) +{ + ## Prints numeric equivalents to all latin characters + ## Usage: graphsheet(orientation = "portrait") + ## character.table() + ## Print the resulting graphsheet. The printed version doesn't allways + ## corresponds to the screen display. The character on line "xy" and column "z" + ## of the table has code "xyz". + ## These codes can be used as any other characters. e.g. + ## title("\347\340 et \340") + ## As the command line window of Splus can't print special characters + ## cat("\347\340 et \340") + ## will not print the special characters, at least under 4.5 and under 2000. + ## + ## Author: + ## Pierre Joyet / Aktuariat pierre.joyet@bluewin.ch + + v <- 40:377 + v <- v[v %% 100 < 80 & v %% 10 < 8] + opar <- par(mar = c(5, 5, 4, 2) + 0.1, xpd=NA) + plot(0:7, seq(4, 31, length = 8), type = "n", axes = FALSE, xlab = "", + ylab = "") + k <- 1 + for(i in 4:31) + for(j in 0:7) { + text(j, 35 - i, eval(parse(text = paste("\"\\", v[k], "\"", + sep = ""))), font = font) + k <- k + 1 + } + + text(0:7, rep(33, 7), as.character(0:7), font = 3) + text(rep(-1, 28), 31:4, as.character(c(4:7, 10:17, 20:27, 30:37)), + font = 3) + par(opar) + invisible() +} + + +show.col <- function(object=NULL) +{ + plot(0,0,xlim=c(-1,10),ylim=c(0,10),type='n',axes=FALSE,xlab='',ylab='') + j <- -1 + for(i in 0:99) { + if(i %% 10==0) { + j <- j+1; + k <- 10 + } + + k <- k-1 + points(j, k, pch=15, col=i, cex=3) + text(j+.45, k, i) + } + + invisible() +} |
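These helpers are self-demonstrating; a usage sketch (requires an open graphics device and Hmisc attached):

show.pch()          # plotting characters 0-253 in the current font
show.col()          # colour codes 0-99 drawn as filled squares
character.table()   # octal codes for latin-1 characters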
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/showPsfrag.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/showPsfrag.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,16 @@ +showPsfrag <- function(filename) +{ + file <- paste(as.character(substitute(filename)),'ps',sep='.') + out <- "TEMPltx" + cat('\\documentclass{article}', + '\\usepackage{graphics}', + '\\usepackage[scanall]{psfrag}', + '\\begin{document}', + paste('\\includegraphics{',file,'}',sep=''), + '\\end{document}',sep='\n', file=paste(out,'tex',sep='.')) + sys(paste('latex "\\scrollmode\\input" ',out,';dvips -o ',out,'.ps ',out, + '; gv ',out,'.ps &', + sep='')) + unlink(paste(out,c('tex','log','dvi','ps','aux','pfg'),sep='.')) + invisible() +} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/solvet.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/solvet.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,22 @@ +#FEH version of solve with argument tol passed to qr +#8 Apr 91 + +solvet <- function(a, b, tol=1e-9) +{ + if(!is.list(a)) + a <- qr(a, tol=tol) + + if(a$rank < ncol(a$qr)) + stop("apparently singular matrix") + + if(missing(b)) { + b <- a$qr + db <- dim(b) + if(diff(db)) + stop("matrix inverse only for square matrices") + + b[] <- rep(c(1, rep(0, db[1])), length = prod(db)) + } + + qr.coef(a, b) +} |
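A minimal sketch of both calling modes (base R only, with solvet defined as above):

a <- matrix(c(2, 1, 1, 3), nrow=2)
solvet(a)              # inverse of a, computed through qr()
solvet(a, c(1, 2))     # solves a %*% x == c(1, 2)
solvet(a, tol=1e-12)   # stricter singularity tolerance passed to qr()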
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/somers2.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/somers2.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,126 @@ +##S function somers2 +## +## Calculates concordance probability and Somers' Dxy rank correlation +## between a variable X (for which ties are counted) and a binary +## variable Y (having values 0 and 1, for which ties are not counted). +## Uses short cut method based on average ranks in two groups. +## +## Usage: +## +## somers2(x, y, weights) +## +## Returns vector whose elements are C Index, Dxy, n and missing, where +## C Index is the concordance probability and Dxy=2(C Index-.5). +## +## F. Harrell 28 Nov 90 6 Apr 98: added weights + +somers2 <- function(x, y, weights=NULL, normwt=FALSE, na.rm=TRUE) +{ + if(length(y) != length(x)) stop("y must have same length as x") + y <- as.integer(y) + wtpres <- length(weights) + if(wtpres && (wtpres != length(x))) + stop('weights must have same length as x') + + if(na.rm) { + miss <- if(wtpres) is.na(x + y + weights) + else is.na(x + y) + + nmiss <- sum(miss) + if(nmiss > 0) + { + miss <- !miss + x <- x[miss] + y <- y[miss] + if(wtpres) weights <- weights[miss] + } + } + else nmiss <- 0 + + if(any(y %nin% 0:1)) stop('y must be binary') + + if(wtpres) { + if(normwt) + weights <- length(x)*weights/sum(weights) + n <- sum(weights) + } + else n <- length(x) + + if(n < 2) stop("must have >=2 non-missing observations") + + n1 <- if(wtpres)sum(weights[y==1]) else sum(y==1) + + if(n1 == 0 || n1 == n) + return(c(C=NA, Dxy=NA, n=n, Missing=nmiss)) + + mean.rank <- + if(wtpres) + wtd.mean(wtd.rank(x, weights, na.rm=FALSE), weights * y) + else + mean(rank(x)[y==1]) + + c.index <- (mean.rank - (n1 + 1) / 2) / (n - n1) + dxy <- 2 * (c.index - 0.5) + r <- c(c.index, dxy, n, nmiss) + names(r) <- c("C", "Dxy", "n", "Missing") + r +} + + +if(FALSE) rcorrs <- function(x, y, weights=rep(1,length(y)), + method=c('exact','bin'), nbin=1000, + na.rm=TRUE) +{ + ## Experimental function - probably don't need + + method <- match.arg(method) + + if(na.rm) { + s <- !is.na(x + unclass(y) + weights) + x <- x[s]; y <- y[s]; weights <- weights[s] + } + + n <- length(x) + if(missing(method)) + method <- if(n < 1000) 'exact' + else 'bin' + + y <- as.factor(y); + nly <- length(levels(y)) + y <- as.integer(y) + if(method == 'bin') { + r <- range(x); d <- r[2] - r[1] + x <- 1 + trunc((nbin - 1) * (x - r[1]) / d) + + xy <- y * nbin + x + + ## Code below is lifted from rowsum() + storage.mode(weights) <- "double" + temp <- + .C('R_rowsum', dd=as.integer(dd), + as.double(max(1,weights)*n), + x=weights, as.double(xy), PACKAGE='base') + new.n <- temp$dd[1] + weights <- temp$x[1:new.n] + + uxy <- unique(xy) + x <- uxy %% nbin + y <- (uxy - x)/nbin + n <- length(x) + } + + list(x=x, y=y, weights=weights) + + #storage.mode(x) <- "single" + #storage.mode(y) <- "single" + #storage.mode(event) <- "logical" + + ## wcidxy doesn't exist yet + z <- .Fortran("wcidxy",as.single(x),as.single(y),as.integer(weights),as.integer(n), + nrel=double(1),nconc=double(1),nuncert=double(1), + c.index=double(1),gamma=double(1),sd=double(1),as.logical(outx)) + r <- c(z$c.index,z$gamma,z$sd,n,z$nrel,z$nconc,z$nuncert) + names(r) <- c("C Index","Dxy","S.D.","n","missing","uncensored", + "Relevant Pairs", "Concordant","Uncertain") + r +} |
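A worked example of the average-rank shortcut: below, the mean rank of x among the y == 1 observations is (3 + 5 + 6)/3 = 14/3, so C = (14/3 - (3+1)/2)/(6 - 3) = 8/9 and Dxy = 2(C - 0.5) = 7/9.

library(Hmisc)
x <- c(1, 2, 3, 4, 5, 6)
y <- c(0, 0, 1, 0, 1, 1)
somers2(x, y)   # C = 0.889, Dxy = 0.778, n = 6, Missing = 0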
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/spearman.test.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/spearman.test.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,37 @@ +## Spearman correlation test (p=1) or Spearman test extended by adding +## rank(x)^2 to model (p=2) +## F Harrell 30Sep90 + +spearman.test <- function(x,y,p=1) +{ + x <- as.numeric(x); + y <- as.numeric(y) ## 17Jul97 + + if(length(x)!=length(y)) + stop("length of x must = length of y") + + nomiss <- !is.na(x+y) + n <- sum(nomiss) + if(n<3) + stop("fewer than 3 non-missing x-y pairs") + + if(!(p==1 | p==2)) + stop("p must be 1 or 2") + + x <- x[nomiss] + x <- rank(x) + y <- y[nomiss] + y <- rank(y) + sst <- sum((y-mean(y))^2) + if(p==2) + x <- cbind(x,x^2) + + sse <- sum((lsfit(x,y)$residuals)^2) + rsquare <- 1-sse/sst + df2 <- n-p-1 + fstat <- rsquare/p/((1-rsquare)/df2) + pvalue <- 1-pf(fstat,p,df2) + x <- c(rsquare,fstat,p,df2,pvalue,n) + names(x) <- c("Rsquare","F","df1","df2","pvalue","n") + x +} |
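Sketch of the p=2 extension: adding rank(x)^2 to the fit lets the test detect a U-shaped relation that the ordinary Spearman test (p=1) misses.

set.seed(1)
x <- runif(100)
y <- (x - 0.5)^2 + rnorm(100, sd=0.02)
spearman.test(x, y, p=1)   # little monotonic association
spearman.test(x, y, p=2)   # the quadratic rank term captures the U shape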
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/spower.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/spower.s Wed Jun 28 20:28:48 2017 -0400 |
@@ -0,0 +1,378 @@
+spower <- function(rcontrol, rinterv, rcens, nc, ni,
+                   test=logrank, cox=FALSE, nsim=500, alpha=.05, pr=TRUE)
+{
+  crit <- qchisq(1-alpha, 1)
+  group <- c(rep(1,nc), rep(2,ni))
+  nexceed <- 0
+  if(cox) beta <- numeric(nsim)
+
+  maxfail <- 0; maxcens <- 0
+  for(i in 1:nsim) {
+    if(pr && i %% 10 == 0) cat(i,'\r')
+
+    yc <- rcontrol(nc)
+    yi <- rinterv(ni)
+    cens <- rcens(nc+ni)
+    y <- c(yc, yi)
+    maxfail <- max(maxfail, max(y))
+    maxcens <- max(maxcens, max(cens))
+    S <- cbind(pmin(y,cens), 1*(y <= cens))
+    nexceed <- nexceed + (test(S, group) > crit)
+    if(cox)
+    {
+      fit <- coxph.fit(as.matrix(group), S, strata=NULL,
+                       offset=NULL, init=NULL,
+                       control=coxph.control(iter.max=10, eps=.0001),
+                       method="efron", rownames=NULL)
+      beta[i] <- fit$coefficients
+    }
+  }
+  cat('\n')
+  if(maxfail < 0.99*maxcens)
+    stop(paste('Censoring time distribution defined at later times than\nsurvival time distribution. There will likely be uncensored failure times\nstacked at the maximum allowed survival time.\nMaximum simulated failure time:', max(y), '\nMaximum simulated censoring time:', max(cens)))
+
+  power <- nexceed/nsim
+  if(cox) structure(list(power=power, betas=beta, nc=nc, ni=ni,
+                         alpha=alpha, nsim=nsim), class='spower') else power
+}
+
+print.spower <- function(x, conf.int=.95, ...)
+{
+  b <- x$betas
+  hr <- exp(b)
+  pp <- (1+conf.int)/2
+  cl <- quantile(hr, c((1-conf.int)/2, pp))
+  meanbeta <- mean(b)
+  medbeta <- median(b)
+  hrmean <- exp(meanbeta)
+  hrmed <- exp(medbeta)
+  moehi <- cl[2]/hrmed
+  moelo <- hrmed/cl[1]
+  g <- function(w) round(w, 4)
+  mmoe <- max(moehi, moelo)
+  cat('\nTwo-Group Event Time Comparison Simulation\n\n',
+      x$nsim, ' simulations\talpha: ', x$alpha, '\tpower: ', x$power,
+      '\t', conf.int, ' confidence interval\n',
+      '\nHazard ratio from mean beta      : ', g(hrmean),
+      '\nHazard ratio from median beta    : ', g(hrmed),
+      '\nStandard error of log hazard ratio: ', g(sd(b)),
+      '\nConfidence limits for hazard ratio: ', g(cl[1]), ', ', g(cl[2]),
+      '\nFold-change margin of error high : ', g(moehi),
+      '\t(upper CL/median HR)',
+      '\nFold-change margin of error low  : ', g(moelo),
+      '\t(median HR/lower CL)',
+      '\nMax fold-change margin of error  : ', g(mmoe), '\n\n')
+
+  cat('The fold change margin of error of', g(mmoe),
+      'represents the margin of error\n',
+      'the study is likely to achieve in estimating the intervention:control\n',
+      'hazard ratio. It is the ratio of a', conf.int, 'confidence limit on the\n',
+      'hazard ratio to the median hazard ratio obtained over the', x$nsim, 'simulations.\n',
+      'The confidence limit was obtained by computing the', pp, 'quantile of the\n',
+      x$nsim, 'observed hazard ratios. The standard error is the standard deviation\n',
+      'of the', x$nsim, 'simulated log hazard ratios.\n\n')
+
+  res <- c(cl, hrmean, hrmed, sd(b), moelo, moehi, x$power)
+  names(res) <- c('CLlower','CLupper','HRmean','HRmedian','SE',
+                  'MOElower','MOEupper','Power')
+  invisible(res)
+}
+
+Quantile2 <- function(scontrol, hratio,
+                      dropin=function(times)0,
+                      dropout=function(times)0,
+                      m=7500, tmax, qtmax=.001, mplot=200, pr=TRUE,
+                      ...)
+{
+  ## Solve for tmax such that scontrol(t)=qtmax
+  dlist <- list(...)
+  k <- length(dlist) && !is.null(dlist)
+  f <- if(k) function(x, scontrol, qt, ...) scontrol(x, ...) - qt
+       else function(x, scontrol, qt) scontrol(x) - qt
+
+  if(missing(tmax)) {
+    if(k) tmax <- uniroot(f, c(0,1e9), scontrol=scontrol, qt=qtmax, ...)$root
+    else tmax <- uniroot(f, c(0,1e9), scontro
[...]
m=ylim,
+               opts=label.curves)
+      labcurve(pi[i[-(1:2)]], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim,
+               opts=label.curves)
+    } else
+      labcurve(pi[i], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim,
+               opts=label.curves)
+  }
+
+  if(what %in% c('hazard','both','all')) {
+    if(dropsep && (dropin|dropout)) {
+      labcurve(pi[5:6], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim,
+               opts=label.curves)
+      labcurve(pi[4+i[-(1:2)]], pl=TRUE, lty=lty, col.=col, xlim=xlim,
+               ylim=ylim, opts=label.curves)
+    } else
+      labcurve(pi[4+i], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim,
+               opts=label.curves)
+  }
+
+  if(what=='drop' || (what=='all' && (dropin | dropout))) {
+    i <- c(if(dropin)9,
+           if(dropout)10)
+
+    if(length(i)==0)
+      i <- 10
+
+    labcurve(pi[i], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim,
+             opts=label.curves)
+  }
+
+  if(what %in% c('hratio','all')) {
+    i <- c(11,
+           if(dropin|dropout) 12)
+
+    labcurve(pi[i], pl=TRUE, lty=lty, col.=col, xlim=xlim, ylim=ylim,
+             opts=label.curves)
+  }
+
+  invisible()
+}
+
+logrank <- function(S, group)
+{
+  group <- as.factor(group)
+  i <- is.na(S) | is.na(group)
+  if(any(i))
+  {
+    i <- !i
+    S <- S[i,,drop=FALSE]
+    group <- group[i]
+  }
+  group <- as.integer(group)
+  y <- S[,1]
+  event <- S[,2]
+  i <- order(-y)
+  y <- y[i]
+  event <- event[i]
+  group <- group[i]
+  x <- cbind(group==1, group==2, (group==1)*event, (group==2)*event)
+  if(TRUE)
+  {
+    s <- rowsum(x, y, FALSE)
+    nr1 <- cumsum(s[,1])
+    nr2 <- cumsum(s[,2])
+    d1 <- s[,3]
+    d2 <- s[,4]
+    rd <- d1+d2
+    rs <- nr1+nr2-rd
+    n <- nr1+nr2
+    oecum <- d1 - rd*nr1/n
+    vcum <- rd * rs * nr1 * nr2 / n / n / (n-1)
+    chisq <- sum(oecum)^2 / sum(vcum,na.rm=TRUE)
+    hr <- sum(d1*(nr1-d1)/n)/sum(d2*(nr2-d2)/n)
+  }
+  else
+  { # non-working code; trying to get stratification to work
+    OE <- v <- hrn <- hrd <- 0
+    for(strat in unique(strata))
+    {
+      j <- strata==strat
+      s <- rowsum(x[j,], y[j], FALSE)
+      nr1 <- cumsum(s[,1])
+      nr2 <- cumsum(s[,2])
+      d1 <- s[,3]
+      d2 <- s[,4]
+      rd <- d1+d2
+      rs <- nr1+nr2-rd
+      n <- nr1+nr2
+      oecum <- d1 - rd*nr1/n
+      vcum <- rd * rs * nr1 * nr2 / n / n / (n-1)
+      OE <- OE + sum(oecum)
+      v <- v + sum(vcum, na.rm=TRUE)
+      hrn <- hrn + sum(d1*(nr1-d1)/n)
+      hrd <- hrd + sum(d2*(nr2-d2)/n)
+    }
+    chisq <- OE^2 / v
+    hr <- hrn/hrd
+  }
+  structure(chisq, hr=hr)
+}
+
+
+Weibull2 <- function(times, surv)
+{
+  z1 <- -logb(surv[1])
+  z2 <- -logb(surv[2])
+  t1 <- times[1]
+  t2 <- times[2]
+  gamma <- logb(z2/z1)/logb(t2/t1)
+  alpha <- z1/(t1^gamma)
+
+  g <- function(times, alpha, gamma)
+  {
+    exp(-alpha*(times^gamma))
+  }
+
+  formals(g) <- list(times=NULL, alpha=alpha, gamma=gamma)
+  g
+}
+
+
+## Function to fit a Gompertz survival distribution to two points
+## The function is S(t) = exp[-(1/b)exp(a+bt)]
+## Returns a list with components a and b, and a function for
+## generating S(t) for a vector of times
+Gompertz2 <- function(times, surv)
+{
+  z1 <- logb(-logb(surv[1]))
+  z2 <- logb(-logb(surv[2]))
+  t1 <- times[1]
+  t2 <- times[2]
+  b <- (z2-z1)/(t2-t1)
+  a <- z1 + logb(b)-b*t1
+
+  g <- function(times, a, b) {
+    exp(-exp(a+b*times)/b)
+  }
+
+  formals(g) <- list(times=NULL, a=a, b=b)
+  g
+}
+
+
+Lognorm2 <- function(times, surv)
+{
+  z1 <- qnorm(1-surv[1])
+  z2 <- qnorm(1-surv[2])
+  sigma <- logb(times[2]/times[1])/(z2-z1)
+  mu <- logb(times[1]) - sigma*z1
+
+  g <- function(times, mu, sigma) {
+    1 - pnorm((logb(times)-mu)/sigma)
+  }
+
+  formals(g) <- list(times=NULL, mu=mu, sigma=sigma)
+  g
+}
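The two-point fitters above solve S(t) = surv at the two supplied times; Weibull2, for example, sets gamma = log(z2/z1)/log(t2/t1) and alpha = z1/t1^gamma with z = -log(surv). A minimal check, assuming Hmisc is attached:

S <- Weibull2(times=c(1, 2), surv=c(0.9, 0.8))
S(c(1, 2))   # reproduces 0.9 and 0.8 by construction
S(3)         # extrapolated survival at t = 3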
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/src.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/src.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,18 @@ +##Function to source(x) if x is given, or source(last x given) otherwise +##Last x is stored in options() last.source. x is unquoted with .s omitted. +##Author: Frank Harrell 19May91 + +src <- function(x) { + if(!missing(x)) { + y <- paste(as.character(substitute(x)),".s",sep="") + options(last.source=y, TEMPORARY=FALSE) + } + else y <- options()$last.source + + if(is.null(y)) + stop("src not called with file name earlier") + + source(y) + cat(y, "loaded\n") + invisible() +} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/stata.get.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/stata.get.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,145 @@ +stata.get <- function(file, lowernames=FALSE, + convert.dates=TRUE, convert.factors=TRUE, + missing.type=FALSE, convert.underscore=TRUE, + warn.missing.labels=TRUE, force.single=TRUE, + allow=NULL, charfactor=FALSE, ...) + { + ## depends on the read.dta function from foriegn + + ## Function to convert the elements of w into more compact + ## data storage types. + convertObjs <- function(x, charfactor, force.single) { + ## Date is not nessarely a integer but it ignores any + ## fraction it might have + if((inherits(x, 'Date') || is.factor(x)) + && storage.mode(x) != 'integer') { + storage.mode(x) <- 'integer' + } else if(charfactor && is.character(x)) { + ## If x is a character and arg charfactor is TRUE then + ## convert x to a factor if the number of unique values of x is less + # than half the total number of values in x + if(length(unique(x)) < length(x) / 2) + { + x <- sub(' +$', '', x) # remove trailing blanks + x <- factor(x, exclude='') + } + } else if(is.numeric(x)) { + + if(all(is.na(x))) { + ## if all values are NA then convert to integer because + ## it is 4 bytes instead of 8 + storage.mode(x) <- 'integer' + } + else if(force.single && max(abs(x), na.rm=TRUE) <= (2^31-1) && + all(floor(x) == x, na.rm=TRUE)) { + ## convert x to integer if arg force.single is TRUE and the maximum + ## absolute value of x is less then maximum value that an integer + ## can store. + storage.mode(x) <- 'integer' + } + } + + return(x) + } + + ## A function to create additional attributes to add to the elements of + ## w + create.attribs <- function(var.label, val.label, format, label.table) { + attribs <- list() + + if(format != '') { + attribs$format <- format + } + + ## Translate var labels into Hmisc var lables + if(var.label != '') { + attribs$label <- var.label + } + + ## The label.table values are found by looking a the checking to see + ## if there is a non-empty value in val.labels. That value corrasponds + ## a named element in label.table. + + ## Check to see if val.label is not empty and it is one of the + ## names in label.table and that its value is not NULL + if(val.label != '' && val.label %in% names(label.table) && + !is.null(label.table[[val.label]])) { + attribs$value.label.table <- label.table[[val.label]] + } + + return(attribs) + } + + ## If file is a url download and set file = to temp file name + if(length(grep('^http://', file))){ + tf <- tempfile() + download.file(file, tf, mode='wb', quiet=TRUE) + file <- tf + } + + ## Read the stata file into w + w <- read.dta(file, convert.dates=convert.dates, + convert.factors=convert.factors, + missing.type=missing.type, + convert.underscore=convert.underscore, + warn.missing.labels=warn.missing.labels, ...) + + ## extract attributes from w + a <- attributes(w) + num.vars <- length(w) + + ## Do translate attributes names into R names + nam <- makeNames(a$names, unique=TRUE, allow=allow) + if(lowernames) nam <- casefold(nam, upper=FALSE) + a$names <- nam + + ## If var.labels is empty then create a empty char vector. + if(!length(a$var.labels)) { + a$var.labels <- character(num.vars) + } + + ## If val.labels is empty then create an empty char vector. + if(length(a$val.labels)) { + val.labels <- a$val.labels + } else { + val.labels <- character(num.vars) + } + + ## create list of attributes for the elements in w. An mapply is faster + ## then a for loop in large data sets. 
+ attribs <- mapply(FUN=create.attribs, var.label=a$var.labels, + val.label=val.labels, format=a$formats, + MoreArgs=list(label.table=a$label.table), + SIMPLIFY=FALSE) + + ## clear var.labels attribute + attr(w, 'var.labels') <- NULL + + ## Convert the elements of w as needed + w <- lapply(w, FUN=convertObjs, force.single=force.single, + charfactor=charfactor) + + ## strip off the naming info for w + w <- unname(w) + + ## add the new attributes to the current attributes of + ## the elements of w + for(i in seq(along.with=w)) { + ## Set the label for the element + if('label' %in% names(attribs[[i]])) { + label(w[[i]]) <- attribs[[i]]$label + ## clear the label value from attribs[[i]] + attribs[[i]]$label <- NULL + } + + ## combine the new attribs with the current attributes + consolidate(attributes(w[[i]])) <- attribs[[i]] + } + + ## add the names, rownames, class variables, and some extra stata + ## info back to w + stata.info <- a[c('datalabel','version','time.stamp','val.labels','label.table')] + attributes(w) <- c(a[c('names','row.names','class')], + stata.info=list(stata.info)) + return(w) + } |
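Hedged usage sketch (the file name is hypothetical; read.dta is from the foreign package):

library(Hmisc)
# w <- stata.get('mystudy.dta', lowernames=TRUE, charfactor=TRUE)
# contents(w)   # variable and value labels carried over from Stata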
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/strgraphwrap.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/strgraphwrap.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,84 @@ +strgraphwrap <- + function (x, width = 0.9 * getOption("width"), + indent = 0, exdent = 0, + prefix = "", simplify = TRUE, units='user', cex=NULL) +{ + if (!is.character(x)) + x <- as.character(x) + + spc.len <- strwidth(" ", units=units, cex=cex) + prefix.len <- strwidth(prefix, units = units, cex=cex) + indentString <- paste(rep.int(" ", indent), collapse = "") + indent <- indent * spc.len + exdentString <- paste(rep.int(" ", exdent), collapse = "") + exdent <- exdent * spc.len + + y <- list() + z <- lapply(strsplit(x, "\n[ \t\n]*\n"), strsplit, "[ \t\n]") + for (i in seq_along(z)) { + yi <- character(0) + for (j in seq_along(z[[i]])) { + words <- z[[i]][[j]] + nc <- strwidth(words, units=units, cex=cex) + if (any(is.na(nc))) { + nc0 <- strwidth(words, units=units, cex=cex) + nc[is.na(nc)] <- nc0[is.na(nc)] + } + if (any(nc == 0)) { + zLenInd <- which(nc == 0) + zLenInd <- zLenInd[!(zLenInd %in% (grep("\\.$", + words) + 1))] + if (length(zLenInd) > 0) { + words <- words[-zLenInd] + nc <- nc[-zLenInd] + } + } + if (length(words) == 0) { + yi <- c(yi, "", prefix) + next + } + currentIndex <- 0 + lowerBlockIndex <- 1 + upperBlockIndex <- integer(0) + lens <- cumsum(nc + spc.len) + first <- TRUE + maxLength <- width - prefix.len - + indent + while (length(lens) > 0) { + k <- max(sum(lens <= maxLength), 1) + if (first) { + first <- FALSE + maxLength <- maxLength + indent - exdent + } + currentIndex <- currentIndex + k + if (nc[currentIndex] == 0) + upperBlockIndex <- c(upperBlockIndex, currentIndex - + 1) + else upperBlockIndex <- c(upperBlockIndex, currentIndex) + if (length(lens) > k) { + if (nc[currentIndex + 1] == 0) { + currentIndex <- currentIndex + 1 + k <- k + 1 + } + lowerBlockIndex <- c(lowerBlockIndex, currentIndex + + 1) + } + if (length(lens) > k) + lens <- lens[-(1:k)] - lens[k] + else lens <- NULL + } + nBlocks <- length(upperBlockIndex) + s <- paste(prefix, c(indentString, rep.int(exdentString, + nBlocks - 1)), sep = "") + for (k in (1:nBlocks)) s[k] <- paste(s[k], paste(words[lowerBlockIndex[k]:upperBlockIndex[k]], + collapse = " "), sep = "") + yi <- c(yi, s, prefix) + } + y <- if (length(yi)) + c(y, list(yi[-length(yi)])) + else c(y, "") + } + if (simplify) + y <- unlist(y) + y +} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/string.break.line.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/string.break.line.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,9 @@ +if(!exists("string.break.line", mode='function')) { + string.break.line <- function(string) { + if(! is.character(string)) { + x <- as.character(string) + } + + ifelse(string == '', '', strsplit(string, '\n', fixed=TRUE)) + } +} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/strwrap.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/strwrap.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,73 @@ +if(!exists('strwrap')) { + strwrap <- function (x, width = 0.9 * getOption("width"), indent = 0, exdent = 0, + prefix = "", simplify = TRUE) { + indentString <- paste(rep.int(" ", indent), collapse = "") + exdentString <- paste(rep.int(" ", exdent), collapse = "") + y <- list() + z <- lapply(strsplit(x, "\n[ \t\n]*\n"), strsplit, "[ \t\n]") + for (i in seq(along = z)) { + yi <- character(0) + for (j in seq(along = z[[i]])) { + words <- z[[i]][[j]] + nc <- nchar(words, type = "w") + if (any(is.na(nc))) { + nc0 <- nchar(words) + nc[is.na(nc)] <- nc0[is.na(nc)] + } + if (any(nc == 0)) { + zLenInd <- which(nc == 0) + zLenInd <- zLenInd[!(zLenInd %in% (grep("\\.$", + words) + 1))] + if (length(zLenInd) > 0) { + words <- words[-zLenInd] + nc <- nc[-zLenInd] + } + } + if (length(words) == 0) { + yi <- c(yi, "", prefix) + next + } + currentIndex <- 0 + lowerBlockIndex <- 1 + upperBlockIndex <- integer(0) + lens <- cumsum(nc + 1) + first <- TRUE + maxLength <- width - nchar(prefix, type = "w") - + indent + while (length(lens) > 0) { + k <- max(sum(lens <= maxLength), 1) + if (first) { + first <- FALSE + maxLength <- maxLength + indent - exdent + } + currentIndex <- currentIndex + k + if (nc[currentIndex] == 0) + upperBlockIndex <- c(upperBlockIndex, currentIndex - + 1) + else upperBlockIndex <- c(upperBlockIndex, currentIndex) + if (length(lens) > k) { + if (nc[currentIndex + 1] == 0) { + currentIndex <- currentIndex + 1 + k <- k + 1 + } + lowerBlockIndex <- c(lowerBlockIndex, currentIndex + + 1) + } + if (length(lens) > k) + lens <- lens[-(1:k)] - lens[k] + else lens <- NULL + } + nBlocks <- length(upperBlockIndex) + s <- paste(prefix, c(indentString, rep.int(exdentString, + nBlocks - 1)), sep = "") + for (k in (1:nBlocks)) s[k] <- paste(s[k], paste(words[lowerBlockIndex[k]:upperBlockIndex[k]], + collapse = " "), sep = "") + yi <- c(yi, s, prefix) + } + y <- c(y, list(yi[-length(yi)])) + } + if (simplify) + y <- unlist(y) + y + } +} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/subplot.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/subplot.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,28 @@ +subplot <- function (fun, x, y = NULL, size = c(1, 1), vadj = 0.5, + hadj = 0.5, pars = NULL) { + old.par <- par(no.readonly = TRUE) + on.exit(par(old.par)) + if (missing(x)) + x <- locator(2) + + xy <- xy.coords(x, y) + if (length(xy$x) != 2) { + pin <- par("pin") + tmp <- cnvrt.coords(xy$x[1], xy$y[1], "usr")$plt + x <- c(tmp$x - hadj * size[1]/pin[1], tmp$x + (1 - hadj) * + size[1]/pin[1]) + y <- c(tmp$y - vadj * size[2]/pin[2], tmp$y + (1 - vadj) * + size[2]/pin[2]) + xy <- cnvrt.coords(x, y, "plt")$fig + } + else { + xy <- cnvrt.coords(xy, , "usr")$fig + } + + if(length(pars)) par(pars) + par(plt = c(xy$x, xy$y), new = TRUE) + if(is.function(fun))fun() else fun + tmp.par <- par(no.readonly = TRUE) + + return(invisible(tmp.par)) +} |
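Minimal sketch: embed a small histogram inside an existing plot (coordinate conversion is handled by the cnvrt.coords helper used above):

plot(1:10, 1:10)
subplot(hist(rnorm(100), main='', xlab='', ylab=''),
        x=3, y=8, size=c(1, 1))   # 1 x 1 inch inset centred at (3, 8)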
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/substi.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/substi.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,97 @@ +##Substitute y when element of x is missing +##also return an attribute "substi.source"=vector of var names and NAs +substi <- function(x,y,pr=TRUE) +{ + if(length(x)!=length(y)) + stop("lengths of x and y are different") + + nf <- is.factor(x) + is.factor(y) + if(nf==1) + stop("both x and y must be factor variables if either is") + + isna <- is.na(x) + vnames <- sys.call()[c(2,3)] + if(pr) { + cat("Variables:",vnames,"\n") + cat("Used first variable:",sum(!is.na(x)),"\n") + cat("Used second variable:",sum(is.na(x) & !is.na(y)),"\n") + } + + if(nf) { + levs <- unique(c(levels(x),levels(y))) + x <- as.character(x) + y <- as.character(y) + x[isna] <- y[isna] + x <- factor(x,levs) + y <- factor(y,levs) + } else + x[isna] <- y[isna] + + ss <- ifelse(isna & is.na(y),NA,ifelse(isna,2,1)) + attr(ss,"names") <- NULL + ss <- factor(ss,labels=vnames) + if(pr) + cat("Obs:",sum(!is.na(x))," Obs missing:",sum(is.na(x)),"\n") + + attr(x,"substi.source") <- ss + attr(x,'class') <- c("substi",attr(x,'class')) + x +} + + +substi.source <- function(x) attr(x,"substi.source") + + +"[.substi" <- function(x, ...) +{ + ss <- attr(x,"substi.source") + ats <- attributes(x) + ats$dimnames <- ats$dim <- ats$names <- ats$substi.source <- + attr(x,'class') <- NULL + x <- (x)[...] + attributes(x) <- ats + attr(x,"substi.source") <- ss[...] + x +} + + +print.substi <- function(x, ...) +{ + i <- unclass(attr(x, "substi.source")) + if(!length(i)) { + print.default(x) + return(invisible()) + } + + if(is.factor(x)) + w <- as.character(x) + else w <- format(x) + + names(w) <- names(x) + w[i==2] <- paste(w[i==2], "*", sep = "") + attr(w, "label") <- attr(w, "substi.source") <- attr(w, "class") <- NULL + print.default(w, quote = FALSE) + invisible() +} + + +as.data.frame.substi <- function(x, row.names = NULL, optional = FALSE, ...) +{ + nrows <- length(x) + if(!length(row.names)) { + ## the next line is not needed for the 1993 version of data.class and is + ## included for compatibility with 1992 version + if(length(row.names <- names(x)) == nrows && + !any(duplicated(row.names))) { + } + else if(optional) + row.names <- character(nrows) + else row.names <- as.character(1:nrows) + } + + value <- list(x) + if(!optional) + names(value) <- deparse(substitute(x))[[1]] + + structure(value, row.names=row.names, class='data.frame') +} |
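A small sketch of substi and its bookkeeping attribute:

a <- c(1, NA, 3, NA)
b <- c(9, 2, 9, NA)
z <- substi(a, b)   # uses b where a is missing; prints usage counts
substi.source(z)    # factor recording which variable supplied each value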
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/summary.formula.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/summary.formula.s Wed Jun 28 20:28:48 2017 -0400 |
@@ -0,0 +1,2658 @@
+## $Id$
+##note: ars may always be T
+
+summary.formula <-
+  function(formula, data=NULL, subset=NULL, na.action=NULL,
+           fun=NULL,
+           method=c('response','reverse','cross'),
+           overall=method=='response'|method=='cross',
+           continuous=10, na.rm=TRUE, na.include=method!='reverse',
+           g=4, quant = c(0.025, 0.05, 0.125, 0.25, 0.375, 0.5, 0.625,
+                          0.75, 0.875, 0.95, 0.975),
+           nmin=if(method=='reverse') 100 else 0,
+           test=FALSE,
+           conTest=conTestkw,
+           catTest=catTestchisq,
+           ordTest=ordTestpo,
+           ...)
+{
+  call <- match.call()
+  missmethod <- missing(method)
+  method <- match.arg(method)
+
+  ## Multiple left hand side variables -> automatically call summaryM
+  if(grepl('.*\\+.*~', paste(deparse(formula), collapse='')))
+    return(summaryM(formula, data=data, subset=subset,
+                    na.action=na.action, overall=overall,
+                    continuous=continuous, na.include=na.include,
+                    quant=quant, nmin=nmin, test=test,
+                    conTest=conTest, catTest=catTest, ordTest=ordTest))
+
+  X <- match.call(expand.dots=FALSE)
+  X$fun <- X$method <- X$na.rm <- X$na.include <- X$g <-
+    X$overall <- X$continuous <- X$quant <- X$nmin <- X$test <-
+    X$conTest <- X$catTest <- X$... <- NULL
+  if(missing(na.action))
+    X$na.action <- na.retain
+
+  Terms <- if(missing(data)) terms(formula,'stratify')
+           else terms(formula,'stratify',data=data)
+
+  X$formula <- Terms
+  X[[1]] <- as.name("model.frame")
+
+  X <- eval(X, sys.parent())
+
+  Terms <- attr(X,"terms")
+  resp <- attr(Terms,"response")
+
+  if(resp==0 && missmethod)
+    method <- 'reverse'
+
+  if(test && method!='reverse')
+    stop('test=TRUE only allowed for method="reverse"')
+
+  if(method!='reverse' && resp!=1)
+    stop("must have a variable on the left hand side of the formula")
+
+  nact <- attr(X, "na.action")
+  nvar <- ncol(X)-1
+  strat <- attr(Terms,'specials')$stratify
+
+  getlab <- function(x, default)
+  {
+    lab <- attr(x, 'label')
+    if(!length(lab) || lab=='') default else lab
+  }
+
+  if(length(strat)) {
+    if(method!='response')
+      stop('stratify only allowed for method="response"')
+
+    temp <- untangle.specials(Terms,'stratify')
+    strat.name <- var.inner(Terms)[temp$terms]
+    strat <- if(length(temp$vars)==1) as.factor(X[[temp$vars]])
+             else stratify(X[,temp$vars])
+
+    strat.label <- getlab(X[,temp$vars[1]], strat.name)
+
+    X[[temp$vars]] <- NULL   # remove strata factors
+  } else {
+    strat <- factor(rep('',nrow(X)))
+    strat.name <- strat.label <- ''
+  }
+
+  nstrat <- length(levels(strat))
+
+  if(resp>0) {
+    Y <- X[[resp]]
+    yname <- as.character(attr(Terms,'variables'))[2]
+    ylabel <- getlab(Y, yname)
+
+    if(!is.matrix(Y))
+      Y <- matrix(Y, dimnames=list(names(Y),yname))
+  } else {
+    yname <- ylabel <- NULL
+  }
+
+  if(method!='reverse') {
+    if(!length(fun)) {   # was missing(fun) 25May01
+      fun <- function(y) apply(y, 2, mean)
+
+      uy <- unique(Y[!is.na(Y)])   # fixed 16Mar96
+      r <- range(uy, na.rm=TRUE)
+      funlab <- if(length(uy)==2 && r[1]==0 & r[2]==1) "Fraction"
+                else "Mean"
+
+      funlab <- paste(funlab, 'of', yname)
+    } else if(is.character(fun) && fun=='%') {
+      fun <- function(y)
+      {
+        stats <- 100*apply(y, 2, mean)
+        names(stats) <- paste(dimnames(y)[[2]],'%')
+        stats
+      }
+
+      funlab <- paste('% of', yname)
+    }
+
+    ## Compute number of descriptive statistics per cell
+    s <-
+      if(inherits(Y,'Surv'))
+        as.vector((1 * is.na(unclass(Y))) %*% rep(1, ncol(Y)) > 0)
+      else
+        ((if(is.character(Y)) Y==''|Y=='NA'
+          else is.na(Y)) %*%
+         rep(1,ncol(Y))) > 0
+
+    stats <- if(length(dim(Y))) fun(Y[!s,,drop=FALSE])
[...]
r','Upper')
+  quant
+}
+
+
+asNumericMatrix <- function(x)
+{
+  a <- attributes(x)
+  k <- length(a$names)
+  at <- vector('list', k); names(at) <- a$names
+  for(i in 1:k) {
+    xi <- x[[i]]
+    ischar <- FALSE
+    A <- attributes(xi)
+    if(is.character(xi)) {
+      ischar <- TRUE
+      xi <- factor(xi)
+      A <- c(A, attributes(xi))
+      x[[i]] <- xi
+    }
+    A$dim <- A$names <- A$dimnames <- NULL
+    A$ischar <- ischar
+    at[[i]] <- A
+  }
+#  assign('origAttributes', at, pos='.GlobalEnv')
+  resp <- matrix(unlist(x), ncol=k,
+                 dimnames=list(a$row.names, a$names))
+  attr(resp, 'origAttributes') <- at
+  resp
+}
+
+matrix2dataFrame <- function(x, at=attr(x, 'origAttributes'), restoreAll=TRUE)
+{
+  d <- dimnames(x)
+  k <- length(d[[2]])
+  w <- vector('list',k)
+  nam <- names(w) <- d[[2]]
+  sm <- storage.mode(x)
+
+  for(i in 1:k) {
+    a <- at[[nam[i]]]
+    isc <- a$ischar
+    if(!length(a))
+      next
+
+    xi <- x[,i]
+    names(xi) <- NULL
+    if(restoreAll) {
+      a$ischar <- NULL
+      if(isc) {
+        xi <- as.character(xi)
+        a$levels <- NULL
+        if(length(a$class)) a$class <- setdiff(a$class, 'factor')
+      }
+      if('factor' %in% a$class) storage.mode(xi) <- 'integer'
+      ## R won't let something be assigned class factor by brute
+      ## force unless it's an integer object
+      attributes(xi) <- a
+    } else {
+      if(length(l <- a$label))
+        label(xi) <- l
+
+      if(length(u <- a$units))
+        units(xi) <- u
+
+      if(length(lev <- a$levels)) {
+        xi <- factor(xi, 1:length(lev), lev)
+        if(isc) xi <- as.character(xi)
+      }
+    }
+
+    w[[i]] <- xi
+  }
+  rn <- d[[1]]
+  if(!length(rn)) rn <- as.character(seq(along=xi))
+  structure(w, class='data.frame', row.names=rn)
+}
+
+
+stripChart <- function(x, xlim, xlab='', pch=1,
+                       cex.labels=par('cex'), cex.points=.5,
+                       lcolor='gray',
+                       grid=FALSE)
+{
+  groups <- names(x)
+  if(missing(xlim))
+    xlim <- range(unlist(x),na.rm=TRUE)
+
+  i <- integer(0)
+
+  if(grid) {
+    lines <- llines;
+    points <- lpoints;
+    segments <- lsegments
+  }
+
+  plot.new()
+
+  mai <- omai <- par('mai')
+  on.exit(par(mai=omai))
+  mxlab <- .3+max(strwidth(groups, units='inches', cex=cex.labels))
+  mai[2] <- mxlab
+  par(mai=mai, new=TRUE)
+
+  plot(xlim, c(.5,length(groups)+.5), xlim=xlim, xlab='', ylab='',
+       axes=FALSE, type='n')
+  box()
+  mgp.axis(1, axistitle=xlab)
+
+  mtext(paste(groups,''), 2, 0, at=length(groups):1,
+        adj=1, las=1, cex=cex.labels)
+
+  y <- 0
+  abline(h = 1:length(groups), lty = 1, lwd=1, col=lcolor)
+
+  for(Y in length(groups):1) {
+    y <- y + 1
+    X <- x[[y]]
+    if(length(X))
+      points(X, rep(Y, length(X)), pch=pch)
+  }
+}
+
+conTestkw <- function(group,x) {
+  st <- spearman2(group,x)
+  list(P=st['P'], stat=st['F'],
+       df=st[c('df1','df2')],
+       testname=if(st['df1']==1)'Wilcoxon'
+                else 'Kruskal-Wallis',
+       statname='F', latexstat='F_{df}',
+       plotmathstat='F[df]')
+}
+catTestchisq=function(tab) {
+  st <-
+    if(!is.matrix(tab) || nrow(tab) < 2 | ncol(tab) < 2)
+      list(p.value=NA, statistic=NA, parameter=NA)
+    else {
+      rowcounts <- tab %*% rep(1, ncol(tab))
+      tab <- tab[rowcounts > 0,]
+      if(!is.matrix(tab))
+        list(p.value=NA, statistic=NA, parameter=NA)
+      else chisq.test(tab, correct=FALSE)
+    }
+  list(P=st$p.value, stat=st$statistic,
+       df=st$parameter,
+       testname='Pearson', statname='Chi-square',
+       latexstat='\\chi^{2}_{df}',
+       plotmathstat='chi[df]^2')
+}
+ordTestpo=function(group, x) {
+  require(rms)
+  f <- rms::lrm(x ~ group)$stats
+  list(P=f['P'], stat=f['Model L.R.'], df=f['d.f.'],
+       testname='Proportional odds likelihood ratio',
+       statname='Chi-square',latexstat='\\chi^{2}_{df}',
+       plotmathstat='chi[df]^2')
+}
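Hedged sketch of the two main methods on simulated data:

library(Hmisc)
set.seed(2)
d <- data.frame(treat=sample(c('A','B'), 100, TRUE),
                age=rnorm(100, 50, 10),
                sex=sample(c('female','male'), 100, TRUE))
summary(age ~ treat + sex, data=d)                     # method='response': mean age per stratum
summary(treat ~ age + sex, data=d, method='reverse')   # describe age and sex by treatment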
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/summaryM.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/summaryM.s Wed Jun 28 20:28:48 2017 -0400 |
@@ -0,0 +1,749 @@
+summaryM <- function(formula, groups=NULL, data=NULL, subset,
+                     na.action=na.retain,
+                     overall=FALSE, continuous=10, na.include=FALSE,
+                     quant=c(0.025, 0.05, 0.125, 0.25, 0.375, 0.5, 0.625,
+                             0.75, 0.875, 0.95, 0.975),
+                     nmin=100, test=FALSE,
+                     conTest=conTestkw, catTest=catTestchisq,
+                     ordTest=ordTestpo) {
+
+  formula <- Formula(formula)
+  Y <- if(!missing(subset) && length(subset))
+         model.frame(formula, data=data, subset=subset, na.action=na.action)
+       else
+         model.frame(formula, data=data, na.action=na.action)
+
+#  mf <- match.call(expand.dots=FALSE)
+#  m <- match(c('formula', 'data', 'subset', 'na.action'), names(mf), 0)
+#  mf <- mf[c(1, m)]
+#  if(missing(na.action)) mf$na.action <- na.retain
+#  formula <- Formula(formula)
+#  mf[[1]] <- as.name('model.frame')
+#  mf$formula <- formula
+#  Y <- eval(mf, parent.frame())
+
+  X <- model.part(formula, data=Y, rhs=1)
+  Y <- model.part(formula, data=Y, lhs=1)
+
+  getlab <- function(x, default) {
+    lab <- attr(x, 'label')
+    if(!length(lab) || lab=='') default else lab
+  }
+
+  if(length(X)) {
+    xname <- names(X)
+    if(length(xname) == 1 && ! length(groups)) groups <- xname
+    if(! length(groups) && length(xname) > 1) {
+      warning('Must specify groups when > 1 right hand side variable is present.\ngroups taken as first right hand variable.')
+      groups <- xname[1]
+    }
+    svar <- if(length(xname) == 1) factor(rep('.ALL.', nrow(X)))
+            else do.call('interaction', list(X[setdiff(xname, groups)], sep=' '))
+
+    group <- X[[groups]]
+    glabel <- getlab(group, groups)
+  } else {
+    svar <- factor(rep('.ALL.', nrow(Y)))
+    group <- rep('', nrow(Y))   # y1 + y2 ~ 1, no grouping
+    groups <- group.freq <- NULL
+    glabel <- ''
+  }
+
+  quants <- unique(c(quant, 0.025, 0.05, 0.125, 0.25, 0.375, 0.5,
+                     0.625, 0.75, 0.875, 0.95, 0.975))
+
+  nv <- ncol(Y)
+  nameY <- names(Y)
+
+  R <- list()
+  for(strat in levels(svar)) {
+    instrat <- svar == strat
+
+    n <- integer(nv)
+    type <- n
+
+    comp <- dat <- vector("list", nv)
+    names(comp) <- names(dat) <- nameY
+
+    labels <- Units <- vector("character", nv)
+    if(test) {
+      testresults <- vector('list', nv)
+      names(testresults) <- names(comp)
+    }
+
+    gr <- group[instrat]
+    group.freq <- table(gr)
+    group.freq <- group.freq[group.freq > 0]
+    if(overall) group.freq <- c(group.freq, Combined=sum(group.freq))
+
+    for(i in 1 : nv) {
+      w <- Y[instrat, i]
+
+      if(length(attr(w, "label")))
+        labels[i] <- attr(w, "label")
+
+      if(length(attr(w, 'units'))) Units[i] <- attr(w, 'units')
+
+      if(!inherits(w, 'mChoice')) {
+        if(!is.factor(w) && !is.logical(w) &&
+           length(unique(w[! is.na(w)])) < continuous)
+          w <- as.factor(w)
+
+        s <- !is.na(w)
+
+        if(na.include && !all(s) && length(levels(w))) {
+          w <- na.include(w)
+          levels(w)[is.na(levels(w))] <- 'NA'
+          s <- rep(TRUE, length(s))
+        }
+
+        n[i] <- sum(s)
+        w <- w[s]
+        g <- gr[s, drop=TRUE]
+        if(is.factor(w) || is.logical(w)) {
+          tab <- table(w, g)
+          if(test) {
+            if(is.ordered(w))
+              testresults[[i]] <- ordTest(g, w)
+            else
+              testresults[[i]] <- catTest(tab)
+          }
+
+          if(nrow(tab) == 1) {
+            b <- casefold(dimnames(tab)[[1]], upper=TRUE)
+            pres <- c('1', 'Y', 'YES', 'PRESENT')
+            abse <- c('0', 'N', 'NO', 'ABSENT')
+            jj <- match(b, pres, nomatch=0)
+            if(jj > 0) bc <- abse[jj]
+            else {
+              jj <- match(b, abse, nomatch=0)
+              if(jj > 0) bc <- pres[jj]
[...]
e.bold,
+                     outer.size=outer.size, msdsize=msdsize,
+                     pdig=pdig, eps=eps, footnoteTest=gt1.test)
+
+      cstats <- rbind(cstats, cs)
+      if(length(auxc) && nrow(cstats) > 1)
+        auxc <- c(auxc, rep(NA, nrow(cs)-1))
+    }
+
+    lab <- dimnames(cstats)[[1]]
+    gl <- names(x$group.freq)
+    if(!length(gl)) gl <- " "
+
+    lab <- sedit(lab,c(" ","&"),c("~","\\&"))   #was format(lab) 21Jan99
+    lab <- latexTranslate(lab, greek=TRUE)
+    gl <- latexTranslate(gl, greek=TRUE)
+    extracolheads <-
+      if(any(gl != " "))
+        c(if(prn)'', paste('$N=',x$group.freq,'$',sep=''))
+      else NULL
+
+    if(length(test) && !all(prtest=='none')) {
+      gl <- c(gl,
+              if(length(prtest)==1 && prtest!='stat')
+                if(prtest=='P') 'P-value'
+                else prtest
+              else 'Test Statistic')
+
+      if(length(extracolheads)) extracolheads <- c(extracolheads,'')
+    }
+
+    dimnames(cstats) <- list(NULL,gl)
+    cstats <- data.frame(cstats, check.names=FALSE, stringsAsFactors=FALSE)
+
+    col.just <- rep("c",length(gl))
+    if(dcolumn && all(prtest!='none') &&
+       gl[length(gl)] %in% c('P-value','Test Statistic'))
+      col.just[length(col.just)] <- '.'
+
+    if(prn) {
+      cstats <- data.frame(N=nn, cstats, check.names=FALSE,
+                           stringsAsFactors=FALSE)
+      col.just <- c("r",col.just)
+    }
+
+    legend <- character()
+    if(any(type==2))
+      legend <- paste("{\\", outer.size, " $a$\\ }{", bld, "$b$\\ }{\\",
+                      outer.size, " $c$\\ } represent the lower quartile $a$, the median $b$, and the upper quartile $c$\\ for continuous variables.",
+                      if(prmsd) '~~$x\\pm s$ represents $\\bar{X}\\pm 1$ SD.'
+                      else '',
+                      sep="")
+
+    if(prn)
+      legend <- c(legend, '$N$\\ is the number of non--missing values.')
+
+    if(any(type == 1) && npct=='numerator')
+      legend <- c(legend, 'Numbers after percents are frequencies.')
+
+    if(length(testUsed))
+      legend <- c(legend,
+                  if(length(testUsed) == 1)'\\noindent Test used:'
+                  else '\\indent Tests used:',
+                  if(length(testUsed) == 1) paste(testUsed, 'test')
+                  else paste(paste('\\textsuperscript{\\normalfont ',
+                                   1:length(testUsed),'}',testUsed,
+                                   ' test',sep=''),collapse='; '))
+
+    if(length(auxc)) {
+      if(length(auxc) != nrow(cstats))
+        stop(paste('length of auxCol (',length(auxCol[[1]]),
+                   ') is not equal to number of variables in table (',
+                   nv,').', sep=''))
+      auxcc <- format(auxc)
+      auxcc[is.na(auxc)] <- ''
+      cstats <- cbind(auxcc, cstats)
+      nax <- names(auxCol)
+      heads <- get2rowHeads(nax)
+      names(cstats)[1] <- heads[[1]]
+      if(length(col.just)) col.just <- c('r', col.just)
+      if(length(extracolheads)) extracolheads <- c(heads[2], extracolheads)
+    }
+    if(length(legend) && ! table.env)
+      legend[1] <- paste('\n', legend[1], sep='')
+    laststrat <- strat == strats[length(strats)]
+    noib <- is.logical(insert.bottom) && ! insert.bottom
+    w <- latex(cstats, title=title, file=file, append=TRUE,
+               caption=if(table.env)
+                 paste(caption, if(laststrat) paste(legend, collapse=' '),
+                       sep='. '),
+               rowlabel=rowlabel, table.env=table.env,
+               col.just=col.just, numeric.dollar=FALSE,
+               insert.bottom=if(! noib && laststrat && ! table.env) legend,
+               rowname=lab, dcolumn=dcolumn,
+               extracolheads=extracolheads, extracolsize=Nsize,
+               insert.top=if(strat != '.ALL.') strat,
+               ...)
+    attr(w, 'legend') <- legend
+  }
+  w
+}
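The equivalent summaryM call for a reverse-style table, with tests (a minimal sketch on simulated data):

set.seed(3)
d <- data.frame(treat=sample(c('A','B'), 80, TRUE),
                age=rnorm(80, 60, 8),
                sex=factor(sample(c('f','m'), 80, TRUE)))
s <- summaryM(age + sex ~ treat, data=d, test=TRUE)
print(s)   # quartiles for age, counts/percents for sex, plus test statistics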
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/summaryP.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/summaryP.s Wed Jun 28 20:28:48 2017 -0400 |
| b"@@ -0,0 +1,250 @@\n+summaryP <- function(formula, data=NULL,\n+ subset=NULL, na.action=na.retain,\n+ exclude1=TRUE, sort=TRUE,\n+ asna=c('unknown', 'unspecified'), ...) {\n+ \n+ formula <- Formula(formula)\n+\n+ Y <- if(length(subset))\n+ model.frame(formula, data=data, subset=subset, na.action=na.action)\n+ else\n+ model.frame(formula, data=data, na.action=na.action)\n+ X <- model.part(formula, data=Y, rhs=1)\n+ Y <- model.part(formula, data=Y, lhs=1)\n+ nY <- NCOL(Y)\n+ nX <- NCOL(X)\n+ namY <- names(Y)\n+ if(nX == 0) X <- data.frame(x=rep(1, NROW(Y)))\n+ else {\n+ ## Remove observations with any values of X NA\n+ i <- apply(is.na(X), 1, any)\n+ if(any(i)) {\n+ X <- X[! i,, drop=FALSE]\n+ Y <- Y[! i,, drop=FALSE]\n+ }\n+ }\n+ ux <- unique(X)\n+ Z <- NULL\n+ n <- nrow(X)\n+ \n+ if(sort) {\n+ ## Compute marginal frequencies of all regular variables so can sort\n+ mfreq <- list()\n+ for(ny in namY) {\n+ y <- Y[[ny]]\n+ if(!inherits(y, 'ynbind') && !inherits(y, 'pBlock')) {\n+ if(length(asna) && (is.factor(y) || is.character(y)))\n+ y[y %in% asna] <- NA\n+ freq <- table(y)\n+ counts <- as.numeric(freq)\n+ names(counts) <- names(freq)\n+ mfreq[[ny]] <- - sort(- counts)\n+ }\n+ }\n+ }\n+ for(i in 1 : nrow(ux)) {\n+ j <- rep(TRUE, n)\n+ if(nX > 0) for(k in 1 : nX) j <- j & (X[[k]] == ux[i, k])\n+ for(k in 1 : nY) {\n+ ## y <- yx[[k]] doesn't work as attributes lost by [.data.frame\n+ y <- Y[[k]]\n+ y <- if(is.matrix(y)) y[j,, drop=FALSE] else y[j]\n+# y <- (Y[[k]])[j,, drop=FALSE]\n+ if(inherits(y, 'ynbind') || inherits(y, 'pBlock')) {\n+ overlab <- attr(y, 'label')\n+ labs <- attr(y, 'labels')\n+ z <- NULL\n+ for(iy in 1 : ncol(y)) {\n+ tab <- table(y[, iy])\n+ no <- as.numeric(sum(tab))\n+ d <- if(inherits(y, 'ynbind'))\n+ data.frame(var=overlab,\n+ val=labs[iy],\n+ freq=as.numeric(tab['TRUE']),\n+ denom=no)\n+ else\n+ data.frame(var=overlab,\n+ val=names(tab), # paste(labs[iy], names(tab)),\n+ freq=as.numeric(tab),\n+ denom=no)\n+ z <- rbind(z, d)\n+ }\n+ }\n+ else { # regular single column\n+ if(length(asna) && (is.factor(y) || is.character(y)))\n+ y[y %in% asna] <- NA\n+ tab <- table(y)\n+ ny <- namY[k]\n+ la <- label(y)\n+ if(la == '') la <- ny\n+ lev <- names(tab)\n+ mf <- mfreq[[ny]]\n+ no <- as.numeric(sum(tab))\n+ if(exclude1 && length(mf) == 2) {\n+ lowest <- names(which.min(mf))\n+ z <- data.frame(var=la, val=lowest,\n+ freq=as.numeric(tab[lowest]),\n+ denom=no)\n+ }\n+ else {\n+ if(sort) lev <- reorder(lev, (mfreq[[ny]])[lev])\n+ z <- data.frame(var=la, val=lev,\n+ freq=as.numeric(tab),\n+ denom=no)\n+ }\n+ }\n+ ## Add current X subset settings\n+ if(nX > 0) for(k in 1: nX) z[[names(ux)[k]]] <- ux[i, k]\n+ Z <- rbind(Z, z)\n+ }\n+ }\n+ structure(Z, class=c('summaryP', 'data.frame'), formula=formula,\n+ nX=nX, nY=nY)\n+}\n+\n+plot.summaryP <-\n+ function(x, formula=NULL, groups=NULL, xlim=c(-.05, 1.05), text.at=NULL,\n+ cex.values=0.5,\n+ key=list(columns=length(groupslevels),\n+ x=.75, y=-.04, cex=.9,\n+ col=trellis.par.get('superpose.symbol')$col, corner=c(0,1)),\n+ outerlabels=TRUE, autoarrange=TRUE, ...)\n+{\n+ X <- x\n+ at <- attributes(x)\n+ Form <- at$formula\n+ nX <- at$nX\n+ nY <- at$nY\n+\n+ groupslevels <- if(length(groups)) levels(x[[groups]])\n+ condvar <- setdiff(names(X), c('val', 'freq', 'denom', groups))\n+ ## Reorder condvar in de"..b'ngest.string)\n+ xpos <- unit(1, \'npc\') - unit(1, \'mm\')\n+ txt <- if(length(groups)) {\n+ groups <- groups[subscripts]\n+ tx <- \'\'\n+ ig <- 0\n+ xpos <- xpos - length(levels(groups)) * length.longest\n+ for(g in 
levels(groups)) {\n+ ig <- ig + 1\n+ i <- groups == g\n+ fr <- paste(x[i], denom[i], sep=\'/\')\n+ xpos <- xpos + length.longest\n+ grid.text(fr, xpos, unit(y, \'native\') - unit(1, \'mm\'),\n+ just=c(\'right\',\'top\'), gp=gpar(cex=cex.values, col=col[ig]))\n+ }\n+ }\n+ else {\n+ fr <- paste(x, denom, sep=\'/\')\n+ grid.text(fr, xpos, unit(y, \'native\') - unit(1, \'mm\'),\n+ gp=gpar(cex=cex.values, col=col[1]), just=c(\'right\',\'top\'))\n+ }\n+ }\n+ }\n+\n+ scal <- list(y=\'free\', rot=0)\n+ scal$x <- if(length(text.at)) {\n+ at <- pretty(xlim)\n+ list(limits=range(c(xlim, text.at)), at=at[at >= -0.0001 & at <= 1.0001])\n+ } else list(limits=xlim)\n+ d <- if(!length(groups))\n+ dotplot(form, data=X, scales=scal, panel=pan,\n+ xlab=\'Proportion\', ...)\n+ else eval(parse(text=\n+ sprintf("dotplot(form, groups=%s, data=X, scales=scal, panel=pan, auto.key=key, xlab=\'Proportion\', ...)", groups) ))\n+\n+# if(outerlabels && ((nX - length(groups) + 1 == 2) ||\n+# length(dim(d)) == 2)) d <- useOuterStrips(d)\n+ if(length(dim(d)) == 2) d <- useOuterStrips(d)\n+ ## Avoid wasting space for vertical variables with few levels\n+ if(condvar[length(condvar)] == \'var\') {\n+ vars <- levels(X$var)\n+ nv <- length(vars)\n+ h <- integer(nv)\n+ for(i in 1 : nv) h[i] <- length(unique((X$val[X$var == vars[i]])))\n+ d <- resizePanels(d, h = h + 1)\n+ }\n+ d\n+}\n+\n+latex.summaryP <- function(object, groups=NULL, file=\'\', round=3,\n+ size=NULL, append=TRUE, ...) {\n+ class(object) <- \'data.frame\'\n+ if(! append) cat(\'\', file=file)\n+\n+ p <- ifelse(object$denom == 0, \'\',\n+ format(round(object$freq / object$denom, round)))\n+ object$y <- paste(p, \' {\\\\scriptsize$\\\\frac{\',\n+ format(object$freq), \'}{\', format(object$denom),\n+ \'}$}\', sep=\'\')\n+ object$freq <- object$denom <- NULL\n+\n+ stratvar <- setdiff(names(object), c(\'var\', \'val\', \'y\', groups))\n+ svar <- if(! length(stratvar)) as.factor(rep(\'\', nrow(object)))\n+ else {\n+ if(length(stratvar) == 1) object[[stratvar]]\n+ else do.call(\'interaction\', list(object[stratvar], sep=\' \'))\n+ }\n+\n+ object$stratvar <- svar\n+ object <- object[, c(\'var\', \'val\', \'y\', groups, \'stratvar\')]\n+\n+ nl <- 0\n+\n+ slev <- levels(svar)\n+ nslev <- length(slev)\n+ for(i in 1 : nslev) {\n+ \n+ if(nslev > 1) cat(\'\\n\\\\vspace{1ex}\\n\\n\\\\textbf{\', slev[i],\n+ \'}\\n\\\\vspace{1ex}\\n\\n\', sep=\'\', file=file, append=TRUE)\n+ x <- object[svar == slev[i], colnames(object) != \'stratvar\']\n+ if(length(groups)) {\n+ r <- reshape(x, timevar=groups, direction=\'wide\',\n+ idvar=c(\'var\', \'val\'))\n+ ## reshape does not respect order of levels of factor variables; reorder\n+ lev <- levels(x[[groups]])\n+ r <- r[c(\'var\', \'val\', paste(\'y\', lev, sep=\'.\'))]\n+ \n+ nl <- length(lev)\n+ var <- unique(as.character(r$var))\n+ w <- latex(r[colnames(r) != \'var\'],\n+ table.env=FALSE, file=file, append=TRUE,\n+ rowlabel=\'\', rowname=rep(\'\', nrow(r)),\n+ rgroup=levels(r$var), n.rgroup=as.vector(table(r$var)),\n+ size=size,\n+ colheads=c(\' \', lev),\n+ center=\'none\')\n+ }\n+ else {\n+ w <- latex(x[colnames(x) != \'var\'],\n+ table.env=FALSE, file=file, append=TRUE,\n+ rowlabel=\'\', rowname=rep(\'\', nrow(x)),\n+ rgroup=levels(x$var), n.rgroup=as.vector(table(x$var)),\n+ size=size, colheads=c(\' \', \' \'), center=\'none\')\n+ }\n+ }\n+ attr(w, \'ngrouplevels\') <- nl\n+ w\n+}\n' |
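Sketch of summaryP on a categorical response (proportions by group):

set.seed(4)
d <- data.frame(region=sample(c('north','south'), 60, TRUE),
                smoker=sample(c('yes','no'), 60, TRUE))
p <- summaryP(smoker ~ region, data=d)
plot(p)   # lattice dot chart of the proportions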
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/summaryRc.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/R/summaryRc.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,147 @@ +summaryRc <- + function(formula, data=NULL, subset=NULL, na.action=NULL, + fun=function(x) x, na.rm=TRUE, + ylab=NULL, ylim = NULL, xlim=NULL, nloc=NULL, datadensity=NULL, + quant = c(0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95), + quantloc = c('top', 'bottom'), cex.quant=.6, srt.quant=0, + bpplot = c('none', 'top', 'top outside', 'top inside', 'bottom'), + height.bpplot = 0.08, + trim=NULL, test=FALSE, vnames=c('labels','names'), + ...) +{ + call <- match.call() + quantloc <- match.arg(quantloc) + vnames <- match.arg(vnames) + bpplot <- match.arg(bpplot) + if(bpplot == 'top') bpplot <- 'top inside' + X <- match.call(expand.dots=FALSE) + X$fun <- X$na.rm <- X$ylim <- X$xlim <- X$ylab <- X$nloc <- X$datadensity <- + X$quant <- X$quantloc <- X$cex.quant <- X$srt.quant <- X$trim <- X$test <- + X$vnames <- X$bpplot <- X$height.bpplot <- X$... <- NULL + if(missing(na.action)) X$na.action <- na.retain + + Terms <- if(missing(data)) terms(formula, 'stratify') + else terms(formula, 'stratify', data=data) + + X$formula <- Terms + X[[1]] <- as.name("model.frame") + + X <- eval(X, sys.parent()) + + Terms <- attr(X, "terms") + resp <- attr(Terms, "response") + + nact <- attr(X, "na.action") + nvar <- ncol(X) - 1 + strat <- attr(Terms, 'specials')$stratify + + getlab <- function(x, default) { + if(vnames == 'names') return(default) + lab <- attr(x, 'label') + if(!length(lab) || lab=='') default else lab + } + + if(length(strat)) { + temp <- untangle.specials(Terms,'stratify') + strat.name <- var.inner(Terms)[temp$terms] + strat <- if(length(temp$vars) == 1) as.factor(X[[temp$vars]]) + else stratify(X[,temp$vars]) + + strat.label <- getlab(X[, temp$vars[1]], strat.name) + X[[temp$vars]] <- NULL # remove strata factors + } + Y <- X[[resp]] + yname <- as.character(attr(Terms, 'variables'))[2] + ylabel <- if(length(ylab)) ylab else getlab(Y, yname) + + X[[resp]] <- NULL # remove response var + Y <- as.matrix(Y) + s <- rowSums(is.na(Y)) == ncol(Y) + nmissy <- sum(s) + if(nmissy) { + X <- X[!s,, drop=FALSE] + Y <- Y[!s,, drop=FALSE] + strat <- strat[!s] + } + + pl <- function(x, y, strat=NULL, quant, bpplot, width.bpplot, + xlab='', ylab='', + ylim=NULL, xlim=NULL, fun=function(x) x, ...) { + n <- sum(!is.na(x)) + group <- if(length(strat)) strat else rep(1, length(x)) + if(!length(trim)) trim <- if(n > 200) 10 / n else 0 + if(!length(xlim)) { + xlim <- if(trim == 0) range(x, na.rm=TRUE) + else quantile(x, c(trim, 1 - trim), na.rm=TRUE) + } + a <- list(x=x, y=y, xlab=xlab, ylab=ylab, xlim=xlim, trim=0, group=group, + datadensity=if(length(datadensity)) datadensity + else length(strat) > 0, ...) + if(length(fun)) a$fun <- fun + if(length(ylim)) a$ylim <- ylim + z <- do.call('plsmo', a) + usr <- par('usr') + xl <- usr[1:2] + yl <- usr[3:4] + if(! 
(length(nloc) && is.logical(nloc) && !nloc)) { + if(length(nloc)) { + xx <- nloc[[1]] + yy <- nloc[[2]] + xx <- xl[1] + xx * diff(xl) + yy <- yl[1] + yy * diff(yl) + w <- list(x=xx, y=yy) + } + else { + xs <- unlist(lapply(z, function(x)x$x)) + ys <- unlist(lapply(z, function(x)x$y)) + w <- largest.empty(xs, ys, method='area') + } + text(w, paste('n=', n, sep=''), cex=.75, font=3, adj=.5) + } + # Compute user y-units per inch + u <- diff(yl) / par('fin')[2] + if(bpplot != 'none') { + h <- u * height.bpplot + yy <- switch(bpplot, + 'top outside' = yl[2] + h/2 + u*.11, + 'top inside' = yl[2] - h/2 - u*.11, + bottom = yl[1] + h/2 + u*.11) + panel.bpplot(x, yy, nogrid=TRUE, pch=19, cex.means=.6, height=h) + } + else + if(length(quant)) { + h <- u * .15 + qu <- quantile(x, quant, na.rm=TRUE) + names(qu) <- as.character(quant) + qu <- pooleq(qu) + yq <- if(quantloc == 'top') yl[2] else yl[1] + arrows(qu, yq + h, qu, yq, col='blue', length=.05, xpd=NA) + if(cex.quant > 0) + text(qu, yq + 1.4 * h, names(qu), adj=if(srt.quant == 0) .5 else 0, + cex=cex.quant, srt=srt.quant, xpd=NA) + } + ## text(xl[2], yl[2] + h/4, paste('n=', n, sep=''), + ## cex=.75, font=3, adj=c(1,0), xpd=NA) + } + + ## Find all ties in quantiles and build combined labels + pooleq <- function(x) { + w <- tapply(names(x), x, paste, collapse=', ') + x <- as.numeric(names(w)) + names(x) <- w + x + } + + i <- 0 + nams <- names(X) + for(v in nams) { + i <- i + 1 + x <- X[[v]] + xlab <- getlab(x, nams[i]) + units <- if(length(l <- attr(x,'units'))) l else '' + xlab <- labelPlotmath(xlab, units) + pl(x, Y, strat=strat, quant=quant, bpplot=bpplot, + height.bpplot=height.bpplot, + xlab=xlab, ylab=ylabel, ylim=ylim, xlim=xlim[[v]], ...) + } +} |
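Sketch of summaryRc: smoothed estimates of y against each continuous predictor, stratified through the stratify() special:

set.seed(5)
d <- data.frame(x=rnorm(200), g=sample(c('a','b'), 200, TRUE))
d$y <- d$x^2 + rnorm(200)
summaryRc(y ~ x + stratify(g), data=d, bpplot='top', datadensity=TRUE)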
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/summaryS.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/summaryS.s	Wed Jun 28 20:28:48 2017 -0400
| b"@@ -0,0 +1,468 @@\n+summaryS <- function(formula, fun=NULL,\n+ data=NULL, subset=NULL, na.action=na.retain,\n+ continuous=10,\n+ ...) {\n+ \n+ formula <- Formula(formula)\n+\n+ Y <- if(length(subset))\n+ model.frame(formula, data=data, subset=subset, na.action=na.action)\n+ else\n+ model.frame(formula, data=data, na.action=na.action)\n+ X <- model.part(formula, data=Y, rhs=1)\n+ Y <- model.part(formula, data=Y, lhs=1)\n+\n+ nY <- NCOL(Y)\n+ nX <- NCOL(X)\n+ namY <- names(Y)\n+ namX <- names(X)\n+ if(nX == 0) X <- data.frame(x=rep(1, NROW(Y)))\n+ ux <- unique(X)\n+ Z <- NULL\n+ n <- nrow(X)\n+ \n+ g <- function(y) {\n+ y <- y[! is.na(y)]\n+ if(is.character(y)) c(NA, NA)\n+ else if(is.factor(y)) c(1, length(levels(y)))\n+ else if(length(unique(y)) < continuous) range(y)\n+ else if(! is.matrix(y)) quantile(y, c(0.01, 0.99))\n+ }\n+ ylim <- lapply(Y, g)\n+\n+ w <- reshape(cbind(X, Y), direction='long', v.names='y',\n+ varying=namY, times=namY, timevar='yvar')\n+\n+ if(is.Surv(Y[[1]])) {\n+ at <- attributes(Y[[1]])\n+ at <- at[setdiff(names(at), c('dim', 'dimnames'))]\n+ attributes(w$y) <- c(attributes(w$y), at)\n+ }\n+ w$yvar <- factor(w$yvar, namY)\n+ funlabel <- NULL\n+ if(length(fun)) {\n+ by <- c('yvar', if(length(namX)) namX else 'x')\n+ y <- w$y\n+ w <- summarize(y, w[by], fun, type='matrix', keepcolnames=TRUE)\n+ funlabel <- if(is.matrix(w$y)) colnames(w$y)[1]\n+ }\n+\n+ g <- function(x) if(is.character(x) || is.factor(x))\n+ 'categorical' else 'numeric'\n+ xlabels <- sapply(X, label)\n+ xlabels <- ifelse(xlabels == '', names(xlabels), xlabels)\n+ ylabels <- sapply(Y, label)\n+ ylabels <- ifelse(ylabels == '', names(ylabels), ylabels)\n+\n+ structure(w, class=c('summaryS', 'data.frame'), formula=formula, fun=fun,\n+ xnames=names(X), xlabels=xlabels, xunits=sapply(X, units),\n+ xtype=sapply(X, g),\n+ ynames=namY, ylabels=ylabels, yunits=sapply(Y, units),\n+ ylim=ylim, funlabel=funlabel)\n+}\n+\n+plot.summaryS <-\n+ function(x, formula=NULL, groups=NULL, panel=NULL,\n+ paneldoesgroups=FALSE, datadensity=NULL, ylab='',\n+ funlabel=NULL, textonly='n', textplot=NULL, digits=3, custom=NULL,\n+ xlim=NULL, ylim=NULL, cex.strip=1, cex.values=0.5, pch.stats=NULL,\n+ key=list(columns=length(groupslevels),\n+ x=.75, y=-.04, cex=.9,\n+ col=trellis.par.get('superpose.symbol')$col, corner=c(0,1)),\n+ outerlabels=TRUE, autoarrange=TRUE, scat1d.opts=NULL, ...)\n+{\n+ xtype <- attr(x, 'xtype')\n+ nn <- sum(xtype == 'numeric')\n+ if(nn > 1) stop('does not handle more than one numeric continuous x')\n+ X <- x\n+ at <- attributes(x)\n+ Form <- at$formula\n+ nX <- at$nX\n+ nY <- at$nY\n+ ylabels <- at$ylabels\n+ yunits <- at$yunits\n+ ylims <- at$ylim\n+ xnames <- at$xnames\n+ xlabels <- at$xlabels\n+ xunits <- at$xunits\n+ fun <- at$fun\n+ funlabel <- if(length(at$funlabel)) at$funlabel else funlabel\n+ Panel <- panel\n+ \n+ ptype <- if(length(fun)) { # used to always be 'dot'\n+ if(length(Panel)) 'xy.special' else 'dot'\n+ } else 'xy'\n+ if(ptype %in% c('xy', 'xy.special') && ! any(xtype == 'numeric'))\n+ stop('must have a numeric x variable to make x-y plot')\n+\n+ groupslevels <- if(length(groups)) levels(x[[groups]])\n+ condvar <- xnames[xtype == 'categorical']\n+ ## Reorder condvar in descending order of number of levels\n+ numu <- function(x) if(is.factor(x)) length(levels(x))\n+ else length(unique(x[! 
is.na(x)]))\n+\n+ if(autoarrange && length(condvar) > 1) {\n+ nlev <- sapply(X[condvar], numu)\n+ condvar <- condvar[order(nlev)]\n+ }\n+ \n+ form <- if(length(formula)) formula\n+ else {\n+ ## Non-groups conditioning variables\n+ ngcond <- setdiff(condvar, groups)\n+ ## Collapsed non-group conditioning variables\n+ ccv <- paste('|', paste(c(ngcond, 'yvar'), collapse=' * ')"..b'er(xu), c(\'y1\', \'y2\', \'se1\', \'se2\')))\n+ x1 <- as.character(x)[j1]\n+ x2 <- as.character(x)[j2]\n+ Y[x1, \'y1\' ] <- y [j1]\n+ Y[x1, \'se1\'] <- se[j1]\n+ Y[x2, \'y2\' ] <- y [j2]\n+ Y[x2, \'se2\'] <- se[j2]\n+ ymid <- (Y[, \'y1\'] + Y[, \'y2\']) / 2.\n+ halfwidthci <- qnorm(0.975) * sqrt(Y[, \'se1\']^2 + Y[, \'se2\']^2)\n+ col <- adjustcolor(\'black\', alpha.f=0.7)\n+ grid.segments(xu, ymid - 0.5 * halfwidthci,\n+ xu, ymid + 0.5 * halfwidthci,\n+ default.units=\'native\',\n+ gp=gpar(col=col, lwd=1.5))\n+ }\n+}\n+\n+medvPanel <-\n+ function(x, y, subscripts, groups=NULL, violin=TRUE, quantiles=FALSE,\n+ ...) {\n+ gp <- length(groups)\n+ plot.line <-\n+ trellis.par.get(if(gp) "superpose.line" else "plot.line")\n+ sym <- trellis.par.get(if(gp) "superpose.symbol" else "plot.symbol")\n+\n+ quant <- function(y) {\n+ probs <- c(0.05, 0.125, 0.25, 0.375)\n+ probs <- sort(c(probs, 1. - probs))\n+ y <- y[! is.na(y)]\n+ if(length(y) < 3) {\n+ if(quantiles) {\n+ w <- c(median(y), rep(NA, 9), length(y))\n+ names(w) <- c(\'Median\', format(probs), \'se\', \'n\')\n+ }\n+ else w <- c(Median=median(y), se=NA, n=length(y))\n+ return(w)\n+ }\n+ w <- if(quantiles) hdquantile(y, probs)\n+ m <- hdquantile(y, 0.5, se=TRUE)\n+ se <- as.numeric(attr(m, \'se\'))\n+ c(Median=as.numeric(m), w, se=se, n=length(y))\n+ }\n+\n+ denpoly <- function(x, y, col, n=50, pos, ...) {\n+ y <- y[! is.na(y)]\n+ n <- length(y)\n+ if(n < 2) return()\n+ den <- density(y, n=n, ...)\n+ d <- den$y\n+ y <- den$x\n+ ## Scale density of 0-3 mm\n+ d <- 3 * d / max(d)\n+ d <- c(d, d[length(d)])\n+ mm <- convertUnit(unit(d, \'mm\'), \'mm\', typeFrom=\'dimension\')\n+ kol <- if(n < 5 ) adjustcolor(col, alpha.f=0.2)\n+ else if(n < 10) adjustcolor(col, alpha.f=0.4)\n+ else col\n+ grid.polygon(y=unit(c(y, y[1]), \'native\'),\n+ x=if(pos == \'left\') unit(x, \'native\') - mm\n+ else unit(x, \'native\') + mm,\n+ gp=gpar(col=FALSE, fill=kol))\n+ }\n+ \n+\n+ gr <- if(gp) groups[subscripts] else factor(rep(\'\', length(x)))\n+ lev <- levels(gr)\n+ W <- NULL\n+ for(i in 1 : length(lev)) {\n+ j <- which(gr == levels(gr)[i])\n+ xj <- x[j]\n+ yj <- y[j]\n+ w <- summarize(yj, xj, quant, type=\'matrix\', keepcolnames=TRUE)\n+ Y <- w$yj\n+ xu <- w$xj\n+ lpoints(xu, Y[,\'Median\'], cex=sym$cex[i], pch=sym$pch[i], col=sym$col[i],\n+ alpha=sym$alpha[i])\n+ llines(xu, Y[,\'Median\'], col=plot.line$col[i], lty=plot.line$lty[i],\n+ lwd=plot.line$lwd[i], alpha=plot.line$alpha)\n+ col <- plot.line$col[i]\n+ if(violin) for(xx in sort(unique(xj)))\n+ denpoly(xx, yj[xj == xx],\n+ col=adjustcolor(plot.line$col[i], alpha.f=0.4),\n+ pos=c(\'left\', \'right\')[i])\n+ \n+ if(quantiles)\n+ multLines(xu, Y[, colnames(Y) %nin% c(\'se\', \'n\'), drop=FALSE],\n+ col=plot.line$col[i],\n+ lty=plot.line$lty[i],\n+ lwd=plot.line$lwd[i],\n+ grid=TRUE, pos=c(\'left\', \'right\')[i])\n+ W <- rbind(W, cbind(gr=levels(gr)[i], w))\n+ }\n+ if(length(lev) == 2) {\n+ x <- W$xj\n+ xu <- sort(unique(W$xj))\n+ j1 <- W$gr == lev[1]\n+ j2 <- W$gr == lev[2]\n+ Y <- matrix(NA, nrow=length(xu), ncol=4,\n+ dimnames=list(as.character(xu), c(\'y1\', \'y2\', \'se1\', \'se2\')))\n+ x1 <- as.character(x)[j1]\n+ x2 <- 
as.character(x)[j2]\n+ Y[x1, \'y1\' ] <- W$yj[j1, \'Median\']\n+ Y[x1, \'se1\'] <- W$yj[j1, \'se\']\n+ Y[x2, \'y2\' ] <- W$yj[j2, \'Median\']\n+ Y[x2, \'se2\'] <- W$yj[j2, \'se\']\n+ ymid <- (Y[, \'y1\'] + Y[, \'y2\']) / 2.\n+ halfwidthci <- qnorm(0.975) * sqrt(Y[, \'se1\']^2 + Y[, \'se2\']^2)\n+ col <- adjustcolor(\'black\', alpha.f=0.7)\n+ grid.segments(xu, ymid - 0.5 * halfwidthci,\n+ xu, ymid + 0.5 * halfwidthci,\n+ default.units=\'native\',\n+ gp=gpar(col=col, lwd=1.5))\n+ }\n+}\n' |
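A usage sketch for summaryS() and its lattice plot method (assumed data; groups= names a conditioning variable from the formula):

    library(Hmisc)
    set.seed(2)
    d <- data.frame(days   = rep(c(30, 60, 90), each = 40),
                    region = sample(c('north', 'south'), 120, TRUE),
                    sbp    = rnorm(120, 120, 12),
                    dbp    = rnorm(120,  80,  8))
    s <- summaryS(sbp + dbp ~ days + region, data = d)  # two response variables
    plot(s, groups = 'region')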
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/symbol.freq.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/symbol.freq.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,133 @@ +## marginals applies only to symbol="therm", orig.scale to symbol="circle" + +symbol.freq <- function(x, y, symbol=c("thermometer","circle"), + marginals=FALSE, orig.scale=FALSE, + inches=.25, width=.15, subset, srtx=0, ...) +{ + symbol <- match.arg(symbol) + if(missing(subset)) + subset <- rep(TRUE, length(x)) + + if(!is.logical(subset)) { + s <- rep(FALSE,length(x)) + s[subset] <- FALSE + subset <- s + } + + xlab <- attr(x,'label') + if(!length(xlab)) + xlab <- as.character(substitute(x)) + + ylab <- attr(y,'label') + if(!length(ylab)) + ylab <- as.character(substitute(y)) + + s <- !(is.na(x) | is.na(y)) & subset + x <- x[s] + y <- y[s] + f <- table(x, y) + dx <- dimnames(f)[[1]] + dy <- dimnames(f)[[2]] + if(orig.scale) + xp <- as.numeric(dimnames(f)[[1]]) + else + xp <- 1:length(dimnames(f)[[1]]) + + xp1 <- length(xp)+1 + if(orig.scale) + yp <- as.numeric(dimnames(f)[[2]]) + else + yp <- 1:length(dimnames(f)[[2]]) + + yp1 <- length(yp)+1 + m <- nrow(f) * ncol(f) + xx <- single(m) + yy <- single(m) + zz <- single(m) + k <- 0 + for(i in 1:nrow(f)) { + for(j in 1:ncol(f)) { + k <- k + 1 + xx[k] <- xp[i] + yy[k] <- yp[j] + if(f[i, j] > 0) + zz[k] <- f[i, j] + else zz[k] <- NA + } + } + + maxn <- max(f) + n <- 10^round(log10(maxn)) + if(marginals) { + xx <- c(xx, rep(xp1, length(yp))) + yy <- c(yy, yp) + zz <- c(zz, table(y)/2) + xx <- c(xx, xp) + yy <- c(yy, rep(yp1, length(xp))) + zz <- c(zz, table(x)/2) + xx <- c(xx, xp1) + yy <- c(yy, yp1) + zz <- c(zz, n) + } + + if(symbol=="circle") { + ## zz <- inches*sqrt(zz/maxn) + zz <- sqrt(zz) + if(orig.scale) + symbols(xx,yy,circles=zz,inches=inches, + smo=.02,xlab=xlab,ylab=ylab,...) + else + symbols(xx,yy,circles=zz,inches=inches,smo=.02, + xlab=xlab,ylab=ylab,axes=FALSE,...) + + title(sub=paste("n=",sum(s),sep=""),adj=0) + if(marginals) { + axis(1, at = 1:xp1, + labels = c(dx, "All/2"), srt=srtx, + adj=if(srtx>0)1 + else .5) + + axis(2, at = 1:yp1, + labels = c(dy, "All/2"),adj=1) + } else { # if(!orig.scale) { + axis(1, at=xp, labels=dx, srt=srtx, + adj=if(srtx>0)1 + else .5) + + axis(2, at=yp, labels=dy) + } + + return(invisible()) + } + + zz <- cbind(rep(width,length(zz)), inches*zz/maxn, rep(0,length(zz))) + symbols(xx,yy,thermometers=zz,inches=FALSE, + axes=FALSE,xlab=xlab,ylab=ylab,...) + title(sub=paste("n=",sum(s),sep=""),adj=0) + if(marginals) { + text(xp1-width, yp1, n, adj=1, cex=.5) + axis(1, at = 1:xp1, + labels = c(dx, "All/2"), srt=srtx, + adj=if(srtx>0)1 + else .5) + + axis(2, at = 1:yp1, + labels = c(dy, "All/2"),adj=1) + abline(h=yp1-.5, lty=2) + abline(v=xp1-.5, lty=2) + } else { + axis(1, at=xp, labels=dx, srt=srtx, + adj=if(srtx>0)1 + else .5) + + axis(2, at=yp, labels=dy) + cat("click left mouse button to position legend\n") + xy <- locator(1) + symbols(xy$x, xy$y, thermometers=cbind(width,inches*n/maxn,0), + inches=FALSE,add=TRUE,xlab=xlab,ylab=ylab) + text(xy$x-width, xy$y, n,adj=1,cex=.5) + } + + box() + invisible() +} |
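A sketch of symbol.freq() on invented discrete data; symbol='circle' is used here because the thermometer legend branch calls locator() interactively:

    set.seed(3)
    x <- sample(1:4, 300, TRUE)
    y <- sample(1:3, 300, TRUE)
    symbol.freq(x, y, symbol = 'circle', inches = .25)  # circle size ~ sqrt(frequency)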
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/sys.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/sys.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,6 @@
+## Improvements by Sebastian Weber <Sebastian.Weber@aventis.com> 26Aug03
+
+sys <- function(command, text=NULL, output=TRUE) {
+  cmd <- if(length(text)) paste(command, text) else command
+  system(cmd, intern=output)
+}
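sys() just pastes command and text together and captures the output of system(); for example, on a Unix-alike:

    files <- sys('ls', '-l')     # runs `ls -l`, returns output lines as a character vector
    sys('date', output = FALSE)  # run for the side effect; output not captured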
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/t.test.cluster.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/t.test.cluster.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,140 @@ +t.test.cluster <- function(y, cluster, group, conf.int=.95) +{ + ## See: + ## Donner A, Birkett N, Buck C, Am J Epi 114:906-914, 1981. + ## Donner A, Klar N, J Clin Epi 49:435-439, 1996. + ## Hsieh FY, Stat in Med 8:1195-1201, 1988. + + group <- as.factor(group) + cluster <- as.factor(cluster) + s <- !(is.na(y)|is.na(cluster)|is.na(group)) + y <- y[s]; + cluster <- cluster[s]; + group <- group[s] + n <- length(y) + + if(n<2) + stop("n<2") + + gr <- levels(group) + if(length(gr)!=2) + stop("must have exactly two treatment groups") + + n <- table(group) + nc <- tapply(cluster, group, function(x)length(unique(x))) + bar <- tapply(y, group, mean) + + u <- unclass(group) + y1 <- y[u==1]; + y2 <- y[u==2] + + c1 <- factor(cluster[u==1]); + c2 <- factor(cluster[u==2]) #factor rids unused lev + + b1 <- tapply(y1, c1, mean); + b2 <- tapply(y2, c2, mean) + + m1 <- table(c1); + m2 <- table(c2) + + if(any(names(m1)!=names(b1))) + stop("logic error 1") + + if(any(names(m2)!=names(b2))) + stop("logic error 2") + + if(any(m2 < 2)) + stop(paste('The following clusters contain only one observation:', + paste(names(m2[m2 < 2]), collapse=' '))) + + M1 <- mean(y1); + M2 <- mean(y2) + + ssc1 <- sum(m1*((b1-M1)^2)); + ssc2 <- sum(m2*((b2-M2)^2)) + + if(nc[1]!=length(m1)) + stop("logic error 3") + + if(nc[2]!=length(m2)) + stop("logic error 4") + + df.msc <- sum(nc)-2 + msc <- (ssc1+ssc2)/df.msc + v1 <- tapply(y1,c1,var); + v2 <- tapply(y2,c2,var) + + ssw1 <- sum((m1-1)*v1); + ssw2 <- sum((m2-1)*v2) + + df.mse <- sum(n)-sum(nc) + mse <- (ssw1+ssw2)/df.mse + na <- (sum(n)-(sum(m1^2)/n[1]+sum(m2^2)/n[2]))/(sum(nc)-1) + rho <- (msc-mse)/(msc+(na-1)*mse) + r <- max(rho, 0) + C1 <- sum(m1*(1+(m1-1)*r))/n[1] + C2 <- sum(m2*(1+(m2-1)*r))/n[2] + v <- mse*(C1/n[1]+C2/n[2]) + v.unadj <- mse*(1/n[1]+1/n[2]) + de <- v/v.unadj + dif <- diff(bar) + se <- sqrt(v) + zcrit <- qnorm((1+conf.int)/2) + cl <- c(dif-zcrit*se, dif+zcrit*se) + z <- dif/se + P <- 2*pnorm(-abs(z)) + + + stats <- + matrix(NA, nrow=20, ncol=2, + dimnames=list(c("N","Clusters","Mean", + "SS among clusters within groups", + "SS within clusters within groups", + "MS among clusters within groups","d.f.", + "MS within clusters within groups","d.f.", + "Na","Intracluster correlation", + "Variance Correction Factor","Variance of effect", + "Variance without cluster adjustment","Design Effect", + "Effect (Difference in Means)", + "S.E. of Effect",paste(format(conf.int),"Confidence limits"), + "Z Statistic","2-sided P Value"), gr)) + + stats[1,] <- n + stats[2,] <- nc + stats[3,] <- bar + stats[4,] <- c(ssc1, ssc2) + stats[5,] <- c(ssw1, ssw2) + stats[6,1] <- msc + stats[7,1] <- df.msc + stats[8,1] <- mse + stats[9,1] <- df.mse + stats[10,1] <- na + stats[11,1] <- rho + stats[12,] <- c(C1, C2) + stats[13,1] <- v + stats[14,1] <- v.unadj + stats[15,1] <- de + stats[16,1] <- dif + stats[17,1] <- se + stats[18,] <- cl + stats[19,1] <- z + stats[20,1] <- P + + attr(stats,'class') <- "t.test.cluster" + stats +} + +print.t.test.cluster <- function(x, digits, ...) +{ + ## if(!missing(digits)).Options$digits <- digits 6Aug00 + if(!missing(digits)) { + oldopt <- options(digits=digits) + on.exit(options(oldopt)) + } + + cstats <- t(apply(x,1,format)) + ## cstats <- format(x) + attr(cstats,'class') <- NULL + cstats[is.na(x)] <- "" + invisible(print(cstats, quote=FALSE)) +} |
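A small simulated example of the cluster-adjusted two-sample test (data invented; note each cluster must contain at least two observations):

    set.seed(5)
    cl <- rep(1:20, each = 10)          # 20 clusters of 10 observations
    gr <- rep(c('A', 'B'), each = 100)  # clusters 1-10 in group A, 11-20 in B
    y  <- rnorm(200) + 0.4 * (gr == 'B') + rnorm(20)[cl]  # add a cluster effect
    t.test.cluster(y, cluster = cl, group = gr)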
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/tabulr.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/tabulr.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,167 @@ +tabulr <- function(formula, data=NULL, nolabel=NULL, nofill=NULL, ...) { + ## require(gsubfn) || stop('package gsubfn not installed') + if(!length(data)) data <- environment(formula) + else if(is.list(data)) data <- list2env(data, parent=environment(formula)) + + ## f <- as.character(deparse(formula)) + lab <- function(x, hfill=TRUE) { + ## x <- gsub('^ +', '', x) + ## x <- gsub(' +$', '', x) + l <- labelLatex(get(x, envir=data), default=x, double=TRUE, hfill=hfill) + paste("Heading('", l, "')*", x, sep='') + } + lab <- function(x) { + x <- deparse(x) + if(x == 'trio') return('table_trio') + if(x == 'freq') return('table_freq') + if(x == 'N') return('Heading()*table_N') + if(! (exists(x, envir=data, mode='numeric') | + exists(x, envir=data, mode='character'))) return(x) + if(length(nolabel) && x %in% all.vars(nolabel)) return(x) + xval <- get(x, envir=data) + if(label(xval) == '') return(x) + l <- labelLatex(xval, double=FALSE, + hfill=!length(nofill) || x %nin% all.vars(nofill)) + paste("Heading('", l, "')*", x, sep='') + } + +# f <- gsubfn("\\.\\((.*?)\\)", ~ lab(x), f) +# f <- gsubfn("\\.n\\((.*?)\\)", ~ lab(x, hfill=FALSE), f) +# f <- gsubfn("\\.n\\((.*?)\\)", ~ lab(x, hfill=FALSE), f) +# f <- gsubfn('([ \\(]+)l \\* *([A-Za-z\\_\\.][A-Z0-9a-z\\_\\.]*?)', +# ~ paste(x, lab(y), sep=''), f) +# f <- gsubfn('([ \\(]+)l\\. +\\* *([A-Za-z\\_\\.][A-Z0-9a-z\\_\\.]*?)', +# ~ paste(x, lab(y, hfill=FALSE), sep=''), f) + ## A variable is a string of characters, _, . not starting with 0-9 + ## delimited by +# f <- gsubfn('[ \\(\\*\\+ ]*([A-Za-z\\_\\.]+[A-Za-z0-9\\_\\.]*)[ \\(\\*\\+]*', ~ paste('#',x,'#',sep=''), '1a+b') +# gsubfn('[ \\(\\*\\+ ]*([A-Za-z\\_\\.]+[A-Za-z0-9\\_\\.]*)[ \\(\\*\\+]*', ~ paste('#',x,'#',sep=''), '1a+b*dd + f==h' +# f <- gsubfn( "([a-zA-Z_\\.][a-zA-Z0-9_\\.]*)((?=\\s*[-+~)*])|\\s*$)", +# ~ paste0(toupper(x),'z'), f, perl=TRUE ) +# From Bill Dunlap + + ff <- function(expr, convertName) { + if (is.call(expr) && is.name(expr[[1]]) && + is.element(as.character(expr[[1]]), + c("~","+","-","*","/","%in%","%nin%","(", ":"))) { + for(i in seq_along(expr)[-1]) + expr[[i]] <- Recall(expr[[i]], convertName = convertName) + } else if (is.name(expr)) expr <- as.name(convertName(expr)) + expr + } + + f <- ff(formula, lab) + f <- as.formula(gsub("`", "", as.character(deparse(f)))) + result <- tables::tabular(f, data=data, ...) 
+ attr(result, 'originalformula') <- formula + result +} + +table_trio <- function(x) { + o <- tables::table_options() + s <- function(x, default) if(length(x)) x else default + left <- s(o$left, 3) + right <- s(o$right, 1) + prmsd <- s(o$prmsd, FALSE) + pn <- s(o$pn, FALSE) + pnformat <- s(o$pnformat, "n") + pnwhen <- s(o$pnwhen, "all") + bold <- s(o$bold, FALSE) + + isna <- is.na(x) + x <- x[!isna] + if(!length(x)) return('') + qu <- quantile(x, (1:3)/4) + w <- paste('{\\smaller ', nFm(qu[1], left, right), '} ', + if(bold) '\\textbf{', nFm(qu[2], left, right), if(bold) '}', + ' {\\smaller ', nFm(qu[3], left, right), '}', sep='') + if(pnwhen == 'ifna' && !any(isna)) pn <- FALSE + if(prmsd || pn) { + w <- paste(w, '~{\\smaller (', sep='') + if(prmsd) w <- paste(w, nFm(mean(x), left, right), '$\\pm$', + nFm(sd(x), left, right), sep='') + if(pn) w <- paste(w, if(prmsd)' ', '$', + if(pnformat == 'n') 'n=', length(x), '$', sep='') + w <- paste(w, ')}', sep='') + } + w +} + +table_N <- function(x) paste('{\\smaller $n=', length(x), '$}', sep='') + +nFm <- function(x, left, right, neg=FALSE, pad=FALSE) { + tot <- if(right == 0) left + neg else left + right + neg + 1 + fmt <- paste('%', tot, '.', right, 'f', sep='') + x <- sprintf(fmt, x) + if(pad) x <- gsub(' ', '~', x) + x +} + +table_freq <- function(x) { + if(!length(x) || all(is.na(x))) return('') + w <- table(x) + den <- sum(w) + to <- tables::table_options() + showfreq <- to$showfreq + if(!length(showfreq)) showfreq <- 'all' + pctdec <- to$pctdec + if(!length(pctdec)) pctdec <- 0 + + i <- switch(showfreq, + all = 1:length(w), + high = which(w == max(w)), + low = which(w == min(w))) + m <- w[i] + fpct <- table_formatpct(m, den) + if(showfreq == 'all') { + z <- paste(names(m), '\\hfill', fpct, sep='') + z <- paste(z, collapse='\\\\', sep='') + len <- max(nchar(names(m))) + 9 + pctdec + 1 * (pctdec > 0) + z <- paste('\\parbox{', len, 'ex}{\\smaller ', z, '}', sep='') + return(z) + } + lab <- paste(names(m), collapse=', ') + num <- m[1] + paste(lab, ':', table_formatpct(num, den), sep='') +} + +table_pc <- function(x, y) { + maxn <- max(length(x), length(y)) + maxdig <- 1L + floor(log10(maxn)) + num <- if(all(is.na(x))) length(x) else + if(is.logical(x)) sum(x) else sum(x %in% c('yes','Yes')) + den <- if(all(is.na(y))) length(y) else sum(!is.na(y)) + prn(c(num,den)); prn(table(x, exclude=NULL)); prn(table(y, exclude=NULL)) + table_formatpct(num, den) +} + +table_formatpct <- function(num, den) { + if(den == 0 | all(is.na(num + den))) return('') + to <- tables::table_options() + npct <- to$npct + pctdec <- to$pctdec + if(!length(pctdec)) pctdec <- 0 + if(!length(npct)) npct <- 'both' + poss <- c('numerator', 'denominator', 'both', 'none') + i <- charmatch(npct, poss) + if(is.na(i)) stop('in table_options(npct=) npct must be "numerator", "denominator", "both", or "none"') + npct <- poss[i] + z <- paste(nFm(100 * num / den, 3, pctdec), '\\%', sep='') + if(npct == 'none') return(z) + if(npct == 'both') + return(paste(z, '{\\smaller[2] $\\frac{', num, '}{', den, '}$}', sep='')) + paste(z, '{\\smaller (', if(npct == 'numerator') num else den, ')}', sep='') +} + + +table_latexdefs <- function(file='') { + ct <- function(...) cat(..., file=file) + + ct('\\makeatletter\n', + '\\def\\blfootnote{\\xdef\\@thefnmark{}\\@footnotetext}\n', + '\\makeatother\n') + ct('\\def\\keytrio{\\blfootnote{Numbers in parentheses are the number of non-missing values. 
{\\smaller $a$} \\textbf{$b$}{\\smaller $c$} represents the first quartile $a$, the median $b$, and the third quartile $c$.}}\n') + ct('\\def\\keytriomsd{\\blfootnote{Numbers in parentheses are the number of non-missing values. {\\smaller $a$} \\textbf{$b$}{\\smaller $c$} represents the first quartile $a$, the median $b$, and the third quartile $c$. $x \\pm s$ represents the mean and standard deviation.}}\n') + + invisible() +} |
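tabulr() rewrites names in a tables::tabular() formula, wrapping labelled variables in Heading() and mapping trio, freq, and N to the table_* statistics above. One plausible call, following tables conventions (untested sketch; d is invented):

    library(Hmisc); library(tables)
    set.seed(6)
    d <- data.frame(age = rnorm(80, 50, 10),
                    sex = factor(sample(c('Female', 'Male'), 80, TRUE)))
    tabulr(sex ~ age * trio, data = d)  # quartile trio of age by sex; LaTeX-oriented output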
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/tex.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/tex.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,3 @@
+tex <- function(string, lref='c', psref='c', scale=1, srt=0)
+  paste('\\tex[',lref,'][',psref,'][',
+        format(scale),'][',format(srt),']{',string,'}',sep='')
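tex() only builds the psfrag wrapper string used when post-processing graphics with LaTeX:

    tex('\\beta')                          # yields "\tex[c][c][1][0]{\beta}"
    tex('y-hat', lref = 'l', scale = 1.5)  # left-justified, enlarged label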
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/transace.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/transace.s	Wed Jun 28 20:28:48 2017 -0400
| b'@@ -0,0 +1,744 @@\n+# $Id$\n+transace <- function(x, monotonic=NULL, categorical=NULL, binary=NULL,\n+ pl=TRUE)\n+{\n+ ## require(acepack) # provides ace, avas\n+\n+ nam <- dimnames(x)[[2]]\n+ omit <- is.na(x %*% rep(1,ncol(x)))\n+ omitted <- (1:nrow(x))[omit]\n+ if(length(omitted)) x <- x[!omit,]\n+ p <- ncol(x)\n+ xt <- x # binary variables retain original coding\n+ if(!length(nam))\n+ stop("x must have column names")\n+ \n+ rsq <- rep(NA, p)\n+ names(rsq) <- nam\n+\n+ for(i in (1:p)[!(nam %in% binary)]) {\n+ lab <- nam[-i]\n+ w <- 1:(p-1)\n+ im <- w[lab %in% monotonic]\n+ ic <- w[lab %in% categorical]\n+ if(nam[i] %in% monotonic)\n+ im <- c(0, im)\n+\n+ if(nam[i] %in% categorical)\n+ ic <- c(0, ic)\n+ m <- 10*(length(im)>0)+(length(ic)>0)\n+ if(m==11)\n+ a <- ace(x[,-i], x[,i], mon=im, cat=ic)\n+ else if (m==10)\n+ a <- ace(x[,-i], x[,i], mon=im)\n+ else if(m==1)\n+ a <- ace(x[,-i], x[,i], cat=ic)\n+ else\n+ a <- ace(x[,-i], x[,i])\n+\n+ xt[,i] <- a$ty\n+ rsq[i] <- a$rsq\n+ if(pl)\n+ plot(x[,i], xt[,i], xlab=nam[i], ylab=paste("Transformed",nam[i]))\n+ }\n+\n+ cat("R-squared achieved in predicting each variable:\\n\\n")\n+ print(rsq)\n+\n+ attr(xt, "rsq") <- rsq\n+ attr(xt, "omitted") <- omitted\n+ invisible(xt)\n+}\n+\n+\n+areg.boot <- function(x, data, weights, subset, na.action=na.delete,\n+ B = 100, method=c(\'areg\',\'avas\'), nk=4, evaluation=100, \n+ valrsq=TRUE, probs=c(.25,.5,.75),\n+ tolerance=NULL)\n+{\n+ acall <- match.call()\n+ method <- match.arg(method)\n+ ## if(method==\'avas\') require(acepack)\n+\n+ if(!inherits(x,\'formula\')) stop(\'first argument must be a formula\')\n+\n+ m <- match.call(expand.dots = FALSE)\n+ Terms <- terms(x, specials=c(\'I\',\'monotone\'))\n+ m$formula <- x\n+ m$x <- m$B <- m$method <- m$evaluation <- m$valrsq <- m$probs <- \n+ m$nk <- m$tolerance <- NULL\n+ m$na.action <- na.action\n+ \n+ m[[1]] <- as.name("model.frame")\n+ x <- eval(m, sys.parent())\n+\n+ nam <- unique(var.inner(Terms))\n+ ylab <- names(x)[1]\n+\n+ k <- length(x)\n+ p <- k - 1\n+ nact <- attr(x,"na.action")\n+ \n+ default <- if(nk==0)\'l\' else \'s\'\n+ xtype <- rep(default, p); ytype <- default\n+ names(xtype) <- nam\n+ linear <- attr(Terms,\'specials\')$I\n+ if(length(linear)) {\n+ if(any(linear==1)) ytype <- \'l\'\n+ if(any(linear>1 )) xtype[linear-1] <- \'l\'\n+ }\n+ mono <- attr(Terms,\'specials\')$monotone\n+ if(length(mono)) {\n+ if(method==\'avas\' && any(mono==1))\n+ stop(\'y is always monotone with method="avas"\')\n+ if(method==\'areg\') stop(\'monotone not implemented by areg\')\n+ xtype[mono-1] <- \'m\'\n+ }\n+\n+ xbase <- \'x\'\n+ weights <- model.extract(x, weights)\n+ cat.levels <- values <- vector(\'list\',k)\n+ names(cat.levels) <- names(values) <- c(ylab,nam)\n+\n+ for(j in 1:k) {\n+ typ <- \' \'\n+ xj <- x[[j]]\n+ if(is.character(xj)) {\n+ xj <- as.factor(xj)\n+ cat.levels[[j]] <- lev <- levels(xj)\n+ x[[j]] <- as.integer(xj)\n+ typ <- \'c\'\n+ values[[j]] <- 1:length(lev)\n+ } else if(is.factor(xj)) {\n+ cat.levels[[j]] <- lev <- levels(xj)\n+ x[[j]] <- as.integer(xj)\n+ typ <- \'c\'\n+ values[[j]] <- 1:length(lev)\n+ if(method==\'avas\' && j==1)\n+ stop(\'categorical y not allowed for method="avas"\')\n+ } else {\n+ xj <- unclass(xj) # 5Mar01\n+ xu <- sort(unique(xj))\n+ nu <- length(xu)\n+ if(nu < 3) typ <- \'l\'\n+ values[[j]] <- if(nu <= length(probs)) xu else quantile(xj,probs)\n+ }\n+ if(typ != \' \') {\n+ if(j==1) ytype <- typ else xtype[j-1] <- typ\n+ }\n+ }\n+\n+ y <- x[,1]\n+ x <- x[,-1,drop=FALSE]\n+ n <- length(y)\n+\n+ if(length(weights)) 
stop(\'weights not implemented for areg\') else\n+ weights <- rep(1,n)\n+\n+ if(method==\'areg\')\n+ {\n+ f <- areg(x, y, xtype=xtype, ytype=ytype,\n+ nk=nk, na.rm=FALSE, tolerance=tolerance)\n+ rsquared.app <- f$rsquared\n+ }\n+ else\n+ {\n+ Avas <- function(x, y, x'..b'LL, ...)\n+{\n+ if(!is.function(statistic))\n+ statistic <- match.arg(statistic)\n+\n+ fit <- object$fit\n+ fity <- fit[[1]]\n+ res <- object$residuals\n+ if(missing(newdata)) {\n+ if(statistic==\'terms\')\n+ stop(\'statistic cannot be "terms" when newdata is omitted\')\n+\n+ lp <- object$linear.predictors\n+ y <- smearingEst(lp, fity, res, statistic=statistic, q=q)\n+ nac <- object$na.action\n+ return(if(length(nac)) naresid(nac, y) ## FEH30Aug09 was nafitted\n+ else y)\n+ }\n+ \n+ cof <- object$coefficients\n+ Fun <- Function(object)\n+ nam <- names(fit)\n+ p <- length(nam)-1\n+ X <- matrix(NA, nrow=length(newdata[[1]]), ncol=p)\n+ for(i in 1:p) {\n+ nami <- nam[i+1]\n+ X[,i] <- Fun[[nami]](newdata[[nami]])\n+ }\n+\n+ if(!is.function(statistic) && statistic==\'terms\')\n+ return(X)\n+\n+ lp <- matxv(X, cof)\n+ smearingEst(lp, fity, res, statistic=statistic, q=q)\n+}\n+\n+\n+monotone <- function(x) structure(x, class = unique(c("monotone",\n+ attr(x,\'class\'))))\n+\n+Mean <- function(object, ...) UseMethod("Mean")\n+Quantile <- function(object, ...) UseMethod("Quantile")\n+\n+\n+Mean.areg.boot <- function(object, evaluation=200, ...)\n+{\n+ r <- range(object$linear.predictors)\n+ lp <- seq(r[1], r[2], length=evaluation)\n+ res <- object$residuals\n+ ytrans <- object$fit[[1]]\n+ asing <- function(x)x\n+\n+ if(length(lp)*length(res) < 100000)\n+ means <- asing(smearingEst(lp, ytrans, res, statistic=\'mean\'))\n+ else {\n+ means <- double(evaluation)\n+ for(i in 1:evaluation)\n+ means[i] <- mean(approxExtrap(ytrans, xout=lp[i]+res)$y)\n+ }\n+\n+ g <- function(lp, trantab) approxExtrap(trantab, xout=lp)$y\n+\n+ formals(g) <- list(lp=numeric(0),\n+ trantab=list(x=lp,\n+ y=means))\n+ g\n+}\n+\n+\n+Quantile.areg.boot <- function(object, q=.5, ...)\n+{\n+ if(length(q) != 1 || is.na(q))\n+ stop(\'q must be length 1 and not NA\')\n+ \n+ g <- function(lp, trantab, residualQuantile)\n+ approxExtrap(trantab, xout=lp+residualQuantile)$y\n+\n+ formals(g) <- list(lp=numeric(0), trantab=object$fit[[1]],\n+ residualQuantile = quantile(object$residuals, q))\n+ g\n+}\n+\n+\n+smearingEst <- function(transEst, inverseTrans, res,\n+ statistic=c(\'median\',\'quantile\',\'mean\',\'fitted\',\'lp\'),\n+ q=NULL)\n+{\n+ if(is.function(statistic))\n+ label <- deparse(substitute(statistic))\n+ else {\n+ statistic <- match.arg(statistic)\n+ switch(statistic,\n+ median = {statistic <- \'quantile\'; q <- .5; label <- \'Median\'},\n+ quantile = {\n+ if(!length(q))\n+ stop(\'q must be given for statistic="quantile"\');\n+ \n+ label <- paste(format(q),\'quantile\')\n+ },\n+ mean = {\n+ statistic <- mean;\n+ label <- \'Mean\'\n+ },\n+ fitted = {\n+ label <- \'Inverse Transformation\'\n+ },\n+ lp = {\n+ label <- \'Transformed\'\n+ })\n+ }\n+ y <- if(is.function(statistic)) {\n+ if(is.list(inverseTrans))\n+ apply(outer(transEst, res,\n+ function(a, b, ytab) approxExtrap(ytab, xout=a+b)$y,\n+ inverseTrans), 1, statistic) else\n+ apply(outer(transEst, res, function(a, b, invfun)invfun(a+b),\n+ inverseTrans), 1, statistic)\n+ } else switch(statistic,\n+ lp = transEst,\n+ fitted = if(is.list(inverseTrans))\n+ approxExtrap(\n+ inverseTrans,\n+ xout=transEst)$y else\n+ inverseTrans(transEst),\n+ quantile = if(is.list(inverseTrans))\n+ approxExtrap(\n+ 
inverseTrans,\n+ xout=transEst+quantile(res,q))$y else\n+ inverseTrans(transEst+quantile(res,q)))\n+ structure(y, class=\'labelled\', label=label)\n+}\n' |
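This file also defines areg.boot(); a small run consistent with its formula interface (B kept small for speed; data invented):

    library(Hmisc)
    set.seed(8)
    d <- data.frame(x1 = runif(200), x2 = runif(200))
    d$y <- exp(d$x1 + rnorm(200) / 4)
    f <- areg.boot(y ~ x1 + x2, data = d, B = 25)
    f
    Mean(f)   # returns a function mapping the linear predictor to the mean of y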
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/transcan.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/transcan.s	Wed Jun 28 20:28:48 2017 -0400
| b'@@ -0,0 +1,1605 @@\n+## $Id$\n+\n+transcan <-\n+ function(x, method=c("canonical","pc"),\n+ categorical=NULL, asis=NULL, nk, \n+ imputed=FALSE, n.impute, \n+ boot.method=c(\'approximate bayesian\', \'simple\'),\n+ trantab=FALSE, transformed=FALSE,\n+ impcat=c("score","multinom","rpart"),\n+ mincut=40,\n+ inverse=c(\'linearInterp\',\'sample\'), tolInverse=.05,\n+ pr=TRUE, pl=TRUE, allpl=FALSE, show.na=TRUE,\n+ imputed.actual=c(\'none\',\'datadensity\',\'hist\',\'qq\',\'ecdf\'),\n+ iter.max=50, eps=.1, curtail=TRUE, \n+ imp.con=FALSE, shrink=FALSE, init.cat="mode",\n+ nres=if(boot.method==\'simple\')200 else 400,\n+ data, subset, na.action, treeinfo=FALSE,\n+ rhsImp=c(\'mean\',\'random\'),\n+ details.impcat=\'\', ...)\n+{\n+ ##This is a non-.Internal version of the approx function. The\n+ ##S-Plus version of approx sometimes bombs with a bus error.\n+\n+ asing <- function(x)x\n+ \n+ call <- match.call()\n+ method <- match.arg(method)\n+ impcat <- match.arg(impcat)\n+ boot.method <- match.arg(boot.method)\n+ imputed.actual <- match.arg(imputed.actual)\n+ inverse <- match.arg(inverse)\n+ rhsImp <- match.arg(rhsImp)\n+\n+ if(missing(n.impute))\n+ n.impute <- 0\n+ \n+ if(n.impute > 0) {\n+ imputed <- TRUE\n+ if(impcat == \'rpart\')\n+ stop(\'n.impute not supported for impcat="rpart"\')\n+ \n+ warning(\'transcan provides only an approximation to true multiple imputation.\\nA better approximation is provided by the aregImpute function.\\nThe MICE and other S libraries provide imputations from Bayesian posterior distributions.\')\n+ }\n+\n+ if(imputed.actual!=\'none\')\n+ imputed <- TRUE\n+\n+# if(impcat==\'multinom\') require(nnet)\n+# if(impcat==\'rpart\') require(rpart)\n+\n+ if(missing(data))\n+ stop(\'Must specify data= when using R\')\n+\n+ formula <- nact <- NULL\n+\n+ if(inherits(x,"formula")) {\n+ formula <- x\n+ y <- match.call(expand.dots=FALSE)\n+ y$x <- y$method <- y$categorical <- y$asis <- y$nk <- y$imputed <- \n+ y$trantab <- y$impcat <- y$mincut <- y$pr <- y$pl <- y$allpl <- y$show.na <-\n+ y$iter.max <- y$eps <- y$curtail <- y$imp.con <- y$shrink <-\n+ y$init.cat <- y$n.impute <- y$... 
<- y$nres <- y$boot.method <-\n+ y$transformed <- y$treeinfo <- y$imputed.actual <-\n+ y$inverse <- y$tolInverse <- y$details.impcat <-\n+ y$rhsImp <- NULL\n+ y$formula <- x\n+ \n+ if(missing(na.action))\n+ y$na.action <- na.retain\n+\n+ y[[1]] <- as.name("model.frame")\n+ y <- eval(y, sys.parent())\n+ nact <- attr(y,"na.action")\n+ d <- dim(y)\n+\n+ # Error if user is trying to use a non-allowed formula\n+ if(length(attr(y, "terms")) > 2)\n+ stop(\'transcan does not support a left hand side variable in the formula\')\n+\n+\n+ nam <- all.vars(attr(y, "terms"))\n+\n+ # Error if user has passed an invalid formula\n+ if(length(nam) != d[2])\n+ stop(paste(\'Formula\', formula,\n+ \'does not have a dominant inner variable.\'))\n+ \n+ if(!length(asis)) {\n+ Terms <- terms(formula, specials=\'I\')\n+ asis <- nam[attr(Terms,\'specials\')$I]\n+ ## all.vars will cause I() wrapper to be ignored\n+ }\n+\n+ x <- matrix(NA,nrow=d[1],ncol=d[2],\n+ dimnames=list(attr(y,"row.names"),nam))\n+ for(i in 1:d[2]) {\n+ w <- y[[i]]\n+ if(is.character(w))\n+ w <- factor(w)\n+\n+ if(is.factor(w)) { \n+ x[,i] <- unclass(w)\n+ categorical <- c(categorical, nam[i])\n+ } else {\n+ x[,i] <- w\n+ nu <- length(unique(w[!is.na(w)]))\n+ if(nu<2)\n+ stop(paste("variable",nam[i],"has only one value"))\n+ \n+ if(nu==2)\n+ asis <- c(asis, nam[i])\n+ else if(nu==3) categorical <- c(categorical, nam[i])\n+ }\n+ }\n+ }\n+\n+ nam <- dimnames(x)[[2]]\n+ rnam <- dimnames(x)[[1]]\n+ if(length(rnam)==0)\n+ rnam <- as.character(1:nrow(x))\n+ '..b'd if Design is not in effect, to make anova work\n+\n+vcov.default <- function(object, regcoef.only=FALSE, ...)\n+{\n+ vc <- object$Varcov\n+ if(length(vc))\n+ {\n+ if(regcoef.only) return(object$var)\n+ else return(vc(object, which=\'var\'))\n+ }\n+\n+ cov <- object$var\n+ if(!length(cov))\n+ stop("object does not have variance-covariance matrix")\n+ \n+ if(regcoef.only)\n+ {\n+ p <- length(object$coef)\n+ cov <- cov[1:p, 1:p, drop=FALSE]\n+ }\n+\n+ cov\n+}\n+\n+\n+if(FALSE) Varcov.lm <- function(object, ...)\n+{\n+ cof <- object$coefficients\n+ Qr <- object$qr\n+ cov <- chol2inv(Qr$qr)\n+ \n+ cov <- sum(object$residuals^2)*cov/object$df.residual\n+ nm <- names(cof)\n+ dimnames(cov) <- list(nm, nm)\n+ cov\n+}\n+\n+\n+if(FALSE) Varcov.glm <- function(object, ...)\n+{\n+ if(length(object$var))\n+ return(object$var) ## for glmD\n+\n+ s <- summary.glm(object)\n+ s$cov.unscaled * s$dispersion\n+}\n+\n+\n+#Varcov.multinom <- function(object, ...) vcov(object)\n+\n+invertTabulated <- function(x, y, freq=rep(1,length(x)),\n+ aty, name=\'value\',\n+ inverse=c(\'linearInterp\',\'sample\'),\n+ tolInverse=0.05, rule=2)\n+{\n+ inverse <- match.arg(inverse)\n+ if(is.list(x))\n+ {\n+ freq <- x[[3]]\n+ y <- x[[2]]\n+ x <- x[[1]]\n+ }\n+ \n+ if(inverse==\'linearInterp\')\n+ return(approx(y, x, xout=aty, rule=rule, ties=mean)$y)\n+\n+ del <- diff(range(y, na.rm=TRUE))\n+ m <- length(aty)\n+ yinv <- double(m)\n+ \n+ cant <- double(0)\n+\n+ for(i in 1:m)\n+ {\n+ a <- aty[i]\n+ s <- abs(y-a) < (tolInverse * del)\n+ nclose <- sum(s)\n+ if(nclose < 2)\n+ {\n+ if(nclose==0)\n+ cant <- c(cant, a)\n+ \n+ xest <- approx(y, x, xout=a, rule=rule)$y\n+ ## If a outside range of y, approx(rule=2) will return min or max\n+ ## x. 
There may be many x\'s with y values near this extreme x.\n+ ## Take a random draw from them.\n+ a <- approx(x, y, xout=xest, rule=rule)$y\n+ s <- abs(y - a) < (tolInverse * del)\n+ nclose <- sum(s)\n+ if(nclose > 1)\n+ {\n+ maxdist <- max((y[s] - a)^2)\n+ wt <- if(maxdist==0) freq[s]\n+ else (1 - ((y[s] - a)^2) / maxdist) * freq[s]\n+\n+ if(all(wt==0)) wt <- freq[s] # y[s] all the same\n+\n+ if(any(wt==0)) wt[wt==0] <- min(wt[wt>0])/2\n+\n+ xest <- x[s][sample(nclose, 1, replace=FALSE, prob=wt/sum(wt))]\n+ }\n+ }\n+ else\n+ {\n+ maxdist <- max((y[s] - a)^2)\n+ wt <- if(maxdist==0) freq[s]\n+ else (1 - ((y[s] - a)^2) / maxdist) * freq[s]\n+\n+ if(all(wt==0))\n+ wt <- freq[s] # y[s] all the same\n+\n+ if(any(wt==0))\n+ wt[wt==0] <- min(wt[wt>0])/2\n+ \n+ xest <- x[s][sample(nclose, 1, replace=FALSE, prob=wt/sum(wt))]\n+ ## sample(x[s],...) fails if x[s] is scalar; thanks: Bill Dunlap\n+ }\n+ yinv[i] <- xest\n+ }\n+ \n+ if(length(cant))\n+ warning(paste(\'No actual \',name, \' has y value within \',\n+ format(tolInverse),\n+ \'* range(y) (\',format(del),\n+ \') of the following y values:\',\n+ paste(format(sort(unique(cant))),collapse=\' \'),\n+ \'.\\nConsider increasing tolInverse. \',\n+ \'Used linear interpolation instead.\',sep=\'\'))\n+ \n+ yinv\n+}\n+\n+\n+## Trick taken from MICE impute.polyreg\n+rMultinom <- function(probs, m)\n+{\n+ d <- dim(probs)\n+ n <- d[1]\n+ k <- d[2]\n+ lev <- dimnames(probs)[[2]]\n+ if(!length(lev))\n+ lev <- 1:k\n+\n+ ran <- matrix(lev[1], ncol=m, nrow=n)\n+ z <- apply(probs, 1, sum)\n+ if(any(abs(z-1) > .00001))\n+ stop(\'error in multinom: probabilities do not sum to 1\')\n+\n+ U <- apply(probs, 1, cumsum)\n+ for(i in 1:m)\n+ {\n+ un <- rep(runif(n), rep(k,n))\n+ ran[,i] <- lev[1 + apply(un > U, 2, sum)]\n+ }\n+ \n+ ran\n+}\n' |
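A usage sketch for transcan() with missing values to impute (invented data; pl=FALSE suppresses the transformation plots):

    library(Hmisc)
    set.seed(9)
    d <- data.frame(x1 = rnorm(100),
                    x2 = sample(c('a', 'b', 'c'), 100, TRUE),
                    x3 = runif(100))
    d$x1[1:10] <- NA
    f <- transcan(~ x1 + x2 + x3, data = d, imputed = TRUE, pl = FALSE)
    summary(f)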
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/translate.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/translate.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,20 @@
+translate <- function(text, old, new, multichar=FALSE) {
+  if(length(old)>1 || (nchar(old)!=nchar(new))) multichar <- TRUE
+  if(length(old)>1 && (length(new)>1 & length(new)!=length(old)))
+    stop("old and new must have same lengths or new must have 1 element")
+
+  if(!multichar) k <- chartr(old, new, text)
+  else {
+    if(multichar) command <- paste("sed",paste('-e "s/',old,"/",new,'/g"',
+                                               sep="", collapse=" "))
+    else command <- paste("tr \"", old, "\" \"", new, "\"", sep="")
+    ## k <- sys(command, text) replace with next 2 27aug03
+    ## Thanks: <Sebastian.Weber@aventis.com>
+    k <- unlist(lapply(text, function(x, command) {
+      sys(paste("echo \"", x, "\" | ", command, sep="")) },
+      command=command))  # command= 22feb04
+    ## added command 26jan04; thanks:<Willi.Weber@aventis.com>
+  }
+  if(is.matrix(text)) k <- matrix(k, nrow=nrow(text))
+  k
+}
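translate() uses chartr() for equal-length single-character maps and otherwise shells out to sed, so multicharacter translation assumes a Unix-alike:

    translate(c('abc', 'bcd'), 'bc', 'fg')  # chartr(): "afg" "fgd"
    translate('colour', 'our', 'or')        # lengths differ -> sed: "color"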
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/units.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/units.s	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,34 @@
+units <- function(x, ...)
+  UseMethod("units")
+
+"units<-.default" <- function(x, value)
+{
+  attr(x, "units") <- value
+  x
+}
+
+units.default <- function(x, none='', ...)
+{
+  lab <- attr(x, "units")
+  if(is.null(lab))
+    lab <- attr(attr(x,'tspar'),'units')
+
+  if(is.null(lab))
+    lab <- none
+
+  lab
+}
+
+
+units.Surv <- function(x, none='', ...)
+{
+  at <- attributes(x)
+  un <- at$units
+  ia <- at$inputAttributes
+  if(! length(un) && length(ia)) {
+    un <- ia$time2$units
+    if(! length(un)) un <- ia$time$units
+  }
+  if(! length(un)) un <- none
+  un
+}
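The replacement and extractor methods work on a plain 'units' attribute:

    x <- c(1.2, 3.4)
    units(x) <- 'mg/dl'  # "units<-.default" stores the attribute
    units(x)             # "mg/dl"
    units(runif(5))      # "" -- the none= default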
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/valueTags.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/valueTags.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,81 @@ +## $Id$ +.valueTagAttrs <- c(label="label", units="units", name="shortlabel") + + +valueTags <- function(x) + attributes(x)[names(attributes(x)) %in% .valueTagAttrs] + + +"valueTags<-" <- function(x, value) { + if(is.null(value) || length(value) == 0) { + attributes(x)[names(attributes(x)) %in% .valueTagAttrs] <- NULL + class(x) <- class(x)[class(x) != 'labelled'] + return(x) + } + + if(!is.list(value)) { + stop("list must be a named list of valueTags") + } + + value[(!names(value) %in% .valueTagAttrs) | + unlist(lapply(value, is.null))] <- NULL + + if(length(value) == 0) { + attributes(x)[names(attributes(x)) %in% .valueTagAttrs] <- NULL + class(x) <- class(x)[class(x) != 'labelled'] + return(x) + } + + attributes(x)[setdiff(names(attributes(x))[names(attributes(x)) %in% + .valueTagAttrs], + names(value))] <- NULL + + consolidate(attributes(x)) <- value + + if(all(class(x) != 'labelled')) + class(x) <- c('labelled', class(x)) + + return(x) +} + +valueLabel <- function(x) + attr(x, 'label') + +"valueLabel<-" <- function(x, value) { + if(!is.character(value) || length(value) != 1) + stop("value label must be a character vector of length 1") + + attr(x, 'label') <- value + + class(x) <- c('labelled', class(x)[class(x) != 'labelled']) + + return(x) +} + +valueUnit <- function(x) + attr(x, 'units') + +"valueUnit<-" <- function(x, value) { + if(!is.character(value) || length(value) != 1) + stop("value unit must be a character vector of length 1") + + attr(x, 'units') <- value + + class(x) <- c('labelled', class(x)[class(x) != 'labelled']) + + return(x) +} + +valueName <- function(x) + attr(x, 'valueName') + +"valueName<-" <- function(x, value) { + if(!is.character(value) || length(value) != 1) + stop("value name must be a character vector of length 1") + + attr(x, 'valueName') <- value + + class(x) <- c('labelled', class(x)[class(x) != 'labelled']) + + return(x) +} |
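The tag setters attach 'label'/'units' attributes plus the 'labelled' class; valueTags() then returns just those attributes:

    x <- 1:5
    valueLabel(x) <- 'Age at entry'
    valueUnit(x)  <- 'years'
    valueTags(x)  # list(label='Age at entry', units='years')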
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/varclus.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/varclus.s	Wed Jun 28 20:28:48 2017 -0400
| b'@@ -0,0 +1,368 @@\n+varclus <-\n+ function(x,\n+ similarity=c("spearman","pearson","hoeffding",\n+ "bothpos","ccbothpos"), \n+ type=c("data.matrix","similarity.matrix"),\n+ method="complete",\n+ data=NULL, subset=NULL, na.action=na.retain,\n+ trans=c("square", "abs", "none"),\n+ ...)\n+{\n+ call <- match.call()\n+ type <- match.arg(type)\n+ if(type!="similarity.matrix") similarity <- match.arg(similarity)\n+ trans <- match.arg(trans)\n+ \n+ nact <- NULL\n+\n+ if(inherits(x,"formula")) {\n+ form <- x\n+ oldops <- options(contrasts=c("contr.treatment","contr.poly"))\n+ if(length(list(...))) data <- dataframeReduce(data, ...)\n+ x <- list(formula=form, data=data, na.action=na.action, subset=subset)\n+ x <- do.call(\'model.frame\', x)\n+ nam <- names(x)\n+ nv <- length(x)\n+ Terms <- attr(x,\'terms\')\n+ \n+ nact <- attr(x,"na.action")\n+ x <- model.matrix(Terms, x)\n+ if(dimnames(x)[[2]][1]==\'(Intercept)\') x <- x[,-1]\n+ form <- TRUE\n+ options(oldops)\n+ type <- "data.matrix"\n+ }\n+ else form <- FALSE\n+ \n+ n <- NULL\n+ if(mode(x) != "numeric") stop("x matrix must be numeric")\n+\n+ if(type == "data.matrix") { ## assume not a correlation matrix\n+ if(similarity %in% c("bothpos","ccbothpos")) {\n+ isthere <- 1*(!is.na(x))\n+ x[is.na(x)] <- 0\n+ x[x > 0] <- 1\n+ n <- crossprod(isthere)\n+ x <- crossprod(x)/n\n+ if(similarity==\'ccbothpos\') {\n+ cc <- diag(x) %*% t(diag(x))\n+ cc[row(cc)==col(cc)] <- 0\n+ x <- x - cc\n+ }\n+ }\n+ else if(similarity=="hoeffding") {\n+ D <- hoeffd(x); x <- D$D; n <- D$n \n+ }\n+ else {\n+ D <- rcorr(x, type=similarity)\n+ x <- D$r\n+ x <- switch(trans,\n+ square = x^2,\n+ abs = abs(x),\n+ none = x)\n+ n <- D$n\n+ }\n+ }\n+ else if(diff(dim(x)) != 0) \n+ stop("x must be square to be a similarity matrix")\n+ \n+ if(any(is.na(x))) {\n+ cat("Part of the similarity matrix could not be computed:\\n")\n+ x[x < .01] <- 0\n+ print(x, digits=2)\n+ stop()\n+ }\n+ \n+ w <- if(similarity==\'ccbothpos\') NULL\n+ else hclust(as.dist(1-x), method=method)\n+ \n+ structure(list(call=call, sim=x, n=n, hclust=w, similarity=similarity,\n+ trans=trans, method=method, na.action=nact),\n+ class="varclus")\n+}\n+\n+\n+print.varclus <- function(x, abbrev=FALSE, ...)\n+{\n+ dput(x$call); cat("\\n")\n+ if(length(x$na.action))\n+ naprint(x$na.action)\n+ trans <- x$trans\n+ s <- c(hoeffding="30 * Hoeffding D",\n+ spearman=switch(trans,\n+ square = "Spearman rho^2",\n+ abs = "|Spearman rho|",\n+ none = "Spearman rho"),\n+ pearson=switch(trans,\n+ square = "Pearson r^2",\n+ abs = "|Pearson r|",\n+ none = "Pearson r"),\n+ bothpos="Proportion",\n+ ccbothpos="Chance-Corrected Proportion")[x$similarity]\n+ cat("\\nSimilarity matrix (",s,")\\n\\n",sep="")\n+ k <- x$sim\n+ lab <- dimnames(k)[[2]]\n+ if(abbrev)\n+ lab <- abbreviate(lab)\n+\n+ dimnames(k) <- list(lab,lab)\n+ print.default(round(k, 2))\n+ n <- x$n\n+ if(length(n)) {\n+ if(length(n) == 1)\n+ cat("\\nNo. of observations used=", n,"\\n\\n")\n+ else {\n+ cat("\\nNo. 
of observations used for each pair:\\n\\n")\n+ dimnames(n) <- list(lab,lab)\n+ print(n)\n+ }\n+ }\n+ \n+ cat("\\nhclust results (method=",x$method,")\\n\\n",sep="")\n+ print(x$hclust)\n+ invisible()\n+}\n+\n+plot.varclus <- function(x, ylab, abbrev=FALSE, legend.=FALSE, loc, maxlen=20,\n+ labels=NULL, ...)\n+{\n+ trans <- x$trans\n+ if(missing(ylab)) {\n+ s <- c(hoeffding="30 * Hoeffding D",\n+ spearman=switch(trans,\n+ square = expression(paste(Spearman,~rho^2)),\n+ abs = expression(paste(Spearman,~abs(rho))),\n+ none = expression(paste(Spearma'..b"dotchart2(sort(mean.na), \n+ xlab='Mean Number of NAs',\n+ main='Mean Number of Other Variables Missing for\\nObservations where Indicated Variable is NA',\n+ ...)\n+\n+ if(which %in% c('all','na per var vs mean na')) {\n+ xpd <- par('xpd')\n+ par(xpd=NA)\n+ on.exit(par(xpd=xpd))\n+ \n+ plot(na.per.var, mean.na, xlab='Fraction of NAs for Single Variable',\n+ ylab='Mean # Other Variables Missing', type='p')\n+ usr <- par('usr')\n+ eps <- .015*diff(usr[1:2]);\n+ epsy <- .015*diff(usr[3:4])\n+ \n+ s <- (1:length(na.per.var))[!is.na(mean.na)]\n+ taken.care.of <- NULL\n+ for(i in s) {\n+ if(i %in% taken.care.of)\n+ next\n+ \n+ w <- s[s > i & abs(na.per.var[s]-na.per.var[i]) < eps &\n+ abs(mean.na[s]-mean.na[i]) < epsy]\n+ if(any(w)) {\n+ taken.care.of <- c(taken.care.of, w)\n+ text(na.per.var[i]+eps, mean.na[i],\n+ paste(names(na.per.var[c(i,w)]),collapse='\\n'),adj=0)\n+ }\n+ else text(na.per.var[i]+eps, mean.na[i], names(na.per.var)[i], adj=0)\n+ }\n+ }\n+ \n+ invisible(tab)\n+}\n+\n+\n+combine.levels <- function(x, minlev=.05) {\n+ x <- as.factor(x)\n+ notna <- sum(! is.na(x))\n+ if(notna == 0) return(rep(NA, length(x)))\n+ lev <- levels(x)\n+ f <- table(x) / notna\n+ i <- f < minlev\n+ si <- sum(i)\n+ if(si == 0) return(x)\n+\n+ comb <- if(si == 1) names(sort(f))[1 : 2]\n+ else names(f)[i]\n+ \n+ keepsep <- setdiff(names(f), comb)\n+ names(keepsep) <- keepsep\n+ w <- c(list(OTHER=comb), keepsep)\n+ levels(x) <- w\n+ x\n+}\n+\n+\n+plotMultSim <- function(s, x=1:dim(s)[3],\n+ slim=range(pretty(c(0,max(s,na.rm=TRUE)))),\n+ slimds=FALSE,\n+ add=FALSE, lty=par('lty'), col=par('col'),\n+ lwd=par('lwd'), vname=NULL, h=.5, w=.75,\n+ u=.05, labelx=TRUE, xspace=.35)\n+{\n+ if(!length(vname))\n+ vname <- dimnames(s)[[1]]\n+ p <- dim(s)[1]\n+ if(length(vname) != p) stop('wrong length for vname')\n+ \n+ if(p != dim(s)[2])\n+ stop('similarity matrix not square')\n+ \n+ if(length(x) != dim(s)[3])\n+ stop('length of x differs from extent of 3rd dimension of s')\n+\n+ if(!add) {\n+ plot(c(-xspace,p+.5),c(.5,p+.25), type='n', axes=FALSE, xlab='',ylab='')\n+ if(labelx)\n+ text(1:p, rep(.6,p), vname, adj=.5)\n+ \n+ text(rep(.5,p), 1:p, vname, adj=1)\n+ }\n+ \n+ scaleit <- function(x, xlim, lim) lim[1] +\n+ (x - xlim[1]) / diff(xlim) * diff(lim)\n+\n+ if(slimds) {\n+ slim.diag <- -1e10\n+ for(k in 1:length(x)) {\n+ sk <- s[,,k]\n+ r <- max(diag(sk))\n+ slim.diag <- max(slim.diag, r)\n+ }\n+ \n+ slim.diag <- range(pretty(c(0,slim.diag)))\n+ slim.offdiag <- slim.diag - diff(slim.diag)/2\n+ }\n+ \n+ rx <- range(x)\n+ rxe <- c(rx[1] - u * diff(rx), rx[2] + u * diff(rx))\n+\n+ for(i in 1 : p) {\n+ for(j in 1 : p) {\n+ if((i == j) && all(s[i,j,] == 1))\n+ next\n+ \n+ sl <- if(slimds) if(i==j) slim.diag\n+ else slim.offdiag\n+ else slim\n+ \n+ sle <- c(sl[1]-u*diff(sl), sl[2]+u*diff(sl))\n+ \n+ if(!add) {\n+ lines(c(i-w/2,i+w/2,i+w/2,\n+ i-w/2,i-w/2),\n+ c(j-h/2,j-h/2,j+h/2,\n+ j+h/2,j-h/2), col=gray(.5), lwd=.65)\n+ xc <- rep(i-w/2-u/3,2)\n+ yc <- scaleit(sl, sle, 
c(j-h/2,j+h/2))\n+ if(i==1 && j<=2)\n+ {\n+ text(xc, yc,\n+ format(sl,digits=2), adj=1, cex=.7)\n+ segments(rep(xc+u/8,2),yc,\n+ rep(xc+u/3,2),yc)\n+ }\n+ }\n+ lines(scaleit(x, rxe, c(i-w/2,i+w/2)),\n+ scaleit(s[i,j,], sle, c(j-h/2,j+h/2)),\n+ lty=lty, lwd=lwd, col=col)\n+ if(!add && slimds && (i!=j))\n+ lines(c(i-w/2,i+w/2),\n+ rep(scaleit(0, sle, c(j-h/2,j+h/2)),2),\n+ col=gray(.5))\n+ }\n+ }\n+ \n+ invisible(slim)\n+}\n" |
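A quick varclus() sketch on invented variables (the default similarity is squared Spearman rho):

    library(Hmisc)
    set.seed(13)
    x1 <- rnorm(200); x2 <- x1 + rnorm(200) / 2; x3 <- rnorm(200)
    v <- varclus(~ x1 + x2 + x3)
    plot(v)   # dendrogram; x1 and x2 join first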
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/wtd.stats.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/wtd.stats.s	Wed Jun 28 20:28:48 2017 -0400
| @@ -0,0 +1,247 @@ +wtd.mean <- function(x, weights=NULL, normwt='ignored', na.rm=TRUE) +{ + if(!length(weights)) return(mean(x, na.rm=na.rm)) + if(na.rm) { + s <- !is.na(x + weights) + x <- x[s] + weights <- weights[s] + } + + sum(weights*x)/sum(weights) +} + + +wtd.var <- function(x, weights=NULL, normwt=FALSE, na.rm=TRUE) +{ + if(!length(weights)) { + if(na.rm) x <- x[!is.na(x)] + return(var(x)) + } + + if(na.rm) { + s <- !is.na(x + weights) + x <- x[s] + weights <- weights[s] + } + + if(normwt) + weights <- weights * length(x) / sum(weights) + + sw <- sum(weights) + xbar <- sum(weights * x) / sw + sum(weights*((x - xbar)^2)) / (sw - sum(weights ^ 2) / sw) +} + + +wtd.quantile <- function(x, weights=NULL, probs=c(0, .25, .5, .75, 1), + type=c('quantile','(i-1)/(n-1)','i/(n+1)','i/n'), + normwt=FALSE, na.rm=TRUE) +{ + if(!length(weights)) + return(quantile(x, probs=probs, na.rm=na.rm)) + + type <- match.arg(type) + if(any(probs < 0 | probs > 1)) + stop("Probabilities must be between 0 and 1 inclusive") + + nams <- paste(format(round(probs * 100, if(length(probs) > 1) + 2 - log10(diff(range(probs))) else 2)), + "%", sep = "") + + if(type=='quantile') { + w <- wtd.table(x, weights, na.rm=na.rm, normwt=normwt, type='list') + x <- w$x + wts <- w$sum.of.weights + n <- sum(wts) + order <- 1 + (n - 1) * probs + low <- pmax(floor(order), 1) + high <- pmin(low + 1, n) + order <- order %% 1 + ## Find low and high order statistics + ## These are minimum values of x such that the cum. freqs >= c(low,high) + allq <- approx(cumsum(wts), x, xout=c(low,high), + method='constant', f=1, rule=2)$y + k <- length(probs) + quantiles <- (1 - order)*allq[1:k] + order*allq[-(1:k)] + names(quantiles) <- nams + return(quantiles) + } + w <- wtd.Ecdf(x, weights, na.rm=na.rm, type=type, normwt=normwt) + structure(approx(w$ecdf, w$x, xout=probs, rule=2)$y, + names=nams) +} + + +wtd.Ecdf <- function(x, weights=NULL, + type=c('i/n','(i-1)/(n-1)','i/(n+1)'), + normwt=FALSE, na.rm=TRUE) +{ + type <- match.arg(type) + switch(type, + '(i-1)/(n-1)'={a <- b <- -1}, + 'i/(n+1)' ={a <- 0; b <- 1}, + 'i/n' ={a <- b <- 0}) + + if(!length(weights)) { + ##.Options$digits <- 7 ## to get good resolution for names(table(x))6Aug00 + oldopt <- options(digits=7) + on.exit(options(oldopt)) + cumu <- table(x) ## R does not give names for cumsum + isdate <- testDateTime(x) ## 31aug02 + ax <- attributes(x) + ax$names <- NULL + x <- as.numeric(names(cumu)) + if(isdate) attributes(x) <- c(attributes(x),ax) + cumu <- cumsum(cumu) + cdf <- (cumu + a)/(cumu[length(cumu)] + b) + if(cdf[1]>0) { + x <- c(x[1], x); + cdf <- c(0,cdf) + } + + return(list(x = x, ecdf=cdf)) + } + + w <- wtd.table(x, weights, normwt=normwt, na.rm=na.rm) + cumu <- cumsum(w$sum.of.weights) + cdf <- (cumu + a)/(cumu[length(cumu)] + b) + list(x = c(if(cdf[1]>0) w$x[1], w$x), ecdf=c(if(cdf[1]>0)0, cdf)) +} + + +wtd.table <- function(x, weights=NULL, type=c('list','table'), + normwt=FALSE, na.rm=TRUE) +{ + type <- match.arg(type) + if(!length(weights)) + weights <- rep(1, length(x)) + + isdate <- testDateTime(x) ## 31aug02 + next 2 + ax <- attributes(x) + ax$names <- NULL + + if(is.character(x)) x <- as.factor(x) + lev <- levels(x) + x <- unclass(x) + + if(na.rm) { + s <- !is.na(x + weights) + x <- x[s, drop=FALSE] ## drop is for factor class + weights <- weights[s] + } + + n <- length(x) + if(normwt) + weights <- weights * length(x) / sum(weights) + + i <- order(x) # R does not preserve levels here + x <- x[i]; weights <- weights[i] + + if(any(duplicated(x))) { ## diff(x) == 
0 faster but doesn't handle Inf + weights <- tapply(weights, x, sum) + if(length(lev)) { + levused <- lev[sort(unique(x))] + if((length(weights) > length(levused)) && + any(is.na(weights))) + weights <- weights[!is.na(weights)] + + if(length(weights) != length(levused)) + stop('program logic error') + + names(weights) <- levused + } + + if(!length(names(weights))) + stop('program logic error') + + if(type=='table') + return(weights) + + x <- all.is.numeric(names(weights), 'vector') + if(isdate) + attributes(x) <- c(attributes(x),ax) + + names(weights) <- NULL + return(list(x=x, sum.of.weights=weights)) + } + + xx <- x + if(isdate) + attributes(xx) <- c(attributes(xx),ax) + + if(type=='list') + list(x=if(length(lev))lev[x] + else xx, + sum.of.weights=weights) + else { + names(weights) <- if(length(lev)) lev[x] + else xx + weights + } +} + + +wtd.rank <- function(x, weights=NULL, normwt=FALSE, na.rm=TRUE) +{ + if(!length(weights)) + return(rank(x, na.last=if(na.rm) NA else TRUE)) + + tab <- wtd.table(x, weights, normwt=normwt, na.rm=na.rm) + + freqs <- tab$sum.of.weights + ## rank of x = # <= x - .5 (# = x, minus 1) + r <- cumsum(freqs) - .5*(freqs-1) + ## Now r gives ranks for all unique x values. Do table look-up + ## to spread these ranks around for all x values. r is in order of x + approx(tab$x, r, xout=x)$y +} + + +wtd.loess.noiter <- function(x, y, weights=rep(1,n), + span=2/3, degree=1, cell=.13333, + type=c('all','ordered all','evaluate'), + evaluation=100, na.rm=TRUE) { + type <- match.arg(type) + n <- length(y) + if(na.rm) { + s <- !is.na(x+y+weights) + x <- x[s]; y <- y[s]; weights <- weights[s]; n <- length(y) + } + + max.kd <- max(200, n) + # y <- stats:::simpleLoess(y, x, weights=weights, span=span, + # degree=degree, cell=cell)$fitted + y <- fitted(loess(y ~ x, weights=weights, span=span, degree=degree, + control=loess.control(cell=cell, iterations=1))) + + switch(type, + all=list(x=x, y=y), + 'ordered all'={ + i <- order(x); + list(x=x[i],y=y[i]) + }, + evaluate={ + r <- range(x, na.rm=na.rm) + approx(x, y, xout=seq(r[1], r[2], length=evaluation)) + }) +} + +num.denom.setup <- function(num, denom) +{ + n <- length(num) + if(length(denom) != n) + stop('lengths of num and denom must match') + + s <- (1:n)[!is.na(num + denom) & denom != 0] + num <- num[s]; + denom <- denom[s] + + subs <- s[num > 0] + y <- rep(1, length(subs)) + wt <- num[num > 0] + other <- denom - num + subs <- c(subs, s[other > 0]) + wt <- c(wt, other[other > 0]) + y <- c(y, rep(0, sum(other>0))) + list(subs=subs, weights=wt, y=y) +} |
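Small checks of the weighted summaries (values chosen so the answers are easy to verify by hand):

    x <- c(1, 2, 3, 4)
    w <- c(1, 1, 2, 4)
    wtd.mean(x, w)                         # sum(w*x)/sum(w) = 25/8 = 3.125
    wtd.quantile(x, w, probs = c(.25, .5, .75))
    wtd.var(x, w, normwt = TRUE)           # weights rescaled to sum to length(x)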
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/xYplot.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/xYplot.s	Wed Jun 28 20:28:48 2017 -0400
| b'@@ -0,0 +1,714 @@\n+Cbind <- function(...)\n+{ # See llist function with Hmisc label function\n+ dotlist <- list(...)\n+ if(is.matrix(dotlist[[1]]))\n+ {\n+ y <- dotlist[[1]]\n+ ynam <- dimnames(y)[[2]]\n+ if(!length(ynam))\n+ stop(\'when first argument is a matrix it must have column dimnames\')\n+ \n+ other <- y[,-1,drop= FALSE]\n+ return(structure(y[,1], class=\'Cbind\', label=ynam[1], other=other))\n+ }\n+ \n+ lname <- names(dotlist)\n+ name <- vname <- as.character(sys.call())[-1]\n+ for(i in 1:length(dotlist))\n+ {\n+ vname[i] <- if(length(lname)) lname[i] else \'\'\n+ ## Added length() and \'\' 12Jun01, remove length(vname[i])==0 below\n+ if(vname[i]==\'\') vname[i] <- name[i]\n+ }\n+\n+ lab <- attr(y <- dotlist[[1]], \'label\')\n+ if(!length(lab)) lab <- vname[1]\n+ if(!is.matrix(other <- dotlist[[2]]) || ncol(other)<2)\n+ {\n+ other <- as.matrix(as.data.frame(dotlist))[,-1,drop= FALSE]\n+ dimnames(other)[[2]] <- vname[-1]\n+ }\n+ \n+ structure(y, class=\'Cbind\', label=lab, other=other)\n+}\n+\n+as.numeric.Cbind <- as.double.Cbind <- function(x, ...) x\n+## Keeps xyplot from stripping off "other" attribute in as.numeric\n+\n+\n+\'[.Cbind\' <- function(x, ...)\n+{\n+ structure(unclass(x)[...], class=\'Cbind\',\n+ label=attr(x,\'label\'),\n+ other=attr(x,\'other\')[...,,drop= FALSE])\n+}\n+\n+\n+prepanel.xYplot <- function(x, y, ...)\n+{\n+ xlim <- range(x, na.rm=TRUE)\n+ ylim <- range(y, attr(y,\'other\'), na.rm=TRUE)\n+ list(xlim=xlim, ylim=ylim, dx=diff(xlim), dy=diff(ylim))\n+}\n+\n+\n+## MB add method="filled bands" \n+## MB use col.fill to specify colors for filling bands\n+panel.xYplot <-\n+ function(x, y, subscripts, groups = NULL, \n+ type = if(is.function(method) || method == "quantiles") "b"\n+ else "p", \n+ method = c("bars", "bands", "upper bars", "lower bars", \n+ "alt bars", "quantiles", "filled bands"), \n+ methodArgs = NULL, label.curves = TRUE, abline, \n+ probs = c(0.5, 0.25, 0.75), nx=NULL, cap = 0.015, lty.bar = 1, \n+ lwd = plot.line$lwd, lty = plot.line$lty, \n+ pch = plot.symbol$pch, cex = plot.symbol$cex, \n+ font = plot.symbol$font, col = NULL, \n+ lwd.bands = NULL, lty.bands = NULL, col.bands = NULL, \n+ minor.ticks = NULL, col.fill = NULL,\n+ size=NULL, rangeCex=c(.5,3), ...)\n+{\n+ if(missing(method) || !is.function(method))\n+ method <- match.arg(method) # was just missing() 26Nov01\n+\n+ type <- type # evaluate type before method changes 9May01\n+ sizeVaries <- length(size) && length(unique(size)) > 1\n+ if(length(groups)) groups <- as.factor(groups)\n+\n+ g <- as.integer(groups)[subscripts]\n+ ng <- if(length(groups)) max(g)\n+ else 1\n+\n+ plot.symbol <- trellis.par.get(if(ng > 1) "superpose.symbol"\n+ else "plot.symbol")\n+\n+ plot.line <- trellis.par.get(if(ng > 1) "superpose.line"\n+ else "plot.line")\n+\n+ lty <- rep(lty, length = ng)\n+ lwd <- rep(lwd, length = ng)\n+ if(length(rangeCex) != 1) pch <- rep(pch, length = ng)\n+\n+ if(!sizeVaries) cex <- rep(cex, length = ng)\n+\n+ font <- rep(font, length = ng)\n+ if(!length(col))\n+ col <- if(type == "p") plot.symbol$col\n+ else plot.line$col\n+\n+ col <- rep(col, length = ng)\n+ pchVaries <- FALSE\n+ ## Thanks to Deepayan Sarkar for the following size code\n+ if(sizeVaries)\n+ {\n+ if(length(rangeCex) > 1)\n+ srng <- range(size, na.rm=TRUE)\n+\n+ size <- size[subscripts]\n+ if(length(rangeCex)==1)\n+ {\n+ pch <- as.character(size)\n+ cex <- rangeCex\n+ sizeVaries <- FALSE\n+ pchVaries <- TRUE\n+ }\n+ else\n+ {\n+ cex <- rangeCex[1] + diff(rangeCex)*(size - srng[1])/diff(srng)\n+ sKey <- 
function(x=0, y=1, cexObserved, cexCurtailed, col, pch,\n+ other)\n+ {\n+ if(!length'..b'other[,1], y, other[,nc], y, lwd=plot.line$lwd[1],\n+ lty=plot.line$lty[1], col=plot.line$col[1])\n+ if(nc==4)\n+ {\n+ segmnts(other[,2], y, other[,3], y, lwd=2*plot.line$lwd[1],\n+ lty=plot.line$lty[1], col=plot.line$col[1])\n+ gfun$points(other[,2], y, pch=3, cex=cex, col=col, font=font)\n+ gfun$points(other[,3], y, pch=3, cex=cex, col=col, font=font)\n+ }\n+ \n+ if(gp) panel.superpose(x, y, groups=as.numeric(groups), pch=pch,\n+ col=col, cex=cex, font=font, ...)\n+ else\n+ gfun$points(x, y, pch=pch[1], cex=cex, col=col, font=font)\n+ }\n+ else\n+ {\n+ if(gp) \n+ panel.superpose(x, y, groups=as.numeric(groups),\n+ pch=pch, col=col, cex=cex,\n+ font=font, ...)\n+ else\n+ panel.dotplot(x, y, pch=pch, col=col, cex=cex, font=font, ...)\n+ }\n+ if(gp)\n+ {\n+ Key <- function(x=0, y=1, lev, cex, col, font, pch, other)\n+ {\n+ if(!length(x)) x <- 0.05\n+ if(!length(y)) y <- 0.95 ## because of formals()\n+ rlegendg(x, y, legend=lev, cex=cex, col=col, pch=pch, other=other)\n+ invisible()\n+ }\n+ \n+ lev <- levels(as.factor(groups))\n+ ng <- length(lev)\n+ formals(Key) <- list(x=NULL,y=NULL,lev=lev,\n+ cex=cex[1:ng], col=col[1:ng],\n+ font=font[1:ng], pch=pch[1:ng], other=NULL)\n+ .setKey(Key)\n+ }\n+}\n+\n+\n+Dotplot <-\n+ function (formula, data=sys.frame(sys.parent()),\n+ groups, subset,\n+ xlab=NULL, ylab=NULL, ylim=NULL,\n+ panel=panel.Dotplot, prepanel=prepanel.Dotplot,\n+ scales=NULL, xscale=NULL, ...)\n+{\n+ yvname <- as.character(formula[2]) # tried deparse\n+ yv <- eval(parse(text=yvname), data)\n+ if(!length(ylab))\n+ ylab <- label(yv, units=TRUE, plot=TRUE,\n+ default=yvname, grid=TRUE)\n+\n+ if(!length(ylim))\n+ {\n+ yother <- attr(yv,\'other\')\n+ if(length(yother)) ylim <- range(yv, yother, na.rm=TRUE)\n+ }\n+\n+ if(is.character(yv)) yv <- factor(yv)\n+ if(!length(scales) && is.factor(yv))\n+ scales <- list(y=list(at=1:length(levels(yv)),labels=levels(yv)))\n+ if(length(xscale)) scales$x <- xscale\n+ \n+ xvname <- formula[[3]]\n+ if(length(xvname)>1 && as.character(xvname[[1]])==\'|\') \n+ xvname <- xvname[[2]] # ignore conditioning var\n+ xv <- eval(xvname, data)\n+ if(!length(xlab)) xlab <- label(xv, units=TRUE, plot=TRUE,\n+ default=as.character(xvname)[1], grid=TRUE)\n+\n+ if(!missing(groups)) groups <- eval(substitute(groups),data)\n+ \n+ if(!missing(subset)) subset <- eval(substitute(subset),data)\n+ \n+ dul <- options(drop.unused.levels=FALSE) ## for empty cells\n+ on.exit(options(dul)) ## across some panels\n+ \n+ do.call("xyplot", c(list(x = formula, data=data, prepanel=prepanel,\n+ panel=panel),\n+ if(length(ylab))list(ylab=ylab),\n+ if(length(ylim))list(ylim=ylim),\n+ if(length(xlab))list(xlab=xlab),\n+ if(!missing(groups))list(groups=groups),\n+ if(!missing(subset))list(subset=subset),\n+ if(length(scales))list(scales=scales),\n+ list(...)))\n+}\n+\n+\n+setTrellis <- function(strip.blank=TRUE, lty.dot.line=2,\n+ lwd.dot.line=1)\n+{\n+ if(strip.blank) trellis.strip.blank() # in Hmisc Misc.s\n+ dot.line <- trellis.par.get(\'dot.line\')\n+ dot.line$lwd <- lwd.dot.line\n+ dot.line$lty <- lty.dot.line\n+ trellis.par.set(\'dot.line\',dot.line)\n+ invisible()\n+}\n+\n+\n+numericScale <- function(x, label=NULL, ...)\n+{\n+ xn <- as.numeric(x)\n+ attr(xn,\'label\') <- if(length(label)) label\n+ else\n+ deparse(substitute(x))\n+ xn\n+}\n+\n+## See proc.scale.trellis, render.trellis, axis.trellis for details of\n+## how scale is used\n' |
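The file above implements Cbind, which stores extra columns (e.g., confidence limits) in an 'other' attribute on the response, and Dotplot, which draws those columns as error bars. A minimal usage sketch follows (not part of the commit; it assumes the Hmisc and lattice packages are installed, and the variable names are illustrative):

library(Hmisc)   # provides Dotplot and Cbind; lattice is loaded as a dependency
set.seed(1)
y     <- rnorm(60, mean=rep(1:3, each=20))
group <- factor(rep(c('A','B','C'), each=20))
means <- tapply(y, group, mean)
ses   <- tapply(y, group, function(z) sd(z)/sqrt(length(z)))
# Cbind packages the lower/upper limits into the 'other' attribute;
# panel.Dotplot draws them as horizontal error bars around each point
Dotplot(factor(names(means)) ~ Cbind(means, means - 1.96*ses, means + 1.96*ses),
        xlab='Group mean with approximate 0.95 CI')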
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/xtfrm.labelled.s
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/xtfrm.labelled.s Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,9 @@
+xtfrm.labelled <- function(x) {
+  newclass <- class(x)[class(x) != 'labelled']
+  if (length(newclass) == 0) {
+    class(x) <- NULL
+  } else {
+    class(x) <- newclass
+  }
+  xtfrm(x)
+}
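xtfrm.labelled strips the 'labelled' class (attached by Hmisc's label()) before calling xtfrm again, so that sorting-related generics dispatch on the underlying data type. A small sketch (not part of the commit; assumes Hmisc is attached):

library(Hmisc)
x <- c(3, 1, 2)
label(x) <- 'An example variable'  # x now carries class 'labelled'
sort(x)   # sort()/order() consult xtfrm(), which unclasses x first
rank(x)   # likewise works on the underlying numeric vector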
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/xy.group.s
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/xy.group.s Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,23 @@
+## Author: Frank Harrell  24 Jun 91
+xy.group <- function(x,y,m=150,g,fun=mean,result="list")
+{
+  k <- !is.na(x+y)
+  if(sum(k)<2)
+    stop("fewer than 2 non-missing x and y")
+
+  x <- x[k]
+  y <- y[k]
+  if(missing(m))
+    q <- cut2(x,g=g,levels.mean=TRUE,digits=7)
+  else
+    q <- cut2(x,m=m,levels.mean=TRUE,digits=7)
+
+  n <- table(q)
+  x.mean <- as.single(levels(q))
+  y.fun <- as.vector(tapply(y, q, fun))
+  if(result=="matrix") {
+    z <- cbind(table(q),x.mean,y.fun)
+    dimnames(z) <- list(levels(q), c("n","x","y"))
+  } else z <- list(x=x.mean,y=y.fun)
+  z
+}
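xy.group summarizes y within groups of x, where the grouping comes from Hmisc's cut2 (quantile groups via g, or groups of about m observations via m). A usage sketch (not part of the commit; assumes Hmisc is attached):

library(Hmisc)
set.seed(2)
x <- runif(1000)
y <- x^2 + rnorm(1000, sd=0.1)
# Mean of y within 10 quantile groups of x; result='matrix' gives columns n, x, y
z <- xy.group(x, y, g=10, fun=mean, result='matrix')
z
plot(z[,'x'], z[,'y'])  # a crude grouped-means smoother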
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/ynbind.s
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/ynbind.s Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,78 @@
+ynbind <- function(..., label=deparse(substitute(...)), asna=c('unknown', 'unspecified'), sort=TRUE) {
+w <- list(...)
+k <- length(w)
+if(! k) stop('no variables to process')
+nam <- as.character(sys.call())[-1]
+nam <- nam[1 : k]
+lab <- nam
+W <- matrix(NA, nrow=length(w[[1]]), ncol=k, dimnames=list(NULL, nam))
+for(j in 1 : k) {
+  x <- w[[j]]
+  na <- is.na(x)
+  la <- label(x)
+  if(la != '') lab[j] <- la
+  if(is.numeric(x) && all(x %in% 0 : 1)) x <- x == 1
+  if(! is.logical(x)) {
+    x <- tolower(as.character(x))
+    if(length(asna)) {
+      i <- x %in% asna
+      if(any(i)) na[i] <- TRUE
+    }
+    x <- x %in% c('y', 'yes', 'present')
+    if(any(na)) x[na] <- NA
+  }
+  W[, j] <- x
+}
+## Sort columns in ascending order of overall proportion
+prop <- apply(W, 2, mean, na.rm=TRUE)
+if(sort) {
+  i <- order(prop)
+  W <- W[, i, drop=FALSE]
+  lab <- lab[i]
+}
+structure(W, label=label, labels=lab, class=c('ynbind', 'matrix'))
+}
+
+'[.ynbind' <- function(x, rows=1:d[1], cols=1:d[2], ...) {
+  d <- dim(x)
+  at <- attributes(x)[c('label', 'labels')]
+  x <- NextMethod('[')
+  at$labels <- at$labels[cols]
+  attributes(x) <- c(attributes(x), at)
+  class(x) <- 'ynbind'
+  x
+  }
+
+pBlock <- function(..., subset=NULL, label=deparse(substitute(...))) {
+w <- list(...)
+k <- length(w)
+if(! k) stop('no variables to process')
+nam <- as.character(sys.call())[-1]
+nam <- nam[1 : k]
+lab <- nam
+W <- matrix(NA, nrow=length(w[[1]]), ncol=k, dimnames=list(NULL, nam))
+for(j in 1 : k) {
+  x <- w[[j]]
+  na <- is.na(x)
+  la <- label(x)
+  if(la != '') lab[j] <- la
+  W[, j] <- if(is.factor(x)) as.character(x) else x
+}
+if(length(subset)) {
+  if(is.logical(subset) && (length(subset) != nrow(W)))
+    stop('length of subset does not match length of analysis variables')
+  subset <- if(is.logical(subset)) ! subset else - subset
+  W[subset, ] <- NA
+}
+structure(W, label=label, labels=lab, class=c('pBlock', 'matrix'))
+}
+
+'[.pBlock' <- function(x, rows=1:d[1], cols=1:d[2], ...) {
+  d <- dim(x)
+  at <- attributes(x)[c('label', 'labels')]
+  x <- NextMethod('[')
+  at$labels <- at$labels[cols]
+  attributes(x) <- c(attributes(x), at)
+  class(x) <- 'pBlock'
+  x
+  }
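ynbind coerces a set of yes/no variables (logical, 0/1 numeric, or character/factor values such as 'yes' or 'present') into one logical matrix, treating values listed in asna as missing and optionally sorting columns by prevalence. A sketch (not part of the commit; the variable names are illustrative):

library(Hmisc)
cough   <- c('yes', 'no', 'unknown', 'yes')
fever   <- c(0, 1, 1, 0)
fatigue <- c(TRUE, FALSE, TRUE, TRUE)
w <- ynbind(cough, fever, fatigue, label='Symptoms')
w                  # logical matrix; 'unknown' became NA via asna
attr(w, 'labels')  # per-column labels, reordered by overall proportion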
diff -r 000000000000 -r c9dc7254a2ac Hmisc/R/zoom.s
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/R/zoom.s Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,21 @@
+## Function to use the mouse to zoom in on plots.
+## Author: Bill Dunlap <bill@STAT.WASHINGTON.EDU>
+zoom <- function(fun, ...)
+{
+  on.exit(par(oldpar))
+  oldpar<-par(err=-1)
+  fun(...)
+  while(TRUE) {
+    cat("Click mouse over corners of zoom area: ")
+    p<-locator(n=2)
+    if(is.null(p$x) || length(p$x)!=2)
+      break
+
+    xlim<-range(p$x)
+    ylim<-range(p$y)
+    cat("xlim=",xlim,"ylim=",ylim,"\n")
+    fun(...,xlim=xlim,ylim=ylim)
+  }
+
+  cat("Bye!\n")
+}
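zoom re-draws an arbitrary plotting function with xlim/ylim taken from two mouse clicks, looping until locator() returns nothing (e.g., Esc or right-click Stop in the graphics window). An interactive sketch (not part of the commit):

# Interactive only: click two corners of the region to magnify
zoom(plot, x=1:100, y=cumsum(rnorm(100)), type='l')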
diff -r 000000000000 -r c9dc7254a2ac Hmisc/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/README.md Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,20 @@
+Hmisc
+=====
+
+Harrell Miscellaneous
+
+Current Goals
+=============
+* Continue to refine the summaryX class of functions that replace tables with graphics
+  * See also bpplotM and tabulr
+* See http://biostat.mc.vanderbilt.edu/HmiscNew
+
+Web Sites
+=============
+* Overall: http://biostat.mc.vanderbilt.edu/Hmisc
+* CRAN: http://cran.r-project.org/web/packages/Hmisc
+* Changelog: https://github.com/harrelfe/Hmisc/commits/master
+
+To Do
+=====
+* Once more users are using R >= 2.9.1 replace any(duplicated()) with anyDuplicated (which returns an integer).  Thanks: Benjamin Tyler
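On the README's last to-do item: anyDuplicated() returns the index of the first duplicate (0 when there is none), so its result can stand in for any(duplicated()) without building the intermediate logical vector. A one-line illustration (not part of the commit):

x <- c(1, 5, 3, 5)
any(duplicated(x))    # TRUE, after allocating a length-4 logical vector
anyDuplicated(x) > 0  # TRUE; anyDuplicated(x) returns 4, the position of the first duplicate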
diff -r 000000000000 -r c9dc7254a2ac Hmisc/inst/CHANGELOG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/inst/CHANGELOG Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,1 @@
+See https://github.com/harrelfe/Hmisc/commits/master
diff -r 000000000000 -r c9dc7254a2ac Hmisc/inst/THANKS
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/inst/THANKS Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,4 @@
+Greg Snow for providing the subplot function and documentation
+Greg Snow, Ph.D.
+greg.snow@ihc.com
+
diff -r 000000000000 -r c9dc7254a2ac Hmisc/inst/todo
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/inst/todo Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,6 @@
+Make latex use options(latexcmd, dvipscmd)
+
+See if R mailbox has generalization of var.inner in
+model.frame.default.s
+
+Check arguments to .C("loess_raw") in wtd.stats.s
\ No newline at end of file
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/Cs.Rd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/Cs.Rd Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,28 @@
+\name{Cs}
+\alias{Cs}
+\title{
+Character strings from unquoted names
+}
+\description{
+Makes a vector of character strings from a list of valid S names
+}
+\usage{
+Cs(\dots)
+}
+\arguments{
+\item{...}{
+any number of names separated by commas
+}}
+\value{
+character string vector
+}
+\seealso{
+sys.frame, deparse
+}
+\examples{
+Cs(a,cat,dog)
+# subset.data.frame <- dataframe[,Cs(age,sex,race,bloodpressure,height)]
+}
+\keyword{character}
+\keyword{utilities}
+% Converted by Sd2Rd version 1.21.
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/Ecdf.Rd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/Ecdf.Rd Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,264 @@
+\name{Ecdf}
+\alias{Ecdf}
+\alias{Ecdf.default}
+\alias{Ecdf.data.frame}
+\alias{Ecdf.formula}
+\alias{panel.Ecdf}
+\alias{prepanel.Ecdf}
+\title{Empirical Cumulative Distribution Plot}
+\description{
+Computes coordinates of cumulative distribution function of x, and by default
+plots it as a step function. A grouping variable may be specified so that
+stratified estimates are computed and (by default) plotted. If there is
+more than one group, the \code{labcurve} function is used (by default) to label
+the multiple step functions or to draw a legend defining line types, colors,
+or symbols by linking them with group labels. A \code{weights} vector may
+be specified to get weighted estimates. Specify \code{normwt} to make
+\code{weights} sum to the length of \code{x} (after removing NAs). Otherwise
+the total sample size is taken to be the sum of the weights.
+
+\code{Ecdf} is actually a method, and \code{Ecdf.default} is what's
+called for a vector argument. \code{Ecdf.data.frame} is called when the
+first argument is a data frame. This function can automatically set up
+a matrix of ECDFs and wait for a mouse click if the matrix requires more
+than one page. Categorical variables, character variables, and
+variables having fewer than a set number of unique values are ignored.
+If \code{par(mfrow=..)} is not set up before \code{Ecdf.data.frame} is
+called, the function will try to figure the best layout depending on the
+number of variables in the data frame. Upon return the original
+\code{mfrow} is left intact.
+
+When the first argument to \code{Ecdf} is a formula, a Trellis/Lattice function
+\code{Ecdf.formula} is called. This allows for multi-panel
+conditioning, superposition using a \code{groups} variable, and other
+Trellis features, along with the ability to easily plot transformed
+ECDFs using the \code{fun} argument. For example, if \code{fun=qnorm},
+the inverse normal transformation will be used for the y-axis. If the
+transformed curves are linear this indicates normality. Like the
+\code{xYplot} function, \code{Ecdf} will create a function \code{Key} if
+the \code{groups} variable is used. This function can be invoked by the
+user to define the keys for the groups.
+}
+
+\usage{
+Ecdf(x, \dots)
+
+\method{Ecdf}{default}(x, what=c('F','1-F','f','1-f'),
+ weights=rep(1, length(x)), normwt=FALSE,
+ xlab, ylab, q, pl=TRUE, add=FALSE, lty=1,
+ col=1, group=rep(1,length(x)), label.curves=TRUE, xlim,
+ subtitles=TRUE, datadensity=c('none','rug','hist','density'),
+ side=1,
+ frac=switch(datadensity,none=NA,rug=.03,hist=.1,density=.1),
+ dens.opts=NULL, lwd=1, log='', \dots)
+
+
+\method{Ecdf}{data.frame}(x, group=rep(1,nrows),
+ weights=rep(1, nrows), normwt=FALSE,
+ label.curves=TRUE, n.unique=10, na.big=FALSE, subtitles=TRUE,
+ vnames=c('labels','names'),\dots)
+
+\method{Ecdf}{formula}(x, data=sys.frame(sys.parent()), groups=NULL,
+ prepanel=prepanel.Ecdf, panel=panel.Ecdf, \dots, xlab,
+ ylab, fun=function(x)x, what=c('F','1-F','f','1-f'), subset=TRUE)
+}
+\arguments{
+\item{x}{a numeric vector, data frame, or Trellis/Lattice formula}
+\item{what}{
+The default is \code{"F"} which results in plotting the fraction of values
+<= x. Set to \code{"1-F"} to plot the fraction > x or \code{"f"} to plot the
+cumulative frequency of values <= x. Use \code{"1-f"} to plot the
+cumulative frequency of values >= x.
+}
+\item{weights}{
+numeric vector of weights. Omit or specify a zero-length vector or
+NULL to get unweighted estimates.
+}
+\item{normwt}{see above}
+\item{xlab}{
+x-axis label. Default is label(x) or name of calling argument. For
+\code{Ecdf.formula}, \code{xlab} defaults to the \code{label} attribute
+of the x-axis variable.
+}
+\item{ylab}{
+y-axis label. Default is \code{"Proportion <= x"}, \code{"Proportion > x"},
+or "Frequency <= x" depending on value of \code{what}.
+}
+\item{q}{
+a vector for quantiles for whic
[...]
efault is to place the additional
+information on top of the x-axis (\code{side=1}). Use \code{side=3} to place at
+the top of the graph.
+}
+\item{frac}{
+passed to \code{histSpike}
+}
+\item{dens.opts}{
+a list of optional arguments for \code{histSpike}
+}
+\item{...}{
+other parameters passed to plot if add=F. For data frames, other
+parameters to pass to \code{Ecdf.default}.
+For \code{Ecdf.formula}, if \code{groups} is not used, you can also add
+data density information to each panel's ECDF by specifying the
+\code{datadensity} and optional \code{frac}, \code{side},
+\code{dens.opts} arguments.
+}
+\item{n.unique}{
+minimum number of unique values before an ECDF is drawn for a variable
+in a data frame. Default is 10.
+}
+\item{na.big}{
+set to \code{TRUE} to draw the number of NAs in larger letters in the middle of
+the plot for \code{Ecdf.data.frame}
+}
+\item{vnames}{
+By default, variable labels are used to label x-axes. Set \code{vnames="names"}
+to instead use variable names.
+}
+\item{method}{
+method for computing the empirical cumulative distribution. See
+\code{wtd.Ecdf}. The default is to use the standard \code{"i/n"} method as is
+used by the non-Trellis versions of \code{Ecdf}.
+}
+\item{fun}{
+a function to transform the cumulative proportions, for the
+Trellis-type usage of \code{Ecdf}
+}
+\item{data, groups, subset, prepanel, panel}{the usual Trellis/Lattice parameters, with \code{groups}
+ causing \code{Ecdf.formula} to overlay multiple ECDFs on one panel.}
+}
+\value{
+for \code{Ecdf.default} an invisible list with elements x and y giving the
+coordinates of the cdf. If there is more than one \code{group}, a list of
+such lists is returned. An attribute, \code{N}, is in the returned
+object. It contains the elements \code{n} and \code{m}, the number of
+non-missing and missing observations, respectively.
+}
+\author{
+Frank Harrell
+\cr
+Department of Biostatistics, Vanderbilt University
+\cr
+\email{f.harrell@vanderbilt.edu}
+}
+\section{Side Effects}{
+plots
+}
+\seealso{
+\code{\link{wtd.Ecdf}}, \code{\link{label}}, \code{\link{table}}, \code{\link{cumsum}}, \code{\link{labcurve}}, \code{\link{xYplot}}, \code{\link{histSpike}}
+}
+\examples{
+set.seed(1)
+ch <- rnorm(1000, 200, 40)
+Ecdf(ch, xlab="Serum Cholesterol")
+scat1d(ch) # add rug plot
+histSpike(ch, add=TRUE, frac=.15) # add spike histogram
+# Better: add a data density display automatically:
+Ecdf(ch, datadensity='density')
+
+
+label(ch) <- "Serum Cholesterol"
+Ecdf(ch)
+other.ch <- rnorm(500, 220, 20)
+Ecdf(other.ch,add=TRUE,lty=2)
+
+
+sex <- factor(sample(c('female','male'), 1000, TRUE))
+Ecdf(ch, q=c(.25,.5,.75)) # show quartiles
+Ecdf(ch, group=sex,
+ label.curves=list(method='arrow'))
+
+
+# Example showing how to draw multiple ECDFs from paired data
+pre.test <- rnorm(100,50,10)
+post.test <- rnorm(100,55,10)
+x <- c(pre.test, post.test)
+g <- c(rep('Pre',length(pre.test)),rep('Post',length(post.test)))
+Ecdf(x, group=g, xlab='Test Results', label.curves=list(keys=1:2))
+# keys=1:2 causes symbols to be drawn periodically on top of curves
+
+
+# Draw a matrix of ECDFs for a data frame
+m <- data.frame(pre.test, post.test,
+ sex=sample(c('male','female'),100,TRUE))
+Ecdf(m, group=m$sex, datadensity='rug')
+
+
+freqs <- sample(1:10, 1000, TRUE)
+Ecdf(ch, weights=freqs) # weighted estimates
+
+
+# Trellis/Lattice examples:
+
+
+region <- factor(sample(c('Europe','USA','Australia'),100,TRUE))
+year <- factor(sample(2001:2002,1000,TRUE))
+Ecdf(~ch | region*year, groups=sex)
+Key() # draw a key for sex at the default location
+# Key(locator(1)) # user-specified positioning of key
+age <- rnorm(1000, 50, 10)
+Ecdf(~ch | equal.count(age), groups=sex) # use overlapping shingles
+Ecdf(~ch | sex, datadensity='hist', side=3) # add spike histogram at top
+}
+\keyword{nonparametric}
+\keyword{hplot}
+\keyword{methods}
+\keyword{distribution}
+\concept{trellis}
+\concept{lattice}
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/Hmisc-internal.Rd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/Hmisc-internal.Rd Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,72 @@
+\name{Hmisc-internal}
+\title{Internal Hmisc functions}
+\alias{dataDensityString}
+\alias{aregTran}
+\alias{as.double.Cbind}
+\alias{as.numeric.Cbind}
+\alias{format.sep}
+\alias{as.data.frame.impute}
+\alias{as.data.frame.roundN}
+\alias{as.data.frame.special.miss}
+\alias{as.data.frame.substi}
+\alias{substi}
+\alias{substi.source}
+\alias{[.substi}
+\alias{bpx}
+\alias{ddmmmyy}
+\alias{expr.tree}
+\alias{fillin}
+\alias{formatCats}
+\alias{formatCons}
+\alias{formatDateTime}
+\alias{formatTestStats}
+\alias{format.timePOSIXt}
+\alias{ftuss}
+\alias{ftupwr}
+\alias{get2rowHeads}
+\alias{groupn}
+\alias{importConvertDateTime}
+\alias{is.present}
+\alias{lookupSASContents}
+\alias{makeNames}
+\alias{mask}
+\alias{nafitted.delete}
+\alias{na.include}
+\alias{Names2names}
+\alias{GetModelFrame}
+\alias{naprint.keep}
+\alias{naresid.keep}
+\alias{naprint.delete}
+\alias{naresid.delete}
+\alias{napredict.delete}
+\alias{oPar}
+\alias{optionsCmds}
+\alias{ordGridFun}
+\alias{parGrid}
+\alias{pasteFit}
+\alias{print.substi}
+\alias{print.timePOSIXt}
+\alias{read.xportDataload}
+\alias{readSAScsv}
+\alias{rowsumFast}
+\alias{sas.get.macro}
+\alias{setParNro}
+\alias{stepfun.eval}
+\alias{stripChart}
+\alias{[.terms}
+\alias{termsDrop}
+\alias{testDateTime}
+\alias{uncbind}
+\alias{var.inner}
+\alias{xInch}
+\alias{xySortNoDupNoNA}
+\alias{yInch}
+\alias{zoom}
+\alias{latex.responseSummary}
+\alias{print.responseSummary}
+\alias{responseSummary}
+\alias{combine}
+\alias{combine<-}
+\description{Internal Hmisc functions.}
+\details{These are not to be called by the user or are undocumented.}
+\keyword{internal}
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/Lag.Rd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/Lag.Rd Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,36 @@
+\name{Lag}
+\alias{Lag}
+\title{Lag a Numeric, Character, or Factor Vector}
+\description{
+Shifts a vector \code{shift} elements later. Character or factor
+variables are padded with \code{""}, numerics with \code{NA}. The shift
+may be negative.
+}
+\usage{
+Lag(x, shift = 1)
+}
+\arguments{
+  \item{x}{a vector}
+  \item{shift}{integer specifying the number of observations to
+    be shifted to the right. Negative values imply shifts to the left.}
+}
+\details{
+Attributes of the original object are carried along to the new lagged
+one.
+}
+\value{
+a vector like \code{x}
+}
+\author{Frank Harrell}
+\seealso{\code{\link{lag}}}
+\examples{
+Lag(1:5,2)
+Lag(letters[1:4],2)
+Lag(factor(letters[1:4]),-2)
+# Find which observations are the first for a given subject
+id <- c('a','a','b','b','b','c')
+id != Lag(id)
+!duplicated(id)
+}
+\keyword{manip}
+
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/Misc.Rd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/Misc.Rd Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,295 @@
+\name{Misc}
+\alias{clowess}
+\alias{confbar}
+\alias{getLatestSource}
+\alias{inverseFunction}
+\alias{james.stein}
+\alias{km.quick}
+\alias{latexBuild}
+\alias{lm.fit.qr.bare}
+\alias{matxv}
+\alias{makeSteps}
+\alias{nomiss}
+\alias{outerText}
+\alias{sepUnitsTrans}
+\alias{strgraphwrap}
+\alias{trap.rule}
+\alias{trellis.strip.blank}
+\alias{unPaste}
+\alias{whichClosest}
+\alias{whichClosePW}
+\alias{whichClosek}
+\alias{xless}
+\title{Miscellaneous Functions}
+\description{
+ This documents miscellaneous small functions in Hmisc that may be of
+ interest to users.
+
+ \code{clowess} runs \code{lowess} but if the \code{iter} argument
+ exceeds zero, sometimes wild values can result, in which case
+ \code{lowess} is re-run with \code{iter=0}.
+
+ \code{confbar} draws multi-level confidence bars using small rectangles
+ that may be of different colors.
+
+ \code{getLatestSource} fetches and \code{source}s the most recent
+ source code for functions in packages in the Vanderbilt University
+ CVS repository.
+
+ \code{inverseFunction} generates a function to find all inverses of a
+ monotonic or nonmonotonic function that is tabulated at vectors (x,y),
+ typically 1000 points. If the original function is monotonic, simple linear
+ interpolation is used and the result is a vector, otherwise linear
+ interpolation is used within each interval in which the function is
+ monotonic and the result is a matrix with number of columns equal to the
+ number of monotonic intervals. If a requested y is not within any
+ interval, the extreme x that pertains to the nearest extreme y is
+ returned. Specifying what='sample' to the returned function will cause a
+ vector to be returned instead of a matrix, with elements taken as a
+ random choice of the possible inverses.
+
+ \code{james.stein} computes James-Stein shrunken estimates of cell
+ means given a response variable (which may be binary) and a grouping
+ indicator.
+
+ \code{km.quick} provides a fast way to invoke \code{survfitKM} in the
+ \code{survival} package to get Kaplan-Meier estimates for a
+ single stratum for a vector of time points (if \code{times} is given) or to
+ get a vector of survival time quantiles (if \code{q} is given).
+
+ \code{latexBuild} takes pairs of character strings and produces a
+ single character string containing concatenation of all of them, plus
+ an attribute \code{"close"} which is a character string containing the
+ LaTeX closure that will balance LaTeX code with respect to
+ parentheses, braces, brackets, or \code{begin} vs. \code{end}. When
+ an even-numbered element of the vector is not a left parenthesis,
+ brace, or bracket, the element is taken as a word that was surrounded
+ by \code{begin} and braces, for which the corresponding \code{end} is
+ constructed in the returned attribute.
+
+ \code{lm.fit.qr.bare} is a fast stripped-down function for computing
+ regression coefficients, residuals, \eqn{R^2}, and fitted values. It
+ uses \code{lm.fit}.
+
+ \code{matxv} multiplies a matrix by a vector, handling automatic
+ addition of intercepts if the matrix does not have a column of ones.
+ If the first argument is not a matrix, it will be converted to one.
+ An optional argument allows the second argument to be treated as a
+ matrix, useful when its rows represent bootstrap reps of
+ coefficients. Then ab' is computed. \code{matxv} respects the
+ \code{"intercepts"} attribute if it is stored on \code{b} by the
+ \code{rms} package. This is used by \code{\link[rms]{orm}}
+ fits that are bootstrap-repeated by \code{\link[rms]{bootcov}} where
+ only the intercept corresponding to the median is retained. If
+ \code{kint} has nonzero length, it is checked for consistency with the
+ attribute.
+
+ \code{makeSteps} is a copy of the dostep function inside the
+ \code{survival} package's \code{plot.survfit} function. It expands a
+ series of points to include all the segment
[...]
}{line widths}
+ \item{package}{name of package for \code{getLatestSource}, default is
+  \code{'Hmisc'}}
+ \item{q}{vector of confidence coefficients or quantiles}
+ \item{qfun}{quantiles on transformed scale}
+ \item{recent}{an integer telling \code{getLatestSource} to get the
+  \code{recent} most recently modified files from the package}
+ \item{round}{set to \code{TRUE} to round converted values}
+ \item{S}{a \code{\link[survival]{Surv}} object}
+ \item{se}{vector of standard errors}
+ \item{sep}{a single character string specifying the delimiter. For
+  \code{latexBuild} the default is \code{""}.}
+ \item{side}{for \code{confbar} is \code{"b","l","t","r"} for bottom,
+  left, top, right.}
+ \item{str}{a character string vector}
+ \item{string}{a character string vector}
+ \item{ticks}{set to \code{TRUE} to draw lines between rectangles}
+ \item{times}{a numeric vector of times}
+ \item{title}{a character string to title a window or plot}
+ \item{tolerance}{tolerance for judging singularity in matrix}
+ \item{type}{\code{"v"} for vertical, \code{"h"} for horizontal. For
+  \code{getLatestSource} this specifies the type of source code
+  repository, \code{'svn'} (the default) or
+  \code{'cvs'}, which is now outdated as Subversion has replaced CVS
+  in the Vanderbilt Biostatistics server.}
+ \item{w}{a numeric vector}
+ \item{width}{width of confidence rectangles in user units, or see
+  \code{\link{strwrap}}}
+ \item{x}{a numeric vector (matrix for \code{lm.fit.qr.bare}) or data
+  frame. For \code{xless} may be any object that is sensible to
+  \code{print}. For \code{sepUnitsTrans} is a character or factor
+  variable. For \code{getLatestSource} is a character string or
+  vector of character strings containing base file names to retrieve
+  from CVS. Set \code{x='all'} to retrieve all source files. For
+  \code{clowess}, \code{x} may also be a list with x and y
+  components. For \code{inverseFunction}, \code{x} and \code{y}
+  contain evaluations of the function whose inverse is needed.
+  \code{x} is typically an equally-spaced grid of 1000 points. For
+  \code{strgraphwrap} is a character vector.}
+ \item{xpxi}{set to \code{TRUE} to add an element to the result
+  containing the inverse of \eqn{X'X}}
+ \item{singzero}{set to \code{TRUE} to set coefficients corresponding
+  to singular variables to zero instead of \code{NA}.}
+ \item{y}{a numeric vector. For \code{inverseFunction} \code{y} is the
+  evaluated function values at \code{x}.}
+ \item{indent, exdent, prefix}{see \code{\link{strwrap}}}
+ \item{simplify}{see \code{\link{sapply}}}
+ \item{units}{see \code{\link{par}}}
+ \item{\dots}{arguments passed through to another function. For
+  \code{latexBuild} represents pairs, with odd numbered elements being
+  character strings containing LaTeX code or a zero-length object to
+  ignore, and even-numbered elements representing LaTeX left
+  parenthesis, left brace, or left bracket, or environment name.}
+}
+\author{Frank Harrell and Charles Dupont}
+\examples{
+
+
+trap.rule(1:100,1:100)
+
+unPaste(c('a;b or c','ab;d','qr;s'), ';')
+
+sepUnitsTrans(c('3 days','4 months','2 years','7'))
+
+set.seed(1)
+whichClosest(1:100, 3:5)
+whichClosest(1:100, rep(3,20))
+
+whichClosePW(1:100, rep(3,20))
+whichClosePW(1:100, rep(3,20), f=.05)
+whichClosePW(1:100, rep(3,20), f=1e-10)
+
+x <- seq(-1, 1, by=.01)
+y <- x^2
+h <- inverseFunction(x,y)
+formals(h)$turns # vertex
+a <- seq(0, 1, by=.01)
+plot(0, 0, type='n', xlim=c(-.5,1.5))
+lines(a, h(a)[,1]) ## first inverse
+lines(a, h(a)[,2], col='red') ## second inverse
+a <- c(-.1, 1.01, 1.1, 1.2)
+points(a, h(a)[,1])
+
+\dontrun{
+getLatestSource(recent=5) # source() most recent 5 revised files in Hmisc
+getLatestSource('cut2') # fetch and source latest cut2.s
+getLatestSource('all') # get everything
+getLatestSource(avail=TRUE) # list available files and latest versions
+}
+}
+\keyword{programming}
+\keyword{utilities}
+\keyword{iplot}
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/Overview.Rd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/Overview.Rd Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,308 @@
+\name{HmiscOverview}
+\alias{HmiscOverview}
+\alias{Hmisc.Overview}
+\title{
+ Overview of Hmisc Library
+}
+\description{
+ The Hmisc library contains many functions useful for data analysis,
+ high-level graphics, utility operations, functions for computing
+ sample size and power, translating SAS datasets into \R, imputing
+ missing values, advanced table making, variable clustering, character
+ string manipulation, conversion of \R objects to LaTeX code, recoding
+ variables, and bootstrap repeated measures analysis. Most of these
+ functions were written by F Harrell, but a few were collected from
+ statlib and from s-news; other authors are indicated below. This
+ collection of functions includes all of Harrell's submissions to
+ statlib other than the functions in the \pkg{rms} and display
+ libraries. A few of the functions do not have \dQuote{Help}
+ documentation.
+
+ To make \pkg{Hmisc} load silently, issue
+ \code{options(Hverbose=FALSE)} before \code{library(Hmisc)}.
+}
+\section{Functions}{
+\tabular{ll}{
+\bold{Function Name} \tab \bold{Purpose} \cr
+abs.error.pred \tab Computes various indexes of predictive accuracy based\cr
+\tab on absolute errors, for linear models\cr
+addMarginal \tab Add marginal observations over selected variables\cr
+all.is.numeric \tab Check if character strings are legal numerics\cr
+approxExtrap \tab Linear extrapolation\cr
+aregImpute \tab Multiple imputation based on additive regression,\cr
+ \tab bootstrapping, and predictive mean matching\cr
+areg.boot \tab Nonparametrically estimate transformations for both\cr
+ \tab sides of a multiple additive regression, and\cr
+ \tab bootstrap these estimates and \eqn{R^2}\cr
+ballocation \tab Optimum sample allocations in 2-sample proportion test\cr
+binconf \tab Exact confidence limits for a proportion and more accurate\cr
+ \tab (narrower!) score stat.-based Wilson interval\cr
+ \tab (Rollin Brant, mod. FEH)\cr
+bootkm \tab Bootstrap Kaplan-Meier survival or quantile estimates\cr
+bpower \tab Approximate power of 2-sided test for 2 proportions\cr
+ \tab Includes bpower.sim for exact power by simulation\cr
+bpplot \tab Box-Percentile plot \cr
+ \tab (Jeffrey Banfield, \email{umsfjban@bill.oscs.montana.edu})\cr
+bpplotM \tab Chart extended box plots for multiple variables\cr
+bsamsize \tab Sample size requirements for test of 2 proportions\cr
+bystats \tab Statistics on a single variable by levels of >=1 factors\cr
+bystats2 \tab 2-way statistics\cr
+character.table \tab Shows numeric equivalents of all latin characters\cr
+ \tab Useful for putting many special chars. in graph titles\cr
+ \tab (Pierre Joyet, \email{pierre.joyet@bluewin.ch})\cr
+ciapower \tab Power of Cox interaction test\cr
+cleanup.import \tab More compactly store variables in a data frame, and clean up\cr
+ \tab problem data when e.g. Excel spreadsheet had a non-\cr
+ \tab numeric value in a numeric column\cr
+combine.levels \tab Combine infrequent levels of a categorical variable\cr
+confbar \tab Draws confidence bars on an existing plot using multiple\cr
+ \tab confidence levels distinguished using color or gray scale\cr
+contents \tab Print the contents (variables, labels, etc.) of a data frame\cr
+cpower \tab Power of Cox 2-sample test allowing for noncompliance\cr
+Cs \tab Vector of character strings from list of unquoted names\cr
+csv.get \tab Enhanced importing of comma separated files labels\cr
+cut2 \tab Like cut with better endpoint label construction and allows\cr
+ \tab construction of quantile groups or groups with give
[...]
tab Summary for continuous variables using lowess\cr
+symbol.freq \tab X-Y Frequency plot with circles' area prop. to frequency\cr
+sys \tab Execute unix() or dos() depending on what's running\cr
+tabulr \tab Front-end to tabular function in the tables package\cr
+tex \tab Enclose a string with the correct syntax for using\cr
+ \tab with the LaTeX psfrag package, for postscript graphics\cr
+transace \tab ace() packaged for easily automatically transforming all\cr
+ \tab variables in a matrix\cr
+transcan \tab automatic transformation and imputation of NAs for a\cr
+ \tab series of predictor variables\cr
+trap.rule \tab Area under curve defined by arbitrary x and y vectors,\cr
+ \tab using trapezoidal rule\cr
+trellis.strip.blank
+ \tab To make the strip titles in trellis more visible, you can \cr
+ \tab make the backgrounds blank by saying trellis.strip.blank().\cr
+ \tab Use before opening the graphics device.\cr
+t.test.cluster \tab 2-sample t-test for cluster-randomized observations\cr
+uncbind \tab Form individual variables from a matrix\cr
+upData \tab Update a data frame (change names, labels, remove vars, etc.)\cr
+units \tab Set or fetch "units" attribute - units of measurement for var.\cr
+varclus \tab Graph hierarchical clustering of variables using squared\cr
+ \tab Pearson or Spearman correlations or Hoeffding D as similarities\cr
+ \tab Also includes the naclus function for examining similarities in\cr
+ \tab patterns of missing values across variables.\cr
+wtd.mean \tab \cr wtd.var \tab \cr wtd.quantile \tab \cr wtd.Ecdf \tab
+\cr wtd.table \tab \cr wtd.rank \tab \cr wtd.loess.noiter \tab \cr
+num.denom.setup \tab Set of functions for obtaining weighted estimates\cr
+xy.group \tab Compute mean x vs. function of y by groups of x\cr
+xYplot \tab Like trellis xyplot but supports error bars and multiple\cr
+ \tab response variables that are connected as separate lines\cr
+ynbind \tab Combine a series of yes/no true/false present/absent variables into a matrix\cr
+zoom \tab Zoom in on any graphical display\cr \tab (Bill Dunlap, \email{bill@statsci.com})
+}}
+
+\references{
+See Alzola CF, Harrell FE (2004): An Introduction to S and the
+Hmisc and Design Libraries at
+\url{http://biostat.mc.vanderbilt.edu/twiki/pub/Main/RS/sintro.pdf}
+for extensive documentation and examples for the Hmisc package.
+}
+
+\section{Copyright Notice}{
+\bold{GENERAL DISCLAIMER}\cr
+This program is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 2, or (at your option) any later version.\cr
+
+This program is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied
+warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+PURPOSE. See the GNU General Public License for more
+details.\cr
+
+In short: You may use it any way you like, as long as you
+don't charge money for it, remove this notice, or hold anyone liable
+for its results. Also, please acknowledge the source and communicate
+changes to the author.\cr
+
+If this software is used in work presented for publication, kindly
+reference it using for example:\cr
+ Harrell FE (2014): Hmisc: A package of miscellaneous R functions.
+ Programs available from \url{http://biostat.mc.vanderbilt.edu/Hmisc}.\cr
+ Be sure to reference \R itself and other libraries used.
+}
+\author{
+Frank E Harrell Jr\cr
+Professor of Biostatistics\cr
+Chair, Department of Biostatistics\cr
+Vanderbilt University School of Medicine\cr
+Nashville, Tennessee\cr
+\email{f.harrell@vanderbilt.edu}
+}
+\keyword{misc}
+\concept{overview}
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/Save.Rd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/Save.Rd Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,44 @@
+\name{Save}
+\alias{Save}
+\alias{Load}
+\title{Facilitate Use of save and load to Remote Directories}
+\description{
+These functions are slightly enhanced versions of \code{save} and
+\code{load} that allow a target directory to be specified using
+\code{options(LoadPath="pathname")}. If the \code{LoadPath} option is
+not set, the current working directory is used.
+}
+\usage{
+# options(LoadPath='mypath')
+Save(object, name=deparse(substitute(object)))
+Load(object)
+}
+\arguments{
+  \item{object}{the name of an object, usually a data frame. It must
+    not be quoted.}
+  \item{name}{an optional name to assign to the object and file name
+    prefix, if the argument name is not used}
+}
+\details{
+\code{Save} creates a temporary version of the object under the name
+given by the user, so that \code{save} will internalize this name.
+Then subsequent \code{Load} or \code{load} will cause an object of the
+original name to be created in the global environment. The name of
+the \R data file is assumed to be the name of the object (or the value
+of \code{name}) appended with \code{".rda"}. For \code{Save},
+compression is used.
+}
+\author{Frank Harrell}
+\seealso{\code{\link{save}}, \code{\link{load}}}
+\examples{
+\dontrun{
+d <- data.frame(x=1:3, y=11:13)
+options(LoadPath='../data/rda')
+Save(d)       # creates ../data/rda/d.rda
+Load(d)       # reads ../data/rda/d.rda
+Save(d, 'D')  # creates object D and saves it in .../D.rda
+}
+}
+\keyword{data}
+\keyword{file}
+\keyword{utilities}
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/abs.error.pred.Rd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/abs.error.pred.Rd Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,81 @@
+\name{abs.error.pred}
+\alias{abs.error.pred}
+\alias{print.abs.error.pred}
+\title{
+  Indexes of Absolute Prediction Error for Linear Models
+}
+\description{
+  Computes the mean and median of various absolute errors related to
+  ordinary multiple regression models. The mean and median absolute
+  errors correspond to the mean square due to regression, error, and
+  total. The absolute errors computed are derived from \eqn{\hat{Y} -
+  \mbox{median($\hat{Y}$)}}{\var{Yhat} - median(\var{Yhat})},
+  \eqn{\hat{Y} - Y}{\var{Yhat} - \var{Y}}, and \eqn{Y -
+  \mbox{median($Y$)}}{\var{Y} - median(\var{Y})}. The function also
+  computes ratios that correspond to \eqn{R^2} and \eqn{1 - R^2} (but
+  these ratios do not add to 1.0); the \eqn{R^2} measure is the ratio of
+  mean or median absolute \eqn{\hat{Y} - \mbox{median($\hat{Y}$)}}{Yhat
+  - median(Yhat)} to the mean or median absolute \eqn{Y -
+  \mbox{median($Y$)}}{Y - median(Y)}. The \eqn{1 - R^2} or SSE/SST
+  measure is the mean or median absolute \eqn{\hat{Y} - Y}{Yhat - Y}
+  divided by the mean or median absolute \eqn{Y -
+  \mbox{median($Y$)}}{Y - median(Y)}.
+}
+\usage{
+abs.error.pred(fit, lp=NULL, y=NULL)
+
+\method{print}{abs.error.pred}(x, \dots)
+}
+\arguments{
+  \item{fit}{
+    a fit object typically from \code{\link{lm}} or \code{\link[rms]{ols}}
+    that contains a \var{y} vector (i.e., you should have specified
+    \code{y=TRUE} to the fitting function) unless the \code{y} argument
+    is given to \code{abs.error.pred}. If you do not specify the
+    \code{lp} argument, \code{fit} must contain \code{fitted.values} or
+    \code{linear.predictors}. You must specify \code{fit} or both of
+    \code{lp} and \code{y}.
+  }
+  \item{lp}{
+    a vector of predicted values (Y hat above) if \code{fit} is not given
+  }
+  \item{y}{
+    a vector of response variable values if \code{fit} (with
+    \code{y=TRUE} in effect) is not given
+  }
+  \item{x}{an object created by \code{abs.error.pred}}
+  \item{\dots}{unused}
+}
+\value{
+  a list of class \code{abs.error.pred} (used by
+  \code{print.abs.error.pred}) containing two matrices:
+  \code{differences} and \code{ratios}.
+}
+\author{
+  Frank Harrell\cr
+  Department of Biostatistics\cr
+  Vanderbilt University School of Medicine\cr
+  \email{f.harrell@vanderbilt.edu}
+}
+\seealso{
+  \code{\link{lm}}, \code{\link[rms]{ols}}, \code{\link{cor}},
+  \code{\link[rms]{validate.ols}}
+}
+\references{
+  Schemper M (2003): Stat in Med 22:2299-2308.
+
+  Tian L, Cai T, Goetghebeur E, Wei LJ (2007): Biometrika 94:297-311.
+}
+\examples{
+set.seed(1)         # so can regenerate results
+x1 <- rnorm(100)
+x2 <- rnorm(100)
+y <- exp(x1+x2+rnorm(100))
+f <- lm(log(y) ~ x1 + poly(x2,3), y=TRUE)
+abs.error.pred(lp=exp(fitted(f)), y=y)
+rm(x1,x2,y,f)
+}
+\keyword{robust}
+\keyword{regression}
+\keyword{models}
+\concept{predictive accuracy}
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/addMarginal.Rd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/addMarginal.Rd Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,29 @@
+\name{addMarginal}
+\alias{addMarginal}
+\title{Add Marginal Observations}
+\usage{
+addMarginal(data, ..., label = "All")
+}
+\arguments{
+  \item{data}{a data frame}
+  \item{\dots}{a list of names of variables to marginalize, unquoted}
+  \item{label}{a character string specifying the category name for the
+    added marginal observations; default is \code{"All"}}
+}
+\description{
+Given a data frame and the names of variables, doubles the
+data frame for each variable with a new category
+\code{"All"} (or optionally \code{"Combined"}).
+A new variable \code{.marginal.} is added to the resulting data frame,
+with value \code{""} if the observation is an original one, and with
+value equal to the names of the variable being marginalized (separated
+by commas) otherwise.
+}
+\examples{
+d <- expand.grid(sex=c('female', 'male'), country=c('US', 'Romania'),
+                 reps=1:2)
+addMarginal(d, sex, country)
+}
+\keyword{utilities}
+\keyword{manip}
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/all.is.numeric.Rd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/all.is.numeric.Rd Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,29 @@
+\name{all.is.numeric}
+\alias{all.is.numeric}
+\title{Check if All Elements in Character Vector are Numeric}
+\description{
+Tests, without issuing warnings, whether all elements of a character
+vector are legal numeric values, or optionally converts the vector to a
+numeric vector. Leading and trailing blanks in \code{x} are ignored.
+}
+\usage{
+all.is.numeric(x, what = c("test", "vector"), extras=c('.','NA'))
+}
+\arguments{
+  \item{x}{a character vector}
+  \item{what}{specify \code{what="vector"} to return a numeric vector if
+    it passes the test, or the original character vector otherwise}
+  \item{extras}{a vector of character strings to count as numeric
+    values, other than \code{""}.}
+}
+\value{a logical value if \code{what="test"} or a vector otherwise}
+\author{Frank Harrell}
+\seealso{\code{\link{as.numeric}}}
+\examples{
+all.is.numeric(c('1','1.2','3'))
+all.is.numeric(c('1','1.2','3a'))
+all.is.numeric(c('1','1.2','3'),'vector')
+all.is.numeric(c('1','1.2','3a'),'vector')
+all.is.numeric(c('1','',' .'),'vector')
+}
+\keyword{character}
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/approxExtrap.Rd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/approxExtrap.Rd Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,38 @@
+\name{approxExtrap}
+\alias{approxExtrap}
+\title{Linear Extrapolation}
+\description{
+  Works in conjunction with the \code{\link{approx}} function to do linear
+  extrapolation. \code{\link{approx}} in R does not support extrapolation at
+  all, and it is buggy in S-Plus 6.
+}
+\usage{
+approxExtrap(x, y, xout, method = "linear", n = 50, rule = 2, f = 0,
+             ties = "ordered", na.rm = FALSE)
+}
+\arguments{
+  \item{x,y,xout,method,n,rule,f}{
+    see \code{\link{approx}}
+  }
+  \item{ties}{
+    applies only to R. See \code{\link{approx}}
+  }
+  \item{na.rm}{
+    set to \code{TRUE} to remove \code{NA}s in \code{x} and
+    \code{y} before proceeding
+  }
+}
+\details{
+  Duplicates in \code{x} (and corresponding \code{y} elements) are removed
+  before using \code{approx}.
+}
+\value{
+  a vector the same length as \code{xout}
+}
+\author{Frank Harrell}
+\seealso{\code{\link{approx}}}
+\examples{
+approxExtrap(1:3,1:3,xout=c(0,4))
+}
+\keyword{arith}
+\keyword{dplot}
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/areg.Rd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/areg.Rd Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,229 @@
+\name{areg}
+\alias{areg}
+\alias{print.areg}
+\alias{predict.areg}
+\alias{plot.areg}
+\title{Additive Regression with Optimal Transformations on Both Sides using
+Canonical Variates}
+\description{
+Expands continuous variables into restricted cubic spline bases and
+categorical variables into dummy variables and fits a multivariate
+equation using canonical variates. This finds optimum transformations
+that maximize \eqn{R^2}. Optionally, the bootstrap is used to estimate
+the covariance matrix of both left- and right-hand-side transformation
+parameters, and to estimate the bias in the \eqn{R^2} due to overfitting
+and compute the bootstrap optimism-corrected \eqn{R^2}.
+Cross-validation can also be used to get an unbiased estimate of
+\eqn{R^2} but this is not as precise as the bootstrap estimate. The
+bootstrap and cross-validation may also be used to get estimates of mean
+and median absolute error in predicted values on the original \code{y}
+scale. These two estimates are perhaps the best ones for gauging the
+accuracy of a flexible model, because it is difficult to compare
+\eqn{R^2} under different y-transformations, and because \eqn{R^2}
+allows for an out-of-sample recalibration (i.e., it only measures
+relative errors).
+
+Note that uncertainty about the proper transformation of \code{y} causes
+an enormous amount of model uncertainty. When the transformation for
+\code{y} is estimated from the data a high variance in predicted values
+on the original \code{y} scale may result, especially if the true
+transformation is linear. Comparing bootstrap or cross-validated mean
+absolute errors with and without restricting the \code{y} transform to be
+linear (\code{ytype='l'}) may help the analyst choose the proper model
+complexity.
+}
+\usage{
+areg(x, y, xtype = NULL, ytype = NULL, nk = 4,
+ B = 0, na.rm = TRUE, tolerance = NULL, crossval = NULL)
+
+\method{print}{areg}(x, digits=4, \dots)
+
+\method{plot}{areg}(x, whichx = 1:ncol(x$x), \dots)
+
+\method{predict}{areg}(object, x, type=c('lp','fitted','x'),
+ what=c('all','sample'), \dots)
+}
+\arguments{
+ \item{x}{
+  A single predictor or a matrix of predictors. Categorical
+  predictors are required to be coded as integers (as \code{factor}
+  does internally).
+  For \code{predict}, \code{x} is a data matrix with the same integer
+  codes that were originally used for categorical variables.
+  }
+ \item{y}{a \code{factor}, categorical, character, or numeric response
+  variable}
+ \item{xtype}{
+  a vector of one-letter character codes specifying how each predictor
+  is to be modeled, in order of columns of \code{x}. The codes are
+  \code{"s"} for smooth function (using restricted cubic splines),
+  \code{"l"} for no transformation (linear), or \code{"c"} for
+  categorical (to cause expansion into dummy variables). Default is
+  \code{"s"} if \code{nk > 0} and \code{"l"} if \code{nk=0}.
+ }
+ \item{ytype}{same coding as for \code{xtype}. Default is \code{"s"}
+  for a numeric variable with more than two unique values, \code{"l"}
+  for a binary numeric variable, and \code{"c"} for a factor,
+  categorical, or character variable.}
+ \item{nk}{number of knots, 0 for linear, or 3 or more. Default is 4
+  which will fit 3 parameters to continuous variables (one linear term
+  and two nonlinear terms)}
+ \item{B}{number of bootstrap resamples used to estimate covariance
+  matrices of transformation parameters. Default is no bootstrapping.}
+ \item{na.rm}{set to \code{FALSE} if you are sure that observations
+  with \code{NA}s have already been removed}
+ \item{tolerance}{singularity tolerance. List source code for
+  \code{lm.fit.qr.bare} for details.}
+ \item{crossval}{set to a positive integer k to compute k-fold
+  cross-validated R-squared (square of first canonical correlation)
+  and mean and median absolute error of predictions on the original scale}
+ \item{digits}{number of digits to use in formatting for printi
[...]
erseFunction}}.}
+ \item{\dots}{arguments passed to the plot function.}
+}
+\details{
+\code{areg} is a competitor of \code{ace} in the \code{acepack}
+package. Transformations from \code{ace} are seldom smooth enough and
+are often overfitted. With \code{areg} the complexity can be controlled
+with the \code{nk} parameter, and predicted values are easy to obtain
+because parametric functions are fitted.
+
+If one side of the equation has a categorical variable with more than
+two categories and the other side has a continuous variable not assumed
+to act linearly, larger sample sizes are needed to reliably estimate
+transformations, as it is difficult to optimally score categorical
+variables to maximize \eqn{R^2} against a simultaneously optimally
+transformed continuous variable.
+}
+\value{
+ a list of class \code{"areg"} containing many objects
+}
+\references{Breiman and Friedman, Journal of the American Statistical
+ Association (September, 1985).}
+\author{
+Frank Harrell
+\cr
+Department of Biostatistics
+\cr
+Vanderbilt University
+\cr
+\email{f.harrell@vanderbilt.edu}
+}
+\seealso{\code{\link{cancor}},\code{\link[acepack]{ace}}, \code{\link{transcan}}}
+\examples{
+set.seed(1)
+
+ns <- c(30,300,3000)
+for(n in ns) {
+ y <- sample(1:5, n, TRUE)
+ x <- abs(y-3) + runif(n)
+ par(mfrow=c(3,4))
+ for(k in c(0,3:5)) {
+ z <- areg(x, y, ytype='c', nk=k)
+ plot(x, z$tx)
+ title(paste('R2=',format(z$rsquared)))
+ tapply(z$ty, y, range)
+ a <- tapply(x,y,mean)
+ b <- tapply(z$ty,y,mean)
+ plot(a,b)
+ abline(lsfit(a,b))
+ # Should get same result to within linear transformation if reverse x and y
+ w <- areg(y, x, xtype='c', nk=k)
+ plot(z$ty, w$tx)
+ title(paste('R2=',format(w$rsquared)))
+ abline(lsfit(z$ty, w$tx))
+ }
+}
+
+par(mfrow=c(2,2))
+# Example where one category in y differs from others but only in variance of x
+n <- 50
+y <- sample(1:5,n,TRUE)
+x <- rnorm(n)
+x[y==1] <- rnorm(sum(y==1), 0, 5)
+z <- areg(x,y,xtype='l',ytype='c')
+z
+plot(z)
+z <- areg(x,y,ytype='c')
+z
+plot(z)
+
+\dontrun{
+# Examine overfitting when true transformations are linear
+par(mfrow=c(4,3))
+for(n in c(200,2000)) {
+ x <- rnorm(n); y <- rnorm(n) + x
+ for(nk in c(0,3,5)) {
+ z <- areg(x, y, nk=nk, crossval=10, B=100)
+ print(z)
+ plot(z)
+ title(paste('n=',n))
+ }
+}
+par(mfrow=c(1,1))
+
+# Underfitting when true transformation is quadratic but overfitting
+# when y is allowed to be transformed
+set.seed(49)
+n <- 200
+x <- rnorm(n); y <- rnorm(n) + .5*x^2
+#areg(x, y, nk=0, crossval=10, B=100)
+#areg(x, y, nk=4, ytype='l', crossval=10, B=100)
+z <- areg(x, y, nk=4) #, crossval=10, B=100)
+z
+# Plot x vs. predicted value on original scale. Since y-transform is
+# not monotonic, there are multiple y-inverses
+xx <- seq(-3.5,3.5,length=1000)
+yhat <- predict(z, xx, type='fitted')
+plot(x, y, xlim=c(-3.5,3.5))
+for(j in 1:ncol(yhat)) lines(xx, yhat[,j], col=j)
+# Plot a random sample of possible y inverses
+yhats <- predict(z, xx, type='fitted', what='sample')
+points(xx, yhats, pch=2)
+}
+
+# True transformation of x1 is quadratic, y is linear
+n <- 200
+x1 <- rnorm(n); x2 <- rnorm(n); y <- rnorm(n) + x1^2
+z <- areg(cbind(x1,x2),y,xtype=c('s','l'),nk=3)
+par(mfrow=c(2,2))
+plot(z)
+
+# y transformation is inverse quadratic but areg gets the same answer by
+# making x1 quadratic
+n <- 5000
+x1 <- rnorm(n); x2 <- rnorm(n); y <- (x1 + rnorm(n))^2
+z <- areg(cbind(x1,x2),y,nk=5)
+par(mfrow=c(2,2))
+plot(z)
+
+# Overfit 20 predictors when no true relationships exist
+n <- 1000
+x <- matrix(runif(n*20),n,20)
+y <- rnorm(n)
+z <- areg(x, y, nk=5) # add crossval=4 to expose the problem
+
+# Test predict function
+n <- 50
+x <- rnorm(n)
+y <- rnorm(n) + x
+g <- sample(1:3, n, TRUE)
+z <- areg(cbind(x,g),y,xtype=c('s','c'))
+range(predict(z, cbind(x,g)) - z$linear.predictors)
+}
+\keyword{smooth}
+\keyword{regression}
+\keyword{multivariate}
+\keyword{models}
+\concept{bootstrap}
| b |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/aregImpute.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/aregImpute.Rd Wed Jun 28 20:28:48 2017 -0400 |
| [ |
| @@ -0,0 +1,584 @@ +\name{aregImpute} +\alias{aregImpute} +\alias{print.aregImpute} +\alias{plot.aregImpute} +\title{ +Multiple Imputation using Additive Regression, Bootstrapping, and +Predictive Mean Matching +} +\description{ +The \code{transcan} function creates flexible additive imputation models +but provides only an approximation to true multiple imputation as the +imputation models are fixed before all multiple imputations are +drawn. This ignores variability caused by having to fit the +imputation models. \code{aregImpute} takes all aspects of uncertainty in +the imputations into account by using the bootstrap to approximate the +process of drawing predicted values from a full Bayesian predictive +distribution. Different bootstrap resamples are used for each of the +multiple imputations, i.e., for the \code{i}th imputation of a sometimes +missing variable, \code{i=1,2,\dots,n.impute}, a flexible additive +model is fitted on a sample with replacement from the original data and +this model is used to predict all of the original missing and +non-missing values for the target variable. + +\code{areg} is used to fit the imputation models. By default, linearity +is assumed for target variables (variables being imputed) and +\code{nk=3} knots are assumed for continuous predictors transformed +using restricted cubic splines. If \code{nk} is three or greater and +\code{tlinear} is set to \code{FALSE}, \code{areg} +simultaneously finds transformations of the target variable and of all of +the predictors, to get a good fit assuming additivity, maximizing +\eqn{R^2}, using the same canonical correlation method as +\code{transcan}. Flexible transformations may be overridden for +specific variables by specifying the identity transformation for them. +When a categorical variable is being predicted, the flexible +transformation is Fisher's optimum scoring method. Nonlinear transformations for continuous variables may be nonmonotonic. If +\code{nk} is a vector, \code{areg}'s bootstrap and \code{crossval=10} +options will be used to help find the optimum validating value of +\code{nk} over values of that vector, at the last imputation iteration. +For the imputations, the minimum value of \code{nk} is used. + +Instead of defaulting to taking random draws from fitted imputation +models using random residuals as is done by \code{transcan}, +\code{aregImpute} by default uses predictive mean matching with optional +weighted probability sampling of donors rather than using only the +closest match. Predictive mean matching works for binary, categorical, +and continuous variables without the need for iterative maximum +likelihood fitting for binary and categorical variables, and without the +need for computing residuals or for curtailing imputed values to be in +the range of actual data. Predictive mean matching is especially +attractive when the variable being imputed is also being transformed +automatically. See Details below for more information about the +algorithm. A \code{"regression"} method is also available that is +similar to that used in \code{transcan}. This option should be used +when mechanistic missingness requires the use of extrapolation during +imputation. + +A \code{print} method summarizes the results, and a \code{plot} method plots +distributions of imputed values. Typically, \code{fit.mult.impute} will +be called after \code{aregImpute}. + +If a target variable is transformed nonlinearly (i.e., if \code{nk} is +greater than zero and \code{tlinear} is set to \code{FALSE}) and the +estimated target variable transformation is non-monotonic, imputed +values are not unique. When \code{type='regression'}, a random choice +of possible inverse values is made. +} +\usage{ +aregImpute(formula, data, subset, n.impute=5, group=NULL, + nk=3, tlinear=TRUE, type=c('pmm','regression','normpmm'), + pmmtype=1, match=c('weighted','closest','kclosest'), + kclosest=3, fweighted=0.2, + curt[...] +\dontrun{ +# Example 2: Very discriminating imputation models, +# x1 and x2 have some NAs on the same rows, smaller n +set.seed(5) +x1 <- factor(sample(c('a','b','c'),100,TRUE)) +x2 <- (x1=='b') + 3*(x1=='c') + rnorm(100,0,.4) +x3 <- rnorm(100) +y <- x2 + 1*(x1=='c') + .2*x3 + rnorm(100,0,.4) +orig.x1 <- x1[1:20] +orig.x2 <- x2[18:23] +x1[1:20] <- NA +x2[18:23] <- NA +#x2[21:25] <- NA +d <- data.frame(x1,x2,x3,y) +n <- naclus(d) +plot(n); naplot(n) # Show patterns of NAs +# 100 imputations to study them; normally use 5 or 10 +f <- aregImpute(~y + x1 + x2 + x3, n.impute=100, nk=0, data=d) +par(mfrow=c(2,3)) +plot(f, diagnostics=TRUE, maxn=2) +# Note: diagnostics=TRUE makes graphs similar to those made by: +# r <- range(f$imputed$x2, orig.x2) +# for(i in 1:6) { # use 1:2 to mimic maxn=2 +# plot(1:100, f$imputed$x2[i,], ylim=r, +# ylab=paste("Imputations for Obs.",i)) +# abline(h=orig.x2[i],lty=2) +# } + +table(orig.x1,apply(f$imputed$x1, 1, modecat)) +par(mfrow=c(1,1)) +plot(orig.x2, apply(f$imputed$x2, 1, mean)) + + +fmi <- fit.mult.impute(y ~ x1 + x2, lm, f, + data=d) +sqrt(diag(vcov(fmi))) +fcc <- lm(y ~ x1 + x2) +summary(fcc) # SEs are larger than from mult. imputation +} + +\dontrun{ +# Study relationship between smoothing parameter for weighting function +# (multiplier of mean absolute distance of transformed predicted +# values, used in tricube weighting function) and standard deviation +# of multiple imputations. SDs are computed from average variances +# across subjects. match="closest" same as match="weighted" with +# small value of fweighted. +# This example also shows problems with predicted mean +# matching almost always giving the same imputed values when there is +# only one predictor (regression coefficients change over multiple +# imputations but predicted values are virtually 1-1 functions of each +# other) + +set.seed(23) +x <- runif(200) +y <- x + runif(200, -.05, .05) +r <- resid(lsfit(x,y)) +rmse <- sqrt(sum(r^2)/(200-2)) # sqrt of residual MSE + +y[1:20] <- NA +d <- data.frame(x,y) +f <- aregImpute(~ x + y, n.impute=10, match='closest', data=d) +# As an aside here is how to create a completed dataset for imputation +# number 3 as fit.mult.impute would do automatically. In this degenerate +# case changing 3 to 1-2,4-10 will not alter the results. +imputed <- impute.transcan(f, imputation=3, data=d, list.out=TRUE, + pr=FALSE, check=FALSE) +sd <- sqrt(mean(apply(f$imputed$y, 1, var))) + +ss <- c(0, .01, .02, seq(.05, 1, length=20)) +sds <- ss; sds[1] <- sd + +for(i in 2:length(ss)) { + f <- aregImpute(~ x + y, n.impute=10, fweighted=ss[i]) + sds[i] <- sqrt(mean(apply(f$imputed$y, 1, var))) +} + +plot(ss, sds, xlab='Smoothing Parameter', ylab='SD of Imputed Values', + type='b') +abline(v=.2, lty=2) # default value of fweighted +abline(h=rmse, lty=2) # root MSE of residuals from linear regression +} + +\dontrun{ +# Do a similar experiment for the Titanic dataset +getHdata(titanic3) +h <- lm(age ~ sex + pclass + survived, data=titanic3) +rmse <- summary(h)$sigma +set.seed(21) +f <- aregImpute(~ age + sex + pclass + survived, n.impute=10, + data=titanic3, match='closest') +sd <- sqrt(mean(apply(f$imputed$age, 1, var))) + +ss <- c(0, .01, .02, seq(.05, 1, length=20)) +sds <- ss; sds[1] <- sd + +for(i in 2:length(ss)) { + f <- aregImpute(~ age + sex + pclass + survived, data=titanic3, + n.impute=10, fweighted=ss[i]) + sds[i] <- sqrt(mean(apply(f$imputed$age, 1, var))) +} + +plot(ss, sds, xlab='Smoothing Parameter', ylab='SD of Imputed Values', + type='b') +abline(v=.2, lty=2) # default value of fweighted +abline(h=rmse, lty=2) # root MSE of residuals from linear regression +} +} +\keyword{smooth} +\keyword{regression} +\keyword{multivariate} +\keyword{methods} +\keyword{models} +\concept{bootstrap} +\concept{predictive mean matching} +\concept{imputation} +\concept{NA} +\concept{missing data} |
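A minimal sketch of the workflow described above (impute, then pool model fits with fit.mult.impute); the data frame and variable names are illustrative only, and the Hmisc package is assumed to be attached:

library(Hmisc)
set.seed(1)
d <- data.frame(x1=rnorm(100), x2=rnorm(100))
d$y <- d$x1 + .5*d$x2 + rnorm(100, 0, .3)
d$x2[sample(100, 15)] <- NA                 # impose some missingness
a <- aregImpute(~ y + x1 + x2, data=d, n.impute=5, nk=3)
a                                           # print method summarizes imputation models
f <- fit.mult.impute(y ~ x1 + x2, lm, a, data=d)
sqrt(diag(vcov(f)))                         # SEs include between-imputation variance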
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/biVar.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/biVar.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,196 @@ +\name{biVar} +\alias{biVar} +\alias{print.biVar} +\alias{plot.biVar} +\alias{spearman2} +\alias{spearman2.default} +\alias{spearman2.formula} +\alias{spearman} +\alias{spearman.test} +\alias{chiSquare} +\title{Bivariate Summaries Computed Separately by a Series of Predictors} +\description{ + \code{biVar} is a generic function that accepts a formula and usual + \code{data}, \code{subset}, and \code{na.action} parameters plus a + list \code{statinfo} that specifies a function of two variables to + compute along with information about labeling results for printing and + plotting. The function is called separately with each right hand side + variable and the same left hand variable. The result is a matrix of + bivariate statistics and the \code{statinfo} list that drives printing + and plotting. The plot method draws a dot plot with x-axis values by + default sorted in order of one of the statistics computed by the function. + + \code{spearman2} computes the square of Spearman's rho rank correlation + and a generalization of it in which \code{x} can relate + non-monotonically to \code{y}. This is done by computing the Spearman + multiple rho-squared between \code{(rank(x), rank(x)^2)} and \code{y}. + When \code{x} is categorical, a different kind of Spearman correlation + used in the Kruskal-Wallis test is computed (and \code{spearman2} can do + the Kruskal-Wallis test). This is done by computing the ordinary + multiple \code{R^2} between \code{k-1} dummy variables and + \code{rank(y)}, where \code{x} has \code{k} categories. \code{x} can + also be a formula, in which case each predictor is correlated separately + with \code{y}, using non-missing observations for that predictor. + \code{biVar} is used to do the looping and bookkeeping. By default the + plot shows the adjusted \code{rho^2}, using the same formula used for + the ordinary adjusted \code{R^2}. The \code{F} test uses the unadjusted + \code{R^2}. + + \code{spearman} computes Spearman's rho on non-missing values of two + variables. \code{spearman.test} is a simple version of + \code{spearman2.default}. + + \code{chiSquare} is set up like \code{spearman2} except it is intended + for a categorical response variable. Separate Pearson chi-square tests + are done for each predictor, with optional collapsing of infrequent + categories. Numeric predictors having more than \code{g} levels are + categorized into \code{g} quantile groups. \code{chiSquare} uses + \code{biVar}. +} +\usage{ +biVar(formula, statinfo, data=NULL, subset=NULL, + na.action=na.retain, exclude.imputed=TRUE, ...) + +\method{print}{biVar}(x, ...) + +\method{plot}{biVar}(x, what=info$defaultwhat, + sort.=TRUE, main, xlab, + vnames=c('names','labels'), ...) + +spearman2(x, ...) + +\method{spearman2}{default}(x, y, p=1, minlev=0, na.rm=TRUE, exclude.imputed=na.rm, ...) + +\method{spearman2}{formula}(formula, data=NULL, + subset, na.action=na.retain, exclude.imputed=TRUE, ...) + +spearman(x, y) + +spearman.test(x, y, p=1) + +chiSquare(formula, data=NULL, subset=NULL, na.action=na.retain, + exclude.imputed=TRUE, ...) +} +\arguments{ + \item{formula}{a formula with a single left side variable} + \item{statinfo}{see \code{spearman2.formula} or \code{chiSquare} code} + \item{data, subset, na.action}{ + the usual options for models. Default for \code{na.action} is to retain + all values, NA or not, so that NAs can be deleted in only a pairwise + fashion. + } + \item{exclude.imputed}{ + set to \code{FALSE} to include imputed values (created by + \code{impute}) in the calculations. + } + \item{...}{other arguments that are passed to the function used to + compute the bivariate statistics or to \code{dotchart3} for + \code{plot}. + } + \item{na.rm}{logical; delete NA values?} + \item{x}{ + a numeric matrix with at least 5 rows and at least 2 columns (if + \code{y} is absent). For \code{spearman2}, the first argument may + be a vector of any type, including character or factor. The first + argument may also be a formula, in which case all predictors are + correlated individually with + the response variable. \code{x} may be a formula for \code{spearman2} + in which case \code{spearman2.formula} is invoked. Each + predictor in the right hand side of the formula is separately correlated + with the response variable. For \code{print} or \code{plot}, \code{x} + is an object produced by \code{biVar}. For \code{spearman} and + \code{spearman.test}, \code{x} is a numeric vector, as is \code{y}. For + \code{chiSquare}, \code{x} is a formula. + } +% \item{type}{ +% specifies the type of correlations to compute. Spearman correlations +% are the Pearson linear correlations computed on the ranks of non-missing +% elements, using midranks for ties. +% } + \item{y}{ + a numeric vector + } + \item{p}{ + for numeric variables, specifies the order of the Spearman \code{rho^2} to + use. The default is \code{p=1} to compute the ordinary + \code{rho^2}. Use \code{p=2} to compute the quadratic rank + generalization to allow non-monotonicity. \code{p} is ignored for + categorical predictors. + } + \item{minlev}{ + minimum relative frequency that a level of a categorical predictor + should have before it is pooled with other categories (see + \code{combine.levels}) in \code{spearman2} and \code{chiSquare} (in + which case it also applies to the response). The default, + \code{minlev=0}, causes no pooling. + } + \item{what}{ + specifies which statistic to plot. Possibilities include the + column names that appear when the print method is used. + } + \item{sort.}{ + set \code{sort.=FALSE} to suppress sorting variables by the + statistic being plotted + } + \item{main}{ + main title for plot. Default title shows the name of the response + variable. + } + \item{xlab}{ + x-axis label. Default constructed from \code{what}. + } + \item{vnames}{ + set to \code{"labels"} to use variable labels in place of names for + plotting. If a variable does not have a label the name is always + used.} +% \item{g}{number of quantile groups into which to categorize continuous +% predictors having more than \code{g} unique values, for \code{chiSquare}} +} +\value{ + \code{spearman2.default} (the + function that is called for a single \code{x}, i.e., when there is no + formula) returns a vector of statistics for the variable. + \code{biVar}, \code{spearman2.formula}, and \code{chiSquare} return a + matrix with rows corresponding to predictors. +} +\details{ + Uses midranks in case of ties, as described by Hollander and Wolfe. + P-values for Spearman, Wilcoxon, or Kruskal-Wallis tests are + approximated by using the \code{t} or \code{F} distributions. +} +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University +\cr +\email{f.harrell@vanderbilt.edu} +} +\references{ +Hollander M. and Wolfe D.A. (1973). Nonparametric Statistical Methods. +New York: Wiley. + +Press WH, Flannery BP, Teukolsky SA, Vetterling WT (1988): Numerical +Recipes in C. Cambridge: Cambridge University Press. +} +\seealso{ +\code{\link{combine.levels}}, +\code{\link{varclus}}, \code{\link{dotchart3}}, \code{\link{impute}}, +\code{\link{chisq.test}}, \code{\link{cut2}}. +} +\examples{ +x <- c(-2, -1, 0, 1, 2) +y <- c(4, 1, 0, 1, 4) +z <- c(1, 2, 3, 4, NA) +v <- c(1, 2, 3, 4, 5) + +spearman2(x, y) +plot(spearman2(z ~ x + y + v, p=2)) + +f <- chiSquare(z ~ x + y + v) +f +} +\keyword{nonparametric} +\keyword{htest} +\keyword{category} |
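The quadratic-rank generalization described above can be reproduced by hand. A sketch with simulated, illustrative data, assuming Hmisc is attached; agreement relies on rank() using midranks for ties, which is its default:

set.seed(2)
x <- rnorm(50); y <- x^2 + rnorm(50, 0, .5)    # non-monotonic relationship
r <- rank(x)
summary(lm(rank(y) ~ r + I(r^2)))$r.squared    # multiple rho^2 on ranks
spearman2(x, y, p=2)                           # reports the same unadjusted rho^2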
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/binconf.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/binconf.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,89 @@ +\name{binconf} +\alias{binconf} +\title{ +Confidence Intervals for Binomial Probabilities +} +\description{ +Produces 1-alpha confidence intervals for binomial probabilities. +} +\usage{ +binconf(x, n, alpha=0.05, + method=c("wilson","exact","asymptotic","all"), + include.x=FALSE, include.n=FALSE, return.df=FALSE) +} +\arguments{ +\item{x}{ +vector containing the number of "successes" for binomial variates +} +\item{n}{ +vector containing the numbers of corresponding observations +} +\item{alpha}{ +probability of a type I error, so confidence coefficient = 1-alpha +} +\item{method}{ +character string specifying which method to use. The "all" method only +works when +x and n are length 1. The "exact" method uses the F distribution +to compute exact (based on the binomial cdf) intervals; the +"wilson" interval is score-test-based; and the "asymptotic" is the +textbook asymptotic normal interval. Following Agresti and +Coull, the Wilson interval is to be preferred and so is the +default. +} +\item{include.x}{ +logical flag to indicate whether \code{x} should be included in the +returned matrix or data frame +} +\item{include.n}{ +logical flag to indicate whether \code{n} should be included in the +returned matrix or data frame +} +\item{return.df}{ +logical flag to indicate that a data frame rather than a matrix be +returned +}} +\value{ +a matrix or data.frame containing the computed intervals and, +optionally, \code{x} and \code{n}. +} +\author{ +Rollin Brant, Modified by Frank Harrell and +\cr +Brad Biggerstaff +\cr +Centers for Disease Control and Prevention +\cr +National Center for Infectious Diseases +\cr +Division of Vector-Borne Infectious Diseases +\cr +P.O. Box 2087, Fort Collins, CO, 80522-2087, USA +\cr +\email{bkb5@cdc.gov} +} +\references{ +A. Agresti and B.A. Coull, Approximate is better than "exact" for +interval estimation of binomial proportions, +\emph{American Statistician,} +\bold{52}:119--126, 1998. + + +R.G. Newcombe, Logit confidence intervals and the inverse sinh +transformation, +\emph{American Statistician,} +\bold{55}:200--202, 2001. + + +L.D. Brown, T.T. Cai and A. DasGupta, Interval estimation for +a binomial proportion (with discussion), +\emph{Statistical Science,} +\bold{16}:101--133, 2001. +} +\examples{ +binconf(0:10,10,include.x=TRUE,include.n=TRUE) +binconf(46,50,method="all") +} +\keyword{category} +\keyword{htest} +% Converted by Sd2Rd version 1.21. |
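For reference, a hand computation of the Wilson score interval that the default method is documented to produce; a sketch, with Hmisc assumed attached for the comparison call:

x <- 46; n <- 50; z <- qnorm(.975)            # alpha = 0.05
p <- x/n
center <- (x + z^2/2)/(n + z^2)
half   <- z*sqrt(n)/(n + z^2) * sqrt(p*(1 - p) + z^2/(4*n))
c(center - half, center + half)
binconf(46, 50, method="wilson")              # should agree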
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/bootkm.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/bootkm.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,80 @@ +\name{bootkm} +\alias{bootkm} +\title{ + Bootstrap Kaplan-Meier Estimates +} +\description{ + Bootstraps Kaplan-Meier estimate of the probability of survival to at + least a fixed time (\code{times} variable) or the estimate of the \code{q} + quantile of the survival distribution (e.g., median survival time, the + default). +} +\usage{ +bootkm(S, q=0.5, B=500, times, pr=TRUE) +} +\arguments{ + \item{S}{ + a \code{Surv} object for possibly right-censored survival time + } + \item{q}{ + quantile of survival time, default is 0.5 for median + } + \item{B}{ + number of bootstrap repetitions (default=500) + } + \item{times}{ + time vector (currently only a scalar is allowed) at which to compute + survival estimates. You may specify only one of \code{q} and + \code{times}, and if \code{times} is specified \code{q} is ignored. + } + \item{pr}{ + set to \code{FALSE} to suppress printing the iteration number every + 10 iterations + } +} +\value{ + a vector containing \code{B} bootstrap estimates +} +\section{Side Effects}{ + updates \code{.Random.seed}, and, if \code{pr=TRUE}, prints progress + of simulations +} +\details{ + \code{bootkm} uses Therneau's \code{survfitKM} function to efficiently + compute Kaplan-Meier estimates. +} +\author{ + Frank Harrell + \cr + Department of Biostatistics + \cr + Vanderbilt University School of Medicine + \cr + \email{f.harrell@vanderbilt.edu} +} +\references{ + Akritas MG (1986): Bootstrapping the Kaplan-Meier estimator. JASA + 81:1032--1038. +} +\seealso{ + \code{\link[survival]{survfit}}, \code{\link[survival]{Surv}}, + \code{\link[rms]{Survival.cph}}, \code{\link[rms]{Quantile.cph}} +} +\examples{ +# Compute 0.95 nonparametric confidence interval for the difference in +# median survival time between females and males (two-sample problem) +set.seed(1) +library(survival) +S <- Surv(runif(200)) # no censoring +sex <- c(rep('female',100),rep('male',100)) +med.female <- bootkm(S[sex=='female',], B=100) # normally B=500 +med.male <- bootkm(S[sex=='male',], B=100) +describe(med.female-med.male) +quantile(med.female-med.male, c(.025,.975), na.rm=TRUE) +# na.rm needed because some bootstrap estimates of median survival +# time may be missing when a bootstrap sample did not include the +# longer survival times +} +\keyword{survival} +\keyword{nonparametric} +\concept{bootstrap} |
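A conceptual sketch of the computation bootkm performs, written with plain survfit rather than the internal survfitKM mentioned under Details; B is reduced for speed, and quantile.survfit from the survival package is assumed to return the median estimate when conf.int=FALSE:

library(survival)
set.seed(2)
S <- Surv(rexp(100))                      # uncensored, for simplicity
B <- 50                                   # bootkm defaults to B=500
med <- numeric(B)
for(i in 1:B) {
  sf <- survfit(S[sample(100, replace=TRUE)] ~ 1)   # KM on a resample
  med[i] <- quantile(sf, probs=.5, conf.int=FALSE)  # its median survival
}
quantile(med, c(.025,.975), na.rm=TRUE)   # bootstrap CI for median survival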
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/bpower.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/bpower.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,163 @@ +\name{bpower} +\alias{bpower} +\alias{bsamsize} +\alias{ballocation} +\alias{bpower.sim} +\title{ +Power and Sample Size for Two-Sample Binomial Test +} +\description{ +Uses method of Fleiss, Tytun, and Ury (but without the continuity +correction) to estimate the power (or the sample size to achieve a given +power) of a two-sided test for the difference in two proportions. The two +sample sizes are allowed to be unequal, but for \code{bsamsize} you must specify +the fraction of observations in group 1. For power calculations, one +probability (\code{p1}) must be given, and either the other probability (\code{p2}), +an \code{odds.ratio}, or a \code{percent.reduction} must be given. For \code{bpower} or +\code{bsamsize}, any or all of the arguments may be vectors, in which case they +return a vector of powers or sample sizes. All vector arguments must have +the same length. + + +Given \code{p1, p2}, \code{ballocation} uses the method of Brittain and Schlesselman +to compute the optimal fraction of observations to be placed in group 1 +that either (1) minimize the variance of the difference in two proportions, +(2) minimize the variance of the ratio of the two proportions, +(3) minimize the variance of the log odds ratio, or +(4) maximize the power of the 2-tailed test for differences. For (4) +the total sample size must be given, or the fraction optimizing +the power is not returned. The fraction for (3) is one minus the fraction +for (1). + + +\code{bpower.sim} estimates power by simulations, in minimal time. By using +\code{bpower.sim} you can see that the formulas without any continuity correction +are quite accurate, and that the power of a continuity-corrected test +is significantly lower. That's why no continuity corrections are implemented +here. +} +\usage{ +bpower(p1, p2, odds.ratio, percent.reduction, + n, n1, n2, alpha=0.05) + + +bsamsize(p1, p2, fraction=.5, alpha=.05, power=.8) + + +ballocation(p1, p2, n, alpha=.05) + + +bpower.sim(p1, p2, odds.ratio, percent.reduction, + n, n1, n2, + alpha=0.05, nsim=10000) +} +\arguments{ +\item{p1}{ +population probability in group 1 +} +\item{p2}{ +probability for group 2 +} +\item{odds.ratio}{ +odds ratio for group 2 vs. group 1, given in place of \code{p2} +} +\item{percent.reduction}{ +percent reduction in \code{p1} assumed for group 2, given in place of \code{p2} +} +\item{n}{ +total sample size over the two groups. If you omit this for +\code{ballocation}, the \code{fraction} which optimizes power will not be +returned. +} +\item{n1}{ +} +\item{n2}{ +the individual group sample sizes. For \code{bpower}, if \code{n} is given, +\code{n1} and \code{n2} are set to \code{n/2}. +} +\item{alpha}{ +type I error +} +\item{fraction}{ +fraction of observations in group 1 +} +\item{power}{ +the desired probability of detecting a difference +} +\item{nsim}{ +number of simulations of binomial responses +}} +\value{ +for \code{bpower}, the power estimate; for \code{bsamsize}, a vector containing +the sample sizes in the two groups; for \code{ballocation}, a vector with +4 fractions of observations allocated to group 1, optimizing the four +criteria mentioned above. For \code{bpower.sim}, a vector with three +elements is returned, corresponding to the simulated power and its +lower and upper 0.95 confidence limits. +} +\details{ +For \code{bpower.sim}, all arguments must be of length one. +} +\section{AUTHOR}{ +Frank Harrell + + +Department of Biostatistics + + +Vanderbilt University + + +\email{f.harrell@vanderbilt.edu} +} +\references{ +Fleiss JL, Tytun A, Ury HK (1980): A simple approximation for calculating +sample sizes for comparing independent proportions. 
Biometrics 36:343--6. + + +Brittain E, Schlesselman JJ (1982): Optimal allocation for the comparison +of proportions. Biometrics 38:1003--9. + + +Gordon I, Watson R (1996): The myth of continuity-corrected sample size +formulae. Biometrics 52:71--6. +} +\seealso{ +\code{\link{samplesize.bin}}, \code{\link{chisq.test}}, \code{\link{binconf}} +} +\examples{ +bpower(.1, odds.ratio=.9, n=1000, alpha=c(.01,.05)) +bpower.sim(.1, odds.ratio=.9, n=1000) +bsamsize(.1, .05, power=.95) +ballocation(.1, .5, n=100) + + +# Plot power vs. n for various odds ratios (base prob.=.1) +n <- seq(10, 1000, by=10) +OR <- seq(.2,.9,by=.1) +plot(0, 0, xlim=range(n), ylim=c(0,1), xlab="n", ylab="Power", type="n") +for(or in OR) { + lines(n, bpower(.1, odds.ratio=or, n=n)) + text(350, bpower(.1, odds.ratio=or, n=350)-.02, format(or)) +} + + +# Another way to plot the same curves, but letting labcurve do the +# work, including labeling each curve at points of maximum separation +pow <- lapply(OR, function(or,n)list(x=n,y=bpower(p1=.1,odds.ratio=or,n=n)), + n=n) +names(pow) <- format(OR) +labcurve(pow, pl=TRUE, xlab='n', ylab='Power') + + +# Contour graph for various probabilities of outcome in the control +# group, fixing the odds ratio at .8 ([p2/(1-p2) / p1/(1-p1)] = .8) +# n is varied also +p1 <- seq(.01,.99,by=.01) +n <- seq(100,5000,by=250) +pow <- outer(p1, n, function(p1,n) bpower(p1, n=n, odds.ratio=.8)) +# This forms a length(p1)*length(n) matrix of power estimates +contour(p1, n, pow) +} +\keyword{htest} +\keyword{category} +\concept{power} +\concept{study design} |
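A quick cross-check against base R; power.prop.test takes a per-group n and uses a slightly different variance estimate, so agreement is approximate rather than exact:

bpower(p1=.1, p2=.05, n=1000)                 # n is the total sample size
power.prop.test(n=500, p1=.1, p2=.05)$power   # n is per group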
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/bpplot.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/bpplot.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,82 @@ +\name{bpplot} +\alias{bpplot} +\title{ +Box-percentile plots +} +\description{ +Produces side-by-side box-percentile plots from several vectors or a +list of vectors. +} +\usage{ +bpplot(\dots, name=TRUE, main="Box-Percentile Plot", + xlab="", ylab="", srtx=0) +} +\arguments{ +\item{...}{ +vectors or lists containing +numeric components (e.g., the output of \code{split}). +} +\item{name}{ +character vector of names for the groups. +Default is \code{TRUE} to put names on the x-axis. Such names are taken from the +data vectors or the \code{names} attribute of the first argument if it is a list. +Set \code{name} to \code{FALSE} to suppress names. +If a character vector is supplied the names in the vector are +used to label the groups. +} +\item{main}{ +main title for the plot. +} +\item{xlab}{ +x axis label. +} +\item{ylab}{ +y axis label. +} +\item{srtx}{ +rotation angle for x-axis labels. Default is zero. +}} +\value{ +There are no returned values +} +\section{Side Effects}{ +A plot is created on the current graphics device. +} +\section{BACKGROUND}{ +Box-percentile plots are similar to boxplots, except box-percentile plots +supply more information about the univariate distributions. At any height +the width of the irregular "box" is proportional to the percentile of that +height, up to the 50th percentile, and above the 50th percentile the width +is proportional to 100 minus the percentile. Thus, the width at any given +height is proportional to the percent of observations that are more +extreme in that direction. As in boxplots, the median, 25th and 75th +percentiles are marked with line segments across the box. +} +\author{ +Jeffrey Banfield +\cr +\email{umsfjban@bill.oscs.montana.edu} +\cr +Modified by F. Harrell 30Jun97 +} +\references{ +Esty WW, Banfield J: The box-percentile plot. J Statistical +Software 8 No. 17, 2003. +} +\seealso{ +\code{\link{panel.bpplot}}, \code{\link{boxplot}}, \code{\link{Ecdf}}, +\code{\link[lattice:xyplot]{bwplot}} +} +\examples{ +set.seed(1) +x1 <- rnorm(500) +x2 <- runif(500, -2, 2) +x3 <- abs(rnorm(500))-2 +bpplot(x1, x2, x3) +g <- sample(1:2, 500, replace=TRUE) +bpplot(split(x2, g), name=c('Group 1','Group 2')) +rm(x1,x2,x3,g) +} +\keyword{nonparametric} +\keyword{hplot} +% Converted by Sd2Rd version 1.21. |
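The BACKGROUND paragraph can be checked numerically: the width of the "box" at height h is proportional to min(F(h), 1 - F(h)), F being the empirical CDF. A small sketch:

set.seed(1)
x <- rnorm(500)
Fn <- ecdf(x)
h <- quantile(x, .25)
min(Fn(h), 1 - Fn(h))   # about 0.25: relative width at the 25th percentile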
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/bystats.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/bystats.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,146 @@ +\name{bystats} +\alias{bystats} +\alias{print.bystats} +\alias{latex.bystats} +\alias{bystats2} +\alias{print.bystats2} +\alias{latex.bystats2} +\title{ +Statistics by Categories +} +\description{ + + For any number of cross-classification variables, \code{bystats} + returns a matrix with the sample size, number missing \code{y}, and + \code{fun(non-missing y)}, with the cross-classifications designated + by rows. Uses Harrell's modification of the \code{interaction} + function to produce cross-classifications. The default \code{fun} is + \code{mean}, and if \code{y} is binary, the mean is labeled as + \code{Fraction}. There is a \code{print} method as well as a + \code{latex} method for objects created by \code{bystats}. + \code{bystats2} handles the special case in which there are 2 + classification variables, and places the first one in rows and the + second in columns. The \code{print} method for \code{bystats2} uses + the \code{print.char.matrix} function to organize statistics + for cells into boxes. } + +\usage{ +bystats(y, \dots, fun, nmiss, subset) +\method{print}{bystats}(x, \dots) +\method{latex}{bystats}(object, title, caption, rowlabel, \dots) +bystats2(y, v, h, fun, nmiss, subset) +\method{print}{bystats2}(x, abbreviate.dimnames=FALSE, + prefix.width=max(nchar(dimnames(x)[[1]])), \dots) +\method{latex}{bystats2}(object, title, caption, rowlabel, \dots) +} +\arguments{ +\item{y}{ +a binary, logical, or continuous variable or a matrix or data frame of +such variables. If \code{y} is a data frame it is converted to a matrix. +If \code{y} is a data frame or matrix, computations are done on subsets of +the rows of \code{y}, and you should specify \code{fun} so as to be able to operate +on the matrix. For matrix \code{y}, any column with a missing value causes +the entire row to be considered missing, and the row is not passed to +\code{fun}. +} +\item{...}{ +For \code{bystats}, one or more classification variables separated by commas. +For \code{print.bystats}, options passed to \code{print.default} such as \code{digits}. +For \code{latex.bystats}, and \code{latex.bystats2}, +options passed to \code{latex.default} such as \code{digits}. +If you pass \code{cdec} to \code{latex.default}, keep in mind that the first one or +two positions (depending on \code{nmiss}) should have zeros since these +correspond with frequency counts. +} +\item{v}{ +vertical variable for \code{bystats2}. Will be converted to \code{factor}. +} +\item{h}{ +horizontal variable for \code{bystats2}. Will be converted to \code{factor}. +} +\item{fun}{ +a function to compute on the non-missing \code{y} for a given subset. +You must specify \code{fun=} in front of the function name or definition. +\code{fun} may return a single number or a vector or matrix of any length. +Matrix results are rolled out into a vector, with names preserved. +When \code{y} is a matrix, a common \code{fun} is \code{function(y) apply(y, 2, ff)} +where \code{ff} is the name of a function which operates on one column of +\code{y}. +} +\item{nmiss}{ +A column containing a count of missing values is included if \code{nmiss=TRUE} +or if there is at least one missing value. +} +\item{subset}{ +a vector of subscripts or logical values indicating the subset of +data to analyze +} +\item{abbreviate.dimnames}{set to \code{TRUE} to abbreviate + \code{dimnames} in output} +\item{prefix.width}{see \code{\link{print.char.matrix}}} +\item{title}{ +\code{title} to pass to \code{latex.default}. Default is the first word of +the character string version of the first calling argument. +} +\item{caption}{ +caption to pass to \code{latex.default}. Default is the \code{heading} +attribute from the object produced by \code{bystats}. +} +\item{rowlabel}{ +\code{rowlabel} to pass to \code{latex.default}. Default is the \code{byvarnames} +attribute from the object produced by \code{bystats}. For \code{bystats2} the +default is \code{""}. +} +\item{x}{an object created by \code{bystats} or \code{bystats2}} +\item{object}{an object created by \code{bystats} or \code{bystats2}} +} +\value{ +for \code{bystats}, a matrix with row names equal to the classification labels and column +names \code{N, Missing, funlab}, where \code{funlab} is determined from \code{fun}. +A row is added to the end with the summary statistics computed +on all observations combined. The class of this matrix is \code{bystats}. +For \code{bystats2}, returns a 3-dimensional array with the last dimension +corresponding to statistics being computed. The class of the array is +\code{bystats2}. +} +\section{Side Effects}{ +\code{latex} produces a \code{.tex} file. +} +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University +\cr +\email{f.harrell@vanderbilt.edu} +} +\seealso{ +\code{\link{interaction}}, \code{\link{cut}}, \code{\link{cut2}}, \code{\link{latex}}, \code{\link{print.char.matrix}}, +\code{\link{translate}} +} +\examples{ +\dontrun{ +bystats(sex==2, county, city) +bystats(death, race) +bystats(death, cut2(age,g=5), race) +bystats(cholesterol, cut2(age,g=4), sex, fun=median) +bystats(cholesterol, sex, fun=quantile) +bystats(cholesterol, sex, fun=function(x)c(Mean=mean(x),Median=median(x))) +latex(bystats(death,race,nmiss=FALSE,subset=sex=="female"), digits=2) +f <- function(y) c(Hazard=sum(y[,2])/sum(y[,1])) +# f() gets the hazard estimate for right-censored data from exponential dist. +bystats(cbind(d.time, death), race, sex, fun=f) +bystats(cbind(pressure, cholesterol), age.decile, + fun=function(y) c(Median.pressure =median(y[,1]), + Median.cholesterol=median(y[,2]))) +y <- cbind(pressure, cholesterol) +bystats(y, age.decile, + fun=function(y) apply(y, 2, median)) # same result as last one +bystats(y, age.decile, fun=function(y) apply(y, 2, quantile, c(.25,.75))) +# The last one computes separately the 0.25 and 0.75 quantiles of 2 vars. +latex(bystats2(death, race, sex, fun=table)) +} +} +\keyword{category} +\concept{grouping} |
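Because the examples above are wrapped in \dontrun and assume variables from an attached data set, here is a self-contained sketch with simulated data (Hmisc attached for bystats and cut2):

set.seed(3)
sex  <- sample(c('female','male'), 200, TRUE)
age  <- rnorm(200, 50, 10)
chol <- rnorm(200, 200, 25)
chol[1:5] <- NA                      # forces the Missing column to appear
bystats(chol, sex, cut2(age, g=3), fun=median)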
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/capitalize.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/capitalize.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,25 @@ +\name{capitalize} +\alias{capitalize} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{Capitalize the first letter of a string} +\description{ +Capitalizes the first letter of each element of the string vector. +} +\usage{ +capitalize(string) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{string}{ String to be capitalized } +} +\value{ + Returns a vector of characters with the first letter capitalized +} +\author{ Charles Dupont } +\examples{ +capitalize(c("Hello", "bob", "daN")) +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ manip } +\keyword{ character }% __ONLY ONE__ keyword per line |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/ciapower.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/ciapower.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,97 @@ +\name{ciapower} +\alias{ciapower} +\title{ +Power of Interaction Test for Exponential Survival +} +\description{ +Uses the method of Peterson and George to compute the power of an +interaction test in a 2 x 2 setup in which all 4 distributions are +exponential. This will be the same as the power of the Cox model +test if assumptions hold. The test is 2-tailed. +The duration of accrual is specified +(constant accrual is assumed), as is the minimum follow-up time. +The maximum follow-up time is then \code{accrual + tmin}. Treatment +allocation is assumed to be 1:1. +} +\usage{ +ciapower(tref, n1, n2, m1c, m2c, r1, r2, accrual, tmin, + alpha=0.05, pr=TRUE) +} +\arguments{ +\item{tref}{ +time at which mortalities estimated +} +\item{n1}{ +total sample size, stratum 1 +} +\item{n2}{ +total sample size, stratum 2 +} +\item{m1c}{ +tref-year mortality, stratum 1 control +} +\item{m2c}{ +tref-year mortality, stratum 2 control +} +\item{r1}{ +\% reduction in \code{m1c} by intervention, stratum 1 +} +\item{r2}{ +\% reduction in \code{m2c} by intervention, stratum 2 +} +\item{accrual}{ +duration of accrual period +} +\item{tmin}{ +minimum follow-up time +} +\item{alpha}{ +type I error probability +} +\item{pr}{ +set to \code{FALSE} to suppress printing of details +}} +\value{ +power +} +\section{Side Effects}{ +prints +} +\section{AUTHOR}{ +Frank Harrell + + +Department of Biostatistics + + +Vanderbilt University + + +\email{f.harrell@vanderbilt.edu} +} +\references{ +Peterson B, George SL: Controlled Clinical Trials 14:511--522; 1993. +} +\seealso{ +\code{\link{cpower}}, \code{\link{spower}} +} +\examples{ +# Find the power of a race x treatment test. 25\% of patients will +# be non-white and the total sample size is 14000. +# Accrual is for 1.5 years and minimum follow-up is 5y. +# Reduction in 5-year mortality is 15\% for whites, 0\% or -5\% for +# non-whites. 5-year mortality for control subjects is assumed to +# be 0.18 for whites, 0.23 for non-whites. +n <- 14000 +for(nonwhite.reduction in c(0,-5)) { + cat("\n\n\n\% Reduction in 5-year mortality for non-whites:", + nonwhite.reduction, "\n\n") + pow <- ciapower(5, .75*n, .25*n, .18, .23, 15, nonwhite.reduction, + 1.5, 5) + cat("\n\nPower:",format(pow),"\n") +} +} +\keyword{survival} +\keyword{htest} +\concept{power} +\concept{study design} |
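The same computation as the looped example above, written as one direct call with the argument names defined under Arguments:

ciapower(tref=5, n1=10500, n2=3500, m1c=.18, m2c=.23,
         r1=15, r2=0, accrual=1.5, tmin=5)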
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/cnvrt.coords.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/cnvrt.coords.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,142 @@ +\name{cnvrt.coords} +\alias{cnvrt.coords} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{Convert between the 5 different coordinate systems on a graphical device} +\description{ + Takes a set of coordinates in any of the 5 coordinate systems (usr, + plt, fig, dev, or tdev) and returns the same points in all 5 + coordinate systems. +} +\usage{ +cnvrt.coords(x, y = NULL, input = c("usr", "plt", "fig", "dev","tdev")) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{x}{Vector, Matrix, or list of x coordinates (or x and y + coordinates), NA's allowed. } + \item{y}{y coordinates (if \code{x} is a vector), NA's allowed. } + \item{input}{Character scalar indicating the coordinate system of the + input points. } +} +\details{ + Every plot has 5 coordinate systems: + + usr (User): the coordinate system of the data, this is shown by the + tick marks and axis labels. + + plt (Plot): Plot area, coordinates range from 0 to 1 with 0 + corresponding to the x and y axes and 1 corresponding to the top and + right of the plot area. Margins of the plot correspond to plot + coordinates less than 0 or greater than 1. + + fig (Figure): Figure area, coordinates range from 0 to 1 with 0 + corresponding to the bottom and left edges of the figure (including + margins, label areas) and 1 corresponds to the top and right edges. + fig and dev coordinates will be identical if there is only 1 figure + area on the device (layout, mfrow, or mfcol has not been used). + + dev (Device): Device area, coordinates range from 0 to 1 with 0 + corresponding to the bottom and left of the device region within the + outer margins and 1 is the top and right of the region within the + outer margins. If the outer margins are all set to 0 then tdev and + dev should be identical. + + tdev (Total Device): Total Device area, coordinates range from 0 to 1 with 0 + corresponding to the bottom and left edges of the device (piece of + paper, window on screen) and 1 corresponds to the top and right edges. +} +\value{ + A list with 5 components, each component is a list with vectors named + x and y. The 5 sublists are: + \item{usr}{The coordinates of the input points in usr (User) coordinates.} + \item{plt}{The coordinates of the input points in plt (Plot) + coordinates.} + \item{fig}{The coordinates of the input points in fig (Figure) + coordinates.} + \item{dev}{The coordinates of the input points in dev (Device) + coordinates.} + \item{tdev}{The coordinates of the input points in tdev (Total Device) + coordinates. + } +} +%\references{ ~put references to the literature/web site here ~ } +\author{Greg Snow \email{greg.snow@imail.org}} +\note{ You must provide both x and y, but one of them may be \code{NA}. + +This function is becoming deprecated with the new functions +\code{grconvertX} and \code{grconvertY} in R version 2.7.0 and beyond. +These new functions use the correct coordinate system names and have +more coordinate systems available; you should start using them instead. +} + +% ~Make other sections like Warning with \section{Warning }{....} ~ + +\seealso{ \code{\link{par}} specifically 'usr','plt', and 'fig'. Also + 'xpd' for plotting outside of the plotting region and 'mfrow' and + 'mfcol' for multi figure plotting. \code{\link{subplot}}, + \code{grconvertX} and \code{grconvertY} in R2.7.0 and later} +\examples{ + +old.par <- par(no.readonly=TRUE) + +par(mfrow=c(2,2),xpd=NA) + +# generate some sample data +tmp.x <- rnorm(25, 10, 2) +tmp.y <- rnorm(25, 50, 10) +tmp.z <- rnorm(25, 0, 1) + +plot( tmp.x, tmp.y) + +# draw a diagonal line across the plot area +tmp1 <- cnvrt.coords( c(0,1), c(0,1), input='plt' ) +lines(tmp1$usr, col='blue') + +# draw a diagonal line across figure region +tmp2 <- cnvrt.coords( c(0,1), c(1,0), input='fig') +lines(tmp2$usr, col='red') + +# save coordinate of point 1 and y value near top of plot for future plots +tmp.point1 <- cnvrt.coords(tmp.x[1], tmp.y[1]) +tmp.range1 <- cnvrt.coords(NA, 0.98, input='plt') + +# make a second plot and draw a line linking point 1 in each plot +plot(tmp.y, tmp.z) + +tmp.point2 <- cnvrt.coords( tmp.point1$dev, input='dev' ) +arrows( tmp.y[1], tmp.z[1], tmp.point2$usr$x, tmp.point2$usr$y, + col='green') + +# draw another plot and add rectangle showing same range in 2 plots + +plot(tmp.x, tmp.z) +tmp.range2 <- cnvrt.coords(NA, 0.02, input='plt') +tmp.range3 <- cnvrt.coords(NA, tmp.range1$dev$y, input='dev') +rect( 9, tmp.range2$usr$y, 11, tmp.range3$usr$y, border='yellow') + +# put a label just to the right of the plot and +# near the top of the figure region. +text( cnvrt.coords(1.05, NA, input='plt')$usr$x, + cnvrt.coords(NA, 0.75, input='fig')$usr$y, + "Label", adj=0) + +par(mfrow=c(1,1)) + +## create a subplot within another plot (see also subplot) + +plot(1:10, 1:10) + +tmp <- cnvrt.coords( c( 1, 4, 6, 9), c(6, 9, 1, 4) ) + +par(plt = c(tmp$dev$x[1:2], tmp$dev$y[1:2]), new=TRUE) +hist(rnorm(100)) + +par(fig = c(tmp$dev$x[3:4], tmp$dev$y[3:4]), new=TRUE) +hist(rnorm(100)) + +par(old.par) + +} + +\keyword{ dplot }% at least one, from doc/KEYWORDS +\keyword{ aplot }% __ONLY ONE__ keyword per line |
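A sketch of the correspondence with the replacement functions mentioned in the Note, under the assumption that 'plt' coordinates match base R's "npc" (normalized plot coordinates):

plot(1:10)
cnvrt.coords(.5, .5, input='plt')$usr
c(grconvertX(.5, from="npc", to="user"),
  grconvertY(.5, from="npc", to="user"))   # should match the $usr values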
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/consolidate.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/consolidate.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,47 @@ +\name{consolidate} +\alias{consolidate} +\alias{consolidate<-} +\alias{consolidate.default} +\title{ Element Merging } +\description{ + Merges an object by the names of its elements, inserting elements of + \code{value} that do not exist in \code{x} and, unless \code{protect} + is \code{TRUE}, replacing elements of \code{x} that also exist in + \code{value} with the \code{value} elements. +} +\usage{ +consolidate(x, value, protect, \dots) +\method{consolidate}{default}(x, value, protect=FALSE, \dots) + +consolidate(x, protect, \dots) <- value +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{x}{ named list or vector } + \item{value}{ named list or vector } + \item{protect}{ + logical; should elements in \code{x} be kept instead + of elements in \code{value}? + } + \item{\dots}{ currently does nothing; included in case the function is + ever made more generic. } +} +\author{ Charles Dupont } +\seealso{ \code{\link{names}} } +\examples{ +x <- 1:5 +names(x) <- LETTERS[x] + +y <- 6:10 +names(y) <- LETTERS[y-2] + +x # c(A=1,B=2,C=3,D=4,E=5) +y # c(D=6,E=7,F=8,G=9,H=10) + +consolidate(x, y) # c(A=1,B=2,C=3,D=6,E=7,F=8,G=9,H=10) +consolidate(x, y, protect=TRUE) # c(A=1,B=2,C=3,D=4,E=5,F=8,G=9,H=10) + +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ utilities } + |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/contents.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/contents.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,145 @@ +\name{contents} +\alias{contents} +\alias{contents.data.frame} +\alias{print.contents.data.frame} +\alias{html.contents.data.frame} +\alias{contents.list} +\alias{print.contents.list} +\title{ + Metadata for a Data Frame +} +\description{ + \code{contents} is a generic method for which \code{contents.data.frame} + and \code{contents.list} are the methods. \code{contents.data.frame} creates an + object containing the following attributes of the variables + from a data frame: names, labels (if any), units (if any), number of + factor levels (if any), factor levels, + class, storage mode, and number of NAs. \code{print.contents.data.frame} + will print the results, with options for sorting the variables. + \code{html.contents.data.frame} creates HTML code for displaying the + results. This code has hyperlinks so that if the user clicks on the + number of levels the browser jumps to the correct part of a table of + factor levels for all the \code{factor} variables. If long labels are + present (\code{"longlabel"} attributes on variables), these are printed + at the bottom and the \code{html} method links to them through the + regular labels. Variables having the same \code{levels} in the same + order have the levels factored out for brevity. + + \code{contents.list} prints a directory of datasets when + \code{\link{sasxport.get}} imported more than one SAS dataset. +} +\usage{ +contents(object, \dots) +\method{contents}{data.frame}(object, sortlevels=FALSE, id=NULL, + range=NULL, values=NULL, \dots) +\method{print}{contents.data.frame}(x, + sort=c('none','names','labels','NAs'), prlevels=TRUE, maxlevels=Inf, + number=FALSE, \dots) +\method{html}{contents.data.frame}(object, + sort=c('none','names','labels','NAs'), prlevels=TRUE, maxlevels=Inf, + file=paste('contents',object$dfname,'html',sep='.'), + levelType=c('list','table'), + append=FALSE, number=FALSE, nshow=TRUE, \dots) +\method{contents}{list}(object, dslabels, \dots) +\method{print}{contents.list}(x, + sort=c('none','names','labels','NAs','vars'), \dots) +} +\arguments{ + \item{object}{ + a data frame. For \code{html}, an object created by + \code{contents}. For \code{contents.list}, a list of data frames. + } + \item{sortlevels}{set to \code{TRUE} to sort levels of all factor + variables into alphabetic order. This is especially useful when two + variables use the same levels but in different orders. They will + still be recognized by the \code{html} method as having identical + levels if sorted.} + \item{id}{an optional subject ID variable name that if present in + \code{object} will cause the number of unique IDs to be printed in + the contents header} + \item{range}{an optional variable name that if present in \code{object} + will cause its range to be printed in the contents header} + \item{values}{an optional variable name that if present in + \code{object} will cause its unique values to be printed in the + contents header} + \item{x}{ + an object created by \code{contents} + } + \item{sort}{ + Default is to print the variables in their original order in the + data frame. Specify one of + \code{"names"}, \code{"labels"}, or \code{"NAs"} to sort the variables by, + respectively, alphabetically by names, alphabetically by labels, or by + increasing order of number of missing values. For + \code{contents.list}, \code{sort} may also be the value + \code{"vars"} to cause sorting by the number of variables in the dataset. + } + \item{prlevels}{ + set to \code{FALSE} to not print all levels of \code{factor} variables + } + \item{maxlevels}{maximum number of levels to print for a \code{factor} variable} + \item{number}{ + set to \code{TRUE} to have the \code{print} and \code{latex} methods + number the variables by their order in the data frame + } + \item{nshow}{set to \code{FALSE} to suppress outputting number of + observations and number of \code{NA}s; useful when these counts + would unblind information to blinded reviewers} + \item{file}{ + file to which to write the html code. Default is + \code{"contents.dfname.html"} where \code{dfname} is the name of the data + frame processed by \code{contents}. + } + \item{levelType}{ + By default, bullet lists of category levels are + constructed in html. Set \code{levelType='table'} to put levels in + html table format. + } + \item{append}{ + set to \code{TRUE} to add html code to an existing file + } + \item{\dots}{ + arguments passed from \code{html} to \code{format.df}, + unused otherwise + } + \item{dslabels}{ + named vector of SAS dataset labels, created for + example by \code{\link{sasdsLabels}} + } +} +\value{ + an object of class \code{"contents.data.frame"} or + \code{"contents.list"} +} + +\author{ + Frank Harrell + \cr + Vanderbilt University + \cr + \email{f.harrell@vanderbilt.edu} +} +\seealso{ + \code{\link{describe}}, \code{\link{html}}, \code{\link{upData}} +} +\examples{ +set.seed(1) +dfr <- data.frame(x=rnorm(400),y=sample(c('male','female'),400,TRUE)) +contents(dfr) +dfr <- upData(dfr, labels=c(x='Label for x', y='Label for y')) +attr(dfr$x, 'longlabel') <- + 'A very long label for x that can continue onto multiple long lines of text' + +k <- contents(dfr) +print(k, sort='names', prlevels=FALSE) +\dontrun{ +html(k) +html(contents(dfr)) # same result +w <- html(k, file='my.html') # create my.html, don't display +latex(k$contents) # latex.default just the main information +} +} +\keyword{data} +\keyword{interface} +\concept{html} + |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/cpower.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/cpower.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,171 @@ +\name{cpower} +\alias{cpower} +\title{ +Power of Cox/log-rank Two-Sample Test +} +\description{ +Assumes exponential distributions for both treatment groups. +Uses the George-Desu method along with +formulas of Schoenfeld that allow estimation of the expected number of +events in the two groups. +To allow for drop-ins (noncompliance to control therapy, crossover to +intervention) and noncompliance of the intervention, the method of +Lachin and Foulkes is used. +} +\usage{ +cpower(tref, n, mc, r, accrual, tmin, noncomp.c=0, noncomp.i=0, + alpha=0.05, nc, ni, pr=TRUE) +} +\arguments{ +\item{tref}{ +time at which mortalities estimated +} +\item{n}{ +total sample size (both groups combined). If allocation is unequal +so that there are not \code{n/2} observations in each group, you may specify +the sample sizes in \code{nc} and \code{ni}. +} +\item{mc}{ +tref-year mortality, control +} +\item{r}{ +\% reduction in \code{mc} by intervention +} +\item{accrual}{ +duration of accrual period +} +\item{tmin}{ +minimum follow-up time +} +\item{noncomp.c}{ +\% non-compliant in control group (drop-ins) +} +\item{noncomp.i}{ +\% non-compliant in intervention group (non-adherers) +} +\item{alpha}{ +type I error probability. A 2-tailed test is assumed. +} +\item{nc}{ +number of subjects in control group +} +\item{ni}{ +number of subjects in intervention group. \code{nc} and \code{ni} are specified +exclusive of \code{n}. +} +\item{pr}{ +set to \code{FALSE} to suppress printing of details +}} +\value{ +power +} +\section{Side Effects}{ +prints +} +\details{ +For handling noncompliance, uses a modification of formula (5.4) of +Lachin and Foulkes. Their method is based on a test for the difference +in two hazard rates, whereas \code{cpower} is based on testing the difference +in two log hazards. It is assumed here that the same correction factor +can be approximately applied to the log hazard ratio as Lachin and Foulkes applied to +the hazard difference. + + +Note that Schoenfeld approximates the variance +of the log hazard ratio by \code{4/m}, where \code{m} is the total number of events, +whereas the George-Desu method uses the slightly better \code{1/m1 + 1/m2}. +Power from this function will thus differ slightly from that obtained with +the SAS \code{samsizc} program. +} +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University +\cr +\email{f.harrell@vanderbilt.edu} +} +\references{ +Peterson B, George SL: Controlled Clinical Trials 14:511--522; 1993. + + +Lachin JM, Foulkes MA: Biometrics 42:507--519; 1986. + + +Schoenfeld D: Biometrics 39:499--503; 1983. +} +\seealso{ +\code{\link{spower}}, \code{\link{ciapower}}, \code{\link{bpower}} +} +\examples{ +#In this example, 4 plots are drawn on one page, one plot for each +#combination of noncompliance percentage. Within a plot, the +#5-year mortality \% in the control group is on the x-axis, and +#separate curves are drawn for several \% reductions in mortality +#with the intervention. The accrual period is 1.5y, with all +#patients followed at least 5y and some 6.5y. 
+ + +par(mfrow=c(2,2),oma=c(3,0,3,0)) + + +morts <- seq(10,25,length=50) +red <- c(10,15,20,25) + + +for(noncomp in c(0,10,15,-1)) { + if(noncomp>=0) nc.i <- nc.c <- noncomp else {nc.i <- 25; nc.c <- 15} + z <- paste("Drop-in ",nc.c,"\%, Non-adherence ",nc.i,"\%",sep="") + plot(0,0,xlim=range(morts),ylim=c(0,1), + xlab="5-year Mortality in Control Patients (\%)", + ylab="Power",type="n") + title(z) + cat(z,"\n") + lty <- 0 + for(r in red) { + lty <- lty+1 + power <- morts + i <- 0 + for(m in morts) { + i <- i+1 + power[i] <- cpower(5, 14000, m/100, r, 1.5, 5, nc.c, nc.i, pr=FALSE) + } + lines(morts, power, lty=lty) + } + if(noncomp==0)legend(18,.55,rev(paste(red,"\% reduction",sep="")), + lty=4:1,bty="n") +} +mtitle("Power vs Non-Adherence for Main Comparison", + ll="alpha=.05, 2-tailed, Total N=14000",cex.l=.8) +# +# Point sample size requirement vs. mortality reduction +# Root finder (uniroot()) assumes needed sample size is between +# 1000 and 40000 +# +nc.i <- 25; nc.c <- 15; mort <- .18 +red <- seq(10,25,by=.25) +samsiz <- red + + +i <- 0 +for(r in red) { + i <- i+1 + samsiz[i] <- uniroot(function(x) cpower(5, x, mort, r, 1.5, 5, + nc.c, nc.i, pr=FALSE) - .8, + c(1000,40000))$root +} + + +samsiz <- samsiz/1000 +par(mfrow=c(1,1)) +plot(red, samsiz, xlab='\% Reduction in 5-Year Mortality', + ylab='Total Sample Size (Thousands)', type='n') +lines(red, samsiz, lwd=2) +title('Sample Size for Power=0.80\nDrop-in 15\%, Non-adherence 25\%') +title(sub='alpha=0.05, 2-tailed', adj=0) +} +\keyword{htest} +\keyword{survival} +\concept{power} +\concept{study design} |
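A single direct call using the example's inputs (18% control mortality at 5 years, 15% reduction, 15% drop-in, 25% non-adherence):

cpower(tref=5, n=14000, mc=.18, r=15, accrual=1.5, tmin=5,
       noncomp.c=15, noncomp.i=25)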
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/csv.get.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/csv.get.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,95 @@ +\name{csv.get} +\alias{csv.get} +\title{Read Comma-Separated Text Data Files} +\description{ + Read comma-separated text data files, allowing optional translation + to lower case for variable names after making them valid S names. + There is a facility for reading long variable labels as one of the + rows. If labels are not specified and a final variable name is not + the same as that in the header, the original variable name is saved as + a variable label. Uses \code{read.csv} if the \code{data.table} + package is not in effect, otherwise calls \code{fread}. +} +\usage{ +csv.get(file, lowernames=FALSE, datevars=NULL, datetimevars=NULL, + dateformat='\%F', + fixdates=c('none','year'), comment.char="", autodates=TRUE, + allow=NULL, charfactor=FALSE, + sep=',', skip=0, vnames=NULL, labels=NULL, \dots) +} +\arguments{ + \item{file}{the file name for import.} + \item{lowernames}{set this to \code{TRUE} to change variable names to + lower case.} + \item{datevars}{character vector of names (after \code{lowernames} is + applied) of variables to consider as a factor or character vector + containing dates in a format matching \code{dateformat}. The + default is \code{"\%F"} which uses the yyyy-mm-dd format.} + \item{datetimevars}{character vector of names (after \code{lowernames} + is applied) of variables to consider to be date-time variables, with + date formats as described under \code{datevars} followed by a space + followed by time in hh:mm:ss format. \code{chron} is used to store + such variables. If all times in the variable + are 00:00:00 the variable will be converted to an ordinary date variable.} + \item{dateformat}{for \code{cleanup.import} is the input format (see + \code{\link{strptime}})} + \item{fixdates}{for any of the variables listed in \code{datevars} + that have a \code{dateformat} that \code{cleanup.import} understands, + specifying \code{fixdates} allows corrections of certain formatting + inconsistencies before the fields are attempted to be converted to + dates (the default is to assume that the \code{dateformat} is followed + for all observation for \code{datevars}). Currently + \code{fixdates='year'} is implemented, which will cause 2-digit or + 4-digit years to be shifted to the alternate number of digits when + \code{dateformat} is the default \code{"\%F"} or is \code{"\%y-\%m-\%d"}, + \code{"\%m/\%d/\%y"}, or \code{"\%m/\%d/\%Y"}. Two-digits years are + padded with \code{20} on the left. Set \code{dateformat} to the + desired format, not the exceptional format.} + \item{comment.char}{a character vector of length one containing a + single character or an empty string. Use '""' to turn off the + interpretation of comments altogether.} + \item{autodates}{Set to \code{TRUE} to allow the function to guess at which + variables are dates} + \item{allow}{a vector of characters allowed by \R that should not be + converted to periods in variable names. By default, underscores in + variable names are converted to periods as with \R before version + 1.9.} + \item{charfactor}{set to \code{TRUE} to change character variables to + factors if they have fewer than n/2 unique values. Blanks and null + strings are converted to \code{NA}s.} + \item{sep}{field separator, defaults to comma} + \item{skip}{number of records to skip before data start. 
Required if + \code{vnames} or \code{labels} is given.} + \item{vnames}{number of row containing variable names, default is one} + \item{labels}{number of row containing variable labels, default is no labels} + \item{\dots}{arguments to pass to \code{read.csv} other than + \code{skip} and \code{sep}.} +} +\details{ + \code{csv.get} reads comma-separated text data files, allowing optional + translation to lower case for variable names after making them valid S + names. Original possibly non-legal names are taken to be variable + labels if \code{labels} is not specified. Character or factor + variables containing dates can be converted to date variables. + \code{cleanup.import} is invoked to finish the job. +} +\value{a new data frame.} +\author{Frank Harrell, Vanderbilt University} +\seealso{ + \code{\link{sas.get}}, \code{\link{data.frame}}, + \code{\link{cleanup.import}}, \code{\link{read.csv}}, + \code{\link{strptime}}, \code{\link{POSIXct}}, \code{\link{Date}}, + \code{\link[data.table]{fread}} +} +\examples{ +\dontrun{ +dat <- csv.get('myfile.csv') + +# Read a csv file with junk in the first row, variable names in the +# second, long variable labels in the third, and junk in the 4th row +dat <- csv.get('myfile.csv', vnames=2, labels=3, skip=4) +} +} +\keyword{manip} +\keyword{IO} +\keyword{file} |
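A self-contained sketch using a temporary file; the header 'Age (years)' is illustrative, and per the description above such headers become variable labels once the names are made valid (Hmisc attached for csv.get and label):

tf <- tempfile(fileext='.csv')
writeLines(c('Age (years),Sex', '23,male', '41,female'), tf)
d <- csv.get(tf, lowernames=TRUE)
names(d)          # valid lower-case R names
sapply(d, label)  # original headers kept as variable labels
unlink(tf)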
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/curveRep.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/curveRep.Rd	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,293 @@
+\name{curveRep}
+\alias{curveRep}
+\alias{print.curveRep}
+\alias{plot.curveRep}
+\alias{curveSmooth}
+\title{Representative Curves}
+\description{\code{curveRep} finds representative curves from a
+  relatively large collection of curves.  The curves usually represent
+  time-response profiles as in serial (longitudinal or repeated) data
+  with possibly unequal time points and greatly varying sample sizes per
+  subject.  After excluding records containing missing \code{x} or
+  \code{y}, records are first stratified into \code{kn} groups having similar
+  sample sizes per curve (subject).  Within these strata, curves are
+  next stratified according to the distribution of \code{x} points per
+  curve (typically measurement times per subject).  The
+  \code{\link[cluster]{clara}} clustering/partitioning function is used
+  to do this, clustering on one, two, or three \code{x} characteristics
+  depending on the minimum sample size in the current interval of sample
+  size.  If the interval has a minimum number of unique \code{x} values of
+  one, clustering is done on the single \code{x} values.  If the minimum
+  number of unique \code{x} values is two, clustering is done to create
+  groups that are similar on both \code{min(x)} and \code{max(x)}.  For
+  groups containing no fewer than three unique \code{x} values,
+  clustering is done on the trio of values \code{min(x)}, \code{max(x)},
+  and the longest gap between any successive \code{x}.  Then within
+  sample size and \code{x} distribution strata, clustering of
+  time-response profiles is based on \code{p} values of \code{y} all
+  evaluated at the same \code{p} equally-spaced \code{x}'s within the
+  stratum.  An option allows per-curve data to be smoothed with
+  \code{\link{lowess}} before proceeding.  Outer \code{x} values are
+  taken as extremes of \code{x} across all curves within the stratum.
+  Linear interpolation within curves is used to estimate \code{y} at the
+  grid of \code{x}'s.  For curves within the stratum that do not extend
+  to the most extreme \code{x} values in that stratum, extrapolation
+  uses flat lines from the observed extremes in the curve unless
+  \code{extrap=TRUE}.  The \code{p} \code{y} values are clustered using
+  \code{\link[cluster]{clara}}.
+
+  \code{print} and \code{plot} methods show results.  By specifying an
+  auxiliary \code{idcol} variable to \code{plot}, other variables such
+  as treatment may be depicted to allow the analyst to determine for
+  example whether subjects on different treatments are assigned to
+  different time-response profiles.  To write the frequencies of a
+  variable such as treatment in the upper left corner of each panel
+  (instead of the grand total number of clusters in that panel), specify
+  \code{freq}.
+
+  \code{curveSmooth} takes a set of curves and smooths them using
+  \code{\link{lowess}}.  If the number of unique \code{x} points in a curve is
+  less than \code{p}, the smooth is evaluated at the unique \code{x}
+  values.  Otherwise it is evaluated at an equally spaced set of
+  \code{x} points over the observed range.  If fewer than 3 unique
+  \code{x} values are in a curve, those points are used and smoothing is not done.
+}
+\usage{
+curveRep(x, y, id, kn = 5, kxdist = 5, k = 5, p = 5,
+         force1 = TRUE, metric = c("euclidean", "manhattan"),
+         smooth=FALSE, extrap=FALSE, pr=FALSE)
+
+\method{print}{curveRep}(x, \dots)
+
+\method{plot}{curveRep}(x, which=1:length(res), method=c('all','lattice'),
+     m=NULL, probs=c(.5, .25, .75), nx=NULL, fill=TRUE,
+     idcol=NULL, freq=NULL, plotfreq=FALSE,
+     xlim=range(x), ylim=range(y),
+     xlab='x', ylab='y', colorfreq=FALSE, \dots)
+curveSmooth(x, y, id, p=NULL, pr=TRUE)
+
+}
+\arguments{
+ \item{x}{a numeric vector, typically measurement times.
+	For \code{plot.curveRep}, \code{x} is an object created by \code{curveRep}.}
+ \item
[...]
+  Vanderbilt University\cr
+  \email{f.harrell@vanderbilt.edu}
+}
+\note{The references describe other methods for deriving
+  representative curves, but those methods were not used here.  The last
+  reference, which used a cluster analysis on principal components,
+  motivated \code{curveRep} however.  The \code{kml} package does k-means clustering of longitudinal data with imputation.}
+\seealso{\code{\link[cluster]{clara}},\code{\link[Hmisc]{dataRep}}}
+\examples{
+\dontrun{
+# Simulate 200 curves with per-curve sample sizes ranging from 1 to 10
+# Make curves with odd-numbered IDs have an x-distribution that is random
+# uniform [0,1] and those with even-numbered IDs have an x-dist. that is
+# half as wide but still centered at 0.5.  Shift y values higher with
+# increasing IDs
+set.seed(1)
+N <- 200
+nc <- sample(1:10, N, TRUE)
+id <- rep(1:N, nc)
+x <- y <- id
+for(i in 1:N) {
+  x[id==i] <- if(i \%\% 2) runif(nc[i]) else runif(nc[i], .25, .75)
+  y[id==i] <- i + 10*(x[id==i] - .5) + runif(nc[i], -10, 10)
+}
+
+w <- curveRep(x, y, id, kxdist=2, p=10)
+w
+par(ask=TRUE, mfrow=c(4,5))
+plot(w)                # show everything, profiles going across
+par(mfrow=c(2,5))
+plot(w,1)              # show n=1 results
+# Use a color assignment table, assigning low curves to green and
+# high to red.  Unique curve (subject) IDs are the names of the vector.
+cols <- c(rep('green', N/2), rep('red', N/2))
+names(cols) <- as.character(1:N)
+plot(w, 3, idcol=cols)
+par(ask=FALSE, mfrow=c(1,1))
+
+plot(w, 1, 'lattice')  # show n=1 results
+plot(w, 3, 'lattice')  # show n=4-5 results
+plot(w, 3, 'lattice', idcol=cols)  # same but different color mapping
+plot(w, 3, 'lattice', m=1)  # show a single "representative" curve
+# Show median, 10th, and 90th percentiles of supposedly representative curves
+plot(w, 3, 'lattice', m='quantiles', probs=c(.5,.1,.9))
+# Same plot but with much less grouping of x variable
+plot(w, 3, 'lattice', m='quantiles', probs=c(.5,.1,.9), nx=2)
+
+# Smooth data before profiling.  This allows later plotting to plot
+# smoothed representative curves rather than raw curves (which
+# specifying smooth=TRUE to curveRep would do, if curveSmooth was not used)
+d <- curveSmooth(x, y, id)
+w <- with(d, curveRep(x, y, id))
+
+# Example to show that curveRep can cluster profiles correctly when
+# there is no noise.  In the data there are four profiles - flat, flat
+# at a higher mean y, linearly increasing then flat, and flat at the
+# first height except for a sharp triangular peak
+
+set.seed(1)
+x <- 0:100
+m <- length(x)
+profile <- matrix(NA, nrow=m, ncol=4)
+profile[,1] <- rep(0, m)
+profile[,2] <- rep(3, m)
+profile[,3] <- c(0:3, rep(3, m-4))
+profile[,4] <- c(0,1,3,1,rep(0,m-4))
+col <- c('black','blue','green','red')
+matplot(x, profile, type='l', col=col)
+xeval <- seq(0, 100, length.out=5)
+s <- x \%in\% xeval
+matplot(x[s], profile[s,], type='l', col=col)
+
+id <- rep(1:100, each=m)
+X <- Y <- id
+cols <- character(100)
+names(cols) <- as.character(1:100)
+for(i in 1:100) {
+  s <- id==i
+  X[s] <- x
+  j <- sample(1:4,1)
+  Y[s] <- profile[,j]
+  cols[i] <- col[j]
+}
+table(cols)
+yl <- c(-1,4)
+w <- curveRep(X, Y, id, kn=1, kxdist=1, k=4)
+plot(w, 1, 'lattice', idcol=cols, ylim=yl)
+# Found 4 clusters but two have same profile
+w <- curveRep(X, Y, id, kn=1, kxdist=1, k=3)
+plot(w, 1, 'lattice', idcol=cols, freq=cols, plotfreq=TRUE, ylim=yl)
+# Incorrectly combined black and red because default value p=5 did
+# not result in different profiles at x=xeval
+w <- curveRep(X, Y, id, kn=1, kxdist=1, k=4, p=40)
+plot(w, 1, 'lattice', idcol=cols, ylim=yl)
+# Found correct clusters because evaluated curves at 40 equally
+# spaced points and could find the sharp triangular peak in profile 4
+}
+}
+\keyword{multivariate}
+\keyword{hplot}
+\concept{repeated measures}
+\concept{longitudinal data}
+\concept{serial data}
+\concept{representative curves}
+\concept{descriptive statistics}
+\concept{exploratory data analysis}
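The four-stage stratification described above maps one-to-one onto the main arguments; an annotated call, assuming x, y, id as simulated in the examples (a gloss on the description, not package internals):

    w <- curveRep(x, y, id,
                  kn     = 5,  # 1: strata of similar per-curve sample size
                  kxdist = 5,  # 2: clusters of x-distributions within each stratum
                  k      = 5,  # 3: clusters of y-profiles within each x-distribution group
                  p      = 10) # y interpolated at 10 equally spaced x's per stratum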
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/cut2.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/cut2.Rd	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,75 @@
+\name{cut2}
+\alias{cut2}
+\title{
+Cut a Numeric Variable into Intervals
+}
+\description{
+Function like \code{cut} but left endpoints are inclusive and labels are of
+the form \code{[lower, upper)}, except that the last interval is \code{[lower,upper]}.
+If cuts are given, will by default make sure that cuts include the entire
+range of \code{x}.
+Also, if cuts are not given, will cut \code{x} into quantile groups
+(\code{g} given) or groups
+with a given minimum number of observations (\code{m}).  Whereas \code{cut} creates a
+category object, \code{cut2} creates a factor object.
+}
+\usage{
+cut2(x, cuts, m, g, levels.mean, digits, minmax=TRUE, oneval=TRUE, onlycuts=FALSE)
+}
+\arguments{
+\item{x}{
+numeric vector to classify into intervals
+}
+\item{cuts}{
+cut points
+}
+\item{m}{
+desired minimum number of observations in a group.  The algorithm does
+not guarantee that all groups will have at least \code{m} observations.
+}
+\item{g}{
+number of quantile groups
+}
+\item{levels.mean}{
+set to \code{TRUE} to make the new categorical vector have a levels attribute that is
+the group means of \code{x} instead of interval endpoint labels
+}
+\item{digits}{
+number of significant digits to use in constructing levels.  Default is 3
+(5 if \code{levels.mean=TRUE})
+}
+\item{minmax}{
+if cuts is specified but \code{min(x)<min(cuts)} or \code{max(x)>max(cuts)}, augments
+cuts to include min and max \code{x}
+}
+\item{oneval}{
+if an interval contains only one unique value, the interval will be
+labeled with the formatted version of that value instead of the
+interval endpoints, unless \code{oneval=FALSE}
+}
+\item{onlycuts}{
+  set to \code{TRUE} to only return the vector of computed cuts.  This
+  consists of the interior values plus outer ranges.
+}
+}
+\value{
+a factor variable with levels of the form \code{[a,b)} or formatted means
+(character strings) unless \code{onlycuts} is \code{TRUE}, in which case
+a numeric vector is returned
+}
+\seealso{
+\code{\link{cut}}, \code{\link{quantile}}
+}
+\examples{
+set.seed(1)
+x <- runif(1000, 0, 100)
+z <- cut2(x, c(10,20,30))
+table(z)
+table(cut2(x, g=10))   # quantile groups
+table(cut2(x, m=50))   # group x into intervals with at least 50 obs.
+}
+\keyword{category}
+\keyword{nonparametric}
+\concept{grouping}
+\concept{categorization}
+\concept{discretization}
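The shipped examples do not exercise onlycuts or levels.mean; two quick additions using the same simulated x:

    cut2(x, g=4, onlycuts=TRUE)            # numeric vector: outer range plus interior quartile cuts
    table(cut2(x, g=4, levels.mean=TRUE))  # levels are group means rather than [a,b) labels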
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/data.frame.create.modify.check.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/data.frame.create.modify.check.Rd	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,533 @@
+\name{data.frame.create.modify.check}
+\alias{data.frame.create.modify.check}
+\title{
+  Tips for Creating, Modifying, and Checking Data Frames
+}
+\description{
+  This help file contains a template for importing data to create an R
+  data frame, correcting some problems resulting from the import and
+  making the data frame be stored more efficiently, modifying the data
+  frame (including better annotating it and changing the names of some
+  of its variables), and checking and inspecting the data frame for
+  reasonableness of the values of its variables and to describe patterns
+  of missing data.  Various built-in functions and functions in the
+  Hmisc library are used.  At the end some methods for creating data
+  frames \dQuote{from scratch} within \R are presented.
+
+  The examples below attempt to clarify the separation of operations
+  that are done on a data frame as a whole, operations that are done on
+  a small subset of its variables without attaching the whole data
+  frame, and operations that are done on many variables after attaching
+  the data frame in search position one.  It also tries to clarify that
+  for analyzing several separate variables using \R commands that do not
+  support a \code{data} argument, it is helpful to attach the data frame
+  in a search position later than position one.
+
+  It is often useful to create, modify, and process datasets in the
+  following order.
+  \enumerate{
+    \item{
+      Import external data into a data frame (if the raw data do not
+      contain column names, provide these during the import if possible)
+    }
+    \item{
+      Make global changes to a data frame (e.g., changing variable
+      names)
+    }
+    \item{
+      Change attributes or values of variables within a data frame
+    }
+    \item{
+      Do analyses involving the whole data frame (without attaching it)\cr
+      (Data frame still in .Data)
+    }
+    \item{
+      Do analyses of individual variables (after attaching the data
+      frame in search position two or later)
+    }
+  }
+}
+\details{
+  The examples below use the \code{FEV} dataset from
+  \cite{Rosner 1995}.  Almost any dataset would do.  The jcetable data
+  are taken from \cite{Galobardes, et al.}
+
+  Presently, giving a variable the \code{"units"} attribute (using the
+  \pkg{Hmisc} \code{\link{units}} function) only benefits the
+  \pkg{Hmisc} \code{\link{describe}} function and the \pkg{rms}
+  library's version of the \code{\link[rms]{Surv}} function.  Variable
+  labels defined with the Hmisc \code{\link{label}} function are used by
+  \code{\link{describe}}, \code{\link{summary.formula}}, and many of
+  the plotting functions in \pkg{Hmisc} and \pkg{rms}.
+}
+\references{
+  Alzola CF, Harrell FE (2001):
+  \emph{An Introduction to S and the Hmisc and Design Libraries.}
+  Chapters 3 and 4,
+  \url{http://biostat.mc.vanderbilt.edu/twiki/pub/Main/RS/sintro.pdf}.
+
+  Galobardes, et al. (1998), \emph{J Clin Epi} 51:875-881.
+
+  Rosner B (1995): \emph{Fundamentals of Biostatistics, 4th Edition.}
+  New York: Duxbury Press.
+}
+\seealso{
+  \code{\link{scan}}, \code{\link{read.table}},
+  \code{\link{cleanup.import}}, \code{\link{sas.get}},
+  \code{\link{data.frame}}, \code{\link{attach}}, \code{\link{detach}},
+  \code{\link{describe}}, \code{\link{datadensity}},
+  \code{\link{plot.data.frame}}, \code{\link{hist.data.frame}},
+  \code{\link{naclus}}, \code{\link{factor}}, \code{\link{label}},
+  \code{\link{units}}, \code{\link{names}}, \code{\link{expand.grid}},
+  \code{\link{summary.formula}}, \code{\link{summary.data.frame}},
+  \code{\link{casefold}}, \code{\link{edit}}, \code{\link{page}},
+  \code{\link{plot.data.frame}}, \code{\link{Cs}},
+  \code{\link{combine.levels}},\code{\link{upData}}
+}
+\examples{
+\dontrun{
+# First, we do steps that create or manipulate the data
+# frame in its entirety.  For S-Plus, these are done with
+# .Data in search position one (the de
[...]
e of a memory hog.
+
+
+attach(FEV)
+# Use e.g. attach(FEV[,Cs(age,sex)]) if you only
+# want to analyze a small subset of the variables
+# Use e.g. attach(FEV[FEV$sex=='male',]) to
+# analyze a subset of the observations
+
+
+summary(height ~ age + sex,
+        fun=function(y)c(smean.sd(y),
+                         smedian.hilow(y,conf.int=.5)))
+fit <- lm(height ~ age*sex)
+
+
+# Run generic summary function on height and fev,
+# stratified by sex
+by(data.frame(height,fev), sex, summary)
+
+
+# Cross-classify into 4 sex x smoke groups
+by(FEV, list(sex,smoke), summary)
+
+
+# Plot 5 quantiles
+s <- summary(fev ~ age + sex + height,
+             fun=function(y)quantile(y,c(.1,.25,.5,.75,.9)))
+
+
+plot(s, which=1:5, pch=c(1,2,15,2,1),  #pch=c('=','[','o',']','='),
+     main='A Discovery', xlab='FEV')
+
+
+# Use the nonparametric bootstrap to compute a
+# 0.95 confidence interval for the population mean fev
+smean.cl.boot(fev)   # in Hmisc
+
+
+# Use the Statistics \dots Compare Samples \dots One Sample
+# keys to get a normal-theory-based C.I.  Then do it
+# more manually.  The following method assumes that
+# there are no NAs in fev
+
+
+sd <- sqrt(var(fev))
+xbar <- mean(fev)
+xbar
+sd
+n <- length(fev)
+qt(.975,n-1)
+# prints 0.975 critical value of t dist. with n-1 d.f.
+
+
+xbar + c(-1,1)*sd/sqrt(n)*qt(.975,n-1)
+# prints confidence limits
+
+
+# Fit a linear model
+# fit <- lm(fev ~ other variables \dots)
+
+
+detach()
+
+
+# The last command is only needed if you want to
+# start operating on another data frame and you want
+# to get FEV out of the way.
+
+
+# -----------------------------------------------------------------------
+# Creating data frames from scratch
+#
+# Data frames can be created from within S.  To
+# create a small data frame containing ordinary
+# data, you can use something like
+
+
+dframe <- data.frame(age=c(10,20,30),
+                     sex=c('male','female','male'))
+
+
+# You can also create a data frame using the Data
+# Sheet.  Create an empty data frame with the
+# correct variable names and types, then edit in the
+# data.
+
+
+dd <- data.frame(age=numeric(0),sex=character(0))
+
+
+# The sex variable will be stored as a factor, and
+# levels will be automatically added to it as you
+# define new values for sex in the Data Sheet's sex
+# column.
+#
+# When the data frame you need to create is defined
+# by systematically varying variables (e.g., all
+# possible combinations of values of each variable),
+# the expand.grid function is useful for quickly
+# creating the data.  Then you can add
+# non-systematically-varying variables to the object
+# created by expand.grid, using programming
+# statements or editing the Data Sheet.  This
+# process is useful for creating a data frame
+# representing all the values in a printed table.
+# In what follows we create a data frame
+# representing the combinations of values from an 8
+# x 2 x 2 x 2 (event x method x sex x what) table,
+# and add a non-systematic variable percent to the
+# data.
+
+
+jcetable <- expand.grid(
+ event=c('Wheezing at any time',
+         'Wheezing and breathless',
+         'Wheezing without a cold',
+         'Waking with tightness in the chest',
+         'Waking with shortness of breath',
+         'Waking with an attack of cough',
+         'Attack of asthma',
+         'Use of medication'),
+ method=c('Mail','Telephone'),
+ sex=c('Male','Female'),
+ what=c('Sensitivity','Specificity'))
+
+
+jcetable$percent <-
+c(756,618,706,422,356,578,289,333,
+  576,421,789,273,273,212,212,212,
+  613,763,713,403,377,541,290,226,
+  613,684,632,290,387,613,258,129,
+  656,597,438,780,732,679,938,919,
+  714,600,494,877,850,703,963,987,
+  755,420,480,794,779,647,956,941,
+  766,423,500,833,833,604,955,986) / 10
+
+
+# In jcetable, event varies most rapidly, then
+# method, then sex, and what.
+}
+}
+\keyword{data}
+\keyword{manip}
+\keyword{programming}
+\keyword{interface}
+\keyword{htest}
+\concept{overview}
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/dataRep.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/dataRep.Rd	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,154 @@
+\name{dataRep}
+\alias{dataRep}
+\alias{print.dataRep}
+\alias{predict.dataRep}
+\alias{print.predict.dataRep}
+\alias{roundN}
+\alias{[.roundN}
+\title{
+Representativeness of Observations in a Data Set
+}
+\description{
+These functions are intended to be used to describe how well a given
+set of new observations (e.g., new subjects) were represented in a
+dataset used to develop a predictive model.
+The \code{dataRep} function forms a data frame that contains all the unique
+combinations of variable values that existed in a given set of
+variable values.  Cross-classifications of values are created using
+exact values of variables, so for continuous numeric variables it is
+often necessary to round them to the nearest \code{v} and to possibly
+curtail the values to some lower and upper limit before rounding.
+Here \code{v} denotes a numeric constant specifying the matching tolerance
+that will be used.  \code{dataRep} also stores marginal distribution
+summaries for all the variables.  For numeric variables, all 101
+percentiles are stored, and for all variables, the frequency
+distributions are also stored (frequencies are computed after any
+rounding and curtailment of numeric variables).  For the purposes of
+rounding and curtailing, the \code{roundN} function is provided.  A \code{print}
+method will summarize the calculations made by \code{dataRep}, and if
+\code{long=TRUE} all unique combinations of values and their frequencies in
+the original dataset are printed.
+
+The \code{predict} method for \code{dataRep} takes a new data frame having
+variables named the same as the original ones (but whose factor levels
+are not necessarily in the same order) and examines the collapsed
+cross-classifications created by \code{dataRep} to find how many
+observations were similar to each of the new observations after any
+rounding or curtailment of limits is done.  \code{predict} also does some
+calculations to describe how the variable values of the new
+observations "stack up" against the marginal distributions of the
+original data.  For categorical variables, the percent of observations
+having a given variable with the value of the new observation (after
+rounding for variables that were passed through \code{roundN} in the formula given
+to \code{dataRep}) is computed.  For numeric variables, the percentile of
+the original distribution in which the current value falls will be
+computed.  For this purpose, the data are not rounded because the 101
+original percentiles were retained; linear interpolation is used to
+estimate percentiles for values between two tabulated percentiles.
+The lowest marginal frequency of matching values across all variables
+is also computed.  For example, if an age, sex combination matches 10
+subjects in the original dataset but the age value matches 100 ages
+(after rounding) and the sex value matches the sex code of 300
+observations, the lowest marginal frequency is 100, which is a "best
+case" upper limit for multivariable matching.  I.e., matching on all
+variables has to result in a lower frequency than this amount.
+A \code{print} method for the output of \code{predict.dataRep} prints all
+calculations done by \code{predict} by default.  Calculations can be
+selectively suppressed.
+}
+\usage{
+dataRep(formula, data, subset, na.action)
+
+roundN(x, tol=1, clip=NULL)
+
+\method{print}{dataRep}(x, long=FALSE, \dots)
+
+\method{predict}{dataRep}(object, newdata, \dots)
+
+\method{print}{predict.dataRep}(x, prdata=TRUE, prpct=TRUE, \dots)
+}
+\arguments{
+\item{formula}{
+a formula with no left-hand-side.  Continuous numeric variables in
+need of rounding should appear in the formula as e.g. \code{roundN(x,5)} to
+have a tolerance of e.g. +/- 2.5 in matching.  Factor or character
+variables as well as numeric ones not passed through \code{roundN} are
+matched on exactly.
+}
+\item{x}{
+a numeric vector or an object created by \code{dataRep}
+}
+\item{object}{
+the object created by \code{dataRep} or \code{predict.dataRep}
+}
+\item{data, subset, na.action}{
+standard modeling arguments.  Default \code{na.action} is \code{na.delete},
+i.e., observations in the original dataset having any variables
+missing are deleted up front.
+}
+\item{tol}{
+rounding constant (tolerance is actually \code{tol/2} as values are rounded
+to the nearest \code{tol})
+}
+\item{clip}{
+a 2-vector specifying a lower and upper limit to curtail values of \code{x}
+before rounding
+}
+\item{long}{
+set to \code{TRUE} to see all unique combinations and frequency count
+}
+\item{newdata}{
+a data frame containing all the variables given to \code{dataRep} but not
+necessarily in the same order or having factor levels in the same order
+}
+\item{prdata}{
+set to \code{FALSE} to suppress printing \code{newdata} and the count of matching
+observations (plus the worst-case marginal frequency).
+}
+\item{prpct}{set to \code{FALSE} to not print percentiles and percents}
+\item{\dots}{unused}
+}
+\value{
+\code{dataRep} returns a list of class \code{"dataRep"} containing the collapsed
+data frame and frequency counts along with marginal distribution
+information.  \code{predict} returns an object of class \code{"predict.dataRep"}
+containing information determined by matching observations in
+\code{newdata} with the original (collapsed) data.
+}
+\section{Side Effects}{
+\code{print.dataRep} prints.
+}
+\author{
+Frank Harrell
+\cr
+Department of Biostatistics
+\cr
+Vanderbilt University School of Medicine
+\cr
+\email{f.harrell@vanderbilt.edu}
+}
+\seealso{
+\code{\link{round}}, \code{\link{table}}
+}
+\examples{
+set.seed(13)
+num.symptoms <- sample(1:4, 1000,TRUE)
+sex <- factor(sample(c('female','male'), 1000,TRUE))
+x  <- runif(1000)
+x[1] <- NA
+table(num.symptoms, sex, .25*round(x/.25))
+
+
+d <- dataRep(~ num.symptoms + sex + roundN(x,.25))
+print(d, long=TRUE)
+
+
+predict(d, data.frame(num.symptoms=1:3, sex=c('male','male','female'),
+                      x=c(.03,.5,1.5)))
+}
+\keyword{datasets}
+\keyword{category}
+\keyword{cluster}
+\keyword{manip}
+\keyword{models}
+% Converted by Sd2Rd version 1.21.
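The tol/clip semantics of roundN are easy to verify on a toy vector (a sketch based on the argument descriptions above):

    roundN(c(1, 6, 12, 23), tol=5)                # 0 5 10 25: rounded to nearest multiple of 5
    roundN(c(1, 6, 12, 23), tol=5, clip=c(5,15))  # 5 5 10 15: curtailed to [5,15] before rounding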
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/deff.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/deff.Rd	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,48 @@
+\name{deff}
+\alias{deff}
+\title{
+  Design Effect and Intra-cluster Correlation
+}
+\description{
+  Computes the Kish design effect and corresponding intra-cluster correlation
+  for a single cluster-sampled variable
+}
+\usage{
+deff(y, cluster)
+}
+\arguments{
+  \item{y}{
+    variable to analyze
+  }
+  \item{cluster}{
+    a variable whose unique values indicate cluster membership.  Any
+    type of variable is allowed.
+  }
+}
+\value{
+  a vector with named elements \code{n} (total number of non-missing
+  observations), \code{clusters} (number of clusters after deleting
+  missing data), \code{rho} (intra-cluster correlation), and \code{deff}
+  (design effect).
+}
+\author{
+Frank Harrell
+\cr
+Department of Biostatistics
+\cr
+Vanderbilt University
+\cr
+\email{f.harrell@vanderbilt.edu}
+}
+\seealso{
+\code{\link[rms]{bootcov}}, \code{\link[rms]{robcov}}
+}
+\examples{
+set.seed(1)
+blood.pressure <- rnorm(1000, 120, 15)
+clinic <- sample(letters, 1000, replace=TRUE)
+deff(blood.pressure, clinic)
+}
+\keyword{htest}
+\concept{study design}
+\concept{cluster sampling}
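As a cross-check on the returned vector, the textbook Kish approximation ties deff to rho through the average cluster size; this is the classical identity, not necessarily the exact computation inside deff:

    d <- deff(blood.pressure, clinic)
    # Kish: deff is approximately 1 + (average cluster size - 1) * rho
    1 + (d['n']/d['clusters'] - 1) * d['rho']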
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/describe.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/describe.Rd	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,325 @@
+\name{describe}
+\alias{describe}
+\alias{describe.default}
+\alias{describe.vector}
+\alias{describe.matrix}
+\alias{describe.formula}
+\alias{describe.data.frame}
+\alias{print.describe}
+\alias{print.describe.single}
+\alias{[.describe}
+\alias{latex.describe}
+\alias{latex.describe.single}
+\title{
+Concise Statistical Description of a Vector, Matrix, Data Frame, or Formula
+}
+\description{
+\code{describe} is a generic method that invokes \code{describe.data.frame},
+\code{describe.matrix}, \code{describe.vector}, or
+\code{describe.formula}.  \code{describe.vector} is the basic
+function for handling a single variable.
+This function determines whether the variable is character, factor,
+category, binary, discrete numeric, or continuous numeric, and prints
+a concise statistical summary according to each.  A numeric variable is
+deemed discrete if it has <= 10 unique values.  In this case,
+quantiles are not printed.  A frequency table is printed
+for any non-binary variable if it has no more than 20 unique
+values.  For any variable with at least 20 unique values, the 5 lowest
+and highest values are printed.  This behavior can be overridden for long
+character variables with many levels using the \code{listunique}
+parameter, to get a complete tabulation.
+
+\code{describe} is especially useful for
+describing data frames created by \code{*.get}, as labels, formats,
+value labels, and (in the case of \code{sas.get}) frequencies of special
+missing values are printed.
+
+For a binary variable, the sum (number of 1's) and mean (proportion of
+1's) are printed.  If the first argument is a formula, a model frame
+is created and passed to describe.data.frame.  If a variable
+is of class \code{"impute"}, a count of the number of imputed values is
+printed.  If a date variable has an attribute \code{partial.date}
+(this is set up by \code{sas.get}), counts of how many partial dates are
+actually present (missing month, missing day, missing both) are also presented.
+If a variable was created by the special-purpose function \code{substi} (which
+substitutes values of a second variable if the first variable is NA),
+the frequency table of substitutions is also printed.
+
+For numeric variables, \code{describe} adds an item called \code{Info}
+which is a relative information measure using the relative efficiency of
+a proportional odds/Wilcoxon test on the variable relative to the same
+test on a variable that has no ties.  \code{Info} is related to how
+continuous the variable is, and ties are less harmful the more untied
+values there are.  The formula for \code{Info} is one minus the sum of
+the cubes of relative frequencies of values divided by one minus the
+square of the reciprocal of the sample size.  The lowest information
+comes from a variable having only one unique value, followed by a
+highly skewed binary variable.  \code{Info} is reported to
+two decimal places.
+
+A latex method exists for converting the \code{describe} object to a
+LaTeX file.  For numeric variables having at least 20 unique values,
+\code{describe} saves in its returned object the frequencies of 100
+evenly spaced bins running from minimum observed value to the maximum.
+\code{latex} inserts a spike histogram displaying these frequency counts
+in the tabular material using the LaTeX picture environment.  For
+example output see
+\url{http://biostat.mc.vanderbilt.edu/wiki/pub/Main/Hmisc/counties.pdf}.
+Note that the latex method assumes you have the following styles
+installed in your latex installation: setspace and relsize.
+
+Sample weights may be specified to any of the functions, resulting
+in weighted means, quantiles, and frequency tables.
+}
+\usage{
+\method{describe}{vector}(x, descript, exclude.missing=TRUE, digits=4,
+         listunique=0, listnchar=12,
+         weights=NULL, normwt=FALSE, minlength=NULL, \dots)
+\method{describe}{matrix}(x, descript, exclude.missing=TRUE, digits=4, \dots)
+\method{describe}{data.
[...]
ues and the mean of the last
+column of the response values, with a \code{names} attribute of \code{c("N","Mean")}.
+When the response is a \code{Surv} object and the mean is used, this will
+result in the crude proportion of events being used to summarize
+the response.  The actual summary function can be designated through
+\code{options(na.fun.response = "function name")}.
+
+If you are modifying LaTex \code{parskip} or certain other parameters,
+you may need to shrink the area around \code{tabular} and
+\code{verbatim} environments produced by \code{latex.describe}.  You can
+do this using for example
+\code{\\usepackage{etoolbox}\\makeatletter\\preto{\\@verbatim}{\\topsep=-1.4pt
+	\\partopsep=0pt}\\preto{\\@tabular}{\\parskip=2pt
+	\\parsep=0pt}\\makeatother} in the LaTeX preamble.
+}
+\author{
+Frank Harrell
+\cr
+Vanderbilt University
+\cr
+\email{f.harrell@vanderbilt.edu}
+}
+\seealso{
+\code{\link{sas.get}}, \code{\link{quantile}}, \code{\link{table}}, \code{\link{summary}}, \code{\link{model.frame.default}},
+\code{\link{naprint}}, \code{\link{lapply}}, \code{\link{tapply}}, \code{\link[survival]{Surv}}, \code{\link{na.delete}}, \code{\link{na.keep}},
+\code{\link{na.detail.response}}, \code{\link{latex}}
+}
+\examples{
+set.seed(1)
+describe(runif(200),dig=2)    #single variable, continuous
+                              #get quantiles .05,.10,\dots
+
+dfr <- data.frame(x=rnorm(400),y=sample(c('male','female'),400,TRUE))
+describe(dfr)
+
+\dontrun{
+d <- sas.get(".","mydata",special.miss=TRUE,recode=TRUE)
+describe(d)      #describe entire data frame
+attach(d, 1)
+describe(relig)  #Has special missing values .D .F .M .R .T
+                 #attr(relig,"label") is "Religious preference"
+
+#relig : Religious preference  Format:relig
+#    n missing  D  F M R T unique
+# 4038     263 45 33 7 2 1      8
+#
+#0:none (251, 6\%), 1:Jewish (372, 9\%), 2:Catholic (1230, 30\%)
+#3:Jehovah's Witnes (25, 1\%), 4:Christ Scientist (7, 0\%)
+#5:Seventh Day Adv (17, 0\%), 6:Protestant (2025, 50\%), 7:other (111, 3\%)
+
+
+# Method for describing part of a data frame:
+ describe(death.time ~ age*sex + rcs(blood.pressure))
+ describe(~ age+sex)
+ describe(~ age+sex, weights=freqs)  # weighted analysis
+
+ fit <- lrm(y ~ age*sex + log(height))
+ describe(formula(fit))
+ describe(y ~ age*sex, na.action=na.delete)
+# report on number deleted for each variable
+ options(na.detail.response=TRUE)
+# keep missings separately for each x, report on dist of y by x=NA
+ describe(y ~ age*sex)
+ options(na.fun.response="quantile")
+ describe(y ~ age*sex)   # same but use quantiles of y by x=NA
+
+ d <- describe(my.data.frame)
+ d$age                   # print description for just age
+ d[c('age','sex')]       # print description for two variables
+ d[sort(names(d))]       # print in alphabetic order by var. names
+ d2 <- d[20:30]          # keep variables 20-30
+ page(d2)                # pop-up window for these variables
+
+# Test date/time formats and suppression of times when they don't vary
+ library(chron)
+ d <- data.frame(a=chron((1:20)+.1),
+                 b=chron((1:20)+(1:20)/100),
+                 d=ISOdatetime(year=rep(2003,20),month=rep(4,20),day=1:20,
+                               hour=rep(11,20),min=rep(17,20),sec=rep(11,20)),
+                 f=ISOdatetime(year=rep(2003,20),month=rep(4,20),day=1:20,
+                               hour=1:20,min=1:20,sec=1:20),
+                 g=ISOdate(year=2001:2020,month=rep(3,20),day=1:20))
+ describe(d)
+
+# Make a function to run describe, latex.describe, and use the kdvi
+# previewer in Linux to view the result and easily make a pdf file
+
+ ldesc <- function(data) {
+  options(xdvicmd='kdvi')
+  d <- describe(data, desc=deparse(substitute(data)))
+  dvi(latex(d, file='/tmp/z.tex'), nomargins=FALSE, width=8.5, height=11)
+ }
+
+ ldesc(d)
+}
+}
+\keyword{interface}
+\keyword{nonparametric}
+\keyword{category}
+\keyword{distribution}
+\keyword{robust}
+\keyword{models}
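The prose definition of Info transcribes directly into R; this is my reading of the stated formula, offered as a sketch rather than the package source:

    info <- function(x) {
      x <- x[!is.na(x)]
      n <- length(x)
      p <- table(x) / n              # relative frequencies of the distinct values
      (1 - sum(p^3)) / (1 - 1/n^2)
    }
    info(rep(1, 50))        # 0: a single unique value carries no information
    info(c(rep(0, 49), 1))  # about 0.06: highly skewed binary, next lowest
    info(1:50)              # 1: no ties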
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/discrete.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/discrete.Rd	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,66 @@
+\name{discrete}
+\alias{as.discrete}
+\alias{as.discrete.default}
+\alias{discrete}
+\alias{[<-.discrete}
+\alias{[.discrete}
+\alias{[[.discrete}
+\alias{is.discrete}
+\alias{is.na<-.discrete}
+\alias{length<-.discrete}
+
+\title{ Discrete Vector Tools }
+\description{
+  \code{discrete} creates a discrete vector which is distinct from a
+  continuous vector, or a factor/ordered vector.
+  The other functions are tools for manipulating discrete vectors.
+}
+\usage{
+as.discrete(x, ...)
+\method{as.discrete}{default}(x, ...)
+discrete(x, levels = sort(unique.default(x), na.last = TRUE), exclude = NA)
+\method{[}{discrete}(x, ...) <- value
+\method{[}{discrete}(x, ..., drop = FALSE)
+\method{[[}{discrete}(x, i)
+is.discrete(x)
+\method{is.na}{discrete}(x) <- value
+\method{length}{discrete}(x) <- value
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{x}{ a vector }
+  \item{drop}{ should unused levels be dropped? }
+  \item{exclude}{ logical: should \code{NA} be excluded? }
+  \item{i}{ indexing vector }
+  \item{levels}{ character: list of individual level values }
+  \item{value}{ index of elements to set to \code{NA} }
+  \item{\dots}{ arguments to be passed to other functions }
+}
+\details{
+  \code{as.discrete} converts a vector into a discrete vector.
+
+  \code{discrete} creates a discrete vector from provided values.
+
+  \code{is.discrete} tests to see if the vector is a discrete vector.
+}
+\value{
+  \code{as.discrete} and \code{discrete} return a vector of
+  \code{discrete} type.
+
+  \code{is.discrete} returns logical \code{TRUE} if the vector is of
+  class discrete, otherwise it returns \code{FALSE}.
+}
+\author{ Charles Dupont }
+\seealso{ \code{\link{[[}}, \code{\link{[}}, \code{\link{factor}} }
+\examples{
+a <- discrete(1:25)
+a
+
+is.discrete(a)
+
+b <- as.discrete(2:4)
+b
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ manip }
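A small sketch of the replacement methods listed in the usage section; the semantics are inferred from the generics they override and are not taken from the man page examples:

    a <- discrete(c(1, 3, 5, 7))
    a[2] <- 5       # "[<-.discrete" keeps the discrete class
    is.na(a) <- 4   # "is.na<-.discrete": set the 4th element to NA
    length(a) <- 6  # "length<-.discrete": extension pads with NA
    a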
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/dotchart2.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/dotchart2.Rd	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,135 @@
+\name{dotchart2}
+\alias{dotchart2}
+\title{
+Enhanced Dot Chart
+}
+\description{
+\code{dotchart2} is an enhanced version of the \code{dotchart} function
+with several new options.
+}
+\usage{
+dotchart2(data, labels, groups=NULL, gdata=NA, horizontal=TRUE, pch=16,
+          xlab='', ylab='', xlim=NULL, auxdata, auxgdata=NULL, auxtitle,
+          lty=1, lines=TRUE, dotsize = .8,
+          cex = par("cex"), cex.labels = cex,
+          cex.group.labels = cex.labels*1.25, sort.=TRUE,
+          add=FALSE, dotfont=par('font'), groupfont=2,
+          reset.par=add, xaxis=TRUE, width.factor=1.1,
+          lcolor='gray', leavepar=FALSE,
+          axisat=NULL, axislabels=NULL, ...)
+}
+\arguments{
+  \item{data}{a numeric vector whose values are shown on the x-axis}
+  \item{labels}{a vector of labels for each point, corresponding to
+    \code{x}.  If omitted, \code{names(data)} are used, and if there are
+    no \code{names}, integers prefixed by \code{"#"} are used.}
+  \item{groups}{an optional categorical variable indicating how
+    \code{data} values are grouped}
+  \item{gdata}{data values for groups, typically summaries such as group
+    medians}
+  \item{horizontal}{set to \code{FALSE} to make the chart vertical
+    instead of the default}
+  \item{pch}{
+    default character number or value for plotting dots in dot charts.
+    The default is 16.}
+  \item{xlab}{x-axis title}
+  \item{ylab}{y-axis title}
+  \item{xlim}{x-axis limits.  Applies only to \code{horizontal=TRUE}.}
+  \item{auxdata}{
+    a vector of auxiliary data given to \code{dotchart2}, of the same length
+    as the first (\code{data}) argument.  If present, this
+    vector of values will be printed outside the right margin of the dot
+    chart.  Usually \code{auxdata} represents cell sizes.
+  }
+  \item{auxgdata}{
+    similar to \code{auxdata} but corresponding to the \code{gdata}
+    argument.  These usually represent overall sample sizes for each
+    group of lines.}
+  \item{auxtitle}{
+    if \code{auxdata} is given, \code{auxtitle} specifies a column
+    heading for the extra printed data in the chart, e.g., \code{"N"}}
+  \item{lty}{line type for horizontal lines.  Default is 1 for R, 2 for S-Plus}
+  \item{lines}{set to \code{FALSE} to suppress drawing of reference
+    lines}
+  \item{dotsize}{
+    \code{cex} value for drawing dots.  Default is 0.8.  Note that the original
+    \code{dotchart} function used a default of 1.2.}
+  \item{cex}{see \code{\link{par}}}
+  \item{cex.labels}{
+    \code{cex} parameter that applies only to the line labels for the
+    dot chart.  Defaults to \code{cex}.}
+  \item{cex.group.labels}{value of \code{cex} corresponding to \code{gdata}}
+  \item{sort.}{
+    set to \code{FALSE} to keep \code{dotchart2} from sorting the input
+    data, i.e., it will assume that the data are already properly
+    arranged.  This is especially useful when you are using \code{gdata}
+    and \code{groups} and you want to control the
+    order that groups appear on the chart (from top to bottom).}
+  \item{add}{set to \code{TRUE} to add to an existing plot}
+  \item{dotfont}{
+    font number of plotting dots.  Default is one.  Use \code{-1} to
+    use "outline" fonts.  For example, \code{pch=183, dotfont=-1}
+    plots an open circle for UNIX on postscript.  \code{pch=1} makes
+    an open octagon under Windows.}
+  \item{groupfont}{
+    font number to use in drawing \code{group} labels for \code{dotchart2}.
+    Default is \code{2} for boldface.
+  }
+  \item{reset.par}{
+    set to \code{FALSE} to cause \code{dotchart2} to not reset the \code{par}
+    parameters when finished.  This is useful when \code{add=TRUE} is about to
+    be used in another call.  The default is to reset the \code{par}
+    parameters if \code{add=TRUE} and not if \code{add=FALSE}, i.e., the
+    program assumes that only one set of points will be added to an
+    existing set.  If you fail to use \code{reset.par=TRUE} for the
+    first of a series of plots, the next call to \code{plot} with
+    \code{add=TRUE} will result in distorted x-axis scaling.}
+  \item{xaxis}{set to \code{FALSE} to suppress drawing x-axis}
+  \item{width.factor}{
+    when the calculated left margin turns out to be faulty, specify a
+    factor by which to multiply the left margin as \code{width.factor} to get
+    the appropriate space for labels on horizontal charts.}
+  \item{lcolor}{
+    color for horizontal reference lines.  Default is \code{"gray"} for R,
+    \code{par("col")} for S-Plus.}
+  \item{leavepar}{set to \code{TRUE} to leave \code{par()} unchanged.
+    This assumes the user has allocated sufficient left and right
+    margins for a horizontal dot chart.}
+  \item{axisat}{a vector of tick mark locations to pass to \code{axis}.
+    Useful if transforming the data axis}
+  \item{axislabels}{a vector of strings specifying axis tick mark
+    labels.  Useful if transforming the data axis}
+  \item{...}{arguments passed to \code{plot.default}}
+}
+\section{Side Effects}{
+\code{dotchart2} will leave \code{par} altered if \code{reset.par=FALSE}.
+}
+\author{
+Frank Harrell
+\cr
+Department of Biostatistics
+\cr
+Vanderbilt University
+\cr
+\email{f.harrell@vanderbilt.edu}
+}
+\seealso{
+\code{\link{dotchart}}
+}
+\examples{
+set.seed(135)
+maj <- factor(c(rep('North',13),rep('South',13)))
+g <- paste('Category',rep(letters[1:13],2))
+n <- sample(1:15000, 26, replace=TRUE)
+y1 <- runif(26)
+y2 <- pmax(0, y1 - runif(26, 0, .1))
+dotchart2(y1, g, groups=maj, auxdata=n, auxtitle='n', xlab='Y')
+dotchart2(y2, g, groups=maj, pch=17, add=TRUE)
+## Compare with dotchart function (no superpositioning or auxdata allowed):
+## dotchart(y1, g, groups=maj, xlab='Y')
+
+## To plot using a transformed scale add for example
+## axisat=sqrt(pretty(y)), axislabels=pretty(y)
+}
+\keyword{hplot}
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/dotchart3.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/dotchart3.Rd	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,131 @@
+\name{dotchart3}
+\alias{dotchart3}
+\alias{summaryD}
+\title{Enhanced Version of dotchart Function}
+\description{
+This is an adaptation of the R dotchart function that sorts categories
+top to bottom, adds \code{auxdata} and \code{auxtitle} arguments to put
+extra information in the right margin, and adds arguments \code{cex.labels},
+\code{cex.group.labels}, and \code{groupfont}.  By default, group
+headings are in a larger, bold font.
+\code{dotchart3} also cuts a bit of white space from the top and bottom
+of the chart.  The most significant change, however, is in how \code{x}
+is interpreted.  Columns of \code{x} no longer provide an alternate way
+to define groups.  Instead, they define superpositioned values.  This is
+useful for showing three quartiles, for example.  Going along with this
+change, \code{pch} can now be a vector specifying symbols to use going
+across columns of \code{x}.  \code{x} was changed in this way because
+putting multiple points on a line (e.g., quartiles) while keeping track of
+\code{par()} parameters when \code{dotchart2} was called
+with \code{add=TRUE} was cumbersome.  All the dotchart functions change
+the margins to account for horizontal labels.
+
+\code{summaryD} creates aggregate data using \code{\link{summarize}} and
+calls \code{dotchart3} with suitable arguments to summarize data by
+major and minor categories.
+}
+\usage{
+dotchart3(x, labels = NULL, groups = NULL, gdata = NULL,
+          cex = par("cex"), pch = 21, gpch = pch, bg = par("bg"),
+          color = par("fg"), gcolor = par("fg"), lcolor = "gray",
+          xlim = range(c(x, gdata), na.rm=TRUE), main = NULL, xlab = NULL,
+          ylab = NULL, auxdata = NULL, auxtitle = NULL, auxgdata=NULL,
+          axisat=NULL, axislabels=NULL,
+          cex.labels = cex, cex.group.labels = cex.labels * 1.25,
+          cex.auxdata=cex, groupfont = 2, ...)
+
+summaryD(formula, data=NULL, fun=mean, funm=fun,
+         groupsummary=TRUE, auxvar=NULL, auxtitle='',
+         vals=length(auxvar) > 0, fmtvals=format,
+         cex.auxdata=.7, xlab=v[1], ylab=NULL,
+         gridevery=NULL, gridcol=gray(.95), sort=TRUE, ...)
+}
+\arguments{
+  \item{x}{a numeric vector or matrix}
+  \item{labels}{labels for categories corresponding to rows of
+    \code{x}.  If not specified these are taken from row names of \code{x}.}
+  \item{groups,gdata,cex,pch,gpch,bg,color,gcolor,lcolor,xlim,main,xlab,ylab}{see \code{\link{dotchart}}}
+  \item{auxdata}{a vector of information to be put in the right margin,
+    in the same order as \code{x}.  May be numeric, character, or a
+    vector of expressions containing \code{\link{plotmath}} markup}
+  \item{auxtitle}{a column heading for \code{auxdata}}
+  \item{auxgdata}{similar to \code{auxdata} but corresponding to the
+    \code{gdata} argument.  These usually represent overall sample sizes
+    for each group of lines.}
+  \item{axisat}{a vector of tick mark locations to pass to \code{axis}.
+    Useful if transforming the data axis}
+  \item{axislabels}{a vector of strings specifying axis tick mark
+    labels.  Useful if transforming the data axis}
+  \item{cex.labels}{\code{cex} for labels}
+  \item{cex.group.labels}{\code{cex} for group labels}
+  \item{cex.auxdata}{\code{cex} for \code{auxdata}}
+  \item{groupfont}{font number for group headings}
+  \item{\dots}{other arguments passed to some of the graphics functions,
+    or to \code{dotchart3} from \code{summaryD}}
+  \item{formula}{a formula with one variable on the left hand side (the
+    variable to compute summary statistics on), and one or two
+    variables on the right hand side.  If there are two variables,
+    the first is taken as the major grouping variable.  If the left
+    hand side variable is a matrix it has to be a legal R variable
+    name, not an expression, and \code{fun} needs to be able to
+    process a matrix.}
+  \item{data}{a data frame or list used to find the variables in
+    \code{formula}.  If omitted, the parent environment is used.}
+  \item{fun}{a summarization function creating a single number from a
+    vector.  Default is the mean.}
+  \item{funm}{applies if there are two right hand variables and
+    \code{groupsummary=TRUE} and the marginal summaries over just
+    the first \code{x} variable need to be computed differently
+    than the summaries that are cross-classified by both
+    variables.  \code{funm} defaults to \code{fun} and should
+    have the same structure as \code{fun}.}
+  \item{groupsummary}{by default, when there are two right-hand
+    variables, \code{summarize(..., fun)} is called a second time
+    without the use of the second variable, to obtain marginal
+    summaries for the major grouping variable and display the
+    results as a dot (and optionally in the right margin).  Set
+    \code{groupsummary=FALSE} to suppress this information.}
+  \item{auxvar}{when \code{fun} returns more than one statistic and the
+    user names the elements in the returned vector, you can specify
+    \code{auxvar} as a single character string naming one of them.
+    This will cause the named element to be written in the right
+    margin, and that element to be deleted when plotting the statistics.}
+  \item{vals}{set to \code{TRUE} to show data values (dot
+    locations) in the right margin.  Defaults to \code{TRUE} if
+    \code{auxvar} is specified.}
+  \item{fmtvals}{an optional function to format values before putting
+    them in the right margin.  Default is the \code{format}
+    function.}
+  \item{gridevery}{specify a positive number to draw very faint vertical
+    grid lines every \code{gridevery} \code{x}-axis units}
+  \item{gridcol}{color for grid lines; default is very faint gray scale}
+  \item{sort}{specify \code{sort=FALSE} to plot data in the original
+    order, from top to bottom on the dot chart}
+}
+\value{the function returns invisibly}
+\author{Frank Harrell}
+\seealso{\code{\link{dotchart}},\code{\link{dotchart2}},\code{\link{summarize}},
+  \code{\link{rlegend}}}
+\examples{
+set.seed(135)
+maj <- factor(c(rep('North',13),rep('South',13)))
+g <- paste('Category',rep(letters[1:13],2))
+n <- sample(1:15000, 26, replace=TRUE)
+y1 <- runif(26)
+y2 <- pmax(0, y1 - runif(26, 0, .1))
+dotchart3(cbind(y1,y2), g, groups=maj, auxdata=n, auxtitle='n',
+          xlab='Y', pch=c(1,17))
+## Compare with dotchart function (no superpositioning or auxdata allowed):
+## dotchart(y1, g, groups=maj, xlab='Y')
+
+summaryD(y1 ~ maj + g, xlab='Mean')
+summaryD(y1 ~ maj + g, groupsummary=FALSE)
+summaryD(y1 ~ g, fmtvals=function(x) sprintf('\%4.2f', x))
+Y <- cbind(y1, y2)   # summaryD cannot handle cbind(...) ~ ...
+summaryD(Y ~ maj + g, fun=function(y) y[1,], pch=c(1,17))
+rlegend(.1, 26, c('y1','y2'), pch=c(1,17))
+
+summaryD(y1 ~ maj, fun=function(y) c(mean(y), n=length(y)),
+         auxvar='n', auxtitle='N')
+}
+\keyword{hplot}
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/epi.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/epi.Rd	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,120 @@
+\name{mhgr}
+\alias{mhgr}
+\alias{print.mhgr}
+\alias{lrcum}
+\alias{print.lrcum}
+\title{Miscellaneous Functions for Epidemiology}
+\description{
+  The \code{mhgr} function computes the Cochran-Mantel-Haenszel stratified
+  risk ratio and its confidence limits using the Greenland-Robins variance
+  estimator.
+
+The \code{lrcum} function takes the results of a series of 2x2 tables
+representing the relationship between test positivity and diagnosis and
+computes positive and negative likelihood ratios (with all their
+deficiencies) and the variance of
+their logarithms.  Cumulative likelihood ratios and their confidence
+intervals (assuming independence of tests) are computed, assuming a
+string of all positive tests or a string of all negative tests.  The
+method of Simel et al as described in Altman et al is used.
+}
+\usage{
+mhgr(y, group, strata, conf.int = 0.95)
+\method{print}{mhgr}(x, \dots)
+
+lrcum(a, b, c, d, conf.int = 0.95)
+\method{print}{lrcum}(x, dec=3, \dots)
+}
+\arguments{
+  \item{y}{a binary response variable}
+  \item{group}{a variable with two unique values specifying comparison groups}
+  \item{strata}{the stratification variable}
+  \item{conf.int}{confidence level}
+  \item{x}{an object created by \code{mhgr} or \code{lrcum}}
+  \item{a}{frequency of true positive tests}
+  \item{b}{frequency of false positive tests}
+  \item{c}{frequency of false negative tests}
+  \item{d}{frequency of true negative tests}
+  \item{dec}{number of places to the right of the decimal to print for
+    \code{lrcum}}
+  \item{\dots}{additional arguments to be passed to other print functions}
+}
+\details{
+  Uses equations 4 and 13 from Greenland and Robins.
+}
+\value{
+  a list of class \code{"mhgr"} or of class \code{"lrcum"}.
+}
+\references{
+  Greenland S, Robins JM (1985): Estimation of a common effect parameter
+  from sparse follow-up data.  Biometrics 41:55-68.
+
+  Altman DG, Machin D, Bryant TN, Gardner MJ, Eds. (2000): Statistics with
+  Confidence, 2nd Ed.  Bristol: BMJ Books, 105-110.
+
+  Simel DL, Samsa GP, Matchar DB (1991): Likelihood ratios with
+  confidence: sample size estimation for diagnostic test studies.  J
+  Clin Epi 44:763-770.
+}
+\author{Frank E Harrell Jr \email{f.harrell@vanderbilt.edu}}
+\seealso{\code{\link{logrank}}}
+\examples{
+# Create the Migraine dataset used in Example 28.6 in the SAS PROC FREQ guide
+d <- expand.grid(response=c('Better','Same'),
+                 treatment=c('Active','Placebo'),
+                 sex=c('female','male'))
+d$count <- c(16, 11, 5, 20, 12, 16, 7, 19)
+d
+# Expand data frame to represent raw data
+r <- rep(1:8, d$count)
+d <- d[r,]
+with(d, mhgr(response=='Better', treatment, sex))
+
+# Discrete survival time example, to get Cox-Mantel relative risk and CL
+# From Stokes ME, Davis CS, Koch GG, Categorical Data Analysis Using the
+# SAS System, 2nd Edition, Section 17.3, p. 596-599
+#
+# Input data in Table 17.5
+d <- expand.grid(treatment=c('A','P'), center=1:3)
+d$healed2w    <- c(15,15,17,12, 7, 3)
+d$healed4w    <- c(17,17,17,13,17,17)
+d$notHealed4w <- c( 2, 7,10,15,16,18)
+d
+# Reformat to the way most people would collect raw data
+d1 <- d[rep(1:6, d$healed2w),]
+d1$time <- '2'
+d1$y <- 1
+d2 <- d[rep(1:6, d$healed4w),]
+d2$time <- '4'
+d2$y <- 1
+d3 <- d[rep(1:6, d$notHealed4w),]
+d3$time <- '4'
+d3$y <- 0
+d <- rbind(d1, d2, d3)
+d$healed2w <- d$healed4w <- d$notHealed4w <- NULL
+d
+# Finally, duplicate appropriate observations to create 2 and 4-week
+# risk sets.  Healed and not healed at 4w need to be in the 2-week
+# risk set as not healed
+d2w <- subset(d, time=='4')
+d2w$time <- '2'
+d2w$y <- 0
+d24 <- rbind(d, d2w)
+with(d24, table(y, treatment, time, center))
+# Matches Table 17.6
+
+with(d24, mhgr(y, treatment, interaction(center, time, sep=';')))
+
+# Get cumulative likelihood ratios and their 0.95 confidence intervals
+# based on the following two tables
+#
+#          Disease       Disease
+#          +     -       +     -
+# Test +  39     3      20     5
+# Test -  21    17      22    15
+
+lrcum(c(39,20), c(3,5), c(21,22), c(17,15))
+}
+\keyword{category}
+\keyword{htest}
+\concept{epidemiology}
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/equalBins.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/equalBins.Rd	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,41 @@
+\name{equalBins}
+\alias{equalBins}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Multicolumn Formatting}
+\description{
+  Expands the widths of either the supercolumns or the subcolumns so that
+  the sum of the supercolumn widths is the same as the sum of the
+  subcolumn widths.
+}
+\usage{
+equalBins(widths, subwidths)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{widths}{widths of the supercolumns.}
+  \item{subwidths}{list of widths of the subcolumns for each supercolumn.}
+}
+\details{
+  This determines the correct subwidths of the various columns in a table
+  for printing.  The correct width of a multicolumn is determined by
+  summing the widths of its subcolumns.
+}
+\value{
+  widths of the columns for a table.
+}
+\author{Charles Dupont}
+\seealso{\code{\link{nchar}}, \code{\link{stringDims}}}
+\examples{
+mcols <- c("Group 1", "Group 2")
+mwidth <- nchar(mcols, type="width")
+spancols <- c(3,3)
+ccols <- c("a", "deer", "ad", "cat", "help", "bob")
+cwidth <- nchar(ccols, type="width")
+
+subwidths <- partition.vector(cwidth, spancols)
+
+equalBins(mwidth, subwidths)
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{print}
diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/errbar.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/man/errbar.Rd	Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,126 @@
+\name{errbar}
+\alias{errbar}
+\title{Plot Error Bars}
+\description{
+  Adds vertical error bars to an existing plot or makes a new
+  plot with error bars.
+}
+\usage{
+errbar(x, y, yplus, yminus, cap=0.015, main = NULL,
+       sub=NULL, xlab=as.character(substitute(x)),
+       ylab=if(is.factor(x) || is.character(x)) ""
+            else as.character(substitute(y)),
+       add=FALSE, lty=1, type='p', ylim=NULL,
+       lwd=1, pch=16, errbar.col, Type=rep(1, length(y)),
+       \dots)
+}
+\arguments{
+  \item{x}{
+    vector of numeric x-axis values (for vertical error bars) or a factor or
+    character variable (for horizontal error bars, \code{x} representing the
+    group labels)
+  }
+  \item{y}{
+    vector of y-axis values.
+  }
+  \item{yplus}{
+    vector of y-axis values: the tops of the error bars.
+  }
+  \item{yminus}{
+    vector of y-axis values: the bottoms of the error bars.
+  }
+  \item{cap}{
+    the width of the little lines at the tops and bottoms of the error bars
+    in units of the width of the plot.  Defaults to \code{0.015}.
+  }
+  \item{main}{
+    a main title for the plot, see also \code{\link{title}}.
+  }
+  \item{sub}{
+    a sub title for the plot.
+  }
+  \item{xlab}{
+    optional x-axis labels if \code{add=FALSE}.
+  }
+  \item{ylab}{
+    optional y-axis labels if \code{add=FALSE}.  Defaults to blank for horizontal charts.
+  }
+  \item{add}{
+    set to \code{TRUE} to add bars to an existing plot (available only for vertical
+    error bars)
+  }
+  \item{lty}{
+    type of line for error bars
+  }
+  \item{type}{
+    type of point.  Use \code{type="b"} to connect dots.
+  }
+  \item{ylim}{
+    y-axis limits.  Default is to use range of \code{y}, \code{yminus}, and \code{yplus}.  For
+    horizontal charts, \code{ylim} is really the \code{x}-axis range, excluding
+    differences.
+  }
+  \item{lwd}{
+    line width for line segments (not main line)
+  }
+  \item{pch}{
+    character to use as the point.
+  }
+  \item{errbar.col}{
+    color to use for drawing error bars.
+  }
+  \item{Type}{
+    used for horizontal bars only.  Is an integer vector with values \code{1}
+    if corresponding values represent simple estimates, \code{2} if they
+    represent differences.
+  }
+  \item{...}{
+    other parameters passed to all graphics functions.
+  }
+}
+\details{
+  \code{errbar} adds vertical error bars to an existing plot or makes a new
+  plot with error bars.  It can also make a horizontal error bar plot
+  that shows error bars for group differences as well as bars for
+  groups.  For the latter type of plot, the lower x-axis scale
+  corresponds to group estimates and the upper scale corresponds to
+  differences.  The spacings of the two scales are identical but the
+  scale for differences has its origin shifted so that zero may be
+  included.  If at least one of the confidence intervals includes zero,
+  a vertical dotted reference line at zero is drawn.
+}
+\author{
+Charles Geyer, University of Chicago.  Modified by Frank Harrell,
+Vanderbilt University, to handle missing data, to add the parameters
+\code{add} and \code{lty}, and to implement horizontal charts with differences.
+}
+\examples{
+set.seed(1)
+x <- 1:10
+y <- x + rnorm(10)
+delta <- runif(10)
+errbar( x, y, y + delta, y - delta )
+
+
+# Show bootstrap nonparametric CLs for 3 group means and for
+# pairwise differences on same graph
+group <- sample(c('a','b','d'), 200, TRUE)
+y <- runif(200) + .25*(group=='b') + .5*(group=='d')
+cla <- smean.cl.boot(y[group=='a'],B=100,reps=TRUE)  # usually B=1000
+a   <- attr(cla,'reps')
+clb <- smean.cl.boot(y[group=='b'],B=100,reps=TRUE)
+b   <- attr(clb,'reps')
+cld <- smean.cl.boot(y[group=='d'],B=100,reps=TRUE)
+d   <- attr(cld,'reps')
+a.b <- quantile(a-b,c(.025,.975))
+a.d <- quantile(a-d,c(.025,.975))
+b.d <- quantile(b-d,c(.025,.975))
+errbar(c('a','b','d','a - b','a - d','b - d'),
+       c(cla[1],clb[1],cld[1],cla[1]-clb[1],cla[1]-cld[1],clb[1]-cld[1]),
+       c(cla[3],clb[3],cld[3],a.b[2],a.d[2],b.d[2]),
+       c(cla[2],clb[2],cld[2],a.b[1],a.d[1],b.d[1]),
+       Type=c(1,1,1,2,2,2), xlab='', ylab='')
+
+}
+\keyword{hplot}
+% Converted by Sd2Rd version 1.21.
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/escapeRegex.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/escapeRegex.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,52 @@
+\name{escapeRegex}
+\alias{escapeRegex}
+\alias{escapeBS}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Escapes any characters that would have special meaning in a regular expression }
+\description{
+ Escapes any characters that would have special meaning in a regular expression.
+}
+\usage{
+escapeRegex(string)
+escapeBS(string)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{string}{ string being operated on. }
+}
+\details{
+ \code{escapeRegex} will escape any characters that would have
+ special meaning in a regular expression. For any string
+ \code{grep(escapeRegex(string), string)} will always be true.
+
+ \code{escapeBS} will escape any backslash \samp{\\} in a string.
+}
+\value{
+ The value of the string with any characters that would have
+ special meaning in a regular expression escaped.
+}
+\author{
+ Charles Dupont\cr
+ Department of Biostatistics\cr
+ Vanderbilt University
+}
+\seealso{\code{\link[base]{grep}} }
+\examples{
+string <- "this\\\\(system) {is} [full]."
+escapeRegex(string)
+
+escapeBS(string)
+
+\dontshow{
+if(!any(grep(escapeRegex(string), string))) {
+ stop("function escapeRegex failed test")
+}
+
+if(escapeBS(string) != "this\\\\\\\\(system) {is} [full].") {
+ stop("function escapeBS failed test")
+}
+}
+}
+\keyword{ manip }% at least one, from doc/KEYWORDS
+\keyword{ character }% __ONLY ONE__ keyword per line
+\keyword{ programming } |
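A usage note on why the escaping matters (a minimal sketch, assuming only base R plus escapeRegex as documented above): unescaped metacharacters make a literal string match the wrong thing.

    ## Sketch: escaping turns a literal filename containing regex
    ## metacharacters into a safe fixed-string pattern.
    library(Hmisc)

    files   <- c("report (final).txt", "report final.txt")
    literal <- "report (final).txt"

    grep(literal, files)               # 2: "(final)" and "." act as regex
    grep(escapeRegex(literal), files)  # 1: matches the literal name only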
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/event.chart.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/event.chart.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,705 @@
+\name{event.chart}
+\alias{event.chart}
+\title{
+ Flexible Event Chart for Time-to-Event Data
+}
+\description{
+ Creates an event chart on the current graphics device. Also, allows user
+ to plot legend on plot area or on separate page.
+ Contains features useful for plotting data with time-to-event outcomes
+ which arise in a variety of studies,
+ including randomized clinical trials and non-randomized cohort studies.
+ This function can use as input a matrix or a data frame, although greater
+ utility and ease of use will be seen with a data frame.
+}
+\usage{
+event.chart(data, subset.r = 1:dim(data)[1], subset.c = 1:dim(data)[2],
+
+ sort.by = NA, sort.ascending = TRUE,
+ sort.na.last = TRUE, sort.after.subset = TRUE,
+ y.var = NA, y.var.type = "n",
+ y.jitter = FALSE, y.jitter.factor = 1,
+ y.renum = FALSE, NA.rm = FALSE, x.reference = NA,
+ now = max(data[, subset.c], na.rm = TRUE),
+ now.line = FALSE, now.line.lty = 2,
+ now.line.lwd = 1, now.line.col = 1, pty = "m",
+ date.orig = c(1, 1, 1960), titl = "Event Chart",
+
+ y.idlabels = NA, y.axis = "auto",
+ y.axis.custom.at = NA, y.axis.custom.labels = NA,
+ y.julian = FALSE, y.lim.extend = c(0, 0),
+ y.lab = ifelse(is.na(y.idlabels), "", as.character(y.idlabels)),
+
+ x.axis.all = TRUE, x.axis = "auto",
+ x.axis.custom.at = NA, x.axis.custom.labels = NA,
+ x.julian = FALSE, x.lim.extend = c(0, 0), x.scale = 1,
+ x.lab = ifelse(x.julian, "Follow-up Time", "Study Date"),
+
+ line.by = NA, line.lty = 1, line.lwd = 1, line.col = 1,
+ line.add = NA, line.add.lty = NA,
+ line.add.lwd = NA, line.add.col = NA,
+ point.pch = 1:length(subset.c),
+ point.cex = rep(0.6, length(subset.c)),
+ point.col = rep(1, length(subset.c)),
+
+ point.cex.mult = 1., point.cex.mult.var = NA,
+ extra.points.no.mult = rep(NA, length(subset.c)),
+
+ legend.plot = FALSE, legend.location = "o", legend.titl = titl,
+ legend.titl.cex = 3, legend.titl.line = 1,
+ legend.point.at = list(x = c(5, 95), y = c(95, 30)),
+ legend.point.pch = point.pch,
+ legend.point.text = ifelse(rep(is.data.frame(data), length(subset.c)),
+ names(data[, subset.c]),
+ subset.c),
+ legend.cex = 2.5, legend.bty = "n",
+ legend.line.at = list(x = c(5, 95), y = c(20, 5)),
+ legend.line.text = names(table(as.character(data[, line.by]),
+ exclude = c("", "NA"))),
+ legend.line.lwd = line.lwd, legend.loc.num = 1,
+
+ \dots)
+}
+\arguments{
+ \item{data}{
+ a matrix or data frame with rows corresponding to subjects and
+ columns corresponding to variables. Note that for a data frame or
+ matrix containing multiple time-to-event
+ data (e.g., time to recurrence, time to death, and time to
+ last follow-up), one column is required for each specific event.
+ }
+ \item{subset.r}{
+ subset of rows of original matrix or data frame to place in event chart.
+ Logical arguments may be used here (e.g., \code{treatment.arm == 'a'}, if
+ the data frame, data, has been attached to the search directory;
+ otherwise, \code{data$treatment.arm == "a"}).
+ }
+ \item{subset.c}{
+ subset of columns of original matrix or data frame to place in event chart;
+ if working with a data frame, a vector of data frame variable names may be
+ used for subsetting purposes (e.g., \code{c('randdate', 'event1')}).
+ }
+ \item{sort.by}{
+ column(s) or data frame variable name(s) with which to sort the chart's output.
+ The default is \code{NA}, thereby resulting in a chart sorted by original row number.
+ }
+ \item{sort.ascending}{
[...]
+# postscript('example1.ps', horizontal=TRUE)
+ event.chart(cdcaids,
+ subset.c=c('infedate','diagdate','dethdate','censdate'),
+ x.lab = 'observation dates',
+ y.lab='patients (sorted by AIDS diagnosis date)',
+ titl='AIDS data calendar event chart 1',
+ point.pch=c(1,2,15,0), point.cex=c(1,1,0.8,0.8),
+ legend.plot=TRUE, legend.location='i', legend.cex=1.0,
+ legend.point.text=c('transfusion','AIDS diagnosis','death','censored'),
+ legend.point.at = list(c(7210, 8100), c(35, 27)), legend.bty='o')
+
+
+# To produce simple interval event chart (with internal legend):
+# postscript('example2.ps', horizontal=TRUE)
+ event.chart(cdcaids,
+ subset.c=c('infedate','diagdate','dethdate','censdate'),
+ x.lab = 'time since transfusion (in days)',
+ y.lab='patients (sorted by AIDS diagnosis date)',
+ titl='AIDS data interval event chart 1',
+ point.pch=c(1,2,15,0), point.cex=c(1,1,0.8,0.8),
+ legend.plot=TRUE, legend.location='i', legend.cex=1.0,
+ legend.point.text=c('transfusion','AIDS diagnosis','death','censored'),
+ x.reference='infedate', x.julian=TRUE,
+ legend.bty='o', legend.point.at = list(c(1400, 1950), c(7, -1)))
+
+
+# To produce simple interval event chart (with internal legend),
+# but now with flexible diagdate symbol size based on viral load variable:
+# postscript('example2a.ps', horizontal=TRUE)
+ event.chart(cdcaids,
+ subset.c=c('infedate','diagdate','dethdate','censdate'),
+ x.lab = 'time since transfusion (in days)',
+ y.lab='patients (sorted by AIDS diagnosis date)',
+ titl='AIDS data interval event chart 1a, with viral load at diagdate represented',
+ point.pch=c(1,2,15,0), point.cex=c(1,1,0.8,0.8),
+ point.cex.mult = 0.00002, point.cex.mult.var = 'viralload', extra.points.no.mult = c(1,NA,1,1),
+ legend.plot=TRUE, legend.location='i', legend.cex=1.0,
+ legend.point.text=c('transfusion','AIDS diagnosis','death','censored'),
+ x.reference='infedate', x.julian=TRUE,
+ legend.bty='o', legend.point.at = list(c(1400, 1950), c(7, -1)))
+
+
+# To produce more complicated interval chart which is
+# referenced by infection date, and sorted by age and incubation period:
+# postscript('example3.ps', horizontal=TRUE)
+ event.chart(cdcaids,
+ subset.c=c('infedate','diagdate','dethdate','censdate'),
+ x.lab = 'time since diagnosis of AIDS (in days)',
+ y.lab='patients (sorted by age and incubation length)',
+ titl='AIDS data interval event chart 2 (sorted by age, incubation)',
+ point.pch=c(1,2,15,0), point.cex=c(1,1,0.8,0.8),
+ legend.plot=TRUE, legend.location='i', legend.cex=1.0,
+ legend.point.text=c('transfusion','AIDS diagnosis','death','censored'),
+ x.reference='diagdate', x.julian=TRUE, sort.by=c('age','diffdate'),
+ line.by='age', line.lty=c(1,3,2,4), line.lwd=rep(1,4), line.col=rep(1,4),
+ legend.bty='o', legend.point.at = list(c(-1350, -800), c(7, -1)),
+ legend.line.at = list(c(-1350, -800), c(16, 8)),
+ legend.line.text=c('age = 1', ' = 2', ' = 3', ' = 4'))
+
+
+# To produce the Goldman chart:
+# postscript('example4.ps', horizontal=TRUE)
+ event.chart(cdcaids,
+ subset.c=c('infedate','diagdate','dethdate','censdate'),
+ x.lab = 'time since transfusion (in days)', y.lab='dates of observation',
+ titl='AIDS data Goldman event chart 1',
+ y.var = c('infedate'), y.var.type='d', now.line=TRUE, y.jitter=FALSE,
+ point.pch=c(1,2,15,0), point.cex=c(1,1,0.8,0.8), mgp = c(3.1,1.6,0),
+ legend.plot=TRUE, legend.location='i', legend.cex=1.0,
+ legend.point.text=c('transfusion','AIDS diagnosis','death','censored'),
+ x.reference='infedate', x.julian=TRUE,
+ legend.bty='o', legend.point.at = list(c(1500, 2800), c(9300, 10000)))
+
+
+# To convert coded time-to-event data, then draw an event chart:
+surv.time <- c(5,6,3,1,2)
+cens.ind <- c(1,0,1,1,0)
+surv.data <- cbind(surv.time,cens.ind)
+event.data <- event.convert(surv.data)
+event.chart(cbind(rep(0,5),event.data),x.julian=TRUE,x.reference=1)
+}
+\keyword{hplot}
+\keyword{survival}
+% Converted by Sd2Rd version 1.21. |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/event.convert.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/event.convert.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,68 @@
+\name{event.convert}
+\alias{event.convert}
+\title{
+ Event Conversion for Time-to-Event Data
+}
+\description{
+ Converts a two-column data matrix with event time and event code into
+ a multiple-column matrix of event times, with one event type per column
+}
+\usage{
+event.convert(data2, event.time = 1, event.code = 2)
+}
+\arguments{
+ \item{data2}{
+ a matrix or data frame with at least 2 columns; by default, the first
+ column contains the event time and the second column contains the k
+ event codes (e.g. 1=dead, 0=censored)
+ }
+ \item{event.time}{
+ the column number in \code{data2} that contains the event time
+ }
+ \item{event.code}{
+ the column number in \code{data2} that contains the event code
+ }
+}
+\details{
+ In survival analysis, the data typically come in two
+ columns: one column containing survival time and the other
+ containing censoring indicator or event code. The
+ \code{event.convert} function converts this type of data into
+ multiple columns of event times, one column for each event
+ type, suitable for the \code{event.chart} function.
+}
+\author{
+ J. Jack Lee and Kenneth R. Hess
+ \cr
+ Department of Biostatistics
+ \cr
+ University of Texas
+ \cr
+ M.D. Anderson Cancer Center
+ \cr
+ Houston, TX 77030
+ \cr
+ \email{jjlee@mdanderson.org}, \email{khess@mdanderson.org}
+
+
+ Joel A. Dubin
+ \cr
+ Department of Statistics
+ \cr
+ University of Waterloo
+ \cr
+ \email{jdubin@uwaterloo.ca}
+}
+\seealso{
+ \code{\link{event.history}}, \code{\link{Date}}, \code{\link{event.chart}}
+}
+\examples{
+# To convert coded time-to-event data, then draw an event chart:
+surv.time <- c(5,6,3,1,2)
+cens.ind <- c(1,0,1,1,0)
+surv.data <- cbind(surv.time,cens.ind)
+event.data <- event.convert(surv.data)
+event.chart(cbind(rep(0,5),event.data),x.julian=TRUE,x.reference=1)
+}
+\keyword{hplot}
+\keyword{survival} |
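To see the shape of the conversion without running Hmisc, here is a base-R sketch of the same idea; the column names are illustrative, not event.convert's actual output format.

    ## Sketch: one (time, code) pair per row becomes one event-time
    ## column per event code.
    surv.time <- c(5, 6, 3, 1, 2)
    cens.ind  <- c(1, 0, 1, 1, 0)    # 1 = dead, 0 = censored

    cbind(death.time  = ifelse(cens.ind == 1, surv.time, NA),
          censor.time = ifelse(cens.ind == 0, surv.time, NA))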
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/event.history.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/event.history.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,348 @@
+\name{event.history}
+\alias{event.history}
+\title{Produces event.history graph for survival data}
+\description{
+ Produces an event history graph for right-censored survival data,
+ including time-dependent covariate status, as described in
+ Dubin, Muller, and Wang (2001). Effectively,
+ a Kaplan-Meier curve is produced with supplementary information
+ regarding individual survival information, censoring information, and
+ status over time of an individual time-dependent covariate or
+ time-dependent covariate function for both uncensored and censored
+ individuals.
+}
+\usage{
+event.history(data, survtime.col, surv.col,
+ surv.ind = c(1, 0), subset.rows = NULL,
+ covtime.cols = NULL, cov.cols = NULL,
+ num.colors = 1, cut.cov = NULL, colors = 1,
+ cens.density = 10, mult.end.cens = 1.05,
+ cens.mark.right = FALSE, cens.mark = "-",
+ cens.mark.ahead = 0.5, cens.mark.cutoff = -1e-08,
+ cens.mark.cex = 1,
+ x.lab = "time under observation",
+ y.lab = "estimated survival probability",
+ title = "event history graph", ...)
+}
+\arguments{
+ \item{data}{
+ A matrix or data frame with rows corresponding to units
+ (often individuals) and columns corresponding to survival time and
+ event/censoring indicator. Also, multiple columns may be devoted to
+ time-dependent covariate level and time change.
+ }
+ \item{survtime.col}{
+ Column (in data) representing minimum of time-to-event or
+ right-censoring time for individual.
+ }
+ \item{surv.col}{
+ Column (in data) representing event indicator for an individual.
+ Though, traditionally, such an indicator will be 1 for an event and
+ 0 for a censored observation, this indicator can be represented
+ by any two numbers, made explicit by the surv.ind argument.
+ }
+ \item{surv.ind}{
+ Two-element vector representing, respectively, the
+ number for an event, as listed in \code{surv.col},
+ followed by the number for a censored
+ observation. Default is the traditional survival data
+ representation, i.e., \code{c(1,0)}.
+ }
+ \item{subset.rows}{
+ Subset of rows of original matrix or data frame (data) to
+ place in event history graph.
+ Logical arguments may be used here (e.g., \code{treatment.arm == "a"}, if
+ the data frame, data, has been attached to the search directory).
+ }
+ \item{covtime.cols}{
+ Column(s) (in data) representing the time when change of time-dependent
+ covariate (or time-dependent covariate function) occurs.
+ There should be a unique non-\code{NA} entry in the column for each such change
+ (along with corresponding \code{cov.cols} column entry representing
+ the value of the covariate or function at that change time).
+ Default is \code{NULL}, meaning no time-dependent covariate information
+ will be presented in the graph.
+ }
+ \item{cov.cols}{
+ Column(s) (in data) representing the level of the time-dependent
+ covariate (or time-dependent covariate function). There should be
+ a unique non-\code{NA} column entry representing each change in the level
+ (along with a corresponding covtime.cols column entry representing
+ the time of the change). Default is \code{NULL}, meaning
+ no time-dependent covariate information will be presented in
+ the graph.
+ }
+ \item{num.colors}{
+ Colors are utilized for the time-dependent covariate level for an
+ individual. This argument provides the number of unique covariate
+ levels which will be displayed by mapping the number of colors
+ (via \code{num.colors}) to the number of desired covariate levels.
+ This will divide the covariate span into roughly equally-sized
+ intervals, via the S-Plus cut function.
+ Default is one color, meaning no time-dependent information
+ will be presented in the graph.
[...]
+ransplant))
+ heart.one[,7] <- heart.one[,7] - 1
+ ## getting back to correct transplantation coding
+heart.one <- as.data.frame(heart.one[order(unlist(heart.one[,2]), unlist(heart.one[,3])),])
+names(heart.one) <- names(heart)
+# back to usual censoring indicator:
+heart.one[,3][heart.one[,3] == 2] <- 0
+# note: transplant says 0 (for no transplants) or 1 (for one transplant)
+# and event = 1 is death, while event = 0 is censored
+
+# plot single Kaplan-Meier curve from heart data, first creating survival object
+heart.surv <- survfit(Surv(stop, event) ~ 1, data=heart.one, conf.int = FALSE)
+
+# figure 3: traditional Kaplan-Meier curve
+# postscript('ehgfig3.ps', horiz=TRUE)
+# omi <- par(omi=c(0,1.25,0.5,1.25))
+ plot(heart.surv, ylab='estimated survival probability',
+ xlab='observation time (in days)')
+ title('Figure 3: Kaplan-Meier curve for Stanford data', cex=0.8)
+# dev.off()
+
+## now, draw event history graph for Stanford heart data; use as Figure 4
+
+# postscript('ehgfig4.ps', horiz=TRUE, colors = seq(0, 1, len=20))
+# par(omi=c(0,1.25,0.5,1.25))
+ event.history(heart.one,
+        survtime.col=heart.one[,2], surv.col=heart.one[,3],
+        covtime.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,1]),
+        cov.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,7]),
+        num.colors=2, colors=c(6,10),
+        x.lab = 'time under observation (in days)',
+        title='Figure 4: Event history graph for\nStanford data',
+        cens.mark.right =TRUE, cens.mark = '-',
+        cens.mark.ahead = 30.0, cens.mark.cex = 0.85)
+# dev.off()
+
+
+
+# now, draw age-stratified event history graph for Stanford heart data;
+# use as Figure 5
+
+# two plots, stratified by age status
+# postscript('c:\\temp\\ehgfig5.ps', horiz=TRUE, colors = seq(0, 1, len=20))
+# par(omi=c(0,1.25,0.5,1.25))
+ par(mfrow=c(1,2))
+
+ event.history(data=heart.one, subset.rows = (heart.one[,4] < 0),
+        survtime.col=heart.one[,2], surv.col=heart.one[,3],
+        covtime.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,1]),
+        cov.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,7]),
+        num.colors=2, colors=c(6,10),
+        x.lab = 'time under observation\n(in days)',
+        title = 'Figure 5a:\nStanford data\n(age < 48)',
+        cens.mark.right =TRUE, cens.mark = '-',
+        cens.mark.ahead = 40.0, cens.mark.cex = 0.85,
+        xlim=c(0,1900))
+
+ event.history(data=heart.one, subset.rows = (heart.one[,4] >= 0),
+        survtime.col=heart.one[,2], surv.col=heart.one[,3],
+        covtime.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,1]),
+        cov.cols = cbind(rep(0, dim(heart.one)[1]), heart.one[,7]),
+        num.colors=2, colors=c(6,10),
+        x.lab = 'time under observation\n(in days)',
+        title = 'Figure 5b:\nStanford data\n(age >= 48)',
+        cens.mark.right =TRUE, cens.mark = '-',
+        cens.mark.ahead = 40.0, cens.mark.cex = 0.85,
+        xlim=c(0,1900))
+# dev.off()
+# par(omi=omi)
+
+# we will not show liver cirrhosis data manipulation, as it was
+# a bit detailed; however, here is the
+# event.history code to produce Figure 7 / Plate 1
+
+# Figure 7 / Plate 1 : prothrombin ehg with color
+\dontrun{
+second.arg <- 1                ### second.arg is for shading
+third.arg <- c(rep(1,18),0,1)  ### third.arg is for intensity
+
+# postscript('c:\\temp\\ehgfig7.ps', horiz=TRUE,
+# colors = cbind(seq(0, 1, len = 20), second.arg, third.arg))
+# par(omi=c(0,1.25,0.5,1.25), col=19)
+ event.history(cirrhos2.eh, subset.rows = NULL,
+ survtime.col=cirrhos2.eh$time, surv.col=cirrhos2.eh$event,
+        covtime.cols = as.matrix(cirrhos2.eh[, ((2:18)*2)]),
+        cov.cols = as.matrix(cirrhos2.eh[, ((2:18)*2) + 1]),
+        cut.cov = as.numeric(quantile(as.matrix(cirrhos2.eh[, ((2:18)*2) + 1]),
+                c(0,.2,.4,.6,.8,1), na.rm=TRUE) + c(-1,0,0,0,0,1)),
+        colors=c(20,4,8,11,14),
+        x.lab = 'time under observation (in days)',
+        title='Figure 7: Event history graph for liver cirrhosis data (color)',
+        cens.mark.right =TRUE, cens.mark = '-',
+        cens.mark.ahead = 100.0, cens.mark.cex = 0.85)
+# dev.off()
+}
+}
+\keyword{survival} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/find.matches.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/find.matches.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,261 @@
+\name{find.matches}
+\alias{find.matches}
+\alias{summary.find.matches}
+\alias{print.find.matches}
+\alias{matchCases}
+\title{
+Find Close Matches
+}
+\description{
+Compares each row in \code{x} against all the rows in \code{y}, finding rows in
+\code{y} with all columns within a tolerance of the values of a given row of
+\code{x}. The default tolerance
+\code{tol} is zero, i.e., an exact match is required on all columns.
+For qualifying matches, a distance measure is computed. This is
+the sum of squares of differences between \code{x} and \code{y} after scaling
+the columns. The default scaling values are \code{tol}, and for columns
+with \code{tol=1} the scale values are set to 1.0 (since they are ignored
+anyway). Matches (up to \code{maxmatch} of them) are stored and listed in order of
+increasing distance.
+\cr
+The \code{summary} method prints a frequency distribution of the
+number of matches per observation in \code{x}, the median of the minimum
+distances for all matches per \code{x}, as a function of the number of matches,
+and the frequency of selection of duplicate observations as those having
+the smallest distance. The \code{print} method prints the entire \code{matches}
+and \code{distance} components of the result from \code{find.matches}.
+\cr
+\code{matchCases} finds all controls that match cases on a single variable
+\code{x} within a tolerance of \code{tol}. This is intended for prospective
+cohort studies that use matching for confounder adjustment (even
+though regression models usually work better).
+}
+\usage{
+find.matches(x, y, tol=rep(0, ncol(y)), scale=tol, maxmatch=10)
+\method{summary}{find.matches}(object, \dots)
+\method{print}{find.matches}(x, digits, \dots)
+
+matchCases(xcase, ycase, idcase=names(ycase),
+ xcontrol, ycontrol, idcontrol=names(ycontrol),
+ tol=NULL,
+ maxobs=max(length(ycase),length(ycontrol))*10,
+ maxmatch=20, which=c('closest','random'))
+}
+\arguments{
+\item{x}{
+a numeric matrix or the result of \code{find.matches}
+}
+\item{y}{
+a numeric matrix with same number of columns as \code{x}
+}
+\item{xcase}{
+}
+\item{xcontrol}{
+vectors, not necessarily of the same length, specifying a numeric
+variable used to match cases and controls
+}
+\item{ycase}{
+}
+\item{ycontrol}{
+vectors or matrices, not necessarily having the same number of rows,
+specifying a variable to carry along from cases and matching
+controls. If you instead want to carry along rows from a data frame,
+let \code{ycase} and \code{ycontrol} be non-overlapping integer subscripts of
+the donor data frame.
+}
+\item{tol}{
+a vector of tolerances with number of elements the same as the number
+of columns of \code{y}, for \code{find.matches}. For \code{matchCases}
+it is a scalar tolerance.
+}
+\item{scale}{
+a vector of scaling constants with number of elements the same as the
+number of columns of \code{y}.
+}
+\item{maxmatch}{
+maximum number of matches to allow. For \code{matchCases},
+maximum number of controls to match with a case (default is 20). If more than
+\code{maxmatch} matching controls are available, a random sample without
+replacement of \code{maxmatch} controls is used (if \code{which="random"}).
+}
+\item{object}{an object created by \code{find.matches}}
+\item{digits}{
+number of digits to use in printing distances
+}
+\item{idcase}{
+}
+\item{idcontrol}{
+vectors the same length as \code{xcase} and \code{xcontrol} respectively,
+specifying the id of cases and controls. Defaults are integers
+specifying original element positions within each of cases and
+controls.
+}
+\item{maxobs}{
+maximum number of cases and all matching controls combined (maximum
+dimension of data frame resulting from \code{matchControls}). Default is
+ten times the maximum of the number of cases and number of controls.
+\code{maxobs} is used to allocate space for the resulting data frame.
+}
+\item{which}{
+set to \code{"closest"} (the default)
[...]
+for the functions here but
+probably should have been.
+
+}
+\seealso{
+\code{\link{scale}}, \code{\link{apply}}
+}
+\examples{
+y <- rbind(c(.1, .2),c(.11, .22), c(.3, .4), c(.31, .41), c(.32, 5))
+x <- rbind(c(.09,.21), c(.29,.39))
+y
+x
+w <- find.matches(x, y, maxmatch=5, tol=c(.05,.05))
+
+
+set.seed(111) # so can replicate results
+x <- matrix(runif(500), ncol=2)
+y <- matrix(runif(2000), ncol=2)
+w <- find.matches(x, y, maxmatch=5, tol=c(.02,.03))
+w$matches[1:5,]
+w$distance[1:5,]
+# Find first x with 3 or more y-matches
+num.match <- apply(w$matches, 1, function(x)sum(x > 0))
+j <- ((1:length(num.match))[num.match > 2])[1]
+x[j,]
+y[w$matches[j,],]
+
+
+summary(w)
+
+
+# For many applications would do something like this:
+# attach(df1)
+# x <- cbind(age, sex) # Just do as.matrix(df1) if df1 has no factor objects
+# attach(df2)
+# y <- cbind(age, sex)
+# mat <- find.matches(x, y, tol=c(5,0)) # exact match on sex, 5y on age
+
+
+# Demonstrate matchCases
+xcase <- c(1,3,5,12)
+xcontrol <- 1:6
+idcase <- c('A','B','C','D')
+idcontrol <- c('a','b','c','d','e','f')
+ycase <- c(11,33,55,122)
+ycontrol <- c(11,22,33,44,55,66)
+matchCases(xcase, ycase, idcase,
+ xcontrol, ycontrol, idcontrol, tol=1)
+
+
+# If y is a binary response variable, the following code
+# will produce a Mantel-Haenszel summary odds ratio that
+# utilizes the matching.
+# Standard variance formula will not work here because
+# a control will match more than one case
+# WARNING: The M-H procedure exemplified here is suspect
+# because of the small strata and widely varying number
+# of controls per case.
+
+
+x <- c(1, 2, 3, 3, 3, 6, 7, 12, 1, 1:7)
+y <- c(0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1)
+case <- c(rep(TRUE, 8), rep(FALSE, 8))
+id <- 1:length(x)
+
+
+m <- matchCases(x[case], y[case], id[case],
+ x[!case], y[!case], id[!case], tol=1)
+iscase <- m$type=='case'
+# Note: the first tapply ensures that event indicators are
+# sorted by case id. The second actually does something.
+event.case <- tapply(m$y[iscase], m$idcase[iscase], sum)
+event.control <- tapply(m$y[!iscase], m$idcase[!iscase], sum)
+n.control <- tapply(!iscase, m$idcase, sum)
+n <- tapply(m$y, m$idcase, length)
+or <- sum(event.case * (n.control - event.control) / n) /
+ sum(event.control * (1 - event.case) / n)
+or
+
+
+# Bootstrap this estimator by sampling with replacement from
+# subjects. Assumes id is unique when combining cases+controls
+# (id was constructed this way above). The following algorithm
+# puts all sampled controls back with the cases to whom they were
+# originally matched.
+
+
+ids <- unique(m$id)
+idgroups <- split(1:nrow(m), m$id)
+B <- 50 # in practice use many more
+ors <- numeric(B)
+# Function to order w by ids, leaving unassigned elements zero
+align <- function(ids, w) {
+ z <- structure(rep(0, length(ids)), names=ids)
+ z[names(w)] <- w
+ z
+}
+for(i in 1:B) {
+ j <- sample(ids, replace=TRUE)
+ obs <- unlist(idgroups[j])
+ u <- m[obs,]
+ iscase <- u$type=='case'
+ n.case <- align(ids, tapply(u$type, u$idcase,
+ function(v)sum(v=='case')))
+ n.control <- align(ids, tapply(u$type, u$idcase,
+ function(v)sum(v=='control')))
+ event.case <- align(ids, tapply(u$y[iscase], u$idcase[iscase], sum))
+ event.control <- align(ids, tapply(u$y[!iscase], u$idcase[!iscase], sum))
+ n <- n.case + n.control
+ # Remove sets having 0 cases or 0 controls in resample
+ s <- n.case > 0 & n.control > 0
+ denom <- sum(event.control[s] * (n.case[s] - event.case[s]) / n[s])
+ or <- if(denom==0) NA else
+ sum(event.case[s] * (n.control[s] - event.control[s]) / n[s]) / denom
+ ors[i] <- or
+}
+describe(ors)
+}
+\keyword{math}
+\keyword{multivariate}
+\keyword{htest}
+\concept{bootstrap}
+\concept{matching}
+\concept{epidemiology}
+\concept{case-control} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/first.word.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/first.word.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,53 @@
+\name{first.word}
+\alias{first.word}
+\title{First Word in a String or Expression}
+\description{
+\code{first.word} finds the first word in an expression. A word is defined by
+unlisting the elements of the expression found by the S parser and then
+accepting any elements whose first character is either a letter or period.
+The principal intended use is for the automatic generation of temporary
+file names where it is important to exclude special characters from
+the file name. For Microsoft Windows, periods in names are deleted and
+only up to the first 8 characters of the word are returned.
+}
+\usage{
+first.word(x, i=1, expr=substitute(x))
+}
+\arguments{
+\item{x}{
+any scalar character string
+}
+\item{i}{
+word number, default value = 1. Used when the second or \code{i}th word is
+wanted. Currently only the \code{i=1} case is implemented.
+}
+\item{expr}{
+any S object of mode \code{expression}.
+}
+}
+\value{
+a character string
+}
+\author{
+Frank E. Harrell, Jr.,
+\cr
+Department of Biostatistics,
+\cr
+Vanderbilt University,
+\cr
+\email{f.harrell@vanderbilt.edu}
+
+
+Richard M. Heiberger,
+\cr
+Department of Statistics,
+\cr
+Temple University, Philadelphia, PA.
+\cr
+\email{rmh@astro.ocis.temple.edu}
+}
+\examples{
+first.word(expr=expression(y ~ x + log(w)))
+}
+\keyword{character}
+\keyword{manip} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/format.df.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/format.df.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,195 @@
+\name{format.df}
+\alias{format.df}
+\title{
+ Format a Data Frame or Matrix for LaTeX or HTML
+}
+\description{
+ \code{format.df} does appropriate rounding and decimal alignment, and outputs
+ a character matrix containing the formatted data. If \code{x} is a
+ \code{data.frame}, then do each component separately.
+ If \code{x} is a matrix, but not a data.frame, make it a data.frame
+ with individual components for the columns.
+ If a component \code{x$x} is a matrix, then do all columns the same.
+}
+
+\usage{
+format.df(x, digits, dec=NULL, rdec=NULL, cdec=NULL,
+ numeric.dollar=!dcolumn, na.blank=FALSE, na.dot=FALSE,
+ blank.dot=FALSE, col.just=NULL, cdot=FALSE,
+ dcolumn=FALSE, matrix.sep=' ', scientific=c(-4,4),
+ math.row.names=FALSE, math.col.names=FALSE,
+ double.slash=FALSE, format.Date="\%m/\%d/\%Y",
+ format.POSIXt="\%m/\%d/\%Y \%H:\%M:\%OS", \dots)
+}
+\arguments{
+ \item{x}{
+ a matrix (usually numeric) or data frame
+ }
+ \item{digits}{
+ causes all values in the table to be formatted to \code{digits} significant
+ digits. \code{dec} is usually preferred.
+ }
+ \item{dec}{
+ If \code{dec} is a scalar, all elements of the matrix will be rounded
+ to \code{dec}
+ decimal places to the right of the decimal. \code{dec} can also be a matrix
+ whose elements correspond to \code{x}, for customized rounding of each element.
+ A matrix \code{dec} must have number of columns equal to number of columns
+ of input \code{x}.
+ A scalar \code{dec} is expanded to a vector \code{cdec} with number of
+ items equal to number of columns of input \code{x}.
+ }
+ \item{rdec}{
+ a vector specifying the number of decimal places to the right for each row
+ (\code{cdec} is more commonly used than \code{rdec}).
+ A vector \code{rdec} must have number of items equal to number of rows of input \code{x}.
+ \code{rdec} is expanded to matrix \code{dec}.
+ }
+ \item{cdec}{
+ a vector specifying the number of decimal places for each column.
+ The vector must have number of items equal to number of columns or components
+ of input \code{x}.
+ }
+ \item{cdot}{
+ Set to \code{TRUE} to use centered dots rather than ordinary periods in numbers.
+ The output uses a syntax appropriate for \code{latex}.
+ }
+ \item{na.blank}{
+ Set to \code{TRUE} to use blanks rather than \code{NA} for missing values.
+ This usually looks better in \code{latex}.
+ }
+ \item{dcolumn}{
+ Set to \code{TRUE} to use David Carlisle's dcolumn style for
+ decimal alignment in \code{latex}.
+ Default is \code{FALSE}. You will probably want to
+ use \code{dcolumn} if you use \code{rdec}, as a column may then contain a varying
+ number of places to the right of the decimal. \code{dcolumn} can line up
+ all such numbers on the decimal point, with integer values right
+ justified at the decimal point location of numbers that actually
+ contain decimal places. When you use \code{dcolumn = TRUE},
+ \code{numeric.dollar} is set by default to \code{FALSE}, and the
+ object attribute \code{"style"} is set to \samp{dcolumn}, so the
+ \code{latex} \code{usepackage} must reference \code{[dcolumn]}.
+ The three files \file{dcolumn.sty}, \file{newarray.sty}, and
+ \file{array.sty} will
+ need to be in a directory in your \env{TEXINPUTS} path.
+ }
+ \item{numeric.dollar}{
+ logical, default \code{!dcolumn}. Set to \code{TRUE} to place dollar
+ signs around numeric values when \code{dcolumn = FALSE}. This
+ assures that \code{latex} will use minus signs rather than hyphens to indicate
+ negative numbers. Set to \code{FALSE} when \code{dcolumn = TRUE}, as
+ \code{dcolumn.sty} automatically uses minus signs.
+ }
+ \item{math.row.names}{
+ logical, set to \code{TRUE} to place dollar signs around the row names.
+ }
+ \item{math.col.names}{
+ logical, set to \code{TRUE} to place dollar signs around the column names.
+ }
+ \item{na.dot}{
+ Set to \code{TRUE} to use periods rather than \code{NA} for missing
+ numeric values.
+ This works with the \acronym{SAS} convention that periods indicate missing values.
+ }
+ \item{blank.dot}{
+ Set to \code{TRUE} to use periods rather than blanks for missing character values.
+ This works with the \acronym{SAS} convention that periods indicate missing values.
+ }
+ \item{col.just}{
+ Input vector \code{col.just} must have length equal to the
+ number of columns of the output matrix. When \code{NULL}, the
+ default, the \code{col.just} attribute of the result is set to
+ \samp{l} for character columns and to \samp{r} for numeric
+ columns. The user can override the default by an argument vector
+ whose length is equal to the number of columns of the result matrix.
+ When \code{format.df} is called by \code{latex.default}, the
+ \code{col.just} is used as the \code{cols} argument to the
+ \code{tabular} environment and the letters \samp{l}, \samp{r},
+ and \samp{c} are valid values. When \code{format.df} is called by
+ \acronym{SAS}, the \code{col.just} is used to determine whether a
+ \samp{\$} is needed on the \samp{input} line of the \file{sysin} file,
+ and the letters \samp{l} and \samp{r} are valid values. You can
+ pass specifications other than \code{l,r,c} in \code{col.just},
+ e.g., \code{"p{3in}"} to get paragraph-formatted columns from
+ \code{latex()}.
+ }
+ \item{matrix.sep}{
+ When \code{x} is a data frame containing a matrix, so that new column names
+ are constructed from the name of the matrix object and the names of
+ the individual columns of the matrix, \code{matrix.sep} specifies the
+ character to use to separate object names from individual column
+ names.
+ }
+ \item{scientific}{
+ specifies ranges of exponents (or a logical vector) specifying values
+ not to convert to scientific notation. See \code{format.default} for details.
+ }
+ \item{double.slash}{
+ should escaping backslashes themselves be escaped.
+ }
+ \item{format.Date}{
+ String used to format objects of the Date class.
+ }
+ \item{format.POSIXt}{
+ String used to format objects of the POSIXt class.
+ }
+ \item{\dots}{
+ other arguments are accepted and ignored. For \code{latexVerbatim} these
+ arguments are passed to the \code{print} function.
+ }
+}
+\value{
+ a character matrix with character images of properly rounded \code{x}.
+ Matrix components of input \code{x} are now just sets of columns of the
+ character matrix.
+ Object attribute \code{"col.just"} repeats the value of the argument \code{col.just} when provided;
+ otherwise, it includes the recommended justification for columns of output.
+ See the discussion of the argument \code{col.just}.
+ The default justification is \samp{l} for characters and factors,
+ \samp{r} for numeric.
+ When \code{dcolumn==TRUE}, numerics will have \samp{.} as the justification character.
+}
+
+\author{
+ Frank E. Harrell, Jr.,
+ \cr
+ Department of Biostatistics,
+ \cr
+ Vanderbilt University,
+ \cr
+ \email{f.harrell@vanderbilt.edu}
+
+
+ Richard M. Heiberger,
+ \cr
+ Department of Statistics,
+ \cr
+ Temple University, Philadelphia, PA.
+ \cr + \email{rmh@astro.ocis.temple.edu} + + +} +\seealso{ + \code{\link{latex}} +} +\examples{ +\dontrun{ +x <- data.frame(a=1:2, b=3:4) +x$m <- matrix(5:8,nrow=2) +names(x) +dim(x) +x +format.df(x) +dim(format.df(x)) +} +} +\keyword{utilities} +\keyword{interface} +\keyword{methods} +\keyword{file} +\keyword{character} +\keyword{manip} |
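A minimal sketch of the per-column rounding controlled by cdec, per the argument list above (assumes Hmisc is installed; the result is a character matrix ready for latex()).

    ## Sketch: 2 decimal places for the first column, 4 for the second.
    library(Hmisc)

    x <- data.frame(est = c(1.23456, 10.1), se = c(0.01234, 0.21))
    format.df(x, cdec = c(2, 4))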
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/format.pval.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/format.pval.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,43 @@
+\name{format.pval}
+\alias{format.pval}
+\title{Format P Values}
+\description{
+ \code{format.pval} is intended for formatting p-values.
+}
+\usage{
+format.pval(x, pv=x, digits = max(1, .Options$digits - 2),
+ eps = .Machine$double.eps, na.form = "NA", \dots)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{pv}{a numeric vector.}
+ \item{x}{argument for method compliance.}
+ \item{digits}{how many significant digits are to be used.}
+ \item{eps}{a numerical tolerance: see Details.}
+ \item{na.form}{character representation of \code{NA}s.}
+ \item{\dots}{
+ arguments passed to \code{\link{format}} in the \code{format.pval}
+ function body.
+ }
+}
+\details{
+ \code{format.pval} is mainly an auxiliary function for
+ \code{\link{print.summary.lm}} etc., and does separate formatting for
+ fixed, floating point and very small values; those less than
+ \code{eps} are formatted as \dQuote{\samp{< [eps]}} (where
+ \dQuote{\samp{[eps]}} stands for \code{format(eps, digits)}).
+}
+\value{
+ A character vector.
+}
+\note{This is the base \code{\link[base]{format.pval}} function with the
+ ability to pass the \code{nsmall} argument to \code{\link{format}}.
+}
+\examples{
+format.pval(c(runif(5), pi^-100, NA))
+format.pval(c(0.1, 0.0001, 1e-27))
+format.pval(c(0.1, 1e-27), nsmall=3)
+}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
+\keyword{print} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/gbayes.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/gbayes.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,474 @@
+\name{gbayes}
+\alias{gbayes}
+\alias{plot.gbayes}
+\alias{gbayes2}
+\alias{gbayesMixPredNoData}
+\alias{gbayesMixPost}
+\alias{gbayesMixPowerNP}
+\alias{gbayes1PowerNP}
+\title{
+Gaussian Bayesian Posterior and Predictive Distributions
+}
+\description{
+\code{gbayes} derives the (Gaussian) posterior and optionally the predictive
+distribution when both the prior and the likelihood are Gaussian, and
+when the statistic of interest comes from a 2-sample problem.
+This function is especially useful in obtaining the expected power of
+a statistical test, averaging over the distribution of the population
+effect parameter (e.g., log hazard ratio) that is obtained using
+pilot data. \code{gbayes} is also useful for summarizing studies for
+which the statistic of interest is approximately Gaussian with
+known variance. An example is given for comparing two proportions
+using the angular transformation, for which the variance is
+independent of unknown parameters except for very extreme probabilities.
+A \code{plot} method is also given. This plots the prior, posterior, and
+predictive distributions on a single graph using a nice default for
+the x-axis limits and using the \code{labcurve} function for automatic
+labeling of the curves.
+
+
+\code{gbayes2} uses the method of Spiegelhalter and Freedman (1986) to compute the
+probability of correctly concluding that a new treatment is superior
+to a control. By this we mean that a 1-\code{alpha} normal
+theory-based confidence interval for the new minus old treatment
+effect lies wholly to the right of \code{delta.w}, where \code{delta.w} is the
+minimally worthwhile treatment effect (which can be zero to be
+consistent with ordinary null hypothesis testing, a method not always
+making sense). This kind of power function is averaged over a prior
+distribution for the unknown treatment effect. This procedure is
+applicable to the situation where a prior distribution is not to be
+used in constructing the test statistic or confidence interval, but is
+only used for specifying the distribution of \code{delta}, the parameter of
+interest.
+
+
+Even though \code{gbayes2}
+assumes that the test statistic has a normal distribution with known
+variance (which is strongly a function of the sample size in the two
+treatment groups), the prior distribution function can be completely
+general. Instead of using a step-function for the prior distribution
+as Spiegelhalter and Freedman used in their appendix, \code{gbayes2} uses
+the built-in \code{integrate} function for numerical integration.
+\code{gbayes2} also allows the variance of the test statistic to be general
+as long as it is evaluated by the user. The conditional power given the
+parameter of interest \code{delta} is \code{1 - pnorm((delta.w - delta)/sd + z)}, where z
+is the normal critical value corresponding to 1 - \code{alpha}/2.
+
+\code{gbayesMixPredNoData} derives the predictive distribution of a
+statistic that is Gaussian given \code{delta} when no data have yet been
+observed and when the prior is a mixture of two Gaussians.
+
+\code{gbayesMixPost} derives the posterior density or cdf of \code{delta} given
+the statistic \code{x}, when the prior for \code{delta} is a mixture of two
+Gaussians and when \code{x} is Gaussian given \code{delta}.
+
+\code{gbayesMixPowerNP} computes the power for a test for \code{delta} > \code{delta.w}
+for the case where (1) a Gaussian prior or mixture of two Gaussian priors
+is used as the prior distribution, (2) this prior is used in forming
+the statistical test or credible interval, (3) no prior is used for
+the distribution of \code{delta} for computing power but instead a fixed
+single \code{delta} is given (as in traditional frequentist hypothesis
+tests), and (4) the test statistic has a Gaussian likelihood with
+known variance (and mean equal to the specified \code{delta}).
+\code{gbayesMixPowerNP} is handy where you want to use an earlier study in
+testing for
[...]
+# really constant so we average the variance over plausible
+# values of the probabilities of response p1 and p2. We
+# think that these are between .4 and .6 and we take a
+# further short cut
+
+
+v <- function(n1, n2, p1, p2) 1/(n1*p1*(1-p1)) + 1/(n2*p2*(1-p2))
+n1 <- 500; n2 <- 300
+ps <- seq(.4, .6, length=100)
+vguess <- quantile(v(n1, n2, ps, ps), .75)
+vguess
+# 75\%
+# 0.02183459
+
+
+# The minimally interesting treatment effect is an odds ratio
+# of 1.1. The prior distribution on the log odds ratio is
+# a 50:50 mixture of a vague Gaussian (mean 0, sd 100) and
+# an informative prior from a previous study (mean 1, sd 1)
+
+
+prior <- function(delta)
+ 0.5*dnorm(delta, 0, 100)+0.5*dnorm(delta, 1, 1)
+deltas <- seq(-5, 5, length=150)
+plot(deltas, prior(deltas), type='l')
+
+
+# Now compute the power, averaged over this prior
+gbayes2(sqrt(vguess), prior, log(1.1))
+# [1] 0.6133338
+
+
+# See how much power is lost by ignoring the previous
+# study completely
+
+
+gbayes2(sqrt(vguess), function(delta)dnorm(delta, 0, 100), log(1.1))
+# [1] 0.4984588
+
+
+# What happens to the power if we really don't believe the treatment
+# is very effective? Let's use a prior distribution for the log
+# odds ratio that is uniform between log(1.2) and log(1.3).
+# Also check the power against a true null hypothesis
+
+
+prior2 <- function(delta) dunif(delta, log(1.2), log(1.3))
+gbayes2(sqrt(vguess), prior2, log(1.1))
+# [1] 0.1385113
+
+
+gbayes2(sqrt(vguess), prior2, 0)
+# [1] 0.3264065
+
+
+# Compare this with the power of a two-sample binomial test to
+# detect an odds ratio of 1.25
+bpower(.5, odds.ratio=1.25, n1=500, n2=300)
+# Power
+# 0.3307486
+
+
+# For the original prior, consider a new study with equal
+# sample sizes n in the two arms. Solve for n to get a
+# power of 0.9. For the variance of the log odds ratio
+# assume a common p in the center of a range of suspected
+# probabilities of response, 0.3. For this example we
+# use a zero null value and the uniform prior above
+
+
+v <- function(n) 2/(n*.3*.7)
+pow <- function(n) gbayes2(sqrt(v(n)), prior2)
+uniroot(function(n) pow(n)-0.9, c(50,10000))$root
+# [1] 2119.675
+# Check this value
+pow(2119.675)
+# [1] 0.9
+
+
+# Get the posterior density when there is a mixture of two priors,
+# with mixing probability 0.5. The first prior is almost
+# non-informative (normal with mean 0 and variance 10000) and the
+# second has mean 2 and variance 0.3. The test statistic has a value
+# of 3 with variance 0.4.
+f <- gbayesMixPost(3, 4, mix=0.5, d0=0, v0=10000, d1=2, v1=0.3)
+
+
+args(f)
+
+
+# Plot this density
+delta <- seq(-2, 6, length=150)
+plot(delta, f(delta), type='l')
+
+
+# Add to the plot the posterior density that used only
+# the almost non-informative prior
+lines(delta, f(delta, mix=1), lty=2)
+
+
+# The same but for an observed statistic of zero
+lines(delta, f(delta, mix=1, x=0), lty=3)
+
+
+# Derive the CDF instead of the density
+g <- gbayesMixPost(3, 4, mix=0.5, d0=0, v0=10000, d1=2, v1=0.3,
+ what='cdf')
+# Had mix=0 or 1, gbayes1PowerNP could have been used instead
+# of gbayesMixPowerNP below
+
+
+# Compute the power to detect an effect of delta=1 if the variance
+# of the test statistic is 0.2
+gbayesMixPowerNP(g, 1, 0.2, interval=c(-10,12))
+
+
+# Do the same thing by simulation
+gbayesMixPowerNP(g, 1, 0.2, interval=c(-10,12), nsim=20000)
+
+
+# Compute by what factor the sample size needs to be larger
+# (the variance needs to be smaller) so that the power is 0.9
+ratios <- seq(1, 4, length=50)
+pow <- single(50)
+for(i in 1:50)
+ pow[i] <- gbayesMixPowerNP(g, 1, 0.2/ratios[i], interval=c(-10,12))[2]
+
+
+# Solve for ratio using reverse linear interpolation
+approx(pow, ratios, xout=0.9)$y
+
+
+# Check this by computing power
+gbayesMixPowerNP(g, 1, 0.2/2.1, interval=c(-10,12))
+# So the study will have to be 2.1 times as large as earlier thought
+}
+\keyword{htest}
+\concept{study design}
+\concept{power} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/getHdata.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/getHdata.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,81 @@
+\name{getHdata}
+\alias{getHdata}
+\title{
+ Download and Install Datasets for \pkg{Hmisc}, \pkg{rms}, and Statistical
+ Modeling
+}
+\description{
+ This function downloads and makes ready to use datasets from the main
+ web site for the \pkg{Hmisc} and \pkg{rms} libraries. For \R, the
+ datasets were stored in compressed \code{\link{save}} format and
+ \code{getHdata} makes them available by running \code{\link{load}}
+ after download. For S-Plus, the datasets were stored in
+ \code{data.dump} format and are made available by running
+ \code{data.restore} after import. The dataset is run through the
+ \code{\link{cleanup.import}} function. Calling \code{getHdata} with no
+ \code{file} argument provides a character vector of names of available
+ datasets that are currently on the web site. For \R, \R's default
+ browser can optionally be launched to view \verb{html} files that were
+ already prepared using the \pkg{Hmisc} command
+ \code{html(contents())} or to view \file{.txt} or \file{.html} data
+ description files when available.
+}
+\usage{
+getHdata(file, what = c("data", "contents", "description", "all"),
+ where="http://biostat.mc.vanderbilt.edu/twiki/pub/Main/DataSets")
+}
+\arguments{
+ \item{file}{
+ an unquoted name of a dataset on the web site, e.g. \samp{prostate}.
+ Omit \code{file} to obtain a list of available datasets.
+ }
+ \item{what}{
+ specify \code{what="contents"} to browse the contents (metadata) for
+ the dataset rather than fetching the data themselves. Specify
+ \code{what="description"} to browse a data description file if
+ available. Specify \code{what="all"} to retrieve the data and see
+ the metadata and description.
+ }
+ \item{where}{
+ \acronym{URL} containing the data and metadata files
+ }
+}
+\details{
+ For S-Plus, Hmisc defines a function \code{download.file} that is used
+ by \code{getHdata}. This is a stripped-down version of the \R
+ \code{\link{download.file}} function that uses the system
+ \command{wget} executable for fetching files from the Internet. For
+ Unix and Linux systems, \command{wget} will usually be pre-installed.
+ For Windows S-Plus systems, get \command{wget} from
+ \url{ftp://sunsite.dk/projects/wget/windows}. Once you unzip the file
+ from there, move \command{wget.exe} to the same Windows directory that
+ contains \command{ftp.exe}.
+}
+\value{
+ \code{getHdata()} without a \code{file} argument returns a character
+ vector of dataset base names. When a dataset is downloaded, the data
+ frame is placed in search position one and is not returned as value of
+ \code{getHdata}.
+}
+\author{Frank Harrell}
+\seealso{
+ \code{\link{download.file}}, \code{\link{cleanup.import}},
+ \code{\link[foreign:read.S]{data.restore}}, \code{\link{load}}
+}
+\examples{
+\dontrun{
+getHdata() # download list of available datasets
+getHdata(prostate) # downloads, load( ) or data.restore( )
+ # runs cleanup.import for S-Plus 6
+getHdata(valung, "contents") # open browser (options(browser="whatever"))
+ # after downloading valung.html
+ # (result of html(contents()))
+getHdata(support, "all") # download and open one browser window
+datadensity(support)
+attach(support) # make individual variables available
+getHdata(plasma, "all") # download and open two browser windows
+ # (description file is available for plasma)
+}
+}
+\keyword{interface}
+\keyword{data} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/getZip.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/getZip.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,35 @@
+\name{getZip}
+\alias{getZip}
+\title{Open a Zip File From a URL}
+\description{
+ Allows downloading and reading of a zip file containing one file.
+}
+\usage{
+getZip(url, password=NULL)
+}
+\arguments{
+ \item{url}{either a path to a local file or a valid URL.}
+ \item{password}{required to decode password-protected zip files}
+}
+\value{
+ Returns a file I/O pipe.
+}
+\details{
+ Allows downloading and reading of a zip file containing one file.
+ The file may be password protected. If a password is needed, one will be requested unless given.
+
+ Note: to make the password-protected zip file \file{z.zip}, do \command{zip -e z myfile}.
+}
+\seealso{
+ \code{\link{pipe}}
+}
+\examples{
+\dontrun{
+read.csv(getZip('http://biostat.mc.vanderbilt.edu/twiki/pub/Sandbox/WebHome/z.zip'))
+## Password is 'foo'
+}
+}
+\author{Frank E. Harrell}
+\keyword{file}
+\keyword{IO}
+\concept{compressed file} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/hdquantile.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/hdquantile.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,61 @@
+\name{hdquantile}
+\alias{hdquantile}
+\title{Harrell-Davis Distribution-Free Quantile Estimator}
+\description{
+Computes the Harrell-Davis (1982) quantile estimator and jackknife
+standard errors of quantiles. The quantile estimator is a weighted
+linear combination of order statistics in which the order statistics
+used in traditional nonparametric quantile estimators are given the
+greatest weight. In small samples the H-D estimator is more efficient
+than traditional ones, and the two methods are asymptotically
+equivalent. The H-D estimator is the limit of a bootstrap average as
+the number of bootstrap resamples becomes infinitely large.
+}
+\usage{
+hdquantile(x, probs = seq(0, 1, 0.25),
+ se = FALSE, na.rm = FALSE, names = TRUE, weights=FALSE)
+}
+\arguments{
+ \item{x}{a numeric vector}
+ \item{probs}{vector of quantiles to compute}
+ \item{se}{set to \code{TRUE} to also compute standard errors}
+ \item{na.rm}{set to \code{TRUE} to remove \code{NA}s from \code{x}
+ before computing quantiles}
+ \item{names}{set to \code{FALSE} to prevent names attributes from
+ being added to quantiles and standard errors}
+ \item{weights}{set to \code{TRUE} to return a \code{"weights"}
+ attribute with the matrix of weights used in the H-D estimator
+ corresponding to order statistics, with columns corresponding to
+ quantiles.}
+}
+\details{
+A Fortran routine is used to compute the jackknife leave-one-out
+quantile estimates. Standard errors are not computed for quantiles 0 or
+1 (\code{NA}s are returned).
+}
+\value{
+ A vector of quantiles. If \code{se=TRUE} this vector will have an
+ attribute \code{se} added to it, containing the standard errors. If
+ \code{weights=TRUE}, it also has a \code{"weights"} attribute which is a matrix.
+}
+\references{
+ Harrell FE, Davis CE (1982): A new distribution-free quantile
+ estimator. Biometrika 69:635-640.
+
+ Hutson AD, Ernst MD (2000): The exact bootstrap mean and variance of
+ an L-estimator. J Roy Statist Soc B 62:89-94.
+}
+\author{Frank Harrell}
+\seealso{\code{\link{quantile}}}
+\examples{
+set.seed(1)
+x <- runif(100)
+hdquantile(x, (1:3)/4, se=TRUE)
+
+\dontrun{
+# Compare jackknife standard errors with those from the bootstrap
+library(boot)
+boot(x, function(x,i) hdquantile(x[i], probs=(1:3)/4), R=400)
+}
+}
+\keyword{univar} |
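The "weighted linear combination of order statistics" in the description has a closed form: the weights are increments of a Beta(p(n+1), (1-p)(n+1)) CDF. The sketch below restates that definition in plain R as an illustration; hdquantile itself uses compiled code and adds jackknife standard errors.

    ## Sketch of the Harrell-Davis definition: Q(p) = sum_i w_i * x_(i),
    ## w_i = pbeta(i/n, p(n+1), (1-p)(n+1)) - pbeta((i-1)/n, ...).
    hd <- function(x, p) {
      x <- sort(x)
      n <- length(x)
      i <- seq_len(n)
      w <- pbeta(i / n,       p * (n + 1), (1 - p) * (n + 1)) -
           pbeta((i - 1) / n, p * (n + 1), (1 - p) * (n + 1))
      sum(w * x)
    }

    set.seed(1)
    x <- runif(100)
    hd(x, 0.5)    # should agree closely with hdquantile(x, 0.5)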
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/hist.data.frame.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/hist.data.frame.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,42 @@
+\name{hist.data.frame}
+\alias{hist.data.frame}
+\title{Histograms for Variables in a Data Frame}
+\description{
+This function tries to compute the maximum number of histograms that
+will fit on one page, then draws a matrix of histograms. If there
+are more qualifying variables than will fit on a page, the function
+waits for a mouse click before drawing the next page.
+}
+\usage{
+\method{hist}{data.frame}(x, n.unique = 3, nclass = "compute",
+ na.big = FALSE, rugs = FALSE, freq=TRUE, mtitl = FALSE, ...)
+}
+\arguments{
+ \item{x}{a data frame}
+ \item{n.unique}{minimum number of unique values a variable must have
+ before a histogram is drawn}
+ \item{nclass}{number of bins. Default is
+ \code{max(2, trunc(min(n/10, 25*log(n,10))/2))}, where \code{n} is the number of
+ non-missing values for a variable.}
+ \item{na.big}{set to \code{TRUE} to draw the number of missing values
+ on the top of the histogram in addition to in a subtitle. In the
+ subtitle, \code{n} is the number of non-missing values and \code{m} is the number
+ of missing values}
+ \item{rugs}{set to \code{TRUE} to add rug plots at the top of each
+ histogram}
+ \item{freq}{see \code{\link{hist}}. Default is to show frequencies.}
+ \item{mtitl}{set to a character string to set aside extra outside top
+ margin and to use the string for an overall title}
+ \item{\dots}{arguments passed to \code{scat1d}}
+}
+\value{the number of pages drawn}
+\author{Frank E Harrell Jr}
+\seealso{\code{\link{hist}}, \code{\link{scat1d}}}
+\examples{
+d <- data.frame(a=runif(200), b=rnorm(200),
+ w=factor(sample(c('green','red','blue'), 200, TRUE)))
+hist.data.frame(d) # in R, just say hist(d)
+}
+\keyword{hplot}
+\keyword{dplot}
+\keyword{distribution} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/histbackback.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/histbackback.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,88 @@ +\name{histbackback} +\alias{histbackback} +\title{ +Back to Back Histograms +} +\description{ +Takes two vectors or a list with \code{x} and \code{y} components, and produces +back to back histograms of the two datasets. +} +\usage{ +histbackback(x, y, brks=NULL, xlab=NULL, axes=TRUE, probability=FALSE, + xlim=NULL, ylab='', \dots) +} +\arguments{ +\item{x,y}{ +either two vectors or a list given as \code{x} with two components. If the +components have names, they will be used to label the axis +(modification FEH). +} +\item{brks}{ +vector of the desired breakpoints for the histograms. +} +\item{xlab}{ +a vector of two character strings naming the two datasets. +} +\item{axes}{ +logical flag stating whether or not to label the axes. +} +\item{probability}{ +logical flag: if \code{TRUE}, then the x-axis corresponds to the units for a +density. If \code{FALSE}, then the units are counts. +} +\item{xlim}{ +x-axis limits. First value must be negative, as the left histogram is +placed at negative x-values. Second value must be positive, for the +right histogram. To make the limits symmetric, use e.g. \code{xlim=c(-20,20)}. +} +\item{ylab}{ +label for y-axis. Default is no label. +} +\item{...}{ +additional graphics parameters may be given. +}} +\value{ +a list is returned invisibly with the following components: + +\item{left}{ +the counts for the dataset plotted on the left. +} +\item{right}{ +the counts for the dataset plotted on the right. +} +\item{breaks}{ +the breakpoints used. +}} +\section{Side Effects}{ +a plot is produced on the current graphics device. +} +\author{ +Pat Burns +\cr +Salomon Smith Barney +\cr +London +\cr +\email{pburns@dorado.sbi.com} +} +\seealso{ +\code{\link{hist}}, \code{\link[lattice]{histogram}} +} +\examples{ +options(digits=3) +set.seed(1) +histbackback(rnorm(20), rnorm(30)) + + +fool <- list(x=rnorm(40), y=rnorm(40)) +histbackback(fool) +age <- rnorm(1000,50,10) +sex <- sample(c('female','male'),1000,TRUE) +histbackback(split(age, sex)) +agef <- age[sex=='female']; agem <- age[sex=='male'] +histbackback(list(Female=agef,Male=agem), probability=TRUE, xlim=c(-.06,.06)) +} +\keyword{dplot} +\keyword{hplot} +\keyword{distribution} +% Converted by Sd2Rd version 1.21. |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/hoeffd.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/hoeffd.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,98 @@ +\name{hoeffd} +\alias{hoeffd} +\alias{print.hoeffd} +\title{ +Matrix of Hoeffding's D Statistics +} +\description{ +Computes a matrix of Hoeffding's (1948) \code{D} statistics for all +possible pairs of columns of a matrix. \code{D} is a measure of the +distance between \code{F(x,y)} and \code{G(x)H(y)}, where \code{F(x,y)} +is the joint CDF of \code{X} and \code{Y}, and \code{G} and \code{H} are +marginal CDFs. Missing values are deleted in pairs rather than deleting +all rows of \code{x} having any missing variables. The \code{D} +statistic is robust against a wide variety of alternatives to +independence, such as non-monotonic relationships. The larger the value +of \code{D}, the more dependent are \code{X} and \code{Y} (for many +types of dependencies). \code{D} used here is 30 times Hoeffding's +original \code{D}, and ranges from -0.5 to 1.0 if there are no ties in +the data. \code{print.hoeffd} prints the information derived by +\code{hoeffd}. \code{hoeffd} also computes the mean and maximum +absolute values of the difference between the joint empirical CDF and +the product of the marginal empirical CDFs. +} +\usage{ +hoeffd(x, y) +\method{print}{hoeffd}(x, \dots) +} +\arguments{ +\item{x}{ +a numeric matrix with at least 5 rows and at least 2 columns (if +\code{y} is absent), or an object created by \code{hoeffd} +} +\item{y}{ +a numeric vector or matrix which will be concatenated to \code{x} +} +\item{\dots}{ignored} +} +\value{ +a list with elements \code{D}, the +matrix of D statistics, \code{n}, the +matrix of number of observations used in analyzing each pair of variables, +and \code{P}, the asymptotic P-values. +Pairs with fewer than 5 non-missing values have the D statistic set to NA. +The diagonals of \code{n} are the number of non-NAs for the single variable +corresponding to that row and column. +} +\details{ +Uses midranks in case of ties, as described by Hollander and Wolfe. +P-values are approximated by linear interpolation on the table +in Hollander and Wolfe, which uses the asymptotically equivalent +Blum-Kiefer-Rosenblatt statistic. For \code{P<.0001} or \code{>0.5}, \code{P} values are +computed using a well-fitting linear regression function in \code{log P} vs. +the test statistic. +Ranks (but not bivariate ranks) are computed using efficient +algorithms (see reference 3). +} +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University +\cr +\email{f.harrell@vanderbilt.edu} +} +\references{ +Hoeffding W. (1948): A non-parametric test of independence. Ann Math Stat +19:546--57. + +Hollander M. and Wolfe D.A. (1973). Nonparametric Statistical Methods, +pp. 228--235, 423. New York: Wiley. + +Press WH, Flannery BP, Teukolsky SA, Vetterling WT (1988): Numerical +Recipes in C. Cambridge: Cambridge University Press. +} +\seealso{ +\code{\link{rcorr}}, \code{\link{varclus}} +} +\examples{ +x <- c(-2, -1, 0, 1, 2) +y <- c(4, 1, 0, 1, 4) +z <- c(1, 2, 3, 4, NA) +q <- c(1, 2, 3, 4, 5) +hoeffd(cbind(x,y,z,q)) + + +# Hoeffding's test can detect even one-to-many dependency +set.seed(1) +x <- seq(-10,10,length=200) +y <- x*sign(runif(200,-1,1)) +plot(x,y) +hoeffd(x,y) +} +\keyword{nonparametric} +\keyword{htest} |
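| A short illustration (not from the package) of the claim above that \code{D} is sensitive to non-monotonic dependence where ordinary correlation is not:
set.seed(2)
x <- rnorm(200)
y <- x^2 + rnorm(200, sd = 0.1)   # strong but non-monotonic dependence
cor(x, y)                          # Pearson correlation is near 0
hoeffd(x, y)$D                     # off-diagonal D is clearly positive |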
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/html.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/html.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,91 @@ +\name{html} +\alias{html} +\alias{html.latex} +\alias{html.data.frame} +\alias{html.default} +\alias{show.html} +\alias{print.html} +\title{ +Convert an S object to HTML +} +\description{ +\code{html} is a generic function, for which only two methods are currently +implemented, \code{html.latex} and a rudimentary +\code{html.data.frame}. The former uses the \code{HeVeA} LaTeX to HTML +translator by Maranget to create an HTML file from a LaTeX file like +the one produced by \code{latex}. The resulting HTML file may be +displayed using a \code{show} or a \code{print} method. The browser +specified in \code{options(browser=)} for R (\code{help.browser} for +S-Plus) is launched to display the HTML file. \code{html.default} just +runs \code{html.data.frame}. +} +\usage{ +html(object, \dots) +\method{html}{latex}(object, file, ...) +\method{html}{data.frame}(object, + file=paste(first.word(deparse(substitute(object))),'html',sep='.'), + append=FALSE, link=NULL, linkCol=1, linkType=c('href','name'), \dots) +\method{html}{default}(object, + file=paste(first.word(deparse(substitute(object))),'html',sep='.'), + append=FALSE, link=NULL, linkCol=1, linkType=c('href','name'), \dots) +\method{print}{html}(x, \dots) +\method{show}{html}(object) +} +\arguments{ +\item{object}{a data frame or an object created by \code{latex}. For + \code{show}, an object created by \code{html}. For the generic + \code{html}, any object for which an \code{html} method exists.} +\item{file}{ +name of the file to create. The default file +name is \code{object.html} where \code{object} is the first word in +the name of the argument for \code{object}. +} +\item{append}{set to \code{TRUE} to append to an existing file} +\item{link}{character vector specifying hyperlink names to attach to + selected elements of the matrix or data frame. No hyperlinks are used + if \code{link} is omitted or for elements of \code{link} that are + \code{""}. To allow multiple links per row, \code{link} may also be + a character matrix shaped as \code{object}, in which case + \code{linkCol} is ignored.} +\item{linkCol}{column number of \code{object} to which hyperlinks are + attached. Defaults to first column.} +\item{linkType}{defaults to \code{"href"}} +\item{\dots}{ignored} +\item{x}{an object created by \code{html}} +} +\section{Side Effects}{ +\code{print} or \code{show} launch a browser +} +\author{ +Frank E. Harrell, Jr. +\cr +Department of Biostatistics, +\cr +Vanderbilt University, +\cr +\email{f.harrell@vanderbilt.edu} +} +\references{ +Maranget, Luc. HeVeA: a LaTeX to HTML translator. +URL: http://para.inria.fr/~maranget/hevea/ +} +\seealso{ +\code{\link{latex}} +} +\examples{ +\dontrun{ +x <- matrix(1:6, nrow=2, dimnames=list(c('a','b'),c('c','d','e'))) +w <- latex(x) +h <- html(w) # run HeVeA to convert .tex to .html +h <- html(x) # convert x directly to html +options(browser='konqueror') # use help.browser for S-Plus +h # launch html browser by running print.html +w <- html(x, link=c('','B')) # hyperlink first row first col to B +} +} +\keyword{utilities} +\keyword{interface} +\keyword{methods} +\keyword{file} +\keyword{character} +\keyword{manip} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/impute.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/impute.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,96 @@ +\name{impute} +\alias{impute} +\alias{impute.default} +\alias{print.impute} +\alias{summary.impute} +\alias{[.impute} +\alias{is.imputed} +\title{ +Generic Functions and Methods for Imputation +} +\description{ +These functions do simple and \code{transcan} +imputation and print, summarize, and subscript +variables that have NAs filled in with imputed values. The simple +imputation method involves filling in NAs with constants, +with a specified single-valued function of the non-NAs, or from +a sample (with replacement) from the non-NA values (this is useful +in multiple imputation). +More complex imputations can be done +with the \code{transcan} function, which also works with the generic methods +shown here, i.e., \code{impute} can take a \code{transcan} object and use the +imputed values created by \code{transcan} (with \code{imputed=TRUE}) to fill in NAs. +The \code{print} method places * after variable values that were imputed. +The \code{summary} method summarizes all imputed values and then uses +the next \code{summary} method available for the variable. +The subscript method preserves attributes of the variable and subsets +the list of imputed values corresponding to how the variable was +subsetted. The \code{is.imputed} function is for checking if observations +are imputed. +} +\usage{ +impute(x, ...) + +\method{impute}{default}(x, fun=median, ...) + +\method{print}{impute}(x, ...) + +\method{summary}{impute}(object, ...) + +is.imputed(x) +} +\arguments{ +\item{x}{ +a vector or an object created by \code{transcan}, or a vector needing +basic unconditional imputation. If there are no \code{NA}s and \code{x} +is a vector, it is returned unchanged. +} +\item{fun}{ +the name of a function to use in computing the (single) +imputed value from the non-NAs. The default is \code{median}. +If instead of specifying a function as \code{fun}, a single value or vector +(numeric, or character if \code{object} is a factor) is specified, +those values are used for insertion. \code{fun} can also be the character +string \code{"random"} to draw random values for imputation, with the random +values not forced to be the same if there are multiple NAs. +For a vector of constants, the vector must be of length one +(indicating the same value replaces all NAs) or must be as long as +the number of NAs, in which case the values correspond to consecutive NAs +to replace. For a factor \code{object}, constants for imputation may include +character values not in the current levels of \code{object}. In that +case new levels are added. +If \code{object} is of class \code{"factor"}, \code{fun} is ignored and the +most frequent category is used for imputation. +} +\item{object}{an object of class \code{"impute"}} +\item{...}{ignored} +} +\value{ +a vector with class \code{"impute"} placed in front of existing classes. +For \code{is.imputed}, a vector of logical values is returned (all +\code{FALSE} if \code{object} is not of class \code{impute}). +} +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University +\cr +\email{f.harrell@vanderbilt.edu} +} +\seealso{ +\code{\link{transcan}}, \code{\link{impute.transcan}}, \code{\link{describe}}, \code{\link{na.include}}, \code{\link{sample}} +} +\examples{ +age <- c(1,2,NA,4) +age.i <- impute(age) +# Could have used impute(age,2.5), impute(age,mean), impute(age,"random") +age.i +summary(age.i) +is.imputed(age.i) +} +\keyword{methods} +\keyword{math} +\keyword{htest} +\keyword{models} |
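| Two more usage sketches based directly on the argument descriptions above:
age <- c(1, 2, NA, 4, NA)
impute(age, 'random')   # each NA drawn (with replacement) from the observed values
impute(age, c(9, 10))   # consecutive NAs replaced by 9 and 10 respectively |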
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/inc-dec.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/inc-dec.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,35 @@ +\name{inc-dec} +\alias{inc<-} +\alias{dec<-} +\title{Increment and Decrement} +\description{ + \code{inc<-} increments \code{x} by \code{value}. + Equivalent to \code{x <- x + value}. + + \code{dec<-} decrements \code{x} by the \code{value}. Equivalent to + \code{x <- x - value}. +} +\usage{ +inc(x) <- value + +dec(x) <- value +} +\arguments{ + \item{x}{ object to be incremented or decremented } + \item{value}{ value by which \code{x} will be modified } +} +\author{ Charles Dupont } +\examples{ +x <- 1:5 +inc(x) <- 5 +x # c(6,7,8,9,10) + +dec(x) <- 3 +x # c(3,4,5,6,7) +} +\keyword{manip} +\keyword{utilities} |
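| For readers unfamiliar with R replacement functions, a plausible definition consistent with the documented behaviour (the package's actual source may differ):
`inc<-` <- function(x, value) x + value
`dec<-` <- function(x, value) x - value
x <- 1:5
inc(x) <- 5   # parsed by R as x <- `inc<-`(x, value = 5)
x             # 6 7 8 9 10 |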
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/labcurve.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/labcurve.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,641 @@
+\name{labcurve}
+\alias{labcurve}
+\alias{putKey}
+\alias{putKeyEmpty}
+\alias{largest.empty}
+\alias{drawPlot}
+\alias{plot.drawPlot}
+\alias{bezier}
+\title{
+Label Curves, Make Keys, and Interactively Draw Points and Curves
+}
+\description{
+\code{labcurve} optionally draws a set of curves then labels the curves.
+A variety of methods for drawing labels are implemented, ranging from
+positioning using the mouse to automatic labeling to automatic placement
+of key symbols with manual placement of key legends to automatic
+placement of legends. For automatic positioning of labels or keys, a
+curve is labeled at a point that is maximally separated from all of the
+other curves. Gaps occurring when curves do not start or end at the
+same x-coordinates are given preference for positioning labels. If
+labels are offset from the curves (the default behaviour), if the
+closest curve to curve i is above curve i, curve i is labeled below its
+line. If the closest curve is below curve i, curve i is labeled above
+its line. These directions are reversed if the resulting labels would
+appear outside the plot region.
+
+Both ordinary lines and step functions are handled, and there is an
+option to draw the labels at the same angle as the curve within a
+local window.
+
+Unless the mouse is used to position labels or plotting symbols are
+placed along the curves to distinguish them, curves are examined at 100
+(by default) equally spaced points over the range of x-coordinates in
+the current plot area. Linear interpolation is used to get
+y-coordinates to line up (step function or constant interpolation is
+used for step functions). There is an option to instead examine all
+curves at the set of unique x-coordinates found by unioning the
+x-coordinates of all the curves. This option is especially useful when
+plotting step functions. By setting \code{adj="auto"} you can have
+\code{labcurve} try to optimally left- or right-justify labels depending
+on the slope of the curves at the points at which labels would be
+centered (plus a vertical offset). This is especially useful when
+labels must be placed on steep curve sections.
+
+You can use the \code{on top} method to write (short) curve names
+directly on the curves (centered on the y-coordinate). This is
+especially useful when there are many curves whose full labels would run
+into each other. You can plot letters or numbers on the curves, for
+example (using the \code{keys} option), and have \code{labcurve} use the
+\code{key} function to provide long labels for these short ones (see the
+end of the example). There is another option for connecting labels to
+curves using arrows. When \code{keys} is a vector of integers, it is
+taken to represent plotting symbols (\code{pch}s), and these symbols are
+plotted at equally-spaced x-coordinates on each curve (by default, using
+5 points per curve). The points are offset in the x-direction between
+curves so as to minimize the chance of collisions.
+
+To add a legend defining line types, colors, or line widths with no
+symbols, specify \code{keys="lines"}, e.g., \code{labcurve(curves,
+keys="lines", lty=1:2)}.
+
+\code{putKey} provides a different way to use \code{key()} by allowing
+the user to specify vectors for labels, line types, plotting characters,
+etc. Elements that do not apply (e.g., \code{pch} for lines
+(\code{type="l"})) may be \code{NA}. When a series of points is
+represented by both a symbol and a line, the corresponding elements of
+both \code{pch} and \code{lty}, \code{col.}, or \code{lwd} will be
+non-missing.
+
+\code{putKeyEmpty}, given vectors of all the x-y coordinates that have been
+plotted, uses \code{largest.empty} to find the largest empty rectangle large
+enough to hold the key, and draws the key using \code{putKey}.
+
+\code{drawPlot} is a simple mouse-driven function for drawing series of
+lines, step functions, polynomials, Bezier curves, and points, and
+automatically lab[...]ons is
+searched for using \code{largest.empty}. Then \code{key} is called again to draw
+the key there, using the argument \code{corner=c(.5,.5)} so that the center
+of the rectangle can be specified to \code{key}.
+
+If you want to plot the data, an easier way to use \code{labcurve} is
+through \code{xYplot} as shown in some of its examples.
+}
+\author{
+Frank Harrell
+\cr
+Department of Biostatistics
+\cr
+Vanderbilt University
+\cr
+\email{f.harrell@vanderbilt.edu}
+}
+\seealso{
+ \code{\link{approx}}, \code{\link{text}}, \code{\link{legend}},
+ \code{\link{scat1d}}, \code{\link{xYplot}}, \code{\link{abline}}
+}
+\examples{
+n <- 2:8
+m <- length(n)
+type <- c('l','l','l','l','s','l','l')
+# s=step function l=ordinary line (polygon)
+curves <- vector('list', m)
+
+plot(0,1,xlim=c(0,1),ylim=c(-2.5,4),type='n')
+
+set.seed(39)
+
+for(i in 1:m) {
+  x <- sort(runif(n[i]))
+  y <- rnorm(n[i])
+  lines(x, y, lty=i, type=type[i], col=i)
+  curves[[i]] <- list(x=x,y=y)
+}
+
+labels <- paste('Label for',letters[1:m])
+labcurve(curves, labels, tilt=TRUE, type=type, col=1:m)
+
+# Put only single letters on curves at points of
+# maximum space, and use key() to define the letters,
+# with automatic positioning of the key in the most empty
+# part of the plot
+# Have labcurve do the plotting, leaving extra space for key
+
+names(curves) <- labels
+labcurve(curves, keys=letters[1:m], type=type, col=1:m,
+         pl=TRUE, ylim=c(-2.5,4))
+
+# Put plotting symbols at equally-spaced points,
+# with a key for the symbols, ignoring line types
+
+labcurve(curves, keys=1:m, lty=1, type=type, col=1:m,
+         pl=TRUE, ylim=c(-2.5,4))
+
+# Plot and label two curves, with line parameters specified with data
+set.seed(191)
+ages.f <- sort(rnorm(50,20,7))
+ages.m <- sort(rnorm(40,19,7))
+height.f <- pmin(ages.f,21)*.2+60
+height.m <- pmin(ages.m,21)*.16+63
+
+labcurve(list(Female=list(ages.f,height.f,col=2),
+              Male  =list(ages.m,height.m,col=3,lty='dashed')),
+         xlab='Age', ylab='Height', pl=TRUE)
+# add ,keys=c('f','m') to label curves with single letters
+# For S-Plus use lty=2
+
+# Plot power for testing two proportions vs. n for various odds ratios,
+# using 0.1 as the probability of the event in the control group.
+# A separate curve is plotted for each odds ratio, and the curves are
+# labeled at points of maximum separation
+
+n <- seq(10, 1000, by=10)
+OR <- seq(.2,.9,by=.1)
+pow <- lapply(OR, function(or,n)list(x=n,y=bpower(p1=.1,odds.ratio=or,n=n)),
+              n=n)
+names(pow) <- format(OR)
+labcurve(pow, pl=TRUE, xlab='n', ylab='Power')
+
+# Plot some random data and find the largest empty rectangle
+# that is at least .1 wide and .1 tall
+
+x <- runif(50)
+y <- runif(50)
+plot(x, y)
+z <- largest.empty(x, y, .1, .1)
+z
+points(z,pch=3)   # mark center of rectangle, or
+polygon(z$rect, col='blue')   # to draw the rectangle, or
+#key(z$x, z$y, \dots stuff for legend)
+
+# Use the mouse to draw a series of points using one symbol, and
+# two smooth curves or straight lines (if two points are clicked),
+# none of these being labeled
+
+# d <- drawPlot(Points(), Curve(), Curve())
+# plot(d)
+
+\dontrun{
+# Use the mouse to draw a Gaussian density, two series of points
+# using 2 symbols, one Bezier curve, a step function, and raw data
+# along the x-axis as a 1-d scatter plot (rug plot). Draw a key.
+# The density function is fit to 3 mouse clicks
+# Abline draws a dotted horizontal reference line
+d <- drawPlot(Curve('Normal',type='gauss'),
+              Points('female'), Points('male'),
+              Curve('smooth',ask=TRUE,lty=2), Curve('step',type='s',lty=3),
+              Points(type='r'), Abline(h=.5, lty=2),
+              xlab='X', ylab='y', xlim=c(0,100), key=TRUE)
+plot(d, ylab='Y')
+plot(d, key=FALSE)   # label groups using labcurve
+}
+}
+\keyword{hplot}
+\keyword{aplot}
+\keyword{dplot}
+\keyword{iplot}
+% Converted by Sd2Rd version 1.21. |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/label.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/label.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,260 @@
+\name{label}
+\alias{label}
+\alias{label<-}
+\alias{label.default}
+\alias{label.Surv}
+\alias{label<-.default}
+\alias{labelPlotmath}
+\alias{labelLatex}
+\alias{[.labelled}
+\alias{print.labelled}
+\alias{Label}
+\alias{Label.data.frame}
+\alias{llist}
+\alias{plotmathTranslate}
+\alias{as.data.frame.labelled}
+\alias{data.frame.labelled}
+\alias{reLabelled}
+\alias{label.data.frame}
+\alias{label<-.data.frame}
+\alias{combineLabels}
+\title{
+ Label Attribute of an Object
+}
+\description{
+ \code{label(x)} retrieves the \code{label} attribute of \code{x}.
+ \code{label(x) <- "a label"} stores the label attribute, and also puts
+ the class \code{labelled} as the first class of \code{x} (for S-Plus
+ this class is not used and methods for handling this class are
+ not defined so the \code{"label"} and \code{"units"} attributes are lost
+ upon subsetting). The reason for having this class is so that the
+ subscripting method for \code{labelled}, \code{[.labelled}, can preserve
+ the \code{label} attribute in S. Also, the \code{print}
+ method for \code{labelled} objects prefaces the print with the object's
+ \code{label} (and \code{units} if there). If the variable is also given
+ a \code{"units"} attribute using the \code{units} function, subsetting
+ the variable (using \code{[.labelled}) will also retain the
+ \code{"units"} attribute.
+
+ \code{label} can optionally append a \code{"units"} attribute to the
+ string, and it can optionally return a string or expression (for \R's
+ \code{plotmath} facility) suitable for plotting. \code{labelPlotmath}
+ provides the same facility when the input arguments are
+ the \code{'label'} and \code{'units'} themselves rather than a vector having those
+ attributes. When \code{plotmath} mode is used to construct labels, the
+ \code{'label'} or \code{'units'} may contain math expressions but they
+ are typed verbatim if they contain percent signs, blanks, or
+ underscores.
+
+ For \code{Surv} objects, \code{label} first looks to see if there is
+ an overall \code{"label"} attribute for the object, then it looks for
+ saved attributes that \code{Surv} put in the \code{"inputAttributes"}
+ object, looking first at the \code{event} variable, then \code{time2},
+ and finally \code{time}. You can restrict the looking by specifying
+ \code{type}.
+
+ \code{labelLatex} constructs suitable LaTeX labels for a variable or from the
+ \code{label} and \code{units} arguments, optionally right-justifying
+ \code{units} if \code{hfill=TRUE}. This is useful when making tables
+ when the variable in question is not a column heading. If \code{x}
+ is specified, \code{label} and \code{units} values are extracted from
+ its attributes instead of from the other arguments.
+
+ \code{Label} (actually \code{Label.data.frame}) is a function which generates
+ S source code that makes the labels in all the variables in a data
+ frame easy to edit.
+
+ \code{llist} is like \code{list} except that it preserves the names or
+ labels of the component variables in the variables \code{label}
+ attribute. This can be useful when looping over variables or using
+ \code{sapply} or \code{lapply}. By using \code{llist} instead of
+ \code{list} one can annotate the output with the current variable's name
+ or label. \code{llist} also defines a \code{names} attribute for the
+ list and pulls the \code{names} from the arguments' expressions for
+ non-named arguments.
+
+ \code{plotmathTranslate} is a simple function that translates certain
+ character strings to character strings that can be used as part of \R
+ \code{plotmath} expressions. If the input string has a space or percent
+ inside, the string is surrounded by a call to \code{plotmath}'s
+ \code{paste} function.
+
+ \code{as.data.frame.labelled} is a utility function that is called by
+ \code{[.data.frame}. It is just a copy of \code{as.data.frame.vector}.
+ \code{data.frame.labelled} is anot[...]expression instead of a character string) if R is
+ in effect. If \code{units} is also \code{TRUE}, and if both
+ \code{'label'} and \code{'units'} attributes are present, the
+ \code{'units'} will appear after the label but in smaller type and
+ will not be surrounded by brackets.
+ }
+ \item{default}{
+ if \code{x} does not have a \code{'label'} attribute and
+ \code{default} (a character string) is specified, the label will be
+ taken as \code{default}. For \code{labelLatex} the \code{default}
+ is the name of the first argument if it is a variable and not a label.
+ }
+ \item{grid}{
+ Currently \R's \code{lattice} and \code{grid} functions do not support
+ \code{plotmath} expressions for \code{xlab} and \code{ylab}
+ arguments. When using \code{lattice} functions in \R, set the
+ argument \code{grid} to \code{TRUE} so that \code{labelPlotmath} can
+ return an ordinary character string instead of an expression.
+ }
+ \item{type}{for \code{Surv} objects specifies the type of element for
+ which to restrict the search for a label}
+ \item{label}{a character string containing a variable's label}
+ \item{plotmath}{
+ set to \code{TRUE} to have \code{labelPlotmath} return an expression
+ for plotting using \R's \code{plotmath} facility. If \R is not in
+ effect, an ordinary character string is returned.
+ }
+ \item{size}{LaTeX size for \code{units}. Default is two sizes smaller
+ than \code{label}, which assumes that the LaTeX \code{relsize}
+ package is in use.}
+ \item{hfill}{set to \code{TRUE} to right-justify \code{units} in the
+ field. This is useful when multiple labels are being put into rows
+ in a LaTeX \code{tabular} environment, and will cause a problem if
+ the label is used in an environment where \code{hfill} is not
+ appropriate.}
+ \item{bold}{set to \code{TRUE} to have \code{labelLatex} put the
+ \code{label} in bold face.}
+ \item{double}{set to \code{TRUE} to represent backslash in LaTeX as
+ four backslashes in place of two. This is needed if, for example,
+ you need to convert the result using \code{as.formula}}
+ \item{value}{
+ the label of the object, or "".
+ }
+ \item{object}{
+ a data frame
+ }
+ \item{\dots}{
+ a list of variables or expressions to be formed into a \code{list}.
+ Ignored for \code{print.labelled}.
+ }
+ \item{file}{
+ the name of a file to which to write S source code. Default is
+ \code{""}, meaning standard output.
+ }
+ \item{append}{
+ set to \code{TRUE} to append code generated by \code{Label} to file \code{file}
+ }
+ \item{labels}{
+ set to \code{FALSE} to make \code{llist} ignore the variables' \code{label} attribute and
+ use the variables' names.
+ }
+}
+\value{
+ \code{label} returns the label attribute of x, if any; otherwise, "".
+ \code{label} is used
+ most often for the individual variables in data frames. The function
+ \code{sas.get} copies labels over from SAS if they exist.
+}
+\seealso{
+ \code{\link{sas.get}}, \code{\link{describe}}
+}
+\examples{
+age <- c(21,65,43)
+y <- 1:3
+label(age) <- "Age in Years"
+plot(age, y, xlab=label(age))
+
+data <- data.frame(age=age, y=y)
+label(data)
+
+label(data, self=TRUE) <- "A data frame"
+label(data, self=TRUE)
+
+x1 <- 1:10
+x2 <- 10:1
+label(x2) <- 'Label for x2'
+units(x2) <- 'mmHg'
+x2
+x2[1:5]
+dframe <- data.frame(x1, x2)
+Label(dframe)
+
+labelLatex(x2, hfill=TRUE, bold=TRUE)
+labelLatex(label='Velocity', units='m/s')
+
+##In these examples of llist, note that labels are printed after
+##variable names, because of print.labelled
+a <- 1:3
+b <- 4:6
+label(b) <- 'B Label'
+llist(a,b)
+llist(a,b,d=0)
+llist(a,b,0)
+
+w <- llist(a, b>5, d=101:103)
+sapply(w, function(x){
+  hist(as.numeric(x), xlab=label(x))
+  # locator(1) ## wait for mouse click
+})
+
+# Or: for(u in w) {hist(u); title(label(u))}
+}
+\keyword{attribute}
+\keyword{misc}
+\keyword{utilities}
+% Converted by Sd2Rd version 1.21. |
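| A quick sketch of the stored state, following the description above:
x <- c(21, 65, 43)
label(x) <- 'Age in Years'
attr(x, 'label')   # "Age in Years"
class(x)           # "labelled" has been prepended to the class vector |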
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/latex.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/latex.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,634 @@
+\encoding{latin1}
+\name{latex}
+\alias{latex}
+\alias{latex.default}
+\alias{latex.function}
+\alias{latex.list}
+\alias{latexTranslate}
+\alias{latexSN}
+\alias{latexVerbatim}
+\alias{dvi}
+\alias{print.dvi}
+\alias{dvi.latex}
+\alias{dvips}
+\alias{dvips.latex}
+\alias{dvips.dvi}
+\alias{dvigv}
+\alias{dvigv.latex}
+\alias{dvigv.dvi}
+\alias{print.latex}
+\alias{show.latex}
+\alias{show.dvi}
+\title{
+ Convert an S object to LaTeX, and Related Utilities
+}
+\description{
+\code{latex} converts its argument to a \file{.tex} file appropriate
+for inclusion in a LaTeX2e document. \code{latex} is a generic
+function that calls one of \code{latex.default},
+\code{latex.function}, \code{latex.list}.
+
+\code{latex.default}
+does appropriate rounding and decimal alignment and produces a
+file containing a LaTeX tabular environment to print the matrix or data.frame
+\code{x} as a table.
+
+\code{latex.function} prepares an S function for printing by issuing \code{sed}
+commands that are similar to those in the
+\code{S.to.latex} procedure in the \code{s.to.latex} package (Chambers
+and Hastie, 1993). \code{latex.function} can also produce
+\code{verbatim} output or output that works with the \code{Sweavel}
+LaTeX style at \url{http://biostat.mc.vanderbilt.edu/SweaveTemplate}.
+
+\code{latex.list} calls \code{latex} recursively for each element in the argument.
+
+\code{latexTranslate} translates particular items in character
+strings to LaTeX format, e.g., makes \samp{a^2 = a\$^2\$} for superscript within
+variable labels. LaTeX names of greek letters (e.g., \code{"alpha"})
+will have backslashes added if \code{greek==TRUE}. Math mode is
+inserted as needed.
+\code{latexTranslate} assumes that input text always has matches,
+e.g. \code{[) [] (] ()}, and that surrounding by \samp{\$\$} is OK.
+
+\code{latexSN} converts a vector of floating point numbers to character
+strings using LaTeX exponents. Dollar signs to enter math mode are not
+added.
+
+\code{latexVerbatim} on an object executes the object's \code{print} method,
+capturing the output for a file inside a LaTeX verbatim environment.
+
+\code{dvi} uses the system \code{latex} command to compile LaTeX code produced
+by \code{latex}, including any needed styles. \code{dvi}
+will put a \samp{\\documentclass\{report\}} and \samp{\\end\{document\}} wrapper
+around a file produced by \code{latex}. By default, the \samp{geometry} LaTeX package is
+used to omit all margins and to set the paper size to a default of
+5.5in wide by 7in tall. The result of \code{dvi} is a .dvi file. To both
+format and screen display a non-default size, use for example
+\code{print(dvi(latex(x), width=3, height=4),width=3,height=4)}. Note that
+you can use something like \samp{xdvi -geometry 460x650 -margins 2.25in
+file} without changing LaTeX defaults to emulate this.
+
+\code{dvips} will use the system \code{dvips} command to print the .dvi file to
+the default system printer, or create a postscript file if \code{file}
+is specified.
+
+\code{dvigv} uses the system \code{dvips} command to convert the input object
+to a .dvi file, and uses the system \code{dvips} command to convert it to
+postscript. Then the postscript file is displayed using Ghostview
+(assumed to be the system command \command{gv}).
+
+There are \code{show} methods for displaying typeset LaTeX
+on the screen using the system \command{xdvi}
+command. If you \code{show} a LaTeX file created by
+\code{latex} without running it through \code{dvi} using
+\code{show.dvi(object)}, the
+\code{show} method will run it through \code{dvi} automatically.
+These \code{show}
+methods are not S Version 4 methods so you have to use full names such
+as \code{show.dvi} and \code{show.latex}. Use the \code{print} methods for
+more automatic display of typesetting, e.g. typing \code{latex(x)} will
+invoke xdvi to view the typeset document.
+}
+\usage{
+latex(object, \dots)
+
+\method{latex}{default}(object,
+    t[...]c OS X is
+ to install \samp{X11} and \samp{X11SDK} if not already installed,
+ start \samp{X11} within the R GUI, and issue the command
+ \code{Sys.setenv( PATH=paste(Sys.getenv("PATH"),"/usr/texbin",sep=":")
+ )}. To avoid any complications of using \samp{X11} under MacOS, users
+ can install the \samp{TeXShop} package, which will associate
+ \file{.dvi} files with a viewer that displays a \file{pdf} version of
+ the file after a hidden conversion from \file{dvi} to \file{pdf}.
+
+ System options can be used to specify external commands to be used.
+ Defaults are given by \code{options(xdvicmd='xdvi')} or
+ \code{options(xdvicmd='yap')}, \code{options(dvipscmd='dvips')},
+ \code{options(latexcmd='latex')}. For MacOS specify
+ \code{options(xdvicmd='MacdviX')} or if TeXShop is installed,
+ \code{options(xdvicmd='open')}.
+
+ To use \samp{pdflatex} rather than \samp{latex}, set
+ \code{options(latexcmd='pdflatex')},
+ \code{options(dviExtension='pdf')}, and set
+ \code{options('xdvicmd')} to your chosen PDF previewer.
+
+ If running S-Plus and your directory for temporary files is not
+ \file{/tmp} (Unix/Linux) or \file{\\windows\\temp} (Windows), add your
+ own \code{tempdir} function such as \code{
+ tempdir <- function() "/yourmaindirectory/yoursubdirectory"}
+
+ To prevent the latex file from being displayed store the result of
+ \code{latex} in an object, e.g. \code{w <- latex(object, file='foo.tex')}.
+}
+\author{
+ Frank E. Harrell, Jr.,\cr
+ Department of Biostatistics,\cr
+ Vanderbilt University,\cr
+ \email{f.harrell@vanderbilt.edu}
+
+ Richard M. Heiberger,\cr
+ Department of Statistics,\cr
+ Temple University, Philadelphia, PA.\cr
+ \email{rmh@temple.edu}
+
+ David R. Whiting,\cr
+ School of Clinical Medical Sciences (Diabetes),\cr
+ University of Newcastle upon Tyne, UK.\cr
+ \email{david.whiting@ncl.ac.uk}
+
+}
+\seealso{
+\code{\link{html}}, \code{\link{format.df}}, \code{\link[tools]{texi2dvi}}
+}
+\examples{
+x <- matrix(1:6, nrow=2, dimnames=list(c('a','b'),c('c','d','this that')))
+\dontrun{
+latex(x) # creates x.tex in working directory
+# The result of the above command is an object of class "latex"
+# which here is automatically printed by the latex print method.
+# The latex print method prepends and appends latex headers and
+# calls the latex program in the PATH. If the latex program is
+# not in the PATH, you will get error messages from the operating
+# system.
+
+w <- latex(x, file='/tmp/my.tex')
+# Does not call the latex program as the print method was not invoked
+print.default(w)
+# Shows the contents of the w variable without attempting to latex it.
+
+d <- dvi(w)  # compile LaTeX document, make .dvi
+             # latex assumed to be in path
+d            # or show(d) : run xdvi (assumed in path) to display
+w            # or show(w) : run dvi then xdvi
+dvips(d)     # run dvips to print document
+dvips(w)     # run dvi then dvips
+library(tools)
+texi2dvi('/tmp/my.tex')   # compile and produce pdf file in working dir.
+}
+latex(x, file="")   # just write out LaTeX code to screen
+
+\dontrun{
+# Use paragraph formatting to wrap text to 3 in. wide in a column
+d <- data.frame(x=1:2,
+                y=c(paste("a",
+                    paste(rep("very",30),collapse=" "),"long string"),
+                "a short string"))
+latex(d, file="", col.just=c("l", "p{3in}"), table.env=FALSE)
+}
+
+\dontrun{
+# After running latex( ) multiple times with different special styles in
+# effect, make a file that will call for the needed LaTeX packages when
+# latex is run (especially when using Sweave with R)
+if(exists(latexStyles))
+  cat(paste('\\usepackage{',latexStyles,'}',sep=''),
+      file='stylesused.tex', sep='\n')
+# Then in the latex job have something like:
+# \documentclass{article}
+# \input{stylesused}
+# \begin{document}
+# ...
+}
+}
+\keyword{utilities}
+\keyword{interface}
+\keyword{methods}
+\keyword{file}
+\keyword{character}
+\keyword{manip} |
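| Collecting the option settings described above into one sketch for typesetting with pdflatex; the previewer command is an assumption, substitute any PDF viewer available on your system:
options(latexcmd     = 'pdflatex',
        dviExtension = 'pdf',
        xdvicmd      = 'open')   # assumption: 'open' (MacOS); 'evince' or similar elsewhere
# w <- latex(x); w   # would now run pdflatex and launch the chosen previewer |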
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/latexDotchart.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/latexDotchart.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,105 @@ +\name{latexDotchart} +\alias{latexDotchart} +\title{Enhanced Dot Chart for LaTeX Picture Environment with epic} +\description{ +\code{latexDotchart} is a translation of the \code{dotchart3} function +for producing a vector of character strings containing LaTeX picture +environment markup that mimics \code{dotchart3} output. The LaTeX +\code{epic} and \code{color} packages are required. The \code{add} and +\code{horizontal=FALSE} options are not available for +\code{latexDotchart}, however. +} +\usage{ +latexDotchart(data, labels, groups=NULL, gdata=NA, + xlab='', auxdata, auxgdata=NULL, auxtitle, + w=4, h=4, margin, + lines=TRUE, dotsize = .075, size='small', size.labels='small', + size.group.labels='normalsize', ttlabels=FALSE, sort.=TRUE, + xaxis=TRUE, lcolor='gray', ...) +} +\arguments{ + \item{data}{a numeric vector whose values are shown on the x-axis} + \item{labels}{a vector of labels for each point, corresponding to + \code{x}. If omitted, \code{names(data)} are used, and if there are + no \code{names}, integers prefixed by \code{"#"} are used.} + \item{groups}{an optional categorical variable indicating how + \code{data} values are grouped} + \item{gdata}{data values for groups, typically summaries such as group + medians} + \item{xlab}{x-axis title} + \item{auxdata}{ + a vector of auxiliary data, of the same length + as the first (\code{data}) argument. If present, this + vector of values will be printed outside the right margin of the dot + chart. Usually \code{auxdata} represents cell sizes. + } + \item{auxgdata}{ + similar to \code{auxdata} but corresponding to the \code{gdata} + argument. These usually represent overall sample sizes for each + group of lines.} + \item{auxtitle}{ + if \code{auxdata} is given, \code{auxtitle} specifies a column + heading for the extra printed data in the chart, e.g., \code{"N"}} + \item{w}{width of picture in inches} + \item{h}{height of picture in inches} + \item{margin}{a 4-vector representing, in inches, the margin to the + left of the x-axis, below the y-axis, to the right of the x-axis, + and above the y-axis. By default these are computed making educated + guesses about how to accommodate \code{auxdata} etc.} + \item{lines}{set to \code{FALSE} to suppress drawing of reference + lines} + \item{dotsize}{diameter of filled circles, in inches, for drawing dots} + \item{size}{size of text in picture. This and the next two arguments + are LaTeX font commands without the opening backslash, e.g., + \code{'normalsize'}, \code{'small'}, \code{'large'}, \code{smaller[2]}.} + \item{size.labels}{size of labels} + \item{size.group.labels}{size of labels corresponding to \code{groups}} + \item{ttlabels}{set to \code{TRUE} to use typewriter monospaced font + for labels} + \item{sort.}{ + set to \code{FALSE} to keep \code{latexDotchart} from sorting the input + data, i.e., it will assume that the data are already properly + arranged. This is especially useful when you are using \code{gdata} + and \code{groups} and you want to control the + order that groups appear on the chart (from top to bottom).} + \item{xaxis}{set to \code{FALSE} to suppress drawing x-axis} + \item{lcolor}{ + color for horizontal reference lines. Default is \code{"gray"}} + \item{...}{ignored} +} +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University +\cr +\email{f.harrell@vanderbilt.edu} +} +\seealso{ +\code{\link[Hmisc]{dotchart3}} +} +\examples{ +\dontrun{ +z <- latexDotchart(c(.1,.2), c('a','bbAAb'), xlab='This Label', + auxdata=c(.1,.2), auxtitle='Zcriteria') +f <- '/tmp/t.tex' +cat('\\documentclass{article}\n\\usepackage{epic,color}\n\\begin{document}\n', file=f) +cat(z, sep='\n', file=f, append=TRUE) +cat('\\end{document}\n', file=f, append=TRUE) + +set.seed(135) +maj <- factor(c(rep('North',13),rep('South',13))) +g <- paste('Category',rep(letters[1:13],2)) +n <- sample(1:15000, 26, replace=TRUE) +y1 <- runif(26) +y2 <- pmax(0, y1 - runif(26, 0, .1)) +z <- latexDotchart(y1, g, groups=maj, auxdata=n, auxtitle='n', xlab='Y', + size.group.labels='large', ttlabels=TRUE) +f <- '/tmp/t2.tex' +cat('\\documentclass{article}\n\\usepackage{epic,color}\n\\begin{document}\n\\framebox{', file=f) +cat(z, sep='\n', file=f, append=TRUE) +cat('}\\end{document}\n', file=f, append=TRUE) +} +} +\keyword{hplot} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/latexTabular.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/latexTabular.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,54 @@ +\name{latexTabular} +\alias{latexTabular} +\title{Convert a Data Frame or Matrix to a LaTeX Tabular} +\description{ +\code{latexTabular} creates a character vector representing a matrix or +data frame in a simple \samp{tabular} environment. +} +\usage{ +latexTabular(x, headings=colnames(x), + align =paste(rep('c',ncol(x)),collapse=''), + halign=paste(rep('c',ncol(x)),collapse=''), + helvetica=TRUE, translate=TRUE, hline=0, \dots) +} +\arguments{ + \item{x}{a matrix or data frame} + \item{headings}{a vector of character strings specifying column + headings for \samp{latexTabular}, defaulting to \code{x}'s + \code{colnames}} + \item{align}{a character string specifying column + alignments for \samp{latexTabular}, defaulting to + \code{paste(rep('c',ncol(x)),collapse='')} to center. You may + specify \code{align='c|c'} and other LaTeX tabular formatting.} + \item{halign}{a character string specifying alignment for + column headings, defaulting to centered.} + \item{helvetica}{set to \code{FALSE} to use default LaTeX font in + \samp{latexTabular} instead of helvetica.} + \item{translate}{set to \code{FALSE} if column headings are already in + LaTeX format, otherwise \code{latexTabular} will run them through + \code{latexTranslate}} + \item{hline}{set to 1 to put \code{hline} after heading, 2 to also put + \code{hline}s before and after heading and at table end} + \item{\dots}{if present, \code{x} is run through \code{format.df} with + those extra arguments} + } +\value{a character string containing LaTeX markup} +\author{ + Frank E. Harrell, Jr.,\cr + Department of Biostatistics,\cr + Vanderbilt University,\cr + \email{f.harrell@vanderbilt.edu} +} +\seealso{ +\code{\link{latex.default}}, \code{\link{format.df}} +} +\examples{ +x <- matrix(1:6, nrow=2, dimnames=list(c('a','b'),c('c','d','this that'))) +latexTabular(x) # a character string with LaTeX markup +} +\keyword{utilities} +\keyword{interface} +\keyword{methods} +\keyword{file} +\keyword{character} +\keyword{manip} |
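| A slightly fuller call exercising the documented arguments (a sketch; the alignment string is an ordinary LaTeX column spec):
x <- matrix(1:6, nrow=2, dimnames=list(c('a','b'), c('c','d','this that')))
cat(latexTabular(x, align='l|c|c', hline=2), sep='\n')   # rules before/after heading and at end |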
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/latexTherm.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/latexTherm.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,82 @@ +\name{latexTherm} +\alias{latexTherm} +\alias{latexNeedle} +\title{Create LaTeX Thermometers and Colored Needles} +\description{ +\code{latexTherm} creates a LaTeX picture environment for drawing a +series of thermometers +whose heights depict the values of a variable \code{y} assumed to be +scaled from 0 to 1. This is useful for showing fractions of sample +analyzed in any table or plot, intended for a legend. For example, four +thermometers might be used to depict the fraction of enrolled patients +included in the current analysis, the fraction randomized, the fraction +of patients randomized to treatment A being analyzed, and the fraction +randomized to B being analyzed. The picture is placed +inside a LaTeX macro definition for macro variable named \code{name}, to +be invoked by the user later in the LaTeX file using \code{name} +preceded by a backslash. + +If \code{y} has an attribute \code{"table"}, it is assumed to contain a +character string with LaTeX code. This code is used as a tooltip popup +for PDF using the LaTeX \code{ocgtools} package or using style +\code{tooltips}. Typically the code will contain a \code{tabular} +environment. The user must define a LaTeX macro \code{tooltipn} that +takes two arguments (original object and pop-up object) that does +the pop-up. + +\code{latexNeedle} is similar to \code{latexTherm} except that vertical +needles are produced and each may have its own color. A grayscale box +is placed around the needles and provides the 0-1 \code{y}-axis +reference. Horizontal grayscale grid lines may be drawn. +} +\usage{ +latexTherm(y, name, w = 0.075, h = 0.15, spacefactor = 1/2, extra = 0.07, + file = "", append = TRUE) + +latexNeedle(y, col='black', href=0.5, name, w=.05, h=.15, + extra=0, file = "", append=TRUE) +} +\arguments{ + \item{y}{a vector of 0-1 scaled values. Boxes and their frames are + omitted for \code{NA} elements} + \item{name}{name of LaTeX macro variable to be defined} + \item{w}{width of a single box (thermometer) in inches. For + \code{latexNeedle} is the spacing between needles.} + \item{h}{height of a single box in inches. For \code{latexNeedle} is + the height of the frame.} + \item{spacefactor}{fraction of \code{w} added for extra space between + boxes for \code{latexTherm}} + \item{extra}{extra space in inches to set aside to the right of and + above the series of boxes or frame} + \item{file}{name of file to which to write LaTeX code. Default is the + console.} + \item{append}{set to \code{FALSE} to write over \code{file}} + \item{col}{a vector of colors corresponding to positions in \code{y}. + \code{col} is repeated if too short.} + \item{href}{values of \code{y} (0-1) for which horizontal grayscale + reference lines are drawn for \code{latexNeedle}. Set to + \code{NULL} to not draw any.} +} +\author{Frank Harrell} +\examples{ +\dontrun{ +# The following is in the Hmisc tests directory +# For a knitr example see latexTherm.Rnw in that directory +ct <- function(...) cat(..., sep='') +ct('\\documentclass{report}\\begin{document}\n') +latexTherm(c(1, 1, 1, 1), name='lta') +latexTherm(c(.5, .7, .4, .2), name='ltb') +latexTherm(c(.5, NA, .75, 0), w=.3, h=1, name='ltc', extra=0) +latexTherm(c(.5, NA, .75, 0), w=.3, h=1, name='ltcc') +latexTherm(c(0, 0, 0, 0), name='ltd') +ct('This is the first:\\lta and the second:\\ltb\\\\ and the third +without extra:\\ltc END\\\\\nThird with extra:\\ltcc END\\\\ +\\vspace{2in}\\\\ +All data = zero, frame only:\\ltd\\\\ +\\end{document}\n') +}} +\keyword{utilities} +\keyword{interface} +\keyword{file} +\keyword{character} +\keyword{manip} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/legendfunctions.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/legendfunctions.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,23 @@ +\name{legendfunctions} +\alias{legendfunctions} +\alias{Key} +\alias{sKey} +\alias{Key2} +\alias{Points} +\alias{Abline} +\alias{Curve} +\title{Legend Creation Functions} +\description{ + Wrappers for plot-defined legend plotting functions +} +\usage{ +Key(...) +Key2(...) +sKey(...) +Points(...) +Abline(...) +Curve(...) +} +\arguments{ + \item{\dots}{arguments to pass to wrapped functions} +} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/list.tree.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/list.tree.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,71 @@ +\name{list.tree} +\alias{list.tree} +\title{ +Pretty-print the Structure of a Data Object +} +\description{ +This is a function to pretty-print the structure of any data object +(usually a list). It is similar to the R function \code{str}. +} +\usage{ +list.tree(struct, depth=-1, numbers=FALSE, maxlen=22, maxcomp=12, + attr.print=TRUE, front="", fill=". ", name.of, size=TRUE) +} +\arguments{ +\item{struct}{ +The object to be displayed +} +\item{depth}{ +Maximum depth of recursion (of lists within lists \dots) to be printed; negative +value means no limit on depth. +} +\item{numbers}{ +If TRUE, use numbers in leader instead of dots to +represent position in structure. +} +\item{maxlen}{ +Approximate maximum length (in characters) allowed on each line to give the +first few values of a vector. maxlen=0 suppresses printing any values. +} +\item{maxcomp}{ +Maximum number of components of any list that will be described. +} +\item{attr.print}{ +Logical flag, determining whether a description of attributes will be printed. +} +\item{front}{ +Front material of a line, for internal use. +} +\item{fill}{ +Fill character used for each level of indentation. +} +\item{name.of}{ +Name of object, for internal use (deparsed version of struct by default). +} +\item{size}{ +Logical flag, should the size of the object in bytes be printed? +}} +\details{ +A description of the structure of struct will be printed in outline +form, with indentation +for each level of recursion, showing the internal storage mode, length, +class(es) if any, attributes, and first few elements of each data vector. +By default each level of list recursion is indicated by a "." and +attributes by "A". +} +\seealso{ +\code{\link{str}} +} +\examples{ +X <- list(a=ordered(c(1:30,30:1)),b=c("Rick","John","Allan"), + c=diag(300),e=cbind(p=1008:1019,q=4)) +list.tree(X) +# In R you can say str(X) +} +\author{ +Alan Zaslavsky, \email{zaslavsk@hcp.med.harvard.edu} +} +\keyword{documentation} +% Converted by Sd2Rd version 1.21. |
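| A second call (a sketch) exercising the depth, numbers, and maxlen arguments described above:
X <- list(a=ordered(c(1:30,30:1)), b=c("Rick","John","Allan"),
          c=diag(300), e=cbind(p=1008:1019, q=4))
list.tree(X, depth=1, numbers=TRUE, maxlen=40)   # top level only, numbered leaders |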
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/mApply.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/mApply.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,85 @@ +\name{mApply} +\concept{apply for matrix} +\concept{apply for vector} +\alias{mApply} + +\title{Apply a Function to Rows of a Matrix or Vector} + +\description{ +\code{mApply} is like \code{tapply} except that the first argument can +be a matrix or a vector, and the output is cleaned up if \code{simplify=TRUE}. +It uses code adapted from Tony Plate (\email{tplate@blackmesacapital.com}) to +operate on grouped submatrices. + +As \code{mApply} can be much faster than using \code{by}, it is often +worth the trouble of converting a data frame to a numeric matrix for +processing by \code{mApply}. \code{asNumericMatrix} will do this, and +\code{matrix2dataFrame} will convert a numeric matrix back into a data +frame. +} + +\usage{ +mApply(X, INDEX, FUN, \dots, simplify=TRUE, keepmatrix=FALSE) +} + +\arguments{ +\item{X}{ +a vector or matrix capable of being operated on by the +function specified as the \code{FUN} argument +} +\item{INDEX}{ +list of factors, each with as many elements as \code{X} has rows. +} +\item{FUN}{ +the function to be applied. In the case of functions like +\code{+}, \code{\%*\%}, etc., the function name must be quoted. +} +\item{\dots}{ +optional arguments to \code{FUN}. +} +\item{simplify}{ +set to \code{FALSE} to suppress simplification of the result into +an array, matrix, etc. +} +\item{keepmatrix}{set to \code{TRUE} to keep result as a matrix even if + \code{simplify} is \code{TRUE}, in the case of only one stratum + } +} +\value{ +For \code{mApply}, the returned value is a vector, matrix, or list. +If \code{FUN} returns more than one number, the result is an array if +\code{simplify=TRUE} and is a list otherwise. If a matrix is returned, +its rows correspond to unique combinations of \code{INDEX}. If +\code{INDEX} is a list with more than one vector, \code{FUN} returns +more than one number, and \code{simplify=FALSE}, the returned value is a +list that is an array with the first dimension corresponding to the last +vector in \code{INDEX}, the second dimension corresponding to the next +to last vector in \code{INDEX}, etc., and the elements of the list-array +correspond to the values computed by \code{FUN}. In this situation the +returned value is a regular array if \code{simplify=TRUE}. The order +of dimensions is as previously but the additional (last) dimension +corresponds to values computed by \code{FUN}. +} + +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University +\cr +\email{f.harrell@vanderbilt.edu} +} + +\seealso{ +\code{\link{asNumericMatrix}}, \code{\link{matrix2dataFrame}}, \code{\link{tapply}}, +\code{\link{sapply}}, \code{\link{lapply}}, \code{\link{mapply}}, \code{\link{by}}. +} + +\examples{ +require(datasets, TRUE) +a <- mApply(iris[,-5], iris$Species, mean) +} + +\keyword{iteration} +\keyword{category} |
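| A sketch of the data-frame round trip described above (column means within each Species), assuming FUN receives each group's submatrix:
m <- asNumericMatrix(iris[, -5])          # data frame -> numeric matrix
mApply(m, iris$Species, colMeans)         # one row per Species, one column per variable |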
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/mChoice.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/mChoice.Rd Wed Jun 28 20:28:48 2017 -0400 |
| b'@@ -0,0 +1,250 @@\n+\\name{mChoice}\n+\\alias{mChoice}\n+\\alias{format.mChoice}\n+\\alias{print.mChoice}\n+\\alias{summary.mChoice}\n+\\alias{as.character.mChoice}\n+\\alias{as.double.mChoice}\n+\\alias{inmChoice}\n+\\alias{match.mChoice}\n+\\alias{[.mChoice}\n+\\alias{print.summary.mChoice}\n+\\alias{is.mChoice}\n+\\alias{Math.mChoice}\n+\\alias{Ops.mChoice}\n+\\alias{Summary.mChoice}\n+\\title{Methods for Storing and Analyzing Multiple Choice Variables}\n+\\description{\n+ \\code{mChoice} is a function that is useful for grouping \n+ variables that represent\n+ individual choices on a multiple choice question. These choices are\n+ typically factor or character values but may be of any type. Levels\n+ of component factor variables need not be the same; all unique levels\n+ (or unique character values) are collected over all of the multiple\n+ variables. Then a new character vector is formed with integer choice\n+ numbers separated by semicolons. Optimally, a database system would\n+ have exported the semicolon-separated character strings with a\n+ \\code{levels} attribute containing strings defining value labels\n+ corresponding to the integer choice numbers. \\code{mChoice} is a\n+ function for creating a multiple-choice variable after the fact.\n+ \\code{mChoice} variables are explicitly handed by the \\code{describe}\n+ and \\code{summary.formula} functions. \\code{NA}s or blanks in input\n+ variables are ignored. \n+\n+ \\code{format.mChoice} will convert the multiple choice representation\n+ to text form by substituting \\code{levels} for integer codes.\n+ \\code{as.double.mChoice} converts the \\code{mChoice} object to a\n+ binary numeric matrix, one column per used level (or all levels of\n+ \\code{drop=FALSE}. This is called by\n+ the user by invoking \\code{as.numeric}. There is a\n+ \\code{print} method and a \\code{summary} method, and a \\code{print}\n+ method for the \\code{summary.mChoice} object. The \\code{summary}\n+ method computes frequencies of all two-way choice combinations, the\n+ frequencies of the top 5 combinations, information about which other\n+ choices are present when each given choice is present, and the\n+ frequency distribution of the number of choices per observation. 
This\n+ \\code{summary} output is used in the \\code{describe} function.\n+\n+ \\code{inmChoice} creates a logical vector the same length as \\code{x}\n+ whose elements are \\code{TRUE} when the observation in \\code{x}\n+ contains at least one of the codes or value labels in the second\n+ argument.\n+\n+ \\code{match.mChoice} creates an integer vector of the indexes of all\n+ elements in \\code{table} which contain any of the specified levels.\n+\n+ \\code{is.mChoice} returns \\code{TRUE} if the argument is a multiple\n+ choice variable.\n+}\n+\\usage{\n+mChoice(\\dots, label=\'\',\n+ sort.levels=c(\'original\',\'alphabetic\'), \n+ add.none=FALSE, drop=TRUE)\n+\n+\\method{format}{mChoice}(x, minlength=NULL, sep=";", \\dots)\n+\n+\\method{as.double}{mChoice}(x, drop=FALSE, ...)\n+\n+\\method{print}{mChoice}(x, quote=FALSE, max.levels=NULL,\n+ width=getOption("width"), ...)\n+\n+\\method{as.character}{mChoice}(x, ...)\n+\n+\\method{summary}{mChoice}(object, ncombos=5, minlength=NULL, drop=TRUE, ...)\n+\n+\\method{print}{summary.mChoice}(x, prlabel=TRUE, ...)\n+\n+\\method{[}{mChoice}(x, ..., drop=FALSE)\n+\n+match.mChoice(x, table, nomatch=NA, incomparables=FALSE)\n+\n+inmChoice(x, values)\n+\n+is.mChoice(x)\n+\n+\\method{Summary}{mChoice}(..., na.rm)\n+}\n+\\arguments{\n+ \\item{na.rm}{\n+ Logical: remove \\code{NA}\'s from data\n+ }\n+ \\item{table}{\n+ a vector (mChoice) of values to be matched against.\n+ }\n+ \\item{nomatch}{\n+ value to return if a value for \\code{x} does not exist in\n+ \\code{table}.\n+ }\n+ \\item{incomparables}{\n+ logical, whether incomparable values should be compared.\n+ }\n+ \\item{...}{\n+ a series of vectors\n+ }\n+ \\item{sort.levels}{\n+ set to \\code{\'alphabetic\'} to sort levels alphabetically rather\n+ than preserving the original left to right\n+ order'..b'd.none}{\n+ Set \\code{add.none} to \\code{TRUE} to make a new category\n+ \\code{\'none\'} if it doesn\'t already exist and if there is an\n+ observation with no choices selected.\n+ }\n+ \\item{drop}{\n+ set \\code{drop=FALSE} to keep unused factor levels as columns of the matrix\n+ produced by \\code{mChoice}\n+ }\n+ \\item{x}{\n+ an object of class \\code{"mChoice"} such as that created by\n+ \\code{mChoice}. For \\code{is.mChoice}, any object.\n+ }\n+ \\item{object}{\n+ an object of class \\code{"mChoice"} such as that created by\n+ \\code{mChoice}\n+ }\n+ \\item{ncombos}{\n+ maximum number of combinations to tabulate.\n+ }\n+ \\item{width}{\n+ Width of a line of text to be formatted\n+ }\n+ \\item{quote}{\n+ whether to quote the output\n+ }\n+ \\item{max.levels}{max levels to be displayed}\n+ \\item{minlength}{\n+ By default no abbreviation of levels is done in\n+ \\code{format} and \\code{summary}. Specify a positive integer to use\n+ abbreviation in those functions. See \\code{\\link{abbreviate}}.\n+ }\n+ \\item{sep}{\n+ character to use to separate levels when formatting\n+ }\n+ \\item{prlabel}{\n+ set to \\code{FALSE} to keep\n+ \\code{print.summary.mChoice} from printing the variable label and\n+ number of unique values\n+ }\n+ \\item{values}{\n+ a scalar or vector. If \\code{values} is integer, it is\n+ the choice codes, and if it is a character vector, it is assumed to\n+ be value labels.\n+ }\n+}\n+\\value{\n+ \\code{mChoice} returns a character vector of class \\code{"mChoice"}\n+ plus attributes \\code{"levels"} and \\code{"label"}.\n+ \\code{summary.mChoice} returns an object of class\n+ \\code{"summary.mChoice"}. 
\\code{inmChoice} returns a logical vector.\n+ \\code{format.mChoice} returns a character vector, and\n+ \\code{as.double.mChoice} returns a binary numeric matrix.\n+}\n+\\author{\n+ Frank Harrell\n+ \\cr\n+ Department of Biostatistics\n+ \\cr\n+ Vanderbilt University\n+ \\cr\n+ \\email{f.harrell@vanderbilt.edu}\n+}\n+\\seealso{\n+ \\code{\\link{label}}\n+}\n+\\examples{\n+options(digits=3)\n+set.seed(3)\n+n <- 20\n+sex <- factor(sample(c("m","f"), n, rep=TRUE))\n+age <- rnorm(n, 50, 5)\n+treatment <- factor(sample(c("Drug","Placebo"), n, rep=TRUE))\n+\n+\n+# Generate a 3-choice variable; each of 3 variables has 5 possible levels\n+symp <- c(\'Headache\',\'Stomach Ache\',\'Hangnail\',\n+ \'Muscle Ache\',\'Depressed\')\n+symptom1 <- sample(symp, n, TRUE)\n+symptom2 <- sample(symp, n, TRUE)\n+symptom3 <- sample(symp, n, TRUE)\n+cbind(symptom1, symptom2, symptom3)[1:5,]\n+Symptoms <- mChoice(symptom1, symptom2, symptom3, label=\'Primary Symptoms\')\n+Symptoms\n+print(Symptoms, long=TRUE)\n+format(Symptoms[1:5])\n+inmChoice(Symptoms,\'Headache\')\n+levels(Symptoms)\n+inmChoice(Symptoms, 3)\n+inmChoice(Symptoms, c(\'Headache\',\'Hangnail\'))\n+# Note: In this example, some subjects have the same symptom checked\n+# multiple times; in practice these redundant selections would be NAs\n+# mChoice will ignore these redundant selections\n+\n+meanage <- N <- numeric(5)\n+for(j in 1:5) {\n+ meanage[j] <- mean(age[inmChoice(Symptoms,j)])\n+ N[j] <- sum(inmChoice(Symptoms,j))\n+}\n+names(meanage) <- names(N) <- levels(Symptoms)\n+meanage\n+N\n+\n+# Manually compute mean age for 2 symptoms\n+mean(age[symptom1==\'Headache\' | symptom2==\'Headache\' | symptom3==\'Headache\'])\n+mean(age[symptom1==\'Hangnail\' | symptom2==\'Hangnail\' | symptom3==\'Hangnail\'])\n+\n+summary(Symptoms)\n+\n+#Frequency table sex*treatment, sex*Symptoms\n+summary(sex ~ treatment + Symptoms, fun=table)\n+# Check:\n+ma <- inmChoice(Symptoms, \'Muscle Ache\')\n+table(sex[ma])\n+\n+# could also do:\n+# summary(sex ~ treatment + mChoice(symptom1,symptom2,symptom3), fun=table)\n+\n+#Compute mean age, separately by 3 variables\n+summary(age ~ sex + treatment + Symptoms)\n+\n+\n+summary(age ~ sex + treatment + Symptoms, method="cross")\n+\n+f <- summary(treatment ~ age + sex + Symptoms, method="reverse", test=TRUE)\n+f\n+# trio of numbers represent 25th, 50th, 75th percentile\n+print(f, long=TRUE)\n+}\n+\\keyword{category}\n+\\keyword{manip}\n+\\concept{multiple choice}\n' |
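A short hedged sketch of the binary-matrix conversion described for as.double.mChoice (my illustration; it assumes Hmisc is attached and reuses the symptom setup from the example above):

require(Hmisc)
set.seed(3)
symp <- c('Headache', 'Stomach Ache', 'Hangnail', 'Muscle Ache', 'Depressed')
symptom1 <- sample(symp, 20, TRUE)
symptom2 <- sample(symp, 20, TRUE)
Symptoms <- mChoice(symptom1, symptom2, label='Primary Symptoms')
Y <- as.numeric(Symptoms)   # dispatches to as.double.mChoice
dim(Y)                      # one column per used level
colSums(Y)                  # how often each choice was selected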
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/makeNstr.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/makeNstr.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,31 @@ +\name{makeNstr} +\alias{makeNstr} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{ creates a string that is a repeat of a substring } +\description{ +Takes a character and creates a string that is the character repeated \code{len} times. +} +\usage{ +makeNstr(char, len) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{char}{ character to be repeated } + \item{len}{ number of times to repeat \code{char}. } +} +\value{ + A string that is \code{char} repeated \code{len} times. +} +\author{ Charles Dupont } +\seealso{ \code{\link[base]{paste}}, \code{\link[base]{rep}} } +\examples{ +makeNstr(" ", 5) + +\dontshow{ +if(makeNstr(" ", 5) != " ") stop("makeNstr failed test") +} +} +\keyword{ manip }% at least one, from doc/KEYWORDS +\keyword{ character }% __ONLY ONE__ keyword per line +\concept{string} +\concept{concat} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/mdb.get.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/mdb.get.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,62 @@ +\name{mdb.get} +\alias{mdb.get} +\title{Read Tables in a Microsoft Access Database} +\description{ + Assuming the \code{mdbtools} package has been installed on your + system and is in the system path, \code{mdb.get} imports + one or more tables in a Microsoft Access database. Date-time + variables are converted to dates or \code{chron} package date-time + variables. Tables are automatically exported to csv files, which are + then imported with \code{csv.get}. If \code{tables} + is unspecified, all tables in the database are retrieved. If more than + one table is imported, the result is a list of data frames. +} +\usage{ +mdb.get(file, tables=NULL, lowernames=FALSE, allow=NULL, + dateformat='\%m/\%d/\%y', ...) +} +\arguments{ + \item{file}{the file name containing the Access database} + \item{tables}{character vector specifying the names of tables to + import. Default is to import all tables. Specify + \code{tables=TRUE} to return the list of available tables.} + \item{lowernames}{set this to \code{TRUE} to change variable names to + lower case} + \item{allow}{a vector of characters allowed by \R that should not be + converted to periods in variable names. By default, underscores in + variable names are converted to periods as with \R before version + 1.9.} + \item{dateformat}{see \code{\link{cleanup.import}}. Default is the + usual Access format used in the U.S.} + \item{\dots}{arguments to pass to \code{csv.get}} +} +\details{ + Uses the \code{mdbtools} package executables \code{mdb-tables}, + \code{mdb-schema}, and \code{mdb-export} (with option \code{-b strip} + to drop any binary output). In Debian/Ubuntu Linux run + \code{apt-get install mdbtools}. + \code{cleanup.import} is invoked by \code{csv.get} to transform + variables and store them as efficiently as possible. +} +\value{a new data frame or a list of data frames} +\author{Frank Harrell, Vanderbilt University} +\seealso{ + \code{\link{data.frame}}, + \code{\link{cleanup.import}}, \code{\link{csv.get}}, + \code{\link{Date}}, \code{\link[chron]{chron}} +} +\examples{ +\dontrun{ +# Read all tables in the Microsoft Access database Nwind.mdb +d <- mdb.get('Nwind.mdb') +contents(d) +for(z in d) print(contents(z)) +# Just print the names of tables in the database +mdb.get('Nwind.mdb', tables=TRUE) +# Import one table +Orders <- mdb.get('Nwind.mdb', tables='Orders') +} +} +\keyword{manip} +\keyword{IO} +\keyword{file} |
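Since mdb.get shells out to external executables, a pre-flight check can fail fast when they are absent. The helper below is hypothetical (mdbToolsAvailable is not part of Hmisc), but Sys.which and the executable names come from the details section above:

mdbToolsAvailable <- function()
  all(nzchar(Sys.which(c('mdb-tables', 'mdb-schema', 'mdb-export'))))
if (mdbToolsAvailable()) {
  # list the table names only, as in the example above
  print(mdb.get('Nwind.mdb', tables=TRUE))
}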
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/mgp.axis.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/mgp.axis.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,70 @@ +\name{mgp.axis} +\alias{mgp.axis} +\alias{mgp.axis.labels} +\title{Draw Axes With Side-Specific mgp Parameters} +\description{ +\code{mgp.axis} is a version of \code{axis} that uses the appropriate +side-specific \code{mgp} parameter (see \code{\link{par}}) to account +for the different space requirements of axis labels on vertical vs. horizontal +axes. \code{mgp.axis} also fixes a bug in \code{axis(2,\dots)} +that causes it to assume \code{las=1}. + +\code{mgp.axis.labels} is used so that different spacing between tick +marks and axis tick mark labels may be specified for x- and y-axes. Use +\code{mgp.axis.labels('default')} to set defaults. Users can set values +manually using \code{mgp.axis.labels(x,y)} where \code{x} and \code{y} +are the 2nd values of \code{par('mgp')} to use for the x- and y-axes. Use +\code{mgp.axis.labels(type=w)} to retrieve values, where \code{w='x'}, +\code{'y'}, \code{'x and y'}, \code{'xy'}, to get the 3 \code{mgp} values +(first 3 types) or the 2 \code{mgp.axis.labels} values. +} +\usage{ +mgp.axis(side, at = NULL, \dots, + mgp = mgp.axis.labels(type = if (side == 1 | side == 3) "x" + else "y"), + axistitle = NULL) + +mgp.axis.labels(value,type=c('xy','x','y','x and y')) +} + +\arguments{ + \item{side,at}{see \code{\link{par}}} + \item{\dots}{arguments passed through to \code{\link{axis}}} + \item{mgp}{see \code{\link{par}}} + \item{axistitle}{if specified will cause \code{axistitle} to be drawn + on the appropriate axis as a title} + \item{value}{vector of values to which to set system option + \code{mgp.axis.labels}} + \item{type}{see above} +} +\section{Side Effects}{\code{mgp.axis.labels} stores the value in the + system option \code{mgp.axis.labels}} +\value{ + \code{mgp.axis.labels} returns the value of \code{mgp} (only the + second element of \code{mgp} if \code{type="xy"} or a list with + elements \code{x} and \code{y} if \code{type="x and y"}, each list + element being a 3-vector) for the + appropriate axis if \code{value} is not specified, otherwise it + returns nothing but the system option \code{mgp.axis.labels} is set. + + \code{mgp.axis} returns nothing. +} +\author{Frank Harrell} +\seealso{\code{\link{par}}} +\examples{ +\dontrun{ +mgp.axis.labels(type='x') # get default value for x-axis +mgp.axis.labels(type='y') # get value for y-axis +mgp.axis.labels(type='xy') # get 2nd element of both mgps +mgp.axis.labels(type='x and y') # get a list with 2 elements +mgp.axis.labels(c(3,.5,0), type='x') # set +options('mgp.axis.labels') # retrieve + +plot(..., axes=FALSE) +mgp.axis(1, axistitle="X Label") +mgp.axis(2, axistitle="Y Label") + +}} +\keyword{iplot} +\keyword{dplot} +\keyword{environment} |
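A concrete version of the \dontrun example (a sketch under the assumption that Hmisc is attached; the plot(...) line in the original is a placeholder):

require(Hmisc)
x <- runif(20); y <- runif(20)
plot(x, y, axes=FALSE, xlab='', ylab='')
mgp.axis(1, axistitle='X Label')   # bottom axis with its side-specific mgp
mgp.axis(2, axistitle='Y Label')   # left axis
box()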
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/minor.tick.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/minor.tick.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,46 @@ +\name{minor.tick} +\alias{minor.tick} +\title{ +Minor Tick Marks +} +\description{ +Adds minor tick marks to an existing plot. All minor tick marks that +will fit on the axes will be drawn. +} +\usage{ +minor.tick(nx=2, ny=2, tick.ratio=0.5) +} +\arguments{ +\item{nx}{ +number of intervals in which to divide the area between major tick marks on +the X-axis. Set to 1 to suppress minor tick marks. +} +\item{ny}{ +same as \code{nx} but for the Y-axis +} +\item{tick.ratio}{ +ratio of lengths of minor tick marks to major tick marks. The length +of major tick marks is retrieved from \code{par("tck")}. +}} +\section{Side Effects}{ +plots +} +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University +\cr +\email{f.harrell@vanderbilt.edu} +} +\seealso{ +\code{\link{axis}} +} +\examples{ +plot(runif(20),runif(20)) +minor.tick() +} +\keyword{aplot} +\keyword{hplot} +% Converted by Sd2Rd version 1.21. |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/mtitle.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/mtitle.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,72 @@ +\name{mtitle} +\alias{mtitle} +\title{ +Margin Titles +} +\description{ +Writes overall titles and subtitles after a multiple image plot is drawn. +If \code{par()$oma==c(0,0,0,0)}, \code{title} is used instead of \code{mtext} to draw +titles or subtitles that are inside the plotting region for a single plot. +} +\usage{ +mtitle(main, ll, lc, + lr=format(Sys.time(),'\%d\%b\%y'), + cex.m=1.75, cex.l=.5, \dots) +} +\arguments{ +\item{main}{ +main title to be centered over entire figure, default is none +} +\item{ll}{ +subtitle for lower left of figure, default is none +} +\item{lc}{ +subtitle for lower center of figure, default is none +} +\item{lr}{ +subtitle for lower right of figure, default is today's date in format +23Jan91 for UNIX or R (Thu May 30 09:08:13 1996 format for Windows). +Set to \code{""} to suppress lower right title. +} +\item{cex.m}{ +character size for main, default is 1.75 +} +\item{cex.l}{ +character size for subtitles +} +\item{...}{ +other arguments passed to \code{mtext} +}} +\value{ +nothing +} +\author{ +Frank Harrell +\cr +Department of Biostatistics, Vanderbilt University +\cr +\email{f.harrell@vanderbilt.edu} +} +\section{Side Effects}{ +plots +} +\seealso{ +\code{\link{par}}, \code{\link{mtext}}, \code{\link{title}}, \code{\link{unix}}, \code{\link{pstamp}} +} +\examples{ +#Set up for 1 plot on figure, give a main title, +#use date for lr +plot(runif(20),runif(20)) +mtitle("Main Title") + + +#Set up for 2 x 2 matrix of plots with a lower left subtitle and overall title +par(mfrow=c(2,2), oma=c(3,0,3,0)) +plot(runif(20),runif(20)) +plot(rnorm(20),rnorm(20)) +plot(exp(rnorm(20)),exp(rnorm(20))) +mtitle("Main Title",ll="n=20") +} +\keyword{hplot} +% Converted by Sd2Rd version 1.21. |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/multLines.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/multLines.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,43 @@ +\name{multLines} +\alias{multLines} +\title{Plot Multiple Lines} +\description{ + Plots multiple lines based on a vector \code{x} and a matrix \code{y}, + and draws thin vertical lines connecting limits represented by columns of + \code{y} beyond the first. It is assumed that either (1) the second + and third columns of \code{y} represent lower and upper confidence + limits, or that (2) there is an even number of columns beyond the + first and these represent ascending quantiles that are symmetrically + arranged around 0.5. +} +\usage{ +multLines(x, y, pos = c('left', 'right'), col='gray', + lwd=1, lty=1, lwd.vert = .85, lty.vert = 1, + alpha = 0.4, grid = FALSE) +} +\arguments{ + \item{x}{a numeric vector} + \item{y}{a numeric matrix with number of rows equal to the number of + \code{x} elements} + \item{pos}{when \code{pos='left'} the vertical lines are drawn, right + to left, to the left of the point \code{(x, y[, 1])}. Otherwise lines + are drawn left to right to the right of the point.} + \item{col}{a color used to connect \code{(x, y[,1])} pairs. The same + color but with transparency given by the \code{alpha} argument is + used to draw the vertical lines} + \item{lwd}{line width for main lines} + \item{lty}{line types for main lines} + \item{lwd.vert}{line width for vertical lines} + \item{lty.vert}{line type for vertical lines} + \item{alpha}{transparency} + \item{grid}{set to \code{TRUE} when using \code{grid}/\code{lattice}} +} +\author{Frank Harrell} +\examples{ +x <- 1:4 +y <- cbind(x, x-3, x-2, x-1, x+1, x+2, x+3) +plot(NA, NA, xlim=c(1,4), ylim=c(-2, 7)) +multLines(x, y, col='blue') +multLines(x, y, col='red', pos='right') +} +\keyword{hplot} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/na.delete.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/na.delete.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,40 @@ +\name{na.delete} +\alias{na.delete} +\title{ +Row-wise Deletion na.action +} +\description{ +Does row-wise deletion as \code{na.omit} does, but adds the frequency of missing values +for each predictor +to the \code{"na.action"} attribute of the returned model frame. +Optionally stores further details if \code{options(na.detail.response=TRUE)}. +} +\usage{ +na.delete(frame) +} +\arguments{ +\item{frame}{ +a model frame +}} +\value{ +a model frame with rows deleted and the \code{"na.action"} attribute added. +} +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University +\cr +\email{f.harrell@vanderbilt.edu} +} +\seealso{ +\code{\link{na.omit}}, \code{\link{na.keep}}, \code{\link{na.detail.response}}, \code{\link{model.frame.default}}, +\code{\link{naresid}}, \code{\link{naprint}} +} +\examples{ +# options(na.action="na.delete") +# ols(y ~ x) +} +\keyword{models} +% Converted by Sd2Rd version 1.21. |
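Because na.delete is an na.action function taking a model frame, it can be exercised without a fitting function. A hedged sketch (my addition; the exact contents of the attribute are as described above, not shown in this file):

set.seed(1)
d <- data.frame(y=rnorm(20), x1=rnorm(20), x2=rnorm(20))
d$x1[1:3] <- NA
mf <- model.frame(y ~ x1 + x2, data=d, na.action=na.delete)
nrow(mf)                    # 17 rows remain after row-wise deletion
str(attr(mf, 'na.action'))  # records the frequency of NAs per variable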
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/na.detail.response.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/na.detail.response.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,93 @@ +\name{na.detail.response} +\alias{na.detail.response} +\title{ +Detailed Response Variable Information +} +\description{ +This function is called by certain \code{na.action} functions if +\code{options(na.detail.response=TRUE)} is set. By default, this function +returns a matrix of counts of non-NAs and the mean of the response variable +computed separately by whether or not each predictor is NA. The default +action uses the last column of a \code{Surv} object, in effect computing the +proportion of events. Other summary functions may be specified by +using \code{options(na.fun.response="name of function")}. +} +\usage{ +na.detail.response(mf) +} +\arguments{ +\item{mf}{ +a model frame +}} +\value{ +a matrix, with rows representing the different statistics that are +computed for the response, and columns representing the different +subsets for each predictor (NA and non-NA value subsets). +} +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University +\cr +\email{f.harrell@vanderbilt.edu} +} +\seealso{ +\code{\link{na.omit}}, \code{\link{na.delete}}, \code{\link{model.frame.default}}, +\code{\link{naresid}}, \code{\link{naprint}}, \code{\link{describe}} +} +\examples{ +# sex +# [1] m f f m f f m m m m m m m m f f f m f m +# age +# [1] NA 41 23 30 44 22 NA 32 37 34 38 36 36 50 40 43 34 22 42 30 +# y +# [1] 0 1 0 0 1 0 1 0 0 1 1 1 0 0 1 1 0 1 0 0 +# options(na.detail.response=TRUE, na.action="na.delete", digits=3) +# lrm(y ~ age*sex) +# +# Logistic Regression Model +# +# lrm(formula = y ~ age * sex) +# +# +# Frequencies of Responses +# 0 1 +# 10 8 +# +# Frequencies of Missing Values Due to Each Variable +# y age sex +# 0 2 0 +# +# +# Statistics on Response by Missing/Non-Missing Status of Predictors +# +# age=NA age!=NA sex!=NA Any NA No NA +# N 2.0 18.000 20.00 2.0 18.000 +# Mean 0.5 0.444 0.45 0.5 0.444 +# +# \dots\dots +# options(na.action="na.keep") +# describe(y ~ age*sex) +# Statistics on Response by Missing/Non-Missing Status of Predictors +# +# age=NA age!=NA sex!=NA Any NA No NA +# N 2.0 18.000 20.00 2.0 18.000 +# Mean 0.5 0.444 0.45 0.5 0.444 +# +# \dots +# options(na.fun.response="table") #built-in function table() +# describe(y ~ age*sex) +# +# Statistics on Response by Missing/Non-Missing Status of Predictors +# +# age=NA age!=NA sex!=NA Any NA No NA +# 0 1 10 11 1 10 +# 1 1 8 9 1 8 +# +# \dots +} +\keyword{models} +\keyword{regression} +% Converted by Sd2Rd version 1.21. |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/na.keep.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/na.keep.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,45 @@ +\name{na.keep} +\alias{na.keep} +\title{ +Do-nothing na.action +} +\description{ +Does not delete rows containing NAs, but does add details concerning +the distribution of the response variable if \code{options(na.detail.response=TRUE)}. +This \code{na.action} is primarily for use with \code{describe.formula}. +} +\usage{ +na.keep(mf) +} +\arguments{ +\item{mf}{ +a model frame +}} +\value{ +the same model frame with the \code{"na.action"} attribute + + +} +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University +\cr +\email{f.harrell@vanderbilt.edu} +} +\seealso{ +\code{\link{na.omit}}, \code{\link{na.delete}}, \code{\link{model.frame.default}}, \code{\link{na.detail.response}}, +\code{\link{naresid}}, \code{\link{naprint}}, \code{\link{describe}} +} +\examples{ +options(na.action="na.keep", na.detail.response=TRUE) +x1 <- runif(20) +x2 <- runif(20) +x2[1:4] <- NA +y <- rnorm(20) +describe(y ~ x1*x2) +} +\keyword{models} +% Converted by Sd2Rd version 1.21. |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/nin.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/nin.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,32 @@ +\name{\%nin\%} +\alias{\%nin\%} +\title{ + Find Matching (or Non-Matching) Elements +} +\description{ + \code{\%nin\%} is a binary operator that returns a logical vector indicating, + for each element of its left operand, whether that element has a match in the + right operand: \code{TRUE} indicates no match, \code{FALSE} indicates a match. +} +\usage{ +x \%nin\% table +} +\arguments{ + \item{x}{ + a vector (numeric, character, factor) + } + \item{table}{ + a vector (numeric, character, factor), matching the mode of \code{x} + } +} +\value{ + vector of logical values with length equal to length of \code{x}. +} +\seealso{ + \code{\link{match}}, \code{\link{\%in\%}} +} +\examples{ +c('a','b','c') \%nin\% c('a','b') +} +\keyword{manip} +\keyword{character} |
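One more one-liner (my addition) showing the operator on numeric input:

c(1, 2, 3) %nin% c(2, 4)   # TRUE FALSE TRUE: 1 and 3 have no match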
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/nobsY.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/nobsY.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,51 @@ +\name{nobsY} +\alias{nobsY} +\title{Compute Number of Observations for Left Hand Side of Formula} +\usage{ +nobsY(formula, group=NULL, data = NULL, subset = NULL, + na.action = na.retain, matrixna=c('all', 'any')) +} +\arguments{ + \item{formula}{a formula object} + \item{group}{character string containing optional name of a + stratification variable for computing sample sizes} + \item{data}{a data frame} + \item{subset}{an optional subsetting criterion} + \item{na.action}{an optional \code{NA}-handling function} + \item{matrixna}{set to \code{"all"} if an observation is to be + considered \code{NA} if all the columns of the variable are + \code{NA}, otherwise use \code{matrixna="any"} to consider the row + missing if any of the columns are missing} +} +\value{an integer, with an attribute \code{"formula"} containing the + original formula but with an \code{id} variable (if present) removed} +\description{ +After removing any artificial observations added by +\code{addMarginal}, computes the number of +non-missing observations for all left-hand-side variables in +\code{formula}. If \code{formula} contains a term \code{id(variable)}, +\code{variable} is assumed to be a subject ID variable, and only unique +subject IDs are counted. If \code{group} is given and its value is the name of +a variable in the right-hand side of the model, an additional object +\code{nobsg} is returned that is a matrix with as many columns as there +are left-hand variables, and as many rows as there are levels of the +\code{group} variable. This matrix has the further breakdown of unique +non-missing observations by \code{group}. The concatenation of all ID +variables is returned in a \code{list} element \code{id}. +} +\examples{ +d <- expand.grid(sex=c('female', 'male', NA), + country=c('US', 'Romania'), + reps=1:2) +d$subject.id <- c(0, 0, 3:12) +dm <- addMarginal(d, sex, country) +dim(dm) +nobsY(sex + country ~ 1, data=d) +nobsY(sex + country ~ id(subject.id), data=d) +nobsY(sex + country ~ id(subject.id) + reps, group='reps', data=d) +nobsY(sex ~ 1, data=d) +nobsY(sex ~ 1, data=dm) +nobsY(sex ~ id(subject.id), data=dm) +} +\keyword{utilities} +\keyword{manip} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/nstr.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/nstr.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,41 @@ +\name{nstr} +\alias{nstr} + +\title{ Creates a string of arbitrary length } +\description{ + Creates a vector of strings which consists of the string segment given in + each element of the \code{string} vector repeated \code{times}. +} +\usage{ +nstr(string, times) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{string}{ character: vector of string segments to be + repeated. Will be recycled if argument \code{times} is longer.} + \item{times}{ integer: vector of number of times to repeat the + corresponding segment. Will be recycled if argument \code{string} is + longer. + } +} +\value{ + returns a character vector the same length as the longest of the two arguments. +} +\author{ Charles Dupont } +\note{ + Will throw a warning if the length of the longer argument is not an even + multiple of the shorter argument. +} +\seealso{ \code{\link{paste}}, \code{\link{rep}} } +\examples{ +nstr(c("a"), c(0,3,4)) + +nstr(c("a", "b", "c"), c(1,2,3)) + +nstr(c("a", "b", "c"), 4) +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{manip} +\keyword{character}% __ONLY ONE__ keyword per line +\keyword{utilities} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/num.intercepts.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/num.intercepts.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,29 @@ +\name{num.intercepts} +\alias{num.intercepts} +\title{Extract number of intercepts} +\description{ + Extract the number of intercepts from a model +} +\usage{ +num.intercepts(fit, type=c('fit', 'var', 'coef')) +} +\arguments{ + \item{fit}{a model fit object} + + \item{type}{ + the default is to return the formal number of intercepts used when fitting + the model. Set \code{type='var'} to return the actual number of + intercepts stored in the \code{var} object, or \code{type='coef'} to + return the actual number in the fitted coefficients. The former will be + less than the number fitted for \code{\link[rms]{orm}} fits, and the + latter for \code{orm} fits passed through + \code{\link{fit.mult.impute}} + } +} +\value{ + \code{num.intercepts} returns an integer with the number of intercepts + in the model. +} +\seealso{ + \code{\link[rms]{orm}}, \code{\link{fit.mult.impute}} +} |
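This page ships without an examples section. A hedged sketch follows, assuming the rms package is installed (lrm fits a proportional odds model, so a 4-level response yields 3 intercepts):

if (requireNamespace('rms', quietly=TRUE)) {
  set.seed(1)
  x <- rnorm(100)
  y <- factor(sample(1:4, 100, TRUE))
  f <- rms::lrm(y ~ x)
  print(num.intercepts(f))           # 3 intercepts for a 4-level response
  print(num.intercepts(f, 'coef'))   # count taken from the fitted coefficients
}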
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/panel.bpplot.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/panel.bpplot.Rd Wed Jun 28 20:28:48 2017 -0400 |
| b'@@ -0,0 +1,299 @@\n+\\name{panel.bpplot}\n+\\alias{panel.bpplot}\n+\\alias{bpplotM}\n+\\alias{bpplt}\n+\\title{\n+Box-Percentile Panel Function for Trellis\n+}\n+\\description{\n+For all their good points, box plots have a high ink/information ratio\n+in that they mainly display 3 quartiles. Many practitioners have\n+found that the "outer values" are difficult to explain to\n+non-statisticians and many feel that the notion of "outliers" is too\n+dependent on (false) expectations that data distributions should be Gaussian.\n+\n+\\code{panel.bpplot} is a \\code{panel} function for use with\n+\\code{trellis}, especially for \\code{bwplot}. It draws box plots\n+(without the whiskers) with any number of user-specified "corners"\n+(corresponding to different quantiles), but it also draws box-percentile\n+plots similar to those drawn by Jeffrey Banfield\'s\n+(\\email{umsfjban@bill.oscs.montana.edu}) \\code{bpplot} function. \n+To quote from Banfield, "box-percentile plots supply more\n+information about the univariate distributions. At any height the\n+width of the irregular \'box\' is proportional to the percentile of that\n+height, up to the 50th percentile, and above the 50th percentile the\n+width is proportional to 100 minus the percentile. Thus, the width at\n+any given height is proportional to the percent of observations that\n+are more extreme in that direction. As in boxplots, the median, 25th\n+and 75th percentiles are marked with line segments across the box."\n+\n+\\code{panel.bpplot} can also be used with base graphics to add extended\n+box plots to an existing plot, by specifying \\code{nogrid=TRUE, height=...}.\n+\n+\\code{panel.bpplot} is a generalization of \\code{bpplot} and\n+\\code{\\link[lattice]{panel.bwplot}} in \n+that it works with \\code{trellis} (making the plots horizontal so that\n+category labels are more visible), it allows the user to specify the\n+quantiles to connect and those for which to draw reference lines, \n+and it displays means (by default using dots).\n+\n+\\code{bpplt} draws horizontal box-percentile plots much like those drawn\n+by \\code{panel.bpplot} but taking as the starting point a matrix\n+containing quantiles summarizing the data. \\code{bpplt} is primarily\n+intended to be used internally by \\code{plot.summary.formula.reverse}\n+but when used with no arguments has a general purpose: to draw an\n+annotated example box-percentile plot with the default quantiles used\n+and with the mean drawn with a solid dot. This schematic plot is\n+rendered nicely in postscript with an image height of 3.5 inches.\n+\n+\\code{bpplotM} uses the \\code{lattice} \\code{bwplot} function to depict\n+multiple numeric continuous variables with varying scales in a single\n+\\code{lattice} graph, after reshaping the dataset into a tall and thin\n+format.\n+}\n+\\usage{\n+panel.bpplot(x, y, box.ratio=1, means=TRUE, qref=c(.5,.25,.75),\n+ probs=c(.05,.125,.25,.375), nout=0,\n+ nloc=c(\'right lower\', \'right\', \'left\', \'none\'), cex.n=.7,\n+ datadensity=FALSE, scat1d.opts=NULL,\n+ violin=FALSE, violin.opts=NULL,\n+ font=box.dot$font, pch=box.dot$pch, \n+ cex.means =box.dot$cex, col=box.dot$col,\n+ nogrid=NULL, height=NULL, \\dots)\n+\n+# E.g. 
bwplot(formula, panel=panel.bpplot, panel.bpplot.parameters)\n+\n+bpplt(stats, xlim, xlab=\'\', box.ratio = 1, means=TRUE,\n+ qref=c(.5,.25,.75), qomit=c(.025,.975),\n+ pch=16, cex.labels=par(\'cex\'), cex.points=if(prototype)1 else 0.5,\n+ grid=FALSE)\n+\n+bpplotM(formula=NULL, groups=NULL, data=NULL, subset=NULL, na.action=NULL,\n+ qlim=0.01, xlim=NULL,\n+ nloc=c(\'right lower\',\'right\',\'left\',\'none\'),\n+ vnames=c(\'labels\', \'names\'), cex.n=.7, cex.strip=1,\n+ outerlabels=TRUE, \\dots)\n+}\n+\\arguments{\n+\\item{x}{\n+continuous variable whose distribution is to be examined\n+}\n+\\item{y}{\n+grouping variable\n+}\n+\\item{box.ratio}{\n+see \\code{\\link[lattice]{panel.bwplot}}\n+}\n+\\item{means}{\n+set to \\code{FALSE} to suppress drawing a character at the mean value\n+}'..b'l of Medicine\n+\\cr\n+\\email{f.harrell@vanderbilt.edu}\n+}\n+\\references{\n+ Esty WW, Banfield J: The box-percentile plot. J Statistical\n+Software 8 No. 17, 2003.\n+}\n+\\seealso{\n+\\code{\\link{bpplot}}, \\code{\\link[lattice]{panel.bwplot}},\n+ \\code{\\link{scat1d}}, \\code{\\link{quantile}},\n+ \\code{\\link{Ecdf}}, \\code{\\link{summaryP}},\n+ \\code{\\link[latticeExtra]{useOuterStrips}}\n+}\n+\\examples{\n+set.seed(13)\n+x <- rnorm(1000)\n+g <- sample(1:6, 1000, replace=TRUE)\n+x[g==1][1:20] <- rnorm(20)+3 # contaminate 20 x\'s for group 1\n+\n+\n+# default trellis box plot\n+require(lattice)\n+bwplot(g ~ x)\n+\n+\n+# box-percentile plot with data density (rug plot)\n+bwplot(g ~ x, panel=panel.bpplot, probs=seq(.01,.49,by=.01), datadensity=TRUE)\n+# add ,scat1d.opts=list(tfrac=1) to make all tick marks the same size\n+# when a group has > 125 observations\n+\n+\n+# small dot for means, show only .05,.125,.25,.375,.625,.75,.875,.95 quantiles\n+bwplot(g ~ x, panel=panel.bpplot, cex.means=.3)\n+\n+\n+# suppress means and reference lines for lower and upper quartiles\n+bwplot(g ~ x, panel=panel.bpplot, probs=c(.025,.1,.25), means=FALSE, qref=FALSE)\n+\n+\n+# continuous plot up until quartiles ("Tootsie Roll plot")\n+bwplot(g ~ x, panel=panel.bpplot, probs=seq(.01,.25,by=.01))\n+\n+\n+# start at quartiles then make it continuous ("coffin plot")\n+bwplot(g ~ x, panel=panel.bpplot, probs=seq(.25,.49,by=.01))\n+\n+\n+# same as previous but add a spike to give 0.95 interval\n+bwplot(g ~ x, panel=panel.bpplot, probs=c(.025,seq(.25,.49,by=.01)))\n+\n+\n+# decile plot with reference lines at outer quintiles and median\n+bwplot(g ~ x, panel=panel.bpplot, probs=c(.1,.2,.3,.4), qref=c(.5,.2,.8))\n+\n+\n+# default plot with tick marks showing all observations outside the outer\n+# box (.05 and .95 quantiles), with very small ticks\n+bwplot(g ~ x, panel=panel.bpplot, nout=.05, scat1d.opts=list(frac=.01))\n+\n+\n+# show 5 smallest and 5 largest observations\n+bwplot(g ~ x, panel=panel.bpplot, nout=5)\n+\n+\n+# Use a scat1d option (preserve=TRUE) to ensure that the right peak extends \n+# to the same position as the extreme scat1d\n+bwplot(~x , panel=panel.bpplot, probs=seq(.00,.5,by=.001), \n+ datadensity=TRUE, scat1d.opt=list(preserve=TRUE))\n+\n+# Add an extended box plot to an existing base graphics plot\n+plot(x, 1:length(x))\n+panel.bpplot(x, 1070, nogrid=TRUE, pch=19, height=15, cex.means=.5)\n+\n+# Draw a prototype showing how to interpret the plots\n+bpplt()\n+\n+# Example for bpplotM\n+set.seed(1)\n+n <- 800\n+d <- data.frame(treatment=sample(c(\'a\',\'b\'), n, TRUE),\n+ sex=sample(c(\'female\',\'male\'), n, TRUE),\n+ age=rnorm(n, 40, 10),\n+ bp =rnorm(n, 120, 12),\n+ wt =rnorm(n, 190, 
30))\n+label(d$bp) <- \'Systolic Blood Pressure\'\n+units(d$bp) <- \'mmHg\'\n+bpplotM(age + bp + wt ~ treatment, data=d)\n+bpplotM(age + bp + wt ~ treatment * sex, data=d, cex.strip=.8)\n+bpplotM(age + bp + wt ~ treatment*sex, data=d,\n+ violin=TRUE,\n+ violin.opts=list(col=adjustcolor(\'blue\', alpha.f=.15),\n+ border=FALSE))\n+\n+\n+bpplotM(c(\'age\', \'bp\', \'wt\'), groups=\'treatment\', data=d)\n+# Can use Hmisc Cs function, e.g. Cs(age, bp, wt)\n+bpplotM(age + bp + wt ~ treatment, data=d, nloc=\'left\')\n+\n+# Without treatment: bpplotM(age + bp + wt ~ 1, data=d)\n+\n+\\dontrun{\n+# Automatically find all variables that appear to be continuous\n+getHdata(support)\n+bpplotM(data=support, group=\'dzgroup\',\n+ cex.strip=.4, cex.means=.3, cex.n=.45)\n+\n+# Separate displays for categorical vs. continuous baseline variables\n+getHdata(pbc)\n+pbc <- upData(pbc, moveUnits=TRUE)\n+\n+s <- summaryM(stage + sex + spiders ~ drug, data=pbc)\n+plot(s)\n+Key(0, .5)\n+s <- summaryP(stage + sex + spiders ~ drug, data=pbc)\n+plot(s, val ~ freq | var, groups=\'drug\', pch=1:3, col=1:3,\n+ key=list(x=.6, y=.8))\n+\n+bpplotM(bili + albumin + protime + age ~ drug, data=pbc)\n+}\n+}\n+\\keyword{nonparametric}\n+\\keyword{hplot}\n+\\keyword{distribution}\n+\\concept{trellis}\n+\\concept{lattice}\n' |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/partition.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/partition.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,36 @@ +\name{partition} +\alias{partition} +\alias{partition.vector} +\alias{partition.matrix} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{Partitions an object into different sets} +\description{ + Partitions an object into subsets of length defined in the \code{sep} + argument. +} +\usage{ +partition.vector(x, sep, ...) +partition.matrix(x, rowsep, colsep, ...) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{x}{object to be partitioned. } + \item{sep}{determines how many elements should go into each set. The + sum of \code{sep} should be equal to the length of \code{x}.} + \item{rowsep}{determines how many rows should go into each set. The + sum of \code{rowsep} must equal the number of rows in \code{x}.} + \item{colsep}{determines how many columns should go into each set. The + sum of \code{colsep} must equal the number of columns in \code{x}.} + \item{\dots}{arguments used in other methods of \code{partition}.} +} +\value{ + A list of the same length as \code{sep} containing the partitioned objects. +} +\author{Charles Dupont} +\seealso{ \code{\link{split}}} +\examples{ +a <- 1:7 +partition.vector(a, sep=c(1,3,2,1)) +} +% R documentation directory. +\keyword{manip} |
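The example above covers only partition.vector; a matrix analogue following the rowsep/colsep descriptions (my addition, a sketch):

m <- matrix(1:12, nrow=4)
# split into 2+2 rows and 1+2 columns; the sums match dim(m)
partition.matrix(m, rowsep=c(2, 2), colsep=c(1, 2))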
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/pc1.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/pc1.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,34 @@ +\name{pc1} +\alias{pc1} +\title{First Principal Component} +\description{ +Given a numeric matrix which may or may not contain \code{NA}s, +\code{pc1} standardizes the columns to have mean 0 and variance 1 and +computes the first principal component using \code{\link{prcomp}}. The +proportion of variance explained by this component is printed, and so +are the coefficients of the original (not scaled) variables. These +coefficients may be applied to the raw data to obtain the first PC. +} +\usage{ +pc1(x, hi) +} +\arguments{ + \item{x}{numeric matrix} + \item{hi}{if specified, the first PC is scaled so that its maximum + value is \code{hi} and its minimum value is zero} +} +\value{ + The vector of first principal component values, one per observation. An attribute + \code{"coef"} is attached to this vector. \code{"coef"} contains the + raw-variable coefficients. + } +\author{Frank Harrell} +\seealso{\code{\link{prcomp}}} +\examples{ +set.seed(1) +x1 <- rnorm(100) +x2 <- x1 + rnorm(100) +w <- pc1(cbind(x1,x2)) +attr(w,'coef') +} +\keyword{multivariate} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/plotCorrPrecision.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/plotCorrPrecision.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,31 @@ +\name{plotCorrPrecision} +\alias{plotCorrPrecision} +\title{Plot Precision of Estimate of Pearson Correlation Coefficient} +\description{ +This function plots the precision (margin of error) of the +product-moment linear +correlation coefficient r vs. sample size, for a given vector of +correlation coefficients \code{rho}. Precision is defined as the larger +of the upper confidence limit minus rho and rho minus the lower confidence +limit. \code{labcurve} is used to automatically label the curves. +} +\usage{ +plotCorrPrecision(rho = c(0, 0.5), n = seq(10, 400, length = 100), + conf.int = 0.95, offset=0.025, \dots) +} +\arguments{ + \item{rho}{single or vector of true correlations. A worst-case + precision graph results from rho=0} + \item{n}{vector of sample sizes to use on the x-axis} + \item{conf.int}{confidence coefficient; default uses 0.95 confidence + limits} + \item{offset}{see \code{\link{labcurve}}} + \item{\dots}{other arguments to \code{\link{labcurve}}} +} +\author{Xing Wang and Frank Harrell} +\seealso{\code{\link{rcorr}},\code{\link{cor}},\code{\link{cor.test}}} +\examples{ +plotCorrPrecision() +plotCorrPrecision(rho=0) +} +\keyword{htest} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/plsmo.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/plsmo.Rd Wed Jun 28 20:28:48 2017 -0400 |
| b'@@ -0,0 +1,243 @@\n+\\name{plsmo}\n+\\alias{plsmo}\n+\\alias{panel.plsmo}\n+\\title{\n+Plot smoothed estimates\n+}\n+\\description{\n+\n+ Plot smoothed estimates of x vs. y, handling missing data for lowess\n+ or supsmu, and adding axis labels. Optionally suppresses plotting\n+ extrapolated estimates. An optional \\code{group} variable can be\n+ specified to compute and plot the smooth curves by levels of\n+ \\code{group}. When \\code{group} is present, the \\code{datadensity}\n+ option will draw tick marks showing the location of the raw\n+ \\code{x}-values, separately for each curve. \\code{plsmo} has an\n+ option to plot connected points for raw data, with no smoothing. The\n+ non-panel version of \\code{plsmo} allows \\code{y} to be a matrix, for\n+ which smoothing is done separately over its columns. If both\n+ \\code{group} and multi-column \\code{y} are used, the number of curves\n+ plotted is the product of the number of groups and the number of\n+ \\code{y} columns.\n+\n+\\code{panel.plsmo} is a \\code{panel} function for \\code{trellis} for the\n+\\code{xyplot} function that uses \\code{plsmo} and its options to draw\n+one or more nonparametric function estimates on each panel. This has\n+advantages over using \\code{xyplot} with \\code{panel.xyplot} and\n+\\code{panel.loess}: (1) by default it will invoke \\code{labcurve} to\n+label the curves where they are most separated, (2) the\n+\\code{datadensity} option will put rug plots on each curve (instead of a\n+single rug plot at the bottom of the graph), and (3) when\n+\\code{panel.plsmo} invokes \\code{plsmo} it can use the "super smoother"\n+(\\code{supsmu} function) instead of \\code{lowess}. \\code{panel.plsmo}\n+senses when a \\code{group} variable is specified to \\code{xyplot} so\n+that it can invoke \\code{\\link[lattice]{panel.superpose}} instead of\n+\\code{panel.xyplot}. Using \\code{panel.plsmo} through \\code{trellis}\n+has some advantages over calling \\code{plsmo} directly in that\n+conditioning variables are allowed and \\code{trellis} uses nicer fonts\n+etc.\n+\n+When a \\code{group} variable was used, \\code{panel.plsmo} creates a function\n+\\code{Key} in the session frame that the user can invoke to draw a key for\n+individual data point symbols used for the \\code{group}s. \n+By default, the key is positioned at the upper right\n+corner of the graph. If \\code{Key(locator(1))} is specified, the key will\n+appear so that its upper left corner is at the coordinates of the\n+mouse click.\n+}\n+\\usage{\n+plsmo(x, y, method=c("lowess","supsmu","raw"), xlab, ylab, \n+ add=FALSE, lty=1 : lc, col=par("col"), lwd=par("lwd"),\n+ iter=if(length(unique(y))>2) 3 else 0, bass=0, f=2/3, trim, \n+ fun, group, prefix, xlim, ylim, \n+ label.curves=TRUE, datadensity=FALSE, scat1d.opts=NULL,\n+ lines.=TRUE, subset=TRUE,\n+ grid=FALSE, evaluate=NULL, \\dots)\n+\n+\n+#To use panel function:\n+#xyplot(formula=y ~ x | conditioningvars, groups,\n+# panel=panel.plsmo, type=\'b\', \n+# label.curves=TRUE,\n+# lwd = superpose.line$lwd, \n+# lty = superpose.line$lty, \n+# pch = superpose.symbol$pch, \n+# cex = superpose.symbol$cex, \n+# font = superpose.symbol$font, \n+# col = NULL, scat1d.opts=NULL, \\dots)\n+}\n+\\arguments{\n+\\item{x}{\n+vector of x-values, NAs allowed\n+}\n+\\item{y}{\n+vector or matrix of y-values, NAs allowed\n+}\n+\\item{method}{\n+"lowess" (the default), "supsmu", or "raw" to not smooth at all\n+}\n+\\item{xlab}{\n+x-axis label iff add=F. 
Default is label(x) or the argument name.\n+}\n+\\item{ylab}{\n+y-axis label, like xlab.\n+}\n+\\item{add}{\n+Set to T to call lines instead of plot. Assumes axes already labeled.\n+}\n+\\item{lty}{\n+line type, default=1,2,3,\\dots, corresponding to columns of \\code{y} and\n+\\code{group} combinations\n+}\n+\\item{col}{\n+color for each curve, corresponding to \\code{group}. Default is\n+current \\code{par("col")}. \n+}\n+\\item{lwd}{\n+vector of line widths for the curves, corresponding to \\code{group}.\n+Default is current \\code{par("lwd")}. \n+\\code{lwd} can also be spec'..b'+set to \\code{FALSE} to prevent \\code{labcurve} from being called to label multiple\n+curves corresponding to \\code{group}s. Set to a list to pass options to\n+\\code{labcurve}. \\code{lty} and \\code{col} are passed to \\code{labcurve} automatically.\n+}\n+\\item{datadensity}{\n+set to \\code{TRUE} to draw tick marks on each curve, using x-coordinates\n+of the raw data \\code{x} values. This is done using \\code{scat1d}.\n+}\n+\\item{scat1d.opts}{a list of options to hand to \\code{scat1d}}\n+\\item{lines.}{\n+set to \\code{FALSE} to suppress smoothed curves from being drawn. This can\n+make sense if \\code{datadensity=TRUE}.\n+}\n+\\item{subset}{\n+a logical or integer vector specifying a subset to use for processing,\n+with respect to all variables being analyzed\n+}\n+\\item{grid}{\n+ set to \\code{TRUE} if the \\R \\code{grid} package drew the current plot}\n+\\item{evaluate}{\n+ number of points to keep from smoother. If specified, an\n+equally-spaced grid of \\code{evaluate} \\code{x} values will be obtained from the\n+smoother using linear interpolation. This will keep from plotting an\n+enormous number of points if the dataset contains a very large number\n+of unique \\code{x} values.}\n+\\item{\\dots}{\n+ optional arguments that are passed to \\code{scat1d},\n+ or optional parameters to pass to \\code{plsmo} from\n+ \\code{panel.plsmo}. See optional arguments for \\code{plsmo} above.\n+}\n+\\item{type}{\n+set to \\code{p} to have \\code{panel.plsmo} plot points (and not call \\code{plsmo}), \n+\\code{l} to call \\code{plsmo} and not plot points, or use the default \\code{b} to plot both.\n+}\n+\\item{pch,cex,font}{\n+vectors of graphical parameters corresponding to the \\code{group}s (scalars\n+if \\code{group} is absent). 
By default, the parameters set up by\n+\\code{trellis} will be used.\n+}\n+}\n+\\value{\n+\\code{plsmo} returns a list of curves (x and y coordinates) that was passed to \\code{labcurve}\n+}\n+\\section{Side Effects}{\n+plots, and \\code{panel.plsmo} creates the \\code{Key} function in the session frame.\n+}\n+\\seealso{\n+\\code{\\link{lowess}}, \\code{\\link{supsmu}}, \\code{\\link{label}}, \\code{\\link{quantile}}, \\code{\\link{labcurve}}, \\code{\\link{scat1d}},\n+\\code{\\link[lattice]{xyplot}}, \\code{\\link[lattice]{panel.superpose}}, \\code{\\link[lattice]{panel.xyplot}}\n+}\n+\\examples{\n+set.seed(1)\n+x <- 1:100\n+y <- x + runif(100, -10, 10)\n+plsmo(x, y, "supsmu", xlab="Time of Entry") \n+#Use label(y) or "y" for ylab\n+\n+\n+plsmo(x, y, add=TRUE, lty=2)\n+#Add lowess smooth to existing plot, with different line type\n+\n+\n+age <- rnorm(500, 50, 15)\n+survival.time <- rexp(500)\n+sex <- sample(c(\'female\',\'male\'), 500, TRUE)\n+race <- sample(c(\'black\',\'non-black\'), 500, TRUE)\n+plsmo(age, survival.time < 1, fun=qlogis, group=sex) # plot logit by sex\n+\n+#Bivariate Y\n+sbp <- 120 + (age - 50)/10 + rnorm(500, 0, 8) + 5 * (sex == \'male\')\n+dbp <- 80 + (age - 50)/10 + rnorm(500, 0, 8) - 5 * (sex == \'male\')\n+Y <- cbind(sbp, dbp)\n+plsmo(age, Y)\n+plsmo(age, Y, group=sex)\n+\n+\n+#Plot points and smooth trend line using trellis \n+# (add type=\'l\' to suppress points or type=\'p\' to suppress trend lines)\n+require(lattice)\n+xyplot(survival.time ~ age, panel=panel.plsmo)\n+\n+\n+#Do this for multiple panels\n+xyplot(survival.time ~ age | sex, panel=panel.plsmo)\n+\n+\n+#Do this for subgroups of points on each panel, show the data\n+#density on each curve, and draw a key at the default location\n+xyplot(survival.time ~ age | sex, groups=race, panel=panel.plsmo,\n+ datadensity=TRUE)\n+Key()\n+\n+\n+#Use wtd.loess.noiter to do a fast weighted smooth\n+plot(x, y)\n+lines(wtd.loess.noiter(x, y))\n+lines(wtd.loess.noiter(x, y, weights=c(rep(1,50), 100, rep(1,49))), col=2)\n+points(51, y[51], pch=18) # show overly weighted point\n+#Try to duplicate this smooth by replicating 51st observation 100 times\n+lines(wtd.loess.noiter(c(x,rep(x[51],99)),c(y,rep(y[51],99)),\n+ type=\'ordered all\'), col=3)\n+#Note: These two don\'t agree exactly\n+}\n+\\keyword{smooth}\n+\\keyword{nonparametric}\n+\\keyword{hplot}\n+\\concept{trellis}\n+\\concept{lattice}\n+\n+\n' |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/popower.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/popower.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,98 @@ +\name{popower} +\alias{popower} +\alias{posamsize} +\alias{print.popower} +\alias{print.posamsize} +\title{Power and Sample Size for Ordinal Response} +\description{ +\code{popower} computes the power for a two-tailed two sample comparison +of ordinal outcomes under the proportional odds ordinal logistic +model. The power is the same as that of the Wilcoxon test but with +ties handled properly. \code{posamsize} computes the total sample size +needed to achieve a given power. Both functions compute the efficiency +of the design compared with a design in which the response variable +is continuous. \code{print} methods exist for both functions. Any of the +input arguments may be vectors, in which case a vector of powers or +sample sizes is returned. These functions use the methods of +Whitehead (1993). +} +\usage{ +popower(p, odds.ratio, n, n1, n2, alpha=0.05) +\method{print}{popower}(x, \dots) +posamsize(p, odds.ratio, fraction=.5, alpha=0.05, power=0.8) +\method{print}{posamsize}(x, \dots) +} +\arguments{ +\item{p}{ +a vector of marginal cell probabilities which must add up to one. +The \code{i}th element specifies the probability that a patient will be in response level +\code{i}, averaged over the two treatment groups. +} +\item{odds.ratio}{ +the odds ratio to be able to detect. It doesn't +matter which group is in the numerator. +} +\item{n}{ +total sample size for \code{popower}. You must specify either \code{n} or +\code{n1} and \code{n2}. If you specify \code{n}, \code{n1} and \code{n2} are set to \code{n/2}. +} +\item{n1}{ +for \code{popower}, the number of subjects in treatment group 1 +} +\item{n2}{ +for \code{popower}, the number of subjects in group 2 +} +\item{alpha}{ +type I error +} +\item{x}{an object created by \code{popower} or \code{posamsize}} +\item{fraction}{ +for \code{posamsize}, the fraction of subjects that will be allocated to group 1 +} +\item{power}{ +for \code{posamsize}, the desired power (default is 0.8) +} +\item{\dots}{unused} +} +\value{ +a list containing \code{power} and \code{eff} (relative efficiency) for \code{popower}, +or containing \code{n} and \code{eff} for \code{posamsize}. +} +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University School of Medicine +\cr +\email{f.harrell@vanderbilt.edu} +} +\references{ +Whitehead J (1993): Sample size calculations for ordered categorical +data. Stat in Med 12:2257--2271. + + +Julious SA, Campbell MJ (1996): Letter to the Editor. Stat in Med 15: +1065--1066. Shows accuracy of formula for binary response case. +} +\seealso{ +\code{\link{bpower}}, \code{\link{cpower}} +} +\examples{ +#For a study of back pain (none, mild, moderate, severe) here are the +#expected proportions (averaged over 2 treatments) that will be in +#each of the 4 categories: + + +p <- c(.1,.2,.4,.3) +popower(p, 1.2, 1000) # OR=1.2, total n=1000 +posamsize(p, 1.2) +popower(p, 1.2, 3148) +} +\keyword{htest} +\keyword{category} +\concept{power} +\concept{study design} +\concept{ordinal logistic model} +\concept{ordinal response} +\concept{proportional odds model} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/print.char.list.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/print.char.list.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,95 @@ +\name{print.char.list} +\alias{print.char.list} +\title{ Prints a list of lists in a visually readable format } +\description{ + Takes a list that is composed of other lists and matrices and prints + it in a visually readable format. +} +\usage{ +\method{print}{char.list}(x, \dots, hsep = c("|"), vsep = c("-"), csep = c("+"), print.it = TRUE, + rowname.halign = c("left", "centre", "right"), + rowname.valign = c("top", "centre", "bottom"), + colname.halign = c("centre", "left", "right"), + colname.valign = c("centre", "top", "bottom"), + text.halign = c("right", "centre", "left"), + text.valign = c("top", "centre", "bottom"), + rowname.width, rowname.height, + min.colwidth = .Options$digits, max.rowheight = NULL, + abbreviate.dimnames = TRUE, page.width = .Options$width, + colname.width, colname.height, prefix.width, + superprefix.width = prefix.width) +} +\arguments{ + \item{x}{ + list object to be printed + } + \item{\dots}{ + place for extra arguments to reside. + } + \item{hsep}{ + character used to separate horizontal fields + } + \item{vsep}{ + character used to separate vertical fields + } + \item{csep}{ + character used where horizontal and vertical separators meet. + } + \item{print.it}{ + should the value be printed to the console or returned as a string. + } + \item{rowname.halign}{ + horizontal justification of row names. + } + \item{rowname.valign}{ + vertical justification of row names. + } + \item{colname.halign}{ + horizontal justification of column names. + } + \item{colname.valign}{ + vertical justification of column names. + } + \item{text.halign}{ + horizontal justification of cell text. + } + \item{text.valign}{ + vertical justification of cell text. + } + \item{rowname.width}{ + minimum width of row name strings. + } + \item{rowname.height}{ + minimum height of row name strings. + } + \item{min.colwidth}{ + minimum column width. + } + \item{max.rowheight}{ + maximum row height. + } + \item{abbreviate.dimnames}{ + should the row and column names be abbreviated. + } + \item{page.width}{ + width of the page being printed on. + } + \item{colname.width}{ + minimum width of the column names. + } + \item{colname.height}{ + minimum height of the column names + } + \item{prefix.width}{ + maximum width of the rowname columns + } + \item{superprefix.width}{ + maximum width of the super rowname columns + } +} +\value{ + A string containing the formatted table of the list object. +} +\author{ Charles Dupont } +\keyword{ print } +\keyword{ list } |
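No examples section is present for this page. A purely illustrative call follows; the exact list structure accepted is an assumption based on the description (a list of matrices), so treat this as a sketch:

m <- matrix(c('a', 'b', 'c', 'd'), nrow=2,
            dimnames=list(c('r1', 'r2'), c('c1', 'c2')))
# hypothetical minimal input: a one-element list holding a matrix
print.char.list(list(part1=m), abbreviate.dimnames=FALSE)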
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/print.char.matrix.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/print.char.matrix.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,86 @@ +\name{print.char.matrix} +\alias{print.char.matrix} +%- Also NEED an `\alias' for EACH other topic documented here. +\title{ Function to print a matrix with stacked cells } +\description{ + Prints a dataframe or matrix in stacked cells. Line break characters + in a matrix element will result in a line break in that cell, but tab + characters are not supported. +} +\usage{ +\method{print}{char.matrix}(x, file = "", col.name.align = "cen", col.txt.align = "right", + cell.align = "cen", hsep = "|", vsep = "-", csep = "+", row.names = TRUE, + col.names = FALSE, append = FALSE, + top.border = TRUE, left.border = TRUE, \dots) +} + +\arguments{ + \item{x}{a matrix or dataframe} + \item{file}{name of file if file output is desired. If left empty, + output will be to the screen} + \item{col.name.align}{if column names are used, they can be aligned + right, left or centre. Default \code{"cen"} results in names centred + between the sides of the columns they name. If the width of the text + in the columns is less than the width of the name, \code{col.name.align} + will have no effect. Other options are \code{"right"} and \code{"left"}.} + \item{col.txt.align}{how character columns are aligned. Options + are the same as for \code{col.name.align} with no effect when the width of + the column is greater than its name.} + \item{cell.align}{how numbers are displayed in columns} + \item{hsep}{character string to use as horizontal separator, + i.e. what separates columns} + \item{vsep}{character string to use as vertical separator, + i.e. what separates rows. Length cannot be more than one.} + + \item{csep}{character string to use where vertical and horizontal + separators cross. If \code{hsep} is more than one character, + \code{csep} will need to be the same length. There is no provision + for multiple vertical separators} + + \item{row.names}{logical: are we printing the names of the rows?} + \item{col.names}{logical: are we printing the names of the columns?} + \item{append}{logical: if \code{file} is not \code{""}, are we appending to + the file or overwriting?} + \item{top.border}{logical: do we want a border along the top above the + columns?} + \item{left.border}{logical: do we want a border along the left of the + first column?} + \item{\dots}{unused} + } + +\details{ + If any column of \code{x} is a mixture of character and numeric, the + distinction between character and numeric columns will be lost. This + is especially so if the matrix is of a form where you would not want + to print the column names, the column information being in the rows at + the beginning of the matrix. + + Row names, if not specified in the making of the matrix will simply be + numbers. To prevent printing them, set \code{row.names = FALSE}.} + +\value{ + No value is returned. The matrix or dataframe will be printed to file + or to the screen. +} +\author{Patrick Connolly \email{p.connolly@hortresearch.co.nz}} + +\seealso{\code{write}, \code{write.table}} + +\examples{ +data(HairEyeColor) +print.char.matrix(HairEyeColor[ , , "Male"], col.names = TRUE) +print.char.matrix(HairEyeColor[ , , "Female"], col.txt.align = "left", col.names = TRUE) + + +z <- rbind(c("", "N", "y"), + c("[ 1.34,40.3)\n[40.30,48.5)\n[48.49,58.4)\n[58.44,87.8]", + " 50\n 50\n 50\n 50", + "0.530\n0.489\n0.514\n0.507"), + c("female\nmale", " 94\n106", "0.552\n0.473" ), + c("", "200", "0.510")) +dimnames(z) <- list(c("", "age", "sex", "Overall"),NULL) + +print.char.matrix(z) +} +\keyword{print} +\keyword{array} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/prnz.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/prnz.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,31 @@ +\name{prnz} +\alias{prn} +\title{ +Print an Object with its Name +} +\description{ +Prints an object with its name and with an optional descriptive +text string. This is useful for annotating analysis output files and +for debugging. +} +\usage{ +prn(x, txt, file) +} +\arguments{ +\item{x}{any object} +\item{txt}{optional text string} +\item{file}{optional file name. By default, writes to console. + \code{append=TRUE} is assumed.} +} +\section{Side Effects}{ +prints \code{x}, labelled with its name and with \code{txt} if given, to the +console or to \code{file} +} +\seealso{ +\code{\link{print}}, \code{\link{cat}} +} +\examples{ +x <- 1:5 +prn(x) +# prn(fit, 'Full Model Fit') +} +\keyword{print} |
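| A short sketch of the annotation use case, assuming Hmisc is attached; the object name and label here are illustrative: |
| library(Hmisc)
sum.sq <- sum((1:10)^2)
prn(sum.sq, 'intermediate check')  # prints the object's name, the label, then its value |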
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/prselect.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/prselect.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,73 @@ +\name{prselect} +\Rdversion{1.1} +\alias{prselect} +\title{Selectively Print Lines of a Text Vector} +\description{ + Given one or two regular expressions or exact text matches, removes + elements of the input vector that match these specifications. Omitted + lines are replaced by \ldots. This is useful for selectively + suppressing some of the printed output of R functions such as + regression fitting functions, especially in the context of making + statistical reports using Sweave or Odfweave. +} +\usage{ +prselect(x, start = NULL, stop = NULL, i = 0, j = 0, pr = TRUE) +} +\arguments{ + \item{x}{ + input character vector + } + \item{start}{ + text or regular expression identifying the first line to omit. If + omitted, deletions start at the first line. + } + \item{stop}{ + text or regular expression identifying the last line to omit. If + omitted, deletions proceed until the last line. + } + \item{i}{ + increment added to the number of the first line to delete after the + \code{start} match is found + } + \item{j}{ + increment added to the number of the last line to delete after the + \code{stop} match is found + } + \item{pr}{ + set to \code{FALSE} to suppress printing + } +} +\value{an invisible vector of retained lines of text} +\author{Frank Harrell} +\seealso{ + \code{\link[utils]{Sweave}} +} +
+\examples{ +x <- c('the','cat','ran','past','the','dog') +prselect(x, 'big','bad') # omit nothing - no match +prselect(x, 'the','past') # omit first 4 lines +prselect(x,'the','junk') # omit nothing - no match for stop +prselect(x,'ran','dog') # omit last 4 lines +prselect(x,'cat') # omit lines 2- +prselect(x,'cat',i=1) # omit lines 3- +prselect(x,'cat','past') # omit lines 2-4 +prselect(x,'cat','past',j=1) # omit lines 2-5 +prselect(x,'cat','past',j=-1)# omit lines 2-3 +prselect(x,'t$','dog') # omit lines 2-6; t must be at end + +# Example for Sweave: run a regression analysis with the rms package +# then selectively output only a portion of what print.ols prints. +# (Thanks to \email{romain.francois@dbmail.com}) +# <<z,eval=FALSE,echo=T>>= +# library(rms) +# y <- rnorm(20); x1 <- rnorm(20); x2 <- rnorm(20) +# ols(y ~ x1 + x2) +# <<echo=F>>= +# z <- capture.output( { +# <<z>> +# } ) +# prselect(z, 'Residuals:') # keep only summary stats; or: +# prselect(z, stop='Coefficients', j=-1) # keep coefficients, rmse, R^2; or: +# prselect(z, 'Coefficients', 'Residual standard error', j=-1) # omit coef +# @ +} +\keyword{manip} +\keyword{utilities} |
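| A self-contained sketch of the capture-and-filter pattern from the Sweave example above, using a base R fit instead of rms and assuming Hmisc is attached: |
| library(Hmisc)
z <- capture.output(summary(lm(dist ~ speed, data = cars)))
# Omit the residual-quantile block: from the 'Residuals:' line through the
# line just before 'Coefficients:' (j = -1)
prselect(z, 'Residuals:', 'Coefficients:', j = -1) |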
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/pstamp.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/pstamp.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,27 @@ +\name{pstamp} +\alias{pstamp} +\title{Date/Time/Directory Stamp the Current Plot} +\description{ +Date-time stamp the current plot in the extreme lower right +corner. Optionally add the current working directory and arbitrary other +text to the stamp. +} +\usage{ +pstamp(txt, pwd = FALSE, time. = TRUE) +} +\arguments{ + \item{txt}{an optional single text string} + \item{pwd}{set to \code{TRUE} to add the current working directory + name to the stamp} + \item{time.}{set to \code{FALSE} to use the date without the time} +} +\details{ + Certain functions are not supported for S-Plus under Windows. For \R, + results may not be satisfactory if \code{par(mfrow=)} is in effect. +} +\author{Frank Harrell} +\examples{ +plot(1:20) +pstamp(pwd=TRUE, time.=FALSE) +} +\keyword{aplot} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/rMultinom.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/rMultinom.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,30 @@ +\name{rMultinom} +\alias{rMultinom} +\title{Generate Multinomial Random Variables with Varying Probabilities} +\description{ +Given a matrix of multinomial probabilities where rows correspond to +observations and columns to categories (and each row sums to 1), +generates a matrix with the same number of rows as \code{probs} and +with \code{m} columns. The columns represent multinomial cell numbers, +and within a row the columns are all samples from the same multinomial +distribution. The code is a modification of that in the +\code{impute.polyreg} function in the \code{MICE} package. +} +\usage{ +rMultinom(probs, m) +} +\arguments{ + \item{probs}{matrix of probabilities} + \item{m}{number of samples for each row of \code{probs}} +} +\value{ + an integer matrix having \code{m} columns +} +\seealso{\code{\link{rbinom}}} +\examples{ +set.seed(1) +w <- rMultinom(rbind(c(.1,.2,.3,.4),c(.4,.3,.2,.1)),200) +t(apply(w, 1, table)/200) +} +\keyword{distribution} + |
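| A quick sketch checking the sampler against its inputs, assuming Hmisc is attached: each row's observed cell frequencies should approximate the corresponding row of \code{probs}: |
| library(Hmisc)
set.seed(2)
p <- rbind(c(.2, .5, .3), c(.6, .3, .1))
w <- rMultinom(p, 5000)   # 2 rows of probs, 5000 draws per row
t(apply(w, 1, function(z) table(factor(z, levels = 1:3)) / 5000))  # compare with p |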
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/rcorr.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/rcorr.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,78 @@ +\name{rcorr} +\alias{rcorr} +\alias{print.rcorr} +\title{Matrix of Correlations and P-values} +\description{ + \code{rcorr} computes a matrix of Pearson's \code{r} or Spearman's + \code{rho} rank correlation coefficients for all possible pairs of + columns of a matrix. Missing values are deleted in pairs rather than + deleting all rows of \code{x} having any missing variables. Ranks are + computed using efficient algorithms (see reference 2), using midranks + for ties. +} +\usage{ +rcorr(x, y, type=c("pearson","spearman")) + +\method{print}{rcorr}(x, \dots) +} +\arguments{ +\item{x}{ + a numeric matrix with at least 5 rows and at least 2 columns (if + \code{y} is absent). For \code{print}, \code{x} is an object + produced by \code{rcorr}. +} +\item{y}{ + a numeric vector or matrix which will be concatenated to \code{x}. If + \code{y} is omitted for \code{rcorr}, \code{x} must be a matrix. +} +\item{type}{ + specifies the type of correlations to compute. Spearman correlations + are the Pearson linear correlations computed on the ranks of non-missing + elements, using midranks for ties. +} +\item{\dots}{arguments for method compatibility.} +} +\value{ +\code{rcorr} returns a list with elements \code{r}, the +matrix of correlations, \code{n} the +matrix of number of observations used in analyzing each pair of variables, +and \code{P}, the asymptotic P-values. +Pairs with fewer than 2 non-missing values have the r values set to NA. +The diagonals of \code{n} are the number of non-NAs for the single variable +corresponding to that row and column. +} +\details{ +Uses midranks in case of ties, as described by Hollander and Wolfe. +P-values are approximated by using the \code{t} or \code{F} distributions. +} +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University +\cr +\email{f.harrell@vanderbilt.edu} +} +\references{ +Hollander M. and Wolfe D.A. (1973). Nonparametric Statistical Methods. +New York: Wiley. + +Press WH, Flannery BP, Teukolsky SA, Vetterling WT (1988): Numerical +Recipes in C. Cambridge: Cambridge University Press. +} +\seealso{ +\code{\link{hoeffd}}, \code{\link{cor}}, \code{\link{combine.levels}}, +\code{\link{varclus}}, \code{\link{dotchart3}}, \code{\link{impute}}, +\code{\link{chisq.test}}, \code{\link{cut2}}. +} +\examples{ +x <- c(-2, -1, 0, 1, 2) +y <- c(4, 1, 0, 1, 4) +z <- c(1, 2, 3, 4, NA) +v <- c(1, 2, 3, 4, 5) +rcorr(cbind(x,y,z,v)) +} +\keyword{nonparametric} +\keyword{htest} +\keyword{category} |
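| A small sketch of the Spearman option and of the returned components, assuming Hmisc is attached; the monotone but nonlinear relation is handled through the ranks: |
| library(Hmisc)
set.seed(3)
x <- rnorm(30); y <- x^3 + rnorm(30)
r <- rcorr(cbind(x, y), type = 'spearman')
r$r   # rank correlation matrix
r$n   # pairwise numbers of observations
r$P   # asymptotic P-values |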
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/rcorr.cens.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/rcorr.cens.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,155 @@ +\name{rcorr.cens} +\alias{rcorr.cens} +\alias{rcorrcens} +\alias{rcorrcens.formula} +\title{ + Rank Correlation for Censored Data +} +\description{ + Computes the \var{c} index and the corresponding + generalization of Somers' \var{Dxy} rank correlation for a censored response + variable. Also works for uncensored and binary responses, + although its use of all possible pairings + makes it slow for this purpose. \var{Dxy} and \var{c} are related by + \eqn{\var{Dxy}=2(\var{c}-0.5)}{\var{Dxy} = 2*(\var{c} - 0.5)}. + + \code{rcorr.cens} handles one predictor variable. \code{rcorrcens} + computes rank correlation measures separately by a series of + predictors. In addition, \code{rcorrcens} has a rough way of handling + categorical predictors. If a categorical (factor) predictor has two + levels, it is converted to a numeric having values 1 and 2. If it has + more than 2 levels, an indicator variable is formed for the most + frequent level vs. all others, and another indicator for the second + most frequent level vs. all others. The correlation is taken as the + maximum of the two (in absolute value). +} +\usage{ +rcorr.cens(x, S, outx=FALSE) + +\method{rcorrcens}{formula}(formula, data=NULL, subset=NULL, + na.action=na.retain, exclude.imputed=TRUE, outx=FALSE, + \dots) +} +\arguments{ + \item{x}{ + a numeric predictor variable + } + \item{S}{ + a \code{Surv} object or a vector. If a vector, assumes that every + observation is uncensored. + } + \item{outx}{ + set to \code{TRUE} to not count pairs of observations tied on \code{x} as a + relevant pair. This results in a Goodman--Kruskal gamma type rank + correlation. + } + \item{formula}{ + a formula with a \code{Surv} object or a numeric vector + on the left-hand side + } + \item{data, subset, na.action}{ + the usual options for models. Default for \code{na.action} is to retain + all values, NA or not, so that NAs can be deleted in only a pairwise + fashion. + } + \item{exclude.imputed}{ + set to \code{FALSE} to include imputed values (created by + \code{impute}) in the calculations. + } + \item{\dots}{ + extra arguments passed to \code{\link{biVar}}. + } +} +\value{ + \code{rcorr.cens} returns a vector with the following named elements: + \code{C Index}, \code{Dxy}, \code{S.D.}, \code{n}, \code{missing}, + \code{uncensored}, \code{Relevant Pairs}, \code{Concordant}, and + \code{Uncertain} + + \item{n}{number of observations not missing on any input variables} + + \item{missing}{number of observations missing on \code{x} or \code{S}} + + \item{relevant}{number of pairs of non-missing observations for which + \code{S} could be ordered} + + \item{concordant}{number of relevant pairs for which \code{x} and \code{S} + are concordant.} + + \item{uncertain}{number of pairs of non-missing observations for which + censoring prevents classification of concordance of \code{x} and + \code{S}. + } + + \code{rcorrcens.formula} returns an object of class \code{biVar} +which is documented with the \code{\link{biVar}} function. +} +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University +\cr +\email{f.harrell@vanderbilt.edu} +} +\seealso{ +\code{\link{somers2}}, \code{\link{biVar}}, \code{\link{rcorrp.cens}} +} +\references{ +Newson R: Confidence intervals for rank statistics: Somers' D and extensions. Stata Journal 6:309-334; 2006.
+} +\examples{ +set.seed(1) +x <- round(rnorm(200)) +y <- rnorm(200) +rcorr.cens(x, y, outx=TRUE) # can correlate non-censored variables +library(survival) +age <- rnorm(400, 50, 10) +bp <- rnorm(400,120, 15) +bp[1] <- NA +d.time <- rexp(400) +cens <- runif(400,.5,2) +death <- d.time <= cens +d.time <- pmin(d.time, cens) +rcorr.cens(age, Surv(d.time, death)) +r <- rcorrcens(Surv(d.time, death) ~ age + bp) +r +plot(r) + +# Show typical 0.95 confidence limits for ROC areas for a sample size +# with 24 events and 62 non-events, for varying population ROC areas +# Repeat for 138 events and 102 non-events +set.seed(8) +par(mfrow=c(2,1)) +for(i in 1:2) { + n1 <- c(24,138)[i] + n0 <- c(62,102)[i] + y <- c(rep(0,n0), rep(1,n1)) + deltas <- seq(-3, 3, by=.25) + C <- se <- deltas + j <- 0 + for(d in deltas) { + j <- j + 1 + x <- c(rnorm(n0, 0), rnorm(n1, d)) + w <- rcorr.cens(x, y) + C[j] <- w['C Index'] + se[j] <- w['S.D.']/2 + } + low <- C-1.96*se; hi <- C+1.96*se + print(cbind(C, low, hi)) + errbar(deltas, C, C+1.96*se, C-1.96*se, + xlab='True Difference in Mean X', + ylab='ROC Area and Approx. 0.95 CI') + title(paste('n1=',n1,' n0=',n0,sep='')) + abline(h=.5, v=0, col='gray') + true <- 1 - pnorm(0, deltas, sqrt(2)) + lines(deltas, true, col='blue') +} +par(mfrow=c(1,1)) +} +\keyword{survival} +\keyword{nonparametric} +\concept{predictive accuracy} +\concept{logistic regression model} +% Converted by Sd2Rd version 1.21. |
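| A minimal sketch verifying the identity \var{Dxy} = 2(\var{c} - 0.5) stated above on uncensored data, assuming Hmisc is attached and using the element names listed in the value section: |
| library(Hmisc)
set.seed(4)
x <- rnorm(50); y <- x + rnorm(50)
w <- rcorr.cens(x, y)   # a plain vector y is treated as uncensored
all.equal(unname(w['Dxy']), 2 * (unname(w['C Index']) - 0.5))  # TRUE |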
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/rcorrp.cens.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/rcorrp.cens.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,179 @@ +\name{rcorrp.cens} +\alias{rcorrp.cens} +\alias{improveProb} +\alias{print.improveProb} +\title{ + Rank Correlation for Paired Predictors with a Possibly Censored + Response, and Integrated Discrimination Index +} +\description{ + Computes U-statistics to test whether predictor \var{X1} is more + concordant than predictor \var{X2}, extending \code{rcorr.cens}. For + \code{method=1}, estimates the fraction of pairs for which the + \code{x1} difference is more impressive than the \code{x2} + difference. For \code{method=2}, estimates the fraction of pairs for + which \code{x1} is concordant with \code{S} but \code{x2} is not. + + For binary responses the function \code{improveProb} provides several + assessments of whether one set of predicted probabilities is better + than another, using the methods described in + \cite{Pencina et al (2007)}. This involves NRI and IDI to test + whether predictions from model \code{x1} are significantly different + from those obtained from predictions from model \code{x2}. This is a + distinct improvement over comparing ROC areas, sensitivity, or + specificity. +} +\usage{ +rcorrp.cens(x1, x2, S, outx=FALSE, method=1) + +improveProb(x1, x2, y) + +\method{print}{improveProb}(x, digits=3, conf.int=.95, \dots) +} +\arguments{ + \item{x1}{ + first predictor (a probability, for \code{improveProb}) + } + \item{x2}{ + second predictor (a probability, for \code{improveProb}) + } + \item{S}{ + a possibly right-censored \code{\link[survival]{Surv}} object. If + \code{S} is a vector instead, it is converted to a + \code{\link[survival]{Surv}} object and it is assumed that no + observations are censored. + } + \item{outx}{ + set to \code{TRUE} to exclude pairs tied on \code{x1} or \code{x2} + from consideration + } + \item{method}{ + see above + } + \item{y}{ + a binary 0/1 outcome variable + } + \item{x}{ + the result from \code{improveProb} + } + \item{digits}{ + number of significant digits for use in printing the result of + \code{improveProb} + } + \item{conf.int}{ + level for confidence limits + } + \item{\dots}{ + unused + } +} +\details{ + If \code{x1},\code{x2} represent predictions from models, these + functions assume either that you are using a separate sample from the + one used to build the model, or that the amount of overfitting in + \code{x1} equals the amount of overfitting in \code{x2}. An example + of the latter is giving both models equal opportunity to be complex so + that both models have the same number of effective degrees of freedom, + whether a predictor was included in the model or was screened out by a + variable selection scheme. + + Note that in the first part of their paper, \cite{Pencina et al.} + presented measures that required binning the predicted probabilities. + Those measures were then replaced with better continuous measures that + are implemented here.
+} +\value{ + a vector of statistics for \code{rcorrp.cens}, or a list with class + \code{improveProb} of statistics for \code{improveProb}: + \cr + \item{n}{number of cases} + \item{na}{number of events} + \item{nb}{number of non-events} + \item{pup.ev}{ + mean of pairwise differences in probabilities for those with events + and a pairwise difference of \eqn{\mbox{probabilities}>0} + } + \item{pup.ne}{ + mean of pairwise differences in probabilities for those without + events and a pairwise difference of \eqn{\mbox{probabilities}>0} + } + \item{pdown.ev}{ + mean of pairwise differences in probabilities for those with events + and a pairwise difference of \eqn{\mbox{probabilities}<0} + } + \item{pdown.ne}{ + mean of pairwise differences in probabilities for those without + events and a pairwise difference of \eqn{\mbox{probabilities}<0} + } + \item{nri}{ + Net Reclassification Index = + \eqn{(pup.ev-pdown.ev)-(pup.ne-pdown.ne)} + } + \item{se.nri}{standard error of NRI} + \item{z.nri}{Z score for NRI} + \item{nri.ev}{Net Reclassification Index = \eqn{pup.ev-pdown.ev}} + \item{se.nri.ev}{SE of NRI of events} + \item{z.nri.ev}{Z score for NRI of events} + \item{nri.ne}{Net Reclassification Index = \eqn{pup.ne-pdown.ne}} + \item{se.nri.ne}{SE of NRI of non-events} + \item{z.nri.ne}{Z score for NRI of non-events} + \item{improveSens}{improvement in sensitivity} + \item{improveSpec}{improvement in specificity} + \item{idi}{Integrated Discrimination Index} + \item{se.idi}{SE of IDI} + \item{z.idi}{Z score of IDI} +} +\author{ + Frank Harrell \cr + Department of Biostatistics, Vanderbilt University \cr + \email{f.harrell@vanderbilt.edu} + + Scott Williams \cr + Division of Radiation Oncology \cr + Peter MacCallum Cancer Centre, Melbourne, Australia \cr + \email{scott.williams@petermac.org} +} +\references{ + Pencina MJ, D'Agostino Sr RB, D'Agostino Jr RB, Vasan RS (2008): + Evaluating the added predictive ability of a new marker: From area + under the ROC curve to reclassification and beyond. Stat in Med 27:157-172. + DOI: 10.1002/sim.2929 + + Pencina MJ, D'Agostino Sr RB, D'Agostino Jr RB, Vasan RS: + Rejoinder: Comments on Integrated discrimination and net reclassification + improvements-Practical advice. Stat in Med 2007; DOI: 10.1002/sim.3106 + + Pencina MJ, D'Agostino RB, Steyerberg EW (2011): Extensions of net + reclassification improvement calculations to measure usefulness of new + biomarkers. Stat in Med 30:11-21; DOI: 10.1002/sim.4085 +} +\seealso{ + \code{\link{rcorr.cens}}, \code{\link{somers2}}, + \code{\link[survival]{Surv}}, \code{\link[rms]{val.prob}} +} +\examples{ +set.seed(1) +library(survival) + +x1 <- rnorm(400) +x2 <- x1 + rnorm(400) +d.time <- rexp(400) + (x1 - min(x1)) +cens <- runif(400,.5,2) +death <- d.time <= cens +d.time <- pmin(d.time, cens) +rcorrp.cens(x1, x2, Surv(d.time, death)) +#rcorrp.cens(x1, x2, y) ## no censoring + +set.seed(1) +x1 <- runif(1000) +x2 <- runif(1000) +y <- sample(0:1, 1000, TRUE) +rcorrp.cens(x1, x2, y) +improveProb(x1, x2, y) +} +\keyword{survival} +\keyword{nonparametric} +\keyword{regression} +\concept{logistic regression model} +\concept{predictive accuracy} + |
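| A small sketch checking the NRI decomposition on the returned components, assuming Hmisc is attached and that the elements carry the names listed in the value section: |
| library(Hmisc)
set.seed(5)
p1 <- runif(500); p2 <- runif(500)
y  <- rbinom(500, 1, .4)
w  <- improveProb(p1, p2, y)
# nri = (pup.ev - pdown.ev) - (pup.ne - pdown.ne), per the value section
all.equal(w$nri, (w$pup.ev - w$pdown.ev) - (w$pup.ne - w$pdown.ne)) |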
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/rcspline.eval.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/rcspline.eval.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,112 @@ +\name{rcspline.eval} +\alias{rcspline.eval} +\title{ + Restricted Cubic Spline Design Matrix +} +\description{ + Computes matrix that expands a single variable into the terms needed + to fit a restricted cubic spline (natural spline) function using the + truncated power basis. Two normalization options are given for + somewhat reducing problems of ill-conditioning. The antiderivative + function can be optionally created. If knot locations are not given, + they will be estimated from the marginal distribution of \code{x}. +} +\usage{ +rcspline.eval(x, knots, nk=5, inclx=FALSE, knots.only=FALSE, + type="ordinary", norm=2, rpm=NULL, pc=FALSE, + fractied=0.05) +} +\arguments{ + \item{x}{ + a vector representing a predictor variable + } + \item{knots}{ + knot locations. If not given, knots will be estimated using default + quantiles of \code{x}. For 3 knots, the outer quantiles used are 0.10 + and 0.90. For 4-6 knots, the outer quantiles used are 0.05 and + 0.95. For \eqn{\code{nk}>6}, the outer quantiles are 0.025 and 0.975. The + knots are equally spaced between these on the quantile scale. For + fewer than 100 non-missing values of \code{x}, the outer knots are + the fifth-smallest and fifth-largest values of \code{x}. + } + \item{nk}{ + number of knots. Default is 5. The minimum value is 3. + } + \item{inclx}{ + set to \code{TRUE} to add \code{x} as the first column of the + returned matrix + } + \item{knots.only}{ + return the estimated knot locations but not the expanded matrix + } + \item{type}{ + \samp{"ordinary"} to fit the function, \samp{"integral"} to fit its + anti-derivative. + } + \item{norm}{ + \samp{0} to use the terms as originally given by \cite{Devlin and + Weeks (1986)}, \samp{1} to normalize non-linear terms by the cube + of the spacing between the last two knots, \samp{2} to normalize by + the square of the spacing between the first and last knots (the + default). \code{norm=2} has the advantage of making all nonlinear + terms be on the x-scale. + } + \item{rpm}{ + If given, any \code{NA}s in \code{x} will be replaced with the value + \code{rpm} after estimating any knot locations. + } + \item{pc}{ + Set to \code{TRUE} to replace the design matrix with orthogonal + (uncorrelated) principal components computed on the scaled, centered + design matrix + } + \item{fractied}{ + If the fraction of observations tied at the lowest and/or highest + values of \code{x} is greater than or equal to \code{fractied}, the + algorithm attempts to use a different algorithm for knot finding + based on quantiles of \code{x} after excluding the one or two values + with excessive ties. And if the number of unique \code{x} values + excluding these values is small, the unique values will be used as + the knots. If the number of knots to use other than these exterior + values is only one, that knot will be at the median of the + non-extreme \code{x}. This algorithm is not used if any interior + values of \code{x} also have a proportion of ties equal to or + exceeding \code{fractied}.} +} +\value{ + If \code{knots.only=TRUE}, returns a vector of knot + locations. Otherwise returns a matrix with \code{x} (if + \code{inclx=TRUE}) followed by \eqn{\code{nk}-2} nonlinear terms. The + matrix has an attribute \code{knots} which is the vector of knots + used. When \code{pc} is \code{TRUE}, an additional attribute is + stored: \code{pcparms}, which contains the \code{center} and + \code{scale} vectors and the \code{rotation} matrix.
+} +\references{ + Devlin TF and Weeks BJ (1986): Spline functions for logistic regression + modeling. Proc 11th Annual SAS Users Group Intnl Conf, p. 646--651. + Cary NC: SAS Institute, Inc. +} +\seealso{ + \code{\link[splines]{ns}}, \code{\link{rcspline.restate}}, + \code{\link[rms]{rcs}} +} +\examples{ +x <- 1:100 +rcspline.eval(x, nk=4, inclx=TRUE) +#lrm.fit(rcspline.eval(age,nk=4,inclx=TRUE), death) +x <- 1:1000 +attributes(rcspline.eval(x)) +x <- c(rep(0, 744),rep(1,6), rep(2,4), rep(3,10),rep(4,2),rep(6,6), + rep(7,3),rep(8,2),rep(9,4),rep(10,2),rep(11,9),rep(12,10),rep(13,13), + rep(14,5),rep(15,5),rep(16,10),rep(17,6),rep(18,3),rep(19,11),rep(20,16), + rep(21,6),rep(22,16),rep(23,17), 24, rep(25,8), rep(26,6),rep(27,3), + rep(28,7),rep(29,9),rep(30,10),rep(31,4),rep(32,4),rep(33,6),rep(34,6), + rep(35,4), rep(36,5), rep(38,6), 39, 39, 40, 40, 40, 41, 43, 44, 45) +attributes(rcspline.eval(x, nk=3)) +attributes(rcspline.eval(x, nk=5)) +u <- c(rep(0,30), 1:4, rep(5,30)) +attributes(rcspline.eval(u)) +} +\keyword{regression} +\keyword{smooth} |
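| A short sketch of the knot-placement rules described above, assuming Hmisc is attached; with \code{nk=4} the outer knots should fall near the 0.05 and 0.95 quantiles: |
| library(Hmisc)
set.seed(6)
x <- rnorm(1000)
rcspline.eval(x, nk = 4, knots.only = TRUE)   # four estimated knot locations
quantile(x, c(.05, .95))                      # documented outer quantiles for nk = 4 |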
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/rcspline.plot.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/rcspline.plot.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,140 @@ +\name{rcspline.plot} +\alias{rcspline.plot} +\title{ + Plot Restricted Cubic Spline Function +} +\description{ + Provides plots of the estimated restricted cubic spline function + relating a single predictor to the response for a logistic or Cox + model. The \code{rcspline.plot} function does not allow for + interactions as do \code{\link[rms]{lrm}} and \code{\link[rms]{cph}}, but it can + provide detailed output for checking spline fits. This function uses + the \code{\link{rcspline.eval}}, \code{\link[rms]{lrm.fit}}, and Therneau's + \code{\link{coxph.fit}} functions and plots the estimated spline + regression and confidence limits, placing summary statistics on the + graph. If there are no adjustment variables, \code{rcspline.plot} can + also plot two alternative estimates of the regression function when + \code{model="logistic"}: proportions or logit proportions on grouped + data, and a nonparametric estimate. The nonparametric regression + estimate is based on smoothing the binary responses and taking the + logit transformation of the smoothed estimates, if desired. The + smoothing uses \code{\link{supsmu}}. +} +\usage{ +rcspline.plot(x,y,model=c("logistic", "cox", "ols"), xrange, event, nk=5, + knots=NULL, show=c("xbeta","prob"), adj=NULL, xlab, ylab, + ylim, plim=c(0,1), plotcl=TRUE, showknots=TRUE, add=FALSE, + subset, lty=1, noprint=FALSE, m, smooth=FALSE, bass=1, + main="auto", statloc) +} +\arguments{ + \item{x}{ + a numeric predictor + } + \item{y}{ + a numeric response. For binary logistic regression, \code{y} should + be either 0 or 1. + } + \item{model}{ + \code{"logistic"} or \code{"cox"}. For \code{"cox"}, uses the + \code{coxph.fit} function with the \code{method="efron"} argument set. + } + \item{xrange}{ + range for evaluating \code{x}, default is \var{f} and + \eqn{1 - \var{f}} quantiles of \code{x}, where + \eqn{\var{f} = \frac{10}{\max{(\var{n}, 200)}}}{\var{f} = 10/max(\var{n}, 200)} + } + \item{event}{ + event/censoring indicator if \code{model="cox"}. If \code{event} is + present, \code{model} is assumed to be \code{"cox"} + } + \item{nk}{ + number of knots + } + \item{knots}{ + knot locations, default based on quantiles of \code{x} (by + \code{\link{rcspline.eval}}) + } + \item{show}{ + \code{"xbeta"} or \code{"prob"} - what is plotted on the \verb{y}-axis + } + \item{adj}{ + optional matrix of adjustment variables + } + \item{xlab}{ + \verb{x}-axis label, default is the \dQuote{label} attribute of + \code{x} + } + \item{ylab}{ + \verb{y}-axis label, default is the \dQuote{label} attribute of + \code{y} + } + \item{ylim}{ + \verb{y}-axis limits for logit or log hazard + } + \item{plim}{ + \verb{y}-axis limits for probability scale + } + \item{plotcl}{ + plot confidence limits + } + \item{showknots}{ + show knot locations with arrows + } + \item{add}{ + add this plot to an already existing plot + } + \item{subset}{ + subset of observations to process, e.g. \code{sex == "male"} + } + \item{lty}{ + line type for plotting estimated spline function + } + \item{noprint}{ + suppress printing regression coefficients and standard errors + } + \item{m}{ + for \code{model="logistic"}, plot grouped estimates with + triangles. Each group contains \code{m} ordered observations on + \code{x}.
+ } + \item{smooth}{ + plot nonparametric estimate if \code{model="logistic"} and + \code{adj} is not specified + } + \item{bass}{ + smoothing parameter (see \code{supsmu}) + } + \item{main}{ + main title, default is \code{"Estimated Spline Transformation"} + } + \item{statloc}{ + location of summary statistics. Default positioning by clicking left + mouse button where upper left corner of statistics should + appear. Alternative is \code{"ll"} to place below the graph on the + lower left, or the actual \code{x} and \code{y} coordinates. Use + \code{"none"} to suppress statistics. + } +} +\value{ + list with components (\samp{knots}, \samp{x}, \samp{xbeta}, + \samp{lower}, \samp{upper}) which are respectively the knot locations, + design matrix, linear predictor, and lower and upper confidence limits +} +\author{ + Frank Harrell \cr + Department of Biostatistics, Vanderbilt University \cr + \email{f.harrell@vanderbilt.edu} +} +\seealso{ + \code{\link[rms]{lrm}}, \code{\link[rms]{cph}}, \code{\link{rcspline.eval}}, + \code{\link[graphics]{plot}}, \code{\link{supsmu}}, + \code{\link[survival:survival-internal]{coxph.fit}}, + \code{\link[rms]{lrm.fit}} +} +\examples{ +#rcspline.plot(cad.dur, tvdlm, m=150) +#rcspline.plot(log10(cad.dur+1), tvdlm, m=150) +} +\keyword{regression} +\keyword{models} |
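| Because the examples above are commented out (they refer to datasets not shipped with the package), here is a self-contained sketch on simulated data; it assumes Hmisc is attached and the rms package is installed, since \code{rcspline.plot} calls \code{lrm.fit}, and uses \code{statloc='none'} to keep the call non-interactive: |
| library(Hmisc)
set.seed(7)
x <- rnorm(300)
y <- rbinom(300, 1, plogis(-0.5 + sin(x)))   # smooth nonlinear logit
rcspline.plot(x, y, model = 'logistic', nk = 4, m = 50, statloc = 'none') |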
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/rcspline.restate.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/rcspline.restate.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,142 @@ +\name{rcspline.restate} +\alias{rcspline.restate} +\alias{rcsplineFunction} +\title{ + Re-state Restricted Cubic Spline Function +} +\description{ + This function re-states a restricted cubic spline function in + the un-linearly-restricted form. Coefficients for that form are + returned, along with an \R functional representation of this function + and a LaTeX character representation of the function. + \code{rcsplineFunction} is a fast function that creates a function to + compute a restricted cubic spline function with given coefficients and + knots, without reformatting the function to be pretty (i.e., into + unrestricted form). +} +\usage{ +rcspline.restate(knots, coef, + type=c("ordinary","integral"), + x="X", lx=nchar(x), + norm=2, columns=65, before="& &", after="\\\\", + begin="", nbegin=0, digits=max(8, .Options$digits)) + +rcsplineFunction(knots, coef, norm=2) +} +\arguments{ + \item{knots}{ + vector of knots used in the regression fit + } + \item{coef}{ + vector of coefficients from the fit. If the length of \code{coef} is + \eqn{\var{k}-1}, where \var{k} is equal to the \code{length(knots)}, the + first coefficient must be for the linear term and remaining + \eqn{k-2} coefficients must be for the constructed terms (e.g., from + \code{rcspline.eval}). If the length of \code{coef} is \var{k}, an + intercept is assumed to be in the first element (or a zero is + prepended to \code{coef} for \code{rcsplineFunction}). + } + \item{type}{ + The default is to represent the cubic spline function corresponding + to the coefficients and knots. Set \code{type = "integral"} to + instead represent its anti-derivative. + } + \item{x}{ + a character string to use as the variable name in the LaTeX expression + for the formula. + } + \item{lx}{ + length of \code{x} to count with respect to \code{columns}. Default + is length of character string contained by \code{x}. You may want to + set \code{lx} smaller than this if it includes non-printable LaTeX + commands. + } + \item{norm}{ + normalization that was used in deriving the original nonlinear terms + used in the fit. See \code{rcspline.eval} for definitions. + } + \item{columns}{ + maximum number of symbols in the LaTeX expression to allow before + inserting a newline (\samp{\\\\}) command. Set to a very large + number to keep text all on one line. + } + \item{before}{ + text to place before each line of LaTeX output. Use \samp{"& &"} + for an equation array environment in LaTeX where you want to have a + left-hand prefix e.g. \samp{"f(X) & = &"} or using + \samp{"\\lefteqn"}. + } + \item{after}{ + text to place at the end of each line of output. + } + \item{begin}{ + text with which to start the first line of output. Useful when + adding LaTeX output to part of an existing formula + } + \item{nbegin}{ + number of columns of printable text in \code{begin} + } + \item{digits}{ + number of significant digits to write for coefficients and knots + } +} +\value{ + \code{rcspline.restate} returns a vector of coefficients. The + coefficients are un-normalized and two coefficients are added that are + linearly dependent on the other coefficients and knots. The vector of + coefficients has four attributes. \code{knots} is a vector of knots, + \code{latex} is a vector of text strings with the LaTeX + representation of the formula. \code{columns.used} is the number of + columns used in the output string since the last newline command. 
+ \code{function} is an \R function, which is also returned in + character string format as the \code{text} attribute. \code{rcsplineFunction} + returns an \R function with arguments \code{x} (a user-supplied + numeric vector at which to evaluate the function), and other + automatically supplied arguments. +} +\author{ + Frank Harrell + \cr + Department of Biostatistics, Vanderbilt University + \cr + \email{f.harrell@vanderbilt.edu} +} +\seealso{ + \code{\link{rcspline.eval}}, \code{\link{ns}}, \code{\link[rms]{rcs}}, + \code{\link{latex}}, \code{\link{Function.transcan}} +} +\examples{ +set.seed(1) +x <- 1:100 +y <- (x - 50)^2 + rnorm(100, 0, 50) +plot(x, y) +xx <- rcspline.eval(x, inclx=TRUE, nk=4) +knots <- attr(xx, "knots") +coef <- lsfit(xx, y)$coef +options(digits=4) +# rcspline.restate must ignore intercept +w <- rcspline.restate(knots, coef[-1], x="{\\\\rm BP}") +# could also have used coef instead of coef[-1], to include intercept +cat(attr(w,"latex"), sep="\n") + + +xtrans <- eval(attr(w, "function")) +# This is an S function of a single argument +lines(x, coef[1] + xtrans(x), type="l") +# Plots fitted transformation + +xtrans <- rcsplineFunction(knots, coef) +xtrans +lines(x, xtrans(x), col='blue') + + +#x <- blood.pressure +xx.simple <- cbind(x, pmax(x-knots[1],0)^3, pmax(x-knots[2],0)^3, + pmax(x-knots[3],0)^3, pmax(x-knots[4],0)^3) +pred.value <- coef[1] + xx.simple \%*\% w +plot(x, pred.value, type='l') # same as above +} +\keyword{regression} +\keyword{interface} +\keyword{character} +% Converted by Sd2Rd version 1.21. |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/reShape.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/reShape.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,200 @@ +\name{reShape} +\alias{reShape} +\title{Reshape Matrices and Serial Data} +\description{ + If the first argument is a matrix, \code{reShape} strings out its values + and creates row and column vectors specifying the row and column each + element came from. This is useful for sending matrices to Trellis + functions, for analyzing or plotting results of \code{table} or + \code{crosstabs}, or for reformatting serial data stored in a matrix (with + rows representing multiple time points) into vectors. The number of + observations in the new variables will be the product of the number of + rows and number of columns in the input matrix. If the first + argument is a vector, the \code{id} and \code{colvar} variables are used to + restructure it into a matrix, with \code{NA}s for elements that corresponded + to combinations of \code{id} and \code{colvar} values that did not exist in the + data. When more than one vector is given, multiple matrices are + created. This is useful for restructuring irregular serial data into + regular matrices. It is also useful for converting data produced by + \code{expand.grid} into a matrix (see the last example). The number of + rows of the new matrices equals the number of unique values of \code{id}, + and the number of columns equals the number of unique values of + \code{colvar}. + + When the first argument is a vector and the \code{id} is a data frame + (even with only one variable), + \code{reShape} will produce a data frame, and the unique groups are + identified by combinations of the values of all variables in \code{id}. + If a data frame \code{constant} is specified, the variables in this data + frame are assumed to be constant within combinations of \code{id} + variables (if not, an arbitrary observation in \code{constant} will be + selected for each group). A row of \code{constant} corresponding to the + target \code{id} combination is then carried along when creating the + data frame result. + + A different behavior of \code{reShape} is achieved when \code{base} and \code{reps} + are specified. In that case \code{x} must be a list or data frame, and + those data are assumed to contain one or more non-repeating + measurements (e.g., baseline measurements) and one or more repeated + measurements represented by variables named by pasting together the + character strings in the vector \code{base} with the integers 1, 2, \dots, + \code{reps}. The input data are rearranged by repeating each value of the + baseline variables \code{reps} times and by transposing each observation's + values of one of the set of repeated measurements as \code{reps} + observations under the variable whose name does not have an integer + pasted to the end. If \code{x} has a \code{row.names} attribute, those + observation identifiers are each repeated \code{reps} times in the output + object. See the last example. +} +\usage{ +reShape(x, \dots, id, colvar, base, reps, times=1:reps, + timevar='seqno', constant=NULL) +} +\arguments{ + \item{x}{ + a matrix or vector, or, when \code{base} is specified, a list or data frame + } + \item{\dots}{ + other optional vectors, if \code{x} is a vector + } + \item{id}{ + A numeric, character, category, or factor variable containing subject + identifiers, or a data frame of such variables that in combination form + groups of interest. Required if \code{x} is a vector, ignored otherwise. + } + \item{colvar}{ + A numeric, character, category, or factor variable containing column + identifiers. \code{colvar} is typically a "time of data collection" variable. + Required if \code{x} is a vector, ignored otherwise. + } + \item{base}{ + vector of character strings containing base names of repeated + measurements + } + \item{reps}{ + number of times variables named in \code{base} are repeated. This must be + a constant. + } + \item{times}{ + when \code{base} is g[...]rting \code{dimnames} to vectors, the resulting variables are + numeric if all elements of the matrix dimnames can be converted to + numeric, otherwise the corresponding row or column variable remains + character. When the \code{dimnames} of \code{x} have a \code{names} attribute, those + two names become the new variable names. If \code{x} is a vector and + another vector is also given (in \code{\dots}), the matrices in the resulting + list are named the same as the input vector calling arguments. You + can specify customized names for these on-the-fly by using + e.g. \code{reShape(X=x, Y=y, id= , colvar= )}. The new names will then be + \code{X} and \code{Y} instead of \code{x} and \code{y}. A new variable named \code{seqno} is + also added to the resulting object. \code{seqno} indicates the sequential + repeated measurement number. When \code{base} and \code{times} are + specified, this new variable is named the character value of \code{timevar} and the values + are given by a table lookup into the vector \code{times}. +} +\author{ +Frank Harrell\cr +Department of Biostatistics\cr +Vanderbilt University School of Medicine\cr +\email{f.harrell@vanderbilt.edu} +} +\seealso{ + \code{\link[stats]{reshape}}, \code{\link[base:vector]{as.vector}}, + \code{\link[base]{matrix}}, \code{\link[base]{dimnames}}, + \code{\link[base]{outer}}, \code{\link[base]{table}} +} +\examples{ +set.seed(1) +Solder <- factor(sample(c('Thin','Thick'),200,TRUE),c('Thin','Thick')) +Opening <- factor(sample(c('S','M','L'), 200,TRUE),c('S','M','L')) + +tab <- table(Opening, Solder) +tab +reShape(tab) +# attach(tab) # do further processing + +# An example where a matrix is created from irregular vectors +follow <- data.frame(id=c('a','a','b','b','b','d'), + month=c(1, 2, 1, 2, 3, 2), + cholesterol=c(225,226, 320,319,318, 270)) +follow +attach(follow) +reShape(cholesterol, id=id, colvar=month) +detach('follow') +# Could have done : +# reShape(cholesterol, triglyceride=trig, id=id, colvar=month) + +# Create a data frame, reshaping a long dataset in which groups are +# formed not just by subject id but by combinations of subject id and +# visit number. Also carry forward a variable that is supposed to be +# constant within subject-visit number combinations. In this example, +# it is not constant, so an arbitrary visit number will be selected. +w <- data.frame(id=c('a','a','a','a','b','b','b','d','d','d'), + visit=c( 1, 1, 2, 2, 1, 1, 2, 2, 2, 2), + k=c('A','A','B','B','C','C','D','E','F','G'), + var=c('x','y','x','y','x','y','y','x','y','z'), + val=1:10) +with(w, + reShape(val, id=data.frame(id,visit), + constant=data.frame(k), colvar=var)) + +# Get predictions from a regression model for 2 systematically +# varying predictors. Convert the predictions into a matrix, with +# rows corresponding to the predictor having the most values, and +# columns corresponding to the other predictor +# d <- expand.grid(x2=0:1, x1=1:100) +# pred <- predict(fit, d) +# reShape(pred, id=d$x1, colvar=d$x2) # makes 100 x 2 matrix + + +# Reshape a wide data frame containing multiple variables representing +# repeated measurements (3 repeats on 2 variables; 4 subjects) +set.seed(33) +n <- 4 +w <- data.frame(age=rnorm(n, 40, 10), + sex=sample(c('female','male'), n,TRUE), + sbp1=rnorm(n, 120, 15), + sbp2=rnorm(n, 120, 15), + sbp3=rnorm(n, 120, 15), + dbp1=rnorm(n, 80, 15), + dbp2=rnorm(n, 80, 15), + dbp3=rnorm(n, 80, 15), row.names=letters[1:n]) +options(digits=3) +w + + +u <- reShape(w, base=c('sbp','dbp'), reps=3) +u +reShape(w, base=c('sbp','dbp'), reps=3, timevar='week', times=c(0,3,12)) +} +\keyword{manip} +\keyword{array} +\concept{trellis} +\concept{lattice} +\concept{repeated measures} +\concept{longitudinal data} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/redun.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/redun.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,116 @@ +\name{redun} +\alias{redun} +\alias{print.redun} +\title{Redundancy Analysis} +\description{ +Uses flexible parametric additive models (see \code{\link{areg}} and its +use of regression splines) to +determine how well each variable can be predicted from the remaining +variables. Variables are dropped in a stepwise fashion, removing the +most predictable variable at each step. The remaining variables are used +to predict. The process continues until no variable still in the list +of predictors can be predicted with an \eqn{R^2} or adjusted \eqn{R^2} +of at least \code{r2} or until dropping the variable with the highest +\eqn{R^2} (adjusted or ordinary) would cause a variable that was dropped +earlier to no longer be predicted at least at the \code{r2} level from +the now smaller list of predictors. +} +\usage{ +redun(formula, data=NULL, subset=NULL, r2 = 0.9, + type = c("ordinary", "adjusted"), nk = 3, tlinear = TRUE, + allcat=FALSE, minfreq=0, iterms=FALSE, pc=FALSE, pr = FALSE, ...) +\method{print}{redun}(x, digits=3, long=TRUE, ...) +} +\arguments{ + \item{formula}{a formula. Enclose a variable in \code{I()} to force + linearity.} + \item{data}{a data frame} + \item{subset}{usual subsetting expression} + \item{r2}{ordinary or adjusted \eqn{R^2} cutoff for redundancy} + \item{type}{specify \code{"adjusted"} to use adjusted \eqn{R^2}} + \item{nk}{number of knots to use for continuous variables. Use + \code{nk=0} to force linearity for all variables.} + \item{tlinear}{set to \code{FALSE} to allow a variable to be automatically + nonlinearly transformed (see \code{areg}) while being predicted. By + default, only continuous variables on the right hand side (i.e., while + they are being predictors) are automatically transformed, using + regression splines. Estimating transformations for target (dependent) + variables causes more overfitting than doing so for predictors.} + \item{allcat}{set to \code{TRUE} to ensure that all categories of + categorical variables having more than two categories are redundant + (see details below)} + \item{minfreq}{For a binary or categorical variable, there must be at + least two categories with at least \code{minfreq} observations or + the variable will be dropped and not checked for redundancy against + other variables. \code{minfreq} also specifies the minimum + frequency of a category or its complement + before that category is considered when \code{allcat=TRUE}.} + \item{iterms}{set to \code{TRUE} to consider derived terms (dummy + variables and nonlinear spline components) as separate variables. + This will perform a redundancy analysis on pieces of the variables.} + \item{pc}{if \code{iterms=TRUE} you can set \code{pc} to \code{TRUE} + to replace the submatrix of terms corresponding to each variable + with the orthogonal principal components before doing the redundancy + analysis. The components are based on the correlation matrix.} + \item{pr}{set to \code{TRUE} to monitor progress of the stepwise algorithm} + \item{\dots}{arguments to pass to \code{dataframeReduce} to remove + "difficult" variables from \code{data} if \code{formula} is + \code{~.} to use all variables in \code{data} (\code{data} must be + specified when these arguments are used). 
Ignored for \code{print}.} + \item{x}{an object created by \code{redun}} + \item{digits}{number of digits to which to round \eqn{R^2} values when + printing} + \item{long}{set to \code{FALSE} to prevent the \code{print} method + from printing the \eqn{R^2} history and the original \eqn{R^2} with + which each variable can be predicted from ALL other variables.} +} +\value{an object of class \code{"redun"}} +\details{ +A categorical variable is deemed +redundant if a linear combination of dummy variables representing it can +be predicted from a linear combination of other variables. For example, +if there were 4 cities in the data and each city's rainfall was also +present as a variable, with virtually the same rainfall reported for all +observations for a city, city would be redundant given rainfall (or +vice-versa; the one declared redundant would be the first one in the +formula). If two cities had the same rainfall, \code{city} might be +declared redundant even though tied cities might be deemed non-redundant +in another setting. To ensure that all categories may be predicted well +from other variables, use the \code{allcat} option. To ignore +categories that are too infrequent or too frequent, set \code{minfreq} +to a nonzero integer. When the number of observations in the category +is below this number or the number of observations not in the category +is below this number, no attempt is made to predict observations being +in that category individually for the purpose of redundancy detection.} +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University +\cr +\email{f.harrell@vanderbilt.edu} +} +\seealso{\code{\link{areg}}, \code{\link{dataframeReduce}}, + \code{\link{transcan}}, \code{\link{varclus}}, + \code{\link[subselect]{genetic}}} +\examples{ +set.seed(1) +n <- 100 +x1 <- runif(n) +x2 <- runif(n) +x3 <- x1 + x2 + runif(n)/10 +x4 <- x1 + x2 + x3 + runif(n)/10 +x5 <- factor(sample(c('a','b','c'),n,replace=TRUE)) +x6 <- 1*(x5=='a' | x5=='c') +redun(~x1+x2+x3+x4+x5+x6, r2=.8) +redun(~x1+x2+x3+x4+x5+x6, r2=.8, minfreq=40) +redun(~x1+x2+x3+x4+x5+x6, r2=.8, allcat=TRUE) +# x5 is no longer redundant but x6 is +} +\keyword{smooth} +\keyword{regression} +\keyword{multivariate} +\keyword{methods} +\keyword{models} +\concept{data reduction} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/rlegend.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/rlegend.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,49 @@ +\name{rlegend} +\alias{rlegend} +\alias{rlegendg} +\title{Special Version of legend for R} +\description{ + \code{rlegend} is a version of \code{\link{legend}} for \R that implements + \code{plot=FALSE}, adds \code{grid=TRUE}, and defaults \code{lty}, + \code{lwd}, \code{pch} to \code{NULL} and checks for \code{length>0} + rather than \code{missing()}, so it's easier to deal with + non-applicable parameters. But when \pkg{grid} is in effect, the + preferred function to use is \code{rlegendg}, which calls the + \pkg{lattice} \code{\link{draw.key}} function. +} +\usage{ +rlegend(x, y, legend, fill, col = "black", lty = NULL, lwd = NULL, + pch = NULL, angle = NULL, density = NULL, bty = "o", + bg = par("bg"), pt.bg = NA, cex = 1, xjust = 0, yjust = 1, + x.intersp = 1, y.intersp = 1, adj = 0, text.width = NULL, + merge = do.lines && has.pch, trace = FALSE, ncol = 1, + horiz = FALSE, plot = TRUE, grid = FALSE, \dots) + +rlegendg(x, y, legend, col=pr$col[1], lty=NULL, + lwd=NULL, pch=NULL, cex=pr$cex[1], other=NULL) +} +\arguments{ + \item{x,y,legend,fill,col,lty,lwd,pch,angle,density,bty,bg,pt.bg,cex,xjust, + yjust,x.intersp,y.intersp,adj,text.width,merge,trace,ncol,horiz}{ + see \code{\link{legend}} + } + \item{plot}{set to \code{FALSE} to suppress drawing the legend. This + is used to compute the size needed when the legend is drawn + with a later call to \code{rlegend}. + } + \item{grid}{set to \code{TRUE} if the \pkg{grid} package is in effect} + \item{\dots}{see \code{\link{legend}}} + \item{other}{ + a list containing other arguments to pass to + \code{draw.key}. See the help file for \code{\link{xyplot}}. + } +} +\value{ + a list with elements \code{rect} and \code{text}. \code{rect} has + elements \code{w, h, left, top} with size/position information. +} +\author{Frank Harrell and R-Core} +\seealso{ + \code{\link{legend}}, \code{\link[lattice]{draw.key}}, \code{\link{xyplot}} +} +\keyword{aplot} |
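| A brief sketch of the measure-then-draw pattern enabled by \code{plot=FALSE}, assuming Hmisc is attached; the coordinates are illustrative: |
| library(Hmisc)
plot(1:10)
sz <- rlegend(1, 10, legend = c('A', 'B'), lty = 1:2, plot = FALSE)  # measure only
sz$rect   # width, height, and position of the legend box
rlegend(1, 10, legend = c('A', 'B'), lty = 1:2)   # now draw it |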
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/rm.boot.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/rm.boot.Rd Wed Jun 28 20:28:48 2017 -0400 |
| b"@@ -0,0 +1,553 @@\n+\\name{rm.boot}\n+\\alias{rm.boot}\n+\\alias{plot.rm.boot}\n+\\title{\n+ Bootstrap Repeated Measurements Model\n+}\n+\\description{\n+ For a dataset containing a time variable, a scalar response variable,\n+ and an optional subject identification variable, obtains least squares\n+ estimates of the coefficients of a restricted cubic spline function or\n+ a linear regression in time after adjusting for subject effects\n+ through the use of subject dummy variables. Then the fit is\n+ bootstrapped \\code{B} times, either by treating time and subject ID as\n+ fixed (i.e., conditioning the analysis on them) or as random\n+ variables. For the former, the residuals from the original model fit\n+ are used as the basis of the bootstrap distribution. For the latter,\n+ samples are taken jointly from the time, subject ID, and response\n+ vectors to obtain unconditional distributions.\n+ \n+ If a subject \\code{id} variable is given, the bootstrap sampling will\n+ be based on samples with replacement from subjects rather than from\n+ individual data points. In other words, either none or all of a given\n+ subject's data will appear in a bootstrap sample. This cluster\n+ sampling takes into account any correlation structure that might exist\n+ within subjects, so that confidence limits are corrected for\n+ within-subject correlation. Assuming that ordinary least squares\n+ estimates, which ignore the correlation structure, are consistent\n+ (which is almost always true) and efficient (which would not be true\n+ for certain correlation structures or for datasets in which the number\n+ of observation times vary greatly from subject to subject), the\n+ resulting analysis will be a robust, efficient repeated measures\n+ analysis for the one-sample problem.\n+ \n+ Predicted values of the fitted models are evaluated by default at a\n+ grid of 100 equally spaced time points ranging from the minimum to\n+ maximum observed time points. Predictions are for the average subject\n+ effect. Pointwise confidence intervals are optionally computed\n+ separately for each of the points on the time grid. However,\n+ simultaneous confidence regions that control the level of confidence\n+ for the entire regression curve lying within a band are often more\n+ appropriate, as they allow the analyst to draw conclusions about\n+ nuances in the mean time response profile that were not stated\n+ apriori. The method of \\cite{Tibshirani (1997)} is used to easily\n+ obtain simultaneous confidence sets for the set of coefficients of the\n+ spline or linear regression function as well as the average intercept\n+ parameter (over subjects). Here one computes the objective criterion\n+ (here both the -2 log likelihood evaluated at the bootstrap estimate\n+ of beta but with respect to the original design matrix and response\n+ vector, and the sum of squared errors in predicting the original\n+ response vector) for the original fit as well as for all of the\n+ bootstrap fits. The confidence set of the regression coefficients is\n+ the set of all coefficients that are associated with objective\n+ function values that are less than or equal to say the 0.95 quantile\n+ of the vector of \\eqn{\\code{B} + 1} objective function values. 
For\n+ the coefficients satisfying this condition, predicted curves are\n+ computed at the time grid, and minima and maxima of these curves are\n+ computed separately at each time point toderive the final\n+ simultaneous confidence band.\n+ \n+ By default, the log likelihoods that are computed for obtaining the\n+ simultaneous confidence band assume independence within subject. This\n+ will cause problems unless such log likelihoods have very high rank\n+ correlation with the log likelihood allowing for dependence. To allow\n+ for correlation or to estimate the correlation function, see the\n+ \\code{cor.pattern} argument below.\n+}\n+\\usage{\n+rm.boot(time, y, id=seq(along=time), subset,\n+ plot.individual=FALSE,\n+ bootst"..b"ction(n, p = 1, u = rep(0, p), S = diag(p)) {\n+ Z <- matrix(rnorm(n * p), p, n)\n+ t(u + t(chol(S)) \\%*\\% Z)\n+}\n+\n+\n+n <- 20 # Number of subjects\n+sub <- .5*(1:n) # Subject effects\n+\n+\n+# Specify functional form for time trend and compute non-stochastic component\n+times <- seq(0, 1, by=.1)\n+g <- function(times) 5*pmax(abs(times-.5),.3)\n+ey <- g(times)\n+\n+\n+# Generate multivariate normal errors for 20 subjects at 11 times\n+# Assume equal correlations of rho=.7, independent subjects\n+\n+\n+nt <- length(times)\n+rho <- .7\n+\n+\n+ \n+set.seed(19) \n+errors <- mvrnorm(n, p=nt, S=diag(rep(1-rho,nt))+rho)\n+# Note: first random number seed used gave rise to mean(errors)=0.24!\n+\n+\n+# Add E[Y], error components, and subject effects\n+y <- matrix(rep(ey,n), ncol=nt, byrow=TRUE) + errors + \n+ matrix(rep(sub,nt), ncol=nt)\n+\n+\n+# String out data into long vectors for times, responses, and subject ID\n+y <- as.vector(t(y))\n+times <- rep(times, n)\n+id <- sort(rep(1:n, nt))\n+\n+\n+# Show lowess estimates of time profiles for individual subjects\n+f <- rm.boot(times, y, id, plot.individual=TRUE, B=25, cor.pattern='estimate',\n+ smoother=lowess, bootstrap.type='x fixed', nk=6)\n+# In practice use B=400 or 500\n+# This will compute a dependent-structure log-likelihood in addition\n+# to one assuming independence. By default, the dep. structure\n+# objective will be used by the plot method (could have specified rho=.7)\n+# NOTE: Estimating the correlation pattern from the residual does not\n+# work in cases such as this one where there are large subject effects\n+\n+\n+# Plot fits for a random sample of 10 of the 25 bootstrap fits\n+plot(f, individual.boot=TRUE, ncurves=10, ylim=c(6,8.5))\n+\n+\n+# Plot pointwise and simultaneous confidence regions\n+plot(f, pointwise.band=TRUE, col.pointwise=1, ylim=c(6,8.5))\n+\n+\n+# Plot population response curve at average subject effect\n+ts <- seq(0, 1, length=100)\n+lines(ts, g(ts)+mean(sub), lwd=3)\n+\n+\n+\\dontrun{\n+#\n+# Handle a 2-sample problem in which curves are fitted \n+# separately for males and females and we wish to estimate the\n+# difference in the time-response curves for the two sexes. 
\n+# The objective criterion will be taken by plot.rm.boot as the \n+# total of the two sums of squared errors for the two models\n+#\n+knots <- rcspline.eval(c(time.f,time.m), nk=6, knots.only=TRUE)\n+# Use same knots for both sexes, and use a times vector that \n+# uses a range of times that is included in the measurement \n+# times for both sexes\n+#\n+tm <- seq(max(min(time.f),min(time.m)),\n+ min(max(time.f),max(time.m)),length=100)\n+\n+\n+f.female <- rm.boot(time.f, bp.f, id.f, knots=knots, times=tm)\n+f.male <- rm.boot(time.m, bp.m, id.m, knots=knots, times=tm)\n+plot(f.female)\n+plot(f.male)\n+# The following plots female minus male response, with \n+# a sequence of shaded confidence band for the difference\n+plot(f.female,f.male,multi=TRUE)\n+\n+\n+# Do 1000 simulated analyses to check simultaneous coverage \n+# probability. Use a null regression model with Gaussian errors\n+\n+\n+n.per.pt <- 30\n+n.pt <- 10\n+\n+\n+null.in.region <- 0\n+\n+\n+for(i in 1:1000) {\n+ y <- rnorm(n.pt*n.per.pt)\n+ time <- rep(1:n.per.pt, n.pt)\n+# Add the following line and add ,id=id to rm.boot to use clustering\n+# id <- sort(rep(1:n.pt, n.per.pt))\n+# Because we are ignoring patient id, this simulation is effectively\n+# using 1 point from each of 300 patients, with times 1,2,3,,,30 \n+\n+\n+ f <- rm.boot(time, y, B=500, nk=5, bootstrap.type='x fixed')\n+ g <- plot(f, ylim=c(-1,1), pointwise=FALSE)\n+ null.in.region <- null.in.region + all(g$lower<=0 & g$upper>=0)\n+ prn(c(i=i,null.in.region=null.in.region))\n+}\n+\n+\n+# Simulation Results: 905/1000 simultaneous confidence bands \n+# fully contained the horizontal line at zero\n+}\n+}\n+\\keyword{regression}\n+\\keyword{multivariate}\n+\\keyword{htest}\n+\\keyword{hplot}\n+\\concept{bootstrap}\n+\\concept{repeated measures}\n+\\concept{longitudinal data}\n" |
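| Given the length of the example above, a compact sketch of a basic call may help; it assumes Hmisc is attached, relies only on arguments visible in the surviving text (\code{B}, \code{nk}), and uses a deliberately small \code{B}: |
| library(Hmisc)
set.seed(8)
id   <- rep(1:20, each = 5)   # 20 subjects observed at 5 times each
time <- rep(1:5, 20)
y    <- 0.3 * time + rnorm(100)
f <- rm.boot(time, y, id, B = 25, nk = 3)   # B this small is for illustration only
plot(f)   # simultaneous confidence band for the time trend |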
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/samplesize.bin.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/samplesize.bin.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,64 @@ +\name{samplesize.bin} +\alias{samplesize.bin} +\title{ + Sample Size for 2-sample Binomial +} +\description{ + Computes sample size(s) for 2-sample binomial problem given vector or + scalar probabilities in the two groups. +} +\usage{ +samplesize.bin(alpha, beta, pit, pic, rho=0.5) +} +\arguments{ + \item{alpha}{ + scalar ONE-SIDED test size, or two-sided size/2 + } + \item{beta}{ + scalar or vector of powers + } + \item{pit}{ + hypothesized treatment probability of success + } + \item{pic}{ + hypothesized control probability of success + } + \item{rho}{ + proportion of the sample devoted to treated group (\eqn{0 < \code{rho} < 1}) + } +} +\value{ + TOTAL sample size(s) +} +\section{AUTHOR}{ + Rick Chappell\cr + Dept. of Statistics and Human Oncology\cr + University of Wisconsin at Madison\cr + \email{chappell@stat.wisc.edu} +} +\examples{ +alpha <- .05 +beta <- c(.70,.80,.90,.95) + + +# N1 is a matrix of total sample sizes whose +# rows vary by hypothesized treatment success probability and +# columns vary by power +# See Meinert's book for formulae. + + +N1 <- samplesize.bin(alpha, beta, pit=.55, pic=.5) +N1 <- rbind(N1, samplesize.bin(alpha, beta, pit=.60, pic=.5)) +N1 <- rbind(N1, samplesize.bin(alpha, beta, pit=.65, pic=.5)) +N1 <- rbind(N1, samplesize.bin(alpha, beta, pit=.70, pic=.5)) +attr(N1,"dimnames") <- NULL + + +#Accounting for 5\% noncompliance in the treated group +inflation <- (1/.95)**2 +print(round(N1*inflation+.5,0)) +} +\keyword{htest} +\keyword{category} +\concept{study design} +\concept{power} |
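A small sketch of the rho argument above, with illustrative values only (not from the original examples): allocating two thirds of the total sample to the treated group.

# Total sample size for 90% power, one-sided alpha = .05,
# under a 2:1 treated:control allocation (values are illustrative)
samplesize.bin(alpha=.05, beta=.9, pit=.6, pic=.5, rho=2/3)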
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/sasxport.get.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/sasxport.get.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,166 @@ +\name{sasxport.get} +\alias{sasxport.get} +\alias{sasdsLabels} +\title{Enhanced Importing of SAS Transport Files using read.xport} +\description{ +Uses the \code{read.xport} and \code{lookup.xport} functions in the +\code{foreign} library to import SAS datasets. SAS date, time, and +date/time variables are converted respectively to \code{Date}, +POSIX, or \code{POSIXct} objects in \R, +variable names are converted to lower case, SAS labels are associated +with variables, and (by default) integer-valued variables are converted +from storage mode \code{double} to \code{integer}. If the user ran +\code{PROC FORMAT CNTLOUT=} in SAS and included the resulting dataset in +the SAS version 5 transport file, variables having customized formats +that do not include any ranges (i.e., variables having standard +\code{PROC FORMAT; VALUE} label formats) will have their format labels looked +up, and these variables are converted to S \code{factor}s. + +For those users having access to SAS, \code{method='csv'} is preferred +when importing several SAS datasets. +Run SAS macro \code{exportlib.sas} available from +\url{http://biostat.mc.vanderbilt.edu/twiki/pub/Main/Hmisc/exportlib.sas} +to convert all SAS datasets in a SAS data library (from any engine +supported by your system) into \code{CSV} files. If any customized +formats are used, it is assumed that the \code{PROC FORMAT CNTLOUT=} +dataset is in the data library as a regular SAS dataset, as above. + +\code{SASdsLabels} reads a file containing \code{PROC CONTENTS} +printed output to parse dataset labels, assuming that \code{PROC +CONTENTS} was run on an entire library. +} +\usage{ +sasxport.get(file, force.single = TRUE, + method=c('read.xport','dataload','csv'), formats=NULL, allow=NULL, + out=NULL, keep=NULL, drop=NULL, as.is=0.5, FUN=NULL) +sasdsLabels(file) +} +\arguments{ + \item{file}{name of a file containing the SAS transport file. + \code{file} may be a URL beginning with \code{http://}. For +\code{sasdsLabels}, \code{file} is the name of a file containing a +\code{PROC CONTENTS} output listing. For \code{method='csv'}, +\code{file} is the name of the directory containing all the \code{CSV} +files created by running the \code{exportlib} SAS macro. +} + \item{force.single}{set to \code{FALSE} to keep integer-valued + variables not exceeding \eqn{2^31-1} in value from being converted to + \code{integer} storage mode} + \item{method}{set to \code{"dataload"} if you have the \code{dataload} + executable installed and want to use it instead of + \code{read.xport}. This seems to correct some errors in which + rarely some factor variables are always missing when read by + \code{read.xport} when in fact they have some non-missing values.} + \item{formats}{a data frame or list (like that created by + \code{read.xport}) containing \code{PROC FORMAT} + output, if such output is not stored in the main transport file.} + \item{allow}{a vector of characters allowed by \R that should not be +converted to periods in variable names. By default, underscores in +variable names are converted to periods as with \R before version 1.9.} + \item{out}{a character string specifying a directory in which to write + separate \R \code{save} files (\code{.rda} files) for each regular + dataset. Each file and the data frame inside it is named with the + SAS dataset name translated to lower case and with underscores + changed to periods. The default \code{NULL} value of \code{out} + results in a data frame or a list of data frames being returned. 
+ When \code{out} is given, \code{sasxport.get} returns only metadata (see + below), invisibly. + \code{out} only works with \code{method='csv'}. \code{out} should + not have a trailing slash.} + \item{keep}{a vector of names of SAS datasets to process (original SAS + upper case names). Must include \code{PROC FORMAT} dataset if it + exists, and if the kept datasets use any of its value label formats.} + \item{drop}{a vector of names of SAS datasets to ignore (original SAS + upper case names)} + \item{as.is}{ + SAS character variables are converted to S factor + objects if \code{as.is=FALSE} or if \code{as.is} is a number between + 0 and 1 inclusive and the number of unique values of the variable is + less than the number of observations (\code{n}) times \code{as.is}. + The default for \code{as.is} is .5, so character variables are + converted to factors only if they have fewer than \code{n/2} unique + values. The primary purpose of this is to keep unique + identification variables as character values in the data frame + instead of using more space to store both the integer factor codes + and the factor labels. + } + \item{FUN}{an optional function that will be run on each data frame + created, when \code{method='csv'} and \code{out} are specified. The + result of all the \code{FUN} calls is made into a list corresponding + to the SAS datasets that are read. This list is the \code{FUN} + attribute of the result returned by \code{sasxport.get}. + } +} +\value{ + If there is more than one dataset in the transport file other than the + \code{PROC FORMAT} file, the result is a list of data frames + containing all the non-\code{PROC FORMAT} datasets. Otherwise the + result is the single data frame. There is an exception if \code{out} + is specified; that causes separate \R \code{save} files to be written + and the returned value to be a list corresponding to the SAS datasets, + with key \code{PROC CONTENTS} information in a data frame making up + each part of the list. + \code{sasdsLabels} returns a named + vector of dataset labels, with names equal to the dataset names. +} +\details{See \code{\link{contents.list}} for a way to print the +directory of SAS datasets when more than one was imported.} +\author{Frank E Harrell Jr} +\seealso{\code{\link[foreign]{read.xport}},\code{\link{label}},\code{\link{sas.get}}, + \code{\link{Dates}},\code{\link{DateTimeClasses}}, + \code{\link[foreign]{lookup.xport}},\code{\link{contents}},\code{\link{describe}}} +\examples{ +\dontrun{ +# SAS code to generate test dataset: +# libname y SASV5XPT "test2.xpt"; +# +# PROC FORMAT; VALUE race 1=green 2=blue 3=purple; RUN; +# PROC FORMAT CNTLOUT=format;RUN; * Name, e.g. 'format', unimportant; +# data test; +# LENGTH race 3 age 4; +# age=30; label age="Age at Beginning of Study"; +# race=2; +# d1='3mar2002'd ; +# dt1='3mar2002 9:31:02'dt; +# t1='11:13:45't; +# output; +# +# age=31; +# race=4; +# d1='3jun2002'd ; +# dt1='3jun2002 9:42:07'dt; +# t1='11:14:13't; +# output; +# format d1 mmddyy10. dt1 datetime. t1 time. 
race race.; +# run; +# data z; LENGTH x3 3 x4 4 x5 5 x6 6 x7 7 x8 8; +# DO i=1 TO 100; +# x3=ranuni(3); +# x4=ranuni(5); +# x5=ranuni(7); +# x6=ranuni(9); +# x7=ranuni(11); +# x8=ranuni(13); +# output; +# END; +# DROP i; +# RUN; +# PROC MEANS; RUN; +# PROC COPY IN=work OUT=y;SELECT test format z;RUN; *Creates test2.xpt; +w <- sasxport.get('test2.xpt') +# To use an existing copy of test2.xpt available on the web: +w <- sasxport.get('http://biostat.mc.vanderbilt.edu/wiki/pub/Main/Hmisc/test2.xpt') + +describe(w$test) # see labels, format names for dataset test +# Note: if only one dataset (other than format) had been exported, +# just do describe(w) as sasxport.get would not create a list for that +lapply(w, describe)# see descriptive stats for both datasets +contents(w$test) # another way to see variable attributes +lapply(w, contents)# show contents of both datasets +options(digits=7) # compare the following matrix with PROC MEANS output +t(sapply(w$z, function(x) + c(Mean=mean(x),SD=sqrt(var(x)),Min=min(x),Max=max(x)))) +} +} +\keyword{interface} +\keyword{manip} |
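A minimal sketch of the as.is rule described above, reusing the test2.xpt file from the example: with as.is=0 no character variable can have fewer than n*0 unique values, so every character variable stays character rather than becoming a factor.

w <- sasxport.get('test2.xpt', as.is=0)  # no character-to-factor conversion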
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/scat1d.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/scat1d.Rd Wed Jun 28 20:28:48 2017 -0400 |
| b"@@ -0,0 +1,469 @@\n+\\name{scat1d}\n+\\alias{scat1d}\n+\\alias{jitter2}\n+\\alias{jitter2.default}\n+\\alias{jitter2.data.frame}\n+\\alias{datadensity}\n+\\alias{datadensity.data.frame}\n+\\alias{histSpike}\n+\\title{\n+ One-Dimensional Scatter Diagram, Spike Histogram, or Density\n+}\n+\\description{\n+ \\code{scat1d} adds tick marks (bar codes. rug plot) on any of the four\n+ sides of an existing plot, corresponding with non-missing values of a\n+ vector \\code{x}. This is used to show the data density. Can also\n+ place the tick marks along a curve by specifying y-coordinates to go\n+ along with the \\code{x} values.\n+ \n+ If any two values of \\code{x} are within \\eqn{\\code{eps}*\\var{w}} of\n+ each other, where \\code{eps} defaults to .001 and \\var{w} is the span\n+ of the intended axis, values of \\code{x} are jittered by adding a\n+ value uniformly distributed in \\eqn{[-\\code{jitfrac}*\\var{w},\n+ \\code{jitfrac}*\\var{w}]}, where \\code{jitfrac} defaults to\n+ .008. Specifying \\code{preserve=TRUE} invokes \\code{jitter2} with a\n+ different logic of jittering. Allows plotting random sub-segments to\n+ handle very large \\code{x} vectors (see\\code{tfrac}).\n+ \n+ \\code{jitter2} is a generic method for jittering, which does not add\n+ random noise. It retains unique values and ranks, and randomly spreads\n+ duplicate values at equidistant positions within limits of enclosing\n+ values. \\code{jitter2} is especially useful for numeric variables with\n+ discrete values, like rating scales. Missing values are allowed and\n+ are returned. Currently implemented methods are \\code{jitter2.default}\n+ for vectors and \\code{jitter2.data.frame} which returns a data.frame\n+ with each numeric column jittered.\n+ \n+ \\code{datadensity} is a generic method used to show data densities in\n+ more complex situations. Here, another \\code{datadensity} method is\n+ defined for data frames. Depending on the \\code{which} argument, some\n+ or all of the variables in a data frame will be displayed, with\n+ \\code{scat1d} used to display continuous variables and, by default,\n+ bars used to display frequencies of categorical, character, or\n+ discrete numeric variables. For such variables, when the total length\n+ of value labels exceeds 200, only the first few characters from each\n+ level are used. By default, \\code{datadensity.data.frame} will\n+ construct one axis (i.e., one strip) per variable in the data frame.\n+ Variable names appear to the left of the axes, and the number of\n+ missing values (if greater than zero) appear to the right of the axes.\n+ An optional \\code{group} variable can be used for stratification,\n+ where the different strata are depicted using different colors. If\n+ the \\code{q} vector is specified, the desired quantiles (over all\n+ \\code{group}s) are displayed with solid triangles below each axis.\n+ \n+ When the sample size exceeds 2000 (this value may be modified using\n+ the \\code{nhistSpike} argument, \\code{datadensity} calls\n+ \\code{histSpike} instead of \\code{scat1d} to show the data density for\n+ numeric variables. This results in a histogram-like display that\n+ makes the resulting graphics file much smaller. 
In this case,\n+ \\code{datadensity} uses the \\code{minf} argument (see below) so that\n+ very infrequent data values will not be lost on the variable's axis,\n+ although this will slightly distort the histogram.\n+ \n+ \\code{histSpike} is another method for showing a high-resolution data\n+ distribution that is particularly good for very large datasets (say\n+ \\eqn{\\code{n} > 1000}). By default, \\code{histSpike} bins the\n+ continuous \\code{x} variable into 100 equal-width bins and then\n+ computes the frequency counts within bins (if \\code{n} does not exceed\n+ 10, no binning is done). If \\code{add=FALSE} (the default), the\n+ function displays either proportions or frequencies as in a vertical\n+ histogram. Instead of bars, spikes are used to depict the\n+ frequencies. If \\code{add=TRUE}, the function assumes you are ad"..b"rmine the range of data for the axis\n+ of the current plot. This range is used in jittering and in\n+ constructing line segments.\n+}\n+\\author{\n+Frank Harrell\\cr\n+Department of Biostatistics\\cr\n+Vanderbilt University\\cr\n+Nashville TN, USA\\cr\n+\\email{f.harrell@vanderbilt.edu}\n+\n+\n+Martin Maechler (improved \\code{scat1d})\\cr\n+Seminar fuer Statistik\\cr\n+ETH Zurich SWITZERLAND\\cr\n+\\email{maechler@stat.math.ethz.ch}\n+\n+\n+Jens Oehlschlaegel-Akiyoshi (wrote \\code{jitter2})\\cr\n+Center for Psychotherapy Research\\cr\n+Christian-Belser-Strasse 79a\\cr\n+D-70597 Stuttgart Germany\\cr\n+\\email{oehl@psyres-stuttgart.de}\n+}\n+\\seealso{\n+ \\code{\\link{segments}}, \\code{\\link{jitter}}, \\code{\\link{rug}},\n+ \\code{\\link{plsmo}}, \\code{\\link{stripplot}},\n+ \\code{\\link{hist.data.frame}},\\code{\\link{Ecdf}}, \\code{\\link{hist}},\n+ \\code{\\link[lattice]{histogram}}, \\code{\\link{table}},\n+ \\code{\\link{density}}\n+}\n+\\examples{\n+plot(x <- rnorm(50), y <- 3*x + rnorm(50)/2 )\n+scat1d(x) # density bars on top of graph\n+scat1d(y, 4) # density bars at right\n+histSpike(x, add=TRUE) # histogram instead, 100 bins\n+histSpike(y, 4, add=TRUE)\n+histSpike(x, type='density', add=TRUE) # smooth density at bottom\n+histSpike(y, 4, type='density', add=TRUE)\n+\n+\n+smooth <- lowess(x, y) # add nonparametric regression curve\n+lines(smooth) # Note: plsmo() does this\n+scat1d(x, y=approx(smooth, xout=x)$y) # data density on curve\n+scat1d(x, curve=smooth) # same effect as previous command\n+histSpike(x, curve=smooth, add=TRUE) # same as previous but with histogram\n+histSpike(x, curve=smooth, type='density', add=TRUE) \n+# same but smooth density over curve\n+\n+\n+plot(x <- rnorm(250), y <- 3*x + rnorm(250)/2)\n+scat1d(x, tfrac=0) # dots randomly spaced from axis\n+scat1d(y, 4, frac=-.03) # bars outside axis\n+scat1d(y, 2, tfrac=.2) # same bars with smaller random fraction\n+\n+\n+x <- c(0:3,rep(4,3),5,rep(7,10),9)\n+plot(x, jitter2(x)) # original versus jittered values\n+abline(0,1) # unique values unjittered on abline\n+points(x+0.1, jitter2(x, limit=FALSE), col=2)\n+ # allow locally maximum jittering\n+points(x+0.2, jitter2(x, fill=1), col=3); abline(h=seq(0.5,9,1), lty=2)\n+ # fill 3/3 instead of 1/3\n+x <- rnorm(200,0,2)+1; y <- x^2\n+x2 <- round((x+rnorm(200))/2)*2\n+x3 <- round((x+rnorm(200))/4)*4\n+dfram <- data.frame(y,x,x2,x3)\n+plot(dfram$x2, dfram$y) # jitter2 via scat1d\n+scat1d(dfram$x2, y=dfram$y, preserve=TRUE, col=2)\n+scat1d(dfram$x2, preserve=TRUE, frac=-0.02, col=2)\n+scat1d(dfram$y, 4, preserve=TRUE, frac=-0.02, col=2)\n+\n+\n+pairs(jitter2(dfram)) # pairs for jittered data.frame\n+# This gets reasonable pairwise 
scatter plots for all combinations of\n+# variables where\n+#\n+# - continuous variables (with unique values) are not jittered at all, thus\n+# all relations between continuous variables are shown as they are,\n+# extreme values have exact positions.\n+#\n+# - discrete variables get a reasonable amount of jittering, whether they\n+# have 2, 3, 5, 10, 20 \\dots levels\n+#\n+# - different from adding noise, jitter2() will use the available space\n+# optimally and no value will randomly mask another\n+#\n+# If you want a scatterplot with lowess smooths on the *exact* values and\n+# the point clouds shown jittered, you just need\n+#\n+pairs( dfram ,panel=function(x,y) { points(jitter2(x),jitter2(y))\n+ lines(lowess(x,y)) } )\n+\n+\n+\n+\n+datadensity(dfram) # graphical snapshot of entire data frame\n+datadensity(dfram, group=cut2(dfram$x2,g=3))\n+ # stratify points and frequencies by\n+ # x2 tertiles and use 3 colors\n+\n+\n+# datadensity.data.frame(split(x, grouping.variable))\n+# need to explicitly invoke datadensity.data.frame when the\n+# first argument is a list\n+}\n+\\keyword{dplot}\n+\\keyword{aplot}\n+\\keyword{hplot}\n+\\keyword{distribution}\n+% Converted by Sd2Rd version 1.21.\n" |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/score.binary.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/score.binary.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,77 @@ +\name{score.binary} +\alias{score.binary} +\title{ +Score a Series of Binary Variables +} +\description{ +Creates a new variable from a series of logical conditions. The new +variable can be a hierarchical category or score derived from considering +the rightmost \code{TRUE} value among the input variables, an additive point +score, a union, or any of several others by specifying a function using the +\code{fun} argument. +} +\usage{ +score.binary(\dots, fun=max, points=1:p, + na.rm=funtext == "max", retfactor=TRUE) +} +\arguments{ +\item{...}{ +a list of variables or expressions which are considered to be binary +or logical +} +\item{fun}{ +a function to compute on each row of the matrix represented by +a specific observation of all the variables in \code{\dots} +} +\item{points}{ +points to assign to successive elements of \code{\dots} . The default is +\code{1, 2, \dots, p}, where \code{p} is the number of elements. If you specify +one number for \code{points}, that number will be duplicated (i.e., equal weights +are assumed). +} +\item{na.rm}{ +set to \code{TRUE} to remove \code{NA}s from consideration when processing +each row of the matrix of variables in \code{\dots} . For \code{fun=max}, +\code{na.rm=TRUE} is the default since \code{score.binary} assumes that a +hierarchical scale is based on available information. Otherwise, +\code{na.rm=FALSE} is assumed. For \code{fun=mean} you may want to specify +\code{na.rm=TRUE}. +} +\item{retfactor}{ +applies if \code{fun=max}, in which case \code{retfactor=TRUE} makes \code{score.binary} +return a \code{factor} object since a hierarchical scale implies +a unique choice. +}} +\value{ +a \code{factor} object if \code{retfactor=TRUE} and \code{fun=max} or a numeric vector +otherwise. Will not contain NAs if \code{na.rm=TRUE} unless every variable in +a row is \code{NA}. If a \code{factor} object +is returned, it has levels \code{"none"} followed by character +string versions of the arguments given in \code{\dots} . +} +\seealso{ +\code{\link{any}}, \code{\link{sum}}, \code{\link{max}}, \code{\link{factor}} +} +\examples{ +set.seed(1) +age <- rnorm(25, 70, 15) +previous.disease <- sample(0:1, 25, TRUE) +#Hierarchical scale, highest of 1:age>70 2:previous.disease +score.binary(age>70, previous.disease, retfactor=FALSE) +#Same as above but return factor variable with levels "none" "age>70" +# "previous.disease" +score.binary(age>70, previous.disease) + + +#Additive scale with weights 1:age>70 2:previous.disease +score.binary(age>70, previous.disease, fun=sum) +#Additive scale, equal weights +score.binary(age>70, previous.disease, fun=sum, points=c(1,1)) +#Same as saying points=1 + + +#Union of variables, to create a new binary variable +score.binary(age>70, previous.disease, fun=any) +} +\keyword{manip} +% Converted by Sd2Rd version 1.21. |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/sedit.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/sedit.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,187 @@ +\name{sedit} +\alias{sedit} +\alias{substring.location} +\alias{substring2} +\alias{substring2<-} +\alias{replace.substring.wild} +\alias{numeric.string} +\alias{all.digits} +\title{ + Character String Editing and Miscellaneous Character Handling Functions +} +\description{ + This suite of functions was written to implement many of the features + of the UNIX \code{sed} program entirely within S (function \code{sedit}). + The \code{substring.location} function returns the first and last position + numbers that a sub-string occupies in a larger string. The \code{substring2<-} + function does the opposite of the builtin function \code{substring}. + It is named \code{substring2} because for S-Plus there is a built-in + function \code{substring}, but it does not handle multiple replacements in + a single string. + \code{replace.substring.wild} edits character strings in the fashion of + "change xxxxANYTHINGyyyy to aaaaANYTHINGbbbb", if the "ANYTHING" + passes an optional user-specified \code{test} function. Here, the + "yyyy" string is searched for from right to left to handle + balancing parentheses, etc. \code{numeric.string} + and \code{all.digits} are two examples of \code{test} functions, to check, + respectively if each of a vector of strings is a legal numeric or if it contains only + the digits 0-9. For the case where \code{old="*$" or "^*"}, or for + \code{replace.substring.wild} with the same values of \code{old} or with + \code{front=TRUE} or \code{back=TRUE}, \code{sedit} (if \code{wild.literal=FALSE}) and + \code{replace.substring.wild} will edit the largest substring + satisfying \code{test}. + + \code{substring2} is just a copy of \code{substring} so that + \code{substring2<-} will work. +} +\usage{ +sedit(text, from, to, test, wild.literal=FALSE) +substring.location(text, string, restrict) +# substring(text, first, last) <- setto # S-Plus only +replace.substring.wild(text, old, new, test, front=FALSE, back=FALSE) +numeric.string(string) +all.digits(string) +substring2(text, first, last) +substring2(text, first, last) <- value +} +\arguments{ + \item{text}{ + a vector of character strings for \code{sedit, substring2, substring2<-} + or a single character string for \code{substring.location, + replace.substring.wild}. + } + \item{from}{ + a vector of character strings to translate from, for \code{sedit}. + A single asterisk wild card, meaning allow any sequence of characters + (subject to the \code{test} function, if any) in place of the \code{"*"}. + An element of \code{from} may begin with \code{"^"} to force the match to + begin at the beginning of \code{text}, and an element of \code{from} can end with + \code{"$"} to force the match to end at the end of \code{text}. + } + \item{to}{ + a vector of character strings to translate to, for \code{sedit}. + If a corresponding element in \code{from} had an \code{"*"}, the element + in \code{to} may also have an \code{"*"}. Only single asterisks are allowed. + If \code{to} is not the same length as \code{from}, the \code{rep} function + is used to make it the same length. + } + \item{string}{ + a single character string, for \code{substring.location}, \code{numeric.string}, + \code{all.digits} + } + \item{first}{ + a vector of integers specifying the first position to replace for + \code{substring2<-}. \code{first} may also be a vector of character strings + that are passed to \code{sedit} to use as patterns for replacing + substrings with \code{setto}. See one of the last examples below. 
+ } + \item{last}{ + a vector of integers specifying the ending positions of the character + substrings to be replaced. The default is to go to the end of + the string. When \code{first} is character, \code{last} must be + omitted. + } + \item{setto}{ + a character string or vector of character strings used as replacements, + in \code{substring2<-} + } + \item{old}{ + a character string to translate from for \code{replace.substring.wild}. + May be \code{"*$"} or \code{"^*"} or any string containing a single \code{"*"} but + not beginning with \code{"^"} or ending with \code{"$"}. + } + \item{new}{ + a character string to translate to for \code{replace.substring.wild} + } + \item{test}{ + a function of a vector of character strings returning a logical vector + whose elements are \code{TRUE} or \code{FALSE} according + to whether that string element qualifies as the wild card string for + \code{sedit, replace.substring.wild} + } + \item{wild.literal}{ + set to \code{TRUE} to not treat asterisks as wild cards and to not look for + \code{"^"} or \code{"$"} in \code{old} + } + \item{restrict}{ + a vector of two integers for \code{substring.location} which specifies a + range to which the search for matches should be restricted + } + \item{front}{ + specifying \code{front = TRUE} and \code{old = "*"} is the same as + specifying \code{old = "^*"} + } + \item{back}{ + specifying \code{back = TRUE} and \code{old = "*"} is the same as + specifying \code{old = "*$"} + } + \item{value}{a character vector} +} +\value{ + \code{sedit} returns a vector of character strings the same length as \code{text}. + \code{substring.location} returns a list with components named \code{first} + and \code{last}, each specifying a vector of character positions corresponding + to matches. \code{replace.substring.wild} returns a single character string. + \code{numeric.string} and \code{all.digits} return a single logical value. +} +\section{Side Effects}{ + \code{substring2<-} modifies its first argument +} +\author{ + Frank Harrell + \cr + Department of Biostatistics + \cr + Vanderbilt University School of Medicine + \cr + \email{f.harrell@vanderbilt.edu} +} +\seealso{ + \code{\link{grep}}, \code{\link{substring}} +} +\examples{ +x <- 'this string' +substring2(x, 3, 4) <- 'IS' +x +substring2(x, 7) <- '' +x + + +substring.location('abcdefgabc', 'ab') +substring.location('abcdefgabc', 'ab', restrict=c(3,999)) + + +replace.substring.wild('this is a cat','this*cat','that*dog') +replace.substring.wild('there is a cat','is a*', 'is not a*') +replace.substring.wild('this is a cat','is a*', 'Z') + + +qualify <- function(x) x==' 1.5 ' | x==' 2.5 ' +replace.substring.wild('He won 1.5 million $','won*million', + 'lost*million', test=qualify) +replace.substring.wild('He won 1 million $','won*million', + 'lost*million', test=qualify) +replace.substring.wild('He won 1.2 million $','won*million', + 'lost*million', test=numeric.string) + + +x <- c('a = b','c < d','hello') +sedit(x, c('=','he*o'),c('==','he*')) + + +sedit('x23', '*$', '[*]', test=numeric.string) +sedit('23xx', '^*', 'Y_{*} ', test=all.digits) + + +replace.substring.wild("abcdefabcdef", "d*f", "xy") + + +x <- "abcd" +substring2(x, "bc") <- "BCX" +x +substring2(x, "B*d") <- "B*D" +x +} +\keyword{manip} +\keyword{character} +% Converted by Sd2Rd version 1.21. |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/show.pch.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/show.pch.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,31 @@ +\name{show.pch} +\alias{show.pch} +\alias{show.col} +\alias{character.table} +\title{Display Colors, Plotting Symbols, and Symbol Numeric Equivalents} +\description{ +\code{show.pch} plots the definitions of the \code{pch} parameters. +\code{show.col} plots definitions of integer-valued colors. +\code{character.table} draws numeric equivalents of all Latin +characters; the character on line \code{xy} and column \code{z} of the +table has numeric code \code{"xyz"}, which you would surround in quotes +and precede by a backslash. +} +\usage{ +show.pch(object = par("font")) +show.col(object=NULL) +character.table(font=1) +} +\arguments{ + \item{object}{font for \code{show.pch}, ignored for \code{show.col}.} + \item{font}{font} +} +\author{Pierre Joyet \email{pierre.joyet@bluewin.ch}, Frank Harrell} +\seealso{\code{\link{points}}, \code{\link{text}}} +\examples{ +\dontrun{ +show.pch() +show.col() +character.table() +}} +\keyword{aplot} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/showPsfrag.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/showPsfrag.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,38 @@ +\name{showPsfrag} +\alias{showPsfrag} +\title{ + Display image from psfrag LaTeX strings +} +\description{ + \code{showPsfrag} is used to display (using ghostview) a postscript + image that contained psfrag LaTeX strings, by building a small LaTeX + script and running \command{latex} and \command{dvips}. +} +\usage{ +showPsfrag(filename) +} +\arguments{ + \item{filename}{ + name or character string or character vector specifying file + prefix. + } +} +\author{ + Frank Harrell\cr + Department of Biostatistics\cr + Vanderbilt University\cr + \email{f.harrell@vanderbilt.edu} +} +\references{ + Grant MC, Carlisle (1998): The PSfrag System, Version 3. Full + documentation is obtained by searching www.ctan.org for \file{pfgguide.ps}. +} +\seealso{ + \code{\link{postscript}}, \code{\link{par}}, \code{\link{ps.options}}, + \code{\link{mgp.axis.labels}}, \code{\link{pdf}}, + \code{\link[lattice]{trellis.device}}, \code{\link{setTrellis}} +} +\keyword{hplot} +\keyword{device} +\concept{trellis} +\concept{lattice} |
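No example is given above, so here is a hypothetical call; the file prefix myfig is assumed, i.e., myfig.ps is a postscript file containing the psfrag strings to be displayed.

showPsfrag('myfig')  # builds a small LaTeX wrapper, runs latex and dvips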
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/simplifyDims.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/simplifyDims.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,32 @@ +\name{simplifyDims} +\alias{simplifyDims} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{List Simplification} +\description{ + Takes a list where each element is a group of rows that have been + spanned by a multirow row and combines them into one large matrix. +} +\usage{ +simplifyDims(x) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{x}{list of spanned rows} +} +\details{ + All rows must have the same number of columns. This is used to format + the list for printing. +} +\value{ + a matrix that contains all of the spanned rows. +} +\author{Charles Dupont} +\seealso{\code{\link{rbind}}} +\examples{ +a <- list(a = matrix(1:25, ncol=5), b = matrix(1:10, ncol=5), c = 1:5) + +simplifyDims(a) +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{print} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/smean.sd.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/smean.sd.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,109 @@ +\name{smean.sd} +\alias{smean.cl.normal} +\alias{smean.sd} +\alias{smean.sdl} +\alias{smean.cl.boot} +\alias{smedian.hilow} +\title{ +Compute Summary Statistics on a Vector +} +\description{ +A number of statistical summary functions are provided for use +with \code{summary.formula} and \code{summarize} (as well as +\code{tapply} and by themselves). +\code{smean.cl.normal} computes 3 summary variables: the sample mean and +lower and upper Gaussian confidence limits based on the t-distribution. +\code{smean.sd} computes the mean and standard deviation. +\code{smean.sdl} computes the mean plus or minus a constant times the +standard deviation. +\code{smean.cl.boot} is a very fast implementation of the basic +nonparametric bootstrap for obtaining confidence limits for the +population mean without assuming normality. +These functions all delete NAs automatically. +\code{smedian.hilow} computes the sample median and a selected pair of +outer quantiles having equal tail areas. +} +\usage{ +smean.cl.normal(x, mult=qt((1+conf.int)/2,n-1), conf.int=.95, na.rm=TRUE) + +smean.sd(x, na.rm=TRUE) + +smean.sdl(x, mult=2, na.rm=TRUE) + +smean.cl.boot(x, conf.int=.95, B=1000, na.rm=TRUE, reps=FALSE) + +smedian.hilow(x, conf.int=.95, na.rm=TRUE) +} +\arguments{ +\item{x}{ +for summary functions \code{smean.*}, \code{smedian.hilow}, a numeric vector +from which NAs will be removed automatically +} +\item{na.rm}{ +defaults to \code{TRUE} unlike built-in functions, so that by +default \code{NA}s are automatically removed +} +\item{mult}{ +for \code{smean.cl.normal} is the multiplier of the standard error of the +mean to use in obtaining confidence limits of the population mean +(default is appropriate quantile of the t distribution). For +\code{smean.sdl}, \code{mult} is the multiplier of the standard deviation used +in obtaining a coverage interval about the sample mean. The default +is \code{mult=2} to use plus or minus 2 standard deviations. +} +\item{conf.int}{ +for \code{smean.cl.normal} and \code{smean.cl.boot} specifies the confidence +level (0-1) for interval estimation of the population mean. For +\code{smedian.hilow}, \code{conf.int} is the coverage probability the outer +quantiles should target. When the default, 0.95, is used, the lower +and upper quantiles computed are 0.025 and 0.975. +} +\item{B}{ +number of bootstrap resamples for \code{smean.cl.boot} +} +\item{reps}{ +set to \code{TRUE} to have \code{smean.cl.boot} return the vector of bootstrapped +means as the \code{reps} attribute of the returned object +} +} +\value{ +a vector of summary statistics +} +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University +\cr +\email{f.harrell@vanderbilt.edu} +} +\seealso{ + \code{\link{summarize}}, \code{\link{summary.formula}} +} +\examples{ +set.seed(1) +x <- rnorm(100) +smean.sd(x) +smean.sdl(x) +smean.cl.normal(x) +smean.cl.boot(x) +smedian.hilow(x, conf.int=.5) # 25th and 75th percentiles + +# Function to compute 0.95 confidence interval for the difference in two means +# g is grouping variable +bootdif <- function(y, g) { + g <- as.factor(g) + a <- attr(smean.cl.boot(y[g==levels(g)[1]], B=2000, reps=TRUE),'reps') + b <- attr(smean.cl.boot(y[g==levels(g)[2]], B=2000, reps=TRUE),'reps') + meandif <- diff(tapply(y, g, mean, na.rm=TRUE)) + a.b <- quantile(b-a, c(.025,.975)) + res <- c(meandif, a.b) + names(res) <- c('Mean Difference','.025','.975') + res +} + +} +\keyword{nonparametric} +\keyword{htest} +\concept{bootstrap} |
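A small sketch, on simulated data, of the stratified use with tapply mentioned in the description:

set.seed(2)
y <- rnorm(100)
g <- sample(c('a','b'), 100, TRUE)
tapply(y, g, smean.sd)       # mean and SD within each level of g
tapply(y, g, smedian.hilow)  # median and outer quantiles by group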
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/solvet.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/solvet.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,24 @@ +\name{solvet} +\alias{solvet} +\title{ +solve Function with tol argument +} +\description{ +A slightly modified version of \code{solve} that allows a tolerance argument +for singularity (\code{tol}) which is passed to \code{qr}. +} +\usage{ +solvet(a, b, tol=1e-09) +} +\arguments{ + \item{a}{a square numeric matrix} + \item{b}{a numeric vector or matrix} + \item{tol}{tolerance for detecting linear dependencies in columns of + \code{a}} + } +\seealso{ +\code{\link{solve}} +} +\keyword{array} +\keyword{algebra} + |
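No examples are given above; a minimal sketch of solvet as a drop-in replacement for solve, with tol passed through to qr:

a <- matrix(c(2, 1, 1, 2), nrow=2)
b <- c(3, 3)
solvet(a, b)             # same result as solve(a, b)
solvet(a, b, tol=1e-12)  # stricter tolerance for detecting singularity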
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/somers2.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/somers2.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,77 @@ +\name{somers2} +\alias{somers2} +\title{ +Somers' Dxy Rank Correlation +} +\description{ +Computes Somers' Dxy rank correlation between a variable \code{x} and a +binary (0-1) variable \code{y}, and the corresponding receiver operating +characteristic curve area \code{c}. Note that \code{Dxy = 2(c-0.5)}. +\code{somers2} allows for a \code{weights} variable, which specifies frequencies +to associate with each observation. +} +\usage{ +somers2(x, y, weights=NULL, normwt=FALSE, na.rm=TRUE) +} +\arguments{ +\item{x}{ +typically a predictor variable. \code{NA}s are allowed. +} +\item{y}{ +a numeric outcome variable coded \code{0-1}. \code{NA}s are allowed. +} +\item{weights}{ +a numeric vector of observation weights (usually frequencies). Omit +or specify a zero-length vector to do an unweighted analysis. +} +\item{normwt}{ +set to \code{TRUE} to make \code{weights} sum to the actual number of non-missing +observations. +} +\item{na.rm}{ +set to \code{FALSE} to suppress checking for NAs. +}} +\value{ +a vector with the named elements \code{C}, \code{Dxy}, \code{n} (number of non-missing +pairs), and \code{Missing}. Uses the formula +\code{C = (mean(rank(x)[y == 1]) - (n1 + 1)/2)/(n - n1)}, where \code{n1} is the +frequency of \code{y=1}. +} +\details{ +The \code{rcorr.cens} function, although slower than \code{somers2} for large +sample sizes, can also be used to obtain Dxy for non-censored binary +\code{y}, and it has the advantage of computing the standard deviation of +the correlation index. +} +\author{ +Frank Harrell +\cr +Department of Biostatistics +\cr +Vanderbilt University School of Medicine +\cr +\email{f.harrell@vanderbilt.edu} +} +\seealso{ +\code{\link{rcorr.cens}}, \code{\link{rank}}, \code{\link{wtd.rank}}, +} +\examples{ +set.seed(1) +predicted <- runif(200) +dead <- sample(0:1, 200, TRUE) +roc.area <- somers2(predicted, dead)["C"] + +# Check weights +x <- 1:6 +y <- c(0,0,1,0,1,1) +f <- c(3,2,2,3,2,1) +somers2(x, y) +somers2(rep(x, f), rep(y, f)) +somers2(x, y, f) +} +\keyword{nonparametric} +\concept{logistic regression model} +\concept{predictive accuracy} + + + |
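A quick check, on simulated data, of the stated relation Dxy = 2(c - 0.5):

set.seed(2)
x <- runif(50)
y <- sample(0:1, 50, TRUE)
w <- somers2(x, y)
w['Dxy'] - 2*(w['C'] - .5)  # zero up to rounding error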
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/spower.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/spower.Rd Wed Jun 28 20:28:48 2017 -0400 |
| b'@@ -0,0 +1,447 @@\n+\\name{spower}\n+\\alias{spower}\n+\\alias{print.spower}\n+\\alias{Quantile2}\n+\\alias{print.Quantile2}\n+\\alias{plot.Quantile2}\n+\\alias{logrank}\n+\\alias{Gompertz2}\n+\\alias{Lognorm2}\n+\\alias{Weibull2}\n+\\title{\n+ Simulate Power of 2-Sample Test for Survival under Complex Conditions\n+}\n+\\description{\n+ Given functions to generate random variables for survival times and\n+ censoring times, \\code{spower} simulates the power of a user-given\n+ 2-sample test for censored data. By default, the logrank (Cox\n+ 2-sample) test is used, and a \\code{logrank} function for comparing 2\n+ groups is provided. Optionally a Cox model is fitted for each\n+ simulated dataset and the log hazard ratios are saved (this requires\n+ the \\code{survival} package). A \\code{print} method prints various\n+ measures from these. For composing \\R functions to generate random\n+ survival times under complex conditions, the \\code{Quantile2} function\n+ allows the user to specify the intervention:control hazard ratio as a\n+ function of time, the probability of a control subject actually\n+ receiving the intervention (dropin) as a function of time, and the\n+ probability that an intervention subject receives only the control\n+ agent as a function of time (non-compliance, dropout).\n+ \\code{Quantile2} returns a function that generates either control or\n+ intervention uncensored survival times subject to non-constant\n+ treatment effect, dropin, and dropout. There is a \\code{plot} method\n+ for plotting the results of \\code{Quantile2}, which will aid in\n+ understanding the effects of the two types of non-compliance and\n+ non-constant treatment effects. \\code{Quantile2} assumes that the\n+ hazard function for either treatment group is a mixture of the control\n+ and intervention hazard functions, with mixing proportions defined by\n+ the dropin and dropout probabilities. It computes hazards and\n+ survival distributions by numerical differentiation and integration\n+ using a grid of (by default) 7500 equally-spaced time points.\n+ \n+ The \\code{logrank} function is intended to be used with \\code{spower}\n+ but it can be used by itself. It returns the 1 degree of freedom\n+ chi-square statistic, with the hazard ratio estimate as an attribute.\n+\n+ The \\code{Weibull2} function accepts as input two vectors, one\n+ containing two times and one containing two survival probabilities, and\n+ it solves for the scale and shape parameters of the Weibull distribution\n+ (\\eqn{S(t) = e^{-\\alpha {t}^{\\gamma}}}{S(t) = exp(-\\alpha*t^\\gamma)})\n+ which will yield\n+ those estimates. It creates an \\R function to evaluate survival\n+ probabilities from this Weibull distribution. 
\\code{Weibull2} is\n+ useful in creating functions to pass as the first argument to\n+ \\code{Quantile2}.\n+\n+ The \\code{Lognorm2} and \\code{Gompertz2} functions are similar to\n+ \\code{Weibull2} except that they produce survival functions for the\n+ log-normal and Gompertz distributions.\n+\n+ When \\code{cox=TRUE} is specified to \\code{spower}, the analyst may wish\n+ to extract the two margins of error by using the \\code{print} method\n+ for \\code{spower} objects (see example below) and take the maximum of\n+ the two.\n+}\n+\\usage{\n+spower(rcontrol, rinterv, rcens, nc, ni, \n+ test=logrank, cox=FALSE, nsim=500, alpha=0.05, pr=TRUE)\n+\n+\\method{print}{spower}(x, conf.int=.95, \\dots)\n+\n+Quantile2(scontrol, hratio, \n+ dropin=function(times)0, dropout=function(times)0,\n+ m=7500, tmax, qtmax=.001, mplot=200, pr=TRUE, \\dots)\n+\n+\\method{print}{Quantile2}(x, \\dots)\n+\n+\\method{plot}{Quantile2}(x, \n+ what=c("survival", "hazard", "both", "drop", "hratio", "all"),\n+ dropsep=FALSE, lty=1:4, col=1, xlim, ylim=NULL,\n+ label.curves=NULL, \\dots)\n+\n+logrank(S, group)\n+\n+Gompertz2(times, surv)\n+Lognorm2(times, surv)\n+Weibull2(times, surv)\n+}\n+\\arguments{\n+ \\item{rcontrol}{\n+ a function of \\var{n} which returns \\var{n} random uncensored\n+ failure times for the'..b"ffect (hazard ratio of .75)\n+\n+\n+# First find the right Weibull distribution for compliant control patients\n+sc <- Weibull2(c(1,3), c(.95,.7))\n+sc\n+\n+\n+# Inverse cumulative distribution for case where all subjects are followed\n+# at least a years and then between a and b years the density rises\n+# as (time - a) ^ d is a + (b-a) * u ^ (1/(d+1))\n+\n+\n+rcens <- function(n) 1 + (5-1) * (runif(n) ^ .5)\n+# To check this, type hist(rcens(10000), nclass=50)\n+\n+\n+# Put it all together\n+\n+\n+f <- Quantile2(sc, \n+ hratio=function(x)ifelse(x<=.75, 1, .75),\n+ dropin=function(x)ifelse(x<=.5, 0, .15*(x-.5)/(5-.5)),\n+ dropout=function(x).3*x/5)\n+\n+\n+par(mfrow=c(2,2))\n+# par(mfrow=c(1,1)) to make legends fit\n+plot(f, 'all', label.curves=list(keys='lines'))\n+\n+\n+rcontrol <- function(n) f(n, 'control')\n+rinterv <- function(n) f(n, 'intervention')\n+\n+\n+set.seed(211)\n+spower(rcontrol, rinterv, rcens, nc=350, ni=350, \n+ test=logrank, nsim=50) # normally nsim=500 or more\n+par(mfrow=c(1,1))\n+\n+# Compose a censoring time generator function such that at 1 year\n+# 5\\% of subjects are accrued, at 3 years 70\\% are accrued, and at 10\n+# years 100\\% are accrued. The trial proceeds two years past the last\n+# accrual for a total of 12 years of follow-up for the first subject.\n+# Use linear interpolation between these 3 points\n+\n+rcens <- function(n)\n+{\n+ times <- c(0,1,3,10)\n+ accrued <- c(0,.05,.7,1)\n+ # Compute inverse of accrued function at U(0,1) random variables\n+ accrual.times <- approx(accrued, times, xout=runif(n))$y\n+ censor.times <- 12 - accrual.times\n+ censor.times\n+}\n+\n+censor.times <- rcens(500)\n+# hist(censor.times, nclass=20)\n+accrual.times <- 12 - censor.times\n+# Ecdf(accrual.times)\n+# lines(c(0,1,3,10), c(0,.05,.7,1), col='red')\n+# spower(..., rcens=rcens, ...)\n+\n+\\dontrun{\n+# To define a control survival curve from a fitted survival curve\n+# with coordinates (tt, surv) with tt[1]=0, surv[1]=1:\n+\n+Scontrol <- function(times, tt, surv) approx(tt, surv, xout=times)$y\n+tt <- 0:6\n+surv <- c(1, .9, .8, .75, .7, .65, .64)\n+formals(Scontrol) <- list(times=NULL, tt=tt, surv=surv)\n+\n+# To use a mixture of two survival curves, with e.g. 
mixing proportions\n+# of .2 and .8, use the following as a guide:\n+#\n+# Scontrol <- function(times, t1, s1, t2, s2)\n+# .2*approx(t1, s1, xout=times)$y + .8*approx(t2, s2, xout=times)$y\n+# t1 <- ...; s1 <- ...; t2 <- ...; s2 <- ...;\n+# formals(Scontrol) <- list(times=NULL, t1=t1, s1=s1, t2=t2, s2=s2)\n+\n+# Check that spower can detect a situation where generated censoring times\n+# are later than all failure times\n+\n+rcens <- function(n) runif(n, 0, 7)\n+f <- Quantile2(scontrol=Scontrol, hratio=function(x).8, tmax=6)\n+cont <- function(n) f(n, what='control')\n+int <- function(n) f(n, what='intervention')\n+spower(rcontrol=cont, rinterv=int, rcens=rcens, nc=300, ni=300, nsim=20)\n+\n+# Do an unstratified logrank test\n+library(survival)\n+# From SAS/STAT PROC LIFETEST manual, p. 1801\n+days <- c(179,256,262,256,255,224,225,287,319,264,237,156,270,257,242,\n+ 157,249,180,226,268,378,355,319,256,171,325,325,217,255,256,\n+ 291,323,253,206,206,237,211,229,234,209)\n+status <- c(1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,0,\n+ 0,rep(1,19))\n+treatment <- c(rep(1,10), rep(2,10), rep(1,10), rep(2,10))\n+sex <- Cs(F,F,M,F,M,F,F,M,M,M,F,F,M,M,M,F,M,F,F,M,\n+ M,M,M,M,F,M,M,F,F,F,M,M,M,F,F,M,F,F,F,F)\n+data.frame(days, status, treatment, sex)\n+table(treatment, status)\n+logrank(Surv(days, status), treatment) # agrees with p. 1807\n+# For stratified tests the picture is puzzling.\n+# survdiff(Surv(days,status) ~ treatment + strata(sex))$chisq\n+# is 7.246562, which does not agree with SAS (7.1609)\n+# But summary(coxph(Surv(days,status) ~ treatment + strata(sex)))\n+# yields 7.16 whereas summary(coxph(Surv(days,status) ~ treatment))\n+# yields 5.21 as the score test, not agreeing with SAS or logrank() (5.6485)\n+}\n+}\n+\\keyword{htest}\n+\\keyword{survival}\n+\\concept{power}\n+\\concept{study design}\n" |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/spss.get.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/spss.get.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,55 @@ +\name{spss.get} +\alias{spss.get} +\title{Enhanced Importing of SPSS Files} +\description{ +\code{spss.get} invokes the \code{read.spss} function in the +\pkg{foreign} package to read an SPSS file, with a default output +format of \code{"data.frame"}. The \code{label} function is used to +attach labels to individual variables instead of to the data frame as +done by \code{read.spss}. By default, integer-valued variables are +converted to a storage mode of integer unless +\code{force.single=FALSE}. Date variables are converted to \R \code{Date} +variables. By default, underscores in names are converted to periods. +} +\usage{ +spss.get(file, lowernames=FALSE, datevars = NULL, + use.value.labels = TRUE, to.data.frame = TRUE, + max.value.labels = Inf, force.single=TRUE, + allow=NULL, charfactor=FALSE) +} +\arguments{ + \item{file}{input SPSS save file. May be a file on the WWW, indicated + by \code{file} starting with \code{'http://'}.} + \item{lowernames}{set to \code{TRUE} to convert variable names to + lower case} + \item{datevars}{vector of variable names containing dates to be + converted to \R internal format} + \item{use.value.labels}{see \code{\link[foreign]{read.spss}}} + \item{to.data.frame}{see \code{\link[foreign]{read.spss}}; default is + \code{TRUE} for \code{spss.get}} + \item{max.value.labels}{see \code{\link[foreign]{read.spss}}} + \item{force.single}{set to \code{FALSE} to prevent integer-valued + variables from being converted from storage mode \code{double} to + \code{integer}} + \item{allow}{a vector of characters allowed by \R that should not be + converted to periods in variable names. By default, underscores in + variable names are converted to periods as with \R before version 1.9.} + \item{charfactor}{set to \code{TRUE} to change character variables to + factors if they have fewer than n/2 unique values. Blanks and null + strings are converted to \code{NA}s.} +} +\value{ + a data frame or list +} +\author{Frank Harrell} +\seealso{\code{\link[foreign]{read.spss}},\code{\link{cleanup.import}},\code{\link{sas.get}}} + +\examples{ +\dontrun{ +w <- spss.get('/tmp/my.sav', datevars=c('birthdate','deathdate')) + } +} +\keyword{interface} +\keyword{manip} +\keyword{file} +\concept{SPSS data file} |
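A hypothetical sketch (file name and date variable names assumed) combining the options documented above:

w <- spss.get('/tmp/my.sav', lowernames=TRUE, charfactor=TRUE,
              datevars=c('birthdate','deathdate'))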
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/src.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/src.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,30 @@ +\name{src} +\alias{src} +\title{Source a File from the Current Working Directory} +\description{ +\code{src} concatenates \code{".s"} to its argument, quotes the result, +and \code{source}s in the file. It sets \code{options(last.source)} to +this file name so that \code{src()} can be issued to re-\code{source} +the file when it is edited. +} +\usage{ +src(x) +} +\arguments{ + \item{x}{an unquoted file name aside from \code{".s"}. This base file + name must be a legal S name.} +} +\section{Side Effects}{ + Sets system option \code{last.source} +} +\author{Frank Harrell} +\seealso{\code{\link{source}}} +\examples{ +\dontrun{ +src(myfile) # source("myfile.s") +src() # re-source myfile.s +} +} +\keyword{file} +\keyword{programming} +\keyword{utilities} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/stata.get.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/stata.get.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,56 @@ +\name{stata.get} +\alias{stata.get} +\title{Enhanced Importing of STATA Files} +\description{ + Reads a file in Stata version 5-11 binary format into a + data frame. +} +\usage{ +stata.get(file, lowernames = FALSE, convert.dates = TRUE, + convert.factors = TRUE, missing.type = FALSE, + convert.underscore = TRUE, warn.missing.labels = TRUE, + force.single = TRUE, allow=NULL, charfactor=FALSE, \dots) +} +\arguments{ + \item{file}{input \acronym{Stata} save file. May be a file on the \acronym{WWW}, indicated + by \code{file} starting with \samp{'http://'}.} + \item{lowernames}{set to \code{TRUE} to convert variable names to + lower case} + \item{convert.dates}{see \code{\link[foreign]{read.dta}}} + \item{convert.factors}{see \code{\link[foreign]{read.dta}}} + \item{missing.type}{see \code{\link[foreign]{read.dta}}} + \item{convert.underscore}{see \code{\link[foreign]{read.dta}}} + \item{warn.missing.labels}{see \code{\link[foreign]{read.dta}}} + \item{force.single}{set to \code{FALSE} to prevent integer-valued + variables from being converted from storage mode \code{double} to + \code{integer}} + \item{allow}{a vector of characters allowed by \R that should not be + converted to periods in variable names. By default, underscores in + variable names are converted to periods as with \R before version 1.9.} + \item{charfactor}{set to \code{TRUE} to change character variables to + factors if they have fewer than n/2 unique values. Blanks and null + strings are converted to \code{NA}s.} + \item{\dots}{arguments passed to \code{\link[foreign]{read.dta}}.} +} +\details{ + \code{stata.get} invokes the \code{\link[foreign]{read.dta}} function in the + \pkg{foreign} package to read a Stata file, with a default output + format of \code{\link{data.frame}}. The \code{\link{label}} function is used to + attach labels to individual variables instead of to the data frame as + done by \code{\link[foreign]{read.dta}}. By default, integer-valued variables are + converted to a storage mode of integer unless + \code{force.single=FALSE}. Date variables are converted to \R + \code{\link{Date}} variables. By default, underscores in names are converted to periods. +} +\value{A data frame} +\author{Charles Dupont} +\seealso{\code{\link[foreign]{read.dta}},\code{\link{cleanup.import}},\code{\link{label}},\code{\link{data.frame}},\code{\link{Date}}} +\examples{ +\dontrun{ +w <- stata.get('/tmp/my.dta') +} +} +\keyword{interface} +\keyword{manip} +\keyword{file} +\concept{STATA data file} |
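A hypothetical sketch (file name assumed); any extra arguments are passed along to read.dta in the foreign package:

w <- stata.get('/tmp/my.dta', lowernames=TRUE, convert.underscore=FALSE)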
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/string.bounding.box.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/string.bounding.box.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,33 @@ +\name{string.bounding.box} +\alias{string.bounding.box} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{Determine Dimensions of Strings} +\description{ + This determines the number of rows and maximum number of columns of + each string in a vector. +} +\usage{ +string.bounding.box(string, type = c("chars", "width")) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{string}{vector of strings} + \item{type}{character: whether to count characters or screen columns} +} +\value{ + \item{rows}{vector containing the number of character rows in each string.} + \item{columns}{vector containing the maximum number of character + columns in each string.} +} +\author{Charles Dupont} +\note{ + compatible with S-Plus \code{string.bounding.box} +} +\seealso{\code{\link{nchar}}, \code{\link{stringDims}}} +\examples{ +a <- c("this is a single line string", "This is a\nmulti-line string") +string.bounding.box(a) +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{print} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/string.break.line.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/string.break.line.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,32 @@ +\name{string.break.line} +\alias{string.break.line} +\title{Break a String into Many Lines at Newlines} +\description{ + Takes a string and breaks it into separate substrings where there are + newline characters. +} +\usage{ +string.break.line(string) +} +\arguments{ + \item{string}{character vector to be separated into many lines.} +} +\value{ + Returns a list that is the same length as the \code{string} + argument. + + Each list element is a character vector. + + Each character vector's elements are the + split lines of the corresponding element in the \code{string} argument vector. +} +\author{Charles Dupont} +\seealso{\code{\link{strsplit}}} +\examples{ +a <- c('', 'this is a single line string', + 'This is a\nmulti-line string.') + +b <- string.break.line(a) +} +\keyword{print} +\keyword{character} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/stringDims.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/stringDims.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,32 @@ +\name{stringDims} +\alias{stringDims} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{String Dimensions} +\description{ + Finds the height and width of all the strings in a character vector. +} +\usage{ +stringDims(string) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{string}{vector of strings} +} +\details{ + \code{stringDims} finds the number of characters in width and number of + lines in height for each string in the \code{string} argument. +} +\value{ + \item{height}{a vector of the number of lines in each string.} + \item{width}{a vector with the number of character columns in the + longest line.} +} +\author{Charles Dupont} +\seealso{\code{\link{string.bounding.box}}, \code{\link{nchar}}} +\examples{ +a <- c("this is a single line string", "This is a\nmulti-line string") +stringDims(a) +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{print} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/subplot.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/subplot.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,98 @@ +\name{subplot} +\alias{subplot} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{Embed a new plot within an existing plot} +\description{ + Subplot will embed a new plot within an existing plot at the + coordinates specified (in user units of the existing plot). +} +\usage{ +subplot(fun, x, y, size=c(1,1), vadj=0.5, hadj=0.5, pars=NULL) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{fun}{an expression or function defining the new plot to be embedded.} + \item{x}{\code{x}-coordinate(s) of the new plot (in user coordinates + of the existing plot).} + \item{y}{\code{y}-coordinate(s) of the new plot, \code{x} and \code{y} + can be specified in any of the ways understood by \code{xy.coords}.} + \item{size}{The size of the embedded plot in inches if \code{x} and + \code{y} have length 1.} + \item{vadj}{vertical adjustment of the plot when \code{y} is a scalar, + the default is to center vertically, 0 means place the bottom of the + plot at \code{y}, 1 places the top of the plot at \code{y}.} + \item{hadj}{horizontal adjustment of the plot when \code{x} is a + scalar, the default is to center horizontally, 0 means place the + left edge of the plot at \code{x}, and 1 means place the right edge + of the plot at \code{x}.} + \item{pars}{a list of parameters to be passed to \code{par} before + running \code{fun}.} +} +\details{ + The coordinates \code{x} and \code{y} can be scalars or vectors of + length 2. If vectors of length 2 then they determine the opposite + corners of the rectangle for the embedded plot (and the parameters + \code{size}, \code{vadj}, and \code{hadj} are all ignored). + + If \code{x} and \code{y} are given as scalars then the plot position + relative to the point and the size of the plot will be determined by + the arguments \code{size}, \code{vadj}, and \code{hadj}. The default + is to center a 1 inch by 1 inch plot at \code{x,y}. Setting + \code{vadj} and \code{hadj} to \code{(0,0)} will position the lower + left corner of the plot at \code{(x,y)}. + + The rectangle defined by \code{x}, \code{y}, \code{size}, \code{vadj}, + and \code{hadj} will be used as the plotting area of the new plot. + Any tick marks, axis labels, main and sub titles will be outside of + this rectangle. + + Any graphical parameter settings that you would like to be in place + before \code{fun} is evaluated can be specified in the \code{pars} + argument (warning: specifying layout parameters here (\code{plt}, + \code{mfrow}, etc.) may cause unexpected results). + + After the function completes the graphical parameters will have been + reset to what they were before calling the function (so you can + continue to augment the original plot). +} +\value{ + An invisible list with the graphical parameters that were in effect + when the subplot was created. Passing this list to \code{par} will + enable you to augment the embedded plot. 
+} +%\references{ ~put references to the literature/web site here ~ } +\author{Greg Snow \email{greg.snow@imail.org}} +%\note{ ~~further notes~~ } + +% ~Make other sections like Warning with \section{Warning }{....} ~ + +\seealso{\code{\link{cnvrt.coords}}, \code{\link{par}}, \code{\link{symbols}}} +\examples{ +# make an original plot +plot( 11:20, sample(51:60) ) + +# add some histograms + +subplot( hist(rnorm(100)), 15, 55) +subplot( hist(runif(100),main='',xlab='',ylab=''), 11, 51, hadj=0, vadj=0) +subplot( hist(rexp(100, 1/3)), 20, 60, hadj=1, vadj=1, size=c(0.5,2) ) +subplot( hist(rt(100,3)), c(12,16), c(57,59), pars=list(lwd=3,ask=FALSE) ) + +tmp <- rnorm(25) +qqnorm(tmp) +qqline(tmp) +tmp2 <- subplot( hist(tmp,xlab='',ylab='',main=''), + cnvrt.coords(0.1,0.9,'plt')$usr, vadj=1, hadj=0 ) +abline(v=0, col='red') # wrong way to add a reference line to histogram + +# right way to add a reference line to histogram +op <- par(no.readonly=TRUE) +par(tmp2) +abline(v=0, col='green') +par(op) + + +} +\keyword{aplot}% at least one, from doc/KEYWORDS +\keyword{dplot} + |
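Both calling conventions from the \details section, sketched with throwaway data (not part of the diff; assumes Hmisc is attached):

    library(Hmisc)
    plot(1:10, (1:10)^2)
    # scalar x, y: a 1 x 1 inch histogram centered at (3, 80)
    subplot(hist(rnorm(50), main=''), x=3, y=80)
    # length-2 x and y give opposite corners of the embedded plot region;
    # size, vadj and hadj are then ignored
    subplot(hist(runif(50), main=''), x=c(6, 9), y=c(5, 40))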
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/summarize.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/summarize.Rd Wed Jun 28 20:28:48 2017 -0400 |
| [ |
| @@ -0,0 +1,262 @@
+\name{summarize}
+\alias{summarize}
+\alias{asNumericMatrix}
+\alias{matrix2dataFrame}
+\title{Summarize Scalars or Matrices by Cross-Classification}
+\description{
+\code{summarize} is a fast version of \code{summary.formula(formula,
+method="cross",overall=FALSE)} for producing stratified summary statistics
+and storing them in a data frame for plotting (especially with trellis
+\code{xyplot} and \code{dotplot} and Hmisc \code{xYplot}).  Unlike
+\code{aggregate}, \code{summarize} accepts a matrix as its first
+argument and a multi-valued \code{FUN}
+argument and \code{summarize} also labels the variables in the new data
+frame using their original names.  Unlike methods based on
+\code{tapply}, \code{summarize} stores the values of the stratification
+variables using their original types, e.g., a numeric \code{by} variable
+will remain a numeric variable in the collapsed data frame.
+\code{summarize} also retains \code{"label"} attributes for variables.
+\code{summarize} works especially well with the Hmisc \code{xYplot}
+function for displaying multiple summaries of a single variable on each
+panel, such as means and upper and lower confidence limits.
+
+\code{asNumericMatrix} converts a data frame into a numeric matrix,
+saving attributes to reverse the process by \code{matrix2dataFrame}.
+It saves attributes that are commonly preserved across row
+subsetting (i.e., it does not save \code{dim}, \code{dimnames}, or
+\code{names} attributes).
+
+\code{matrix2dataFrame} converts a numeric matrix back into a data
+frame if it was created by \code{asNumericMatrix}.
+}
+\usage{
+summarize(X, by, FUN, \dots, 
+          stat.name=deparse(substitute(X)),
+          type=c('variables','matrix'), subset=TRUE,
+          keepcolnames=FALSE)
+
+asNumericMatrix(x)
+
+matrix2dataFrame(x, at=attr(x, 'origAttributes'), restoreAll=TRUE)
+}
+\arguments{
+\item{X}{
+a vector or matrix capable of being operated on by the
+function specified as the \code{FUN} argument
+}
+\item{by}{
+one or more stratification variables.  If a single
+variable, \code{by} may be a vector, otherwise it should be a list.
+Using the Hmisc \code{llist} function instead of \code{list} will result
+in individual variable names being accessible to \code{summarize}.  For
+example, you can specify \code{llist(age.group,sex)} or
+\code{llist(Age=age.group,sex)}.  The latter gives \code{age.group} a
+new temporary name, \code{Age}.
+}
+\item{FUN}{
+a function of a single vector argument, used to create the statistical
+summaries for \code{summarize}.  \code{FUN} may compute any number of
+statistics.
+}
+\item{...}{extra arguments are passed to \code{FUN}}
+\item{stat.name}{
+the name to use when creating the main summary variable.  By default,
+the name of the \code{X} argument is used.  Set \code{stat.name} to
+\code{NULL} to suppress this name replacement.
+}
+\item{type}{
+Specify \code{type="matrix"} to store the summary variables (if there are
+more than one) in a matrix.
+}
+\item{subset}{
+a logical vector or integer vector of subscripts used to specify the
+subset of data to use in the analysis.  The default is to use all
+observations in the data frame.
+}
+\item{keepcolnames}{by default when \code{type="matrix"}, the first
+  column of the computed matrix is the name of the first argument to
+  \code{summarize}.  Set \code{keepcolnames=TRUE} to retain the name of
+  the first column created by \code{FUN}}
+\item{x}{
+  a data frame (for \code{asNumericMatrix}) or a numeric matrix (for
+  \code{matrix2dataFrame}).
+}
+\item{at}{List containing attributes of original data frame that survive
+  subsetting.  Defaults to attribute \code{"origAttributes"} of the
+  object \code{x}, created by the call to \code{asNumericMatrix}}
+\item{restoreAll}{
+  set to \code{FALSE} to only restore attributes \code{label},
+  \code{units}, and \code{levels} instead of all attributes
+}
+}
+\value{
+For \code{summarize}, a data frame containing the \code{by} va ... ear-1997
+s <- summarize(y, llist(month,year), smedian.hilow, conf.int=.5)
+s
+mApply(y, llist(month,year), smedian.hilow, conf.int=.5)
+
+xYplot(Cbind(y,Lower,Upper) ~ month, groups=year, data=s, 
+       keys='lines', method='alt')
+# Can also do:
+s <- summarize(y, llist(month,year), quantile, probs=c(.5,.25,.75),
+               stat.name=c('y','Q1','Q3'))
+xYplot(Cbind(y, Q1, Q3) ~ month, groups=year, data=s, keys='lines')
+# To display means and bootstrapped nonparametric confidence intervals
+# use for example:
+s <- summarize(y, llist(month,year), smean.cl.boot)
+xYplot(Cbind(y, Lower, Upper) ~ month | year, data=s)
+
+# For each subject use the trapezoidal rule to compute the area under
+# the (time,response) curve using the Hmisc trap.rule function
+x <- cbind(time=c(1,2,4,7, 1,3,5,10),response=c(1,3,2,4, 1,3,2,4))
+subject <- c(rep(1,4),rep(2,4))
+trap.rule(x[1:4,1],x[1:4,2])
+summarize(x, subject, function(y) trap.rule(y[,1],y[,2]))
+
+\dontrun{
+# Another approach would be to properly re-shape the mm array below
+# This assumes no missing cells.  There are many other approaches.
+# mApply will do this well while allowing for missing cells.
+m <- tapply(y, list(year,month), quantile, probs=c(.25,.5,.75))
+mm <- array(unlist(m), dim=c(3,2,12), 
+            dimnames=list(c('lower','median','upper'),c('1997','1998'),
+                          as.character(1:12)))
+# aggregate will help but it only allows you to compute one quantile
+# at a time; see also the Hmisc mApply function
+dframe <- aggregate(y, list(Year=year,Month=month), quantile, probs=.5)
+
+# Compute expected life length by race assuming an exponential
+# distribution - can also use summarize
+g <- function(y) { # computations for one race group
+  futime <- y[,1]; event <- y[,2]
+  sum(futime)/sum(event)  # assume event=1 for death, 0=alive
+}
+mApply(cbind(followup.time, death), race, g)
+
+# To run mApply on a data frame:
+xn <- asNumericMatrix(x)
+m <- mApply(xn, race, h)
+# Here assume h is a function that returns a matrix similar to x
+matrix2dataFrame(m)
+
+# Get stratified weighted means
+g <- function(y) wtd.mean(y[,1],y[,2])
+summarize(cbind(y, wts), llist(sex,race), g, stat.name='y')
+mApply(cbind(y,wts), llist(sex,race), g)
+
+# Compare speed of mApply vs. by for computing 
+d <- data.frame(sex=sample(c('female','male'),100000,TRUE),
+                country=sample(letters,100000,TRUE),
+                y1=runif(100000), y2=runif(100000))
+g <- function(x) {
+  y <- c(median(x[,'y1']-x[,'y2']),
+         med.sum =median(x[,'y1']+x[,'y2']))
+  names(y) <- c('med.diff','med.sum')
+  y
+}
+
+system.time(by(d, llist(sex=d$sex,country=d$country), g))
+system.time({
+             x <- asNumericMatrix(d)
+             a <- subsAttr(d)
+             m <- mApply(x, llist(sex=d$sex,country=d$country), g)
+            })
+system.time({
+             x <- asNumericMatrix(d)
+             summarize(x, llist(sex=d$sex, country=d$country), g)
+            })
+
+# An example where each subject has one record per diagnosis but sex of
+# subject is duplicated for all the rows a subject has.  Get the cross-
+# classified frequencies of diagnosis (dx) by sex and plot the results
+# with a dot plot
+
+count <- rep(1,length(dx))
+d <- summarize(count, llist(dx,sex), sum)
+Dotplot(dx ~ count | sex, data=d)
+}
+d <- list(x=1:10, a=factor(rep(c('a','b'),5)),
+          b=structure(letters[1:10], label='label for a'))
+x <- asNumericMatrix(d)
+attr(x, 'origAttributes')
+matrix2dataFrame(x)
+
+detach('dfr')
+
+# Run summarize on a matrix to get column means
+x <- c(1:19,NA)
+y <- 101:120
+z <- cbind(x, y)
+g <- c(rep(1, 10), rep(2, 10))
+summarize(z, g, colMeans, na.rm=TRUE, stat.name='x')
+# Also works on an all numeric data frame
+summarize(as.data.frame(z), g, colMeans, na.rm=TRUE, stat.name='x')
+}
+\keyword{category}
+\keyword{manip}
+\keyword{multivariate}
+\concept{grouping}
+\concept{stratification}
+\concept{aggregation}
+\concept{cross-classification} |
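A small round-trip sketch of the asNumericMatrix/matrix2dataFrame pairing described above (not part of the diff; assumes Hmisc is attached):

    library(Hmisc)
    d <- data.frame(age=c(30, 40, 55), sex=factor(c('m', 'f', 'm')))
    label(d$age) <- 'Age in years'
    m  <- asNumericMatrix(d)    # numeric matrix; 'origAttributes' saved
    d2 <- matrix2dataFrame(m)   # factor levels and labels restored
    label(d2$age)               # "Age in years"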
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/summary.formula.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/summary.formula.Rd Wed Jun 28 20:28:48 2017 -0400 |
| [ |
| @@ -0,0 +1,937 @@
+\name{summary.formula}
+\alias{summary.formula}
+\alias{stratify}
+\alias{print.summary.formula.response}
+\alias{plot.summary.formula.response}
+\alias{latex.summary.formula.response}
+\alias{print.summary.formula.reverse}
+\alias{plot.summary.formula.reverse}
+\alias{latex.summary.formula.reverse}
+\alias{[.summary.formula.response}
+\alias{print.summary.formula.cross}
+\alias{latex.summary.formula.cross}
+\alias{formula.summary.formula.cross}
+\alias{na.retain}
+\alias{cumcategory}
+\alias{conTestkw}
+\alias{catTestchisq}
+\alias{ordTestpo}
+\title{Summarize Data for Making Tables and Plots}
+\description{
+  \code{summary.formula} summarizes the variables listed in an S formula,
+  computing descriptive statistics (including ones in a
+  user-specified function).  The summary statistics may be passed to
+  \code{print} methods, \code{plot} methods for making annotated dot charts, and
+  \code{latex} methods for typesetting tables using LaTeX. 
+  \code{summary.formula} has three methods for computing descriptive
+  statistics on univariate or multivariate responses, subsetted by
+  categories of other variables.  The method of summarization is
+  specified in the parameter \code{method} (see details below).  For the
+  \code{response} and \code{cross} methods, the statistics used to
+  summarize the data 
+  may be specified in a very flexible way (e.g., the geometric mean,
+  33rd percentile, Kaplan-Meier 2-year survival estimate, mixtures of
+  several statistics).  The default summary statistic for these methods
+  is the mean (the proportion of positive responses for a binary
+  response variable).  The \code{cross} method is useful for creating data
+  frames which contain summary statistics that are passed to \code{trellis}
+  as raw data (to make multi-panel dot charts, for example).  The
+  \code{print} methods use the \code{print.char.matrix} function to print boxed
+  tables.
+
+  The right hand side of \code{formula} may contain \code{mChoice}
+  (\dQuote{multiple choice}) variables.  When \code{test=TRUE} each choice is
+  tested separately as a binary categorical response.
+
+  The \code{plot} method for \code{method="reverse"} creates a temporary
+  function \code{Key} in frame 0 as is done by the \code{xYplot} and
+  \code{Ecdf.formula} functions.  After \code{plot} runs, you can type
+  \code{Key()} to put a legend in a default location, or
+  e.g. \code{Key(locator(1))} to draw a legend where you click the left
+  mouse button.  This key is for categorical variables, so to have the
+  opportunity to put the key on the graph you will probably want to use
+  the command \code{plot(object, which="categorical")}.  A second function
+  \code{Key2} is created if continuous variables are being plotted.  It is
+  used the same as \code{Key}.  If the \code{which} argument is not
+  specified to \code{plot}, two pages of plots will be produced.  If you
+  don't define \code{par(mfrow=)} yourself,
+  \code{plot.summary.formula.reverse} will try to lay out a multi-panel
+  graph to best fit all the individual dot charts for continuous
+  variables.
+
+  There is a subscripting method for objects created with
+  \code{method="response"}. 
+  This can be used to print or plot selected variables or summary statistics
+  where there would otherwise be too many on one page.
+
+  \code{cumcategory} is a utility function useful when summarizing an ordinal
+  response variable.  It converts such a variable having \code{k} levels to a
+  matrix with \code{k-1} columns, where column \code{i} is a vector of zeros and
+  ones indicating that the categorical response is in level \code{i+1} or
+  greater.  When the left hand side of \code{formula} is \code{cumcategory(y)},
+  the default \code{fun} will summarize it by computing all of the relevant
+  cumulative proportions.
+
+  Functions \code{conTestkw}, \code{catTestchisq}, \code{ordTestpo} are
+  the default statistical test functions for \code{summary.form ... d the median.  Make separate tables for the two randomized
+#groups and make plots for the active arm.
+
+
+g <- function(y) c(Mean=mean(y), Median=median(y))
+
+
+for(sub in c("D-penicillamine", "placebo")) {
+  ss <- summary(bili ~ age.groups + ascites + chol, fun=g,
+                subset=drug==sub)
+  cat('\n',sub,'\n\n')
+  print(ss)
+
+
+  if(sub=='D-penicillamine') {
+    par(mfrow=c(1,1))
+    plot(s4, which=1:2, dotfont=c(1,-1), subtitles=FALSE, main='')
+    #1=mean, 2=median   -1 font = open circle
+    title(sub='Closed circle: mean;  Open circle: median', adj=0)
+    title(sub=sub, adj=1)
+  }
+
+
+  w <- latex(ss, append=TRUE, fi='my.tex', 
+             label=if(sub=='placebo') 's4b' else 's4a',
+             caption=paste(label(bili),' {\\\\em (',sub,')}', sep=''))
+  #Note symbolic labels for tables for two subsets: s4a, s4b
+  prlatex(w)
+}
+
+
+#Now consider examples in 'reverse' format, where the lone dependent
+#variable tells the summary function how to stratify all the 
+#'independent' variables.  This is typically used to make tables 
+#comparing baseline variables by treatment group, for example.
+
+
+s5 <- summary(drug ~ bili + albumin + stage + protime + sex + 
+              age + spiders,
+              method='reverse')
+#To summarize all variables, use summary(drug ~., data=pbc)
+#To summarize all variables with no stratification, use
+#summary(~a+b+c) or summary(~.,data=\dots)
+
+
+options(digits=1)
+print(s5, npct='both')
+#npct='both' : print both numerators and denominators
+plot(s5, which='categorical')
+Key(locator(1))  # draw legend at mouse click
+par(oma=c(3,0,0,0))  # leave outer margin at bottom
+plot(s5, which='continuous')
+Key2()           # draw legend at lower left corner of plot
+                 # oma= above makes this default key fit the page better
+
+
+options(digits=3)
+w <- latex(s5, npct='both', here=TRUE) 
+# creates s5.tex
+
+
+#Turn to a different dataset and do cross-classifications on possibly 
+#more than one independent variable.  The summary function with 
+#method='cross' produces a data frame containing the cross-
+#classifications.  This data frame is suitable for multi-panel 
+#trellis displays, although `summarize' works better for that.
+
+
+attach(prostate)
+size.quartile <- cut2(sz, g=4)
+bone <- factor(bm,labels=c("no mets","bone mets"))
+
+
+s7 <- summary(ap>1 ~ size.quartile + bone, method='cross')
+#In this case, quartiles are the default so could have said sz + bone
+
+
+options(digits=3)
+print(s7, twoway=FALSE)
+s7    # same as print(s7)
+w <- latex(s7, here=TRUE)   # Make s7.tex
+
+
+library(trellis,TRUE)
+invisible(ps.options(reset=TRUE))
+trellis.device(postscript, file='demo2.ps')
+
+
+dotplot(S ~ size.quartile|bone, data=s7,  #s7 is name of summary stats
+        xlab="Fraction ap>1", ylab="Quartile of Tumor Size")
+#Can do this more quickly with summarize:
+# s7 <- summarize(ap>1, llist(size=cut2(sz, g=4), bone), mean,
+#                 stat.name='Proportion')
+# dotplot(Proportion ~ size | bone, data=s7)
+
+
+summary(age ~ stage, method='cross')
+summary(age ~ stage, fun=quantile, method='cross')
+summary(age ~ stage, fun=smean.sd, method='cross')
+summary(age ~ stage, fun=smedian.hilow, method='cross')
+summary(age ~ stage, fun=function(x) c(Mean=mean(x), Median=median(x)),
+        method='cross')
+#The next statements print real two-way tables
+summary(cbind(age,ap) ~ stage + bone, 
+        fun=function(y) apply(y, 2, quantile, c(.25,.75)),
+        method='cross')
+options(digits=2)
+summary(log(ap) ~ sz + bone,
+        fun=function(y) c(Mean=mean(y), quantile(y)),
+        method='cross')
+
+
+#Summarize an ordered categorical response by all of the needed
+#cumulative proportions
+summary(cumcategory(disease.severity) ~ age + sex)
+
+}
+}
+\keyword{category}
+\keyword{interface}
+\keyword{hplot}
+\keyword{manip}
+\concept{grouping}
+\concept{stratification}
+\concept{aggregation}
+\concept{cross-classification} |
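A compact sketch of the \code{method='reverse'} usage discussed above, with simulated data (not part of the diff; assumes Hmisc is attached):

    library(Hmisc)
    set.seed(1)
    treat <- factor(sample(c('A', 'B'), 80, TRUE))
    age   <- rnorm(80, 50, 8)
    sex   <- factor(sample(c('female', 'male'), 80, TRUE))
    s <- summary(treat ~ age + sex, method='reverse', test=TRUE)
    print(s, npct='both')   # numerators and denominators for categoricals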
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/summaryM.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/summaryM.Rd Wed Jun 28 20:28:48 2017 -0400 |
| [ |
| @@ -0,0 +1,469 @@
+\name{summaryM}
+\alias{summaryM}
+\alias{print.summaryM}
+\alias{plot.summaryM}
+\alias{latex.summaryM}
+\title{Summarize Mixed Data Types vs. Groups}
+\description{
+  \code{summaryM} summarizes the variables listed in an S formula,
+  computing descriptive statistics and optionally statistical tests for
+  group differences.  This function is typically used when there are
+  multiple left-hand-side variables that are independently summarized
+  against groups marked by a single right-hand-side variable.  The summary
+  statistics may be passed to \code{print} methods, \code{plot} methods
+  for making annotated dot charts and extended box plots, and 
+  \code{latex} methods for typesetting tables using LaTeX. 
+  The \code{print} methods use the \code{print.char.matrix} function to
+  print boxed tables.
+
+  Continuous variables are described by three quantiles (quartiles by
+  default) when printing, or by the following quantiles when plotting
+  extended box plots using the \code{\link{bpplt}} function:
+  0.05, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.95.  The box
+  plots are scaled to the 0.025 and 0.975 quantiles of each continuous
+  left-hand-side variable.  Categorical variables are 
+  described by counts and percentages.
+  
+  The left hand side of \code{formula} may contain \code{mChoice}
+  ("multiple choice") variables.  When \code{test=TRUE} each choice is
+  tested separately as a binary categorical response.
+
+  The \code{plot} method for \code{method="reverse"} creates a temporary
+  function \code{Key} as is done by the \code{xYplot} and
+  \code{Ecdf.formula} functions.  After \code{plot}
+  runs, you can type \code{Key()} to put a legend in a default location, or
+  e.g. \code{Key(locator(1))} to draw a legend where you click the left
+  mouse button.  This key is for categorical variables, so to have the
+  opportunity to put the key on the graph you will probably want to use
+  the command \code{plot(object, which="categorical")}.  A second function
+  \code{Key2} is created if continuous variables are being plotted.  It is
+  used the same as \code{Key}.  If the \code{which} argument is not
+  specified to \code{plot}, two pages of plots will be produced.  If you
+  don't define \code{par(mfrow=)} yourself,
+  \code{plot.summaryM} will try to lay out a multi-panel
+  graph to best fit all the individual charts for continuous
+  variables.
+}
+\usage{
+summaryM(formula, groups=NULL, data=NULL, subset, na.action=na.retain,
+         overall=FALSE, continuous=10, na.include=FALSE,
+         quant=c(0.025, 0.05, 0.125, 0.25, 0.375, 0.5, 0.625,
+                 0.75, 0.875, 0.95, 0.975),
+         nmin=100, test=FALSE,
+         conTest=conTestkw, catTest=catTestchisq,
+         ordTest=ordTestpo)
+
+\method{print}{summaryM}(x, digits, prn = any(n != N),
+      what=c('proportion', '\%'), pctdig = if(what == '\%') 0 else 2,
+      npct = c('numerator', 'both', 'denominator', 'none'),
+      exclude1 = TRUE, vnames = c('labels', 'names'), prUnits = TRUE,
+      sep = '/', abbreviate.dimnames = FALSE,
+      prefix.width = max(nchar(lab)), min.colwidth, formatArgs=NULL, round=NULL,
+      prtest = c('P','stat','df','name'), prmsd = FALSE, long = FALSE,
+      pdig = 3, eps = 0.001, \dots)
+
+\method{plot}{summaryM}(x, vnames = c('labels', 'names'),
+     what = c('proportion', '\%'),
+     which = c('both', 'categorical', 'continuous'),
+     xlim = if(what == 'proportion') c(0,1)
+            else c(0,100), 
+     xlab = if(what=='proportion') 'Proportion'
+            else 'Percentage', 
+     pch = c(16, 1, 2, 17, 15, 3, 4, 5, 0), exclude1 = TRUE,
+     main, subtitles = TRUE,
+     prtest = c('P', 'stat', 'df', 'name'), pdig = 3, eps = 0.001,
+     conType = c('bp', 'dot', 'raw'), cex.means = 0.5, cex=par('cex'), \dots)
+
+\method{latex}{summaryM}(object, title =
+      first.word(deparse(substitute(object))),
+      file=paste(title, 'tex', sep='.'), append=FALSE, digits, 
+      ... x} suffix.  Default
+    is the name of the \code{summary} object.  If \code{caption} is specified,
+    \code{title} is also used for the table's symbolic reference label. 
+  }
+  \item{file}{name of file to write LaTeX code to.  Specifying
+    \code{file=""} will cause LaTeX code to just be printed to
+    standard output rather than be stored in a permanent file.
+  }
+  \item{append}{specify \code{TRUE} to add code to an existing file}
+  \item{rowlabel}{
+    see \code{latex.default} (under the help file \code{latex})
+  }
+  \item{middle.bold}{
+    set to \code{TRUE} to have LaTeX use bold face for the middle
+    quantile
+  }
+  \item{outer.size}{the font size for outer quantiles
+  }
+  \item{insert.bottom}{
+    set to \code{FALSE} to suppress inclusion of definitions placed at the
+    bottom of LaTeX tables.  You can also specify a character string
+    containing other text that overrides the automatic text.
+  }
+  \item{dcolumn}{see \code{latex}}
+  }
+\value{
+  a list.  \code{plot.summaryM} returns the number
+  of pages of plots that were made.
+}
+\section{Side Effects}{
+  \code{plot.summaryM} creates a function \code{Key} and
+  \code{Key2} in frame 0 that will draw legends.
+}
+\author{
+  Frank Harrell
+  \cr
+  Department of Biostatistics
+  \cr
+  Vanderbilt University
+  \cr
+  \email{f.harrell@vanderbilt.edu}
+}
+\references{
+  Harrell FE (2004): Statistical tables and plots using S and LaTeX.
+  Document available from
+  \url{http://biostat.mc.vanderbilt.edu/twiki/pub/Main/StatReport/summary.pdf}.
+}
+\seealso{
+  \code{\link{mChoice}}, \code{\link{label}}, \code{\link{dotchart3}},
+  \code{\link{print.char.matrix}}, \code{\link{update}},
+  \code{\link{formula}}, 
+  \code{\link{format.default}}, \code{\link{latex}},
+  \code{\link{latexTranslate}}, \code{\link{bpplt}},
+  \code{\link{tabulr}}, \code{\link{bpplotM}}, \code{\link{summaryP}}
+}
+\examples{
+options(digits=3)
+set.seed(173)
+sex <- factor(sample(c("m","f"), 500, rep=TRUE))
+country <- factor(sample(c('US', 'Canada'), 500, rep=TRUE))
+age <- rnorm(500, 50, 5)
+sbp <- rnorm(500, 120, 12)
+label(sbp) <- 'Systolic BP'
+units(sbp) <- 'mmHg'
+treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE))
+treatment[1]
+sbp[1] <- NA
+
+# Generate a 3-choice variable; each of 3 variables has 5 possible levels
+symp <- c('Headache','Stomach Ache','Hangnail',
+          'Muscle Ache','Depressed')
+symptom1 <- sample(symp, 500,TRUE)
+symptom2 <- sample(symp, 500,TRUE)
+symptom3 <- sample(symp, 500,TRUE)
+Symptoms <- mChoice(symptom1, symptom2, symptom3, label='Primary Symptoms')
+table(as.character(Symptoms))
+
+# Note: In this example, some subjects have the same symptom checked
+# multiple times; in practice these redundant selections would be NAs
+# mChoice will ignore these redundant selections
+
+f <- summaryM(age + sex + sbp + Symptoms ~ treatment, test=TRUE)
+f
+# trio of numbers represent 25th, 50th, 75th percentile
+print(f, long=TRUE)
+plot(f)
+plot(f, conType='dot', prtest='P')
+bpplt()  # annotated example showing layout of bp plot
+
+# Produce separate tables by country
+f <- summaryM(age + sex + sbp + Symptoms ~ treatment + country,
+              groups='treatment', test=TRUE)
+f
+
+\dontrun{
+getHdata(pbc)
+s5 <- summaryM(bili + albumin + stage + protime + sex + 
+               age + spiders ~ drug, data=pbc)
+
+print(s5, npct='both')
+# npct='both' : print both numerators and denominators
+plot(s5, which='categorical')
+Key(locator(1))  # draw legend at mouse click
+par(oma=c(3,0,0,0))  # leave outer margin at bottom
+plot(s5, which='continuous')  # see also bpplotM
+Key2()           # draw legend at lower left corner of plot
+                 # oma= above makes this default key fit the page better
+
+options(digits=3)
+w <- latex(s5, npct='both', here=TRUE, file='') 
+}
+}
+\keyword{category}
+\keyword{interface}
+\keyword{hplot}
+\keyword{manip}
+\concept{grouping}
+\concept{stratification}
+\concept{aggregation} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/summaryP.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/summaryP.Rd Wed Jun 28 20:28:48 2017 -0400 |
| [ |
| @@ -0,0 +1,182 @@
+\name{summaryP}
+\alias{summaryP}
+\alias{plot.summaryP}
+\alias{latex.summaryP}
+\title{Multi-way Summary of Proportions}
+\description{
+  \code{summaryP} produces a tall and thin data frame containing
+  numerators (\code{freq}) and denominators (\code{denom}) after
+  stratifying the data by a series of variables.  A special capability
+  to group a series of related yes/no variables is included through the
+  use of the \code{\link{ynbind}} function, for which the user specifies a final
+  argument \code{label} used to label the panel created for that group
+  of related variables.
+  
+  The \code{plot} method for \code{summaryP} displays proportions as a
+  multi-panel dot chart using the \code{lattice} package's \code{dotplot}
+  function with a special \code{panel} function.  Numerators and
+  denominators of proportions are also included as text, in the same
+  colors as used by an optional \code{groups} variable.  The
+  \code{formula} argument used in the \code{dotplot} call is constructed,
+  but the user can easily reorder the variables by specifying
+  \code{formula}, with elements named \code{val} (category levels),
+  \code{var} (classification variable name), \code{freq} (calculated
+  result) plus the overall cross-classification variables excluding
+  \code{groups}.
+
+  The \code{latex} method produces one or more LaTeX \code{tabular}s
+  containing a table representation of the result, with optional
+  side-by-side display if \code{groups} is specified.  Multiple
+  \code{tabular}s result from the presence of non-group stratification
+  factors.
+}
+\usage{
+summaryP(formula, data = NULL, subset = NULL,
+         na.action = na.retain, exclude1=TRUE, sort=TRUE,
+         asna = c("unknown", "unspecified"), \dots)
+\method{plot}{summaryP}(x, formula, groups=NULL, xlim = c(-.05, 1.05),
+         text.at=NULL, cex.values = 0.5,
+         key = list(columns = length(groupslevels), x = 0.75,
+                    y = -0.04, cex = 0.9,
+                    col = trellis.par.get('superpose.symbol')$col,
+                    corner=c(0,1)),
+         outerlabels=TRUE, autoarrange=TRUE, \dots)
+\method{latex}{summaryP}(object, groups=NULL, file='', round=3,
+                         size=NULL, append=TRUE, \dots)
+}
+\arguments{
+  \item{formula}{a formula with the variables for whose levels
+    proportions are computed on the left hand side, and major
+    classification variables on the right.  The formula needs to include
+    any variable later used as \code{groups}, as the data summarization
+    does not distinguish between superpositioning and paneling.  For the
+    plot method, \code{formula} can provide an override to the default
+    formula for \code{dotplot()}.}
+  \item{data}{an optional data frame}
+  \item{subset}{an optional subsetting expression or vector}
+  \item{na.action}{function specifying how to handle \code{NA}s.  The
+    default is to keep all \code{NA}s in the analysis frame.}
+  \item{exclude1}{By default, \code{summaryP} removes redundant entries
+    from tables for variables with only two levels.  For example, if you
+    print the proportion of females, you don't need to print the
+    proportion of males.  To override this, set \code{exclude1=FALSE}.}
+  \item{sort}{set to \code{FALSE} to not sort category levels in
+    descending order of global proportions}
+  \item{asna}{character vector specifying level names to consider the
+    same as \code{NA}.  Set \code{asna=NULL} to not consider any.}
+  \item{x}{an object produced by \code{summaryP}}
+  \item{groups}{a character string containing the name of a
+    superpositioning variable for obtaining 
+    further stratification within a horizontal line in the dot chart.}
+  \item{xlim}{\code{x}-axis limits.  Default is \code{c(0,1)}.}
+  \item{text.at}{specify to leave unused space to the right of each
+    panel to prevent numerators and denominators from touching data
+    points.  \code{text.at} is the upper limit for scaling panels'
+    \code{x}-axes but tick marks a ... SE} to prevent usage of
+    \code{useOuterStrips}.}
+  \item{autoarrange}{If \code{TRUE}, the formula is re-arranged so that
+    if there are two conditioning (paneling) variables, the variable with
+    the most levels is taken as the vertical condition.}
+  \item{\dots}{ignored}
+  \item{object}{an object produced by \code{summaryP}}
+  \item{file}{file name, defaults to writing to console}
+  \item{round}{number of digits to the right of the decimal place for
+    proportions}
+  \item{size}{optional font size such as \code{"small"}}
+  \item{append}{set to \code{FALSE} to start output over}
+}
+\value{\code{summaryP} produces a data frame of class
+  \code{"summaryP"}.  The \code{plot} method produces a \code{lattice}
+  object of class \code{"trellis"}.  The \code{latex} method produces an
+  object of class \code{"latex"} with an additional attribute
+  \code{ngrouplevels} specifying the number of levels of any
+  \code{groups} variable.
+}
+\author{Frank Harrell
+  \cr
+  Department of Biostatistics
+  \cr
+  Vanderbilt University
+  \cr
+  \email{f.harrell@vanderbilt.edu}}
+\seealso{\code{\link{bpplotM}}, \code{\link{summaryM}},
+  \code{\link{ynbind}}, \code{\link{pBlock}}}
+\examples{
+n <- 100
+f <- function(na=FALSE) {
+  x <- sample(c('N', 'Y'), n, TRUE)
+  if(na) x[runif(100) < .1] <- NA
+  x
+}
+set.seed(1)
+d <- data.frame(x1=f(), x2=f(), x3=f(), x4=f(), x5=f(), x6=f(), x7=f(TRUE),
+                age=rnorm(n, 50, 10),
+                race=sample(c('Asian', 'Black/AA', 'White'), n, TRUE),
+                sex=sample(c('Female', 'Male'), n, TRUE),
+                treat=sample(c('A', 'B'), n, TRUE),
+                region=sample(c('North America','Europe'), n, TRUE))
+d <- upData(d, labels=c(x1='MI', x2='Stroke', x3='AKI', x4='Migraines',
+                 x5='Pregnant', x6='Other event', x7='MD withdrawal',
+                 race='Race', sex='Sex'))
+dasna <- subset(d, region=='North America')
+with(dasna, table(race, treat))
+s <- summaryP(race + sex + ynbind(x1, x2, x3, x4, x5, x6, x7, label='Exclusions') ~
+              region + treat, data=d)
+# add exclude1=FALSE to include female category
+plot(s, groups='treat')
+
+plot(s, val ~ freq | region * var, groups='treat', outerlabels=FALSE)
+# Much better looking if omit outerlabels=FALSE; see output at
+# http://biostat.mc.vanderbilt.edu/HmiscNew#summaryP
+# See more examples under bpplotM
+
+# Make a chart where there is a block of variables that
+# are only analyzed for males.  Keep redundant sex in block for demo.
+# Leave extra space for numerators, denominators
+sb <- summaryP(race + sex +
+               pBlock(race, sex, label='Race: Males', subset=sex=='Male') ~
+               region, data=d)
+plot(sb, text.at=1.3)
+plot(sb, groups='region', layout=c(1,3), key=list(space='top'),
+     text.at=1.15)
+\dontrun{
+plot(s, groups='treat')
+# plot(s, groups='treat', outerlabels=FALSE) for standard lattice output
+plot(s, groups='region', key=list(columns=2, space='bottom'))
+
+plot(summaryP(race + sex ~ region, data=d, exclude1=FALSE), col='green')
+
+# Make your own plot using data frame created by summaryP
+useOuterStrips(dotplot(val ~ freq | region * var, groups=treat, data=s,
+               xlim=c(0,1), scales=list(y='free', rot=0), xlab='Fraction',
+               panel=function(x, y, subscripts, ...) {
+                 denom <- s$denom[subscripts]
+                 x <- x / denom
+                 panel.dotplot(x=x, y=y, subscripts=subscripts, ...) }))
+
+# Show marginal summary for all regions combined
+s <- summaryP(race + sex ~ region, data=addMarginal(d, region))
+plot(s, groups='region', key=list(space='top'), layout=c(1,2))
+
+# Show marginal summaries for both race and sex
+s <- summaryP(ynbind(x1, x2, x3, x4, label='Exclusions', sort=FALSE) ~
+              race + sex, data=addMarginal(d, race, sex))
+plot(s, val ~ freq | sex*race)
+}
+}
+\keyword{hplot}
+\keyword{category}
+\keyword{manip}
+\concept{grouping}
+\concept{stratification}
+\concept{aggregation}
+\concept{cross-classification} |
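A minimal summaryP sketch (not part of the diff; assumes Hmisc is attached; column names per the description above):

    library(Hmisc)
    set.seed(3)
    d <- data.frame(sex=sample(c('Female', 'Male'), 60, TRUE),
                    treat=sample(c('A', 'B'), 60, TRUE))
    s <- summaryP(sex ~ treat, data=d, exclude1=FALSE)
    s         # tall/thin frame with var, val, freq, denom
    plot(s)   # multi-panel dot chart of the proportions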
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/summaryRc.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/summaryRc.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,132 @@ +\name{summaryRc} +\alias{summaryRc} +\title{Graphical Summarization of Continuous Variables Against a Response} +\description{ + \code{summaryRc} is a continuous version of \code{\link{summary.formula}} + with \code{method='response'}. It uses the \code{\link{plsmo}} + function to compute the possibly stratified \code{\link{lowess}} + nonparametric regression estimates, and plots them along with the data + density, with selected quantiles of the overall distribution (over + strata) of each \code{x} shown as arrows on top of the graph. All the + \code{x} variables must be numeric and continuous or nearly continuous. +} +\usage{ +summaryRc(formula, data=NULL, subset=NULL, + na.action=NULL, fun = function(x) x, + na.rm = TRUE, ylab=NULL, ylim=NULL, xlim=NULL, + nloc=NULL, datadensity=NULL, + quant = c(0.05, 0.1, 0.25, 0.5, 0.75, + 0.90, 0.95), quantloc=c('top','bottom'), + cex.quant=.6, srt.quant=0, + bpplot = c('none', 'top', 'top outside', 'top inside', 'bottom'), + height.bpplot=0.08, + trim=NULL, test = FALSE, vnames = c('labels', 'names'), \dots) +} +\arguments{ + \item{formula}{ + An \R formula with additive effects. The \code{formula} may contain + one or more invocations of the \code{stratify} function whose + arguments are defined below. This causes + the entire analysis to be stratified by cross-classifications of the + combined list of stratification factors. This stratification will be + reflected as separate \code{lowess} curves.} + \item{data}{ + name or number of a data frame. Default is the current frame. + } + \item{subset}{ + a logical vector or integer vector of subscripts used to specify the + subset of data to use in the analysis. The default is to use all + observations in the data frame. + } + \item{na.action}{ + function for handling missing data in the input data. The default is + a function defined here called \code{na.retain}, which keeps all + observations for processing, whether or not they contain missing + values. + } + \item{fun}{ + function for transforming \code{lowess} estimates. Default is the + identity function.} + \item{na.rm}{ + \code{TRUE} (the default) to exclude \code{NA}s before passing data to + \code{fun} to compute statistics, \code{FALSE} otherwise. + } + \item{ylab}{\code{y}-axis label. Default is label attribute of + \code{y} variable, or its name.} + \item{ylim}{\code{y}-axis limits. By default each graph is scaled on + its own.} + \item{xlim}{a list with elements named as the variable names appearing + on the \code{x}-axis, with each element being a 2-vector specifying + lower and upper limits. Any variable not appearing in the list will + have its limits computed and possibly \code{trim}med.} + \item{nloc}{location for sample size. Specify \code{nloc=FALSE} to + suppress, or \code{nloc=list(x=,y=)} where \code{x,y} are relative + coordinates in the data window. Default position is in the largest + empty space.} + \item{datadensity}{see \code{\link{plsmo}}. Defaults to \code{TRUE} + if there is a \code{stratify} variable, \code{FALSE} otherwise.} + \item{quant}{ + vector of quantiles to use for summarizing the marginal distribution + of each \code{x}. These must be numbers between 0 and 1, + inclusive. Use \code{NULL} to omit quantiles. + } + \item{quantloc}{specify \code{quantloc='bottom'} to place at the + bottom of each plot rather than the default} + \item{cex.quant}{character size for writing which quantiles are + represented. 
Set to \code{0} to suppress quantile labels.} + \item{srt.quant}{angle for text for quantile labels} + \item{bpplot}{if not \code{'none'} will draw extended box plot at + location given by \code{bpplot}, and quantiles discussed above will + be suppressed. Specifying \code{bpplot='top'} is the same as + specifying \code{bpplot='top inside'}.} + \item{height.bpplot}{height in inches of the horizontal extended box plot} + \item{trim}{The default is to plot from the 10th smallest to the 10th + largest \code{x} if the number of non-NAs exceeds 200, otherwise to + use the entire range of \code{x}. Specify another quantile to use + other limits, e.g., \code{trim=0.01} will use the first and last + percentiles} + \item{test}{ + Set to \code{TRUE} to plot test statistics (not yet implemented). + } + \item{vnames}{ + By default, plots are usually labeled with variable labels + (see the \code{label} and \code{sas.get} functions). To use the shorter + variable names, specify \code{vnames="names"}. + } + \item{...}{arguments passed to \code{\link{plsmo}}} +} +\value{no value is returned} +\author{ + Frank Harrell\cr + Department of Biostatistics\cr + Vanderbilt University\cr + \email{f.harrell@vanderbilt.edu} +} +\seealso{ + \code{\link{plsmo}}, \code{\link{stratify}}, + \code{\link{label}}, \code{\link{formula}}, \code{\link{panel.bpplot}} +} +\examples{ +options(digits=3) +set.seed(177) +sex <- factor(sample(c("m","f"), 500, rep=TRUE)) +age <- rnorm(500, 50, 5) +bp <- rnorm(500, 120, 7) +units(age) <- 'Years'; units(bp) <- 'mmHg' +label(bp) <- 'Systolic Blood Pressure' +L <- .5*(sex == 'm') + 0.1 * (age - 50) +y <- rbinom(500, 1, plogis(L)) +par(mfrow=c(1,2)) +summaryRc(y ~ age + bp) +# For x limits use 1st and 99th percentiles to frame extended box plots +summaryRc(y ~ age + bp, bpplot='top', datadensity=FALSE, trim=.01) +summaryRc(y ~ age + bp + stratify(sex), + label.curves=list(keys='lines'), nloc=list(x=.1, y=.05)) +y2 <- rbinom(500, 1, plogis(L + .5)) +Y <- cbind(y, y2) +summaryRc(Y ~ age + bp + stratify(sex), + label.curves=list(keys='lines'), nloc=list(x=.1, y=.05)) +} +\keyword{hplot} + + + |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/summaryS.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/summaryS.Rd Wed Jun 28 20:28:48 2017 -0400 |
| [ |
| @@ -0,0 +1,246 @@
+\name{summaryS}
+\alias{summaryS}
+\alias{plot.summaryS}
+\alias{mbarclPanel}
+\alias{medvPanel}
+\title{Summarize Multiple Response Variables and Make Multipanel Scatter
+  or Dot Plot}
+\description{
+Multiple left-hand formula variables along with right-hand side
+conditioning variables are reshaped into a "tall and thin" data frame if
+\code{fun} is not specified.  The resulting raw data can be plotted with
+the \code{plot} method using user-specified \code{panel} functions for
+\code{lattice} graphics, typically to make a scatterplot or \code{loess}
+smooths, or both.  The \code{Hmisc} \code{panel.plsmo} function is handy
+in this context.  Instead, if \code{fun} is specified, this function
+takes individual response variables (which may be matrices, as in
+\code{\link[survival]{Surv}} objects) and creates one or more summary
+statistics that will be computed while the resulting data frame is being
+collapsed to one row per condition.  The \code{plot} method in this case
+plots a multi-panel dot chart using the \code{lattice}
+\code{\link[lattice]{dotplot}} function if \code{panel} is not specified
+to \code{plot}.  There is an option to print
+selected statistics as text on the panels.  \code{summaryS} pays special
+attention to \code{Hmisc} variable annotations: \code{label, units}.
+When \code{panel} is specified in addition to \code{fun}, a special
+\code{x-y} plot is made that assumes that the \code{x}-axis variable
+(typically time) is discrete.  This is used for example to plot multiple
+quantile intervals as vertical lines next to the main point.  A special
+panel function \code{mbarclPanel} is provided for this purpose.
+
+When \code{fun} is given and \code{panel} is omitted, and the result of
+\code{fun} is a vector of more than one 
+statistic, the first statistic is taken as the main one.  Any columns
+with names not in \code{textonly} will figure into the calculation of
+axis limits.  Those in \code{textonly} will be printed right under the
+dot lines in the dot chart.  Statistics with names in \code{textplot}
+will figure into limits, be plotted, and printed.  \code{pch.stats} can
+be used to specify symbols for statistics after the first column.  When
+\code{fun} computes three columns that are plotted, columns two and
+three are taken as confidence limits for which horizontal "error bars"
+are drawn.  Two levels with different thicknesses are drawn if there are
+four plotted summary statistics beyond the first.
+
+\code{mbarclPanel} is used to draw multiple vertical lines around the
+main points, such as a series of quantile intervals stratified by
+\code{x} and paneling variables.  If \code{mbarclPanel} finds a column
+of an argument \code{yother} that is named \code{"se"}, and if there are
+exactly two levels to a superpositioning variable, the half-height of
+the approximate 0.95 confidence interval for the difference between two
+point estimates is shown, positioned at the midpoint of the two point
+estimates at an \code{x} value.  This assumes normality of point
+estimates, and the standard error of the difference is the square root
+of the sum of squares of the two standard errors.  By positioning the
+intervals in this fashion, a failure of the two point estimates to touch
+the half-confidence interval is consistent with rejecting the null
+hypothesis of no difference at the 0.05 level.
+
+\code{medvPanel} takes raw data and plots median \code{y} vs. \code{x},
+along with confidence intervals and half-interval for the difference in
+medians as with \code{mbarclPanel}.  Quantile intervals are optional.
+Very transparent vertical violin plots are added by default.  Unlike
+\code{panel.violin}, only half of the violin is plotted, and when there
+are two superpose groups they are side-by-side in different colors.
+}
+\usage{
+summaryS(formula, fun = NULL, data = NULL, subset = NULL,
+         na.action = na.retain, continuous=10, \dots)
+
+\method{plot}{summaryS}(x, formula=NULL, groups ... de{\link{summarize}}}
+\examples{
+# See tests directory file summaryS.r for more examples
+n <- 100
+set.seed(1)
+d <- data.frame(sbp=rnorm(n, 120, 10),
+                dbp=rnorm(n, 80, 10),
+                age=rnorm(n, 50, 10),
+                days=sample(1:n, n, TRUE),
+                S1=Surv(2*runif(n)), S2=Surv(runif(n)),
+                race=sample(c('Asian', 'Black/AA', 'White'), n, TRUE),
+                sex=sample(c('Female', 'Male'), n, TRUE),
+                treat=sample(c('A', 'B'), n, TRUE),
+                region=sample(c('North America','Europe'), n, TRUE),
+                meda=sample(0:1, n, TRUE), medb=sample(0:1, n, TRUE))
+
+d <- upData(d, labels=c(sbp='Systolic BP', dbp='Diastolic BP',
+            race='Race', sex='Sex', treat='Treatment',
+            days='Time Since Randomization',
+            S1='Hospitalization', S2='Re-Operation',
+            meda='Medication A', medb='Medication B'),
+            units=c(sbp='mmHg', dbp='mmHg', age='Year', days='Days'))
+
+s <- summaryS(age + sbp + dbp ~ days + region + treat,  data=d)
+# plot(s)   # 3 pages
+plot(s, groups='treat', datadensity=TRUE,
+     scat1d.opts=list(lwd=.5, nhistSpike=0))
+plot(s, groups='treat', panel=panel.loess, key=list(space='bottom', columns=2),
+     datadensity=TRUE, scat1d.opts=list(lwd=.5))
+
+# Make your own plot using data frame created by summaryP
+# xyplot(y ~ days | yvar * region, groups=treat, data=s,
+#        scales=list(y='free', rot=0))
+
+# Use loess to estimate the probability of two different types of events as
+# a function of time
+s <- summaryS(meda + medb ~ days + treat + region, data=d)
+pan <- function(...)
+   panel.plsmo(..., type='l', label.curves=max(which.packet()) == 1,
+               datadensity=TRUE)
+plot(s, groups='treat', panel=pan, paneldoesgroups=TRUE,
+     scat1d.opts=list(lwd=.7), cex.strip=.8)
+
+# Demonstrate dot charts of summary statistics
+s <- summaryS(age + sbp + dbp ~ region + treat, data=d, fun=mean)
+plot(s)
+plot(s, groups='treat', funlabel=expression(bar(X)))
+# Compute parametric confidence limits for mean, and include sample
+# sizes by naming a column "n"
+
+f <- function(x) {
+  x <- x[! is.na(x)]
+  c(smean.cl.normal(x, na.rm=FALSE), n=length(x))
+}
+s <- summaryS(age + sbp + dbp ~ region + treat, data=d, fun=f)
+plot(s, funlabel=expression(bar(X) \%+-\% t[0.975] \%*\% s))
+plot(s, groups='treat', cex.values=.65,
+     key=list(space='bottom', columns=2,
+              text=c('Treatment A:','Treatment B:')))
+
+# For discrete time, plot Harrell-Davis quantiles of y variables across
+# time using different line characteristics to distinguish quantiles
+d <- upData(d, days=round(days / 30) * 30)
+g <- function(y) {
+  probs <- c(0.05, 0.125, 0.25, 0.375)
+  probs <- sort(c(probs, 1 - probs))
+  y <- y[! is.na(y)]
+  w <- hdquantile(y, probs)
+  m <- hdquantile(y, 0.5, se=TRUE)
+  se <- as.numeric(attr(m, 'se'))
+  c(Median=as.numeric(m), w, se=se, n=length(y))
+}
+s <- summaryS(sbp + dbp ~ days + region, fun=g, data=d)
+plot(s, panel=mbarclPanel)
+plot(s, groups='region', panel=mbarclPanel, paneldoesgroups=TRUE)
+
+# For discrete time, plot median y vs x along with CL for difference,
+# using Harrell-Davis median estimator and its s.e., and use violin
+# plots
+
+s <- summaryS(sbp + dbp ~ days + region, data=d)
+plot(s, groups='region', panel=medvPanel, paneldoesgroups=TRUE)
+
+# Proportions and Wilson confidence limits, plus approx. Gaussian
+# based half/width confidence limits for difference in probabilities
+g <- function(y) {
+  y <- y[!is.na(y)]
+  n <- length(y)
+  p <- mean(y)
+  se <- sqrt(p * (1. - p) / n)
+  structure(c(binconf(sum(y), n), se=se, n=n),
+            names=c('Proportion', 'Lower', 'Upper', 'se', 'n'))
+}
+s <- summaryS(meda + medb ~ days + region, fun=g, data=d)
+plot(s, groups='region', panel=mbarclPanel, paneldoesgroups=TRUE)
+}
+\keyword{category}
+\keyword{hplot}
+\keyword{manip}
+\keyword{grouping}
+\concept{stratification}
+\concept{aggregation} |
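The difference half-interval that mbarclPanel draws, as described above, comes down to one line of arithmetic; a sketch with made-up standard errors (not part of the diff):

    se1 <- 0.8; se2 <- 0.6            # standard errors of the two point estimates
    se.diff <- sqrt(se1^2 + se2^2)    # SE of the difference, assuming normality
    half <- 1.96 * se.diff            # half-height of the 0.95 CI for the difference
    # this segment is drawn centered at the midpoint of the two estimates;
    # if the estimates fail to touch it, they differ at about the 0.05 level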
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/symbol.freq.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/symbol.freq.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,49 @@ +\name{symbol.freq} +\alias{symbol.freq} +\title{Graphic Representation of a Frequency Table} +\description{ +This function can be used to represent +contingency tables graphically. Frequency counts are represented as +the heights of "thermometers" by default; you can also specify +\code{symbol='circle'} to the function. There is an option to include +marginal frequencies, which are plotted on a halved scale so as to not +overwhelm the plot. If you do not ask for marginal frequencies to be +plotted using \code{marginals=TRUE}, \code{symbol.freq} will ask you to click +the mouse where a reference symbol is to be drawn to assist in reading +the scale of the frequencies. + +\code{label} attributes, if present, are used for x- and y-axis labels. +Otherwise, names of calling arguments are used. +} +\usage{ +symbol.freq(x, y, symbol = c("thermometer", "circle"), + marginals = FALSE, orig.scale = FALSE, + inches = 0.25, width = 0.15, subset, srtx = 0, ...) +} +\arguments{ + \item{x}{first variable to cross-classify} + \item{y}{second variable} + \item{symbol}{specify \code{"thermometer"} (the default) or \code{"circle"}} + \item{marginals}{set to \code{TRUE} to add marginal frequencies + (scaled by half) to the plot} + \item{orig.scale}{set to \code{TRUE} when the first two arguments are + numeric variables; this uses their original values for x and y + coordinates} + \item{inches}{see \code{\link{symbols}}} + \item{width}{see \code{thermometers} option in \code{symbols}} + \item{subset}{the usual subsetting vector} + \item{srtx}{rotation angle for x-axis labels} + \item{\dots}{other arguments to pass to \code{symbols}} +} +\author{Frank Harrell} +\seealso{\code{\link{symbols}}} +\examples{ +\dontrun{ +getHdata(titanic) +attach(titanic) +age.tertile <- cut2(titanic$age, g=3) +symbol.freq(age.tertile, pclass, marginals=TRUE, srtx=45) +detach(2) +}} +\keyword{hplot} |
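A self-contained sketch with simulated factors (not part of the diff; assumes Hmisc is attached; marginals=TRUE avoids the interactive mouse-click reference symbol):

    library(Hmisc)
    set.seed(4)
    x <- factor(sample(c('a', 'b', 'c'), 200, TRUE))
    y <- factor(sample(c('low', 'high'), 200, TRUE))
    symbol.freq(x, y, marginals=TRUE, srtx=45)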
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/sys.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/sys.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,37 @@ +\name{sys} +\alias{sys} +\title{ +Run Unix or Dos Depending on System +} +\description{ +Runs \code{unix} or \code{dos} depending on the current operating system. For +\R, just runs \code{system} with optional concatenation of first two +arguments which are assumed named \code{command} and \code{text}. +} +\usage{ +sys(command, text=NULL, output=TRUE) +# S-Plus: sys(\dots, minimized=FALSE) +} +\arguments{ +\item{command}{ +system command to execute +} +\item{text}{ +text to concatenate to system command, if any (typically options or file +names or both) +} +\item{output}{ + set to \code{FALSE} to not return output of command as a character + vector +} +} +\value{ +see \code{unix} or \code{dos} +} +\section{Side Effects}{ +executes system commands +} +\seealso{ +\code{\link{unix}}, \code{\link{system}} +} +\keyword{interface} |
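A one-line sketch of sys under \R (not part of the diff; assumes a unix-like system where 'ls' exists; on Windows a DOS command such as 'dir' would be needed):

    library(Hmisc)
    files <- sys('ls', '-1')   # command output captured as a character vector
    length(files)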
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/t.test.cluster.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/t.test.cluster.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,45 @@ +\name{t.test.cluster} +\alias{t.test.cluster} +\alias{print.t.test.cluster} +\title{t-test for Clustered Data} +\description{ +Does a 2-sample t-test for clustered data. +} +\usage{ +t.test.cluster(y, cluster, group, conf.int = 0.95) +\method{print}{t.test.cluster}(x, digits, \dots) +} +\arguments{ + \item{y}{normally distributed response variable to test} + \item{cluster}{cluster identifiers, e.g. subject ID} + \item{group}{grouping variable with two values} + \item{conf.int}{confidence coefficient to use for confidence limits} + \item{x}{an object created by \code{t.test.cluster}} + \item{digits}{number of significant digits to print} + \item{\dots}{unused} +} +\value{ + a matrix of statistics of class \code{t.test.cluster} +} +\references{ + Donner A, Birkett N, Buck C, Am J Epi 114:906-914, 1981. + + Donner A, Klar N, J Clin Epi 49:435-439, 1996. + + Hsieh FY, Stat in Med 8:1195-1201, 1988. +} +\author{Frank Harrell} +\seealso{\code{\link{t.test}}} +\examples{ +set.seed(1) +y <- rnorm(800) +group <- sample(1:2, 800, TRUE) +cluster <- sample(1:40, 800, TRUE) +table(cluster,group) +t.test(y ~ group) # R only +t.test.cluster(y, cluster, group) +# Note: negate estimates of differences from t.test to +# compare with t.test.cluster +} +\keyword{htest} + |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/tabulr.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/tabulr.Rd Wed Jun 28 20:28:48 2017 -0400 |
| [ |
| @@ -0,0 +1,165 @@ +\name{tabulr} +\alias{tabulr} +\alias{table_trio} +\alias{table_N} +\alias{table_freq} +\alias{table_pc} +\alias{table_latexdefs} +\alias{table_formatpct} +\alias{nFm} +\title{Interface to Tabular Function} +\description{ +\code{\link{tabulr}} is a front-end to the \code{tables} package's +\code{\link[tables]{tabular}} function so that the user can take +advantage of variable annotations used by the \code{Hmisc} package, +particularly those created by the \code{\link{label}}, \code{\link{units}}, and +\code{\link{upData}} functions. When a variable appears in a +\code{\link[tables]{tabular}} function, the +variable \code{x} is found in the \code{data} argument or in the parent +environment, and the \code{\link{labelLatex}} function is used to create +a LaTeX label. By default any units of measurement are right justified +in the current LaTeX tabular field using \code{hfill}; use \code{nofill} +to list variables for which \code{units} are not right-justified with +\code{hfill}. Once the label is constructed, the variable name is +preceded by \code{Heading("LaTeX label")*x} in the formula before it is +passed to \code{\link[tables]{tabular}}. \code{nolabel} can be used to +specify variables for which labels are ignored. + +\code{tabulr} also replaces \code{trio} with \code{table_trio}, \code{N} +with \code{table_N}, and \code{freq} with \code{table_freq} in the +formula. + +\code{table_trio} is a function that takes a numeric vector and computes +the three quartiles and optionally the mean and standard deviation, and +outputs a LaTeX-formatted character string representing the results. By +default, calculated statistics are formatted with 3 digits to the left +and 1 digit to the right of the decimal point. Running +\code{\link[tables]{table_options}(left=l, right=r)} will use \code{l} +and \code{r} digits instead. Other options that can be given to +\code{table_options} are \code{prmsd=TRUE} to add mean +/- standard +deviation to the result, \code{pn=TRUE} to add the sample size, +\code{bold=TRUE} to set the median in bold face, \code{showfreq='all', + 'low', 'high'} used by the \code{table_freq} function, \code{pctdec}, +specifying the number of places to the right of the decimal point for +percentages (default is zero), and +\code{npct='both','numerator','denominator','none'} used by +\code{table_formatpct} to control what appears after the percent. +Option \code{pnformat} may be specified to control the formatting for +\code{pn}. The default is \code{"(n=..)"}. Specify +\code{pnformat="non"} to suppress \code{"n="}. \code{pnwhen} specifies +when to print the number of observations. The default is +\code{"always"}. Specify \code{pnwhen="ifna"} to include \code{n} only +if there are missing values in the vector being processed. + +\code{tabulr} substitutes \code{table_N} for \code{N} in the formula. +This is used to create column headings for the number of observations, +without a row label. + +\code{table_freq} analyzes a character variable to compute, for a single +output cell, the percents, numerator, and denominator for each category, +or optionally just the maximum or minimum, as specified by +\code{table_options(showfreq)}. + +\code{table_formatpct} is a function that formats percents depending on +settings of options in \code{table_options}. + +\code{nFm} is a function that calls \code{\link{sprintf}} to format +numeric values to have a specific number of digits to the \code{left} +and to the \code{right} of the point. 
+ +\code{table_latexdefs} writes (by default) to the console a set of LaTeX +definitions that can be invoked at any point thereafter in a \code{knitr} or +\code{sweave} document by naming the macro, preceded by a single +slash. The \code{blfootnote} macro is called with a single LaTeX +argument which will appear as a footnote without a number. +\code{keytrio} invokes \code{blfootnote} to define the output of +\code{table_trio} if mean and SD are not included. If mean and SD are +included, use \code{keytriomsd}. +} +\usage{ +tabulr(formula, data = NULL, nolabel=NULL, nofill=NULL, \dots) +table_trio(x) +table_freq(x) +table_formatpct(num, den) +nFm(x, left, right, neg=FALSE, pad=FALSE) +table_latexdefs(file='') +} +\arguments{ + \item{formula}{a formula suitable for \code{\link[tables]{tabular}} + except for the addition of \code{.(variable name)}, + \code{.n()}, \code{trio}.} + \item{data}{a data frame or list. If omitted, the parent environment + is assumed to contain the variables.} + \item{nolabel}{a formula such as \code{~ x1 + x2} containing the list + of variables for which labels are to be ignored, forcing use of the + variable name} + \item{nofill}{a formula such as \code{~ x1 + x2} containing the list of + variables for which units of measurement are not to be + right-justified in the field using the LaTeX \code{hfill} directive} + \item{\dots}{other arguments to \code{tabular}} + \item{x}{a numeric vector} + \item{num}{a single numerator or vector of numerators} + \item{den}{a single denominator} + \item{left, right}{number of places to the left and right of the + decimal point, respectively} + \item{neg}{set to \code{TRUE} if negative \code{x} values are allowed, + to add one more space to the left of the decimal place} + \item{pad}{set to \code{TRUE} to replace blanks with the LaTeX tilde + placeholder} + \item{file}{location of output of \code{table_latexdefs}} +} +\value{\code{tabulr} returns an object of class \code{"tabular"}} +\author{Frank Harrell} +\seealso{\code{\link[tables]{tabular}}, \code{\link{label}}, + \code{\link{latex}}, \code{\link{summaryM}}} +\examples{ +\dontrun{ +n <- 400 +set.seed(1) +d <- data.frame(country=factor(sample(c('US','Canada','Mexico'), n, TRUE)), + sex=factor(sample(c('Female','Male'), n, TRUE)), + age=rnorm(n, 50, 10), + sbp=rnorm(n, 120, 8)) +d <- upData(d, + preghx=ifelse(sex=='Female', sample(c('No','Yes'), n, TRUE), NA), + labels=c(sbp='Systolic BP', age='Age', preghx='Pregnancy History'), + units=c(sbp='mmHg', age='years')) +contents(d) +require(tables) +invisible(booktabs())  # use booktabs LaTeX style for tabular +g <- function(x) { + x <- x[!is.na(x)] + if(length(x) == 0) return('') + paste(latexNumeric(nFm(mean(x), 3, 1)), + ' \hfill{\smaller[2](', length(x), ')}', sep='') +} +tab <- tabulr((age + Heading('Females')*(sex == 'Female')*sbp)* + Heading()*g + (age + sbp)*Heading()*trio ~ + Heading()*country*Heading()*sex, data=d) +# Formula after interpretation by tabulr: +# (Heading('Age\hfill {\smaller[2] years}') * age + Heading("Females") +# * (sex == "Female") * Heading('Systolic BP {\smaller[2] mmHg}') * sbp) +# * Heading() * g + (age + sbp) * Heading() * table_trio ~ Heading() +# * country * Heading() * sex +cat('\begin{landscape}\n') +cat('\begin{minipage}{\textwidth}\n') +cat('\keytrio\n') +latex(tab) +cat('\end{minipage}\end{landscape}\n') + +getHdata(pbc) +pbc <- upData(pbc, moveUnits=TRUE) +# Convert to character to prevent tabular from stratifying +for(x in c('sex', 'stage', 'spiders')) { + pbc[[x]] <- 
as.character(pbc[[x]]) + label(pbc[[x]]) <- paste(toupper(substring(x, 1, 1)), substring(x, 2), sep='') +} +table_options(pn=TRUE, showfreq='all') +tab <- tabulr((bili + albumin + protime + age) * + Heading()*trio + + (sex + stage + spiders)*Heading()*freq ~ drug, data=pbc) +latex(tab) +} +} +\keyword{utilities} +\keyword{interface} |
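The nFm description above leaves the sprintf call implicit. A minimal sketch of the idea, under the assumption that the field width is left + right + 1 to leave room for the decimal point (this mirrors the documented behavior, not Hmisc's actual source):

# assumed width: 'left' digits, the decimal point, then 'right' digits
nFm_sketch <- function(x, left, right) {
  width <- left + right + if (right > 0) 1 else 0
  sprintf(paste0('%', width, '.', right, 'f'), x)
}
nFm_sketch(c(3.14159, 120.4), 3, 1)   # "  3.1" "120.4"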
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/tex.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/tex.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,78 @@ +\name{tex} +\alias{tex} +\title{ + function for use in graphs that are used with the psfrag package in LaTeX +} +\description{ + \code{tex} is a little function to save typing when including TeX + commands in graphs that are used with the psfrag package in LaTeX to + typeset any LaTeX text inside a postscript graphic. \code{tex} + surrounds the input character string with \samp{\tex[options]\{\}}. + This is especially useful for getting Greek letters and math symbols + in postscript graphs. By default \code{tex} returns a string with + \code{psfrag} commands specifying that the string be centered, not + rotated, and not specially enlarged or shrunk. +} +\usage{ +tex(string, lref='c', psref='c', scale=1, srt=0) +} +\arguments{ + \item{string}{ + a character string to be processed by \code{psfrag} in LaTeX. + } + \item{lref}{ + LaTeX reference point for \code{string}. See the \code{psfrag} + documentation referenced below. Default is \code{"c"} for centered + (this is also the default for \code{psref}). + } + \item{psref}{ + PostScript reference point. + } + \item{scale}{ + scale factor, default is 1 + } + \item{srt}{ + rotation for \code{string} in degrees (default is zero) + } +} +\value{ + \code{tex} returns a modified character string. +} +\author{ + Frank Harrell\cr + Department of Biostatistics\cr + Vanderbilt University\cr + \email{f.harrell@vanderbilt.edu} +} +\references{ + Grant MC, Carlisle (1998): The PSfrag System, Version 3. Full + documentation is obtained by searching www.ctan.org for \file{pfgguide.ps}. +} +\seealso{ + \code{\link{postscript}}, \code{\link{par}}, \code{\link{ps.options}}, + \code{\link{mgp.axis.labels}}, \code{\link{pdf}}, + \code{\link[lattice]{trellis.device}}, \code{\link{setTrellis}} +} +\examples{ +\dontrun{ +postscript('test.ps') +x <- seq(0,15,length=100) +plot(x, dchisq(x, 5), xlab=tex('$x$'), + ylab=tex('$f(x)$'), type='l') +title(tex('Density Function of the $\\chi_{5}^{2}$ Distribution')) +dev.off() +# To process this file in LaTeX do something like +#\documentclass{article} +#\usepackage[scanall]{psfrag} +#\begin{document} +#\begin{figure} +#\includegraphics{test.ps} +#\caption{This is an example} +#\end{figure} +#\end{document} +} +} +\keyword{hplot} +\keyword{device} +\concept{trellis} +\concept{lattice} |
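For intuition about the string tex() builds, here is a hypothetical one-line version; the bracket order [lref][psref][scale][srt] follows the psfrag \tex macro convention and is an assumption for illustration, not a quote of Hmisc's code:

# hypothetical re-implementation; bracket order is assumed from psfrag docs
tex_sketch <- function(string, lref='c', psref='c', scale=1, srt=0)
  paste0('\\tex[', lref, '][', psref, '][', scale, '][', srt, ']{', string, '}')
cat(tex_sketch('$\\beta$'))   # \tex[c][c][1][0]{$\beta$}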
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/transace.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/transace.Rd Wed Jun 28 20:28:48 2017 -0400 |
| b'@@ -0,0 +1,575 @@\n+\\name{transace}\n+\\alias{transace}\n+\\alias{areg.boot}\n+\\alias{print.areg.boot}\n+\\alias{plot.areg.boot}\n+\\alias{predict.areg.boot}\n+\\alias{summary.areg.boot}\n+\\alias{print.summary.areg.boot}\n+\\alias{Function.areg.boot}\n+\\alias{Mean}\n+\\alias{Mean.areg.boot}\n+\\alias{Quantile}\n+\\alias{Quantile.areg.boot}\n+\\alias{monotone}\n+\\alias{smearingEst}\n+\\title{\n+ Additive Regression and Transformations using ace or avas\n+}\n+\\description{\n+ \\code{transace} is \\code{\\link[acepack]{ace}} packaged for easily automatically\n+ transforming all variables in a matrix. \\code{transace} is a fast\n+ one-iteration version of \\code{\\link{transcan}} without imputation of\n+ \\code{NA}s.\n+\n+ \\code{areg.boot} uses \\code{\\link{areg}} or\n+ \\code{\\link[acepack]{avas}} to fit additive regression models allowing\n+ all variables in the model (including the left-hand-side) to be\n+ transformed, with transformations chosen so as to optimize certain\n+ criteria. The default method uses \\code{\\link{areg}} whose goal it is\n+ to maximize \\eqn{R^2}. \\code{method="avas"} explicity tries to\n+ transform the response variable so as to stabilize the variance of the\n+ residuals. All-variables-transformed models tend to inflate \\code{R^2}\n+ and it can be difficult to get confidence limits for each\n+ transformation. \\code{areg.boot} solves both of these problems using\n+ the bootstrap. As with the \\code{\\link[rms]{validate}} function in the\n+ \\pkg{rms} library, the Efron bootstrap is used to estimate the\n+ optimism in the apparent \\eqn{R^2}, and this optimism is subtracted\n+ from the apparent \\eqn{R^2} to optain a bias-corrected \\eqn{R^2}.\n+ This is done however on the transformed response variable scale.\n+\n+ Tests with 3 predictors show that the \\code{\\link[acepack]{avas}} and\n+ \\code{\\link[acepack]{ace}} estimates are unstable unless the sample size\n+ exceeds 350. Apparent \\eqn{R^2} with low sample sizes can be very\n+ inflated, and bootstrap estimates of \\eqn{R^2} can be even more\n+ unstable in such cases, resulting in optimism-corrected \\eqn{R^2} that\n+ are much lower even than the actual \\eqn{R^2}. The situation can be\n+ improved a little by restricting predictor transformations to be\n+ monotonic. On the other hand, the \\code{areg} approach allows one to\n+ control overfitting by specifying the number of knots to use for each\n+ continuous variable in a restricted cubic spline function.\n+\n+ For \\code{method="avas"} the response transformation is restricted to\n+ be monotonic. You can specify restrictions for transformations of\n+ predictors (and linearity for the response). When the first argument\n+ is a formula, the function automatically determines which variables\n+ are categorical (i.e., \\code{factor}, \\code{category}, or character\n+ vectors). Specify linear transformations by enclosing variables by\n+ the identify function (\\code{I()}), and specify monotonicity by using\n+ \\code{monotone(\\var{variable})}. Monotonicity restrictions are not\n+ allowed with \\code{method="areg"}.\n+ \n+ The \\code{\\link{summary}} method for \\code{areg.boot} computes\n+ bootstrap estimates of standard errors of differences in predicted\n+ responses (usually on the original scale) for selected levels of each\n+ predictor against the lowest level of the predictor. The smearing\n+ estimator (see below) can be used here to estimate differences in\n+ predicted means, medians, or many other statistics. 
By default,\n+ quartiles are used for continuous predictors and all levels are used\n+ for categorical ones. See \\cite{Details} below. There is also a\n+ \\code{\\link{plot}} method for plotting transformation estimates,\n+ transformations for individual bootstrap re-samples, and pointwise\n+ confidence limits for transformations. Unless you already have a\n+ \\code{par(mfrow=)} in effect with more than one row or column,\n+ \\code{plot} will try to fit the plots on one page. A\n+ \\code{\\link{predict}} method computes predicted values on the ori'..b"(sample(c('cat','dog','cow'), 200,TRUE)) # also unrelated to y\n+y <- exp(x1 + rnorm(200)/3)\n+f <- areg.boot(y ~ x1 + x2 + x3, B=40)\n+f\n+plot(f)\n+# Note that the fitted transformation of y is very nearly log(y)\n+# (the appropriate one), the transformation of x1 is nearly linear,\n+# and the transformations of x2 and x3 are essentially flat \n+# (specifying monotone(x2) if method='avas' would have resulted\n+# in a smaller confidence band for x2)\n+\n+\n+summary(f)\n+\n+\n+# use summary(f, values=list(x2=c(.2,.5,.8))) for example if you\n+# want to use nice round values for judging effects\n+\n+\n+# Plot Y hat vs. Y (this doesn't work if there were NAs)\n+plot(fitted(f), y) # or: plot(predict(f,statistic='fitted'), y)\n+\n+\n+# Show fit of model by varying x1 on the x-axis and creating separate\n+# panels for x2 and x3. For x2 using only a few discrete values\n+newdat <- expand.grid(x1=seq(-2,2,length=100),x2=c(.25,.75),\n+ x3=c('cat','dog','cow'))\n+yhat <- predict(f, newdat, statistic='fitted') \n+# statistic='mean' to get estimated mean rather than simple inverse trans.\n+xYplot(yhat ~ x1 | x2, groups=x3, type='l', data=newdat)\n+\n+\n+\\dontrun{\n+# Another example, on hypothetical data\n+f <- areg.boot(response ~ I(age) + monotone(blood.pressure) + race)\n+# use I(response) to not transform the response variable\n+plot(f, conf.int=.9)\n+# Check distribution of residuals\n+plot(fitted(f), resid(f))\n+qqnorm(resid(f))\n+# Refit this model using ols so that we can draw a nomogram of it.\n+# The nomogram will show the linear predictor, median, mean.\n+# The last two are smearing estimators.\n+Function(f, type='individual') # create transformation functions\n+f.ols <- ols(.response(response) ~ age + \n+ .blood.pressure(blood.pressure) + .race(race))\n+# Note: This model is almost exactly the same as f but there\n+# will be very small differences due to interpolation of\n+# transformations\n+meanr <- Mean(f) # create function of lp computing mean response\n+medr <- Quantile(f) # default quantile is .5\n+nomogram(f.ols, fun=list(Mean=meanr,Median=medr))\n+\n+\n+# Create S functions that will do the transformations\n+# This is a table look-up with linear interpolation\n+g <- Function(f)\n+plot(blood.pressure, g$blood.pressure(blood.pressure))\n+# produces the central curve in the last plot done by plot(f)\n+}\n+\n+\n+# Another simulated example, where y has a log-normal distribution\n+# with mean x and variance 1. Untransformed y thus has median\n+# exp(x) and mean exp(x + .5sigma^2) = exp(x + .5)\n+# First generate data from the model y = exp(x + epsilon),\n+# epsilon ~ Gaussian(0, 1)\n+\n+\n+set.seed(139)\n+n <- 1000\n+x <- rnorm(n)\n+y <- exp(x + rnorm(n))\n+f <- areg.boot(y ~ x, B=20)\n+plot(f) # note log shape for y, linear for x. 
Good!\n+xs <- c(-2, 0, 2)\n+d <- data.frame(x=xs)\n+predict(f, d, 'fitted')\n+predict(f, d, 'median') # almost same; median residual=-.001\n+exp(xs) # population medians\n+predict(f, d, 'mean')\n+exp(xs + .5) # population means\n+\n+\n+# Show how smearingEst works\n+res <- c(-1,0,1) # define residuals\n+y <- 1:5\n+ytrans <- log(y)\n+ys <- seq(.1,15,length=50)\n+trans.approx <- list(x=log(ys), y=ys)\n+options(digits=4)\n+smearingEst(ytrans, exp, res, 'fitted') # ignores res\n+smearingEst(ytrans, trans.approx, res, 'fitted') # ignores res \n+smearingEst(ytrans, exp, res, 'median') # median res=0\n+smearingEst(ytrans, exp, res+.1, 'median') # median res=.1\n+smearingEst(ytrans, trans.approx, res, 'median')\n+smearingEst(ytrans, exp, res, 'mean')\n+mean(exp(ytrans[2] + res)) # should equal 2nd # above\n+smearingEst(ytrans, trans.approx, res, 'mean')\n+smearingEst(ytrans, trans.approx, res, mean)\n+# Last argument can be any statistical function operating\n+# on a vector that returns a single value\n+}\n+\\keyword{nonparametric}\n+\\keyword{smooth}\n+\\keyword{multivariate}\n+\\keyword{nonlinear}\n+\\keyword{regression}\n+\\concept{bootstrap}\n+\\concept{model validation}\n" |
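The smearing estimator used by summary.areg.boot and smearingEst above can be sketched in one line of base R; this is Duan's (1983) general recipe, not the Hmisc implementation:

# back-transform (fitted value + each residual), then average
smearing_sketch <- function(yhat_trans, inverse, res) mean(inverse(yhat_trans + res))
set.seed(1)
res <- rnorm(500)              # residuals on the log-transformed scale
smearing_sketch(2, exp, res)   # close to exp(2 + 1/2), the lognormal mean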
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/transcan.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/transcan.Rd Wed Jun 28 20:28:48 2017 -0400 |
| b'@@ -0,0 +1,1054 @@\n+\\name{transcan}\n+\\alias{transcan}\n+\\alias{summary.transcan}\n+\\alias{print.transcan}\n+\\alias{plot.transcan}\n+\\alias{impute.transcan}\n+\\alias{predict.transcan}\n+\\alias{Function}\n+\\alias{Function.transcan}\n+\\alias{fit.mult.impute}\n+\\alias{vcov.default}\n+\\alias{vcov.fit.mult.impute}\n+\\alias{[.transcan}\n+\\alias{invertTabulated}\n+\\title{\n+ Transformations/Imputations using Canonical Variates\n+}\n+\\description{\n+ \\code{transcan} is a nonlinear additive transformation and imputation\n+ function, and there are several functions for using and operating on\n+ its results. \\code{transcan} automatically transforms continuous and\n+ categorical variables to have maximum correlation with the best linear\n+ combination of the other variables. There is also an option to use a\n+ substitute criterion - maximum correlation with the first principal\n+ component of the other variables. Continuous variables are expanded\n+ as restricted cubic splines and categorical variables are expanded as\n+ contrasts (e.g., dummy variables). By default, the first canonical\n+ variate is used to find optimum linear combinations of component\n+ columns. This function is similar to \\code{\\link[acepack]{ace}} except that\n+ transformations for continuous variables are fitted using restricted\n+ cubic splines, monotonicity restrictions are not allowed, and\n+ \\code{NA}s are allowed. When a variable has any \\code{NA}s,\n+ transformed scores for that variable are imputed using least squares\n+ multiple regression incorporating optimum transformations, or\n+ \\code{NA}s are optionally set to constants. Shrinkage can be used to\n+ safeguard against overfitting when imputing. Optionally, imputed\n+ values on the original scale are also computed and returned. For this\n+ purpose, recursive partitioning or multinomial logistic models can\n+ optionally be used to impute categorical variables, using what is\n+ predicted to be the most probable category.\n+ \n+ By default, \\code{transcan} imputes \\code{NA}s with \\dQuote{best\n+ guess} expected values of transformed variables, back transformed to\n+ the original scale. Values thus imputed are most like conditional\n+ medians assuming the transformations make variables\' distributions\n+ symmetric (imputed values are similar to conditionl modes for\n+ categorical variables). By instead specifying \\code{n.impute},\n+ \\code{transcan} does approximate multiple imputation from the\n+ distribution of each variable conditional on all other variables.\n+ This is done by sampling \\code{n.impute} residuals from the\n+ transformed variable, with replacement (a la bootstrapping), or by\n+ default, using Rubin\'s approximate Bayesian bootstrap, where a sample\n+ of size \\var{n} with replacement is selected from the residuals on\n+ \\var{n} non-missing values of the target variable, and then a sample\n+ of size \\var{m} with replacement is chosen from this sample, where\n+ \\var{m} is the number of missing values needing imputation for the\n+ current multiple imputation repetition. Neither of these bootstrap\n+ procedures assume normality or even symmetry of residuals. For\n+ sometimes-missing categorical variables, optimal scores are computed\n+ by adding the \\dQuote{best guess} predicted mean score to random\n+ residuals off this score. Then categories having scores closest to\n+ these predicted scores are taken as the random multiple imputations\n+ (\\code{impcat = "rpart"} is not currently allowed\n+ with \\code{n.impute}). 
The literature recommends using \\code{n.impute\n+ = 5} or greater. \\code{transcan} provides only an approximation to\n+ multiple imputation, especially since it \\dQuote{freezes} the\n+ imputation model before drawing the multiple imputations rather than\n+ using different estimates of regression coefficients for each\n+ imputation. For multiple imputation, the \\code{\\link{aregImpute}} function\n+ provides a much better approximation to the full Bayesian approach\n+ while still not requiri'..b"0)\n+x1[1:20] <- NA\n+x2[18:23] <- NA\n+d <- data.frame(x1,x2,y)\n+n <- naclus(d)\n+plot(n); naplot(n) # Show patterns of NAs\n+f <- transcan(~y + x1 + x2, n.impute=10, shrink=FALSE, data=d)\n+options(digits=3)\n+summary(f)\n+\n+\n+f <- transcan(~y + x1 + x2, n.impute=10, shrink=TRUE, data=d)\n+summary(f)\n+\n+\n+h <- fit.mult.impute(y ~ x1 + x2, lm, f, data=d)\n+# Add ,fit.reps=TRUE to save all fit objects in h, then do something like:\n+# for(i in 1:length(h$fits)) print(summary(h$fits[[i]]))\n+\n+\n+diag(vcov(h))\n+\n+\n+h.complete <- lm(y ~ x1 + x2, na.action=na.omit)\n+h.complete\n+diag(vcov(h.complete))\n+\n+\n+# Note: had the rms ols function been used in place of lm, any\n+# function run on h (anova, summary, etc.) would have automatically\n+# used imputation-corrected variances and covariances\n+\n+\n+# Example demonstrating how using the multinomial logistic model\n+# to impute a categorical variable results in a frequency\n+# distribution of imputed values that matches the distribution\n+# of non-missing values of the categorical variable\n+\n+\n+\\dontrun{\n+set.seed(11)\n+x1 <- factor(sample(letters[1:4], 1000,TRUE))\n+x1[1:200] <- NA\n+table(x1)/sum(table(x1))\n+x2 <- runif(1000)\n+z <- transcan(~ x1 + I(x2), n.impute=20, impcat='multinom')\n+table(z$imputed$x1)/sum(table(z$imputed$x1))\n+\n+# Here is how to create a completed dataset\n+d <- data.frame(x1, x2)\n+z <- transcan(~x1 + I(x2), n.impute=5, data=d)\n+imputed <- impute(z, imputation=1, data=d,\n+ list.out=TRUE, pr=FALSE, check=FALSE)\n+sapply(imputed, function(x)sum(is.imputed(x)))\n+sapply(imputed, function(x)sum(is.na(x)))\n+}\n+\n+# Example where multiple imputations are for basic variables and\n+# modeling is done on variables derived from these\n+\n+\n+set.seed(137)\n+n <- 400\n+x1 <- runif(n)\n+x2 <- runif(n)\n+y <- x1*x2 + x1/(1+x2) + rnorm(n)/3\n+x1[1:5] <- NA\n+d <- data.frame(x1,x2,y)\n+w <- transcan(~ x1 + x2 + y, n.impute=5, data=d)\n+# Add ,show.imputed.actual for graphical diagnostics\n+\\dontrun{\n+g <- fit.mult.impute(y ~ product + ratio, ols, w,\n+ data=data.frame(x1,x2,y),\n+ derived=expression({\n+ product <- x1*x2\n+ ratio <- x1/(1+x2)\n+ print(cbind(x1,x2,x1*x2,product)[1:6,])}))\n+}\n+\n+\n+# Here's a method for creating a permanent data frame containing\n+# one set of imputed values for each variable specified to transcan\n+# that had at least one NA, and also containing all the variables\n+# in an original data frame. The following is based on the fact\n+# that the default output location for impute.transcan is\n+# given by the global environment\n+\n+\n+\\dontrun{\n+xt <- transcan(~. , data=mine,\n+ imputed=TRUE, shrink=TRUE, n.impute=10, trantab=TRUE)\n+attach(mine, use.names=FALSE)\n+impute(xt, imputation=1) # use first imputation\n+# omit imputation= if using single imputation\n+detach(1, 'mine2')\n+}\n+\n+\n+# Example of using invertTabulated outside transcan\n+x <- c(1,2,3,4,5,6,7,8,9,10)\n+y <- c(1,2,3,4,5,5,5,5,9,10)\n+freq <- c(1,1,1,1,1,2,3,4,1,1)\n+# x=5,6,7,8 with prob. 
.1 .2 .3 .4 when y=5\n+# Within a tolerance of .05*(10-1) all y's match exactly\n+# so the distance measure does not play a role\n+set.seed(1) # so can reproduce\n+for(inverse in c('linearInterp','sample'))\n+ print(table(invertTabulated(x, y, freq, rep(5,1000), inverse=inverse)))\n+\n+\n+# Test inverse='sample' when the estimated transformation is\n+# flat on the right. First show default imputations\n+set.seed(3)\n+x <- rnorm(1000)\n+y <- pmin(x, 0)\n+x[1:500] <- NA\n+for(inverse in c('linearInterp','sample')) {\n+par(mfrow=c(2,2))\n+ w <- transcan(~ x + y, imputed.actual='hist',\n+ inverse=inverse, curtail=FALSE,\n+ data=data.frame(x,y))\n+ if(inverse=='sample') next\n+# cat('Click mouse on graph to proceed\\n')\n+# locator(1)\n+}\n+}\n+\\keyword{smooth}\n+\\keyword{regression}\n+\\keyword{multivariate}\n+\\keyword{methods}\n+\\keyword{models}\n+\\concept{bootstrap}\n+% Converted by Sd2Rd version 1.21.\n" |
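Rubin's approximate Bayesian bootstrap described in the transcan help above is easy to state as standalone code (a sketch of the sampling scheme only, not transcan's internals):

abb_draw <- function(res, m) {
  # stage 1: n residuals drawn with replacement from the n observed residuals
  stage1 <- sample(res, length(res), replace = TRUE)
  # stage 2: the m needed imputation residuals drawn from the stage-1 sample
  sample(stage1, m, replace = TRUE)
}
set.seed(1)
abb_draw(rnorm(100), m = 5)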
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/translate.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/translate.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,45 @@ +\name{translate} +\alias{translate} +\title{ +Translate Vector or Matrix of Text Strings +} +\description{ +Uses the UNIX tr command to translate any character in \code{old} in +\code{text} to the corresponding character in \code{new}. If \code{multichar=TRUE} +or \code{old} and \code{new} have more than one element, or each have one element +but they have different numbers of characters, +uses the UNIX \code{sed} command to translate the series of characters in +\code{old} to the series in \code{new} when these characters occur in \code{text}. +If \code{old} or \code{new} contain a backslash, you sometimes have to quadruple +it to make the UNIX command work. If they contain a forward slash, +precede it with two backslashes. Invokes the builtin \code{chartr} function if +\code{multichar=FALSE}. +} +\usage{ +translate(text, old, new, multichar=FALSE) +} +\arguments{ +\item{text}{ +scalar, vector, or matrix of character strings to translate. +} +\item{old}{ +vector of old characters +} +\item{new}{ +corresponding vector of new characters +} +\item{multichar}{See above.} +} +\value{ +an object like \code{text} but with characters translated +} +\seealso{\code{\link{grep}}} +\examples{ +translate(c("ABC","DEF"),"ABCDEFG", "abcdefg") +translate("23.12","[.]","\\\\cdot ") # change . to \cdot +translate(c("dog","cat","tiger"),c("dog","cat"),c("DOG","CAT")) +# S-Plus gives [1] "DOG" "CAT" "tiger" - check discrepancy +translate(c("dog","cat2","snake"),c("dog","cat"),"animal") +# S-Plus gives [1] "animal" "animal2" "snake" +} +\keyword{character} |
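Since translate() delegates to base R's chartr when multichar=FALSE, the first example above is equivalent to:

chartr('ABCDEFG', 'abcdefg', c('ABC', 'DEF'))   # "abc" "def"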
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/trunc.POSIXt.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/trunc.POSIXt.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,51 @@ +\name{trunc.POSIXt} +\alias{trunc.POSIXt} +\alias{ceil.POSIXt} +\alias{ceil} +\alias{ceil.default} +\alias{round.POSIXt} +\title{ return the floor, ceiling, or rounded value of date or time to + specified unit. } +\description{ + \code{trunc.POSIXt} returns the date truncated to the specified unit. + \code{ceil.POSIXt} returns the next ceiling of the date at the unit selected in + \code{units}. + \code{floor.POSIXt} is \code{trunc.POSIXt} by another name. + \code{round.POSIXt} returns the date or time value rounded to the nearest + specified unit selected in \code{digits}. + + \code{trunc.POSIXt} and \code{round.POSIXt} have been extended from + the \code{base} package functions. +} +\usage{ +ceil(x, units,\dots) +\method{ceil}{default}(x, units, \dots) +\method{trunc}{POSIXt}(x, units = c("secs", "mins", "hours", "days", +"months", "years"), \dots) +\method{ceil}{POSIXt}(x, units = c("secs", "mins", "hours", "days", +"months", "years"), \dots) +\method{round}{POSIXt}(x, digits = c("secs", "mins", "hours", "days", "months", "years")) +} +\arguments{ + \item{x}{ date to be floored, ceilinged, truncated, or rounded } + \item{units}{ unit to which the date is rounded up or down. } + \item{digits}{ + same as \code{units} but with a different name, for compatibility + with the \code{\link{round}} generic. + } + \item{\dots}{further arguments to be passed to or from other methods.} +} +\value{ + An object of class \code{POSIXlt}. +} +\author{ Charles Dupont } +\seealso{ \code{\link{Date}} \code{\link{POSIXt}} \code{\link{POSIXlt}} \code{\link{DateTimeClasses}}} +\examples{ +date <- ISOdate(1832, 7, 12) +ceil(date, units='months') # '1832-8-1' +trunc(date, units='years') # '1832-1-1' +round.POSIXt(date, digits='months') # '1832-7-1' +} +\keyword{ manip }% at least one, from doc/KEYWORDS +\keyword{ utilities } +\keyword{ chron }% __ONLY ONE__ keyword per line |
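One way to picture what ceil() must do at the month level (an illustrative sketch only, not the Hmisc implementation; it assumes as.POSIXct normalizes an out-of-range mon field):

ceil_month_sketch <- function(x) {
  lt <- as.POSIXlt(x)
  cut_off <- lt$sec > 0 || lt$min > 0 || lt$hour > 0 || lt$mday > 1
  lt$sec <- 0; lt$min <- 0L; lt$hour <- 0L; lt$mday <- 1L   # truncate to month
  if (cut_off) lt$mon <- lt$mon + 1L                        # roll forward
  as.POSIXlt(as.POSIXct(lt))
}
ceil_month_sketch(ISOdate(1832, 7, 12))   # "1832-08-01"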
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/units.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/units.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,48 @@ +\name{units} +\alias{units} +\alias{units.default} +\alias{units.Surv} +\alias{units<-.default} +\title{ +Units Attribute of a Vector +} +\description{ +Sets or retrieves the \code{"units"} attribute of an object. +\code{units.default} replaces the builtin +version, which only works for time series objects. If the variable is +also given a \code{label}, subsetting (using \code{[.labelled}) will +retain the \code{"units"} attribute. For a \code{Surv} object, +\code{units} first looks for an overall \code{"units"} attribute, then +it looks for \code{units} for the \code{time2} variable, then for \code{time1}. +} +\usage{ +units(x, \dots) +\method{units}{default}(x, none='', \dots) +\method{units}{Surv}(x, none='', \dots) +\method{units}{default}(x) <- value +} +\arguments{ +\item{x}{any object} +\item{\dots}{ignored} +\item{value}{the units of the object, or \code{""}} +\item{none}{value to which to set the result if no appropriate attribute is + found} +} +\value{ +the units attribute of \code{x}, if any; otherwise, the \code{units} attribute of +the \code{tspar} attribute of \code{x} if any; otherwise the value +\code{none}. Handling for \code{Surv} objects is different (see above). +} +\seealso{\code{\link{label}}} +\examples{ +fail.time <- c(10,20) +units(fail.time) <- "Day" +describe(fail.time) +S <- Surv(fail.time) +units(S) + +label(fail.time) <- 'Failure Time' +fail.time +} +\keyword{utilities} +\keyword{interface} |
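As the description above implies, "units" is an ordinary R attribute, so the replacement method reduces to an attr() call (an illustration of the storage only, not the full method with its checks):

x <- c(10, 20)
attr(x, 'units') <- 'Day'   # what units(x) <- 'Day' stores
attributes(x)               # $units: "Day"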
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/unix/sas.get.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/unix/sas.get.Rd Wed Jun 28 20:28:48 2017 -0400 |
| b'@@ -0,0 +1,435 @@\n+\\name{sas.get}\n+\\alias{sas.get}\n+\\alias{is.special.miss}\n+\\alias{[.special.miss}\n+\\alias{print.special.miss}\n+\\alias{format.special.miss}\n+\\alias{sas.codes}\n+\\alias{code.levels}\n+\\alias{timePOSIXt}\n+\\title{Convert a SAS Dataset to an S Data Frame}\n+\\description{\n+ Converts a \\acronym{SAS} dataset into an S data frame. \n+ You may choose to extract only a subset of variables \n+ or a subset of observations in the \\acronym{SAS} dataset.\n+ You may have the function automatically convert \\preformatted{PROC FORMAT}-coded\n+ variables to factor objects. The original \\acronym{SAS} codes are stored in an\n+ attribute called \\code{sas.codes} and these may be added back to the\n+ \\code{levels} of a \\code{factor} variable using the \\code{code.levels} function.\n+ Information about special missing values may be captured in an attribute\n+ of each variable having special missing values. This attribute is\n+ called \\code{special.miss}, and such variables are given class \\code{special.miss}.\n+ There are \\code{print}, \\code{[]}, \\code{format}, and \\code{is.special.miss}\n+ methods for such variables.\n+ The \\code{chron} function is used to set up date, time, and date-time variables.\n+ If using S-Plus 5 or 6 or later, the \\code{timeDate} function is used\n+ instead.\n+ Under R, \\code{\\link{Dates}} is used for dates and \\code{\\link[chron]{chron}}\n+ for date-times. For times without\n+ dates, these still need to be stored in date-time format in POSIX.\n+ Such \\acronym{SAS} time variables are given a major class of \\code{POSIXt} and a\n+ \\code{format.POSIXt} function so that the date portion (which will\n+ always be 1/1/1970) will not print by default.\n+ If a date variable represents a partial date (0.5 added if\n+ month missing, 0.25 added if day missing, 0.75 if both), an attribute\n+ \\code{partial.date} is added to the variable, and the variable also becomes\n+ a class \\code{imputed} variable.\n+ The \\code{describe} function uses information about partial dates and\n+ special missing values.\n+ There is an option to automatically uncompress (or \\command{gunzip}) compressed\n+ \\acronym{SAS} datasets.\n+}\n+\\usage{\n+sas.get(libraryName, member, variables=character(0), ifs=character(0),\n+ format.library=libraryName, id,\n+ dates.=c("sas","yymmdd","yearfrac","yearfrac2"),\n+ keep.log=TRUE, log.file="_temp_.log", macro=sas.get.macro,\n+ data.frame.out=existsFunction("data.frame"), clean.up=FALSE, quiet=FALSE,\n+ temp=tempfile("SaS"), formats=TRUE, recode=formats,\n+ special.miss=FALSE, sasprog="sas", \n+ as.is=.5, check.unique.id=TRUE, force.single=FALSE,\n+ pos, uncompress=FALSE, defaultencoding="latin1")\n+\n+is.special.miss(x, code)\n+\n+\\method{[}{special.miss}(x, ..., drop=FALSE)\n+\n+\\method{print}{special.miss}(x, ...)\n+\n+\\method{format}{special.miss}(x, ...)\n+\n+sas.codes(object)\n+\n+code.levels(object)\n+}\n+\\arguments{\n+ \\item{libraryName}{\n+ character string naming the directory in which the dataset is kept.\n+ }\n+ \\item{drop}{\n+ logical. If \\code{TRUE} the result is coerced to the\n+ lowest possible dimension.\n+ }\n+ \\item{member}{\n+ character string giving the second part of the two part \\acronym{SAS} dataset name. 
\n+ (The first part is irrelevant here - it is mapped to the UNIX directory name.)\n+ }\n+ \\item{x}{\n+ a variable that may have been created by \\code{sas.get} with\n+ \\code{special.miss=T} or with \\code{recode} in effect.\n+ }\n+ \\item{variables}{\n+ vector of character strings naming the variables in the \\acronym{SAS} dataset. \n+ The S dataset will contain only those variables from the\n+ \\acronym{SAS} dataset. \n+ To get all of the variables (the default), an empty string may be given.\n+ It is a fatal error if any one of the variables is not\n+ in the \\acronym{SAS} dataset. You can use \\code{sas.contents} to get\n+ the variables in the \\acronym{SAS} dataset.\n+ If you have retrieved a subset of the variables\n+ in the \\acronym{SAS} data'..b'r long character variables), you\n+ may want to edit these \\preformatted{LRECL}s to quadruple them, for example.\n+}\n+\\note{\n+ You must be able to run \\acronym{SAS} (by typing \\command{sas}) on your system.\n+ If the S command \\code{!sas} does not start \\acronym{SAS}, then this function cannot work.\n+\n+ If you are reading time or\n+ date-time variables, you will need to execute the command \\code{library(chron)}\n+ to print those variables or the data frame if the \\code{timeDate} function\n+ is not available.\n+}\n+\\section{BACKGROUND}{\n+ The references cited below explain the structure of \\acronym{SAS} datasets and how\n+ they are stored under \\acronym{UNIX}.\n+ See \\emph{\\acronym{SAS} Language} \n+ for a discussion of the \\dQuote{subsetting if} statement.\n+}\n+\\author{\n+ Terry Therneau, Mayo Clinic\n+ \\cr\n+ Frank Harrell, Vanderbilt University\n+ \\cr\n+ Bill Dunlap, University of Washington and Insightful Corporation\n+ \\cr\n+ Michael W. Kattan, Cleveland Clinic Foundation\n+ \\cr\n+ Reinhold Koch (encoding)\n+}\n+\\references{\n+ \\acronym{SAS} Institute Inc. (1990).\n+ \\emph{\\acronym{SAS} Language: Reference, Version 6.}\n+ First Edition.\n+ \\acronym{SAS} Institute Inc., Cary, North Carolina.\n+\n+\n+ \\acronym{SAS} Institute Inc. (1988).\n+ \\acronym{SAS} Technical Report P-176,\n+ \\emph{Using the \\acronym{SAS} System, Release 6.03, under UNIX Operating Systems and Derivatives. }\n+ \\acronym{SAS} Institute Inc., Cary, North Carolina.\n+\n+\n+ \\acronym{SAS} Institute Inc. 
(1985).\n+ \\emph{\\acronym{SAS} Introductory Guide.}\n+ Third Edition.\n+ \\acronym{SAS} Institute Inc., Cary, North Carolina.\n+}\n+\\seealso{\n+ \\code{\\link{data.frame}}, \\code{\\link[Hmisc]{describe}},\n+ \\code{\\link[Hmisc]{label}},\n+ \\code{\\link[Hmisc]{upData}},\n+ \\code{\\link[Hmisc:upData]{cleanup.import}}\n+}\n+\\examples{\n+\\dontrun{\n+sas.contents("saslib", "mice")\n+# [1] "dose" "ld50" "strain" "lab_no"\n+attr(, "n"):\n+# [1] 117\n+mice <- sas.get("saslib", mem="mice", var=c("dose", "strain", "ld50"))\n+plot(mice$dose, mice$ld50)\n+\n+\n+nude.mice <- sas.get(lib=unix("echo $HOME/saslib"), mem="mice",\n+\tifs="if strain=\'nude\'")\n+\n+\n+nude.mice.dl <- sas.get(lib=unix("echo $HOME/saslib"), mem="mice",\n+\tvar=c("dose", "ld50"), ifs="if strain=\'nude\'")\n+\n+\n+# Get a dataset from current directory, recode PROC FORMAT; VALUE \\dots \n+# variables into factors with labels of the form "good(1)" "better(2)",\n+# get special missing values, recode missing codes .D and .R into new\n+# factor levels "Don\'t know" and "Refused to answer" for variable q1\n+d <- sas.get(".", "mydata", recode=2, special.miss=TRUE)\n+attach(d)\n+nl <- length(levels(q1))\n+lev <- c(levels(q1), "Don\'t know", "Refused")\n+q1.new <- as.integer(q1)\n+q1.new[is.special.miss(q1,"D")] <- nl+1\n+q1.new[is.special.miss(q1,"R")] <- nl+2\n+q1.new <- factor(q1.new, 1:(nl+2), lev)\n+# Note: would like to use factor() in place of as.integer \\dots but\n+# factor in this case adds "NA" as a category level\n+\n+\n+d <- sas.get(".", "mydata")\n+sas.codes(d$x) # for PROC FORMATted variables returns original data codes\n+d$x <- code.levels(d$x) # or attach(d); x <- code.levels(x)\n+# This makes levels such as "good" "better" "best" into e.g.\n+# "1:good" "2:better" "3:best", if the original SAS values were 1,2,3\n+\n+\n+# Retrieve the same variables from another dataset (or an update of\n+# the original dataset)\n+mydata2 <- sas.get(\'mydata2\', var=names(d))\n+# This only works if none of the original SAS variable names contained _\n+mydata2 <- cleanup.import(mydata2) # will make true integer variables\n+\n+# Code from Don MacQueen to generate SAS dataset to test import of\n+# date, time, date-time variables\n+# data ssd.test;\n+# d1=\'3mar2002\'d ;\n+# dt1=\'3mar2002 9:31:02\'dt;\n+# t1=\'11:13:45\'t;\n+# output;\n+#\n+# d1=\'3jun2002\'d ;\n+# dt1=\'3jun2002 9:42:07\'dt;\n+# t1=\'11:14:13\'t;\n+# output;\n+# format d1 mmddyy10. dt1 datetime. t1 time.;\n+# run;\n+}\n+}\n+\\keyword{interface}\n+\\keyword{manip}\n+% Converted by Sd2Rd version 1.21.\n+\n+\n' |
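The partial-date convention described above (0.25, 0.5, or 0.75 added to the day number) can be decoded from the fractional part. partial_status below is a hypothetical helper for illustration only; sas.get itself records this in the partial.date attribute:

partial_status <- function(d) {
  key <- c('0'    = 'complete',
           '0.25' = 'day missing',
           '0.5'  = 'month missing',
           '0.75' = 'month and day missing')
  unname(key[as.character(d - floor(d))])
}
partial_status(c(12053, 12053.25, 12053.5, 12053.75))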
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/upData.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/upData.Rd Wed Jun 28 20:28:48 2017 -0400 |
| b'@@ -0,0 +1,226 @@\n+\\name{upData}\n+\\alias{cleanup.import}\n+\\alias{upData}\n+\\alias{dataframeReduce}\n+\\title{\n+ Update a Data Frame or Cleanup a Data Frame after Importing\n+}\n+\\description{\n+ \\code{cleanup.import} will correct errors and shrink\n+ the size of data frames. By default, double precision numeric\n+ variables are changed to integer when they contain no fractional components. \n+ Infinite values or values greater than 1e20 in absolute value are set\n+ to NA. This solves problems of importing Excel spreadsheets that\n+ contain occasional character values for numeric columns, as S\n+ converts these to \\code{Inf} without warning. There is also an option to\n+ convert variable names to lower case and to add labels to variables.\n+ The latter can be made easier by importing a CNTLOUT dataset created\n+ by SAS PROC FORMAT and using the \\code{sasdict} option as shown in the\n+ example below. \\code{cleanup.import} can also transform character or\n+ factor variables to dates.\n+\n+ \\code{upData} is a function facilitating the updating of a data frame\n+ without attaching it in search position one. New variables can be\n+ added, old variables can be modified, variables can be removed or renamed, and\n+ \\code{"labels"} and \\code{"units"} attributes can be provided. Various checks\n+ are made for errors and inconsistencies, with warnings issued to help\n+ the user. Levels of factor variables can be replaced, especially\n+ using the \\code{list} notation of the standard \\code{merge.levels}\n+ function. Unless \\code{force.single} is set to \\code{FALSE}, \n+ \\code{upData} also converts double precision vectors to integer if no\n+ fractional values are present in \n+ a vector. \\code{upData} is also used to process R workspace objects\n+ created by StatTransfer, which puts variable labels as an attribute on\n+ the data frame rather than on each variable. If such an attribute is\n+ present, it is used to define all the labels before any label changes\n+ take place, and \\code{force.single} is set to a default of\n+ \\code{FALSE}, as StatTransfer already does conversion to integer.\n+\n+ The \\code{dataframeReduce} function removes variables from a data frame\n+ that are problematic for certain analyses. Variables can be removed\n+ because the fraction of missing values exceeds a threshold, because they\n+ are character or categorical variables having too many levels, or\n+ because they are binary and have too small a prevalence in one of the\n+ two values. 
Categorical variables can also have their levels combined\n+ when a level is of low prevalence.\n+}\n+\\usage{\n+cleanup.import(obj, labels, lowernames=FALSE, \n+ force.single=TRUE, force.numeric=TRUE, rmnames=TRUE,\n+ big=1e20, sasdict, print, datevars=NULL, datetimevars=NULL,\n+ dateformat=\'\\%F\',\n+ fixdates=c(\'none\',\'year\'), charfactor=FALSE)\n+\n+upData(object, \\dots, \n+ rename, drop, keep, labels, units, levels, force.single=TRUE,\n+ lowernames=FALSE, caplabels=FALSE, moveUnits=FALSE,\n+ charfactor=FALSE, print=TRUE)\n+\n+dataframeReduce(data, fracmiss=1, maxlevels=NULL, minprev=0, print=TRUE)\n+}\n+\\arguments{\n+ \\item{obj}{a data frame or list}\n+ \\item{object}{a data frame or list}\n+ \\item{data}{a data frame}\n+ \\item{force.single}{\n+ By default, double precision variables are converted to single precision\n+ (in S-Plus only) unless \\code{force.single=FALSE}.\n+ \\code{force.single=TRUE} will also convert vectors having only integer\n+ values to have a storage mode of integer, in R or S-Plus.\n+ }\n+ \\item{force.numeric}{\n+ Sometimes importing will cause a numeric variable to be\n+ changed to a factor vector. By default, \\code{cleanup.import} will check\n+ each factor variable to see if the levels contain only numeric values\n+ and \\code{""}. In that case, the variable will be converted to numeric,\n+ with \\code{""} converted to NA. Set \\code{force.numeric=FALSE} to prevent\n+ this beha'..b'hifted to the alternate number of digits when\n+ \\code{dateform} is the default \\code{"\\%F"} or is \\code{"\\%y-\\%m-\\%d"},\n+ \\code{"\\%m/\\%d/\\%y"}, or \\code{"\\%m/\\%d/\\%Y"}. Two-digits years are padded with \\code{20}\n+ on the left. Set \\code{dateformat} to the desired format, not the\n+ exceptional format.\n+ }\n+ \\item{charfactor}{set to \\code{TRUE} to change character variables to\n+\tfactors if they have fewer than n/2 unique values. Null strings and\n+\tblanks are converted to \\code{NA}s.}\n+ \\item{\\dots}{\n+ for \\code{upData}, one or more expressions of the form\n+ \\code{variable=expression}, to derive new variables or change old ones.\n+ }\n+ \\item{rename}{\n+ list or named vector specifying old and new names for variables. Variables are\n+ renamed before any other operations are done. For example, to rename\n+ variables \\code{age} and \\code{sex} to respectively \\code{Age} and\n+ \\code{gender}, specify \\code{rename=list(age="Age", sex="gender")} or\n+ \\code{rename=c(age=\\dots)}. \n+ }\n+ \\item{drop}{a vector of variable names to remove from the data frame}\n+\t\\item{keep}{a vector of variable names to keep, with all other\n+\t\tvariables dropped}\n+ \\item{units}{\n+ a named vector or list defining \\code{"units"} attributes of\n+\t\tvariables, in no specific order\n+ }\n+ \\item{levels}{\n+ a named list defining \\code{"levels"} attributes for factor variables, in\n+ no specific order. The values in this list may be character vectors\n+ redefining \\code{levels} (in order) or another list (see\n+ \\code{merge.levels} if using S-Plus).\n+ }\n+ \\item{caplabels}{\n+\tset to \\code{TRUE} to capitalize the first letter of each word in\n+\teach variable label\n+\t}\n+ \\item{moveUnits}{\n+ set to \\code{TRUE} to look for units of measurements in variable\n+ labels and move them to a \\code{"units"} attribute. If an expression\n+ in a label is enclosed in parentheses or brackets it is assumed to be\n+ units if \\code{moveUnits=TRUE}.\n+ }\n+ \\item{fracmiss}{the maximum permissable proportion of \\code{NA}s for a\n+ variable to be kept. 
Default is to keep all variables no matter how\n+ many \\code{NA}s are present.}\n+ \\item{maxlevels}{the maximum number of levels of a character or\n+ categorical or factor variable before the variable is dropped}\n+ \\item{minprev}{the minimum proportion of non-missing observations in a\n+ category for a binary variable to be retained, and the minimum\n+ relative frequency of a category before it will be combined with other\n+ small categories}\n+}\n+\\value{a new data frame}\n+\\author{\n+ Frank Harrell, Vanderbilt University\n+}\n+\\seealso{\n+ \\code{\\link{sas.get}}, \\code{\\link{data.frame}}, \\code{\\link{describe}},\n+ \\code{\\link{label}}, \\code{\\link{read.csv}}, \\code{\\link{strptime}},\n+ \\code{\\link{POSIXct}},\\code{\\link{Date}}\n+}\n+\\examples{\n+\\dontrun{\n+dat <- read.table(\'myfile.asc\')\n+dat <- cleanup.import(dat)\n+}\n+dat <- data.frame(a=1:3, d=c(\'01/02/2004\',\' 1/3/04\',\'\'))\n+cleanup.import(dat, datevars=\'d\', dateformat=\'\\%m/\\%d/\\%y\', fixdates=\'year\')\n+\n+dat <- data.frame(a=(1:3)/7, y=c(\'a\',\'b1\',\'b2\'), z=1:3)\n+dat2 <- upData(dat, x=x^2, x=x-5, m=x/10, \n+ rename=c(a=\'x\'), drop=\'z\',\n+ labels=c(x=\'X\', y=\'test\'),\n+ levels=list(y=list(a=\'a\',b=c(\'b1\',\'b2\'))))\n+dat2\n+describe(dat2)\n+dat <- dat2 # copy to original name and delete dat2 if OK\n+rm(dat2)\n+\n+# Remove hard to analyze variables from a redundancy analysis of all\n+# variables in the data frame\n+d <- dataframeReduce(dat, fracmiss=.1, minprev=.05, maxlevels=5)\n+# Could run redun(~., data=d) at this point or include dataframeReduce\n+# arguments in the call to redun\n+\n+# If you import a SAS dataset created by PROC CONTENTS CNTLOUT=x.datadict,\n+# the LABELs from this dataset can be added to the data. Let\'s also\n+# convert names to lower case for the main data file\n+\\dontrun{\n+mydata2 <- cleanup.import(mydata2, lowernames=TRUE, sasdict=datadict)\n+}\n+}\n+\\keyword{data}\n+\\keyword{manip}\n' |
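The two cleanup.import defaults described above, sketched for a single numeric vector (the thresholds come straight from the help text; the function itself is illustrative, not Hmisc's code):

shrink_numeric <- function(x, big = 1e20) {
  x[is.infinite(x) | abs(x) > big] <- NA    # huge or infinite values become NA
  if (all(x == floor(x), na.rm = TRUE))
    storage.mode(x) <- 'integer'            # whole numbers stored as integer
  x
}
str(shrink_numeric(c(1, 2, 1e21, Inf)))     # int [1:4] 1 2 NA NA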
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/valueTags.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/valueTags.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,99 @@ +\name{valueTags} +\alias{valueTags} +\alias{valueTags<-} +\alias{valueLabel} +\alias{valueLabel<-} +\alias{valueUnit} +\alias{valueUnit<-} +\alias{valueName} +\alias{valueName<-} +\title{Store Descriptive Information About an Object} +\description{ + Functions to get or set useful information about the contents of the + object for later use. +} +\usage{ +valueTags(x) +valueTags(x) <- value + +valueLabel(x) +valueLabel(x) <- value + +valueName(x) +valueName(x) <- value + +valueUnit(x) +valueUnit(x) <- value +} +\arguments{ + \item{x}{ + an object + } + \item{value}{ + for \code{valueTags<-}, a named list of value tags; otherwise + a character vector of length 1, or \code{NULL}. + } +} +\value{ + \code{valueTags} returns \code{NULL} or a named list with each of the + named values \code{name}, \code{label}, \code{unit} set if they exist + in the object. + + \code{valueTags<-} returns the new \code{list} of value tags. + + \code{valueName}, \code{valueLabel}, and \code{valueUnit} return + \code{NULL} or a character vector of length 1. + + \code{valueName<-}, \code{valueLabel<-}, and \code{valueUnit<-} return \code{value} +} +\details{ + These functions store a short name for the contents, a longer + label that is useful for display, and the units of the contents, + also useful for display. + + \code{valueTags} is an accessor, and \code{valueTags<-} is a replacement + function for all of the value's information. + + \code{valueName} is an accessor, and \code{valueName<-} is a + replacement function for the value's name. This name is used when a + plot or a latex table needs a short name and the variable name is not + useful. + + \code{valueLabel} is an accessor, and \code{valueLabel<-} is a + replacement function for the value's label. The label is used in + plots or latex tables when they need a descriptive name. + + \code{valueUnit} is an accessor, and \code{valueUnit<-} is a + replacement function for the value's unit. The unit is used to add + unit information to the R output. +} +\seealso{ + \code{\link{names}}, \code{\link{attributes}} +} +\examples{ +age <- c(21,65,43) +y <- 1:3 +valueLabel(age) <- "Age in Years" +plot(age, y, xlab=valueLabel(age)) + + +x1 <- 1:10 +x2 <- 10:1 +valueLabel(x2) <- 'Label for x2' +valueUnit(x2) <- 'mmHg' +x2 +x2[1:5] +dframe <- data.frame(x1, x2) +Label(dframe) + + +##In these examples of llist, note that labels are printed after +##variable names, because of print.labelled +a <- 1:3 +b <- 4:6 +valueLabel(b) <- 'B Label' +} +\author{Charles Dupont} +\keyword{attribute} +\keyword{misc} +\keyword{utilities} |
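The accessor/replacement pairs documented above follow the standard R attribute pattern; a minimal sketch with a plain attribute (the attribute name 'label' is an assumption for illustration, not necessarily Hmisc's internal storage):

valueLabel_sketch <- function(x) attr(x, 'label')
`valueLabel_sketch<-` <- function(x, value) { attr(x, 'label') <- value; x }
age <- c(21, 65, 43)
valueLabel_sketch(age) <- 'Age in Years'
valueLabel_sketch(age)   # "Age in Years"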
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/varclus.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/varclus.Rd Wed Jun 28 20:28:48 2017 -0400 |
| b'@@ -0,0 +1,344 @@\n+\\name{varclus}\n+\\alias{varclus}\n+\\alias{print.varclus}\n+\\alias{plot.varclus}\n+\\alias{naclus}\n+\\alias{naplot}\n+\\alias{combine.levels}\n+\\alias{plotMultSim}\n+\\alias{na.pattern}\n+\\title{\n+Variable Clustering\n+}\n+\\description{\n+Does a hierarchical cluster analysis on variables, using the Hoeffding\n+D statistic, squared Pearson or Spearman correlations, or proportion\n+of observations for which two variables are both positive as similarity\n+measures. Variable clustering is used for assessing collinearity,\n+redundancy, and for separating variables into clusters that can be\n+scored as a single variable, thus resulting in data reduction. For\n+computing any of the three similarity measures, pairwise deletion of\n+NAs is done. The clustering is done by \\code{hclust()}. A small function\n+\\code{naclus} is also provided which depicts similarities in which\n+observations are missing for variables in a data frame. The\n+similarity measure is the fraction of \\code{NAs} in common between any two\n+variables. The diagonals of this \\code{sim} matrix are the fraction of NAs\n+in each variable by itself. \\code{naclus} also computes \\code{na.per.obs}, the\n+number of missing variables in each observation, and \\code{mean.na}, a\n+vector whose ith element is the mean number of missing variables other\n+than variable i, for observations in which variable i is missing. The\n+\\code{naplot} function makes several plots (see the \\code{which} argument).\n+\n+So as to not generate too many dummy variables for multi-valued\n+character or categorical predictors, \\code{varclus} will automatically\n+combine infrequent cells of such variables using an auxiliary\n+function \\code{combine.levels} that is defined here. If all values of\n+\\code{x} are \\code{NA}, \\code{combine.levels} returns a numeric vector\n+is returned that is all \\code{NA}. \n+\n+\\code{plotMultSim} plots multiple similarity matrices, with the similarity\n+measure being on the x-axis of each subplot.\n+\n+\\code{na.pattern} prints a frequency table of all combinations of\n+missingness for multiple variables. If there are 3 variables, a\n+frequency table entry labeled \\code{110} corresponds to the number of\n+observations for which the first and second variables were missing but\n+the third variable was not missing.\n+}\n+\\usage{\n+varclus(x, similarity=c("spearman","pearson","hoeffding","bothpos","ccbothpos"),\n+ type=c("data.matrix","similarity.matrix"), \n+ method="complete",\n+ data=NULL, subset=NULL, na.action=na.retain,\n+ trans=c("square", "abs", "none"), ...)\n+\\method{print}{varclus}(x, abbrev=FALSE, ...)\n+\\method{plot}{varclus}(x, ylab, abbrev=FALSE, legend.=FALSE, loc, maxlen, labels, \\dots)\n+\n+naclus(df, method)\n+naplot(obj, which=c(\'all\',\'na per var\',\'na per obs\',\'mean na\',\n+ \'na per var vs mean na\'), \\dots)\n+\n+combine.levels(x, minlev=.05)\n+\n+plotMultSim(s, x=1:dim(s)[3],\n+ slim=range(pretty(c(0,max(s,na.rm=TRUE)))),\n+ slimds=FALSE,\n+ add=FALSE, lty=par(\'lty\'), col=par(\'col\'),\n+ lwd=par(\'lwd\'), vname=NULL, h=.5, w=.75, u=.05,\n+ labelx=TRUE, xspace=.35)\n+\n+na.pattern(x)\n+}\n+\\arguments{\n+\\item{x}{\n+a formula,\n+a numeric matrix of predictors, or a similarity matrix. 
If \\code{x} is\n+a formula, \\code{model.matrix} is used to convert it to a design matrix.\n+If the formula excludes an intercept (e.g., \\code{~ a + b -1}),\n+the first categorical (\\code{factor}) variable in the formula will have\n+dummy variables generated for all levels instead of omitting one for\n+the first level. For \\code{combine.levels}, \\code{x} is a character, category,\n+or factor vector (or other vector that is converted to factor). For\n+\\code{plot} and \\code{print}, \\code{x} is an object created by\n+\\code{varclus}. For \\code{na.pattern}, \\code{x} is a list, data frame,\n+or numeric matrix.\n+\n+For \\code{plotMultSim}, is a numeric vector specifying the ordered\n+unique values on the x-axis, corresponding to the third dimension'..b'ist of class \\code{varclus} with elements\n+\\code{call} (containing the calling statement), \\code{sim} (similarity matrix),\n+\\code{n} (sample size used if \\code{x} was not a correlation matrix already -\n+\\code{n} is a matrix), \\code{hclust}, the object created by \\code{hclust},\n+\\code{similarity}, and \\code{method}. \\code{naclus} also returns the\n+two vectors listed under \n+description, and \\code{naplot} returns an invisible vector that is the\n+frequency table of the number of missing variables per observation.\n+\\code{plotMultSim} invisibly returns the limits of similarities used in\n+constructing the y-axes of each subplot. For \\code{similarity="ccbothpos"}\n+the \\code{hclust} object is \\code{NULL}.\n+\n+\\code{na.pattern} creates an integer vector of frequencies.\n+}\n+\\details{\n+\\code{options(contrasts= c("contr.treatment", "contr.poly"))} is issued \n+temporarily by \\code{varclus} to make sure that ordinary dummy variables\n+are generated for \\code{factor} variables. Pass arguments to the\n+\\code{\\link{dataframeReduce}} function to remove problematic variables\n+(especially if analyzing all variables in a data frame).\n+}\n+\\author{\n+Frank Harrell\n+\\cr\n+Department of Biostatistics, Vanderbilt University\n+\\cr\n+\\email{f.harrell@vanderbilt.edu}\n+}\n+\\section{Side Effects}{\n+plots\n+}\n+\\references{\n+Sarle, WS: The VARCLUS Procedure. SAS/STAT User\'s Guide, 4th Edition,\n+1990. Cary NC: SAS Institute, Inc.\n+\n+\n+Hoeffding W. (1948): A non-parametric test of independence. 
Ann Math Stat\n+19:546--57.\n+}\n+\\seealso{\n+\\code{\\link{hclust}}, \\code{\\link{plclust}}, \\code{\\link{hoeffd}}, \\code{\\link{rcorr}}, \\code{\\link{cor}}, \\code{\\link{model.matrix}},\n+\\code{\\link{locator}}, \\code{\\link{na.pattern}}\n+}\n+\\examples{\n+set.seed(1)\n+x1 <- rnorm(200)\n+x2 <- rnorm(200)\n+x3 <- x1 + x2 + rnorm(200)\n+x4 <- x2 + rnorm(200)\n+x <- cbind(x1,x2,x3,x4)\n+v <- varclus(x, similarity="spear") # spearman is the default anyway\n+v # invokes print.varclus\n+print(round(v$sim,2))\n+plot(v)\n+\n+\n+# plot(varclus(~ age + sys.bp + dias.bp + country - 1), abbrev=TRUE)\n+# the -1 causes k dummies to be generated for k countries\n+# plot(varclus(~ age + factor(disease.code) - 1))\n+#\n+#\n+# use varclus(~., data= fracmiss= maxlevels= minprev=) to analyze all\n+# "useful" variables - see dataframeReduce for details about arguments\n+\n+\n+df <- data.frame(a=c(1,2,3),b=c(1,2,3),c=c(1,2,NA),d=c(1,NA,3),\n+ e=c(1,NA,3),f=c(NA,NA,NA),g=c(NA,2,3),h=c(NA,NA,3))\n+par(mfrow=c(2,2))\n+for(m in c("ward","complete","median")) {\n+ plot(naclus(df, method=m))\n+ title(m)\n+}\n+naplot(naclus(df))\n+n <- naclus(df)\n+plot(n); naplot(n)\n+na.pattern(df) # builtin function\n+\n+\n+x <- c(1, rep(2,11), rep(3,9))\n+combine.levels(x)\n+x <- c(1, 2, rep(3,20))\n+combine.levels(x)\n+\n+\n+# plotMultSim example: Plot proportion of observations\n+# for which two variables are both positive (diagonals\n+# show the proportion of observations for which the\n+# one variable is positive). Chance-correct the\n+# off-diagonals by subtracting the product of the\n+# marginal proportions. On each subplot the x-axis\n+# shows month (0, 4, 8, 12) and there is a separate\n+# curve for females and males\n+d <- data.frame(sex=sample(c(\'female\',\'male\'),1000,TRUE),\n+ month=sample(c(0,4,8,12),1000,TRUE),\n+ x1=sample(0:1,1000,TRUE),\n+ x2=sample(0:1,1000,TRUE),\n+ x3=sample(0:1,1000,TRUE))\n+s <- array(NA, c(3,3,4))\n+opar <- par(mar=c(0,0,4.1,0)) # waste less space\n+for(sx in c(\'female\',\'male\')) {\n+ for(i in 1:4) {\n+ mon <- (i-1)*4\n+ s[,,i] <- varclus(~x1 + x2 + x3, sim=\'ccbothpos\', data=d,\n+ subset=d$month==mon & d$sex==sx)$sim\n+ }\n+ plotMultSim(s, c(0,4,8,12), vname=c(\'x1\',\'x2\',\'x3\'),\n+ add=sx==\'male\', slimds=TRUE,\n+ lty=1+(sx==\'male\'))\n+ # slimds=TRUE causes separate scaling for diagonals and\n+ # off-diagonals\n+}\n+par(opar)\n+}\n+\\keyword{cluster}\n+\\keyword{multivariate}\n+\\keyword{category}\n+\\keyword{manip}\n+\n+\n' |
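The default varclus pipeline condenses to a few lines of base R, squared Spearman correlations clustered by complete-linkage hclust (a conceptual sketch, not the actual function, which also handles formulas, NAs, and dummy coding of categorical variables):

set.seed(1)
x   <- matrix(rnorm(400), ncol = 4, dimnames = list(NULL, paste0('x', 1:4)))
sim <- cor(x, method = 'spearman')^2          # default similarity measure
plot(hclust(as.dist(1 - sim), method = 'complete'))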
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/windows/sas.get.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/windows/sas.get.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,465 @@
+\name{sas.get}
+\alias{sas.get}
+\alias{is.special.miss}
+\alias{[.special.miss}
+\alias{print.special.miss}
+\alias{format.special.miss}
+\alias{sas.codes}
+\alias{code.levels}
+\title{Convert a SAS Dataset to an S Data Frame}
+\description{
+ Converts a \acronym{SAS} dataset into an S data frame.
+ You may choose to extract only a subset of variables
+ or a subset of observations in the \acronym{SAS} dataset.
+ The function will automatically convert \preformatted{PROC FORMAT}-coded
+ variables to factor objects. The original \acronym{SAS} codes are stored in an
+ attribute called \code{sas.codes} and these may be added back to the
+ \code{levels} of a \code{factor} variable using the \code{code.levels}
+ function.
+ Information about special missing values may be captured in an attribute
+ of each variable having special missing values. This attribute is
+ called \code{special.miss}, and such variables are given class \code{special.miss}.
+ There are \code{print}, \code{[]}, \code{format}, and \code{is.special.miss}
+ methods for such variables.
+ date, time, and date-time variables use respectively
+ \code{\link{Dates}}, \code{\link{DateTimeClasses}}, and
+ \code{\link[chron]{chron}} variables.
+ If using S-Plus 5 or 6 or later, the \code{timeDate} function is used instead.
+ If a date variable represents a partial date (0.5 added if
+ month missing, 0.25 added if day missing, 0.75 if both), an attribute
+ \code{partial.date} is added to the variable, and the variable also becomes
+ a class \code{imputed} variable.
+ The \code{describe} function uses information about partial dates and
+ special missing values.
+ There is an option to automatically \command{PKUNZIP} compressed
+ \acronym{SAS} datasets.
+
+ \code{sas.get} works by composing and running a \acronym{SAS} job that
+ creates various \acronym{ASCII} files that are read and analyzed
+ by \code{sas.get}. You can also run the \acronym{SAS} \code{sas_get} macro,
+ which writes the \acronym{ASCII} files for downloading, in a separate
+ step or on another computer, and then tell \code{sas.get} (through the
+ \code{sasout} argument) to access these files instead of running \acronym{SAS}.
+}
+\usage{
+sas.get(libraryName, member, variables=character(0), ifs=character(0),
+ format.library=libraryName, id,
+ dates.=c("sas","yymmdd","yearfrac","yearfrac2"),
+ keep.log=TRUE, log.file="_temp_.log", macro=sas.get.macro,
+ data.frame.out=existsFunction("data.frame"), clean.up=FALSE, quiet=FALSE,
+ temp=tempfile("SaS"), formats=TRUE,
+ recode=formats, special.miss=FALSE, sasprog="sas",
+ as.is=.5, check.unique.id=TRUE, force.single=FALSE, pos,
+ uncompress=FALSE, defaultencoding="latin1")
+
+is.special.miss(x, code)
+
+\method{[}{special.miss}(x, \dots, drop=FALSE)
+
+\method{print}{special.miss}(x, \dots)
+
+\method{format}{special.miss}(x, \dots)
+
+sas.codes(object)
+
+code.levels(object)
+}
+\arguments{
+ \item{libraryName}{
+ character string naming the directory in which the dataset is kept.
+ The default is \code{libraryName = "."}, indicating that the current
+ directory is to be used.
+ }
+ \item{member}{
+ character string giving the second part of the two part \acronym{SAS} dataset name.
+ (The first part is irrelevant here - it is mapped to the directory name.)
+ }
+ \item{x}{
+ a variable that may have been created by \code{sas.get} with \code{special.miss=TRUE}
+ or with \code{recode} in effect.
+ }
+ \item{variables}{
+ vector of character strings naming the variables in the \acronym{SAS} dataset.
+ The resulting data frame will contain only those variables from the
+ \acronym{SAS} dataset.
+ To get all of the variables (the default), an empty string may be given.
+ It is a fatal error if any one of the variables is not
+ in the \acronym{SAS} dataset. If you have retrieved a subset of the variables
+ in the \acronym{SAS
[...]
Corp.
+ \cr
+ Michael W. Kattan, Cleveland Clinic Foundation
+ \cr
+ Reinhold Koch (encoding)
+}
+\references{
+ \acronym{SAS} Institute Inc. (1990).
+ \emph{\acronym{SAS} Language: Reference, Version 6.}
+ First Edition.
+ \acronym{SAS} Institute Inc., Cary, North Carolina.
+
+
+ \acronym{SAS} Institute Inc. (1988).
+ \acronym{SAS} Technical Report P-176,
+ \emph{Using the \acronym{SAS} System, Release 6.03, under UNIX Operating Systems and Derivatives. }
+ \acronym{SAS} Institute Inc., Cary, North Carolina.
+
+
+ \acronym{SAS} Institute Inc. (1985).
+ \emph{\acronym{SAS} Introductory Guide.}
+ Third Edition.
+ \acronym{SAS} Institute Inc., Cary, North Carolina.
+}
+\seealso{
+ \code{\link{data.frame}}, \code{\link[Hmisc]{describe}},
+ \code{\link[Hmisc]{label}}, \code{\link[Hmisc]{upData}}
+}
+\examples{
+\dontrun{
+mice <- sas.get("saslib", mem="mice", var=c("dose", "strain", "ld50"))
+plot(mice$dose, mice$ld50)
+
+nude.mice <- sas.get(lib=unix("echo $HOME/saslib"), mem="mice",
+	ifs="if strain='nude'")
+
+nude.mice.dl <- sas.get(lib=unix("echo $HOME/saslib"), mem="mice",
+	var=c("dose", "ld50"), ifs="if strain='nude'")
+
+# Get a dataset from current directory, recode PROC FORMAT; VALUE \dots
+# variables into factors with labels of the form "good(1)" "better(2)",
+# get special missing values, recode missing codes .D and .R into new
+# factor levels "Don't know" and "Refused to answer" for variable q1
+d <- sas.get(mem="mydata", recode=2, special.miss=TRUE)
+attach(d)
+nl <- length(levels(q1))
+lev <- c(levels(q1), "Don't know", "Refused")
+q1.new <- as.integer(q1)
+q1.new[is.special.miss(q1,"D")] <- nl+1
+q1.new[is.special.miss(q1,"R")] <- nl+2
+q1.new <- factor(q1.new, 1:(nl+2), lev)
+# Note: would like to use factor() in place of as.integer ... but
+# factor in this case adds "NA" as a category level
+
+d <- sas.get(mem="mydata")
+sas.codes(d$x) # for PROC FORMATted variables returns original data codes
+d$x <- code.levels(d$x) # or attach(d); x <- code.levels(x)
+# This makes levels such as "good" "better" "best" into e.g.
+# "1:good" "2:better" "3:best", if the original SAS values were 1,2,3
+
+# For the following example, suppose that SAS is run on a
+# different machine from the one on which S is run.
+# The sas_get macro is used to create files needed by
+# sas.get. To make a text file containing the sas_get macro
+# run the following S command, for example:
+# cat(sas.get.macro, file='/sasmacro/sas_get.sas', sep='\n')
+
+# Here is the SAS job. This job assumes that you put
+# sas_get.sas in an autocall macro library.
+
+
+# libname db '/my/sasdata/area';
+# \%sas_get(db.mydata, dict, data, formats, specmiss,
+# formats=1, specmiss=1)
+
+
+# Substitute whatever file names you may want.
+# Next the 4 files are moved to the S machine (using
+# ASCII file transfer mode) and the following S
+# program is run:
+
+
+mydata <- sas.get(sasout=c('dict','data','formats','specmiss'),
+ id='idvar')
+
+
+# If PKZIP is run after \%sas_get, e.g. "PKZIP port dict data formats"
+# (assuming that specmiss was not used here), use
+
+
+mydata <- sas.get(sasout='a:port', id='idvar')
+
+
+# which will run PKUNZIP port to unzip a:port.zip, creating the
+# dict, data, and formats files which are generated (and later
+# deleted) by sas.get
+
+
+# Retrieve the same variables from another dataset (or an update of
+# the original dataset)
+mydata2 <- sas.get('mydata2', var=names(mydata))
+# This only works if none of the original SAS variable names contained _
+
+# Code from Don MacQueen to generate SAS dataset to test import of
+# date, time, date-time variables
+# data ssd.test;
+# d1='3mar2002'd ;
+# dt1='3mar2002 9:31:02'dt;
+# t1='11:13:45't;
+# output;
+#
+# d1='3jun2002'd ;
+# dt1='3jun2002 9:42:07'dt;
+# t1='11:14:13't;
+# output;
+# format d1 mmddyy10. dt1 datetime. t1 time.;
+# run;
+}
+}
+\keyword{interface}
+\keyword{manip}
+
+
+
+
+
+ |
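The partial-date encoding described in the help text above (0.5 added when the month is missing, 0.25 when the day is missing, 0.75 when both are) can be decoded from the fractional part of the stored value. A minimal R sketch with made-up internal values, not output from an actual sas.get run:

  d    <- c(15000, 15001.25, 15002.5, 15003.75)  # hypothetical imported values
  frac <- d - floor(d)
  c('complete', 'day missing', 'month missing',
    'month and day missing')[match(frac, c(0, .25, .5, .75))]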
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/wtd.stats.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/wtd.stats.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,233 @@
+\name{wtd.stats}
+\alias{wtd.mean}
+\alias{wtd.var}
+\alias{wtd.quantile}
+\alias{wtd.Ecdf}
+\alias{wtd.table}
+\alias{wtd.rank}
+\alias{wtd.loess.noiter}
+\alias{num.denom.setup}
+\title{
+Weighted Statistical Estimates
+}
+\description{
+These functions compute various weighted versions of standard
+estimators. In most cases the \code{weights} vector is a vector of the same
+length as \code{x}, containing frequency counts that in effect expand \code{x}
+by these counts. \code{weights} can also be sampling weights, in which
+case setting \code{normwt} to \code{TRUE} will often be appropriate. This results in
+making \code{weights} sum to the length of the non-missing elements in
+\code{x}. \code{normwt=TRUE} thus reflects the fact that the true sample size is
+the length of the \code{x} vector and not the sum of the original values of
+\code{weights} (which would be appropriate had \code{normwt=FALSE}). When \code{weights}
+is all ones, the estimates are all identical to unweighted estimates
+(unless one of the non-default quantile estimation options is
+specified to \code{wtd.quantile}). When missing data have already been
+deleted for \code{x}, \code{weights}, and (in the case of \code{wtd.loess.noiter}) \code{y},
+specifying \code{na.rm=FALSE} will save computation time. Omitting the
+\code{weights} argument or specifying \code{NULL} or a zero-length vector will
+result in the usual unweighted estimates.
+
+\code{wtd.mean}, \code{wtd.var}, and \code{wtd.quantile} compute
+weighted means, variances, and quantiles, respectively. \code{wtd.Ecdf}
+computes a weighted empirical distribution function. \code{wtd.table}
+computes a weighted frequency table (although only one stratification
+variable is supported at present). \code{wtd.rank} computes weighted
+ranks, using mid--ranks for ties. This can be used to obtain Wilcoxon
+tests and rank correlation coefficients. \code{wtd.loess.noiter} is a
+weighted version of \code{loess.smooth} when no iterations for outlier
+rejection are desired. This results in especially good smoothing when
+\code{y} is binary.
+
+\code{num.denom.setup} is a utility function that allows one to deal with
+observations containing numbers of events and numbers of trials, by
+outputting two observations when the number of events and non-events
+(trials - events) exceed zero. A vector of subscripts is generated
+that will do the proper duplications of observations, and a new binary
+variable \code{y} is created along with usual cell frequencies (\code{weights})
+for each of the \code{y=0}, \code{y=1} cells per observation.
+}
+\usage{
+wtd.mean(x, weights=NULL, normwt="ignored", na.rm=TRUE)
+wtd.var(x, weights=NULL, normwt=FALSE, na.rm=TRUE)
+wtd.quantile(x, weights=NULL, probs=c(0, .25, .5, .75, 1),
+ type=c('quantile','(i-1)/(n-1)','i/(n+1)','i/n'),
+ normwt=FALSE, na.rm=TRUE)
+wtd.Ecdf(x, weights=NULL,
+ type=c('i/n','(i-1)/(n-1)','i/(n+1)'),
+ normwt=FALSE, na.rm=TRUE)
+wtd.table(x, weights=NULL, type=c('list','table'),
+ normwt=FALSE, na.rm=TRUE)
+wtd.rank(x, weights=NULL, normwt=FALSE, na.rm=TRUE)
+wtd.loess.noiter(x, y, weights=rep(1,n),
+ span=2/3, degree=1, cell=.13333,
+ type=c('all','ordered all','evaluate'),
+ evaluation=100, na.rm=TRUE)
+num.denom.setup(num, denom)
+}
+\arguments{
+\item{x}{
+a numeric vector (may be a character or \code{category} or \code{factor} vector
+for \code{wtd.table})
+}
+\item{num}{
+vector of numerator frequencies
+}
+\item{denom}{
+vector of denominators (numbers of trials)
+}
+\item{weights}{
+a numeric vector of weights
+}
+\item{normwt}{
+specify \code{normwt=TRUE} to make \code{weights} sum to \code{length(x)} after deletion
+of NAs
+}
+\item{na.rm}{
+set to \code{FALSE} to suppress checking for NAs
+}
+\item{probs}{
+a vector of quantiles to compute. Default is 0 (min), .25, .5, .75, 1
+(max).
+}
+\item{type}{
+For \code{wtd.quant
[...]
e \code{loess.smooth}. The default is linear (\code{degree}=1) and 100 points
+to evaluation (if \code{type="evaluate"}).
+}}
+\value{
+\code{wtd.mean} and \code{wtd.var} return scalars. \code{wtd.quantile} returns a
+vector the same length as \code{probs}. \code{wtd.Ecdf} returns a list whose
+elements \code{x} and \code{Ecdf} correspond to unique sorted values of \code{x}.
+If the first CDF estimate is greater than zero, a point (min(x),0) is
+placed at the beginning of the estimates.
+See above for \code{wtd.table}. \code{wtd.rank} returns a vector the same
+length as \code{x} (after removal of NAs, depending on \code{na.rm}). See above
+for \code{wtd.loess.noiter}.
+}
+\details{
+The functions correctly combine weights of observations having
+duplicate values of \code{x} before computing estimates.
+
+When \code{normwt=FALSE} the weighted variance will not equal the
+unweighted variance even if the weights are identical. That is because
+of the subtraction of 1 from the sum of the weights in the denominator
+of the variance formula. If you want the weighted variance to equal the
+unweighted variance when weights do not vary, use \code{normwt=TRUE}.
+The articles by Gatz and Smith discuss alternative approaches to arrive
+at estimators of the standard error of a weighted mean.
+
+\code{wtd.rank} does not handle NAs as elegantly as \code{rank} if
+\code{weights} is specified.
+}
+\author{
+Frank Harrell
+\cr
+Department of Biostatistics
+\cr
+Vanderbilt University School of Medicine
+\cr
+\email{f.harrell@vanderbilt.edu}
+}
+\references{
+Research Triangle Institute (1995): SUDAAN User's Manual, Release
+6.40, pp. 8-16 to 8-17.
+
+Gatz DF, Smith L (1995): The standard error of a weighted mean
+concentration--I. Bootstrapping vs other methods. Atmospheric Env
+11:1185-1193.
+
+Gatz DF, Smith L (1995): The standard error of a weighted mean
+concentration--II. Estimating confidence intervals. Atmospheric Env
+29:1195-1200.
+
+http://en.wikipedia.org/wiki/Weighted_arithmetic_mean
+}
+\seealso{
+\code{\link{mean}}, \code{\link{var}}, \code{\link{quantile}}, \code{\link{table}}, \code{\link{rank}}, \code{\link{loess.smooth}}, \code{\link{lowess}},
+\code{\link{plsmo}}, \code{\link{Ecdf}}, \code{\link{somers2}}, \code{\link{describe}}
+}
+\examples{
+set.seed(1)
+x <- runif(500)
+wts <- sample(1:6, 500, TRUE)
+std.dev <- sqrt(wtd.var(x, wts))
+wtd.quantile(x, wts)
+death <- sample(0:1, 500, TRUE)
+plot(wtd.loess.noiter(x, death, wts, type='evaluate'))
+describe(~x, weights=wts)
+# describe uses wtd.mean, wtd.quantile, wtd.table
+xg <- cut2(x,g=4)
+table(xg)
+wtd.table(xg, wts, type='table')
+
+# Here is a method for getting stratified weighted means
+y <- runif(500)
+g <- function(y) wtd.mean(y[,1],y[,2])
+summarize(cbind(y, wts), llist(xg), g, stat.name='y')
+
+# Empirically determine how methods used by wtd.quantile match with
+# methods used by quantile, when all weights are unity
+set.seed(1)
+u <- eval(formals(wtd.quantile)$type)
+v <- as.character(1:9)
+r <- matrix(0, nrow=length(u), ncol=9, dimnames=list(u,v))
+
+for(n in c(8, 13, 22, 29))
+ {
+ x <- rnorm(n)
+ for(i in 1:5) {
+ probs <- sort( runif(9))
+ for(wtype in u) {
+ wq <- wtd.quantile(x, type=wtype, weights=rep(1,length(x)), probs=probs)
+ for(qtype in 1:9) {
+ rq <- quantile(x, type=qtype, probs=probs)
+ r[wtype, qtype] <- max(r[wtype,qtype], max(abs(wq-rq)))
+ }
+ }
+ }
+ }
+
+r
+
+# Restructure data to generate a dichotomous response variable
+# from records containing numbers of events and numbers of trials
+num <- c(10,NA,20,0,15) # data are 10/12 NA/999 20/20 0/25 15/35
+denom <- c(12,999,20,25,35)
+w <- num.denom.setup(num, denom)
+w
+# attach(my.data.frame[w$subs,])
+}
+\keyword{nonparametric}
+\keyword{category}
+\keyword{distribution}
+\keyword{robust}
+\keyword{loess}
+\keyword{smooth}
+\keyword{manip}
+\concept{weighted sampling}
+\concept{grouping}
+\concept{weights} |
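The \details note on the variance denominator is easy to verify numerically. A short sketch, assuming the Hmisc package is attached:

  x <- c(1, 4, 6, 10)
  w <- rep(2, length(x))       # constant weights
  var(x)                       # denominator length(x) - 1 = 3
  wtd.var(x, w)                # denominator sum(w) - 1 = 7, so a smaller value
  wtd.var(x, w, normwt=TRUE)   # weights rescaled to sum to length(x); equals var(x)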
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/xYplot.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/xYplot.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,545 @@
+\name{xYplot}
+\alias{xYplot}
+\alias{panel.xYplot}
+\alias{prepanel.xYplot}
+\alias{Dotplot}
+\alias{panel.Dotplot}
+\alias{prepanel.Dotplot}
+\alias{Cbind}
+\alias{[.Cbind}
+\alias{setTrellis}
+\alias{numericScale}
+\title{xyplot and dotplot with Matrix Variables to Plot Error Bars and Bands}
+\description{
+A utility function \code{Cbind} returns the first argument as a vector and
+combines all other arguments into a matrix stored as an attribute called
+\code{"other"}. The arguments can be named (e.g.,
+\code{Cbind(pressure=y,ylow,yhigh)}) or a \code{label} attribute may be pre-attached
+to the first argument. In either case, the name or label of the first
+argument is stored as an attribute \code{"label"} of the object returned by
+\code{Cbind}. Storing other vectors as a matrix attribute facilitates plotting
+error bars, etc., as \code{trellis} really wants the x- and y-variables to be
+vectors, not matrices. If a single argument is given to \code{Cbind} and that
+argument is a matrix with column dimnames, the first column is taken as the
+main vector and remaining columns are taken as \code{"other"}. A subscript
+method for \code{Cbind} objects subscripts the \code{other} matrix along
+with the main \code{y} vector.
+
+The \code{xYplot} function is a substitute for \code{xyplot} that allows for
+simulated multi-column \code{y}. It uses by default the \code{panel.xYplot} and
+\code{prepanel.xYplot} functions to do the actual work. The \code{method} argument
+passed to \code{panel.xYplot} from \code{xYplot} allows you to make error bars, the
+upper-only or lower-only portions of error bars, alternating lower-only and
+upper-only bars, bands, or filled bands. \code{panel.xYplot} decides how to
+alternate upper and lower bars according to whether the median \code{y} value of
+the current main data line is above the median \code{y} for all \code{groups} of
+lines or not. If the median is above the overall median, only the upper
+bar is drawn. For \code{bands} (but not 'filled bands'), any number of other
+columns of \code{y} will be drawn as lines having the same thickness, color, and
+type as the main data line. If plotting bars, bands, or filled bands and
+only one additional column is specified for the response variable, that
+column is taken as the half width of a precision interval for \code{y}, and the
+lower and upper values are computed automatically as \code{y} plus or minus the
+value of the additional column variable.
+
+
+When a \code{groups} variable is present, \code{panel.xYplot} will create a function
+in frame 0 (\code{.GlobalEnv} in \R) called \code{Key} that when
+invoked will draw a key describing the
+\code{groups} labels, point symbols, and colors. By default, the key is outside
+the graph. For S-Plus, if \code{Key(locator(1))} is specified, the key will appear so that
+its upper left corner is at the coordinates of the mouse click. For
+R/Lattice the first two arguments of \code{Key} (\code{x} and \code{y}) are fractions
+of the page, measured from the lower left corner, and the default
+placement is at \code{x=0.05, y=0.95}. For \R, an optional argument
+to \code{Key}, \code{other}, may contain a list of arguments to pass to \code{draw.key} (see
+\code{\link[lattice]{xyplot}} for a list of possible arguments, under
+the \code{key} option).
+
+
+When \code{method="quantile"} is specified, \code{xYplot} automatically groups the
+\code{x} variable into intervals containing a target of \code{nx} observations each,
+and within each \code{x} group computes three quantiles of \code{y} and plots these
+as three lines. The mean \code{x} within each \code{x} group is taken as the
+\code{x}-coordinate. This will make a useful empirical display for large
+datasets in which scatterdiagrams are too busy to see patterns of central
+tendency and variability. You can also specify a general function of a
+data vector that returns a matrix of statistics for the \code{method} argument.
+A
[...]
b')
+# Can also use Y <- cbind(y, Lower, Upper); xYplot(Cbind(Y) ~ ...)
+# Or:
+xYplot(y ~ month | year, nx=FALSE, method=smean.cl.boot, type='b')
+
+
+# This example uses the summarize function in Hmisc to
+# compute the median and outer quartiles. The outer quartiles are
+# displayed using "filled bands"
+
+
+s <- summarize(y, llist(month,year), smedian.hilow, conf.int=.5)
+
+
+# filled bands: default fill = pastel colors matching solid colors
+# in superpose.line (this works differently in R)
+xYplot ( Cbind ( y, Lower, Upper ) ~ month, groups=year,
+ method="filled bands" , data=s, type="l")
+
+
+# note colors based on levels of selected subgroups, not first two colors
+xYplot ( Cbind ( y, Lower, Upper ) ~ month, groups=year,
+ method="filled bands" , data=s, type="l",
+ subset=(year == 1998 | year == 2000), label.curves=FALSE )
+
+
+# filled bands using black lines with selected solid colors for fill
+xYplot ( Cbind ( y, Lower, Upper ) ~ month, groups=year,
+ method="filled bands" , data=s, label.curves=FALSE,
+ type="l", col=1, col.fill = 2:3)
+Key(.5,.8,col = 2:3) #use fill colors in key
+
+
+# A good way to check for stable variance of residuals from ols
+# xYplot(resid(fit) ~ fitted(fit), method=smean.sdl)
+# smean.sdl is defined with summary.formula in Hmisc
+
+
+# Plot y vs. a special variable x
+# xYplot(y ~ numericScale(x, label='Label for X') | country)
+# For this example could omit label= and specify
+# y ~ numericScale(x) | country, xlab='Label for X'
+
+
+# Here is an example of using xYplot with several options
+# to change various Trellis parameters,
+# xYplot(y ~ x | z, groups=v, pch=c('1','2','3'),
+# layout=c(3,1), # 3 panels side by side
+# ylab='Y Label', xlab='X Label',
+# main=list('Main Title', cex=1.5),
+# par.strip.text=list(cex=1.2),
+# strip=function(\dots) strip.default(\dots, style=1),
+# scales=list(alternating=FALSE))
+
+
+#
+# Dotplot examples
+#
+
+
+s <- summarize(y, llist(month,year), smedian.hilow, conf.int=.5)
+
+
+setTrellis() # blank conditioning panel backgrounds
+Dotplot(month ~ Cbind(y, Lower, Upper) | year, data=s)
+# or Cbind(\dots), groups=year, data=s
+
+
+# Display a 5-number (5-quantile) summary (2 intervals, dot=median)
+# Note that summarize produces a matrix for y, and Cbind(y) trusts the
+# first column to be the point estimate (here the median)
+s <- summarize(y, llist(month,year), quantile,
+ probs=c(.5,.05,.25,.75,.95), type='matrix')
+Dotplot(month ~ Cbind(y) | year, data=s)
+# Use factor(year) to make actual years appear in conditioning title strips
+
+# Plot proportions and their Wilson confidence limits
+set.seed(3)
+d <- expand.grid(continent=c('USA','Europe'), year=1999:2001,
+ reps=1:100)
+# Generate binary events from a population probability of 0.2
+# of the event, same for all years and continents
+d$y <- ifelse(runif(6*100) <= .2, 1, 0)
+s <- with(d,
+ summarize(y, llist(continent,year),
+ function(y) {
+ n <- sum(!is.na(y))
+ s <- sum(y, na.rm=TRUE)
+ binconf(s, n)
+ }, type='matrix')
+)
+
+Dotplot(year ~ Cbind(y) | continent, data=s, ylab='Year',
+ xlab='Probability')
+
+
+# Dotplot(z ~ x | g1*g2)
+# 2-way conditioning
+# Dotplot(z ~ x | g1, groups=g2); Key()
+# Key defines symbols for g2
+
+
+# If the data are organized so that the mean, lower, and upper
+# confidence limits are in separate records, the Hmisc reShape
+# function is useful for assembling these 3 values as 3 variables in
+# a single observation, e.g., assuming type has values such as
+# c('Mean','Lower','Upper'):
+# a <- reShape(y, id=month, colvar=type)
+# This will make a matrix with 3 columns named Mean Lower Upper
+# and with 1/3 as many rows as the original data
+}
+\keyword{hplot}
+\concept{trellis}
+\concept{lattice} |
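A compact sketch of the Cbind mechanism documented above, assuming Hmisc and lattice are attached; the second call relies on the documented rule that a single extra column is treated as the half-width of the interval:

  set.seed(1)
  x  <- 1:10
  y  <- rnorm(10)
  se <- rep(.5, 10)
  xYplot(Cbind(y, y - se, y + se) ~ x, type='b')  # explicit lower and upper columns
  xYplot(Cbind(y, se) ~ x, type='b')              # one extra column = half-width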
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/xtfrm.labelled.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/xtfrm.labelled.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,20 @@
+\name{xtfrm.labelled}
+\alias{xtfrm.labelled}
+\title{
+ Auxiliary Function Method for Sorting and Ranking
+}
+\description{
+ An auxiliary function method that works around a bug in how the
+ implementation of \code{xtfrm} handles inheritance.
+}
+\usage{
+\method{xtfrm}{labelled}(x)
+}
+\arguments{
+ \item{x}{
+ any object of class labelled.
+ }
+}
+\seealso{
+ \code{\link{xtfrm}}
+} |
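A short sketch of why the method matters: sort() and order() route through xtfrm(), and a labelled vector should be ranked by its underlying values (assumes Hmisc is attached):

  x <- c(3, 1, 2)
  label(x) <- 'an example variable'   # x now has class "labelled"
  sort(x)                             # dispatches through xtfrm.labelled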
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/xy.group.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/xy.group.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,51 @@ +\name{xy.group} +\alias{xy.group} +\title{ +Mean x vs. function of y in groups of x +} +\description{ +Compute mean x vs. a function of y (e.g. median) by quantile +groups of x or by x grouped to have a given average number of +observations. Deletes NAs in x and y before doing computations. +} +\usage{ +xy.group(x, y, m=150, g, fun=mean, result="list") +} +\arguments{ +\item{x}{ +a vector, may contain NAs +} +\item{y}{ +a vector of same length as x, may contain NAs +} +\item{m}{ +number of observations per group +} +\item{g}{ +number of quantile groups +} +\item{fun}{ +function of y such as median or mean (the default) +} +\item{result}{ +"list" (the default), or "matrix" +}} +\value{ +if result="list", a list with components x and y suitable for plotting. +if result="matrix", matrix with rows corresponding to x-groups and columns named +n, x, and y. +} +\seealso{ +\code{\link{cut2}}, \code{\link{tapply}} +} +\examples{ +# plot(xy.group(x, y, g=10)) #Plot mean y by deciles of x +# xy.group(x, y, m=100, result="matrix") #Print table, 100 obs/group +} +\keyword{category} +\keyword{nonparametric} +\concept{grouping} +\concept{stratification} +\concept{aggregation} +\concept{discretization} + |
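A runnable version of the commented examples above, with simulated data (assumes Hmisc is attached):

  set.seed(2)
  x <- runif(400)
  y <- x + rnorm(400, sd=.3)
  plot(xy.group(x, y, g=10))               # mean y by decile group of x
  xy.group(x, y, m=100, result="matrix")   # about 100 observations per group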
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/yearDays.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/yearDays.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,26 @@ +\name{yearDays} +\alias{yearDays} +\alias{monthDays} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{ Get Number of Days in Year or Month } +\description{ + Returns the number of days in a specific year or month. +} +\usage{ +yearDays(time) + +monthDays(time) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{time}{ + A POSIXt or Date object describing the month or year in + question. + } +} +\author{ Charles Dupont } +\seealso{ \code{\link{POSIXt}}, \code{\link{Date}} } +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ utilities } +\keyword{ chron }% __ONLY ONE__ keyword per line |
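The Rd file ships without an examples section; a two-line sketch (assumes Hmisc is attached):

  yearDays(as.Date('2000-01-01'))    # 366; 2000 is a leap year
  monthDays(as.Date('2000-02-01'))   # 29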
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/man/ynbind.Rd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/man/ynbind.Rd Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,58 @@
+\name{ynbind}
+\alias{ynbind}
+\alias{[.ynbind}
+\alias{pBlock}
+\alias{[.pBlock}
+\title{Combine Variables in a Matrix}
+\description{
+\code{ynbind} column binds a series of related yes/no variables,
+allowing for a final argument \code{label} used to label the panel
+created for the group. \code{label}s for individual variables are
+collected into a vector attribute \code{"labels"} for the result;
+original variable names are used in place of labels for those variables
+without labels. A positive response is taken to be \code{y, yes,
+present} (ignoring case) or a \code{logical} \code{TRUE} value. By
+default, the columns are sorted in ascending order of the overall
+proportion of positives. A subsetting method is provided for objects of
+class \code{"ynbind"}.
+
+\code{pBlock} creates a matrix similarly labeled, from a general set of
+variables (without special handling of binaries), and sets to \code{NA}
+any observation not in \code{subset} so that when that block of
+variables is analyzed it will be only for that subset.
+}
+
+\usage{
+ynbind(..., label = deparse(substitute(...)),
+ asna = c("unknown", "unspecified"), sort = TRUE)
+
+pBlock(..., subset=NULL, label = deparse(substitute(...)))
+}
+\arguments{
+ \item{\dots}{a series of vectors}
+ \item{label}{a label for the group, to be attached to the resulting
+ matrix as a \code{"label"} attribute, used by \code{\link{summaryP}}.}
+ \item{asna}{a vector of character strings specifying levels that are
+ to be treated the same as \code{NA} if present}
+ \item{sort}{set to \code{FALSE} to not sort the columns by their
+ proportions}
+ \item{subset}{subset criteria - either a vector of logicals or subscripts}
+}
+\value{a matrix of class \code{"ynbind"} or
+ \code{"pBlock"} with \code{"label"} and \code{"labels"} attributes.
+ For \code{"pBlock"}, factor input vectors will have values converted
+ to \code{character}.
+}
+\author{Frank Harrell}
+\seealso{\code{\link{summaryP}}}
+\examples{
+x1 <- c('yEs', 'no', 'UNKNOWN', NA)
+x2 <- c('y', 'n', 'no', 'present')
+label(x2) <- 'X2'
+X <- ynbind(x1, x2, label='x1-2')
+X[1:3,]
+
+pBlock(x1, x2, subset=2:3, label='x1-2')
+}
+\keyword{misc}
+\keyword{utilities} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/Hmisc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/Hmisc.c Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,36 @@
+#include "Hmisc.h"
+
+char *Hmisc_AllocStringBuffer(size_t blen, Hmisc_StringBuffer *buf)
+{
+    size_t blen1, bsize = buf->defaultSize;
+    S_EVALUATOR
+
+    if(blen * sizeof(char) < buf->bufsize) return buf->data;
+    blen1 = blen = (blen + 1) * sizeof(char);
+    blen = (blen / bsize) * bsize;
+    if(blen < blen1) blen += bsize;
+
+    if(buf->data == NULL) {
+        buf->data = (char *) malloc(blen);
+        if(buf->data) buf->data[0] = '\0';  /* guard: malloc may have failed */
+    } else
+        buf->data = (char *) realloc(buf->data, blen);
+    buf->bufsize = blen;
+    if(!buf->data) {
+        buf->bufsize = 0;
+        /* don't translate internal error message */
+        PROBLEM "could not allocate memory (%u Mb) in C function 'Hmisc_AllocStringBuffer'",
+            (unsigned int) blen/1024/1024 ERROR;
+    }
+    return buf->data;
+}
+
+
+void Hmisc_FreeStringBuffer(Hmisc_StringBuffer *buf)
+{
+    if (buf->data != NULL) {
+        free(buf->data);
+        buf->bufsize = 0;
+        buf->data = NULL;
+    }
+} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/Hmisc.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/Hmisc.h Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,38 @@ +#ifndef _HMISC_H_ +#define _HMISC_H_ + +#include <string.h> +#include <errno.h> +#include <S.h> +#include <R.h> +#include <Rdefines.h> + +#ifdef _SPLUS_ +# define STRING_ELT(x,i) (CHARACTER_POINTER(x)[i]) +# define TO_CHAR(x) (x) +# define translateChar(x) (x) +# define IS_NA_LGL(x) (is_na(&x, LGL)) +# define SET_NA_LGL(x) (na_set(&x, LGL)) + typedef s_object *SEXP ; + typedef char *STR_ELT; +#else +# define TO_CHAR(x) (CHAR(x)) +# define STR_ELT SEXP +# define IS_NA_LGL(x) (x == NA_LOGICAL) +# define SET_NA_LGL(x) (x = NA_LOGICAL) +#endif + +#define MAXELTSIZE 8192 + +typedef struct +{ + char *data; + size_t bufsize; + size_t defaultSize; +} Hmisc_StringBuffer; + +char *Hmisc_AllocStringBuffer(size_t blen, Hmisc_StringBuffer *buf); + +void Hmisc_FreeStringBuffer(Hmisc_StringBuffer *buf); + +#endif |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/cidxcn.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/cidxcn.f Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,88 @@ +C Output from Public domain Ratfor, version 1.01 +C------------------------------------------------------------------------ +C Compute c-index (c) and Brown-Hollander-Krowar-Goodman-Kruskal-Somer +C rank correlation (gamma) between X and Y with censoring indicator E +C Also returns number of relevant, concordant, and uncertain pairs +C (nrel, nconc, nuncert) and estimated s.d. of gamma (sd) using +C Quade formula (see SAS PROC MATPAR). Pairs with tied x are +C excluded if outx=.TRUE. +C +C F. Harrell 27Nov90 +C Modification of SAS Procedure KGKC (1980) +C------------------------------------------------------------------------- + subroutine cidxcn(x,y,e,n,nrel,nconc,nuncert,c,gamma,sd,outx) + implicit double precision (a-h,o-z) + double precision x(n),y(n),dx,dy + logical e(n),outx + double precision nrel,nuncert,nconc + nconc=0d0 + nrel=0d0 + nuncert=0d0 + sumr=0d0 + sumr2=0d0 + sumw=0d0 + sumw2=0d0 + sumrw=0d0 + do23000 i=1,n + wi=0d0 + ri=0d0 + do23002 j=1,n + if(j.ne.i)then + dx=x(i)-x(j) + dy=y(i)-y(j) + if(dx.ne.0. .or. .not.outx)then + if((e(i).and.dy.lt.0.).or.(e(i).and..not.e(j).and.dy.eq.0.))then + if(dx.lt.0.)then + nconc=nconc+1d0 + wi=wi+1d0 + else + if(dx.eq.0.)then + nconc=nconc+.5d0 + else + wi=wi-1d0 + endif + endif + nrel=nrel+1d0 + ri=ri+1d0 + else + if((e(j).and.dy.gt.0.).or.(e(j).and..not.e(i).and.dy.eq.0.))then + if(dx.gt.0.)then + nconc=nconc+1d0 + wi=wi+1d0 + else + if(dx.eq.0.)then + nconc=nconc+.5d0 + else + wi=wi-1d0 + endif + endif + nrel=nrel+1d0 + ri=ri+1d0 + else + if(.not.(e(i).and.e(j)))then + nuncert=nuncert+1d0 + endif + endif + endif + endif + endif +23002 continue +23003 continue + sumr=sumr+ri + sumr2=sumr2+ri*ri + sumw=sumw+wi + sumw2=sumw2+wi*wi + sumrw=sumrw+ri*wi +23000 continue +23001 continue + c=nconc/nrel + gamma=2.*(c-.5) +Ccall dblepr('sumr',4,sumr,1) +Ccall dblepr('sumw',4,sumw,1) +Ccall dblepr('sumr2',5,sumr2,1) +Ccall dblepr('sumw2',5,sumw2,1) +Ccall dblepr('sumrw',5,sumrw,1) + sd=sumr2*sumw**2-2d0*sumr*sumw*sumrw+sumw2*sumr**2 + sd=2.*dsqrt(sd)/sumr/sumr + return + end |
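At the R level this subroutine is reached through rcorr.cens (Hmisc/R/rcorr.cens.s in this changeset). A sketch with simulated censored data, assuming Hmisc and survival are attached:

  set.seed(4)
  x  <- rnorm(60)                      # predictor
  dt <- rexp(60, rate=exp(-x))         # event times related to x
  ev <- sample(0:1, 60, replace=TRUE)  # censoring indicator
  rcorr.cens(x, Surv(dt, ev))          # C Index, Dxy (gamma), S.D., n, ...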
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/cidxcp.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/cidxcp.f Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,92 @@ +C Output from Public domain Ratfor, version 1.01 + subroutine cidxcp(x1,x2,y,e,n,method,outx, nrel,nuncer,c1,c2,gamma + *1,gamma2,gamma,sd,c12,c21) + implicit double precision (a-h,o-z) + double precision x1(n),x2(n),y(n) + logical e(n),outx + integer n,method,i,j + double precision nrel,nuncer,nconc1,nconc2,c1,c2,gamma1,gamma2, su + *mr,sumr2,sumw,sumw2,sumrw, wi,ri,sumc,c12,c21,gamma,sd + double precision dx,dx2,dy + nconc1=0d0 + nconc2=0d0 + nrel=0d0 + nuncer=0d0 + sumr=0d0 + sumr2=0d0 + sumw=0d0 + sumw2=0d0 + sumrw=0d0 + sumc=0d0 + do23000 i=1,n + wi=0d0 + ri=0d0 + do23002 j=1,n + dx=x1(i)-x1(j) + dx2=x2(i)-x2(j) + if((i.ne.j) .and. (.not.outx .or. dx.ne.0. .or. dx2.ne.0.))then + dy=y(i)-y(j) + if((e(i).and.(dy.lt.0.)).or.(e(i).and..not.e(j).and.(dy.eq.0.)))th + *en + nrel=nrel+1d0 + nconc1=nconc1+(z(dx.lt.0.)+.5d0*z(dx.eq.0.)) + nconc2=nconc2+(z(dx2.lt.0.)+.5d0*z(dx2.eq.0.)) + ri=ri+1d0 + if(method.eq.1)then + wi=wi+(z(dx.lt.dx2)-z(dx.gt.dx2)) + sumc=sumc+z(dx.lt.dx2) + else + wi=wi+(z(dx.lt.0..and.dx2.ge.0.)-z(dx.gt.0..and.dx2.le.0.)) + sumc=sumc+z(dx.lt.0..and.dx2.ge.0.) + endif + else + if((e(j).and.(dy.gt.0.)).or.(e(j).and..not.e(i).and.(dy.eq.0.)))th + *en + nrel=nrel+1d0 + nconc1=nconc1+(z(dx.gt.0.)+.5d0*z(dx.eq.0.)) + nconc2=nconc2+(z(dx2.gt.0.)+.5d0*z(dx2.eq.0.)) + ri=ri+1d0 + if(method.eq.1)then + wi=wi+(z(dx.gt.dx2)-z(dx.lt.dx2)) + sumc=sumc+z(dx.gt.dx2) + else + wi=wi+(z(dx.gt.0..and.dx2.le.0.)-z(dx.lt.0..and.dx2.ge.0.)) + sumc=sumc+z(dx.gt.0..and.dx2.le.0.) + endif + else + if(.not.(e(i).and.e(j)))then + nuncer=nuncer+1d0 + endif + endif + endif + endif +23002 continue +23003 continue + sumr=sumr+ri + sumr2=sumr2+ri*ri + sumw=sumw+wi + sumw2=sumw2+wi*wi + sumrw=sumrw+ri*wi +23000 continue +23001 continue + c1=nconc1/nrel + gamma1=2d0*(c1-.5d0) + c2=nconc2/nrel + gamma2=2d0*(c2-.5d0) + gamma=sumw/sumr + sd=sumr2*sumw**2-2d0*sumr*sumw*sumrw+sumw2*sumr**2 + sd=2d0*dsqrt(sd)/sumr/sumr + c12=sumc/sumr + c21=sumc/sumr-gamma + return + end + function z(a) + double precision z + logical a + if(a)then + z=1d0 + else + z=0d0 + endif + return + end |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/hoeffd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/hoeffd.f Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,120 @@ +C Output from Public domain Ratfor, version 1.01 + subroutine hoeffd(xx, n, p, dmat, aadmat, madmat, npair, x, y, rx, + * ry, rj) + implicit double precision (a-h,o-z) + integer n, p, npair(p,p) + double precision xx(n,p), dmat(p,p), aadmat(p,p), madmat(p,p), + *x(n), y(n), rx(n), ry(n), rj(n), maxad + do23000 i=1, p + np=0 + do23002 k=1, n + if(xx(k,i) .lt. 1e30)then + np = np + 1 + endif +23002 continue +23003 continue + npair(i,i) = np + do23006 j=(i+1),p + m = 0 + do23008 k=1,n + xk = xx(k,i) + yk = xx(k,j) + if(xk .lt. 1e30 .and. yk .lt. 1e30)then + m = m + 1 + x(m) = xk + y(m) = yk + endif +23008 continue +23009 continue + npair(i,j) = m + if(m .gt. 4)then + call hoeff(x, y, m, d, aad, maxad, rx, ry, rj) + dmat(i,j) = d + aadmat(i,j) = aad + madmat(i,j) = maxad + else + dmat(i,j) = 1e30 + endif +23006 continue +23007 continue +23000 continue +23001 continue + do23014 i=1,p + dmat(i,i) = 1d0/30d0 + do23016 j=(i+1),p + dmat(j,i) = dmat(i,j) + npair(j,i) = npair(i,j) + aadmat(j,i) = aadmat(i,j) + madmat(j,i) = madmat(i,j) +23016 continue +23017 continue +23014 continue +23015 continue + return + end + subroutine hoeff(x, y, n, d, aad, maxad, rx, ry, rj) + implicit double precision (a-h,o-z) + double precision x(n), y(n), rx(n), ry(n), rj(n), maxad + call jrank(x, y, n, rx, ry, rj) + q = 0d0 + r = 0d0 + s = 0d0 + aad = 0d0 + maxad = 0d0 + z = dfloat(n) + do23018 i=1,n + rxi = rx(i) + ryi = ry(i) + rji = rj(i) + ad = dabs((rji/z) - (rxi/z)*(ryi/z)) + aad = aad + ad + maxad = dmax1(maxad, ad) + q = q + (rxi-1d0)*(rxi-2d0)*(ryi-1d0)*(ryi-2d0) + r = r + (rxi-2d0)*(ryi-2d0)*(rji-1d0) + s = s + (rji-1d0)*(rji-2d0) +23018 continue +23019 continue + aad = aad / z + d = (q-2d0*(z-2d0)*r+(z-2d0)*(z-3d0)*s)/z/(z-1d0)/(z-2d0)/(z-3d0)/ + *(z-4d0) + return + end + subroutine jrank(x, y, n, rx, ry, r) + integer n + double precision x(n), y(n), rx(n), ry(n), r(n), cx, cy, ri, rix, + *riy, xi, yi + do23020 i=1,n + xi = x(i) + yi = y(i) + ri = 1d0 + rix = 1d0 + riy = 1d0 + do23022 j=1,n + if(i .ne. j)then + cx = 0d0 + if(x(j) .lt. xi)then + cx = 1d0 + endif + if(x(j) .eq. xi)then + cx = .5d0 + endif + cy = 0d0 + if(y(j) .lt. yi)then + cy = 1d0 + endif + if(y(j) .eq. yi)then + cy = .5d0 + endif + rix = rix + cx + riy = riy + cy + ri = ri + cx*cy + endif +23022 continue +23023 continue + rx(i) = rix + ry(i) = riy + r(i) = ri +23020 continue +23021 continue + return + end |
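An R-level sketch of what these subroutines compute, via Hmisc's hoeffd; as the loops above show, values of 1e30 or more act as the missing-data sentinel, and npair becomes the n matrix below:

  set.seed(5)
  x <- rnorm(100)
  y <- x^2 + rnorm(100, sd=.5)   # strongly dependent yet nearly uncorrelated
  h <- hoeffd(cbind(x, y))
  h$D                            # matrix of Hoeffding D statistics
  h$n                            # pairwise sample sizes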
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/jacklins.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/jacklins.f Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,26 @@ +C Output from Public domain Ratfor, version 1.01 +C Given w, an n-1 by k matrix of weights, say for linear statistics, +C computes all n leave-out-one linear statistics separately by each +C column of w for the data n-vector x + subroutine jacklins(x, w, n, k, res) + integer n, k, l + double precision x(n), w(n-1,k), res(n,k) + do23000 l=1,k + do23002 j=1,n + sj=0d0 + do23004 i=1,n + if(i.lt.j)then + sj=sj+w(i,l)*x(i) + endif + if(i.gt.j)then + sj=sj+w(i-1,l)*x(i) + endif +23004 continue +23005 continue + res(j,l)=sj +23002 continue +23003 continue +23000 continue +23001 continue + return + end |
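A direct R translation may clarify the indexing: weight column l is paired, in order, with x after observation j is removed. A sketch, not part of the package:

  jacklins.sketch <- function(x, w) {
    n <- length(x)
    sapply(seq_len(ncol(w)), function(l)
           sapply(seq_len(n), function(j) sum(w[, l] * x[-j])))
  }
  x <- c(2, 4, 6, 8)
  w <- matrix(1/3, nrow=3, ncol=1)  # equal weights give leave-one-out means
  jacklins.sketch(x, w)             # 6.00 5.33 4.67 4.00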
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/largrec.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/largrec.f Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,103 @@ + SUBROUTINE largrec(x, y, n, xlim, ylim, width, height, + & numbins, itype, rx, ry) +C ********************************************************* +C * x(n) - array of x values of data points + DOUBLE PRECISION x(*) +C * y(n) - array of y values of data points + DOUBLE PRECISION y(*) +C * n - number of data points + INTEGER n +C * xlim(2) - contains left and right limits of x axis + DOUBLE PRECISION xlim(2) +C * ylim(2) - contains bottom and top limits of y axis + DOUBLE PRECISION ylim(2) +C * width - minimum allowable width of empty space + DOUBLE PRECISION width +C * height - minimum allowable height of empty space + DOUBLE PRECISION height +C * numbins - number of blocks to chunk axis ranges into + INTEGER numbins +C * itype - how to favor box size + INTEGER itype +C * rx(2) - left and right limits of found box + DOUBLE PRECISION rx(2) +C * ry(2) - top and bottom limits of found box + DOUBLE PRECISION ry(2) +C * xd - x distance between x limits + DOUBLE PRECISION xd +C * yd - y distance between y limits + DOUBLE PRECISION yd +C * xinc - amount to add to x axis search box bounds + DOUBLE PRECISION xinc +C * yinc - amount to add to y axis search box bounds + DOUBLE PRECISION yinc +C * xl - left search box bound + DOUBLE PRECISION xl +C * xr - right search box bound + DOUBLE PRECISION xr +C * yb - bottom search box bound + DOUBLE PRECISION yb +C * yt - top search box bound + DOUBLE PRECISION yt +C * i - itterator variable + INTEGER i +C * area - area of empty space + DOUBLE PRECISION area +C * w - width of empty space + DOUBLE PRECISION w +C * h - height of empty space + DOUBLE PRECISION h +C * ar - tempory area storage + DOUBLE PRECISION ar +C + xd = xlim(2)-xlim(1) + yd = ylim(2)-ylim(1) + xinc = xd/DFLOAT(numbins) + yinc = yd/DFLOAT(numbins) + rx(1) = 1d30 + rx(2) = 1d30 + ry(1) = 1d30 + ry(2) = 1d30 + IF(width .GE. xd .OR. height .GE. yd) THEN + RETURN + ENDIF +C + w = 0d0 + h = 0d0 + area = 0d0 +C + xl=xlim(1) + DO WHILE (xl .LE. xlim(2)-width) + yb = ylim(1) + DO WHILE (yb .LE. ylim(2)-height) + xr = xl + width + DO WHILE (xr .LE. xlim(2)) + yt = yb + height + DO WHILE (yt .LE. ylim(2)) + DO i=1,n + IF(x(i) .GE. xl .AND. x(i) .LE. xr .AND. + & y(i) .GE. yb .AND. y(i) .LE. yt) GO TO 1 + ENDDO + ar = (yt-yb)*(xr-xl) + if((itype.EQ.1 .AND. ar .GT. area) .OR. + & (itype.EQ.2 .AND. yt-yb .GE. h .AND. + & xr-xl .GE. w)) THEN + area = ar + w = xr - xl + h = yt - yb + rx(1) = xl + rx(2) = xr + ry(1) = yb + ry(2) = yt + ENDIF + yt = yt + yinc + ENDDO + xr = xr + xinc + ENDDO + 1 CONTINUE + yb = yb + yinc + ENDDO + xl = xl + xinc + ENDDO + RETURN + END |
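The search strategy is easier to see in a brute-force R sketch of the same idea (not the package code): discretize each axis into numbins candidate cut points and keep the largest admissible rectangle containing no data points:

  largrec.sketch <- function(x, y, xlim, ylim, numbins=20) {
    xs <- seq(xlim[1], xlim[2], length.out=numbins)
    ys <- seq(ylim[1], ylim[2], length.out=numbins)
    best <- c(area=0, x1=NA, x2=NA, y1=NA, y2=NA)
    for(x1 in xs) for(x2 in xs[xs > x1])
      for(y1 in ys) for(y2 in ys[ys > y1])
        if(!any(x >= x1 & x <= x2 & y >= y1 & y <= y2)) {
          a <- (x2 - x1) * (y2 - y1)
          if(a > best[['area']]) best <- c(area=a, x1=x1, x2=x2, y1=y1, y2=y2)
        }
    best
  }
  set.seed(6)
  largrec.sketch(runif(30), runif(30), c(0, 1), c(0, 1))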
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/mChoice.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/mChoice.c Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,206 @@ +/* #define USE_RINTERNALS 1 */ + +#include "Hmisc.h" + +static Hmisc_StringBuffer cbuff = {NULL, 0, MAXELTSIZE}; + + +int get_next_mchoice(char **s) +{ + long opt; + int errsv; + char *begin, *end, *err_chk; + S_EVALUATOR + + begin = *s; + + if(begin == NULL) + return 0; + + if(*begin == ';') + end = begin; + else if(*begin == '\0') + /* begin points to end of string set end to NULL */ + end = NULL; + else + /* set end to the location of the the next ';' */ + end = strchr(begin + 1, ';'); + + if(end) { + /* set end value to null and increment */ + *end++ = '\0'; + /* set s to the begining of the next substring */ + *s = end; + } + else + /* end points to the end of the string. Set *s to NULL to + indecate all of string consumed. */ + *s = NULL; + + /* if begin is zero length substring return 0 */ + if(strlen(begin) == 0) + return 0; + + /* convert substring begin into its integer value */ + /* set errno to zero */ + errno = 0; + opt = strtol(begin, &err_chk, 10); + + /* Check to see if an error occured in strtol */ + if(errno != 0) { + errsv = errno; + PROBLEM "string to integer conversion error: %s", strerror(errsv) ERROR; + } + + if(err_chk == begin || *err_chk != '\0') + PROBLEM "string %s is not a valid integer number", begin ERROR; + + /* return the integer mChoice option */ + return (int)opt; +} + +SEXP do_mchoice_match(SEXP x, SEXP table, SEXP nomatch) +{ + SEXP elm_index; /* Storage for value of first row of + first match of each element in x *\/ */ + R_len_t len; /* Number of elements in x */ + R_len_t nfound = 0; /* count of number of elements of + x matched in table */ + char *str_ptr; /* current location pointer */ + const char *str; + int i, j, comp; + size_t slen; /* length of string */ + + S_EVALUATOR + /* get number of elements in x */ + len = LENGTH(x); + + /* allocate an index vector of the same length as x */ + PROTECT(elm_index = NEW_INTEGER(len)); + + /* set all values in elm_index to 0 */ + memset((int *)INTEGER_POINTER(elm_index), 0, len * sizeof(int)); + + /* count number of x values that are zero and set nfound to that */ + for(i=0; i < len; i++) { + if(INTEGER_POINTER(x)[i] == 0) { + INTEGER_POINTER(elm_index)[i] = INTEGER_POINTER(nomatch)[0]; + nfound++; + } + } + + + /* iterate through each element of table looking for matches to values in x. + it is done this way because parsing the mChoice string is expensive and looping is not. 
*/ + for(i=0; i < LENGTH(table) && nfound < len; i++) { + if(STRING_ELT(table, i) == NA_STRING) + continue; + + str = translateCharUTF8(STRING_ELT(table, i)); + slen = strlen(str) + 1; + + str_ptr = Hmisc_AllocStringBuffer((slen) * sizeof(char), &cbuff); + strncpy(str_ptr, str, slen); + str_ptr[slen] = '\0'; + + while(str_ptr != NULL && nfound < len) { + /* get the next component of the mChoice string */ + comp = get_next_mchoice(&str_ptr); + + /* if comp is zero the next component was blank continue */ + if(comp == 0) + continue; + + /* Compare the component to all elements of x */ + for(j = 0; j < len && nfound < len; j++) { + /* If the element index is not zero that value has been prevously + matched continue to next value */ + if(INTEGER_POINTER(elm_index)[j] || INTEGER_POINTER(x)[j] == 0) + continue; + + if(INTEGER_POINTER(x)[j] == comp) { + nfound++; + INTEGER_POINTER(elm_index)[j] = i+1; + } + } + } + } + + Hmisc_FreeStringBuffer(&cbuff); + + if(nfound < len) { + /* if not all elements of x are matched to those in table + set the elements of elmt_index that are zero to the value + of nomatch */ + for(i=0; i < len; i++) { + if(INTEGER_POINTER(elm_index)[i] == 0) { + INTEGER_POINTER(elm_index)[i] = INTEGER_POINTER(nomatch)[0]; + } + } + } + + UNPROTECT(1); + return(elm_index); +} + + +SEXP do_mchoice_equals(SEXP x, SEXP y) +{ + int x_len = LENGTH(x); /* length of x vector */ + int y_len = LENGTH(y); /* length of y vector */ + SEXP ans; /* Logical return vector */ + int nfound = 0; /* number of matches found */ + int i,j, comp; /* iterators */ + size_t slen; + char *str_ptr; /* copy of the x string element */ + const char *str; + + S_EVALUATOR + + if(!IS_INTEGER(y) || y_len == 0) + PROBLEM "y must be an integer vector of at least length one." ERROR; + + PROTECT(ans = NEW_LOGICAL(x_len)); + + for(i=0; i < x_len; ++i) { + nfound = 0; + str = translateCharUTF8(STRING_ELT(x, i)); + + slen = strlen(str) + 1; + + /* if length of x element is zero or NA no posible match */ + if(STRING_ELT(x, i) == NA_STRING) { + SET_NA_LGL(LOGICAL_POINTER(ans)[i]); + continue; + } + if(slen == 0) { + LOGICAL_POINTER(ans)[i] = 0; + continue; + } + + str_ptr = Hmisc_AllocStringBuffer((slen) * sizeof(char), &cbuff); + strncpy(str_ptr, str, slen); + str_ptr[slen] = '\0'; + + while(str_ptr != NULL && nfound < y_len) { + comp = get_next_mchoice(&str_ptr); + + for(j=0; j < y_len; j++) { + if(comp == INTEGER_POINTER(y)[j]) { + nfound++; + break; + } + } + } + + if(nfound < y_len) + LOGICAL_POINTER(ans)[i] = 0; + else + LOGICAL_POINTER(ans)[i] = 1; + } + + Hmisc_FreeStringBuffer(&cbuff); + UNPROTECT(1); + return(ans); +} + |
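The strings these parsers consume are the mChoice representation: semicolon-separated integer level codes such as "1;3". An R-level sketch, assuming Hmisc is attached (inmChoice is the documented front end for this matching; exact printed output may vary):

  x1 <- c('cough', 'fever', '')
  x2 <- c('fever', '',      '')
  m  <- mChoice(x1, x2, label='symptoms')
  format(m)              # e.g. "cough;fever" "fever" ""
  unclass(m)             # the coded strings the C code parses, e.g. "1;2" "2" ""
  inmChoice(m, 'fever')  # TRUE TRUE FALSE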
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/maxempr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/maxempr.f Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,68 @@ +C Output from Public domain Ratfor, version 1.01 + subroutine maxempr(ax, ay, x, y, n, w, h, z, area, rect) + implicit double precision (a-h,o-z) + integer n + double precision ax(2), ay(2), x(n), y(n), z(3), rect(4), maxr, li + maxr = z(1) * dabs(ay(2) - ay(1)) + rect(1) = z(2) + rect(2) = ay(1) + rect(3) = z(3) + rect(4) = ay(2) + do23000 i=1,n + tl = ax(1) + tr = ax(2) + if(i .lt. n)then + do23004 j=(i+1),n + if(x(j) .gt. tl .and. x(j) .lt. tr)then + area = (tr - tl) * (y(j) - y(i)) + if(area .gt. maxr .and. ((tr - tl) .gt. w) .and. ((y(j) - y(i)) .g + *t. h))then + maxr = area + rect(1) = tl + rect(2) = y(i) + rect(3) = tr + rect(4) = y(j) + endif + if(x(j) .gt. x(i))then + tr = x(j) + else + tl = x(j) + endif + endif +23004 continue +23005 continue + endif + area = (tr - tl) * (ay(2) - y(i)) + if(area .gt. maxr .and. ((tr - tl) .gt. w) .and. ((ay(2) - y(i)) . + *gt. h))then + maxr = area + rect(1) = tl + rect(2) = y(i) + rect(3) = tr + rect(4) = ay(2) + endif + ri = ax(2) + li = ax(1) + do23014 k=1,n + if(y(k) .lt. y(i) .and. x(k) .gt. x(i))then + ri = dmin1(ri, x(k)) + endif + if(y(k) .lt. y(i) .and. x(k) .lt. x(i))then + li = dmax1(li, x(k)) + endif +23014 continue +23015 continue + area = (ri - li) * (ay(2) - y(i)) + if(area .gt. maxr .and. ((ri - li) .gt. w) .and. ((y(i) - ay(1)) . + *gt. h))then + maxr = area + rect(1) = li + rect(2) = ay(1) + rect(3) = ri + rect(4) = y(i) + endif +23000 continue +23001 continue + area = maxr + return + end |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/nstr.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/nstr.c Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,65 @@
+#include "Hmisc.h"
+
+static Hmisc_StringBuffer cbuff = {NULL, 0, MAXELTSIZE};
+
+
+SEXP do_nstr(SEXP s, SEXP n)
+{
+    SEXP ans;             /* the returned character string */
+    int i, j;             /* the length of the string and number of reps */
+    int s_counter = 0, n_counter = 0;
+    int longest, s_length, n_length;
+
+    S_EVALUATOR
+
+    n_length = length(n);
+    s_length = length(s);
+
+    longest = n_length < s_length ? s_length : n_length;
+
+    if(n_length == 1 && INTEGER(n)[0] == 1)
+        return s;
+
+    PROTECT(ans = allocVector(STRSXP, longest));
+
+    for(i=0; i < longest; i++)
+    {
+        int n_reps = INTEGER(n)[n_counter];
+
+        if(n_reps < 1)
+        {
+            SET_STRING_ELT(ans, i, mkChar(""));
+        }
+        else if(n_reps == 1)
+        {
+            SET_STRING_ELT(ans, i, duplicate(STRING_ELT(s, s_counter)));
+        }
+        else
+        {
+            char *cbuf, *buf;
+            const char *seg;
+            size_t seg_len;
+
+            seg = CHAR(STRING_ELT(s, s_counter));
+            seg_len = strlen(seg);
+            cbuf = buf = Hmisc_AllocStringBuffer((n_reps * seg_len + 1) * sizeof(char), &cbuff);
+
+            for(j=0; j < n_reps; ++j)
+            {
+                strcpy(buf, seg);
+                buf += seg_len;
+            }
+            *buf = '\0';
+
+            SET_STRING_ELT(ans, i, mkChar(cbuf));
+        }
+
+        n_counter = (n_counter + 1 < n_length) ? n_counter + 1 : 0;
+        s_counter = (s_counter + 1 < s_length) ? s_counter + 1 : 0;
+    }
+
+    Hmisc_FreeStringBuffer(&cbuff);
+
+    UNPROTECT(1);
+    return ans;
+} |
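This routine backs Hmisc's nstr, which recycles both the string and the repetition count. A sketch assuming Hmisc is attached:

  nstr('ab', 3)                # "ababab"
  nstr(c('a', 'b'), c(2, 3))   # "aa"  "bbb"
  nstr('-', 0:2)               # ""  "-"  "--"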
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/ranksort.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/ranksort.c Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,98 @@ +#include "R.h" + +void sort2(np,ra,rb) +int *np; +double ra[]; +int rb[]; +{ + int l,j,ir,i,n,rrb,*xrb; + double rra,*xra; + + n = *np; + xra=ra-1; + xrb=rb-1; + + l=(n >> 1)+1; + ir=n; + for(;;) { + if(l > 1) { + rra=xra[--l]; + rrb=xrb[l]; + } else { + rra=xra[ir]; + rrb=xrb[ir]; + xra[ir]=xra[1]; + xrb[ir]=xrb[1]; + if(--ir == 1) { + xra[1]=rra; + xrb[1]=rrb; + return; + } + } + i=l; + j=l << 1; + while (j <= ir) { + if ( j < ir && xra[j] < xra[j+1]) ++j; + if (rra < xra[j]) { + xra[i]=xra[j]; + xrb[i]=xrb[j]; + j += (i=j); + } + else j=ir+1; + } + xra[i]=rra; + xrb[i]=rrb; + } +} + + +void crank(np, w) +int *np; +double w[]; + +{ + int n,j=1,ji,jt; + double rank,*xw; + + n = *np; + xw = w-1; + + while (j < n) { + if(xw[j+1] != xw[j]) { + xw[j]=j; + ++j; + } else { + for (jt=j+1;jt<=n;jt++) + if (xw[jt] != xw[j]) break; + rank=0.5*(j+jt-1); + for (ji=j;ji<=(jt-1);ji++) xw[ji]=rank; + j=jt; + } + } + if (j == n) xw[n]=n; +} + + +void F77_SUB(rank)(np, x, w, ix, r) +int *np, ix[]; +double x[],r[],w[]; + +{ + int n, *xix, i; + double *xx, *xr, *xw; + n = *np; + xx = x-1; + xix = ix-1; + xr = r-1; + xw = w-1; + + for(i=1; i<=n; i++) { + xix[i]=i; + xw[i]=xx[i]; + } + sort2(np, w, ix); + crank(np, w); + for(i=1; i<=n; i++) xr[xix[i]] = xw[i]; +} + + |
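crank assigns midranks to ties, the same convention as base R's rank() with its default ties.method; a one-line check with hypothetical values:

  rank(c(3, 1, 4, 1, 5))   # 3.0 1.5 4.0 1.5 5.0 -- ties share the average rank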
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/ratfor/cidxcn.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/ratfor/cidxcn.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,77 @@ +#------------------------------------------------------------------------ +# Compute c-index (c) and Brown-Hollander-Krowar-Goodman-Kruskal-Somer +# rank correlation (gamma) between X and Y with censoring indicator E +# Also returns number of relevant, concordant, and uncertain pairs +# (nrel, nconc, nuncert) and estimated s.d. of gamma (sd) using +# Quade formula (see SAS PROC MATPAR). Pairs with tied x are +# excluded if outx=.TRUE. +# +# F. Harrell 27Nov90 +# Modification of SAS Procedure KGKC (1980) +#------------------------------------------------------------------------- +SUBROUTINE cidxcn(x,y,e,n,nrel,nconc,nuncert,c,gamma,sd,outx) +IMPLICIT DOUBLE PRECISION (a-h,o-z) +DOUBLE PRECISION x(n),y(n),dx,dy +LOGICAL e(n),outx +DOUBLE PRECISION nrel,nuncert,nconc +nconc=0d0 +nrel=0d0 +nuncert=0d0 +sumr=0d0 +sumr2=0d0 +sumw=0d0 +sumw2=0d0 +sumrw=0d0 +do i=1,n { + wi=0d0 + ri=0d0 + do j=1,n + if(j^=i) { + dx=x(i)-x(j) + dy=y(i)-y(j) + if(dx!=0. | !outx) { + if((e(i)&dy<0.)|(e(i)&!e(j)&dy==0.)) { + if(dx<0.) { + nconc=nconc+1d0 + wi=wi+1d0 } + else + if(dx==0.)nconc=nconc+.5d0 + else + wi=wi-1d0 + nrel=nrel+1d0 + ri=ri+1d0 } + else + if((e(j)&dy>0.)|(e(j)&!e(i)&dy==0.)) { + if(dx>0.) { + nconc=nconc+1d0 + wi=wi+1d0 } + else + if(dx==0.) nconc=nconc+.5d0 + else + wi=wi-1d0 + nrel=nrel+1d0 + ri=ri+1d0 } + else + if(!(e(i)&e(j)))nuncert=nuncert+1d0 } + } + sumr=sumr+ri + sumr2=sumr2+ri*ri + sumw=sumw+wi + sumw2=sumw2+wi*wi + sumrw=sumrw+ri*wi + } +c=nconc/nrel +gamma=2.*(c-.5) +#call dblepr('sumr',4,sumr,1) +#call dblepr('sumw',4,sumw,1) +#call dblepr('sumr2',5,sumr2,1) +#call dblepr('sumw2',5,sumw2,1) +#call dblepr('sumrw',5,sumrw,1) +sd=sumr2*sumw**2-2d0*sumr*sumw*sumrw+sumw2*sumr**2 +sd=2.*dsqrt(sd)/sumr/sumr +return +end + + + + |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/ratfor/cidxcp.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/ratfor/cidxcp.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,69 @@ +subroutine cidxcp(x1,x2,y,e,n,method,outx, + nrel,nuncer,c1,c2,gamma1,gamma2,gamma,sd,c12,c21) +implicit DOUBLE PRECISION (a-h,o-z) +DOUBLE PRECISION x1(n),x2(n),y(n) +logical e(n),outx +integer n,method,i,j +DOUBLE PRECISION nrel,nuncer,nconc1,nconc2,c1,c2,gamma1,gamma2, + sumr,sumr2,sumw,sumw2,sumrw, + wi,ri,sumc,c12,c21,gamma,sd +DOUBLE PRECISION dx,dx2,dy +nconc1=0d0;nconc2=0d0;nrel=0d0;nuncer=0d0;sumr=0d0;sumr2=0d0;sumw=0d0; +sumw2=0d0;sumrw=0d0;sumc=0d0; +do i=1,n { + wi=0d0;ri=0d0; + do j=1,n { + dx=x1(i)-x1(j);dx2=x2(i)-x2(j); + if((i!=j) & (!outx | dx!=0. | dx2!=0.)) { + dy=y(i)-y(j); + if ((e(i)&(dy<0.))|(e(i)&^e(j)&(dy==0.))) { + nrel=nrel+1d0; + nconc1=nconc1+(z(dx<0.)+.5D0*z(dx==0.)); + nconc2=nconc2+(z(dx2<0.)+.5D0*z(dx2==0.)); + ri=ri+1d0; + if (method==1) { + wi=wi+(z(dx<dx2)-z(dx>dx2)); + sumc=sumc+z(dx<dx2); + } + else { + wi=wi+(z(dx<0.&dx2>=0.)-z(dx>0.&dx2<=0.)); + sumc=sumc+z(dx<0.&dx2>=0.); + } + } + else if ((e(j)&(dy>0.))|(e(j)&^e(i)&(dy==0.))) { + nrel=nrel+1d0; + nconc1=nconc1+(z(dx>0.)+.5D0*z(dx==0.)); + nconc2=nconc2+(z(dx2>0.)+.5D0*z(dx2==0.)); + ri=ri+1d0; + if (method==1) { + wi=wi+(z(dx>dx2)-z(dx<dx2)); + sumc=sumc+z(dx>dx2); + } + else { + wi=wi+(z(dx>0.&dx2<=0.)-z(dx<0.&dx2>=0.)); + sumc=sumc+z(dx>0.&dx2<=0.); + } + } + else if (^(e(i)&e(j))) nuncer=nuncer+1d0; + } + } + sumr=sumr+ri; sumr2=sumr2+ri*ri + sumw=sumw+wi; sumw2=sumw2+wi*wi; sumrw=sumrw+ri*wi +} +c1=nconc1/nrel; gamma1=2D0*(c1-.5D0); +c2=nconc2/nrel; gamma2=2D0*(c2-.5D0); +gamma=sumw/sumr +sd=sumr2*sumw**2-2D0*sumr*sumw*sumrw+sumw2*sumr**2; +sd=2D0*dsqrt(sd)/sumr/sumr; +c12=sumc/sumr; c21=sumc/sumr-gamma +return +end + +function z(a) +DOUBLE PRECISION z +logical a +if(a)z=1d0 +else z=0d0 +return +end + |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/ratfor/hoeffd.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/ratfor/hoeffd.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,121 @@ +# ratfor -o ../hoeffd.f hoeffd.r +# +SUBROUTINE hoeffd(xx, n, p, dmat, aadmat, madmat, npair, x, y, rx, ry, rj) +IMPLICIT DOUBLE PRECISION (a-h,o-z) +INTEGER p, npair(p,p) +DOUBLE PRECISION xx(n,p), dmat(p,p), aadmat(p,p), madmat(p,p), x(n), y(n), rx(n), ry(n), rj(n), maxad + + DO i=1, p { + np=0 + DO k=1, n { + if(xx(k,i) < 1e30) np = np + 1 + } + npair(i,i) = np + + DO j=(i+1),p { + m = 0 + DO k=1,n { + xk = xx(k,i) + yk = xx(k,j) + if(xk < 1e30 & yk < 1e30) { + m = m + 1 + x(m) = xk + y(m) = yk + } + } + npair(i,j) = m + if(m > 4) { + CALL hoeff(x, y, m, d, aad, maxad, rx, ry, rj) + dmat(i,j) = d + aadmat(i,j) = aad + madmat(i,j) = maxad + } + else dmat(i,j) = 1e30 + } + } + DO i=1,p { + dmat(i,i) = 1d0/30d0 + DO j=(i+1),p { + dmat(j,i) = dmat(i,j) + npair(j,i) = npair(i,j) + aadmat(j,i) = aadmat(i,j) + madmat(j,i) = madmat(i,j) + } + } + RETURN +END + +SUBROUTINE hoeff(x, y, n, d, aad, maxad, rx, ry, rj) +IMPLICIT DOUBLE PRECISION (a-h,o-z) +DOUBLE PRECISION x(n), y(n), rx(n), ry(n), rj(n), maxad +# INTEGER iwork(1) +# CALL rank(n, x, work, iwork, rx) +# CALL rank(n, y, work, iwork, ry) + CALL jrank(x, y, n, rx, ry, rj) + q = 0d0 + r = 0d0 + s = 0d0 + aad = 0d0 + maxad = 0d0 + z = dfloat(n) + DO i=1,n { + rxi = rx(i) + ryi = ry(i) + rji = rj(i) + ad = dabs((rji/z) - (rxi/z)*(ryi/z)) + aad = aad + ad + maxad = dmax1(maxad, ad) + q = q + (rxi-1d0)*(rxi-2d0)*(ryi-1d0)*(ryi-2d0) + r = r + (rxi-2d0)*(ryi-2d0)*(rji-1d0) + s = s + (rji-1d0)*(rji-2d0) + } + aad = aad / z + d = (q-2d0*(z-2d0)*r+(z-2d0)*(z-3d0)*s)/z/(z-1d0)/(z-2d0)/(z-3d0)/(z-4d0) + RETURN +END + +# Use C version of this which is much faster (since uses a sort) +# SUBROUTINE rank(x, n, r) # simple rank with midranks for ties +# REAL*4 x(1), r(1) +# DO i=1,n { +# xi=x(i) +# ir=2 # will be 2*rank(x(i)) +# DO j=1,n { +# if(i.ne.j) { +# if(x(j)<xi) ir=ir+2 +# if(x(j)==xi) ir=ir+1 +# } +# } +# r(i)=float(ir)/2.0 +# } +# RETURN +# END + +SUBROUTINE jrank(x, y, n, rx, ry, r) # joint rank with midranks for ties +INTEGER n +DOUBLE PRECISION x(n), y(n), rx(n), ry(n), r(n), cx, cy, ri, rix, riy, xi, yi + DO i=1,n { + xi = x(i) + yi = y(i) + ri = 1d0 + rix = 1d0 + riy = 1d0 + DO j=1,n { + if(i .ne. j) { + cx = 0d0 + if(x(j) < xi) cx = 1d0 + if(x(j) == xi) cx = .5d0 + cy = 0d0 + if(y(j) < yi) cy = 1d0 + if(y(j) == yi) cy = .5d0 + rix = rix + cx + riy = riy + cy + ri = ri + cx*cy + } + } + rx(i) = rix + ry(i) = riy + r(i) = ri + } + RETURN +END |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/ratfor/jacklins.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/ratfor/jacklins.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,21 @@ +# Given w, an n-1 by k matrix of weights, say for linear statistics, +# computes all n leave-out-one linear statistics separately by each +# column of w for the data n-vector x + +subroutine jacklins(x, w, n, k, res) +INTEGER n, k, l +DOUBLE PRECISION x(n), w(n-1,k), res(n,k) + +do l=1,k { + do j=1,n { + sj=0d0 + do i=1,n { + if(i<j) sj=sj+w(i,l)*x(i) + if(i>j) sj=sj+w(i-1,l)*x(i) + } + res(j,l)=sj + } +} +return +end + |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/ratfor/maxempr.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/ratfor/maxempr.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,72 @@ +# Converted from R code provided by Hans Werner Borchers <hwborchers@googlemail.com> +# ax = x-limits for region of interest +# ay = y-limits " " +# x, y = coordinates of points to avoid +# Assume x, y are sorted in y-order, e.g +# o = order(y); x <- x[o]; y <- y[o] +# n = length(x) = length(y) +# z = c(D[m], d[m], d[m+1]), d=sort(c(ax,x)), D=diff(d), m=which.max(D) +# Output: area, rect[4] +# To convert to Fortran: +# sudo apt-get install ratfor +# ratfor -o ../maxempr.f maxempr.r +SUBROUTINE maxempr(ax, ay, x, y, n, w, h, z, area, rect) +IMPLICIT DOUBLE PRECISION (a-h,o-z) +INTEGER n +DOUBLE PRECISION ax(2), ay(2), x(n), y(n), z(3), rect(4), maxr, li +# check vertical slices +maxr = z(1) * dabs(ay(2) - ay(1)) +rect(1) = z(2) +rect(2) = ay(1) +rect(3) = z(3) +rect(4) = ay(2) + +do i=1,n { + tl = ax(1); tr = ax(2) + if (i < n) { + do j=(i+1),n { + if (x(j) > tl & x(j) < tr) { + ## check horizontal slices (j == i+1) + ## and (all) rectangles above (x(i), y(i)) + area = (tr - tl) * (y(j) - y(i)) + if (area > maxr & ((tr - tl) > w) & ((y(j) - y(i)) > h)) { + maxr = area + rect(1) = tl + rect(2) = y(i) + rect(3) = tr + rect(4) = y(j) + } + if (x(j) > x(i)) tr = x(j) + else tl = x(j) + } + } + } + ## check open rectangles above (x(i), y(i)) + area = (tr - tl) * (ay(2) - y(i)) + if (area > maxr & ((tr - tl) > w) & + ((ay(2) - y(i)) > h)) { + maxr = area + rect(1) = tl + rect(2) = y(i) + rect(3) = tr + rect(4) = ay(2) + } + ## check open rectangles below (x(i), y(i)) + ri = ax(2); li = ax(1) + do k=1,n { + if(y(k) < y(i) & x(k) > x(i)) ri = dmin1(ri, x(k)) + if(y(k) < y(i) & x(k) < x(i)) li = dmax1(li, x(k)) + } + area = (ri - li) * (ay(2) - y(i)) + if (area > maxr & ((ri - li) > w) & + ((y(i) - ay(1)) > h)) { + maxr = area + rect(1) = li + rect(2) = ay(1) + rect(3) = ri + rect(4) = y(i) + } +} +area = maxr +return +end |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/ratfor/rcorr.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/ratfor/rcorr.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,76 @@ +SUBROUTINE rcorr(xx, n, p, itype, dmat, npair, x, y, rx, ry, work, iwork) +INTEGER p, npair(p,p) +DOUBLE PRECISION xx(n,p), dmat(p,p), x(n), y(n), rx(n), ry(n), work(n) +INTEGER iwork(n) +DOUBLE PRECISION sumx,sumx2,sumy,sumy2,sumxy,z,a,b + +DO i=1, p { + np=0 + DO k=1, n { + if(xx(k,i)<1e30) np=np+1 + } + npair(i,i)=np + + DO j=(i+1),p { + m=0 + if(itype==1) { + sumx=0d0; sumy=0d0; sumx2=0d0; sumy2=0d0; sumxy=0d0 + } + DO k=1,n { + xk=xx(k,i) + yk=xx(k,j) + if(xk<1e30 & yk<1e30) { + m=m+1 + if(itype==1) { + a=xk; b=yk + sumx=sumx+a + sumx2=sumx2+a*a + sumy=sumy+b + sumy2=sumy2+b*b + sumxy=sumxy+a*b + } else { + x(m)=xk + y(m)=yk + } + } + } + npair(i,j)=m + if(m>1) { + if(itype==1) { + z=m + d=(sumxy-sumx*sumy/z)/dsqrt((sumx2-sumx*sumx/z)*(sumy2-sumy*sumy/z)) + } else CALL docorr(x, y, m, d, rx, ry, work, iwork) + dmat(i,j)=d + } else dmat(i,j)=1e30 + } +} +DO i=1,p { + dmat(i,i)=1. + DO j=(i+1),p { + dmat(j,i)=dmat(i,j) + npair(j,i)=npair(i,j) + } +} +RETURN +END + + SUBROUTINE docorr(x, y, n, d, rx, ry, work, iwork) + DOUBLE PRECISION x(1), y(1), rx(n), ry(n), work(1) + INTEGER iwork(1) + DOUBLE PRECISION sumx,sumx2,sumy,sumy2,sumxy,a,b,z + CALL rank(n, x, work, iwork, rx) + CALL rank(n, y, work, iwork, ry) + sumx=0d0; sumx2=0d0; sumy=0d0; sumy2=0d0; sumxy=0d0 + DO i=1,n { + a=rx(i) + b=ry(i) + sumx=sumx+a + sumx2=sumx2+a*a + sumy=sumy+b + sumy2=sumy2+b*b + sumxy=sumxy+a*b + } + z=n + d=(sumxy-sumx*sumy/z)/dsqrt((sumx2-sumx*sumx/z)*(sumy2-sumy*sumy/z)) + RETURN + END |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/ratfor/wclosest.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/ratfor/wclosest.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,50 @@ +SUBROUTINE wclosest(w, x, lw, lx, j) +IMPLICIT DOUBLE PRECISION (a-h,o-z) +INTEGER lw, lx, j(lw) +DOUBLE PRECISION w(lw), x(lx) + +do i=1,lw { + wi=w(i) + dmin=1d40 + m=0 + do k=1,lx { + d = dabs(x(k) - wi) + if(d < dmin) { + dmin = d + m = k + } + } + j(i) = m +} +return +end + +SUBROUTINE wclosepw(w, x, r, f, lw, lx, xd, j) +IMPLICIT DOUBLE PRECISION (a-h,o-z) +DOUBLE PRECISION w(lw),x(lx),r(lw),xd(lx) +INTEGER lw, lx, j(lw) +do i=1, lw { + wi = w(i) + dmean = 0d0 + do k=1, lx { + xd(k) = dabs(x(k) - wi) + dmean = dmean + xd(k) + } + dmean = f*dmean/dfloat(lx) + sump = 0d0 + do k=1, lx { + z = min(xd(k)/dmean, 1d0) + xd(k) = (1d0 - z**3)**3 + sump = sump + xd(k) + } + prob = 0d0 + ri = r(i) + m = 1 + do k=1, lx { + prob = prob + xd(k) / sump + if(ri > prob) m = m + 1 + } + j(i) = m +} +return +end |
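For each w(i), wclosest returns the index of the nearest element of x; wclosepw instead makes a probability-weighted draw using a tricube kernel of the distances. A pure-R check of the first routine (wclosest_r is a hypothetical helper for testing, not part of the package):

  wclosest_r <- function(w, x)
    vapply(w, function(wi) which.min(abs(x - wi)), integer(1))
  wclosest_r(c(1.2, 3.9), c(1, 2, 3, 4))   # 1 4
  # which.min keeps the first index on ties, matching the strict d < dmin test above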
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/rcorr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/rcorr.f Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,100 @@ +C Output from Public domain Ratfor, version 1.01 + subroutine rcorr(xx, n, p, itype, dmat, npair, x, y, rx, ry, work, + * iwork) + integer p, npair(p,p) + double precision xx(n,p), dmat(p,p), x(n), y(n), rx(n), ry(n), wor + *k(n) + integer iwork(n) + double precision sumx,sumx2,sumy,sumy2,sumxy,z,a,b + do23000 i=1, p + np=0 + do23002 k=1, n + if(xx(k,i).lt.1e30)then + np=np+1 + endif +23002 continue +23003 continue + npair(i,i)=np + do23006 j=(i+1),p + m=0 + if(itype.eq.1)then + sumx=0d0 + sumy=0d0 + sumx2=0d0 + sumy2=0d0 + sumxy=0d0 + endif + do23010 k=1,n + xk=xx(k,i) + yk=xx(k,j) + if(xk.lt.1e30 .and. yk.lt.1e30)then + m=m+1 + if(itype.eq.1)then + a=xk + b=yk + sumx=sumx+a + sumx2=sumx2+a*a + sumy=sumy+b + sumy2=sumy2+b*b + sumxy=sumxy+a*b + else + x(m)=xk + y(m)=yk + endif + endif +23010 continue +23011 continue + npair(i,j)=m + if(m.gt.1)then + if(itype.eq.1)then + z=m + d=(sumxy-sumx*sumy/z)/dsqrt((sumx2-sumx*sumx/z)*(sumy2-sumy*sumy/z + *)) + else + call docorr(x, y, m, d, rx, ry, work, iwork) + endif + dmat(i,j)=d + else + dmat(i,j)=1e30 + endif +23006 continue +23007 continue +23000 continue +23001 continue + do23020 i=1,p + dmat(i,i)=1. + do23022 j=(i+1),p + dmat(j,i)=dmat(i,j) + npair(j,i)=npair(i,j) +23022 continue +23023 continue +23020 continue +23021 continue + return + end + subroutine docorr(x, y, n, d, rx, ry, work, iwork) + double precision x(1), y(1), rx(n), ry(n), work(1) + integer iwork(1) + double precision sumx,sumx2,sumy,sumy2,sumxy,a,b,z + call rank(n, x, work, iwork, rx) + call rank(n, y, work, iwork, ry) + sumx=0d0 + sumx2=0d0 + sumy=0d0 + sumy2=0d0 + sumxy=0d0 + do23024 i=1,n + a=rx(i) + b=ry(i) + sumx=sumx+a + sumx2=sumx2+a*a + sumy=sumy+b + sumy2=sumy2+b*b + sumxy=sumxy+a*b +23024 continue +23025 continue + z=n + d=(sumxy-sumx*sumy/z)/dsqrt((sumx2-sumx*sumx/z)*(sumy2-sumy*sumy/z + *)) + return + end |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/sas/exportlib.sas --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/sas/exportlib.sas Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,42 @@
+/* Macro exportlib
+
+   Exports all SAS datasets in a data library to csv files.  One of
+   the datasets is assumed to be the result of PROC FORMAT CNTLOUT=
+   if any user formats are referenced.  Numeric variables are
+   formatted in BEST14 format so that date/time variables will be
+   exported with their internal numeric values.  A special file
+   _contents_.csv is created to hold, for all datasets combined, the
+   dataset name, dataset label, variable names, labels, formats,
+   types, and lengths.
+
+   Usage:
+
+   %INCLUDE "foo\exportlib.sas";   * Define macro;
+   LIBNAME lib ...;                * E.g. LIBNAME d SASV5XPT "foo.xpt";
+   %exportlib(lib, outdir, tempdir);
+
+   Arguments:
+     lib     - SAS libname for input datasets
+     outdir  - directory in which to write .csv files (default ".")
+     tempdir - temporary directory to hold generated SAS code
+               (default C:/WINDOWS/TEMP)
+ */
+%macro exportlib(lib, outdir, tempdir);
+%IF %QUOTE(&outdir)= %THEN %LET outdir=.;
+%IF %QUOTE(&tempdir)= %THEN %LET tempdir=C:/WINDOWS/TEMP;
+OPTIONS NOFMTERR;
+PROC COPY IN=&lib OUT=work;RUN;
+PROC CONTENTS DATA=work._ALL_ NOPRINT
+  OUT=_contents_(KEEP=memname memlabel name type label format length
+  nobs);RUN;
+PROC EXPORT DATA=_contents_ OUTFILE="&outdir/_contents_.csv" REPLACE;RUN;
+DATA _NULL_; SET _contents_; BY MEMNAME;
+  FILE "&tempdir/_export_.sas"; RETAIN bk -1;
+  if FIRST.MEMNAME & (NOBS > 0) THEN DO;
+    PUT "DATA " MEMNAME "; SET " MEMNAME ";FORMAT _NUMERIC_ BEST14.;RUN;";
+    PUT "PROC EXPORT DATA=" MEMNAME " OUTFILE=" '"' "&outdir/"
+        MEMNAME +bk ".csv" '" ' "REPLACE;RUN;";
+  END;
+  RUN;
+%INCLUDE "&tempdir/_export_.sas";RUN;
+%MEND exportlib; |
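The CSV files written by this macro can be pulled back into R with Hmisc; a sketch assuming the working directory holds the exported files (the TEST dataset and _contents_.csv appear under Hmisc/tests/csv in this changeset):

  require(Hmisc)
  meta <- read.csv('_contents_.csv')   # dataset/variable metadata
  d    <- csv.get('TEST.csv')          # one exported dataset
  # date/time variables arrive as raw SAS numerics (BEST14.), e.g. days since 1960-01-01:
  d$d1 <- as.Date(d$d1, origin='1960-01-01')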
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/string_box.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/string_box.c Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,61 @@ +#include <R.h> +#include <Rinternals.h> + +SEXP string_box(SEXP string) +{ + int i,j; + int num_string = LENGTH(string); + SEXP ans; + SEXP names; + SEXP height; + SEXP width; + + PROTECT(ans = allocVector(VECSXP, 2)); + SET_VECTOR_ELT(ans, 0, height = allocVector(INTSXP, num_string)); + SET_VECTOR_ELT(ans, 1, width = allocVector(INTSXP, num_string)); + setAttrib(ans, R_NamesSymbol, names = allocVector(STRSXP, 2)); + + SET_STRING_ELT(names, 0, mkChar("rows")); + SET_STRING_ELT(names, 1, mkChar("columns")); + + + for(i=0; i < num_string; i++) + { + int str_width = 0; + int str_subwidth = 0; + int str_height= 0; + + const char *substring = CHAR(STRING_ELT(string, i)); + + j = 0; + + while(substring[j] != '\0') + { + if(substring[j] == '\n') + { + if(str_subwidth > str_width) + str_width = str_subwidth; + + str_subwidth = 0; + str_height++; + } + else + str_subwidth++; + j++; + } + + if(j > 0) + str_height++; + + if(str_subwidth > str_width) + INTEGER(width)[i] = str_subwidth; + else + INTEGER(width)[i] = str_width; + + INTEGER(height)[i] = str_height; + } + + + UNPROTECT(1); + return(ans); +} |
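A rough R-level equivalent of what string_box computes (rows = number of newline-separated lines, columns = width of the longest line), ignoring trailing-newline edge cases; for checking only, not part of the package:

  string_box_r <- function(s) {
    parts <- strsplit(s, '\n', fixed=TRUE)
    list(rows    = vapply(parts, length, integer(1)),
         columns = vapply(parts, function(p) max(nchar(p), 0L), integer(1)))
  }
  string_box_r(c('ab\ncdef', 'x'))   # rows 2 1; columns 4 1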
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/src/wclosest.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/src/wclosest.f Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,57 @@ +C Output from Public domain Ratfor, version 1.01 + subroutine wclosest(w, x, lw, lx, j) + implicit double precision (a-h,o-z) + integer lw, lx, j(lw) + double precision w(lw), x(lx) + do23000 i=1,lw + wi=w(i) + dmin=1d40 + m=0 + do23002 k=1,lx + d = dabs(x(k) - wi) + if(d .lt. dmin)then + dmin = d + m = k + endif +23002 continue +23003 continue + j(i) = m +23000 continue +23001 continue + return + end + subroutine wclosepw(w, x, r, f, lw, lx, xd, j) + implicit double precision (a-h,o-z) + double precision w(lw),x(lx),r(lw),xd(lx) + integer lw, lx, j(lw) + do23006 i=1, lw + wi = w(i) + dmean = 0d0 + do23008 k=1, lx + xd(k) = dabs(x(k) - wi) + dmean = dmean + xd(k) +23008 continue +23009 continue + dmean = f*dmean/dfloat(lx) + sump = 0d0 + do23010 k=1, lx + z = min(xd(k)/dmean, 1d0) + xd(k) = (1d0 - z**3)**3 + sump = sump + xd(k) +23010 continue +23011 continue + prob = 0d0 + ri = r(i) + m = 1 + do23012 k=1, lx + prob = prob + xd(k) / sump + if(ri .gt. prob)then + m = m + 1 + endif +23012 continue +23013 continue + j(i) = m +23006 continue +23007 continue + return + end |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/Ecdf.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/Ecdf.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,12 @@ +## From Bayazid Sarkar <sarkarbayazid@gmail.com> +require(Hmisc) +set.seed(1) +x <- exp(rnorm(100)) +w <- sample(1:5, 100, TRUE) +g <- sample(c('a','b','c'), 100, TRUE) + +Ecdf(log(x), weights=w, lty=1:3, col=1:3, group=g, label.curves=list(keys=1:3), + subtitles=FALSE) + +Ecdf(x, weights=w, lty=1:3, col=1:3, group=g, label.curves=list(keys=1:3), + subtitles=FALSE, log='x') |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/ace.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/ace.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,23 @@ +# Verify that ace works for categorical response variable, giving +# a y-transformation that is a linear translation of Fisher's optimum scores +# (y-specific mean of x) when there is one predictor that is forced to +# be linear. For now using aregImpute's override of ace +library(acepack) +set.seed(1) +y <- rep(1:3,100) +x <- -3*(y==1) -7*(y==2) + 30*(y==3) + runif(300) - .5 +xbar <- tapply(as.matrix(x), y, mean) +xbar + 1 2 3 +-3.010843 -7.021050 30.002227 + +z <- ace(x, y, cat=0, lin=1) +table(y, z$ty) + -0.82366 -0.583755 1.40741 +1 0 100 0 +2 100 0 0 +3 0 0 100 +plot(xbar[y], z$ty) +cor(xbar[y], z$ty) +[1] 1 + |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/areg.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/areg.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,75 @@ +# Tests for parametric version of ace in acepack + +set.seed(1) +library(Hmisc) +source('~/R/test/parAce.s') + +ns <- c(30,300,3000,10000) +for(n in ns) { + y <- sample(1:5,n,TRUE) + x <- abs(y-3) + runif(n) + par(mfrow=c(4,3)) + for(k in c(0,3:5)) { + z <- parAce(x,y,xtype='spline',ytype='cat',k=k) + plot(x, z$tx) + title(paste('R2=',format(z$rsquared))) + tapply(z$ty, y, range) + a <- tapply(x,y,mean) + b <- tapply(z$ty,y,mean) + plot(a,b) + abline(lsfit(a,b)) +# Should get same result to within linear transformation if reverse x and y + + w <- parAce(y,x,xtype='cat',ytype='spline',k=k) + plot(z$ty, w$tx) + title(paste('R2=',format(w$rsquared))) + abline(lsfit(z$ty, w$tx)) + } + if(n < max(ns)) {cat('Press enter to continue:');readline()} +} + +# Example where one category in y differs from others but only in variance of x +n <- 50 + y <- sample(1:5,n,TRUE) + x <- rnorm(n) + x[y==1] <- rnorm(sum(y==1), 0, 5) + z <- parAce(x,y,xtype='lin',ytype='cat') + summary(z) + plot(z) + z <- parAce(x,y,xtype='spline',ytype='cat',k=4) + summary(z) + plot(z) + + +par(mfrow=c(1,2)) +for(n in c(200,2000)) { + x <- rnorm(n); y <- rnorm(n) + x + z <- parAce(x,y,xtype='spline',ytype='spline',k=5) + plot(x, z$x) + plot(y, z$y) + title(n) + readline() +} + +n <- 200 +x1 <- rnorm(n); x2 <- rnorm(n); y <- rnorm(n) + x1^2 +z <- + parAce(cbind(x1,x2),y,xtype=c('spline','lin'),ytype='spline',k=3) +par(mfrow=c(2,2)) +plot(x1, z$x[,1]) +plot(x2, z$x[,2]) +plot(y, z$y) + +n <- 5000 +x1 <- rnorm(n); x2 <- rnorm(n); y <- (x1 + rnorm(n))^2 +z <- + parAce(cbind(x1,x2),y,xtype=c('spline','spline'),ytype='spline',k=5) +par(mfrow=c(2,2)) +plot(x1, z$x[,1]) +plot(x2, z$x[,2]) +plot(y, z$y) + +n <- 10000 +x <- matrix(runif(n*20),n,20) +y <- rnorm(n) +z <- parAce(x,y,xtype=rep('spline',20),ytype='spline',k=5) |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/aregImpute.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/aregImpute.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,106 @@
+## See Paul T von Hippel, The American Statistician 58:160-164, 2004
+
+mvector <- c(0,0)
+msigma <- matrix(c(1,0.5,0.5,1), nrow=2)
+library(mvtnorm)
+library(Hmisc)
+
+# XZ <- rmvnorm(1000, mvector, msigma)
+mvrnorm <- function(n, p = 1, u = rep(0, p), S = diag(p)) {
+  Z <- matrix(rnorm(n * p), p, n)
+  t(u + t(chol(S)) %*% Z)
+}
+
+XZ <- mvrnorm(1000, 2, mvector, msigma)
+
+U <- rnorm(1000)
+Y <- XZ[,1]+XZ[,2]+U
+summary(lm(Y ~ XZ))
+X <- XZ[,1]
+Z <- XZ[,2]
+Z.ni <- Z
+
+type <- c('random','X<0','Y<0','Z<0')[3]
+i <- switch(type,
+            random= runif(1000) < .5,
+            'X<0' = X<0,
+            'Y<0' = Y<0,
+            'Z<0' = Z<0)
+
+Zna <- Z
+Zna[i] <- NA
+summary(lm(Y ~ X + Zna))
+
+#w <- aregImpute(~monotone(Y)+monotone(X)+monotone(Zna))
+#w <- aregImpute(~I(Y)+I(X)+I(Zna),fweight=.75)
+w <- aregImpute(~monotone(Y)+monotone(X)+monotone(Zna), n.impute=5,
+                type='regression')
+
+plot(w)
+ecdf(Zna, add=T, col='red')
+ecdf(Z, add=T, col='green')
+# plot(w$imputed$Zna, Z[is.na(Zna)])  # use if n.impute=1
+# abline(a=0,b=1,lty=2)
+# lm(Z[is.na(Zna)] ~ w$imputed$Zna)
+
+coef(fit.mult.impute(Y~X+Zna, lm, w, data=data.frame(X,Zna,Y),pr=F))
+
+#--------------------------------------------------------------------
+## From Ewout Steyerberg
+# Missing values: illustrate MCAR, MAR, MNAR mechanism
+# linear models
+library(rms)
+
+## 1. x1 and x2 with y1 outcome
+## A) X only
+## B) X+Y
+
+#########################
+### Test Imputation   ###
+### use aregImpute in default settings
+#########################
+
+n <- 20000   # arbitrary sample size
+x2 <- rnorm(n=n, mean=0, sd=1)   # x2 standard normal
+x1 <- sqrt(.5) * x2 + rnorm(n=n, mean=0, sd=sqrt(1-.5))   # x2 correlated with x1
+y1 <- 1 * x1 + 1 * x2 + rnorm(n=n, mean=0, sd=sqrt(1-0))  # generate y
+# var of y1 larger with correlated x1 - x2
+
+x1MCAR <- ifelse(runif(n) < .5, x1, NA)   # MCAR mechanism for 50% of x1
+x1MARx <- ifelse(rnorm(n=n,sd=.8) < x2, x1, NA)   # MAR on x2, R2 50%, 50% missing (since mean x2==0)
+x1MARy <- ifelse(rnorm(n=n,sd=(sqrt(3)*.8)) >y1, x1, NA)  # MAR on y, R2 50%, 50% missing (since mean y1==0)
+# x1MNAR <- ifelse(rnorm(n=n,sd=.8) < x1, x1, NA)  # MNAR on x1, R2 50%, 50% missing (since mean x1==0)
+x1MNAR <- ifelse(rnorm(n=n,sd=.8) < x1, x1, NA)    # MNAR on x1, R2 50%, 50% missing (since mean x1==0)
+
+plot(x2, x1MARx)
+plsmo(x2, is.na(x1MARx), datadensity=TRUE)
+dd <- datadist(x2,x1MARx)
+options(datadist='dd')
+f <- lrm(is.na(x1MARx) ~ rcs(x2,4))
+plot(f, x2=NA, fun=plogis)
+
+d <- data.frame(y1,x1,x2,x1MCAR, x1MARx,x1MARy,x1MNAR)
+ols(y1~x1+x2)
+ols(y1 ~ x1MARx + x2)
+
+# MAR on x: 3 approaches; CC, MI with X, MI with X+Y
+
+g <- aregImpute(~I(y1) + I(x1MARx) + I(x2), n.impute=5, data=d, pr=F,
+                type=c('pmm','regression')[1], match='closest', plotTrans=TRUE)
+plot(g)
+Ecdf(x1, add=TRUE, col='red',q=.5)
+Ecdf(x1MARx, add=TRUE, col='blue',q=.5)
+
+f <- fit.mult.impute(y1 ~ x1MARx + x2, ols, xtrans=g, data=d, pr=F)
+g <- aregImpute(~y1 + x1MARx + x2, n.impute=5, data=d, pr=F, type='regression', plotTrans=TRUE)
+f <- fit.mult.impute(y1 ~ x1MARx + x2, ols, xtrans=g, data=d, pr=F)
+
+# MAR on y: 3 approaches; CC, MI with X, MI with X+Y
+f <- ols(y1~x1MARy+x2)
+Mat.imputation[i,29:32] <- c(coef(f)[2:3], sqrt(diag(Varcov(f)))[2:3])
+g <- aregImpute(~x1MARy + x2, n.impute=5, data=d, pr=F, type='regression')
+f <- fit.mult.impute(y1 ~ x1MARy + x2, ols, xtrans=g, data=d, pr=F)
+Mat.imputation[i,33:36] <- c(coef(f)[2:3], sqrt(diag(Varcov(f)))[2:3])
+g <- aregImpute(~y1 + x1MARy + x2, n.impute=5, data=d, pr=F, type='regression')
+f <- fit.mult.impute(y1 ~ x1MARy + x2, ols, xtrans=g, data=d, pr=F)
+Mat.imputation[i,37:40] <- c(coef(f)[2:3], sqrt(diag(Varcov(f)))[2:3]) |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/aregImpute2.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/aregImpute2.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,133 @@
+library(rms)
+source('/tmp/hmisc.s')
+set.seed(4)
+n <- c(20000,2000,200)[1]
+x2 <- rnorm(n)
+x1 <- sqrt(.5) * x2 + rnorm(n, sd=sqrt(1-.5))
+y <- 1 * x1 + 1 * x2 + rnorm(n)
+
+type <- c('mcar','mar.x2')[2]
+
+x1m <- if(type=='mcar') ifelse(runif(n) < .5, x1, NA) else
+ ifelse(rnorm(n,sd=.8) < x2, x1, NA)   # MAR on x2, R2 50%, 50% missing
+coef(ols(y ~ x1+x2))
+coef(ols(y ~ x1m + x2))
+
+Ecdf(x1)
+Ecdf(x1m, lty=2, add=TRUE)
+Ecdf(x1[is.na(x1m)], lty=2, lwd=3, add=TRUE)
+
+plot(x2, x1m)
+plsmo(x2, is.na(x1m), datadensity=TRUE)
+dd <- datadist(x2,x1m)
+options(datadist='dd')
+f <- lrm(is.na(x1m) ~ rcs(x2,4))
+plot(f, x2=NA, fun=plogis)
+
+
+d <- data.frame(x1,x1m,x2,y)
+
+# Find best-validating (in terms of bootstrap R^2) value of nk
+g <- aregImpute(~ y + x1m + x2, nk=c(0,3:5), data=d)
+g
+# nk=0 is best with respect to mean and median absolute deviations
+# Another good model is one that forces the target variable (x1m) to
+# be transformed linearly using tlinear=TRUE
+
+g <- aregImpute(~y + x1m + x2, nk=0, n.impute=5, data=d, pr=F,
+                type=c('pmm','regression')[1], plotTrans=FALSE)
+s <- is.na(x1m)
+c(mean(g$imputed$x1), mean(x1[s]))
+ix1 <- g$imputed$x1[,5]
+x1i <- x1m
+x1i[s] <- ix1
+rcorr(cbind(x1,x2,y)[s,])
+rcorr(cbind(x1i,x2,y)[s,])
+# allowing x1 to be nonlinearly transformed seems to increase the
+# correlation between imputed x1 and x2 and imputed x1 and y,
+# in addition to variance of imputed values increasing
+
+f <- fit.mult.impute(y ~ x1m + x2, ols, xtrans=g, data=d, pr=F)
+coef(f)
+
+
+
+g2 <- g
+g1 <- g
+Ecdf(g1)
+Ecdf(g2, add=TRUE, col='blue')
+
+# For MARx2, pmm works reasonably well when nk=3, regression doesn't;
+# both work well when nk=0
+# For MCAR, pmm works well when nk=3, regression works moderately
+# well but imputed values have higher variance than real x1 values
+# when x1m is missing, and coefficient of x2 on y is 0.92 when n=20000
+# Did not get worse by setting nk=6
+# Regression imputation works fine with nk=6 with ~y+I(x1m)+x2
+# Problem with I(y)+x1m+I(x2)
+
+plot(g)
+Ecdf(x1, add=TRUE, col='blue')
+Ecdf(x1m, lty=2, add=TRUE)
+Ecdf(x1[is.na(x1m)], lty=2, lwd=3, add=TRUE)
+
+
+# Look at distribution of residuals from areg for various nk
+s <- !is.na(x1m)
+f <- lm.fit.qr.bare(cbind(y,x2)[s,],x1m[s])
+Ecdf(resid(f), lwd=2, col='gray')
+py <- f$fitted.values
+ry <- resid(f)
+
+g <- areg(cbind(y,x2), x1m, nk=6, xtype=rep('l',2))
+p <- g$linear.predictors
+r <- resid(g)
+Ecdf(r, add=TRUE, col='blue')
+
+plot(py, p)
+coef(lm.fit.qr.bare(py,p))
+plot(ry,r)
+coef(lm.fit.qr.bare(ry,r))
+cor(ry,r)
+sd(ry)
+sd(r)
+pr <- predict(g,cbind(x1,x2))
+pr2 <- g$linear.predictors
+describe(pr-pr2)
+Pr <- fitted(f)
+plot(Pr,pr)
+coef(lm.fit.qr.bare(Pr,pr))
+
+obs.trans <- pr + r
+plot(obs.trans, y)
+w <- lm.fit.qr.bare(obs.trans,y)
+coef(w)
+w$rsquared
+
+# Strip out aregImpute code for regression imputation, force linearity,
+# no bootstrap, x1 is only variable with NAs
+
+ai <- function(x1, x2, y) {
+  n <- length(x1)
+  na <- (1:n)[is.na(x1)]
+  nna <- length(na)
+  j <- (1:n)[-na]
+
+  f <- lm.fit.qr.bare(cbind(y,x2)[j,], x1[j])
+  prn(coef(f))
+
+  # Predicted mean x1 for only those that are missing:
+  predx1 <- matxv(cbind(y,x2)[na,], coef(f))
+  Ecdf(predx1, add=TRUE, col='blue')
+  res <- f$residuals
+  rp <- length(na) > length(res)
+  px1 <- predx1 + sample(res, length(na), replace=rp)
+  px1e <- approxExtrap(f$fitted.values, f$fitted.values, xout=px1)$y
+  print(describe(abs(px1-px1e)))
+  Ecdf(px1e, add=TRUE, col='green')
+  x1[na] <- px1e
+  x1
+}
+
+x1i <- ai(x1m, x2, y)
+ols(y ~ x1i + x2) |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/aregImpute3.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/aregImpute3.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,24 @@ +require(Hmisc) +n <- 100 +set.seed(1) +y <- sample(0:8, n, TRUE) +x1 <- runif(n) +x2 <- runif(n) +x2[1:10] <- NA +z <- sample(1:20, n, TRUE) +d <- data.frame(y, x1, x2, z) +f1 <- glm(y ~ x1 + x2, family=poisson) +f2 <- glm(y ~ x1 + x2 + offset(log(z)), family=poisson) + +a <- aregImpute(~ y + x1 + x2) +g1 <- fit.mult.impute(y ~ x1 + x2 , glm, a, + family=poisson, data=d) +g2 <- fit.mult.impute(y ~ x1 + x2 + offset(log(z)), glm, a, + family=poisson, data=d) +# g3 <- fit.mult.impute(y ~ x1 + x2 + offset(log(z)), Glm, a, family=poisson, data=d) +coef(g1) +coef(g2) +# coef(g3) +coef(f1) +coef(f2) + |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/consolidate.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/consolidate.R Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,88 @@
+library(Hmisc)
+
+named.equal <- function(x,y) {
+  x.names <- sort(names(x))
+  y.names <- sort(names(y))
+
+  if(!identical(x.names, y.names)) {
+    cat("x names: ", paste(x.names, collapse=', '), "\ny names: ", paste(y.names, collapse=', '), sep='')
+    stop("x and y do not have the same element names")
+  }
+
+  if(any(x.names == "") || any(y.names == "")) {
+    cat("x names: ", paste(x.names, collapse=', '), "\ny names: ", paste(y.names, collapse=', '), sep='')
+    stop("x or y has unnamed elements")
+  }
+
+  if(!identical(x[x.names], y[x.names])) {
+    print(x)
+    print(y)
+    stop("x and y do not have identical element values")
+  }
+  return(TRUE)
+}
+
+a <- c(a = 5, b = 2, c = 4)
+b <- c(c = 3, d = 4, e = 12)
+c <- list(q = 5, h = 2, b = 14)
+d <- list(w = 2, h = 3, e = 21)
+
+a1 <- structure(c(5, 2, 3, 4, 12),
+                .Names = c("a", "b", "c", "d", "e"))
+a2 <- structure(list(a = 5, b = 14, c = 4, q = 5, h = 2),
+                .Names = c("a", "b", "c", "q", "h"))
+a3 <- structure(list(q = 5, h = 2, b = 2, a = 5, c = 4),
+                .Names = c("q", "h", "b", "a", "c"))
+a4 <- structure(list(q = 5, h = 3, b = 14, w = 2, e = 21),
+                .Names = c("q", "h", "b", "w", "e"))
+a5 <- structure(c(5,2,4,4,12),
+                .Names = c("a", "b", "c", "d", "e"))
+a6 <- structure(list(a = 5, b = 2, c = 4, q = 5, h = 2),
+                .Names = c("a", "b", "c", "q", "h"))
+a7 <- structure(list(q = 5, h = 2, b = 14, a = 5, c = 4),
+                .Names = c("q", "h", "b", "a", "c"))
+a8 <- structure(list(q = 5, h = 2, b = 14, w = 2, e = 21),
+                .Names = c("q", "h", "b", "w", "e"))
+
+r1 <- consolidate(a, b, protect=FALSE)
+r2 <- consolidate(a, c, protect=FALSE)
+r3 <- consolidate(c, a, protect=FALSE)
+r4 <- consolidate(c, d, protect=FALSE)
+
+is.vector(r1)
+is.list(r2)
+is.list(r3)
+is.list(r4)
+
+named.equal(r1, a1)
+named.equal(r2, a2)
+named.equal(r3, a3)
+named.equal(r4, a4)
+
+r5 <- consolidate(a, b, protect=TRUE)
+r6 <- consolidate(a, c, protect=TRUE)
+r7 <- consolidate(c, a, protect=TRUE)
+r8 <- consolidate(c, d, protect=TRUE)
+
+named.equal(r5, a5)
+named.equal(r6, a6)
+named.equal(r7, a7)
+named.equal(r8, a8)
+
+named.equal(r3, r6)
+named.equal(r2, r7)
+
+e <- a
+consolidate(e) <- b
+named.equal(e, r1)
+
+e <- a
+consolidate(e, protect = TRUE) <- b
+named.equal(e, r5)
+
+f <- c(1,2,3,5)
+consolidate(attributes(f)) <- c
+named.equal(attributes(f), c)
+
+consolidate(attributes(f)) <- NULL
+named.equal(attributes(f), c) |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/csv/FORMAT.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/csv/FORMAT.csv Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,4 @@ +FMTNAME,START,END,LABEL,MIN,MAX,DEFAULT,LENGTH,FUZZ,PREFIX,MULT,FILL,NOEDIT,TYPE,SEXCL,EEXCL,HLO,DECSEP,DIG3SEP,DATATYPE,LANGUAGE +RACE,1,1,green,1,40,6,6,1E-12,,0,,0,N,N,N,,,,, +RACE,2,2,blue,1,40,6,6,1E-12,,0,,0,N,N,N,,,,, +RACE,3,3,purple,1,40,6,6,1E-12,,0,,0,N,N,N,,,,, |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/csv/TEST.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/csv/TEST.csv Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,3 @@ +race,age,d1,dt1,t1 +2,30,15402,1330767062,40425 +4,31,15494,1338716527,40453 |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/csv/_contents_.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/csv/_contents_.csv Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,27 @@ +MEMNAME,MEMLABEL,NAME,TYPE,LENGTH,LABEL,FORMAT,NOBS +FORMAT,,DATATYPE,2,8,Date/time/datetime?,,3 +FORMAT,,DECSEP,2,1,Decimal separator,,3 +FORMAT,,DEFAULT,1,3,Default length,,3 +FORMAT,,DIG3SEP,2,1,Three-digit separator,,3 +FORMAT,,EEXCL,2,1,End exclusion,,3 +FORMAT,,END,2,16,Ending value for format,,3 +FORMAT,,FILL,2,1,Fill character,,3 +FORMAT,,FMTNAME,2,8,Format name,,3 +FORMAT,,FUZZ,1,8,Fuzz value,,3 +FORMAT,,HLO,2,11,Additional information,,3 +FORMAT,,LABEL,2,6,Format value label,,3 +FORMAT,,LANGUAGE,2,8,Language for date strings,,3 +FORMAT,,LENGTH,1,3,Format length,,3 +FORMAT,,MAX,1,3,Maximum length,,3 +FORMAT,,MIN,1,3,Minimum length,,3 +FORMAT,,MULT,1,8,Multiplier,,3 +FORMAT,,NOEDIT,1,3,Is picture string noedit?,,3 +FORMAT,,PREFIX,2,2,Prefix characters,,3 +FORMAT,,SEXCL,2,1,Start exclusion,,3 +FORMAT,,START,2,16,Starting value for format,,3 +FORMAT,,TYPE,2,1,Type of format,,3 +TEST,,age,1,4,Age at Beginning of Study,,2 +TEST,,d1,1,8,,MMDDYY,2 +TEST,,dt1,1,8,,DATETIME,2 +TEST,,race,1,3,,RACE,2 +TEST,,t1,1,8,,TIME,2 |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/dataframeReduce.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/dataframeReduce.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,17 @@ +# Thanks: JoAnn Alvarez +require(Hmisc) +set.seed(3) +NN <- 200 +x1 <- rnorm(NN) +x2 <- x1^2 +x3 <- runif(NN) +x4 <- factor(NA, levels = c("January", "February", "March")) +x5 <- factor(sample(c(1, 2, 3), size = NN, replace = TRUE), labels = + c("January", "February", "March")) +m <- 30 +x2[1:m] <- NA +x5[1:m] <- NA +xdat <- data.frame(x1, x2, x3, x4) +combine.levels(xdat$x4) +xdat2 <- dataframeReduce(xdat, minprev=0.05, fracmiss = 0.05) + |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/dataload.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/dataload.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,13 @@
+# Compare dataload with Stat/Transfer, the latter producing binary S+
+# data frames and the former binary R data frames
+
+library(Hmisc)
+setwd('/tmp')
+ds <- list.files('~/projects/consulting/gsk/REDUCE/oct03/data/sass')
+
+for(i in 1:1) {
+  sys(paste('dataload fh10sep.xpt z.rda', ds[i]))
+  load('z.rda')
+  s <- read.S(paste('~/projects/consulting/gsk/REDUCE/oct03/data/sass',
+              ds[i], sep='/'))
+} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/fit.mult.impute.bootstrap.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/fit.mult.impute.bootstrap.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,31 @@ +# Edited example from Jane Cook jane.cookng@gmail.com +require(rms) +set.seed(1) +n <- 101 +y <- runif(n) +y[1:2] <- NA +x1 <- sample(c('a','b'), n, TRUE) +x2 <- runif(n) + .15 * y +d <- data.frame(y, x1, x2) +a <- aregImpute(~ y + x1 + x2, burnin=10, n.impute=100, data=d) +f <- fit.mult.impute(y ~ x1 + x2, ols, a, data=d) + +B <- 20 # actually use B=1000 +ranks <- matrix(NA, nrow=B, ncol=2) +## Put - in front of plot in next line to have rank 1 = best +rankvars <- function(fit) rank(plot(anova(fit), sort='none', pl=FALSE)) +Rank <- rankvars(f) +for(i in 1:B) { +j <- sample(1:n, n, TRUE) +bootfit <- update(f, data=d, subset=j, pr=FALSE) +ranks[i,] <- rankvars(bootfit) +} +for(k in 1 : 2) { + cat('Frequency of Ranks for Predictor:', k, '\n') + print(table(ranks[, k])) + cat('\n') +} + +lim <- t(apply(ranks, 2, quantile, probs=c(.025,.975))) + + |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/hoeff.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/hoeff.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,46 @@ +require(Hmisc) +.Fortran('jrank', as.double(1:5), as.double(1:5), 5L, + double(5), double(5), double(5)) +hoeffd(1:6, c(1,3,2,4,5,6)) +y <- 1:20; y[3] <- 17; y[17] <- 3 +hoeffd(1:20, y)$D +set.seed(5) +x <- runif(800); y <- runif(800) +hoeffd(x,y)$D + +for(n in c(50,100,200,400,1000)) { + set.seed(1) + x <- seq(-10,10,length=n) + y <- x*sign(runif(n,-1,1)) + h <- hoeffd(x,y) + print(c(h$D[1,2], h$aad[1,2], h$maxad[1,2])) +} +#[1] 0.06812286 in old version (real*4 in places) +#[1] 0.04667929 +#[1] 0.05657654 +#[1] 0.07048487 +#[1] 0.06323746 + + +# From http://www.sciencemag.org/content/suppl/2011/12/14/334.6062.1518.DC1/Reshef.SOM.pdf +# Table S2: Definitions of functions used for Figure 2A in the Science article + +w <- function(y) { + ylab <- deparse(substitute(y)) + plot(x, y, ylab=substitute(y), type='l') + h <- hoeffd(x, y) + cat(ylab, '\n') + print(c(D=h$D[1,2],P=h$P[1,2],aDif=h$aad[1,2],mDif=h$maxad[1,2])) +} + +x <- seq(0, 1, length=320) +par(mfrow=c(3,3)) +w(x) +w(4*(x-.5)^2) +w(128*(x-1/3)^3 -48*(x-1/3)^2 - 12*(x-1/3) + 2) +w(10^(10*x) - 1) +w(sin(10*pi*x) + x) +w(sin(16*pi*x)) +w(sin(13*pi*x)) +w(sin(7*pi*x*(1+x))) +w(runif(320)) |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/howto.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/howto.html Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,43 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html> + <head> + <title>How to Create SAS Transport Files</title> + </head> + + <body> + <h1>How to Create SAS Transport Files</h1> +<ol> +<li>If any of the datasets you are exporting are not already in the + <tt>WORK</tt> library, copy them to there: +<pre> +PROC COPY IN=mylib OUT=WORK; SELECT test1 test2; RUN; +</pre> +</li> +<li>If you have created value label formats using <tt>PROC FORMAT; + VALUE ...</tt>, output these value labels into a SAS + dataset: +<pre> +PROC FORMAT CNTLOUT=format;RUN; +</pre> +</li> +<li>Define a <tt>LIBNAME</tt> to reference the SAS Version 5 transport + file engine: +<pre> +libname xp SASV5XPT "test.xpt"; +</pre> +</li> +<li>Copy all needed datasets to, e.g., <tt>test.xpt</tt>: +<pre> +PROC COPY IN=work OUT=xp;SELECT test1 test2 format;RUN; +</pre> +<b>DO NOT</b> use <tt>PROC CPORT</tt> to create the file. +</li> +</ol> + <hr> + <address><a href="mailto:fharrell@virginia.edu">Frank E Harrell Jr</a></address> +<!-- Created: Wed May 21 09:11:25 EDT 2003 --> +<!-- hhmts start --> +Last modified: Fri Jun 6 15:47:58 EDT 2003 +<!-- hhmts end --> + </body> +</html> |
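Once the transport file exists it can be read directly in R; a minimal sketch with Hmisc, using the file and dataset names from the example above (if the file holds several datasets, the result is a list of data frames):

  require(Hmisc)
  w <- sasxport.get('test.xpt')   # value labels from the format dataset are applied
  contents(w$test1)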
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/inverseFunction.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/inverseFunction.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,49 @@ +library(Hmisc) + +z <- +structure(list(x = c(-1.01157732356344, -0.844512148091014, -0.723389895873506, +-0.598091014269186, -0.518735055919784, -0.42684920940995, -0.347493251060548, +-0.263960663324335, -0.113602005399152, 0.195468569224836, 0.441889703046664, +0.746783648283841, 0.901318935595835, 0.947261858850752, 0.99738141149248 +), y = c(-1.0034980323568, -0.861827721906428, -0.668211630957586, +-0.49820725841714, -0.309313511149978, -0.0920857017927416, 0.0637516397026673, +0.0920857017927417, 0.0212505465675558, -0.0826410144293835, +-0.0873633581110625, 0.0684739833843463, 0.517096633143857, 0.75321381722781, +0.894884127678181)), .Names = c("x", "y")) +library(rms) +dd <- datadist(as.data.frame(z)); options(datadist='dd') +f <- ols(y ~ rcs(x,5), data=z) +par(mfrow=c(1,2)) +plot(f) +abline(v=c(-.1772,.31375)) +points(z) +xx <- seq(-1,1,length=1000) +g <- Function(f) +h <- inverseFunction(xx, g(xx)) +plot(xx[-1], diff(g(xx))) +abline(h=0) +par(mfrow=c(1,1)) + +plot(f) +turns <- formals(h)$turns +abline(v=turns) + +a <- seq(-1.2,1.2,by=.001) +w <- h(a) +for(i in 1:ncol(w)) lines(w[,i], a, col=i+1) +w <- h(a, what='sample') +points(w, a, col='gray') + + +x <- seq(-1, 1, by=.01) +y <- x^2 +h <- inverseFunction(x,y) +formals(h)$turns # vertex +a <- seq(0, 1, by=.01) +plot(0, 0, type='n', xlim=c(-.5,1.5)) +lines(a, h(a)[,1]) ## first inverse +lines(a, h(a)[,2], col='red') ## second inverse +a <- c(-.1, 1.01, 1.1, 1.2) +points(a, h(a)[,1]) + + |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/largest.empty.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/largest.empty.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,169 @@ +library(Hmisc) +par(mfrow=c(2,2)) +w <- 2 +for(i in 1:4) { + if(w==1) { + y <- exp(rnorm(20)) + } else { + x <- rnorm(20) + y <- rnorm(20) + plot(x, y) + z <- list(x=x, y=y) + } + for(m in c('maxdim','area')) + { + for(numbins in c(25,100)) + { + u <- largest.empty(z$x, z$y, pl=TRUE, + height=.05*diff(range(z$x)), + width =.05*diff(range(z$y)), + method=m, numbins=numbins) + text(u, labels=m, adj=.5) + if(w==2) points(z) + } + } +} + +par(mfrow=c(1,1)) +set.seed(1) +x <- rnorm(1000); y <- rnorm(1000) +plot(x,y) +for(m in c('area', 'rexhaustive', 'exhaustive')) { + cat('Method:', m, '\n') + print(system.time(largest.empty(x, y, + width=1.5, height=.5, + method=m, pl=TRUE))) +} +comp <- function(a, b) { + i <- identical(a,b) + if(!i) print(cbind(a,b)) + i +} + +for(i in 1:70) { + cat(i,'\n') + set.seed(i) + n <- sample(8:800, 1) + x <- runif(n); y <- runif(n) + plot(x, y) + xl <- range(pretty(x)); yl <- range(pretty(y)) + a <- largest.empty(x, y, xlim=xl, ylim=yl, width=.03, height=.03, + method='rexhaustive', pl=TRUE) + b <- largest.empty(x, y, xlim=xl, ylim=yl, width=.03, height=.03, + method='exhaustive', pl=TRUE) + comp(a[Cs(x,y,area)], b[Cs(x,y,area)]) + comp(a$rect$x, b$rect$x) + comp(a$rect$y, b$rect$y) +} + + +par(mfrow=c(2,2)) +N <- 100; set.seed(8237) +for(i in 1:4) { + x <- runif(N); y <- runif(100) + plot(x, y, pch="+", xlim=c(0,1), ylim=c(0,1), col="darkgray") + for(m in c('area', 'rexhaustive', 'exhaustive')) { + z <- largest.empty(x, y, 0.075, 0.075, pl=TRUE, numbins=100, + xlim=c(0,1), ylim=c(0,1), method=m) + cat(m, 'largest.empty Area:', z$area, '\n') + print(cbind(z$rect$x, z$rect$y)) + } +} + +if(FALSE) { +z <- Ecdf(y) +points(lr(z$x, z$y, width=1.5, height=.05, pl=0, numbins=20)) + +lr <- function(x, y, xlim=par('usr')[1:2], ylim=par('usr')[3:4], + width, height, numbins=25, pl=1) + { + area <- 0 + xinc <- diff(xlim)/numbins + yinc <- diff(ylim)/numbins + i <- 1 + j <- 0 + for(xl in seq(xlim[1], xlim[2]-width, by=xinc)) + { + for(yl in seq(ylim[1],ylim[2]-height, by=yinc)) + { + j <- j + 1 + if(j > 500) stop() + xr <- if(any(x >= xl & y >= yl)) min(x[x >= xl & y >= yl]) + else xlim[2] + yu <- if(any(y >= yl & x >= xl)) min(y[y >= yl & x >= xl]) + else ylim[2] + + if(pl==1) + { +## Ecdf(Y) + i <- i + 1 + if(i > 8) i <- 2 + polygon(c(xl,xr,xr,xl),c(yl,yl,yu,yu), col=i) + } + ar <- (yu-yl)*(xr-xl) + if(ar > area) + { + area <- ar + x1 <- xl + x2 <- xr + y1 <- yl + y2 <- yu + if(pl==2) + { + i <- i + 1 + if(i > 8) i <- 2 + polygon(c(x1,x2,x2,x1),c(y1,y1,y2,y2), col=i) + } + } + } + } + list(x=mean(c(x1,x2)), y=mean(c(y1,y2))) + } + +lr <- function(x, y, xlim=par('usr')[1:2], ylim=par('usr')[3:4], + width, height, numbins=25, pl=0) + { + area <- 0 + xinc <- diff(xlim)/numbins + yinc <- diff(ylim)/numbins + i <- 1 + for(xl in seq(xlim[1], xlim[2]-width, by=xinc)) + { + for(yl in seq(ylim[1],ylim[2]-height, by=yinc)) + { + for(xr in seq(xl+width,xlim[2],by=xinc)) + { + for(yu in seq(yl+height,ylim[2],by=yinc)) + { + if(any(x >= xl & x <= xr & y >= yl & y <= yu)) break + if(pl==1) + { + Ecdf(Y) + polygon(c(xl,xr,xr,xl),c(yl,yl,yu,yu), col=2) + } + +## if(!any(x >= xl & x <= xr & y >= yl & y <= yu)) + { + ar <- (yu-yl)*(xr-xl) + if(ar > area) + { + area <- ar + x1 <- xl + x2 <- xr + y1 <- yl + y2 <- yu + if(pl==2) + { + i <- i + 1 + if(i > 8) i <- 2 + polygon(c(x1,x2,x2,x1),c(y1,y1,y2,y2), col=i) + } + } + } + } + } + } + } + list(x=mean(c(x1,x2)), y=mean(c(y1,y2))) + } +} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/latex.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/latex.s Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,78 @@ +# Copy to /tmp, and after running to create z.tex, run pdflatex + +require(Hmisc) +x <- cbind(x1=1:5, x2=2:6) +file <- '/tmp/z.tex' +# Note: adding here package caused LaTeX problems +cat('\\documentclass{article}\n\\usepackage{hyperref,lscape,ctable,booktabs,longtable}\n\\begin{document}\n', file=file) + +# Example from Johannes Hofrichter +dat <- data.frame(a=c(1,2), b=c(2,3)) +w <- latex(dat, file=file, ctable=TRUE, + caption = "caption", label="test", append=TRUE) + +# Example from Ben Bolker +d <- data.frame(x=1:2, + y=c(paste("a", + paste(rep("very",30),collapse=" "),"long string"), + "a short string")) +w <- latex(d, file=file, col.just=c("l","p{3in}"), table.env=FALSE, append=TRUE) + +# Example from Yacine H +df <- data.frame(matrix(1:16, ncol=4)) +latex(df, file="", rownamesTexCmd="bfseries") +latex(df, file="", cgroup=c("G1","G2"), n.cgroup=c(2,2)) +latex(df, file="", cgroup=c("G1","G2"), n.cgroup=c(2,2), + rownamesTexCmd="bfseries") + +## Test various permutations of options +test <- function(caption=NULL, center=NULL, table.env=TRUE, size=NULL, + booktabs=FALSE, landscape=FALSE, ctable=FALSE, longtable=FALSE, + hyperref=NULL, insert=TRUE, caption.loc='top', + colheads=NULL) { + i <<- i + 1 + cat('\\clearpage\ni=', i, '\n\\hrule\n', sep='', file=file, append=TRUE) + ib <- it <- NULL + g <- function(x) { + if(! length(x)) return(NULL) + if(is.character(x)) paste(substitute(x), '=', x, ', ', sep='') + else if(x) paste(substitute(x), '=T, ', sep='') + else NULL + } + colh <- colheads + if(insert) { + z <- paste(g(caption), g(center), g(table.env), g(size), g(booktabs), + g(landscape), g(ctable), g(longtable), g(hyperref), + if(caption.loc != 'top') g(caption.loc), sep='') + if(length(colheads)) { + colheads <- paste(colheads, collapse=',') + z <- paste(z, g(colheads), sep='') + } + it <- paste('Top: i=', i, ':', z, sep='') + ib <- 'Text for bottom' + } + w <- latex(x, file=file, append=TRUE, + caption=caption, center=center, table.env=table.env, + size=size, booktabs=booktabs, landscape=landscape, + ctable=ctable, longtable=longtable, hyperref=hyperref, + insert.top=it, insert.bottom=ib, caption.loc=caption.loc, + colheads=colh) + invisible() +} + +i <- 0 +test() +test(hyperref='rrrrr') +test(caption='This caption') +test(caption='This caption, supposed to be at bottom', caption.loc='bottom') +for(cen in c('center', 'centering', 'centerline')) test(center=cen) +test(table.env=FALSE) +test(size='scriptsize') +test(table.env=FALSE) +test(booktabs=TRUE, landscape=TRUE) +test(ctable=TRUE, landscape=TRUE) +test(longtable=TRUE) +test(table.env=FALSE, colheads=FALSE) + +cat('\\end{document}\n', file=file, append=TRUE) +# Run pdflatex /tmp/z |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/latex.summaryM.Rnw --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/latex.summaryM.Rnw Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,109 @@
+% From Yonghao Pua <puayonghao@gmail.com>
+% Lauren Samuels <lauren.samuels@vanderbilt.edu>
+\documentclass{article}
+\usepackage{relsize,setspace} % used by latex(describe( ))
+\usepackage{longtable}
+\usepackage{pdfpages}
+\usepackage{hyperref}
+\usepackage{lscape}     % for landscape mode tables
+\usepackage{calc,epic,color} % used for latex(..., dotchart=TRUE)
+\usepackage[superscript,nomove]{cite} % use if \cite is used and superscripts wanted
+\usepackage{helvet}
+\usepackage{moreverb}
+\renewcommand{\familydefault}{\sfdefault}
+\newcommand{\R}{{\normalfont\textsf{R}}{}}
+
+\textwidth 6.75in % set dimensions before fancyhdr
+\textheight 9.25in
+\topmargin -.875in
+\oddsidemargin -.125in
+\evensidemargin -.125in
+\usepackage{fancyhdr} % this and next line are for fancy headers/footers
+\pagestyle{fancy}
+\lhead{\textsc{}} %uncomment to remove left-sided headings
+\setlength{\parindent}{0ex} % don't indent first line of paragraph
+\setlength{\parskip}{2ex} % do skip 2 spaces between paragraphs
+
+\title{\textsf{Example illustrating problems with latex.summaryM}}
+\author{Pua Yong Hao\\\smaller\href{mailto:puayonghao@gmail.com}{puayonghao@gmail.com}}
+\date{\today}
+\begin{document}
+\maketitle
+
+\section{Descriptive Stats}
+<<echo=TRUE, results='hide'>>=
+library(rms)
+n <- 500; set.seed(88)
+sex <- factor(sample(c("female","male"), n, TRUE))
+age <- rnorm(n, 50, 10)
+height <- rnorm(n, 1.7, 0.5)
+type <- factor(sample(c('A', 'B'), n, TRUE))
+dbase= data.frame(sex, age, height, type)
+dbase.dd <- datadist(dbase)
+options(datadist = "dbase.dd")
+@
+
+When I use the \texttt{summaryM} function, note that the table footers are shown as captions.
+<<echo=FALSE, results='asis'>>=
+latex(summaryM(age + height + type ~ sex , data=dbase, overall=TRUE, test=TRUE),
+      size='small', where="ht",
+      long=TRUE, prmsd = TRUE, npct='slash',
+      caption="Descriptive Statistics",
+      msdsize='scriptsize', round = 2, digits=2, prtest='P', pdig =2, file='',
+      label="table:summary")
+@
+
+<<results='asis'>>=
+# From Lauren Samuels
+set.seed(1)
+d <- expand.grid(x1=c('A', 'B'), x2=c('a', 'b', 'c'))
+d$y <- runif(nrow(d))
+d
+latex(
+    summaryM(x2 + y ~ x1, data= d, test=TRUE, overall=TRUE, continuous=6 ),
+    file="", where="htbp",
+    caption="Descriptive stats and tests of between-group differences for all primary and secondary neuroimaging outcomes",
+    label= "tbl:descrOutcomes",
+    exclude1=FALSE, digits=2, long=TRUE, prmsd=TRUE,
+    npct="slash", size="tiny", npct.size='tiny',
+    center="centering")
+@
+
+Put a complex table in an external \texttt{.tex} file for conversion
+to \texttt{html} using \texttt{htlatex}:
+<<extex>>=
+## Example taken from help file for summaryM
+options(digits=3)
+set.seed(173)
+sex <- factor(sample(c("m","f"), 500, rep=TRUE))
+country <- factor(sample(c('US', 'Canada'), 500, rep=TRUE))
+age <- rnorm(500, 50, 5)
+sbp <- rnorm(500, 120, 12)
+label(sbp) <- 'Systolic BP'
+units(sbp) <- 'mmHg'
+treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE))
+treatment[1]
+sbp[1] <- NA
+
+# Generate a 3-choice variable; each of 3 variables has 5 possible levels
+symp <- c('Headache','Stomach Ache','Hangnail',
+          'Muscle Ache','Depressed')
+symptom1 <- sample(symp, 500,TRUE)
+symptom2 <- sample(symp, 500,TRUE)
+symptom3 <- sample(symp, 500,TRUE)
+Symptoms <- mChoice(symptom1, symptom2, symptom3, label='Primary Symptoms')
+table(as.character(Symptoms))
+# Produce separate tables by country
+f <- summaryM(age + sex + sbp + Symptoms ~ treatment + country,
+              groups='treatment', test=TRUE)
+fi <- '/tmp/z.tex'
+cat('\\documentclass{report}\\begin{document}\n', file=fi)
+w <- latex(f, file=fi, npct='slash', middle.bold=TRUE, prmsd=TRUE, append=TRUE)
+cat('\\end{document}\n', file=fi, append=TRUE)
+## In /tmp run htlatex z.tex to produce z.html
+## To get htlatex install the linux tex4ht package
+## You may also need to install the tth package
+## See http://biostat.mc.vanderbilt.edu/SweaveConvert
+@
+
+\end{document} |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/latexTherm.Rnw --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/latexTherm.Rnw Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,27 @@
+\documentclass{report}
+\begin{document}
+@
+<<results='asis'>>=
+require(Hmisc)
+knitrSet()
+latexTherm(c(1, 1, 1, 1), name='lta')
+latexTherm(c(.5, .7, .4, .2), name='ltb')
+latexTherm(c(.5, NA, .75, 0), w=.3, h=1, name='ltc', extra=0)
+latexTherm(c(.5, NA, .75, 0), w=.3, h=1, name='ltcc')
+latexTherm(c(0, 0, 0, 0), name='ltd')
+@
+This is the first:\lta and the second:\ltb\\ and the third without extra:\ltc END\\
+Third with extra:\ltcc END\\
+\vspace{2in}\\
+All data = zero, frame only:\ltd
+
+
+<<results='asis'>>=
+latexTherm(c(.5, .7, .4, .2), name='lte')
+@
+% Note that the period after figure is necessary
+<<myplot,cap='This is a caption for the figure. \\lte'>>=
+plot(runif(20))
+@
+\end{document}
+ |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/latexTherm.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/latexTherm.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,16 @@
+# Usage: After running R, run latex on /tmp/z.tex
+require(Hmisc)
+source('~/R/Hmisc/R/latexTherm.s')
+f <- '/tmp/lt.tex'
+cat('', file='/tmp/z.tex'); cat('', file=f)
+ct <- function(...) cat(..., sep='', file='/tmp/z.tex', append=TRUE)
+ct('\\documentclass{report}\\begin{document}\n')
+latexTherm(c(1, 1, 1, 1), name='lta', file=f)
+latexTherm(c(.5, .7, .4, .2), name='ltb', file=f)
+latexTherm(c(.5, NA, .75, 0), w=.3, h=1, name='ltc', extra=0, file=f)
+latexTherm(c(.5, NA, .75, 0), w=.3, h=1, name='ltcc', file=f)
+latexTherm(c(0, 0, 0, 0), name='ltd', file=f)
+ct('\\input{/tmp/lt}\n')
+ct('This is the first:\\lta and the second:\\ltb\\\\ and the third without extra:\\ltc END\\\\\nThird with extra:\\ltcc END\\\\ \n\\vspace{2in}\\\\ \n')
+ct('All data = zero, frame only:\\ltd\\\\')
+ct('\\end{document}\n') |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/minor.tick.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/minor.tick.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,23 @@
+# Thanks: 袁超磊 <yuanclfeng@gmail.com>.  In an example from Robert I. Kabacoff,
+# R in Action (2011), the function appeared unable to put two minor tick
+# marks between each major tick mark on the y-axis.
+## This seemed to fail but it really succeeded.
+## minor.tick uses par('xaxp' or 'yaxp') and worked with respect
+## to the plot( ) as can be seen if yaxt='n' is omitted
+require(Hmisc)
+x <- c(1:10)
+y <- x
+z <- 10/x
+
+plot(x, y, type="b",
+     pch=21, col="red",
+     yaxt="n", lty=3, ann=FALSE)
+lines(x, z, type="b", pch=22, col="blue", lty=2)
+axis(2, at=x, labels=x, col.axis="red", las=2)
+axis(4, at=z, labels=round(z, digits=2),
+     col.axis="blue", las=2, cex.axis=0.7, tck=-.01)
+mtext("y=1/x", side=4, line=3, cex.lab=1, las=2, col="blue")
+title("An Example of Creative Axes",
+      xlab="X values",
+      ylab="Y=X")
+minor.tick(nx=2, ny=3, tick.ratio=0.5) |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/panelbp.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/panelbp.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,71 @@ +require(Hmisc) +set.seed(1) +var <- c(rep('A', 100), rep('B', 100)) +trt <- sample(c('T1','T2'), 200, TRUE) +x <- c(runif(100), 10*runif(100)) +y <- x + c(runif(100)/10, runif(100)) +N <- tapply(x, llist(var, trt), function(x) sum(!is.na(x))) +print(N) +#trt <- factor(paste(trt, ' (n=', N[cbind(var,trt)], ')', sep='')) + +#var <- factor(paste(var, ' (n=', N[cbind(var,trt)], ')', sep='')) + +vn <- var +for(v in unique(var)) { + i <- var == v + n <- tapply(!is.na(x[i]), trt[i], sum) + nam <- names(n) +# n <- sprintf('%s,(n[%s]==%g, n[%s1]==%g)', nam[1], n[1], nam[2], n[2]) +# w <- sprintf('paste(%s," (", n[%s]==%g,~~n[%s]==%g,")")', +# v, nam[1], n[1], nam[2], n[2]) +# cat(w, '\n') +# vn[var == v] <- parse(text=w) + n <- sprintf('%s (n%s=%g, n%s=%g)', v, nam[1],n[1], nam[2],n[2]) + vn[var == v] <- n +} +trt <- factor(trt) + +xyplot(as.integer(trt) ~ x | vn, panel=panel.bpplot, ylim=c(0,3), + scale=list(y=list(at=1:2, labels=levels(trt)), + x=list(relation='free', limits=list(c(0,1),c(0,13)))), + ylab='Treatment', layout=c(1,2)) + +# strip.default or strip.custom may provide workarounds +# http://r.789695.n4.nabble.com/Expressions-in-lattice-conditional-variables-td4660089.html + +bpl <- function(x, group, lab, cex.labels=.75) { + quants=c(0.025, 0.05, 0.125, 0.25, 0.375, 0.5, 0.625, + 0.75, 0.875, 0.95, 0.975) + group <- factor(group) + xlim <- quantile(x, c(.025,.975), na.rm=TRUE) + sfn <- function(x, quants) { + o <- options(digits=10) + ## So won't lose precision in quantile names + on.exit(options(o)) + c(quantile(x,quants), Mean=mean(x), SD=sqrt(var(x)), N=sum(!is.na(x))) + } + + qu <- tapply(x, group, sfn, simplify=TRUE, quants) + qu$Combined <- sfn(x, quants) + sm <- matrix(unlist(qu), ncol=length(quants)+3, + byrow=TRUE, + dimnames=list(names(qu), + c(format(quants),'Mean','SD','N'))) + bpplt(sm[,-ncol(sm)], xlab=lab, xlim=xlim, cex.points=.5) + upedge <- par('usr')[4] + outerText('N', + upedge+strheight('N', cex=cex.labels)/2, + cex=cex.labels) + for(i in 1:nrow(sm)) + outerText(sm[i,'N'], 4-i, cex=cex.labels) +} + +spar(mfrow=c(2,1), left=-1,rt=3,bot=1.5, mgp=c(2.5,.6,0), tcl=-.3, ps=12) +set.seed(2) +trt <- c(rep('T1',100), rep('T2',100)) +x1 <- runif(100) +x2 <- 10*runif(100) +trt <- sample(c('T1','T2'), 100, TRUE) +bpl(x1, trt, expression(x[1])) +title(sub=expression(F[1,20] == 2.53), cex.sub=.75, adj=0, line=2) +bpl(x2, trt, expression(x[list(2,23)])) |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/procmeans.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/procmeans.txt Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,11 @@ +The MEANS Procedure + +Variable N Mean Std Dev Minimum Maximum +-------------------------------------------------------------------------- +x3 100 0.5131445 0.2944341 0.0057602 0.9938965 +x4 100 0.5119257 0.3100749 0.0263616 0.9826741 +x5 100 0.4887739 0.3141976 0.0041338 0.9972528 +x6 100 0.4986746 0.2710817 0.0100958 0.9951080 +x7 100 0.5533156 0.2843679 0.0420104 0.9979081 +x8 100 0.4809487 0.2892945 0.0072688 0.9596358 +-------------------------------------------------------------------------- |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/readsasxml.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/readsasxml.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,24 @@
+require(XML)
+w <- xmlTreeParse('demo.xml')
+
+z <- w$doc$children$Transfer$children$Catalog$children$CatalogSchemas$children$Schema$children$SchemaTables$children
+
+u <- z[[length(z)]]$children
+
+v <- matrix(unlist(u), nrow=length(u), byrow=T)
+
+v[,seq(3,ncol(v),by=4)][1,]
+
+[1] "#_4" "#_5" "#_6" "#_7" "#_8" "#_9"
+
+v[,seq(5,ncol(v),by=4)]
+
+     [,1]  [,2]  [,3]   [,4] [,5] [,6]
+[1,] "111" "ABC" "27"   "1"  "2"  "1976-04-22"
+[2,] "222" "XYX" "35.2" "2"  "1"  "1968-02-10"
+[3,] "333" "WHO" "19"   "1"  "1"  "1984-04-20"
+[4,] "444" "WHY" "45.7" "1"  "3"  "1957-08-14"
+[5,] "555" "HUH" "82"   "2"  "3"  "1921-05-06"
+
+Process variables one column of v at a time, converting appropriate
+ones to numeric. |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/redun.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/redun.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,14 @@ +set.seed(1) +n <- 100 +x1 <- runif(n) +x2 <- runif(n) +x3 <- x1 + x2 + runif(n)/10 +x4 <- x1 + x2 + x3 + runif(n)/10 +x5 <- factor(sample(c('a','b','c'),n,replace=TRUE)) +x6 <- 1*(x5=='a' | x5=='c') +redun(~x1+x2+x3+x4+x5+x6, r2=.8) +redun(~x1+x2+x3+x4+x5+x6, r2=.8, allcat=TRUE) +# redun(.., allcat=TRUE, minfreq=40) gives same result as allcat=FALSE + +x0 <- c(rep(0,99),1) +redun(~x0+x1+x2+x3+x4+x5+x6, r2=.8, minfreq=2) |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/summary.formula.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/summary.formula.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,25 @@ +library(Hmisc) +getHdata(titanic3) + +g <- function(x) c(Mean=mean(x,na.rm=TRUE), N=sum(!is.na(x))) +with(titanic3, tapply(age, llist(sex,pclass), g)) + +g <- function(x) c(Mean=apply(x, 2, mean, na.rm=TRUE), + N=apply(x, 2, function(w)sum(!is.na(w)))) +options(digits=3) +summary(cbind(age,fare) ~ sex + pclass, method='cross', fun=g, data=titanic3) +with(titanic3, g(cbind(age,fare))) + + +## From Kevin Thorpe kevin.thorpe@utoronto.ca +### generate data +set.seed(31) +demo <- data.frame(age=rnorm(100,50,10),sex=sample(c("Male","Female"),100,TRUE)) +summary(~age,data=demo,method="reverse") +summary(~sex,data=demo,method="reverse") + +### used to work + +summary(~ age + sex, data=demo, method="reverse") + +summaryM(age + sex ~ 1, data=demo) |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/summary.formula.response.stratify.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/summary.formula.response.stratify.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,6 @@ +require(Hmisc) +set.seed(1) +d <- expand.grid(day=c(1, 3), rx=c('A','B'), reps=1:3) +d$x <- runif(nrow(d)) +s <- summary(x ~ day + stratify(rx), fun=smean.sd, overall=FALSE, data=d) +w <- latex(s, file='/tmp/z.tex', table.env=FALSE, booktabs=TRUE) |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/summaryD.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/summaryD.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,28 @@
+require(Hmisc)
+set.seed(135)
+maj <- factor(c(rep('North',13),rep('South',13)))
+g <- paste('Category',rep(letters[1:13],2))
+n <- sample(1:15000, 26, replace=TRUE)
+y1 <- runif(26)
+y2 <- pmax(0, y1 - runif(26, 0, .1))
+png('/tmp/summaryD.png', width=550, height=800)
+spar(mfrow=c(3,2))
+f <- function(x) sprintf('%4.2f', x)
+summaryD(y1 ~ maj + g, xlab='Mean', auxtitle='', fmtvals=f)
+summaryD(y1 ~ maj + g, groupsummary=FALSE)
+summaryD(y1 ~ g, fmtvals=f, auxtitle='')
+Y <- cbind(y1, y2)
+summaryD(Y ~ maj + g, fun=function(y) y[1,], pch=c(1,17))
+rlegend(.1, 26, c('y1','y2'), pch=c(1,17))
+
+summaryD(y1 ~ maj, fun=function(y) c(mean(y), n=length(y)),
+         auxvar='n')
+dev.off()
+
+png('/tmp/summaryD2.png', width=300, height=100)
+# Or: pdf('/tmp/z.pdf', width=3.5, height=1.25)
+spar()
+summaryD(y1 ~ maj, fmtvals=function(x) round(x,4),
+         xlab=labelPlotmath('Velocity', 'm/s'))
+dev.off()
+ |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/summaryP.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/summaryP.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,43 @@ +require(Hmisc) +n <- 100 +f <- function(na=FALSE) { + x <- sample(c('N', 'Y'), n, TRUE) + if(na) x[runif(100) < .1] <- NA + x +} +set.seed(1) +d <- data.frame(x1=f(), x2=f(), x3=f(), x4=f(), x5=f(), x6=f(), x7=f(TRUE), + age=rnorm(n, 50, 10), + race=sample(c('Asian', 'Black/AA', 'White'), n, TRUE), + sex=sample(c('Female', 'Male'), n, TRUE), + treat=sample(c('A', 'B'), n, TRUE), + region=sample(c('North America','Europe'), n, TRUE)) + +d <- upData(d, labels=c(x1='MI', x2='Stroke', x3='AKI', x4='Migraines', + x5='Pregnant', x6='Other event', x7='MD withdrawal', + race='Race', sex='Sex')) + +dasna <- subset(d, region=='North America') +with(dasna, table(race, treat)) + +png('/tmp/summaryP.png', width=550, height=550) +s <- summaryP(race + sex + yn(x1, x2, x3, x4, x5, x6, x7, label='Exclusions') ~ + region + treat, data=d) +# add exclude1=FALSE to include female category +plot(s, val ~ freq | region * var, groups=treat) # best looking +dev.off() + +plot(s, groups=treat) +# plot(s, groups=treat, outerlabels=FALSE) for standard lattice output +plot(s, groups=region, key=list(columns=2, space='bottom')) + +plot(summaryP(race + sex ~ region, data=d, exclude1=FALSE), col='green') + +# Make your own plot using data frame created by summaryP +dotplot(val ~ freq | region * var, groups=treat, data=s, + xlim=c(0,1), scales=list(y='free', rot=0), xlab='Fraction', + panel=function(x, y, subscripts, ...) { + denom <- s$denom[subscripts] + x <- x / denom + panel.dotplot(x=x, y=y, subscripts=subscripts, ...) }) + |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/summaryRc.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/summaryRc.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,17 @@ +require(Hmisc) +set.seed(177) +sex <- factor(sample(c("m","f"), 500, rep=TRUE)) +age <- rnorm(500, 50, 5) +bp <- rnorm(500, 120, 7) +units(age) <- 'Years'; units(bp) <- 'mmHg' +label(bp) <- 'Systolic Blood Pressure' +L <- .5*(sex == 'm') + 0.1 * (age - 50) +y <- rbinom(500, 1, plogis(L)) +png('/tmp/summaryRc.png', height=750) +spar(mfrow=c(3,2), top=2, cex.axis=1) +summaryRc(y ~ age + bp) +# For x limits use 1st and 99th percentiles to frame extended box plots +summaryRc(y ~ age + bp, bpplot='top', datadensity=FALSE, trim=.01) +summaryRc(y ~ age + bp + stratify(sex), + label.curves=list(keys='lines'), nloc=list(x=.1, y=.05)) +dev.off() |
| diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/summaryS.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Hmisc/tests/summaryS.r Wed Jun 28 20:28:48 2017 -0400 |
| @@ -0,0 +1,150 @@ +require(Hmisc) +n <- 100 +set.seed(1) +d <- data.frame(sbp=rnorm(n, 120, 10), + dbp=rnorm(n, 80, 10), + age=rnorm(n, 50, 10), + days=sample(1:n, n, TRUE), + S1=Surv(2*runif(n)), S2=Surv(runif(n)), + race=sample(c('Asian', 'Black/AA', 'White'), n, TRUE), + sex=sample(c('Female', 'Male'), n, TRUE), + treat=sample(c('A', 'B'), n, TRUE), + region=sample(c('North America','Europe'), n, TRUE), + meda=sample(0:1, n, TRUE), medb=sample(0:1, n, TRUE)) + +d <- upData(d, labels=c(sbp='Systolic BP', dbp='Diastolic BP', + race='Race', sex='Sex', treat='Treatment', + days='Time Since Randomization', + S1='Hospitalization', S2='Re-Operation', + meda='Medication A', medb='Medication B'), + units=c(sbp='mmHg', dbp='mmHg', age='years', days='days')) + +Png <- function(z) png(paste('/tmp/summaryS', z, '.png', sep='')) +Png(1) +s <- summaryS(age + sbp + dbp ~ days + region + treat, data=d) +# plot(s) # 3 pages +plot(s, groups='treat', datadensity=TRUE, + scat1d.opts=list(lwd=.5, nhistSpike=0)) +dev.off() +Png(2) +plot(s, groups='treat', panel=panel.loess, + key=list(space='bottom', columns=2), + datadensity=TRUE, scat1d.opts=list(lwd=.5)) +dev.off() +# Show both points and smooth curves: +Png(3) +plot(s, groups='treat', + panel=function(...) {panel.xyplot(...); panel.loess(...)}) +dev.off() +plot(s, y ~ days | yvar * region, groups='treat') + +# Make your own plot using data frame created by summaryP +xyplot(y ~ days | yvar * region, groups=treat, data=s, + scales=list(y='free', rot=0)) + +# Use loess to estimate the probability of two different types of events as +# a function of time +s <- summaryS(meda + medb ~ days + treat + region, data=d) +pan <- function(...) + panel.plsmo(..., type='l', label.curves=max(which.packet()) == 1, + datadensity=TRUE) +Png(4) +plot(s, groups='treat', panel=pan, paneldoesgroups=TRUE, + scat1d.opts=list(lwd=.7), cex.strip=.8) +dev.off() + +# Demonstrate dot charts of summary statistics +s <- summaryS(age + sbp + dbp ~ region + treat, data=d, fun=mean) +plot(s) +Png(5) +plot(s, groups='treat', funlabel=expression(bar(X))) +dev.off() + +# Compute parametric confidence limits for mean, and include sample sizes +f <- function(x) { + x <- x[! is.na(x)] + c(smean.cl.normal(x, na.rm=FALSE), n=length(x)) +} +s <- summaryS(age + sbp + dbp ~ region + treat, data=d, fun=f) +# Draw [ ] for lower and upper confidence limits in addition to thick line +Png(6) +plot(s, funlabel=expression(bar(X) %+-% t[0.975] %*% s), + pch.stats=c(Lower=91, Upper=93)) # type show.pch() to see defs. +dev.off() +Png(7) +plot(s, textonly='n', textplot='Mean', digits=1) +dev.off() + +# Customize printing of statistics to use X bar symbol and smaller +# font for n=... +cust <- function(y) { + means <- format(round(y[, 'Mean'], 1)) + ns <- format(y[, 'n']) + simplyformatted <- paste('X=', means, ' n=', ns, ' ', sep='') + s <- NULL + for(i in 1:length(ns)) { + w <- paste('paste(bar(X)==', means[i], ',~~scriptstyle(n==', ns[i], + '))', sep='') + s <- c(s, parse(text=w)) + } + list(result=s, + longest=simplyformatted[which.max(nchar(simplyformatted))]) +} +Png(8) +plot(s, groups='treat', cex.values=.65, + textplot='Mean', custom=cust, + key=list(space='bottom', columns=2, + text=c('Treatment A:','Treatment B:'))) +dev.off() + +## Stratifying by region and treat fit an exponential distribution to +## S1 and S2 and estimate the probability of an event within 0.5 years + +f <- function(y) { + hazard <- sum(y[,2]) / sum(y[,1]) + 1. 
- exp(- hazard * 0.5) +} + +s <- summaryS(S1 + S2 ~ region + treat, data=d, fun=f) +plot(s, groups='treat', funlabel='Prob[Event Within 6m]', xlim=c(.3, .7)) + + +## Demonstrate simultaneous use of fun and panel +## First show the same quantile intervals used in panel.bppplot by +## default, stratified by region and day + +d <- upData(d, days=round(days / 30) * 30) +g <- function(y) { + probs <- c(0.05, 0.125, 0.25, 0.375) + probs <- sort(c(probs, 1 - probs)) + y <- y[! is.na(y)] + w <- hdquantile(y, probs) + m <- hdquantile(y, 0.5, se=TRUE) + se <- as.numeric(attr(m, 'se')) + c(Median=as.numeric(m), w, se=se, n=length(y)) +} +s <- summaryS(sbp + dbp ~ days + region, fun=g, data=d) +Png(9) +plot(s, groups='region', panel=mbarclPanel, paneldoesgroups=TRUE) +dev.off() + +# Similar but use half-violin plots +s <- summaryS(sbp + dbp ~ days + region, data=d) +Png('9v') +plot(s, groups='region', panel=medvPanel, paneldoesgroups=TRUE) +dev.off() + +## Show Wilson confidence intervals for proportions, and confidence +## intervals for difference in two proportions +g <- function(y) { + y <- y[!is.na(y)] + n <- length(y) + p <- mean(y) + se <- sqrt(p * (1. - p) / n) + structure(c(binconf(sum(y), n), se=se, n=n), + names=c('Proportion', 'Lower', 'Upper', 'se', 'n')) +} +s <- summaryS(meda + medb ~ days + region, fun=g, data=d) +Png(10) +plot(s, groups='region', panel=mbarclPanel, paneldoesgroups=TRUE) +dev.off() |
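The exponential fit in f() above relies on a Surv object storing follow-up time in column 1 and the event indicator in column 2, so sum(events)/sum(time) is the maximum-likelihood estimate of a constant hazard, and P(event by t) = 1 - exp(-hazard * t). A minimal sketch checking that identity outside summaryS; all names below are illustrative and not part of the test file:

# Check of the constant-hazard event probability used by f() above.
require(survival)
set.seed(2)
tt  <- rexp(1000, rate=0.8)        # simulate with true hazard 0.8
S   <- Surv(tt)                    # no censoring: all status = 1
lam <- sum(S[, 2]) / sum(S[, 1])   # MLE of the exponential hazard
c(estimated=1 - exp(-lam * 0.5),   # Prob[event within 0.5 years]
  true     =1 - exp(-0.8 * 0.5))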
diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/test.r
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/tests/test.r        Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,2 @@
+library(foreign)
+w <- lookup.xport('
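lookup.xport() from the foreign package returns metadata (dataset names, variables, formats) for a SAS XPORT file without reading the data; its argument is truncated in this diff. A hedged sketch of typical usage — the file name below is an assumption for illustration, not what the test actually passes:

# Hypothetical call; 'test.xpt' is assumed, not taken from the truncated line
library(foreign)
w <- lookup.xport('test.xpt')
names(w)   # one element per SAS dataset in the transport file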
diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/test.rda
Binary file Hmisc/tests/test.rda has changed
diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/test.sas
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/tests/test.sas      Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,39 @@
+libname x SASV5XPT "test.xpt";
+libname y SASV5XPT "test2.xpt";
+
+PROC FORMAT; VALUE race 1=green 2=blue 3=purple; RUN;
+PROC FORMAT CNTLOUT=format;RUN;
+data test;
+LENGTH race 3 age 4;
+age=30; label age="Age at Beginning of Study";
+race=2;
+d1='3mar2002'd ;
+dt1='3mar2002 9:31:02'dt;
+t1='11:13:45't;
+output;
+
+age=31;
+race=4;
+d1='3jun2002'd ;
+dt1='3jun2002 9:42:07'dt;
+t1='11:14:13't;
+output;
+format d1 mmddyy10. dt1 datetime. t1 time. race race.;
+run;
+
+data z; LENGTH x3 3 x4 4 x5 5 x6 6 x7 7 x8 8;
+ DO i=1 TO 100;
+  x3=ranuni(3);
+  x4=ranuni(5);
+  x5=ranuni(7);
+  x6=ranuni(9);
+  x7=ranuni(11);
+  x8=ranuni(13);
+  output;
+  END;
+ DROP i;
+ RUN;
+ PROC MEANS;RUN;
+/* PROC CPORT LIB=work FILE='test.xpt';run;  * no; */
+PROC COPY IN=work OUT=x;SELECT test;RUN;
+PROC COPY IN=work OUT=y;SELECT test format z;RUN;
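test.sas writes two SAS Version 5 transport files; the natural counterpart on the R side is Hmisc's sasxport.get(), which reads the member datasets and, when the PROC FORMAT catalog was exported alongside them (as in test2.xpt), applies the value labels. A sketch under the assumption that the files sit in the working directory:

# Read back the transport files produced by test.sas (paths assumed)
require(Hmisc)
d1 <- sasxport.get('test.xpt')    # dataset TEST only
d2 <- sasxport.get('test2.xpt')   # TEST plus FORMAT catalog and Z
# With multiple member datasets, sasxport.get returns a list of data frames;
# the element name 'test' is an assumption based on the dataset name
contents(d2$test)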
diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/test.xml
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/tests/test.xml      Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,177 @@
+<?xml version="1.0" encoding="iso-8859-1" ?>
+<oim:Transfer xmlns:oim="http://www.mdcinfo.com/oim/oim.dtd"
+              xmlns:dbm="http://www.mdcinfo.com/oim/dbm.dtd"
+              xmlns:tfm="http://www.mdcinfo.com/oim/tfm.dtd">
+
+ <!-- VersionHeader OimVersion="1.1" OimStatus="Proposal" -->
+ <oim:TransferHeader Exporter="SAS Proprietary Software Release 8.2(8.02.02M0P02062001)"
+                     ExporterVersion="8.2"
+                     TransferDateTime="2003-04-16T13:35:53" />
+
+ <dbm:ColumnTypeSet oim:id="_7999" name="http://www.w3.org/TR/1998/NOTE-XML-data-0105/">
+  <dbm:ColumnTypeSetColumnTypes>
+   <dbm:ColumnType oim:id="_8000" name="string" IsFixedLength="True" />
+   <dbm:ColumnType oim:id="_8001" name="number" />
+   <dbm:ColumnType oim:id="_8002" name="int" />
+   <dbm:ColumnType oim:id="_8003" name="float" />
+   <dbm:ColumnType oim:id="_8004" name="fixed.14.4" />
+   <dbm:ColumnType oim:id="_8005" name="boolean" />
+   <dbm:ColumnType oim:id="_8006" name="dateTime.iso8601" />
+   <dbm:ColumnType oim:id="_8007" name="dateTime.iso8601tz" />
+   <dbm:ColumnType oim:id="_8008" name="date.iso8601" />
+   <dbm:ColumnType oim:id="_8009" name="time.iso8601" />
+   <dbm:ColumnType oim:id="_8010" name="time.iso8601tz" />
+   <dbm:ColumnType oim:id="_8011" name="i1" />
+   <dbm:ColumnType oim:id="_8012" name="i2" />
+   <dbm:ColumnType oim:id="_8013" name="i4" />
+   <dbm:ColumnType oim:id="_8014" name="i8" />
+   <dbm:ColumnType oim:id="_8015" name="ui1" />
+   <dbm:ColumnType oim:id="_8016" name="ui2" />
+   <dbm:ColumnType oim:id="_8017" name="ui4" />
+   <dbm:ColumnType oim:id="_8018" name="ui8" />
+   <dbm:ColumnType oim:id="_8019" name="r4" />
+   <dbm:ColumnType oim:id="_8020" name="r8" />
+   <dbm:ColumnType oim:id="_8021" name="float.IEEE.754.32" />
+   <dbm:ColumnType oim:id="_8022" name="float.IEEE.754.64" />
+   <dbm:ColumnType oim:id="_8023" name="uuid" />
+   <dbm:ColumnType oim:id="_8024" name="uri" />
+   <dbm:ColumnType oim:id="_8026" name="bin.hex" />
+   <dbm:ColumnType oim:id="_8027" name="char" />
+   <dbm:ColumnType oim:id="_8028" name="string.ansi" />
+   <dbm:ColumnType oim:id="_8025" name="bin.base64" />
+  </dbm:ColumnTypeSetColumnTypes>
+ </dbm:ColumnTypeSet>
+
+
+ <dbm:Catalog oim:id="_1">
+  <dbm:CatalogSchemas>
+   <dbm:Schema oim:id="_2">
+    <dbm:SchemaTables>
+
+     <!--                                               -->
+     <!-- version 8.2                                   -->
+     <!-- this is a new location for the transformation -->
+     <!-- desired for supporting multiple table exports -->
+     <!--                                               -->
+     <tfm:Transformation>
+      <tfm:TransformationConversion>
+       <tfm:CodeDecodeSet name="SEXFMT">
+        <tfm:CodeDecodeSetCodeColumn oim:href="#_6" />
+        <tfm:CodeDecodeSetCodeColumn oim:href="#_7" />
+        <tfm:CodeDecodeValue name="_TYPE" value="FORMAT" />
+        <tfm:CodeDecodeValue name="_GLOBAL" value="VALUE SEXFMT" />
+        <tfm:CodeDecodeValue value="1" DecodeValue="'Female'" />
+        <tfm:CodeDecodeValue value="2" DecodeValue="'Male'" />
+       </tfm:CodeDecodeSet>
+       <tfm:CodeDecodeSet name="RACEFMT">
+        <tfm:CodeDecodeSetCodeColumn oim:href="#_8" />
+        <tfm:CodeDecodeValue name="_TYPE" value="FORMAT" />
+        <tfm:CodeDecodeValue name="_GLOBAL" value="VALUE RACEFMT" />
+        <tfm:CodeDecodeValue value="1" DecodeValue="&apo
...
+                name="initials"
+                Length="3">
+     <dbm:ColumnDataType>
+      <dbm:ColumnType oim:href="#_8000" />
+     </dbm:ColumnDataType>
+    </dbm:Column>
+    <dbm:Column oim:id="_6"
+                name="age"
+                description="Age at baseline">
+     <dbm:ColumnDataType>
+      <dbm:ColumnType oim:href="#_8003" />
+     </dbm:ColumnDataType>
+    </dbm:Column>
+    <dbm:Column oim:id="_7"
+                name="sex">
+     <dbm:ColumnDataType>
+      <dbm:ColumnType oim:href="#_8003" />
+     </dbm:ColumnDataType>
+    </dbm:Column>
+    <dbm:Column oim:id="_8"
+                name="race">
+     <dbm:ColumnDataType>
+      <dbm:ColumnType oim:href="#_8003" />
+     </dbm:ColumnDataType>
+    </dbm:Column>
+    <dbm:Column oim:id="_9"
+                name="dob"
+                description="date of birth">
+     <dbm:ColumnDataType>
+      <dbm:ColumnType oim:href="#_8008" />
+     </dbm:ColumnDataType>
+    </dbm:Column>
+   </dbm:ColumnSetColumns>
+  </dbm:Table>
+
+  <Table oim:href="#_3">
+   <ColumnSetColumns>
+    <Column oim:href="#_4"> 111 </Column>
+    <Column oim:href="#_5"> ABC </Column>
+    <Column oim:href="#_6"> 27 </Column>
+    <Column oim:href="#_7"> 1 </Column>
+    <Column oim:href="#_8"> 2 </Column>
+    <Column oim:href="#_9"> 1976-04-22 </Column>
+   </ColumnSetColumns>
+   <ColumnSetColumns>
+    <Column oim:href="#_4"> 222 </Column>
+    <Column oim:href="#_5"> XYX </Column>
+    <Column oim:href="#_6"> 35.2 </Column>
+    <Column oim:href="#_7"> 2 </Column>
+    <Column oim:href="#_8"> 1 </Column>
+    <Column oim:href="#_9"> 1968-02-10 </Column>
+   </ColumnSetColumns>
+   <ColumnSetColumns>
+    <Column oim:href="#_4"> 333 </Column>
+    <Column oim:href="#_5"> WHO </Column>
+    <Column oim:href="#_6"> 19 </Column>
+    <Column oim:href="#_7"> 1 </Column>
+    <Column oim:href="#_8"> 1 </Column>
+    <Column oim:href="#_9"> 1984-04-20 </Column>
+   </ColumnSetColumns>
+   <ColumnSetColumns>
+    <Column oim:href="#_4"> 444 </Column>
+    <Column oim:href="#_5"> WHY </Column>
+    <Column oim:href="#_6"> 45.7 </Column>
+    <Column oim:href="#_7"> 1 </Column>
+    <Column oim:href="#_8"> 3 </Column>
+    <Column oim:href="#_9"> 1957-08-14 </Column>
+   </ColumnSetColumns>
+   <ColumnSetColumns>
+    <Column oim:href="#_4"> 555 </Column>
+    <Column oim:href="#_5"> HUH </Column>
+    <Column oim:href="#_6"> 82 </Column>
+    <Column oim:href="#_7"> 2 </Column>
+    <Column oim:href="#_8"> 3 </Column>
+    <Column oim:href="#_9"> 1921-05-06 </Column>
+   </ColumnSetColumns>
+  </Table>
+
+ </dbm:SchemaTables>
+ </dbm:Schema>
+ </dbm:CatalogSchemas>
+ </dbm:Catalog>
+
+
+</oim:Transfer>
+
diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/test.xpt
Binary file Hmisc/tests/test.xpt has changed
diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/test2.xpt
Binary file Hmisc/tests/test2.xpt has changed
diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/testexportlib.r
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/tests/testexportlib.r       Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,2 @@
+library(Hmisc)
+d <- sasxport.get('csv', method='csv')
diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/testexportlib.sas
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/tests/testexportlib.sas     Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,22 @@
+PROC FORMAT; VALUE race 1=green 2=blue 3=purple; RUN;
+PROC FORMAT CNTLOUT=format;RUN;
+data test;
+LENGTH race 3 age 4;
+age=30; label age="Age at Beginning of Study";
+race=2;
+d1='3mar2002'd ;
+dt1='3mar2002 9:31:02'dt;
+t1='11:13:45't;
+output;
+
+age=31;
+race=4;
+d1='3jun2002'd ;
+dt1='3jun2002 9:42:07'dt;
+t1='11:14:13't;
+output;
+format d1 mmddyy10. dt1 datetime. t1 time. race race.;
+run;
+%INCLUDE "H:\R\Hmisc\sas\exportlib.sas";
+%exportlib(work, H:\R\Hmisc\tests\csv);
+
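The %exportlib macro (included here from Hmisc/sas/exportlib.sas) writes each member of the given SAS library as a CSV file in the target directory; testexportlib.r above then reads that directory back with sasxport.get('csv', method='csv'). A sketch of the R side, assuming the CSVs landed in ./csv as in the test:

# Read the CSV directory created by %exportlib (directory name from the test;
# the interpretation of method='csv' as "read a directory of exported CSVs"
# is an assumption based on how these two test files pair up)
require(Hmisc)
d <- sasxport.get('csv', method='csv')
sapply(d, nrow)   # assumes a list of data frames, one per SAS dataset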
diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/wtd.r
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/tests/wtd.r Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,14 @@
+# Jose.M.Pavia@uv.es
+require(Hmisc)
+PerCapita <- c(10, 20, 30, 20, 20, 40)
+Group     <- c("A", "B", "B", "A", "A", "B")
+W         <- c(1.5, 2.3, 4.5, 2.6, 1.7, 3.9)
+
+## Works
+wtd.mean(PerCapita, weights=W)
+wtd.quantile(PerCapita, weights=W)
+wtd.mean(PerCapita[Group=="A"], weights=W[Group=="A"])
+wtd.mean(PerCapita[Group=="B"], weights=W[Group=="B"])
+
+g <- function(y) wtd.mean(y[,1], y[,2])
+summarize(cbind(PerCapita, W), llist(Group), g, stat.name='y')
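wtd.mean(x, weights=w) is simply sum(w * x) / sum(w), so the grouped results can be verified by hand; a quick check reusing the objects defined above:

# Hand computation that should agree with the group-specific wtd.mean
# calls and with the 'y' column returned by summarize()
sum(W[Group=="A"] * PerCapita[Group=="A"]) / sum(W[Group=="A"])
sum(W[Group=="B"] * PerCapita[Group=="B"]) / sum(W[Group=="B"])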
diff -r 000000000000 -r c9dc7254a2ac Hmisc/tests/xYplotFilledBands.r
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/Hmisc/tests/xYplotFilledBands.r   Wed Jun 28 20:28:48 2017 -0400
@@ -0,0 +1,14 @@
+# This example uses the summarize function in Hmisc to
+# compute the median and outer quartiles.  The outer quartiles are
+# displayed using "filled bands".
+
+set.seed(111)
+dfr <- expand.grid(month=1:12, year=c(1997,1998), reps=1:100)
+month <- dfr$month; year <- dfr$year
+y <- abs(month - 6.5) + 2*runif(length(month)) + year - 1997
+s <- summarize(y, llist(month, year), smedian.hilow, conf.int=.5)
+
+# Filled bands: default fill = pastel colors matching solid colors
+# in superpose.line (this works differently in R)
+xYplot(Cbind(y, Lower, Upper) ~ month, groups=year,
+       method="filled bands", data=s, type="l")
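The Lower and Upper columns referenced in Cbind() come from smedian.hilow, which with conf.int=.5 returns the median together with the 0.25 and 0.75 quantiles; summarize() binds them to the grouping variables. A quick way to inspect the structure xYplot consumes, reusing the objects defined above:

# One row per month x year; the column names y, Lower, Upper are the ones
# used in the Cbind() formula
head(s)
str(s)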