Introduction
Cookbook
proc freq
proc freq data=mydata;
tables myvar / nocol nopercent nocum;
run;
mydata.myvar.value_counts().sort_index()
sort by frequency
proc freq order=freq data=mydata;
tables myvar / nocol nopercent nocum;
run;
mydata.myvar.value_counts()
with missing
proc freq order=freq data=mydata;
tables myvar / nocol nopercent nocum missing;
run;
mydata.myvar.value_counts(dropna=False)
proc means
proc means data=mydata n mean std min max p25 median p75;
var myvar;
run;
mydata.myvar.describe()
more percentiles
proc means data=mydata n mean std min max p1 p5 p10 p25 median p75 p90 p95 p99;
var myvar;
run;
mydata.myvar.describe(percentiles=[.01, .05, .1, .25, .5, .75, .9, .95, .99])
data step
concatenate datasets
data concatenated;
set mydata1 mydata2;
run;
concatenated = pandas.concat([mydata1, mydata2])
proc contents
proc contents data=mydata;
run;
mydata.info()
save output
proc contents noprint data=mydata out=contents;
run;
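import io
buf = io.StringIO()
mydata.info(buf=buf)  # info() prints its summary and returns None, so capture the text through buf
contents = buf.getvalue()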
Misc
number of rows in a datastep
* Try this for size: http://www2.sas.com/proceedings/sugi26/p095-26.pdf;
len(mydata)