Developer Blog

Tipps und Tricks für Entwickler und IT-Interessierte

SAS | Migrate from SAS to Python

Introduction

Cookbook

proc freq

proc freq data=mydata;
    tables myvar / nocol nopercent nocum;
run;
mydata.myvar.value_counts().sort_index()

sort by frequency

proc freq order=freq data=mydata;
	tables myvar / nocol nopercent nocum;
run;
mydata.myvar.value_counts()

with missing

proc freq order=freq data=mydata;
    tables myvar / nocol nopercent nocum missing;
run;
mydata.myvar.value_counts(dropna=False)

proc means

proc means data=mydata n mean std min max p25 median p75;
    var myvar;
run;
mydata.myvar.describe()

more percentiles

proc means data=mydata n mean std min max p1 p5 p10 p25 median p75 p90 p95 p99;
	var myvar;
run;
mydata.myvar.describe(percentiles=[.01, .05, .1, .25, .5, .75, .9, .95, .99])

data step

concatenate datasets

data concatenated;
    set mydata1 mydata2;
run;
concatenated = pandas.concat([mydata1, mydata2])

proc contents

proc contents data=mydata;
run;
mydata.info()

save output

proc contents noprint data=mydata out=contents;
run;
# DataFrame.info() only prints its report and returns None, so it cannot be
# assigned. Build the per-column summary explicitly instead: one row per
# column with its dtype and its number of non-missing values.
contents = pandas.DataFrame({"dtype": mydata.dtypes, "non_null": mydata.count()})

Misc

number of rows in a datastep

* Try this for size: http://www2.sas.com/proceedings/sugi26/p095-26.pdf;
len(mydata)

SAS | Cookbook

Handling data

Split fields

Data Cleaning



Filter out by value of an entry

if prxmatch('/^(TST|TEST|ek-test-)/', USERNAME) then
   output &_TSTDSN.;            
else
   output &_OUTDSN.;

SAS | Using Git and multi-repository environments

Using Git Hooks

$ git config --global core.hooksPath .githooks

Anhang: .githooks

prepare-commit-msg

#!/bin/bash
#
# prepare-commit-msg hook: prefix the commit message with the current branch name.
#
# Arguments passed by git:
#   $1  path of the file that holds the commit message
#   $2  source of the commit message (optional)
#   $3  commit object name (optional)

COMMIT_MSG_FILE=$1
COMMIT_SOURCE=$2
SHA1=$3

# symbolic-ref -q prints nothing when HEAD is not a symbolic ref (detached
# HEAD), which leaves branchName empty and the message unchanged.
branchPath=$(git symbolic-ref -q HEAD)
# Keep only the text after the last "/" (refs/heads/foo -> foo).
branchName=${branchPath##*/}

if [ -n "$branchName" ]; then
    prefix="$branchName | "
    message=$(cat "$COMMIT_MSG_FILE")
    case "$message" in
        # Already prefixed (e.g. the message is being reused for an amend):
        # do not stack a second prefix on top.
        "$prefix"*) ;;
        *) printf '%s%s\n' "$prefix" "$message" > "$COMMIT_MSG_FILE" ;;
    esac
fi

pre-commit

#!/bin/bash
#

_BRANCHPATH=$(git symbolic-ref -q HEAD)
_BRANCH=${_BRANCHPATH##*/}

_TIMESTAMP="$(date '

# Print the arguments prefixed with "LOG: ", but only when the environment
# variable GIT_COMMIT_DETAILED_LOGGING is set to YES. Silent otherwise.
LOG() {
        # Logging disabled: succeed without printing anything.
        [[ "$GIT_COMMIT_DETAILED_LOGGING" == "YES" ]] || return 0

        echo "LOG: $*"
}

REPLACE()
{
   local _TYP;   _TYP="$1"; shift
   local _TAG;   _TAG="$1"; shift
   local _WITH; _WITH="$1"; shift
   local _FILE; _FILE="$1"; shift

   case "$_TYP" in
      SAS)       # 
                 perl -pi -e 's/(.*)
                ;;
      CUSTOMER) # /* __DEPLOY_TAG =
                perl -pi -e 's/(\/\*\s*)('"__DEPLOY_$_TAG"'\s*=\s*)(.*$)/${1}${2
                ;;
      *)        # Unknown replacement type: only report it. The type is held in
                # the local _TYP, so that is the variable to print.
                LOG "Undefined typ '$_TYP' for file $_FILE"
                ;;
   esac

   rm -f "${_FILE}.bak"
}

LOG "working on branch $_BRANCH"

# Visit every staged path except deletions.
# --name-only with --diff-filter=d lists the paths of all non-deleted entries,
# and -z emits them NUL-terminated and unquoted, so names containing spaces or
# non-ASCII characters reach the loop intact. The list is read from fd 3 to
# keep stdin free for the commands run inside the loop.
while IFS= read -r -d '' _FILE <&3
do
        LOG "checking file $_FILE"

        # Only examine known text files
        if [[ "$_FILE" =~ [.](sas)$ ]]; then
                LOG "working on file $_FILE"
                REPLACE SAS TAG       "$_BRANCH"    "$_FILE"
                REPLACE SAS TIMESTAMP "$_TIMESTAMP" "$_FILE"
        fi

        # Paths starting with MA03 are additionally handled as customer scripts
        if [[ "$_FILE" =~ ^MA03 ]]; then
                LOG "working on bihis-customer script  $_FILE"
                REPLACE CUSTOMER TAG       "$_BRANCH"    "$_FILE"
                REPLACE CUSTOMER TIMESTAMP "$_TIMESTAMP" "$_FILE"
        fi
done 3< <(git diff-index --cached --name-only --diff-filter=d -z HEAD)