Wednesday, August 30, 2006

Teradata extract function

case when b.Branch_Cust_IP is null then extract(year from a.Ruler_Date)
when a.Branch_Cust_IP is null then extract(year from b.Ruler_Date)
else extract(year from a.Ruler_Date) end as shana

Sas proc mi-fill missing

proc mi data=events.no_gil seed=555536 simple nimpute=1 out=ll;
em itprint outem=outem;
var gil_max sum_PASIV sum_maskoret sum_KIZBA_YELED sum_KIZBAOT ;
run;

Sas proc tabulate

proc tabulate data=shotef.matah ;
class division_att_Desc Nalan_Cluster_Desc L_GLISHA_YESHIRA;
var schum internet emda phone pakid_all;
table ((division_att_Desc='name' all)*Nalan_Cluster_Desc='name'),L_GLISHA_YESHIRA=''*(schum='name'*(sum='name'*f=14. n)
internet='name'*(mean='name'*f=5.1 n)
emda='name'*(mean='name'*f=5.1 n)
phone='name'*(mean='name'*f=5.1 n)
pakid_all='name'*(mean='name'*f=5.1 n));
format format Nalan_Cluster_Desc $nalan. L_GLISHA_YESHIRA membr.;
keylabel all='total';
run;

Sas array demi

data a;
input id name $;
cards;
10 -11
10 222
10 -33
23 333
23 -33
76 -11
76 222
76 -33
;
run;
data b;
input names $;
cards;
-11
-33
222
333
999
888
555
666
777
;
run;
data bb; length nam_nam $200;set b end=eof;
if _n_=1 then nam_nam=' ';
retain nam_nam;
nam_nam=trim(compress(nam_nam||"','"||names));
if eof then do;
nam_nam=compress(substr(nam_nam,3)||"'");
output;
end;
call symput('name_macro',nam_nam);
run;
proc sort data=a; by id name; run;
options symbolgen mprint;
data b;
set a; by id;
array nam [9] $ _temporary_ (&name_macro);
array vars v1-v9;
retain v1-v9;
do jj=1 to dim(vars);
vars[jj]=.;
end;
do j=1 to dim(vars);
if name=nam[j] then vars[j]=1;
end;
if last.id then output;
drop jj j;
run;

Sas array transpose

data average_lag6;
set niud.ovr_lag_all_200110_200201;
array product ms_kartisim
maskorot
l_miuazim
miuazim
ms_hk
sch_hk
halvaot
misgeret_ashrai
osher
pasiv;

array name{10} $ 16;
do i=1 to dim(product);
name[i]=vname(product[i]);
schum=product [i];
varname=name[i];
month_lag=month_lag6;
output;
end;
keep schum varname month_lag;
run;


proc sort data=dug;
BY b;
run;

proc sql;
select distinct
year into: tkufa
separated by '+'
from dug;
quit;



%macro dugma;
%do i=1 %to 2;
DATA stam
(DROP=I year x y year1 year2 z);
;
ARRAY years {2} year1- year2;
ARRAY xs {2} x1-x2;
ARRAY ys {2} y1-y2;
DO I=1 TO 2 UNTIL (LAST.b);
SET dug;
BY b;
years {I}=year;
x_%scan(&tkufa,&i,+)=xs{I};

xs{I}=x;
ys{I}=y;
END;

run;
%end;
%mend;
%dugma;

Tuesday, August 29, 2006

Sas array combination

data zevet1.arr;
set zevet1.for_model;
array dich[15] ms_harshot_d sum_mispar_hor_keva_d arhrai_d maskorot1 mashkanta1 TIH_HAP1 TIH_TASH1 avg_sch_kranot1 avg_sch_ne1 avg_pkl_pkd1 avg_pkl_pkd_zm1 GEMEL_NZL1 halvaot1 modern1 count;

array name{15} $ _temporary_ ('ms_hars','hor_keva_d','arhrai_d',
'maskorot1','mashkanta1','TIH_HAP1','TIH_TASH1', 'sch_kranot1',
'sch_ne1','pkl_pkd1','pkl_pkd_zm1','GEMEL_NZL1','halvaot1',
'modern1', 'count' );

tzvt_activ_form=put(tzvt_activ,zev.);
kod_vetek=put(vetek_lak,vetek.);
do i=1 to 15;
category=dich [i];
prod=name[i];
num_product=i;
output;
end;
keep sd10_numerator category num_product tzvt_activ tzvt_activ_form prod kod_vetek;
run;


data b4_300;
set zevet1.for_model(where=(tzvt_activ_form='300'));
array aa ms_harshot_d sum_mispar_hor_keva_d arhrai_d maskorot1
avg_pkl_pkd1 ;
m=0;
do i=1 to 5;
do j=i+1 to 5;
do k=j+1 to 5;

comb=aa[i]+aa[j]+aa[k];
name=compress(vname(aa[i])||'_'||vname(aa[j])||'_'||vname(aa[k]));
m+1;
if comb<3 then continue;
else output;


end;
end;
end;
run;

Sas array replace missing

data toz;
set toz;
array toz{*} _NUMERIC_ ;
do i = 1 to dim(toz);
if toz{i} = . then toz{i} = 0;
end;
drop i;
run;

Sas array transpose

/***The equivalent DATA step code using arrays*/

DATA NEW (KEEP=NAME DATE1-DATE3);
SET OLD;
BY NAME;
ARRAY DATES {3} DATE1-DATE3;
RETAIN DATE1-DATE3;
IF FIRST.NAME THEN I=1;
ELSE I + 1;
DATES{I} = DATE;
IF LAST.NAME;
run;


DATA NEW (KEEP=NAME DATE1-DATE3);
ARRAY DATES {3} DATE1-DATE3;
DO I=1 TO 3;
SET OLD;
DATES{I} = DATE;
END;
/*if we have missing values:*/

PROC FREQ DATA=OLD ORDER=FREQ;
TABLES NAME / NOPRINT OUT=TEMP;
DATA _NULL_;
SET TEMP;
CALL SYMPUT('N',
COMPRESS(PUT(COUNT,3.)));
STOP;


DATA NEW (KEEP=NAME DATE1-DATE&N);
ARRAY DATES {&N} DATE1-DATE&N;
DO I=1 TO &N UNTIL (LAST.NAME);
SET OLD;
BY NAME;
DATES{I} = DATE;
END;

DATA NEW (KEEP=NAME DATE1-DATE&N);
SET OLD;
BY NAME;
ARRAY DATES {&N} DATE1-DATE&N;
RETAIN DATE1-DATE&N;
IF FIRST.NAME THEN I=1;
ELSE I + 1;
DATES{I} = DATE;
IF LAST.NAME;
IF I < &N THEN DO I=I+1 TO &N;
DATES{I}=.;
END;
DATA NEW (KEEP=NAME DATE1-DATE20);
ARRAY DATES {20} DATE1-DATE20;
OBSNO + 1;
DO I=1 TO 20 UNTIL (LAST.NAME);
SET OLD;
BY NAME;
IF FIRST.NAME THEN OBSNO=1;
DATES{I} = DATE;
END;


DATA ALL7VARS
(DROP=I DATE RESULT);
ARRAY DATES {3} DATE1-DATE3;
ARRAY RESULTS {3} RESULT1-RESULT3;
DO I=1 TO 3 UNTIL (LAST.NAME);
SET OLD;
BY NAME;
DATES {I} = DATE;
RESULTS{I} = RESULT;
END;
BY NAME;
run;


DATA DIFFER;
SET NEW;
ARRAY DATES {20} DATE1-DATE20;
ARRAY DIFFS {19} DIFF1-DIFF19;
DO I=1 TO 19;
DIFFS{I} = DATES{I+1} - DATES{I};
END;

DATA ALL7VARS
(DROP=I DATE RESULT);
ARRAY DATES {3}DATE1-DATE3;
ARRAY RESULTS {3}RESULT1-RESULT3;
DO I=1 TO 3 UNTIL (LAST.NAME);
SET OLD;
BY b;
DATES {I}=DATE;
RESULTS{I}=RESULT;

END;

Sas proc transpose

data old;
NAME DATE
Amy Date #A1
Amy Date #A2
Amy Date #A3
Bob Date #B1
Bob Date #B2
Bob Date #B3


PROC TRANSPOSE DATA=OLD OUT=NEW
PREFIX=DATE;
VAR DATE;
BY NAME;
run;

******* ;
data new;
NAME DATE1 DATE2 DATE3
Amy Date #A1 Date #A2 Date #A3
Bob Date #B1 Date #B2 Date #B3
******* ;
The PREFIX= option controls the names for the*/
transposed variables (DATE1, DATE2, etc.)*/

/**************************/

Monday, August 28, 2006

Sas macro format

текст

DATA _NULL_;
FILE PRINT;
RUN;
%MACRO FORMAT(VAL=VAL,LOW=,START=,STOP=,COUNT=);
PROC FORMAT;
VALUE &VAL
%DO X=&START %TO &STOP %BY &COUNT;
%LET Y=%SYSEVALF(&X-5/10);
&LOW<-&Y="&LOW-&Y"
%LET LOW=%SYSEVALF(&Y+5/10);
%PUT &LOW;
%END;
;
%MEND FORMAT;

%FORMAT(LOW=0,START=1,STOP=5,COUNT=1) ;

data next;
DO VALUE=1 TO 5 BY 0.05;
OUTPUT;
END;
run;

proc print;
format value val.;
RUN;

Sas format

/************* to get out format********/

proc format lib=formats
cntlout=peulot ;
select $ origact;
run;

/********* to get in format *********/

proc format cntlin=ofi
lib=formats;
run;


/********** convert numeric to character*****/
new_char=put(numeric,4.0);
/********** convert character to numeric *****/
new_num=input(char4,best4.);


DATASTEP using a CNTLIN= option.The smaller file must not have any duplicates of the key variable used for matching. The DATASET created must have these elements:

FMTNAME: name of format to create
TYPE: ‘C’ for character or ‘N’ for numeric
START: the value you want to format into a label.

If you are specifying a range,
START specifies the lower end of the range and
END specifies the upper end.*/
LABEL: the label you wish to generate.*/


proc sort data=small
out=temp
nodupkey
force;
by seqnum;
data fmt (rename=(seqnum=start));
retain fmtname ‘$key’
type ‘C’
label ‘Y’;
set temp;
proc format cntlin=fmt;
run;
data _null_;
infile bigfile;
file extract;
if put(seqnum,$key.)=’Y’
then put _infile_ ;
run;
data match;
set bigfile;
if put(seqnum,$key.)=’Y’;
run;



proc format;
value score 370 - 670 = ‘670-‘
671 - 870 = ‘671+’
other = ‘unscored’
;
proc freq data=cb;
tables score;
format score score.;
run;


1. The name of the format does not have to be the
name of the variable that it will be assigned to.
2. The assignment of the FORMAT occurs in the
PROC with a FORMAT statement.
3.The format definition ends with the ‘;’ on a new line.



/***************GENERATING NEW VARIABLES WITH PROC
FORMAT AND A VALUE REPLACEMENT FORMAT
SPECIFYING RANGES IN PROC FORMAT
*********/



proc format;
value edr low-159 = ’53.4’
160-169 = ’39.3’
170-179 = ’32.3’
180-high = ’25.8’
;
data stuff;
set cb2;
edr=input(put(score,edr.),4.1);
run;


Ranges can include intervals such as:
. means that the interval includes
both endpoints.
<- . means that the interval
includes higher endpoint, but not the lower
one.
- < means that the interval
includes lower endpoint, but not the higher
one.
<- < means that the interval
does not include either endpoint.
3. The numeric “ . “ and character ‘ “ “ ‘ missing
values can be individually assigned values.
4. Ranges can be specified with special kewords:
LOW From the least (most negative) possible
number.
HIGH To the largest (positve) possible number.
OTHER All other numbers not otherwise
specified.
5. The LOW keyword does not format missing values.
6. The OTHER keyword does include missing values
unless accounted for with a ‘.’ or ‘ ‘.*/;


SAS PICTURE FORMATS
SAS PICTURE Formats provide a template for
printing numbers .
To use the saved format in a subsequent program
without having to enter the FORMAT code, specify a
LIBNAME of LIBRARY referencing the library .


One example of using PICTURE FORMATS is to truncate numbers that represents dates from
YYMMDD display to YY/MM.
proc format;
picture dt 0-991231=’11/11’
(multiplier=.01);
run;

The ‘11/11’ specifies that all leading zeros will be displayed.
If the label was ‘01/11’ then 001213 would
be displayed as 0/12 instead of 00/12.


will print a leading 0 if the percentage is less than 1
(i.e., .67% displays as 0.67%).
proc format;
picture p8r (round)
0-100 = '0009.99%'
run;

Sas sql-examples

proc sql;
create table ff as
select Branch_Cust_Nbr,
branch_nbr ,
mask_3 format=comma10.2
from events.Gidul_maskorot_10_1 a
where not exists
(select *
from events.Gidul_maskorot_9_1 b
where A.branch_cust_ip=b.branch_cust_ip);

quit;


proc sql;
create table jjj as
select Branch_Cust_Nbr,
branch_nbr ,mask_3
from events.Gidul_maskorot_10_1
where branch_cust_ip not in (select branch_cust_ip
from events.Gidul_maskorot_9_1);
quit;

proc sql;
create table jjj as
select Branch_Cust_Nbr,
branch_nbr ,mask_3
from events.Gidul_maskorot_10_1
where mask_3 > (select avg(mask_3)
from events.Gidul_maskorot_9_1);
quit;



proc sql;
create table jjj as
select Branch_Cust_Nbr,
branch_nbr ,mask_3/3 as avg_mask
from events.Gidul_maskorot_10_1
where branch_cust_ip not in (select branch_cust_ip
from events.Gidul_maskorot_9_1)
and
2*min_mask>calculated avg_mask;
quit;



proc sql;
create table res as
select coalesce (a.branch_cust_ip,b.branch_cust_ip) label='x'
from events.Gidul_maskorot_9_1 a
left join events.Gidul_maskorot_10_1 b
on a.branch_cust_ip=b.branch_cust_ip;
quit;


proc sql;
create table jjj as
select *
from events.Gidul_maskorot_10_1
except
select *
from events.Gidul_maskorot_9_1
;
quit;
/*delete double*/

proc sql;
create table jjj as
select *
from events.Gidul_maskorot_10_1
except all
select *
from events.Gidul_maskorot_9_1
;
quit;
/*not delete double*/


proc sql;
create table jjj as
select *
from events.Gidul_maskorot_10_1
except corr
select *
from events.Gidul_maskorot_9_1
;
quit;
/*relate only to variables this the same name */


proc sql;
create table jjj as
select *
from events.Gidul_maskorot_10_1
intersect
select *
from events.Gidul_maskorot_9_1
;
quit;

proc sql;
create table jjj as
select *
from events.Gidul_maskorot_10_1
union
select *
from events.Gidul_maskorot_9_1
;
quit;


proc sql;
create table jjj as
select *
from events.Gidul_maskorot_10_1
union
select *
from events.Gidul_maskorot_9_1
union
select *
from events.Gidul_maskorot_12_1
;
quit;

proc sql;
create table jjj as
select *
from events.Gidul_maskorot_10_1
outer union
select *
from events.Gidul_maskorot_9_1
outer union
select *
from events.Gidul_maskorot_12_1
;
quit;
SAS SQL

Sas macro scan

%macro stam;
%do i=1 %to 4;
%let k=&i;
data d_&k ;
set dug;
if x=&k;

RUN;
%end;
%mend;
%stam;


%let data= nohesh.nopail_200304+nohesh.nopail_200305
+nohesh.nopail_200306;

%macro sort;
%do i=1 %to 3;
proc sort data=%scan(&data,&i,+) ;
by sd10_numerator;
%end;
%mend;
%sort;

Sas symput

proc univariate data=nizul_kitun ;
var nizul_lifnei ;
output out= univout
pctlpre= pop_ pctlpts= 10 20 30 40 50 60 70 80 90;
run;
data _null_;
set univout;
call symput('pop_10',pop_10);
call symput('pop_20',pop_20);
call symput('pop_30',pop_30);
call symput('pop_40',pop_40);
call symput('pop_50',pop_50);
call symput('pop_60',pop_60);
call symput('pop_70',pop_70);
call symput('pop_80',pop_80);
call symput('pop_90',pop_90);

run;
data kkk;
set kkk;
if nizul_aharei<=&pop_10 then p=1;
else if nizul_aharei<=&pop_20 then p=2;
else if nizul_aharei<=&pop_30 then p=3;
else if nizul_aharei<=&pop_40 then p=4;
else if nizul_aharei<=&pop_50 then p=5;
else if nizul_aharei<=&pop_60 then p=6;
else if nizul_aharei<=&pop_70 then p=7;
else if nizul_aharei<=&pop_80 then p=8;
else if nizul_aharei<=&pop_90 then p=9;
else if nizul_aharei>&pop_90 then p=10;

run;

Sas macro-symput-symget

data _null_;
set sikum;
call symput('brut'||k,sumbruto);
run;
/*k have to be char*/

data k;
set sikum;

ahuzbrut=(bruto*100)/symget('brut'||k);
jj=symget('brut'||k);
run;

Sas sql macro

proc sql noprint;
select quote(facility) into :lowcount
separated by ' and facilty ne '
from low;
quit;

proc plan-sas

%let title = NESUG 2005 TEST DESIGN;
%let var = intro duration goto fee color;
%let class = color ;
%let factors = intro=3 duration=3 goto=3 fee=3 color=2;
%let levels = intro nvals=(0 1.99 2.99)
duration nvals=(6 9 12)
goto nvals=(3.99 4.99 5.99)
fee nvals=(0 15 45)
color cvals=('RED' 'WHITE')
;
%let model = intro|duration|goto|fee|color@2
intro*intro duration*duration goto*goto fee*fee
;
PROC PLAN ORDERED seed=940522;
FACTORS &factors
/NOPRINT;
OUTPUT OUT=ENUM
&levels
;
Run;

Metric Conjoint Analysis

data choc;

input Chocolate $ Center $ Nuts $& Rating;

datalines;

Dark Chewy Nuts 7
Dark Chewy No Nuts 6
Dark Soft Nuts 6
Dark Soft No Nuts 4
Milk Chewy Nuts 9
Milk Chewy No Nuts 8
Milk Soft Nuts 9
Milk Soft No Nuts 7

;

run;

ods exclude notes mvanova anova;
proc transreg utilities separators=", " short;
title2 "Metric Conjoint Analysis";
model identity(rating) = class(chocolate center nuts / zero=sum);
run;

proc gplot;
title h=1.5 "Preference for Chocolate Candies";
title2 h=1 "Nonmetric Conjoint Analysis";
plot trating * rating = 1 / frame haxis=axis2 vaxis=axis1;
symbol1 v=plus i=join;
axis1 order=(1 to 10)
label=(angle=90 "Transformation of Rating");
axis2 order=(1 to 9) label=("Original Rating");
run; quit;

Sunday, August 27, 2006

sas-proc corresp

proc corresp all data=MCA_Cars outc=Coor;
tables Marital, Origin;
run;
%plotit(data=Coor, datatype=corresp);run;