Showing posts with label array. Show all posts
Showing posts with label array. Show all posts

Sunday, February 04, 2007

The way of creating the same distribution

Sometimes we want to give one variable the same distribution as another variable.
We can do it this way (ttt is compared against the limits of the cumulative distribution of the other variable, which we can obtain from PROC FREQ):

/*
 * Assign a period code (kod) to every row of halvaot.resh2.
 *   target = 0 : kod is drawn at random; a uniform draw on (0,100) is
 *                matched against hard-coded cumulative-percentage limits.
 *   target = 1 : kod is built as YYYYMM from Loan_Value_Date.
 */
data resh2;
    set halvaot.resh2;

    if target = 0 then do;
        /* Fixed seed, so repeated runs give the same draws. */
        ttt = 100 * ranuni(31311115);

        /* The first limit that is not below ttt decides the code. */
        select;
            when (ttt <= 5)   kod = 200502;
            when (ttt <= 28)  kod = 200503;
            when (ttt <= 40)  kod = 200504;
            when (ttt <= 52)  kod = 200505;
            when (ttt <= 62)  kod = 200506;
            when (ttt <= 71)  kod = 200507;
            when (ttt <= 80)  kod = 200508;
            when (ttt <= 88)  kod = 200509;
            when (ttt <= 93)  kod = 200510;
            when (ttt <= 100) kod = 200511;
            otherwise;
        end;
    end;

    if target = 1 then
        kod = 100 * year(Loan_Value_Date) + month(Loan_Value_Date);
run;


This is a smarter way to do the same thing:



/*
 * Tabulate Loan_Value_Date in halvaot.new_halv and write the result,
 * including cumulative statistics (OUTCUM), to work.outkod.
 * No printed report is produced.
 */
proc freq data=halvaot.new_halv noprint;
    tables Loan_Value_Date / out=outkod outcum;
run;

/*
 * Collapse work.outkod (one row per Loan_Value_Date value) into three
 * macro variables:
 *   a1 - comma-separated list of cum_pct values
 *   a2 - comma-separated list of Loan_Value_Date values (raw numerics)
 *   nn - number of rows read from outkod
 *
 * Numbers are converted explicitly with BEST12., and CALL SYMPUTX is used
 * so the macro variables carry no leading or trailing blanks.
 * Each list is limited to 5000 characters by the LENGTH statement.
 */
data _null_;
    length kod_str pct_str $5000;
    retain kod_str pct_str;
    set outkod end=eof;

    /* COMPRESS removes the blank padding, leaving ",v1,v2,...". */
    kod_str = compress(kod_str || ',' || put(Loan_Value_Date, best12.));
    pct_str = compress(pct_str || ',' || put(cum_pct, best12.));

    if eof then do;
        /* SUBSTR(...,2) drops the leading comma. */
        call symputx('a1', substr(pct_str, 2));
        call symputx('a2', substr(kod_str, 2));
        call symputx('nn', _n_);
    end;
run;

/*
 * Same idea as a hard-coded ladder of limits, but driven by macro variables:
 *   nn - number of distinct values,
 *   a1 - cumulative percentages,
 *   a2 - the matching Loan_Value_Date values.
 * For target = 0 rows a uniform draw on (0,100) picks the value whose
 * cumulative-percentage interval contains the draw; that value replaces
 * Loan_Value_Date. kod (YYYYMM) is then derived for every row.
 */
data resh222;
    set halvaot.resh2;

    array a1 {&nn} _temporary_ (&a1);
    array a2 {&nn} _temporary_ (&a2);

    if target = 0 then do;
        ttt = 100 * ranuni(31311115);

        do i = 1 to dim(a1);
            if i > 1 then do;
                /* Interval (previous limit, current limit]. */
                if a1[i-1] < ttt and ttt <= a1[i] then Loan_Value_Date = a2[i];
            end;
            else do;
                /* The first interval has no lower limit. */
                if ttt <= a1[i] then Loan_Value_Date = a2[i];
            end;
        end;
    end;

    kod = 100 * year(Loan_Value_Date) + month(Loan_Value_Date);
run;

Wednesday, August 30, 2006

Sas array transpose

/*
 * Turn the ten product columns of each input row into ten output rows.
 * Each output row holds:
 *   schum     - the value of one product column,
 *   varname   - that column's name (16 characters),
 *   month_lag - a copy of month_lag6.
 */
data average_lag6;
    set niud.ovr_lag_all_200110_200201;

    array product {*}
        ms_kartisim  maskorot  l_miuazim        miuazim  ms_hk
        sch_hk       halvaot   misgeret_ashrai  osher    pasiv;

    do i = 1 to dim(product);
        schum     = product[i];
        /* $16. fixes the length of varname at 16 characters. */
        varname   = put(vname(product[i]), $16.);
        month_lag = month_lag6;
        output;
    end;

    keep schum varname month_lag;
run;


/* Order dug by b so that later steps can process it with BY b. */
proc sort data=dug;
    by b;
run;

/*
 * Store the distinct values of year found in dug in macro variable tkufa,
 * joined with '+' (the query result is also printed).
 */
proc sql;
    select distinct year
        into :tkufa separated by '+'
        from dug;
quit;



/*
 * Macro dugma: generates the DATA step below twice (macro index i = 1, 2).
 * Both passes write data set stam, so the second pass replaces the output
 * of the first. Expects macro variable tkufa to hold a plus-sign-delimited
 * list and data set dug to be sorted by b.
 */
%macro dugma;
%do i=1 %to 2;
/* NOTE(review): z appears in the DROP list but is never assigned in this step - confirm it exists in dug. */
DATA stam
(DROP=I year x y year1 year2 z);
;
ARRAY years {2} year1- year2;
ARRAY xs {2} x1-x2;
ARRAY ys {2} y1-y2;
/* Read up to two rows of the current b group in a single step iteration. */
DO I=1 TO 2 UNTIL (LAST.b);
SET dug;
BY b;
years {I}=year;
/* Target variable name is x_ followed by the i-th token of tkufa (macro index, not the data step I). */
/* NOTE(review): xs{I} is read here before it is filled from x two statements below - confirm the order is intended. */
x_%scan(&tkufa,&i,+)=xs{I};

xs{I}=x;
ys{I}=y;
END;

run;
%end;
%mend;
%dugma;

Tuesday, August 29, 2006

Sas array combination

/*
 * Reshape zevet1.for_model to one row per (input row, indicator):
 *   category    - value of the i-th indicator variable,
 *   prod        - short label of that indicator,
 *   num_product - its position (1..15).
 * Formatted versions of tzvt_activ and vetek_lak are added as well.
 *
 * The label array is declared with an explicit length of 16. Without a
 * length, character array elements default to 8 characters, which cut off
 * labels such as 'hor_keva_d' and 'pkl_pkd_zm1'.
 */
data zevet1.arr;
    set zevet1.for_model;

    array dich[15]
        ms_harshot_d  sum_mispar_hor_keva_d  arhrai_d         maskorot1
        mashkanta1    TIH_HAP1               TIH_TASH1        avg_sch_kranot1
        avg_sch_ne1   avg_pkl_pkd1           avg_pkl_pkd_zm1  GEMEL_NZL1
        halvaot1      modern1                count;

    array name{15} $ 16 _temporary_ (
        'ms_hars', 'hor_keva_d', 'arhrai_d',
        'maskorot1', 'mashkanta1', 'TIH_HAP1', 'TIH_TASH1', 'sch_kranot1',
        'sch_ne1', 'pkl_pkd1', 'pkl_pkd_zm1', 'GEMEL_NZL1', 'halvaot1',
        'modern1', 'count');

    tzvt_activ_form = put(tzvt_activ, zev.);
    kod_vetek       = put(vetek_lak, vetek.);

    do i = 1 to dim(dich);
        category    = dich[i];
        prod        = name[i];
        num_product = i;
        output;
    end;

    keep sd10_numerator category num_product tzvt_activ tzvt_activ_form prod kod_vetek;
run;


/*
 * For every row of zevet1.for_model with tzvt_activ_form = '300', walk
 * through all 3-element combinations (i < j < k) of the five variables in
 * array aa. A row is written for each combination whose sum is at least 3.
 *   comb - sum of the three values,
 *   name - the three variable names joined by underscores,
 *   m    - running number of the combination within the input row.
 */
data b4_300;
    set zevet1.for_model(where=(tzvt_activ_form='300'));

    array aa {5}
        ms_harshot_d sum_mispar_hor_keva_d arhrai_d maskorot1 avg_pkl_pkd1;

    /* Restart the combination counter for each input row. */
    m = 0;

    do i = 1 to 5;
        do j = i + 1 to 5;
            do k = j + 1 to 5;
                comb = aa[i]+aa[j]+aa[k];
                name = compress(vname(aa[i])||'_'||vname(aa[j])||'_'||vname(aa[k]));
                m + 1;
                /* A missing comb is not >= 3, so such rows are skipped too. */
                if comb >= 3 then output;
            end;
        end;
    end;
run;

Sas array replace missing

/*
 * Rewrite data set toz in place, replacing the ordinary missing value (.)
 * with 0 in every numeric variable.
 */
data toz;
    set toz;

    /* Declared before i exists, so the loop index is not part of the array. */
    array num_vars [*] _numeric_;

    do i = 1 to dim(num_vars);
        if num_vars[i] = . then num_vars[i] = 0;
    end;

    drop i;
run;

Sas array transpose

/***The equivalent DATA step code using arrays*/

/*
 * Transpose OLD (several rows per NAME, one DATE each) into one row per
 * NAME with DATE1-DATE3. OLD must be sorted by NAME and hold at most three
 * rows per NAME.
 *
 * DATE1-DATE3 are retained across rows, so they are cleared at the start
 * of every NAME group. Otherwise a group with fewer than three rows would
 * keep dates left over from the previous group.
 */
DATA NEW (KEEP=NAME DATE1-DATE3);
    SET OLD;
    BY NAME;
    ARRAY DATES {3} DATE1-DATE3;
    RETAIN DATE1-DATE3;

    IF FIRST.NAME THEN DO;
        I = 1;
        CALL MISSING(OF DATES{*});
    END;
    ELSE I + 1;          /* sum statement: I is retained automatically */

    DATES{I} = DATE;

    /* Only the last row of each group is written out. */
    IF LAST.NAME;
RUN;


/*
 * Alternative transpose: each iteration of the DATA step reads three
 * consecutive rows of OLD and stores their DATE values in DATE1-DATE3.
 * There is no BY processing, so this relies on exactly three rows per NAME.
 */
data new (keep=name date1-date3);
    array dates {3} date1-date3;

    do i = 1 to dim(dates);
        set old;
        dates{i} = date;
    end;
run;
/*if we have missing values:*/

/*
 * Find the largest number of rows any NAME has in OLD and store it in
 * macro variable N.
 *
 * ORDER=FREQ writes TEMP in descending frequency order, so the first row
 * carries the maximum COUNT. CALL SYMPUTX stores the number without blanks
 * and without the three-digit limit of PUT(COUNT,3.). Explicit RUN
 * statements make sure N exists before any later step references it.
 */
PROC FREQ DATA=OLD ORDER=FREQ;
    TABLES NAME / NOPRINT OUT=TEMP;
RUN;

DATA _NULL_;
    SET TEMP;
    CALL SYMPUTX('N', COUNT);
    STOP;                /* only the first row is needed */
RUN;


/*
 * Transpose OLD into one row per NAME with DATE1-DATE&N, where macro
 * variable N gives the width of the array. The loop reads the rows of one
 * NAME group (at most &N of them) within a single step iteration, so the
 * dates of a short group are followed by missing values.
 */
data new (keep=name date1-date&N);
    array dates {&N} date1-date&N;

    do i = 1 to dim(dates) until (last.name);
        set old;
        by name;
        dates{i} = date;
    end;
run;

/*
 * RETAIN-based transpose of OLD into one row per NAME with DATE1-DATE&N.
 * On the last row of a group, the array slots beyond the number of rows in
 * that group are set to missing so that retained values from an earlier
 * group are not carried over. The row is then written.
 */
data new (keep=name date1-date&N);
    set old;
    by name;
    array dates {&N} date1-date&N;
    retain date1-date&N;

    if first.name then i = 1;
    else i + 1;

    dates{i} = date;

    if last.name then do;
        /* Runs zero times when the group already filled every slot. */
        do i = i + 1 to dim(dates);
            dates{i} = .;
        end;
        output;
    end;
run;
/*
 * Transpose OLD into rows of at most 20 dates per NAME. A NAME with more
 * than 20 rows continues on further output rows; OBSNO numbers the output
 * rows within each NAME (1, 2, ...).
 *
 * OBSNO is included in KEEP= so the continuation rows of a NAME can be
 * told apart in the output.
 */
DATA NEW (KEEP=NAME OBSNO DATE1-DATE20);
    ARRAY DATES {20} DATE1-DATE20;

    /* Count one more output row; reset below when a new NAME starts. */
    OBSNO + 1;

    DO I=1 TO 20 UNTIL (LAST.NAME);
        SET OLD;
        BY NAME;
        IF FIRST.NAME THEN OBSNO=1;
        DATES{I} = DATE;
    END;
RUN;


/*
 * Transpose two variables at once: for each NAME in OLD (sorted by NAME,
 * up to three rows per NAME) write one row holding DATE1-DATE3 and
 * RESULT1-RESULT3.
 *
 * The BY statement belongs to the SET inside the loop. A second BY NAME
 * after the loop was redundant and has been removed.
 */
DATA ALL7VARS
    (DROP=I DATE RESULT);
    ARRAY DATES   {3} DATE1-DATE3;
    ARRAY RESULTS {3} RESULT1-RESULT3;

    DO I=1 TO 3 UNTIL (LAST.NAME);
        SET OLD;
        BY NAME;
        DATES  {I} = DATE;
        RESULTS{I} = RESULT;
    END;
RUN;


/*
 * For every row of NEW compute the gaps between consecutive dates:
 * DIFFk = DATE(k+1) - DATE(k) for k = 1..19.
 */
data differ;
    set new;
    array dates {20} date1-date20;
    array diffs {19} diff1-diff19;

    do i = 1 to dim(diffs);
        diffs{i} = dates{i+1} - dates{i};
    end;
run;

/*
 * Transpose DATE and RESULT of OLD into DATE1-DATE3 / RESULT1-RESULT3,
 * one output row per NAME group (up to three rows per group).
 *
 * The BY variable must match the LAST. variable tested in the loop. With
 * BY b, LAST.NAME is never set and the loop ignores group boundaries, so
 * the step now uses BY NAME.
 */
DATA ALL7VARS
    (DROP=I DATE RESULT);
    ARRAY DATES   {3} DATE1-DATE3;
    ARRAY RESULTS {3} RESULT1-RESULT3;

    DO I=1 TO 3 UNTIL (LAST.NAME);
        SET OLD;
        BY NAME;
        DATES  {I}=DATE;
        RESULTS{I}=RESULT;

    END;