data pilotdata;
infile 'D:\SAS_Data\pilot.txt';
input ID $ 1 -6
firstname $ 7 - 19
lastname $ 20 - 34
jobcode $ 35 - 41
salary 42 -47
category $ 48 - 50;
Bonus = Salary * 0.1;
If jobcode = 'PILOT1' then
NewSalary = salary * 1.05;
Else If jobcode = 'PILOT2' then
NewSalary = salary * 1.07;
Else If jobcode = 'PILOT3' then
NewSalary = salary * 1.09;
run;
proc print data = pilotdata label;
title1 'International Airlines';
title2 'Employee Information';
label firstname = 'First Name'
lastname = 'Last Name'
jobcode = 'Job Code';
Format NewSalary Dollar10.;
WHERE jobcode = 'PILOT1';
footnote1 'Here is footnote 1';
footnote2 'Here is footnote 2';
run;
proc print data = pilotdata split = '*';
title1 'International*Airlines';
title2 'Employee Information';
label firstname = 'First*Name'
lastname = 'Last Name'
jobcode = 'Job Code';
run;
Thursday, March 20, 2008
PUT, INPUT, and SUBSTR
/* input() function ---------------------Convert to Numberic
newVar = INPUT(oldVar, format);
put() function -----------------------Convert to Character
newVar = PUT(oldVar, format);
substr() function
*/
Data num;
A = 5;
B = '5';
C = A*B;
D = A*newB;
newB = input(B, 1.);
E = newB;
F = A*newB;
G = 98;
newD = put(G, 2.);
H1 = substr(newD, 2, 1);
H2 = substr('1998', 2);
run;
proc print data=num;
title "Data=put_input_substr";
run;
(OUTPUT)
Obs A B C D newB E F G newD H1 H2
1 5 5 25 . 5 5 25 98 98 8 998
newVar = INPUT(oldVar, format);
put() function -----------------------Convert to Character
newVar = PUT(oldVar, format);
substr() function
*/
Data num;
A = 5;
B = '5';
C = A*B;
D = A*newB;
newB = input(B, 1.);
E = newB;
F = A*newB;
G = 98;
newD = put(G, 2.);
H1 = substr(newD, 2, 1);
H2 = substr('1998', 2);
run;
proc print data=num;
title "Data=put_input_substr";
run;
(OUTPUT)
Obs A B C D newB E F G newD H1 H2
1 5 5 25 . 5 5 25 98 98 8 998
Read missing values
/* By default - FLOWOVER; It will go next value and pick next value. This example shows missing the last two values - 3.4 and 3.5
(OUTPUT)
Obs A1 A2 A3 A4 A5
1 1.1 1.2 1.3 1.4 1.5
2 2.1 2.2 3.1 3.2 3.3
*/
DATA A_FLOWOVER;
INFILE CARDS FLOWOVER; /*DEFAULT*/
INPUT A1-A5;
CARDS;
1.1 1.2 1.3 1.4 1.5
2.1 2.2
3.1 3.2 3.3 3.4 3.5
;
PROC PRINT data=A_FLOWOVER;
title "Data=A_FLOWOVER";
RUN;
/* MISSOVER; it will skip the missing values in the line and grab next line
(OUTPUT)
Obs A1 A2 A3 A4 A5
1 1.1 1.2 1.3 1.4 1.5
2 2.1 2.2 . . .
3 3.1 3.2 3.3 3.4 3.5
*/
DATA B_MISSOVER;
INFILE CARDS MISSOVER;
INPUT A1-A5;
CARDS;
1.1 1.2 1.3 1.4 1.5
2.1 2.2
3.1 3.2 3.3 3.4 3.5
;
PROC PRINT data=B_MISSOVER;
title "Data=B_MISSOVER";
RUN;
/* STOPOVER; After you execute, the program will read the data till there is any missing data
(OUTPUT)
Obs A1 A2 A3 A4 A5
1 1.1 1.2 1.3 1.4 1.5
*/
DATA C2_STOPOVER;
INFILE CARDS STOPOVER;
INPUT A1-A5;
CARDS;
1.1 1.2 1.3 1.4 1.5
2.1 2.2
3.1 3.2 3.3 3.4 3.5
;
PROC PRINT data=C2_STOPOVER;
title "Data=C2_STOPOVER";
RUN;
(OUTPUT)
Obs A1 A2 A3 A4 A5
1 1.1 1.2 1.3 1.4 1.5
2 2.1 2.2 3.1 3.2 3.3
*/
DATA A_FLOWOVER;
INFILE CARDS FLOWOVER; /*DEFAULT*/
INPUT A1-A5;
CARDS;
1.1 1.2 1.3 1.4 1.5
2.1 2.2
3.1 3.2 3.3 3.4 3.5
;
PROC PRINT data=A_FLOWOVER;
title "Data=A_FLOWOVER";
RUN;
/* MISSOVER; it will skip the missing values in the line and grab next line
(OUTPUT)
Obs A1 A2 A3 A4 A5
1 1.1 1.2 1.3 1.4 1.5
2 2.1 2.2 . . .
3 3.1 3.2 3.3 3.4 3.5
*/
DATA B_MISSOVER;
INFILE CARDS MISSOVER;
INPUT A1-A5;
CARDS;
1.1 1.2 1.3 1.4 1.5
2.1 2.2
3.1 3.2 3.3 3.4 3.5
;
PROC PRINT data=B_MISSOVER;
title "Data=B_MISSOVER";
RUN;
/* STOPOVER; After you execute, the program will read the data till there is any missing data
(OUTPUT)
Obs A1 A2 A3 A4 A5
1 1.1 1.2 1.3 1.4 1.5
*/
DATA C2_STOPOVER;
INFILE CARDS STOPOVER;
INPUT A1-A5;
CARDS;
1.1 1.2 1.3 1.4 1.5
2.1 2.2
3.1 3.2 3.3 3.4 3.5
;
PROC PRINT data=C2_STOPOVER;
title "Data=C2_STOPOVER";
RUN;
Proc Contents
To explore the descriptor portion of a SAS data set and list of variables and attributes
Proc Contents Data = work.pilotdata;
Run;
Use the _ALL_ keyword to list all the SAS files in the library and the NODS option to suppress the descriptor portions of the data sets.
proc contents data = sasuser._all_ NODS;
run;
NODS must be used in conjunction with the keyword _ALL_.
Proc Contents Data = work.pilotdata;
Run;
Use the _ALL_ keyword to list all the SAS files in the library and the NODS option to suppress the descriptor portions of the data sets.
proc contents data = sasuser._all_ NODS;
run;
NODS must be used in conjunction with the keyword _ALL_.
Monday, March 17, 2008
PROC PRINT LABEL-SPLIT, WHERE
CONTAINS - selects observations that include ? the specified substring.
where LastName ? 'LAM';
OUTPUT: (LAMBERT, BELLAMY, and ELAM are selected.)
proc print data=tempemp split='*';
id jobcode;
by jobcode;
var gender salary;
sum salary;
label jobcode='Job Code*========'
gender='Gender*======'
salary='Annual Salary*=============';
format salary dollar11.2;
where jobcode in ('PT1','PT2');
title 'Expenses Incurred for';
title2 'Salaries for Pilots';
run;
id jobcode;
The ID statement identifies observations by using the formatted values of the variables that you list instead of by using observation numbers.
sum salary;
The SUM statement totals values of numeric variables. Here the SUM statement totals the values of Salary for each BY group and for the whole report.
label jobcode='Job Code*========'
gender='Gender*======'
salary='Annual Salary*=============';
The LABEL statement associates a label with each variable for the duration of the PROC PRINT step. When you use SPLIT= in the PROC PRINT statement, the procedure uses labels for column headings.
format salary dollar11.2;
The FORMAT statement assigns the DOLLAR12.2 format to Salary for this report.
where jobcode in ('PT1','PT2');
The WHERE statement selects only observations where the value of Jobcode is either PT1 or PT2.
where LastName ? 'LAM';
OUTPUT: (LAMBERT, BELLAMY, and ELAM are selected.)
proc print data=tempemp split='*';
id jobcode;
by jobcode;
var gender salary;
sum salary;
label jobcode='Job Code*========'
gender='Gender*======'
salary='Annual Salary*=============';
format salary dollar11.2;
where jobcode in ('PT1','PT2');
title 'Expenses Incurred for';
title2 'Salaries for Pilots';
run;
id jobcode;
The ID statement identifies observations by using the formatted values of the variables that you list instead of by using observation numbers.
sum salary;
The SUM statement totals values of numeric variables. Here the SUM statement totals the values of Salary for each BY group and for the whole report.
label jobcode='Job Code*========'
gender='Gender*======'
salary='Annual Salary*=============';
The LABEL statement associates a label with each variable for the duration of the PROC PRINT step. When you use SPLIT= in the PROC PRINT statement, the procedure uses labels for column headings.
format salary dollar11.2;
The FORMAT statement assigns the DOLLAR12.2 format to Salary for this report.
where jobcode in ('PT1','PT2');
The WHERE statement selects only observations where the value of Jobcode is either PT1 or PT2.