admin管理员组

文章数量:1794759

sas proc sql 基础入门 (原创作品,转载请注明出处 )

sas proc sql 基础入门 (原创作品,转载请注明出处 )

/*结构*/ /* PROC SQL;  SELECT column(s)  FROM table(s) | view(s)  WHERE expression  GROUP BY column(s)  HAVING expression  ORDER BY column(s); QUIT; */ /*1. 创建数据集a label 等于空去掉原数据集的label*/ data a; set sasuser.Students; label Student_Name= Student_Company = City_State = ; run; /*2. 选出数据集a中的所有数据*/ proc sql; select * from a; quit; /*3. 选择变量student_name 和 student_company所有观测量*/ proc sql; select Student_Name,Student_Company from a; quit; /*4. 限制观测量的数量*/ /*create table 创建新的表格*/ /*使用outobs = 50 即可选择变量student_name 和 student_company 前五十个观测量*/ proc sql outobs = 50; create table student as select Student_Name,Student_Company from a; run; /*5. 给输出的变量重命名,格式为name as name1*/ proc sql; select Student_Name, Student_Company as company from a; quit; /*6. 去除所有变量都重复的观测*/ proc sql; select distinct * from a; quit; /*7. 去除变量student_name 和 student_company重复的观测*/ proc sql; select distinct student_name,student_company from a; quit; /*8. 给变量上标签 label  = ''和格式化 format = .*/ proc sql; select student_name format = $35., student_company label = 'company' from a; quit; /*9. 排序 order by 变量名称 ASC(ascending)升序 DESC(descending)降序*/ proc sql; select student_name,student_company from a order by student_name ASC, student_company DESC; quit; /*10. 选择满足条件的子数据集*/ /*between and : where salary between 1000 and 3000*/ /*contains: where student_name contains 'DE' 或者表示为 where student_name ? 'DE'*/ /*in ();包含括号所有的选项*/ /*is missing or is null: 选出所选变量的值为空的观测*/ /*like:选出类似的观测 _ 单个任意变量 %任意变量任意数量的*/ proc sql; select * from a where student_name contains 'A' and student_company in ('A','B'); quit; proc sql; select * from a where student_company is missing; quit; proc sql; select * from a where student_name like 'A_l%'; quit; /*11. 计算变量表达式calculated*/ /*where calculated new_salary > 500; or where  (salary * 0.1)  > 500;*/ proc sql; select ID, (salary * 0.1) as new_salary from sasuser.pilots where calculated new_salary > 500; quit; proc sql; select ID, (salary * 0.1) as new_salary from sasuser.pilots where  (salary * 0.1)  > 500; quit; /*12. 多条件输出新变量*/ /*case 表达式 = < > NOT NE AND OR IN,between and, contains, ?, is null, is missing = like*/ proc sql; select salary, case when salary between 0 and 1000 then 'LOW' when salary between 1001 and 2000 then 'MEDIUM' when salary between 2001 and 3000 then 'HIGH' else 'VERY HIGH' end as new_flag from sasuser.pilots; quit; /*13. 对数据统计和求和*/ /*count 类似于 proc freq*/ /*max,min,sum 类似于proc means*/ /*avg/mean, count/freq/n,sum,max,min,nmiss,std,var,*/ /*T,uss(uncorrelated sum of square),css(corelatted sum of square),range*/ proc sql; select ID, firstname, count(distinct ID) as t_salary /*distinct 选取唯一的ID进行统计*/ from sasuser.pilots group by ID order by firstname; quit; proc sql; select ID, firstname, max(salary) as M_salary from sasuser.pilots group by firstname order by firstname; quit; proc sql; select max(salary) as M_salary, uss(salary) as uss_s, css(salary) as css_s ,range(salary) as r_s,avg(salary) as a_s,sum(salary) as s_s,std(salary) as std_s from sasuser.pilots; quit; /*14. 计算缺失值的数量 NMISS*/ data miss; input id; cards; 1 . 2 3 4 . . 5 6 . 78 . 1 . 78 ; run; proc sql; create table miss_sts as select nmiss(id) as N_missings, count(id) as N,calculated N_missings + calculated N as total from miss; quit; proc sql; select id, count(id),nmiss(id) from miss group by id; quit; /*15. 去掉某一列和保留某一列*/ /*drop = 变量名称*/ /*keep = 变两名称*/ proc sql; create table student1(drop = city_state) as select * from sasuser.students; quit; /*16. 直接删除某几行*/ /*delete from*/ data pilots; set sasuser.pilots; run; proc sql; delete from pilots where salary > 100000; quit; /*17. 使用having*/ /*对每一个分组进行筛选*/ proc sql; select id, firstname, count(salary) as s_s from sasuser.pilots group by id having s_s >1; quit; /*18. 嵌套使用sql*/ proc sql; create table pilots as select * from sasuser.pilots where salary > 100000; quit; proc sql; select id from sasuser.pilots where id not in(select id from pilots); run;

本文标签: 转载请注明原创作品入门出处基础