Wednesday, February 3, 2010

SAS: code for many comorbid conditions by ICD9 (can be adapted)

*The following code has the ICD9 codes for many common coditions;
*NOTE: ICD9 codes have to be "dot" free;
*Code was originally intended to loop over a number of different years in the Medicare file;
*Note DX(Z) is to search over all diagnosis (primary and all secondary);

*CHD;
if substr(DX(Z),1,3) in ("410", "411", "412", "414") then dx_&type.&yr._chd = 1;
else if substr(DX(Z),1,4) in ("4130", "4139", "4292") then dx_&type.&yr._chd = 1;


*Diabetes;
if substr(DX(Z),1,3) in ("250") then dx_&type.&yr._diabet = 1;
else if substr(DX(Z), 1, 4) in ("3572") then dx_&type.&yr._diabet = 1;
else if substr (DX(Z), 1, 5) in ("36200", "36201", "36641") then dx_&type.&yr._diabet= 1;


*Osteoarthitis;
if substr(DX(Z),1,3) in ("715") then dx_&type.&yr._ostoarth = 1;
*Other arthritis;
if substr(DX(Z),1,3) in ('716') then dx_&type.&yr._otharth=1;
*Arthritis;
if substr(DX(Z),1,3)in ('714', '715', '716') then dx_&type.&yr._arth=1;

*Chronic Heart Failure;
if substr(DX(Z),1,3) in ("416", "417", "425", "428") then dx_&type.&yr._chf = 1;
else if substr(dx(Z),1,4) in ("4150", "4290", "4291") then dx_&type.&yr._chf = 1;
else if substr (DX(Z),1,5) in ("40201", "40211", "40291", "40401", "40403", "40411", "40413", "40491", "40493") then dx_&type.&yr._chf = 1;

*Hypertension;
if substr(dx(z),1,3) in ('401', '402', '403', '404', '405') then dx_&type.&yr._hypten = 1;


*Hyperlipidemia;
if substr(dx(Z),1,4) in ("2720", "2721", "2722", "2723", "2724") then dx_&type.&yr._hyplip = 1;


*acute myocardial infarction;
if substr(dx(z),1,3) in ("410") then dx_&type.&yr._ami = 1;


*other acute and subacture forms of ishemic heart disease;
if substr(dx(Z),1,3) in ("411") then dx_&type.&yr._othrAHD = 1;

*Old myocardial infaction;
if substr(dx(Z),1,3) in ("412") then dx_&type.&yr._oldMI = 1;


*Angina;
if substr(dx(z),1,3) in ("413") then dx_&type.&yr._angina = 1;


*other forms of chronic ishemic heart disease;
if substr(dx(z),1,3) in ("414") then dx_&type.&yr._othrCHD = 1;


*Atherosclerosis;
if substr(dx(z),1,3) in ("440") then dx_&type.&yr._athro = 1;
else if substr(dx(z),1,4) in ("4292", "4370", "4371") then dx_&type.&yr._athro = 1;


*Cerebrovascular disease;
if substr(dx(Z),1,3) in ('430', '431','436') then dx_&type.&yr._cerbvas = 1;
else if substr(dx(z),1,4) in ('4320','4321','4329') then dx_&type.&yr._cerbvas = 1;
else if substr(dx(z),1,5) in ('43301','43311','43321', '43331', '43381', '43391', '43401','43411','43491') then dx_&type.&yr._cerbvas = 1;


*Stroke;
if substr(dx(z),1,3) in ("433", "435", "436") then dx_&type.&yr._stroke = 1;
else if substr(dx(z),1,4) in ("4349", "4340") then dx_&type.&yr._stroke =1;


*Peripheral Vascular Disease;
if substr(dx(z),1,3) in ("441", "443") then dx_&type.&yr._prvasdis=1;


*Alcohol or drug dependence;
if substr(dx(z),1,3) in ("303", "304", "305") then dx_&type.&yr._drug = 1;
else if substr(dx(z),1,4) in ("V113") then dx_&type.&yr._drug = 1;


*Cancer;
if substr(dx(z),1,2) in ("14", "15", "16", "17", "18", "19") then dx_&type.&yr._cancer = 1;
else if substr(dx(z),1,3) in ("200", "201", "202", "203", "204",
"205", "206", "207", "208", "230", "231", "232", "233", "234") then dx_&type.&yr._cancer = 1;


*Benign neoplasms;
if substr(dx(z),1,3) in ('209', '235', '236', '237', '238', '239') then dx_&type.&yr._neoplas = 1;
else if substr(dx(z),1,2) in ('21', '22') then dx_&type.&yr._neoplas = 1;

*Rhematoid arthritis;
if substr(dx(z),1,3) in ("714") then dx_&type.&yr._ra = 1;


*End state renal disease;
if substr(dx(z),1,3) in ('584', '585', '586') then dx_&type.&yr._esrd = 1;


*Obesity;
if substr(dx(z),1,4) in ("2780") then dx_&type.&yr._obese = 1;


*Mood disorders;
if substr(dx(z),1,3) in ('296', '300') then dx_&type.&yr._mood = 1;
else if substr(dx(z),1,4) in ('3062') then dx_&type.&yr._mood = 1;


*Depression;
if substr(DX(Z),1,4) in ('2962','2963', '3004', '311') then dx_&type.&yr._deprss = 1;

*Alzhimers;
if substr(dx(z),1,4)='3310' then dx_&type.&yr._alzhm=1;

*Dementia;
if substr(dx(z),1,4) in ('3310', '3311', '3314', '3318', '0461','4380') then dx_&type.&yr._dmntia=1;


*Osteoporosis;
if substr(dx(z),1,4) in ("7330") then dx_&type.&yr._osteo = 1;


*COPD;
if substr(dx(z),1,3) in ("490", "491", "492", "494", "496") then dx_&type.&yr._copd = 1;
else if substr(dx(z), 1, 4) in ("4932") then dx_&type.&yr._copd = 1;

*Asthma;
if substr(dx(z),1,3) in ("493") then dx_&type.&yr._asthma = 1;


*Mental illness;
if "290" <= dx(z) <= "319" then dx_&type.&yr._mental = 1;


end;
run;

STATA code for easy descriptive table export

*This code creates two separate tables for summary statistics for different variables
*In my code I was getting average immunization rates by vaccine for different *development groups and regions

*You may change which summary statitics are pulled in the local display options

local vac dtp hepb hib polio pneumo rota yf measles
local vaclabels "DTP3" "Hepatitis B" "HIB" "Polio" "Pneumo. Conj." "Rotavirus" "Yellow fever" "Measles"

tempname vacyear_dev
tempname vacyear_geo
file open `vacyear_dev' using "Prelim analysis\dsc_vactoadd_`dsource'.txt", write replace
file write `vacyear_dev' ("Table: Number of years to add a vaccine to the entire national schedule by development status") _n _n
file write `vacyear_dev' ("Group") _tab ("Mean") _tab ("Median") _tab ("s.d.") _tab ("Min") _tab ("Max") _tab ("Count") _tab ("Prcnt add") _n

file open `vacyear_geo' using "Prelim analysis\dsc_vactoadd_geo1.txt", write replace
file write `vacyear_geo' ("Table: Number of years to add a vaccine to the entire national schedule by geography") _n _n
file write `vacyear_geo' ("Group") _tab ("Mean") _tab ("Median") _tab ("s.d.") _tab ("Min") _tab ("Max") _tab ("Count") _tab ("Prcnt add") _n

local i 1
foreach name of local vac {
local vacname : word `i' of "`vaclabels'"
file write `vacyear_dev' ("`vacname'") _n
file write `vacyear_geo'("`vacname'") _n

foreach group in "High income" "Upper-middle income" "Lower-middle income" "Lower income" {
* foreach group in "Developed economy" "Economy in transition" "Developing" "Least developed" {

quietly sum yrsto_`name' if devstatus== "`group'", detail
local vmean : display %-9.1f r(mean)
local vmed : display %-9.0f r(p50)
local vsd : display %-9.1f r(sd)
local vmin : display %-9.1f r(min)
local vmax : display %-9.1f r(max)
local vn : display %-9.0f r(N)
quietly sum max_grp if devstatus== "`group'"
local vpct : display %-9.3f `vn'/r(N)
file write `vacyear_dev' ("`group'") _tab (`vmean') _tab (`vmed') _tab (`vsd') _tab (`vmin') _tab (`vmax') _tab (`vn') _tab (`vpct') _n
}
if "`name'" != "pneumo" {
quietly kwallis yrsto_`name', by(devstatus)
local vchi : display %-9.4f r(chi2)
local vdf : display %-9.0f r(df)
local vprob : display %-9.5f chi2tail(`vdf', `vchi')
file write `vacyear_dev' ("Kruskall-wallis test") _tab ("chi2 = ") (`vchi') (" with ") (`vdf') (" df") _tab _tab _tab ("probaility = ") (`vprob') _n _n
}
foreach group in "AFRO" "AMRO" "EMRO" "EURO" "WPRO" "SEARO" {
quietly sum yrsto_`name' if WHO_region == "`group'", detail
local vmean : display %-9.1f r(mean)
local vmed : display %-9.0f r(p50)
local vsd : display %-9.1f r(sd)
local vmin : display %-9.1f r(min)
local vmax : display %-9.1f r(max)
local vn : display %-9.0f r(N)
quietly sum max_grp if WHO_region == "`group'"
local vpct : display %-9.3f `vn'/r(N)
file write `vacyear_geo' ("`group'") _tab (`vmean') _tab (`vmed') _tab (`vsd') _tab (`vmin') _tab (`vmax') _tab (`vn') _tab (`vpct') _n
}
quietly kwallis yrsto_`name', by(WHO_region)
local vchi : display %-9.4f r(chi2)
local vdf : display %-9.0f r(df)
local vprob : display %-9.5f chi2tail(`vdf', `vchi')
file write `vacyear_geo' ("Kruskall-wallis test") _tab ("chi2 = ") (`vchi') (" with ") (`vdf') (" df") _tab _tab _tab ("probaility = ") (`vprob') _n _n

local i = `i' + 1
}
file close `vacyear_dev'
file close `vacyear_geo'

Macro to combine multiple sheets of EXCEL data in stata

Sub Combine()
Dim J As Integer

On Error Resume Next
Sheets(1).Select
Worksheets.Add ' add a sheet in first place
Sheets(1).Name = "Combined"

' copy headings
Sheets(2).Activate
Range("A1").EntireRow.Select
Selection.Copy Destination:=Sheets(1).Range("A1")

' work through sheets
For J = 2 To Sheets.Count ' from sheet 2 to last sheet
Sheets(J).Activate ' make the sheet active
Range("A1").Select
Selection.CurrentRegion.Select ' select all cells in this sheets

' select all lines except title
Selection.Offset(1, 0).Resize(Selection.Rows.Count - 1).Select

' copy cells selected in the new sheet on last line
Selection.Copy Destination:=Sheets(1).Range("A65536").End(xlUp)(2)
Next
End Sub